In [None]:

!pip install -q transformers accelerate datasets peft bitsandbytes safetensors huggingface_hub trl evaluate


In [None]:

import json
from pathlib import Path

# Location of the conversation dump; upload the file to the runtime before running.
DATA_PATH = "/content/fintune5.json"

# Read and parse the whole JSON document in one step.
data = json.loads(Path(DATA_PATH).read_text(encoding="utf-8"))

# Expected layout: a list of conversations, each one a list of
# {"role": ..., "content": ...} turn dicts.
conversation_count = len(data)
print("Number of conversations:", conversation_count)

# Preview the first six turns of the first conversation as a shape check.
first_conversation = data[0]
print(first_conversation[:6])


In [None]:

def _turn_text(turn):
    """Return the stripped text of a turn dict.

    A missing, ``None`` or non-string ``content`` yields ``""`` so the caller can
    skip the turn instead of failing on ``.strip()``.
    """
    content = turn.get("content")
    return content.strip() if isinstance(content, str) else ""


def _pair_turns(conversations):
    """Flatten conversations into single-turn training examples.

    Args:
        conversations: iterable of conversations, each a list of turn dicts
            carrying ``"role"`` and ``"content"`` keys.

    Returns:
        A list of ``{"input": <user text>, "response": <assistant text>}`` dicts,
        one per user turn that is immediately followed by an assistant turn and
        where both texts are non-empty after stripping.
    """
    examples = []
    for conv in conversations:
        # Walk adjacent (turn, next_turn) pairs.
        for current, following in zip(conv, conv[1:]):
            if current.get("role") != "user" or following.get("role") != "assistant":
                continue
            user_msg = _turn_text(current)
            assistant_msg = _turn_text(following)
            if user_msg and assistant_msg:
                examples.append({"input": user_msg, "response": assistant_msg})
    return examples


flat = _pair_turns(data)

print("Flat examples:", len(flat))
# peek first 5
for ex in flat[:5]:
    print("-----")
    print("INPUT:", ex["input"])
    print("RESPONSE:", ex["response"])


In [None]:
# Colab cell (python)
import json, gzip

# Destination of the newline-delimited JSON file: one flattened example per line.
out_path = "/content/fintune_flat.jsonl"

# ensure_ascii=False keeps non-ASCII characters as-is instead of \u escapes.
serialized_examples = (json.dumps(ex, ensure_ascii=False) + "\n" for ex in flat)
with open(out_path, "w", encoding="utf-8") as fout:
    fout.writelines(serialized_examples)

print("Saved flat JSONL to", out_path)


In [None]:

from datasets import load_dataset
# Load the JSONL file through the "json" loader, requesting the "train" split.
examples_ds = load_dataset("json", data_files=out_path, split="train")

# Hold out 5% of the examples as a test split; the seed is fixed at 42.
ds = examples_ds.train_test_split(test_size=0.05, seed=42)
print(ds)


In [None]:

# Authenticate this runtime against the Hugging Face Hub.
from huggingface_hub import login

# Called with no arguments — presumably prompts interactively for an access token;
# TODO confirm in the target runtime. Never paste a token into the notebook itself.
login()

In [None]:

from transformers import AutoTokenizer, AutoModelForCausalLM

# Hub id or local path of the base causal LM to fine-tune.
# NOTE(review): left empty here — it must be filled in before running, since an
# empty identifier cannot be resolved to a model.
BASE_MODEL = ""
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, use_fast=False)

# The tokenization step pads every example to a fixed length, which requires a
# pad token. Some causal-LM tokenizers define none, so fall back to EOS.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token


In [None]:
def tokenize_and_mask_fn(examples, max_length=512):
    """Tokenize a batch of prompt/response pairs and build loss-masked labels.

    Each example is rendered as ``"User: {input}\\nAssistant: {response}"`` and
    padded/truncated to ``max_length`` tokens. ``labels`` mirrors ``input_ids``
    except that prompt tokens and padding positions are set to -100 so the loss
    only covers the assistant response.

    Args:
        examples: batch dict with parallel lists under ``"input"`` and ``"response"``.
        max_length: fixed sequence length used for truncation and padding.

    Returns:
        The tokenizer output for the full texts with an added ``"labels"`` entry
        (a list of lists, one per example, each ``max_length`` long).

    Notes:
        Relies on the module-level ``tokenizer``.
        Assumes the prompt's tokens form a prefix of the full text's tokens;
        a tokenizer that merges across the "Assistant:" boundary could shift the
        mask by a token — TODO confirm for the chosen tokenizer.
    """
    inputs = examples["input"]
    responses = examples["response"]
    texts = [f"User: {u}\nAssistant: {r}" for u, r in zip(inputs, responses)]
    prompt_texts = [f"User: {u}\nAssistant:" for u in inputs]

    tokenized_full = tokenizer(
        texts,
        truncation=True,
        max_length=max_length,
        padding="max_length",
        return_attention_mask=True,
    )
    # Tokenize prompts WITHOUT padding so each list length is the exact prompt
    # token count, independent of the pad token id or the padding side.
    tokenized_prompt = tokenizer(prompt_texts, truncation=True, max_length=max_length)

    labels = []
    for input_ids, attention_mask, prompt_ids in zip(
        tokenized_full["input_ids"],
        tokenized_full["attention_mask"],
        tokenized_prompt["input_ids"],
    ):
        prompt_remaining = len(prompt_ids)
        # Build a fresh list per example: labels must never alias input_ids,
        # otherwise writing -100 would corrupt the model inputs too.
        row = []
        for tok_id, is_real in zip(input_ids, attention_mask):
            if not is_real:
                # Padding position (left or right): excluded from the loss.
                row.append(-100)
            elif prompt_remaining > 0:
                # Still inside the prompt: excluded from the loss.
                row.append(-100)
                prompt_remaining -= 1
            else:
                row.append(tok_id)
        labels.append(row)

    tokenized_full["labels"] = labels
    return tokenized_full

# Column names of the train split are passed to remove_columns so the mapped
# dataset carries only what tokenize_and_mask_fn returns.
raw_columns = ds["train"].column_names
tokenized = ds.map(
    tokenize_and_mask_fn,
    batched=True,
    batch_size=200,
    remove_columns=raw_columns,
)

# Inspect one processed training example.
first_train_example = tokenized["train"][0]
print(first_train_example)


In [None]:
# Colab cell (python)
import torch
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    BitsAndBytesConfig,
    DataCollatorForSeq2Seq,
    Trainer,
    TrainingArguments,
)

# Prefer a plain fp16 load; fall back to 8-bit quantization if that fails.
try:
    model = AutoModelForCausalLM.from_pretrained(BASE_MODEL, device_map="auto", torch_dtype=torch.float16)
except Exception as e:
    print("Float16 load failed, trying 8-bit (slower but fits small GPUs):", e)
    # Quantization options go through BitsAndBytesConfig; the bare
    # `load_in_8bit=True` keyword is deprecated.
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
        device_map="auto",
    )

# Run the k-bit preparation ONLY when the model really is quantized. On a plain
# fp16 model it would upcast the base weights to fp32 and undo the memory
# savings of the half-precision load.
loaded_in_kbit = getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False)
if loaded_in_kbit:
    model = prepare_model_for_kbit_training(model)

lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj","k_proj","v_proj","o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)

# Keep the trainable (adapter) weights in fp32 so mixed-precision training with
# a gradient scaler does not have to unscale fp16 gradients.
for param in model.parameters():
    if param.requires_grad:
        param.data = param.data.float()

model.print_trainable_parameters()


In [None]:
!pip install -U transformers accelerate


In [None]:
# Colab cell (python)
import inspect

# Single source of truth for where checkpoints and the final model are written.
OUTPUT_DIR = "./qwen-lora"

# Newer transformers releases renamed `evaluation_strategy` to `eval_strategy`.
# Use whichever keyword the installed TrainingArguments actually accepts so the
# cell runs on both older and newer versions.
_training_arg_names = inspect.signature(TrainingArguments.__init__).parameters
eval_strategy_key = "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=8,
    num_train_epochs=3,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=20,
    save_strategy="epoch",
    save_total_limit=3,
    report_to=[],
    dataloader_pin_memory=False,
    **{eval_strategy_key: "epoch"},
)

data_collator = DataCollatorForSeq2Seq(tokenizer, pad_to_multiple_of=8, return_tensors="pt")
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["test"],
    data_collator=data_collator
)

trainer.train()
trainer.save_model(OUTPUT_DIR)


In [None]:

# Write the adapter to an explicit directory; an empty path is not a usable target.
ADAPTER_DIR = "./qwen-lora-adapter"
model.save_pretrained(ADAPTER_DIR)   # adapter + config

# Example quick test:
from transformers import pipeline
# No `device=` argument: the model was loaded with device_map="auto", and the
# pipeline rejects an explicit device for models already placed that way.
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
print(pipe("User: How do I start budgeting?\nAssistant:", max_new_tokens=120)[0]["generated_text"])
