In [1]:
!pip install unsloth accelerate bitsandbytes datasets huggingface_hub transformers wandb -q

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.3/64.3 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m358.7/358.7 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.4/59.4 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m506.8/506.8 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.7/47.7 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m24.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m284.4/284.4 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/122.9 MB[0m [31m7.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [52]:
from datasets import load_dataset, Dataset, concatenate_datasets
import random, json
from unsloth import FastLanguageModel
from transformers import TrainingArguments, Trainer, TrainerCallback, DataCollatorForSeq2Seq
import wandb
from huggingface_hub import notebook_login
notebook_login()
wandb.login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…



True

In [50]:
# ============================
# LOAD DATASETS
# ============================

print("Loading GPTeacher...")
gp_teacher = load_dataset("teknium/GPTeacher-General-Instruct", split="train")

print("Loading Dolly 15k...")
dolly = load_dataset("databricks/databricks-dolly-15k", split="train")

# ============================
# PREPROCESS DATA (NORMALIZE TO A SINGLE FORMAT)
# ============================

def normalize_gpteacher(x):
    return {
        "instruction": x["instruction"],
        "input": x.get("input", ""),
        "output": x["response"]
    }

def normalize_dolly(x):
    return {
        "instruction": x["instruction"],
        "input": "",
        "output": x["response"]
    }

gpteacher_norm = gp_teacher.map(normalize_gpteacher)
dolly_norm = dolly.map(normalize_dolly)

# ============================
# MERGE DATASETS
# ============================

merged = concatenate_datasets([gpteacher_norm, dolly_norm])
merged = merged.shuffle(seed=42)

print("Merged dataset size:", len(merged))

# ============================
# OPTIONAL: TAKE A SMALL SUBSET (for free Colab)
# ============================

subset_size = 12000    # ideal for Qwen 3B LoRA on free GPU
merged_small = merged.select(range(subset_size))

# ============================
# CONVERT INTO CHAT FORMAT FOR UNSLOTH/QWEN
# ============================

def to_chat_format(sample):
    prompt = sample["instruction"]
    if sample["input"]:
        prompt += "\nInput:\n" + sample["input"]

    return {
        "messages": [
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": sample["output"]}
        ]
    }

chat_data = merged_small.map(to_chat_format)
print("Chat-converted dataset sample:\n", chat_data[0])

# ============================
# SAVE PROCESSED DATA
# ============================
chat_data.save_to_disk("qwen_merged_dataset")

print("Dataset ready for Unsloth LoRA!")

Loading GPTeacher...
Loading Dolly 15k...
Merged dataset size: 104271
Chat-converted dataset sample:
 {'response': 'Amsterdam was founded in the late 13th century.', 'input': '', 'instruction': 'When was Amsterdam founded?', 'output': 'Amsterdam was founded in the late 13th century.', 'context': '', 'category': 'open_qa', 'messages': [{'content': 'When was Amsterdam founded?', 'role': 'user'}, {'content': 'Amsterdam was founded in the late 13th century.', 'role': 'assistant'}]}


Saving the dataset (0/1 shards):   0%|          | 0/12000 [00:00<?, ? examples/s]

Dataset ready for Unsloth LoRA!


In [51]:
def apply_template(example):
    msgs = []
    for m in example["messages"]:
        c = m.get("content", "")
        if c is None: c = ""
        if not isinstance(c, str): c = str(c)
        msgs.append({"role": m.get("role","user"), "content": c})

    text = tokenizer.apply_chat_template(
        msgs,
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": text}

text_ds = chat_data.map(
    apply_template,
    remove_columns=chat_data.column_names,
)

def tokenize(example):
    out = tokenizer(
        example["text"],
        truncation=True,
        max_length=2048,
    )
    out["labels"] = out["input_ids"].copy()
    return out

tokenized_ds = text_ds.map(
    tokenize,
    remove_columns=["text"],
    batched=True,
)


def flatten_labels(example):
    # Flatten labels if nested
    if isinstance(example["labels"][0], list):
        example["labels"] = [tok for sub in example["labels"] for tok in sub]

    if isinstance(example["input_ids"][0], list):
        example["input_ids"] = [tok for sub in example["input_ids"] for tok in sub]

    return example

tokenized_ds = tokenized_ds.map(flatten_labels)


Map:   0%|          | 0/12000 [00:00<?, ? examples/s]

Map:   0%|          | 0/12000 [00:00<?, ? examples/s]

Map:   0%|          | 0/12000 [00:00<?, ? examples/s]

In [35]:
model_name = "Qwen/Qwen2.5-3B-Instruct"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_name,
    load_in_4bit = True
)

# LoRA Configuration
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha = 32,
    lora_dropout = 0.05,
    bias = "none",
    use_gradient_checkpointing = True,
)

==((====))==  Unsloth 2025.11.4: Fast Qwen2 patching. Transformers: 4.57.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post1. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [57]:
collator = DataCollatorForSeq2Seq(
    tokenizer=tokenizer,
    model=model,
    padding=True,
    return_tensors="pt"
)

args = TrainingArguments(
    output_dir="qwen3b-lora-merged",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=50,
    max_steps=800,
    learning_rate=2e-4,
    fp16=True,
    logging_steps=10,
    report_to=["wandb"],
    run_name="qwen3b_lora_merged",
    remove_unused_columns=False,
)

trainer = Trainer(
    model=model,
    processing_class=tokenizer,
    data_collator=collator,
    train_dataset=tokenized_ds,
    args=args,
)

In [58]:
trainer.train()


model.save_pretrained("qwen3b-lora")
tokenizer.save_pretrained("qwen3b-lora")

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 12,000 | Num Epochs = 1 | Total steps = 800
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 7,372,800 of 3,093,311,488 (0.24% trained)


Step,Training Loss
10,2.3801
20,2.0556
30,1.6821
40,1.332
50,1.3486
60,1.2777
70,1.2584
80,1.1333
90,1.1901
100,1.1913


('qwen3b-lora/tokenizer_config.json',
 'qwen3b-lora/special_tokens_map.json',
 'qwen3b-lora/chat_template.jinja',
 'qwen3b-lora/vocab.json',
 'qwen3b-lora/merges.txt',
 'qwen3b-lora/added_tokens.json',
 'qwen3b-lora/tokenizer.json')

In [60]:
!zip -r qwen3b-lora.zip qwen3b-lora


  adding: qwen3b-lora/ (stored 0%)
  adding: qwen3b-lora/tokenizer_config.json (deflated 89%)
  adding: qwen3b-lora/chat_template.jinja (deflated 71%)
  adding: qwen3b-lora/vocab.json (deflated 61%)
  adding: qwen3b-lora/tokenizer.json (deflated 81%)
  adding: qwen3b-lora/merges.txt (deflated 57%)
  adding: qwen3b-lora/adapter_config.json (deflated 57%)
  adding: qwen3b-lora/special_tokens_map.json (deflated 69%)
  adding: qwen3b-lora/adapter_model.safetensors (deflated 8%)
  adding: qwen3b-lora/added_tokens.json (deflated 67%)
  adding: qwen3b-lora/README.md (deflated 65%)
