In [None]:
#!/usr/bin/env python3
"""
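THIRD SET COLAB 1: Full Fine-tuning with Dolly Dataset
Uses Unsloth for 2x faster training
Dataset: Databricks Dolly 15k (instruction following; first 600 examples of the train split)
Model: SmolLM2-135M (4-bit checkpoint) with high-rank LoRA (r=64) on all attention and MLP projections
Note: "full fine-tuning" here means high-rank LoRA adapters; the base weights are not all updated.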
"""

print("="*80)
print("🚀 THIRD SET COLAB 1: FULL FINE-TUNING WITH DOLLY")
print("="*80)

# INSTALL UNSLOTH
print("\n📦 Installing Unsloth...")
import subprocess
import sys

# Run pip through the interpreter that is executing this script, using an
# argument list instead of a shell string. check=True makes a failed install
# raise CalledProcessError here rather than printing a false success message
# and failing later at `import unsloth`.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "-q", "unsloth"],
    check=True,
)
# Best-effort removal of packages the original script uninstalls
# (presumably because they conflict with Unsloth - TODO confirm).
# A failure here is deliberately not fatal.
subprocess.run(
    [
        sys.executable, "-m", "pip", "uninstall", "-y",
        "unsloth-colab", "xformers-local", "flash-attn",
    ],
    check=False,
)
print("✅ Unsloth installed!")

# IMPORTS
print("\n📚 Importing...")
# NOTE(review): unsloth is imported ahead of transformers/trl. Its import
# prints "Will patch ..." messages, so it appears to patch those libraries at
# import time - keep this order unless confirmed otherwise.
from unsloth import FastLanguageModel
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
import torch

print("✅ Imported!")
# Reports whether torch can see a CUDA device; later steps move tensors to the GPU.
print(f"GPU: {torch.cuda.is_available()}")

# LOAD MODEL WITH UNSLOTH
print("\n📥 Loading SmolLM2-135M with Unsloth...")
# Sequence-length limit shared by the loader here and the trainer further down.
max_seq_length = 2048

# Loader options collected in one mapping: the pre-quantized 4-bit checkpoint,
# loaded in 4-bit, with dtype left as None (no explicit precision requested).
_load_options = dict(
    model_name="unsloth/SmolLM2-135M-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)
model, tokenizer = FastLanguageModel.from_pretrained(**_load_options)

print("✅ Model loaded!")

# ADD HIGH-RANK LORA FOR FULL FINE-TUNING
print("\n🔧 Configuring FULL fine-tuning (high-rank LoRA)...")
# Attach rank-64 LoRA adapters to every attention and MLP projection.
model = FastLanguageModel.get_peft_model(
    model,
    r=64,  # High rank for full-like fine-tuning
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=128,
    # Unsloth only fast-patches the LoRA layers when dropout is 0 and bias is
    # "none". With dropout=0.1 / bias="all" it warns about a performance hit
    # and reports 0 QKV / O / MLP layers patched, defeating the speed-up.
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

print("✅ High-rank LoRA configured!")
# Use PEFT's counter instead of summing numel(): 4-bit quantized weights are
# stored packed, so numel() under-counts the base model and inflates the
# trainable percentage (19.35% printed vs 12.68% reported by the trainer).
trainable, total = model.get_nb_trainable_parameters()
print(f"Trainable: {trainable:,} ({trainable/total*100:.2f}%)")

# LOAD DOLLY DATASET
print("\n📚 Loading Databricks Dolly 15k dataset...")
# The split expression limits the download to the first 600 training rows.
dataset = load_dataset(
    "databricks/databricks-dolly-15k",
    split="train[:600]",
)
num_examples = len(dataset)
print(f"✅ Loaded {num_examples} instruction examples!")

# FORMAT DATASET
print("\n🔧 Formatting...")

# Alpaca-style template with three positional slots, in order:
# instruction, input (context), response.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input "
    "that provides further context. Write a response that appropriately "
    "completes the request.\n"
    "\n"
    "### Instruction:\n"
    "{}\n"
    "\n"
    "### Input:\n"
    "{}\n"
    "\n"
    "### Response:\n"
    "{}"
)

# End-of-sequence marker appended to every training example.
EOS_TOKEN = tokenizer.eos_token

def formatting_func(examples):
    """Render a batch of Dolly rows into Alpaca-style training strings.

    Args:
        examples: Column-oriented batch with "instruction", "context" and
            "response" keys, each holding a sequence of equal length.

    Returns:
        A dict with a single "text" key holding one formatted prompt per row,
        each terminated with ``EOS_TOKEN``.
    """
    rows = zip(examples["instruction"], examples["context"], examples["response"])
    # A missing/empty context is rendered as an empty "### Input:" section.
    return {
        "text": [
            alpaca_prompt.format(task, context or "", answer) + EOS_TOKEN
            for task, context, answer in rows
        ]
    }

# Replace the raw Dolly columns with the single rendered "text" column.
raw_columns = dataset.column_names
dataset = dataset.map(formatting_func, batched=True, remove_columns=raw_columns)

formatted_count = len(dataset)
print(f"✅ Formatted {formatted_count} examples!")
# Show the first 300 characters of the first rendered example as a sanity check.
preview = dataset[0]["text"][:300]
print(f"\nExample:\n{preview}...")

# TRAINING
print("\n🚀 Starting full fine-tuning with Unsloth (2x faster)...")
from unsloth import is_bfloat16_supported

# The model was loaded with dtype=None (auto-detect), so mixed precision must
# follow the hardware: bf16 where the GPU supports it, fp16 otherwise.
# Hard-coding fp16=True only suits GPUs without bf16 (e.g. the T4).
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="./full_ft_dolly",
    # Run length is set by max_steps alone; it overrides num_train_epochs, so
    # the previous num_train_epochs=1 had no effect (the trainer log reported
    # "Num Epochs = 2") and has been dropped.
    max_steps=60,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,  # effective batch size 2 x 8 = 16
    learning_rate=2e-4,
    warmup_steps=10,
    logging_steps=10,
    save_strategy="no",  # no intermediate checkpoints
    fp16=not use_bf16,
    bf16=use_bf16,
    report_to="none",
)

# NOTE(review): `tokenizer=`, `dataset_text_field=` and `max_seq_length=` are
# accepted by the Unsloth-patched SFTTrainer this script ran with; plain TRL
# releases may expect them elsewhere - confirm before running without Unsloth.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=training_args,
)

trainer.train()
print("\n✅ Full fine-tuning complete!")

# INFERENCE
print("\n🧪 Testing fully fine-tuned model...")
FastLanguageModel.for_inference(model)

test_instructions = [
    "Explain what is machine learning in simple terms",
    "Write a Python function to check if a number is prime",
]

# Build the prompts from the same template used for training (empty input,
# empty response slot). The hand-written prompts used a different preamble,
# so the model was being queried in a format it was not fine-tuned on.
test_prompts = [
    alpaca_prompt.format(instruction, "", "") for instruction in test_instructions
]

for instruction, prompt in zip(test_instructions, test_prompts):
    print(f"\n{'='*60}")
    print(f"Instruction: {instruction}")
    print("Response:", end=" ")

    # Follow the model's device instead of hard-coding "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        # temperature only takes effect when sampling is enabled.
        do_sample=True,
        temperature=0.7,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens; splitting the full text on
    # "### Response:" breaks if the model emits that marker again.
    prompt_length = inputs["input_ids"].shape[1]
    answer = tokenizer.decode(
        outputs[0][prompt_length:], skip_special_tokens=True
    ).strip()
    print(answer)

# SAVE
print("\n💾 Saving...")
# Model and tokenizer are written side by side into the same directory.
save_dir = "./dolly_full_ft_model"
for artifact in (model, tokenizer):
    artifact.save_pretrained(save_dir)

banner = "="*80
print("\n" + banner)
print("🎉 THIRD SET COLAB 1 COMPLETE!")
print(banner)

# Closing recap; the percentage reuses the counts computed after LoRA setup.
summary_lines = (
    "Summary:",
    "  ✓ Model: SmolLM2-135M with Unsloth",
    "  ✓ Dataset: Databricks Dolly 15k (600 examples)",
    "  ✓ Method: Full fine-tuning (high-rank LoRA r=64)",
    f"  ✓ Trainable: {trainable/total*100:.2f}% of parameters",
    "  ✓ Training: 2x faster with Unsloth optimization",
)
for line in summary_lines:
    print(line)
print(banner)

🚀 THIRD SET COLAB 1: FULL FINE-TUNING WITH DOLLY

📦 Installing Unsloth...
✅ Unsloth installed!

📚 Importing...
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
✅ Imported!
GPU: True

📥 Loading SmolLM2-135M with Unsloth...
==((====))==  Unsloth 2025.11.1: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/112M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/742 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth: bias = `none` is supported for fast patching. You are using bias = all.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


✅ Model loaded!

🔧 Configuring FULL fine-tuning (high-rank LoRA)...


Unsloth 2025.11.1 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


✅ High-rank LoRA configured!
Trainable: 19,537,920 (19.35%)

📚 Loading Databricks Dolly 15k dataset...


README.md: 0.00B [00:00, ?B/s]

databricks-dolly-15k.jsonl:   0%|          | 0.00/13.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/15011 [00:00<?, ? examples/s]

✅ Loaded 600 instruction examples!

🔧 Formatting...


Map:   0%|          | 0/600 [00:00<?, ? examples/s]

✅ Formatted 600 examples!

Example:
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
When did Virgin Australia start operating?

### Input:
Virgin Australia, the trading name of Virgin Australia Airlines Pty L...

🚀 Starting full fine-tuning with Unsloth (2x faster)...


Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/600 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 600 | Num Epochs = 2 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 8
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 8 x 1) = 16
 "-____-"     Trainable parameters = 19,537,920 of 154,053,504 (12.68% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,2.6719
20,2.1608
30,1.9721
40,1.9948
50,1.8557
60,2.0021



✅ Full fine-tuning complete!

🧪 Testing fully fine-tuned model...

Instruction: Explain what is machine learning in simple terms
Response: Machine learning is a branch of computer science that deals with the development of algorithms that can be used to teach computers to perform tasks that would normally require human intelligence.

Instruction: Write a Python function to check if a number is prime
Response: True

###

💾 Saving...

🎉 THIRD SET COLAB 1 COMPLETE!
Summary:
  ✓ Model: SmolLM2-135M with Unsloth
  ✓ Dataset: Databricks Dolly 15k (600 examples)
  ✓ Method: Full fine-tuning (high-rank LoRA r=64)
  ✓ Trainable: 19.35% of parameters
  ✓ Training: 2x faster with Unsloth optimization
