In [None]:
#!/usr/bin/env python3
"""
THIRD SET COLAB 2: LoRA Fine-tuning with OpenOrca
Uses Unsloth for efficient training
Dataset: OpenOrca (reasoning tasks)
Model: SmolLM2-135M with standard LoRA
"""

print("="*80)
print("🚀 THIRD SET COLAB 2: LORA FINE-TUNING WITH OPENORCA")
print("="*80)

# INSTALL UNSLOTH
print("\n📦 Installing Unsloth...")
import subprocess
subprocess.run("pip install -q unsloth", shell=True)
subprocess.run("pip uninstall -y unsloth-colab xformers-local flash-attn", shell=True)
print("✅ Unsloth installed!")

# IMPORTS
print("\n📚 Importing...")
from unsloth import FastLanguageModel
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer
import torch

print("✅ Imported!")
print(f"GPU: {torch.cuda.is_available()}")

# LOAD MODEL
print("\n📥 Loading SmolLM2-135M...")
max_seq_length = 2048
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/SmolLM2-135M-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=None,
    load_in_4bit=True,
)

print("✅ Model loaded!")

# ADD STANDARD LORA
print("\n🔧 Adding standard LoRA (efficient)...")
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # Standard rank for efficient training
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

print("✅ LoRA configured!")
trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
total = sum(p.numel() for p in model.parameters())
print(f"Trainable: {trainable:,} ({trainable/total*100:.2f}%)")

# LOAD OPENORCA DATASET
print("\n📚 Loading OpenOrca dataset...")
dataset = load_dataset("Open-Orca/OpenOrca", split="train[:500]")
print(f"✅ Loaded {len(dataset)} reasoning examples!")

# FORMAT
print("\n🔧 Formatting...")

def format_orca(examples):
    texts = []
    for i in range(len(examples['question'])):
        system = examples['system_prompt'][i]
        question = examples['question'][i]
        response = examples['response'][i]

        text = f"System: {system}\n\nQuestion: {question}\n\nAnswer: {response}"
        texts.append(text + tokenizer.eos_token)

    return {"text": texts}

dataset = dataset.map(format_orca, batched=True, remove_columns=dataset.column_names)

print(f"✅ Formatted {len(dataset)} examples!")
print(f"\nExample:\n{dataset[0]['text'][:300]}...")

# TRAINING
print("\n🚀 LoRA training with Unsloth...")
training_args = TrainingArguments(
    output_dir="./lora_orca",
    num_train_epochs=1,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,
    warmup_steps=5,
    logging_steps=10,
    save_strategy="no",
    fp16=True,
    report_to="none",
    max_steps=50,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    args=training_args,
)

trainer.train()
print("\n✅ LoRA training complete!")

# INFERENCE
print("\n🧪 Testing LoRA model...")
FastLanguageModel.for_inference(model)

test_prompts = [
    "System: You are a helpful AI assistant.\n\nQuestion: What is Python?\n\nAnswer: ",
    "System: You are a helpful AI assistant.\n\nQuestion: Explain photosynthesis briefly.\n\nAnswer: ",
]

for prompt in test_prompts:
    print(f"\n{'='*60}")
    q = prompt.split("Question:")[1].split("Answer:")[0].strip()
    print(f"Question: {q}")
    print("Answer:", end=" ")

    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=80,
        temperature=0.7,
        use_cache=True,
        pad_token_id=tokenizer.eos_token_id
    )

    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    answer = response.split("Answer:")[-1].strip()
    print(answer)

# SAVE
print("\n💾 Saving...")
model.save_pretrained("./orca_lora_model")
tokenizer.save_pretrained("./orca_lora_model")

print("\n" + "="*80)
print("🎉 THIRD SET COLAB 2 COMPLETE!")
print("="*80)
print("Summary:")
print("  ✓ Model: SmolLM2-135M with Unsloth")
print("  ✓ Dataset: OpenOrca (500 reasoning examples)")
print("  ✓ Method: Standard LoRA (r=16)")
print(f"  ✓ Trainable: {trainable/total*100:.2f}% (efficient!)")
print("="*80)

🚀 THIRD SET COLAB 2: LORA FINE-TUNING WITH OPENORCA

📦 Installing Unsloth...
✅ Unsloth installed!

📚 Importing...
🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
✅ Imported!
GPU: True

📥 Loading SmolLM2-135M...
==((====))==  Unsloth 2025.11.1: Fast Llama patching. Transformers: 4.57.1.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/112M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/158 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/742 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.05.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.


✅ Model loaded!

🔧 Adding standard LoRA (efficient)...


Unsloth 2025.11.1 patched 30 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


✅ LoRA configured!
Trainable: 1,843,200 (2.21%)

📚 Loading OpenOrca dataset...


README.md: 0.00B [00:00, ?B/s]

1M-GPT4-Augmented.parquet:   0%|          | 0.00/1.01G [00:00<?, ?B/s]

3_5M-GPT3_5-Augmented.parquet:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

Generating train split: 0 examples [00:00, ? examples/s]

✅ Loaded 500 reasoning examples!

🔧 Formatting...


Map:   0%|          | 0/500 [00:00<?, ? examples/s]

✅ Formatted 500 examples!

Example:
System: 

Question: You will be given a definition of a task first, then some input of the task.
This task is about using the specified sentence and converting the sentence to Resource Description Framework (RDF) triplets of the form (subject, predicate object). The RDF triplets generated must be su...

🚀 LoRA training with Unsloth...


Unsloth: Tokenizing ["text"] (num_proc=12):   0%|          | 0/500 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 500 | Num Epochs = 2 | Total steps = 50
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 x 4 x 1) = 16
 "-____-"     Trainable parameters = 1,843,200 of 136,358,784 (1.35% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,2.7656
20,2.6888
30,2.5074
40,2.5731
50,2.529



✅ LoRA training complete!

🧪 Testing LoRA model...

Question: What is Python?
Answer: Python is a programming language that is used to create websites, applications, and software. It is a popular programming language that is used to develop websites, applications, and software. It is a general-purpose programming language that is used to develop web applications. It is a high-level programming language that is used to develop web applications. It is a general-purpose programming language that is used to

Question: Explain photosynthesis briefly.
Answer: Photosynthesis is the process by which plants use sunlight to convert carbon dioxide and water into glucose and oxygen. Plants use sunlight to photosynthesize and produce glucose and oxygen.

💾 Saving...

🎉 THIRD SET COLAB 2 COMPLETE!
Summary:
  ✓ Model: SmolLM2-135M with Unsloth
  ✓ Dataset: OpenOrca (500 reasoning examples)
  ✓ Method: Standard LoRA (r=16)
  ✓ Trainable: 2.21% (efficient!)
