# 🚀 Auto-Tuner: Fine-Tune Llama 3 with Unsloth

This notebook fine-tunes Llama 3 8B on your generated dataset using Unsloth (2x faster, 60% less memory).

**Requirements:**
- Free T4 GPU (Runtime → Change runtime type → T4 GPU)
- Dataset uploaded to Google Drive

**Time:** ~20-30 minutes for 100 examples

## 📋 Configuration

**IMPORTANT:** Update these values before running!

In [None]:
# ----------------------------------------------------------------------------
# Notebook settings -- edit the values in this cell before running anything
# ----------------------------------------------------------------------------

# File name of the training data; it must already be uploaded to
# Finetune_Jobs/datasets/ in Google Drive.
DATASET_FILENAME = "dataset-20251115_132156.jsonl"  # ← CHANGE THIS

# Name of the fine-tuned model; it is saved under Finetune_Jobs/models/ in Drive.
MODEL_NAME = "customer-support-bot-v1"  # ← CHANGE THIS

# --- Training hyperparameters ---
MAX_SEQ_LENGTH = 2048        # Maximum sequence length (context window) in tokens
BATCH_SIZE = 2               # Per-device batch size; larger is faster but uses more memory
GRADIENT_ACCUMULATION = 4    # Effective batch size = BATCH_SIZE * GRADIENT_ACCUMULATION (2 * 4 = 8)
LEARNING_RATE = 2e-4         # Optimizer learning rate
NUM_EPOCHS = 3               # Number of passes over the dataset
WARMUP_STEPS = 5             # Learning-rate warmup steps

# Echo the two values the user is expected to change, as a quick sanity check.
print(
    "‚úÖ Configuration loaded",
    f"Dataset: {DATASET_FILENAME}",
    f"Model: {MODEL_NAME}",
    sep="\n",
)

## 🔗 Step 1: Mount Google Drive

In [None]:
from google.colab import drive
import os

# Attach Google Drive to the Colab filesystem.
drive.mount('/content/drive')

# Everything this notebook reads or writes lives under one Drive folder.
DRIVE_ROOT = "/content/drive/MyDrive/Finetune_Jobs"
DATASET_PATH = f"{DRIVE_ROOT}/datasets/{DATASET_FILENAME}"
MODEL_OUTPUT_DIR = f"{DRIVE_ROOT}/models/{MODEL_NAME}"

# Make sure both working folders exist (no error if they already do).
for _subdir in ("datasets", "models"):
    os.makedirs(f"{DRIVE_ROOT}/{_subdir}", exist_ok=True)

# Stop here, with upload instructions, when the configured dataset is absent.
dataset_missing = not os.path.exists(DATASET_PATH)
if dataset_missing:
    raise FileNotFoundError(f"‚ùå Dataset not found: {DATASET_PATH}\n\nPlease upload {DATASET_FILENAME} to Drive: Finetune_Jobs/datasets/")

print(
    "‚úÖ Drive mounted",
    f"‚úÖ Dataset found: {DATASET_PATH}",
    f"‚úÖ Model will be saved to: {MODEL_OUTPUT_DIR}",
    sep="\n",
)

## 📦 Step 2: Install Unsloth

In [None]:
%%capture
# Install Unsloth from its GitHub repository with the "colab-new" extra.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Install the remaining training libraries without resolving their own
# dependencies (--no-deps); xformers and trl are held below the listed versions.
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" "peft" "accelerate" "bitsandbytes"

# NOTE(review): %%capture captures this cell's output, so the message below is
# presumably never displayed -- confirm, and move it to its own cell if it should show.
print("‚úÖ Unsloth installed")

## 🤖 Step 3: Load Base Model (Llama 3 8B)

In [None]:
from unsloth import FastLanguageModel
import torch

# Options for loading the 4-bit Llama 3 8B checkpoint published by Unsloth.
_load_options = dict(
    model_name="unsloth/llama-3-8b-bnb-4bit",
    max_seq_length=MAX_SEQ_LENGTH,
    dtype=None,          # presumably lets Unsloth pick the dtype -- confirm in its docs
    load_in_4bit=True,
)

# Returns the model together with its matching tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(**_load_options)

print(
    "‚úÖ Base model loaded (Llama 3 8B 4-bit)",
    "Model size: ~4.5GB",
    f"Max sequence length: {MAX_SEQ_LENGTH}",
    sep="\n",
)

## 🎛️ Step 4: Add LoRA Adapters

In [None]:
# Names of the projection modules that receive LoRA adapters.
_lora_targets = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

# Wrap the base model with LoRA adapters (rank 16, alpha 16, no dropout, no bias terms).
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=_lora_targets,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
)

# Count only the parameters that will receive gradient updates.
trainable_count = 0
for _param in model.parameters():
    if _param.requires_grad:
        trainable_count += _param.numel()

print("‚úÖ LoRA adapters added")
print(f"Trainable parameters: {trainable_count:,}")

## 📊 Step 5: Load and Preview Dataset

In [None]:
from datasets import load_dataset
import json

# Read the JSONL file from Drive as a single "train" split.
dataset = load_dataset("json", data_files=DATASET_PATH, split="train")

# Fail early with an actionable message instead of a bare IndexError/KeyError
# further down (here or in the formatting step, which also reads 'messages').
if len(dataset) == 0:
    raise ValueError(f"Dataset is empty: {DATASET_PATH}")
if "messages" not in dataset.column_names:
    raise ValueError(
        f"Dataset has no 'messages' column (found: {dataset.column_names}). "
        "Each line must look like "
        '{"messages": [{"role": "user", "content": "..."}, ...]}'
    )


def _preview(text, limit=80):
    """Return *text* cut to *limit* characters, with "..." only if it was cut."""
    text = text or ""  # tolerate a null 'content' field
    return text if len(text) <= limit else f"{text[:limit]}..."


def _first_content(messages, role):
    """Return the content of the first message with the given role, or ''."""
    return next((m['content'] for m in messages if m['role'] == role), '')


print(f"‚úÖ Dataset loaded: {len(dataset)} examples")
print(f"\nFirst example:")
print(json.dumps(dataset[0], indent=2))

# Show the first user/assistant turn of up to three conversations.
print(f"\nüìã Preview of conversations:")
for i in range(min(3, len(dataset))):
    messages = dataset[i]['messages']
    user_msg = _first_content(messages, 'user')
    assistant_msg = _first_content(messages, 'assistant')
    print(f"\nExample {i+1}:")
    print(f"  User: {_preview(user_msg)}")
    print(f"  Assistant: {_preview(assistant_msg)}")

## 🔄 Step 6: Format Dataset for Training

In [None]:
def format_chat(example):
    """Render one dataset row's chat messages into a single training string.

    Args:
        example: A dataset row; its ``'messages'`` entry is handed to the
            tokenizer's chat template.

    Returns:
        A dict with one key, ``"text"``, holding the templated conversation
        as an untokenized string with no generation prompt appended.
    """
    rendered = tokenizer.apply_chat_template(
        example['messages'],
        tokenize=False,
        add_generation_prompt=False,
    )
    return {"text": rendered}

# Run format_chat on each row individually; its result adds a "text" field.
dataset = dataset.map(format_chat, batched=False)

print("‚úÖ Dataset formatted for training")
print("\nFormatted example:")

# Show only the first 500 characters of the first formatted row.
_first_text = dataset[0]['text']
print(f"{_first_text[:500]}...")

## 🏋️ Step 7: Configure Trainer

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments

# Choose mixed precision once: bf16 when the GPU reports support, fp16 otherwise.
_use_bf16 = torch.cuda.is_bf16_supported()

# Optimizer, schedule and logging settings, driven by the configuration cell.
_training_args = TrainingArguments(
    per_device_train_batch_size=BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION,
    warmup_steps=WARMUP_STEPS,
    num_train_epochs=NUM_EPOCHS,
    learning_rate=LEARNING_RATE,
    fp16=not _use_bf16,
    bf16=_use_bf16,
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",
)

# Supervised fine-tuning over the "text" column, without sequence packing.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=MAX_SEQ_LENGTH,
    dataset_num_proc=2,
    packing=False,
    args=_training_args,
)

_effective_batch = BATCH_SIZE * GRADIENT_ACCUMULATION
print("‚úÖ Trainer configured")
print(f"Effective batch size: {_effective_batch}")

## 🚀 Step 8: Start Training!

**This will take 20-30 minutes. Don't close the browser!**

In [None]:
import time

print("üöÄ Starting training...\n")

# Time the whole run; `elapsed` (seconds) and `trainer_stats` are reused when
# the metadata file is written in the save step.
start_time = time.time()
trainer_stats = trainer.train()
elapsed = time.time() - start_time

_minutes = elapsed / 60
print("\n‚úÖ Training complete!")
print(f"Time: {_minutes:.1f} minutes")
print(f"Final loss: {trainer_stats.training_loss:.4f}")

## 💾 Step 9: Save Model to Google Drive

In [None]:
import json

# Write the model and then the tokenizer into the same Drive folder.
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(MODEL_OUTPUT_DIR)

# Record how this model was produced, next to the saved files.
metadata = {
    "model_name": MODEL_NAME,
    "dataset": DATASET_FILENAME,
    "base_model": "unsloth/llama-3-8b-bnb-4bit",
    "training_loss": float(trainer_stats.training_loss),
    "num_examples": len(dataset),
    "num_epochs": NUM_EPOCHS,
    "training_time_minutes": elapsed / 60,
    "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
}

# Serialize once; the same text goes to the file and to the notebook output.
_metadata_json = json.dumps(metadata, indent=2)
with open(f"{MODEL_OUTPUT_DIR}/metadata.json", "w") as f:
    f.write(_metadata_json)

print(f"‚úÖ Model saved to: {MODEL_OUTPUT_DIR}")
print("\nMetadata:")
print(_metadata_json)

## 🧪 Step 10: Test the Model

In [None]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

test_messages = [
    {"role": "user", "content": "Hello! Can you help me?"}
]

# Tokenize the conversation with the chat template; add_generation_prompt=True
# ends the prompt where the assistant's reply is expected to begin.
inputs = tokenizer.apply_chat_template(
    test_messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt"
).to("cuda")

print("ü§ñ Testing model...\n")
# Print the prompt from test_messages so the echo cannot drift from what was sent.
print(f"User: {test_messages[0]['content']}\n")

outputs = model.generate(
    input_ids=inputs,
    max_new_tokens=128,
    temperature=0.7,
    top_p=0.9,
    do_sample=True
)

# Full decoded sequence (prompt + completion), kept for inspection.
response = tokenizer.decode(outputs[0], skip_special_tokens=True)

# generate() returns the prompt tokens followed by the new tokens. Decode only
# the new ones rather than splitting the decoded text on "assistant\n", which
# truncates any reply that itself contains that text.
prompt_length = inputs.shape[-1]
assistant_response = tokenizer.decode(
    outputs[0][prompt_length:],
    skip_special_tokens=True,
).strip()

print(f"Assistant: {assistant_response}")
print("\nüéâ All done! Your model is ready to use.")