# Day 2: LoRA Fine-Tuning

This notebook implements **LoRA (Low-Rank Adaptation)** - a parameter-efficient fine-tuning method.

**Method**: LoRA keeps the base model weights frozen and trains only ~0.1% of the parameters by adding small low-rank adapter matrices to selected layers.

**Model**: mistralai/Mistral-7B-v0.1

**Expected Time**: 1-2 hours (faster than full fine-tuning!)

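**GPU Required**: A100 (80GB) — the training arguments in Section 6 use BF16 precision and batch sizes tuned for an A100; a T4 (15GB) has no native BF16 support.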

**Target Accuracy**: 80-90% (close to full fine-tuning performance)

## 1. Setup Environment

In [None]:
# Check GPU: run the `nvidia-smi` shell command so the attached GPU can be confirmed before the model is loaded
!nvidia-smi

In [None]:
# Install dependencies (including PEFT for LoRA)
!pip install -q torch transformers accelerate peft datasets evaluate scikit-learn pandas numpy wandb trl

In [None]:
# Import libraries
import inspect
import json
import time
from datetime import datetime

import numpy as np
import pandas as pd
import torch
import wandb
from datasets import Dataset
from peft import LoraConfig, get_peft_model, TaskType
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support, classification_report
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding
)

# Report the PyTorch build and, when a CUDA device is visible, its name and total memory.
cuda_ready = torch.cuda.is_available()
print(f"‚úÖ PyTorch version: {torch.__version__}")
print(f"‚úÖ CUDA available: {cuda_ready}")
if cuda_ready:
    # Device 0 is the only device inspected here.
    gpu_name = torch.cuda.get_device_name(0)
    gpu_total_bytes = torch.cuda.get_device_properties(0).total_memory
    print(f"‚úÖ GPU: {gpu_name}")
    print(f"‚úÖ GPU Memory: {gpu_total_bytes / 1e9:.2f} GB")

## 2. Mount Google Drive and Load Data

In [None]:
# NOTE(review): `google.colab` is presumably only importable inside a Google Colab runtime.
from google.colab import drive
# Mount Google Drive at /content/drive; the data and results paths used in later cells live under this mount point.
drive.mount('/content/drive')

In [None]:
# Load data
data_path = '/content/drive/MyDrive/Colab Notebooks/llm-finetuning-showdown/processed'

train_df = pd.read_csv(f'{data_path}/train.csv')
val_df = pd.read_csv(f'{data_path}/val.csv')
test_df = pd.read_csv(f'{data_path}/test.csv')

with open(f'{data_path}/label_mapping.json', 'r') as f:
    label_info = json.load(f)

# Convert text labels to numeric IDs
label_to_id = label_info['label_to_id']


def encode_labels(df, split_name):
    """Return a copy of ``df`` whose 'label' column holds integer class IDs.

    Args:
        df: DataFrame with a 'label' column of label names.
        split_name: Name of the split, used only in the error message.

    Returns:
        A new DataFrame with 'label' mapped through ``label_to_id`` as int64.

    Raises:
        ValueError: If any label is missing from ``label_to_id``.
    """
    encoded = df['label'].map(label_to_id)
    # Series.map yields NaN for unmapped values, which would silently turn the
    # column into floats; fail loudly here instead of inside the training loop.
    missing = encoded.isna()
    if missing.any():
        unknown = sorted(df.loc[missing, 'label'].astype(str).unique())
        raise ValueError(
            f"{split_name} split has {int(missing.sum())} row(s) with labels "
            f"not present in label_mapping.json: {unknown}"
        )
    return df.assign(label=encoded.astype('int64'))


train_df = encode_labels(train_df, 'train')
val_df = encode_labels(val_df, 'val')
test_df = encode_labels(test_df, 'test')

print(f"‚úÖ Train samples: {len(train_df)}")
print(f"‚úÖ Val samples: {len(val_df)}")
print(f"‚úÖ Test samples: {len(test_df)}")
print(f"\n‚úÖ Number of categories: {label_info['num_labels']}")
print(f"\n‚úÖ Labels converted to numeric IDs")
# The expected range is derived from num_labels rather than hard-coded.
print(f"   First label (should be 0-{label_info['num_labels'] - 1}): {train_df['label'].iloc[0]}")

## 3. Initialize Weights & Biases

In [None]:
# Login to W&B
wandb.login()

# Initialize project.
# The values below are recorded as run metadata only; they must mirror the
# LoraConfig and TrainingArguments cells further down.
wandb.init(
    project="llm-finetuning-showdown",
    name="lora-finetuning",
    config={
        "method": "lora",
        "model": "mistralai/Mistral-7B-v0.1",
        "task": "resume_classification",
        "num_labels": label_info['num_labels'],
        "learning_rate": 1e-4,
        "batch_size": 16,  # matches per_device_train_batch_size in TrainingArguments
        "epochs": 3,
        "lora_r": 8,
        "lora_alpha": 16,
        "lora_dropout": 0.1
    }
)

## 4. Load Model with LoRA Configuration

In [None]:
model_name = "mistralai/Mistral-7B-v0.1"

print(f"Loading base model: {model_name}")

# Seed the torch RNGs before any weights are created so that randomly
# initialised parameters (here and in the cells that follow, when run in
# order) are the same on every Restart & Run All.
torch.manual_seed(42)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the EOS token for padding and pad on the right.
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load base model
base_model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
    num_labels=label_info['num_labels'],
    torch_dtype=torch.float16,
    device_map="auto"
)
# Keep the model's pad token id in sync with the tokenizer's.
base_model.config.pad_token_id = tokenizer.pad_token_id

print(f"‚úÖ Base model loaded")
print(f"üìä Total parameters: {base_model.num_parameters():,}")

In [None]:
# Configure LoRA: rank-8 adapters on the q_proj and v_proj modules, no bias training.
lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["q_proj", "v_proj"],
    bias="none",
    task_type=TaskType.SEQ_CLS
)

# Wrap the base model with the adapters and show PEFT's own summary.
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()

# Count parameters in a single pass; these totals are reused when results are saved.
trainable_params = 0
total_params = 0
for param in model.parameters():
    param_count = param.numel()
    total_params += param_count
    if param.requires_grad:
        trainable_params += param_count
trainable_percent = 100 * trainable_params / total_params

print(f"\n‚úÖ LoRA Configuration Applied")
print(f"üìä Trainable parameters: {trainable_params:,} ({trainable_percent:.2f}%)")
print(f"üìä Total parameters: {total_params:,}")
print(f"üéØ Parameter reduction: {100 - trainable_percent:.2f}%")

## 5. Prepare Dataset

In [None]:
def tokenize_function(examples):
    """Tokenize the 'text' field of a batch, truncating to 512 tokens.

    No padding is applied here; batches are padded later by the data collator.
    """
    return tokenizer(examples['text'], truncation=True, max_length=512)


def build_split(frame):
    """Turn one pandas split into a tokenized, torch-formatted Hugging Face Dataset."""
    split = Dataset.from_pandas(frame)
    split = split.map(tokenize_function, batched=True)
    # The raw text is no longer needed once it has been tokenized.
    split = split.remove_columns(['text'])
    # Trainer expects the target column to be called 'labels'.
    split = split.rename_column('label', 'labels')
    split.set_format('torch', columns=['input_ids', 'attention_mask', 'labels'])
    return split


print("Tokenizing datasets...")
train_dataset, val_dataset, test_dataset = (
    build_split(frame) for frame in (train_df, val_df, test_df)
)

print(f"‚úÖ Datasets tokenized and ready")
print(f"   Final columns: {train_dataset.column_names}")

## 6. Define Metrics and Training Arguments

In [None]:
# Compute metrics function
def compute_metrics(eval_pred):
    """Compute accuracy and weighted precision/recall/F1 from model outputs.

    Args:
        eval_pred: A ``(predictions, labels)`` pair. ``predictions`` holds the
            per-class scores with classes on axis 1 (or a tuple whose first
            element does); ``labels`` holds the true class IDs.

    Returns:
        dict with the keys 'accuracy', 'f1', 'precision' and 'recall'.
    """
    predictions, labels = eval_pred
    # Some models return extra outputs alongside the logits; keep the logits only.
    if isinstance(predictions, tuple):
        predictions = predictions[0]
    predictions = np.argmax(predictions, axis=1)

    accuracy = accuracy_score(labels, predictions)
    # zero_division=0 yields the same scores as the default but without emitting
    # a warning for every class that receives no predictions.
    precision, recall, f1, _ = precision_recall_fscore_support(
        labels, predictions, average='weighted', zero_division=0
    )

    return {
        'accuracy': accuracy,
        'f1': f1,
        'precision': precision,
        'recall': recall
    }

# Training arguments - tuned for an A100 80GB (LoRA leaves room for larger batches)
training_args = TrainingArguments(
    # Where checkpoints and logs go
    output_dir="./results_lora",
    logging_dir='./logs',
    logging_steps=10,
    report_to="wandb",
    run_name="lora-finetuning",
    # Evaluate and checkpoint once per epoch, then restore the most accurate checkpoint
    eval_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    metric_for_best_model="accuracy",
    greater_is_better=True,
    # Optimisation schedule
    num_train_epochs=3,
    learning_rate=1e-4,  # higher than would be used for full fine-tuning
    weight_decay=0.01,
    warmup_steps=50,
    # Batch sizes chosen for the A100
    per_device_train_batch_size=16,
    per_device_eval_batch_size=32,
    # Mixed precision: BF16 on, FP16 off
    bf16=True,
    fp16=False
)

print("‚úÖ Training arguments configured")
print("üöÄ Optimized for A100 80GB: batch_size=16, eval_batch_size=32, BF16 precision")

## 7. Train Model with LoRA

In [None]:
# Create data collator for dynamic padding
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

# Newer transformers releases take the tokenizer via `processing_class` and
# deprecate the `tokenizer` keyword; older releases only know `tokenizer`.
# Pick whichever name the installed Trainer accepts so the cell runs on both.
trainer_init_params = inspect.signature(Trainer.__init__).parameters
tokenizer_kwarg = "processing_class" if "processing_class" in trainer_init_params else "tokenizer"

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    **{tokenizer_kwarg: tokenizer}
)

print("üöÄ Starting LoRA training...")
print(f"üìä Training samples: {len(train_dataset)}")
print(f"üìä Validation samples: {len(val_dataset)}")
print(f"‚è∞ Start time: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Track training time (wall clock, in seconds)
start_time = time.time()

# Train
train_result = trainer.train()

# Calculate training time; both values are reused when results are reported and saved
training_time = time.time() - start_time
training_hours = training_time / 3600

print(f"\n‚úÖ LoRA training completed!")
print(f"‚è∞ Training time: {training_hours:.2f} hours ({training_time:.2f} seconds)")
print(f"üìà Final training loss: {train_result.training_loss:.4f}")

## 8. Evaluate on Test Set

In [None]:
# Evaluate on test set
print("üß™ Evaluating on test set...")
test_results = trainer.evaluate(test_dataset)

print("\n" + "="*50)
print("LoRA FINE-TUNING RESULTS")
print("="*50)
print(f"Test Accuracy: {test_results['eval_accuracy']:.4f} ({test_results['eval_accuracy']*100:.2f}%)")
print(f"Test F1-Score: {test_results['eval_f1']:.4f}")
print(f"Test Precision: {test_results['eval_precision']:.4f}")
print(f"Test Recall: {test_results['eval_recall']:.4f}")
print(f"\nTraining Time: {training_hours:.2f} hours")
print(f"Trainable Parameters: {trainable_percent:.2f}% of total")
print(f"\nBaseline Accuracy: 73.00%")
# The `+` format flag prints the correct sign, so a result below the baseline
# shows as "-3.00%" rather than "+-3.00%".
print(f"Improvement over Baseline: {(test_results['eval_accuracy']*100 - 73):+.2f}%")

# Get detailed predictions
predictions = trainer.predict(test_dataset)
pred_labels = np.argmax(predictions.predictions, axis=1)
true_labels = predictions.label_ids

# Classification report
print("\nDetailed Classification Report:")
id_to_label = {v: k for k, v in label_info['label_to_id'].items()}
class_ids = list(range(label_info['num_labels']))
target_names = [id_to_label[i] for i in class_ids]
# Passing `labels` explicitly keeps the report aligned with `target_names` even
# when a class is absent from the test split and from the predictions;
# zero_division=0 reports 0 for such classes without a warning.
print(classification_report(
    true_labels,
    pred_labels,
    labels=class_ids,
    target_names=target_names,
    zero_division=0
))

## 9. Save Results and LoRA Adapter

In [None]:
# Save results to Google Drive
results_path = '/content/drive/MyDrive/Colab Notebooks/llm-finetuning-showdown'

# Accuracy of the baseline that this run is compared against.
baseline_accuracy = 0.73

# Read the hyper-parameters from the objects that were actually used for
# training, so the saved record cannot drift from the real configuration.
lora_targets = lora_config.target_modules
if isinstance(lora_targets, (list, tuple, set)):
    # Sorted list: JSON cannot serialise a set, and the order stays stable.
    lora_targets = sorted(lora_targets)

lora_results = {
    "method": "lora",
    "model": model_name,
    "date": datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    "accuracy": float(test_results['eval_accuracy']),
    "f1_score": float(test_results['eval_f1']),
    "precision": float(test_results['eval_precision']),
    "recall": float(test_results['eval_recall']),
    "training_time_hours": float(training_hours),
    "training_time_seconds": float(training_time),
    "baseline_accuracy": baseline_accuracy,
    "improvement_over_baseline": float(test_results['eval_accuracy'] - baseline_accuracy),
    "total_parameters": total_params,
    "trainable_parameters": trainable_params,
    "trainable_percent": float(trainable_percent),
    "lora_config": {
        "r": lora_config.r,
        "lora_alpha": lora_config.lora_alpha,
        "lora_dropout": lora_config.lora_dropout,
        "target_modules": lora_targets
    },
    "training_config": {
        "learning_rate": training_args.learning_rate,
        "batch_size": training_args.per_device_train_batch_size,
        "epochs": training_args.num_train_epochs,
        "max_length": 512  # truncation length used in tokenize_function
    }
}

with open(f'{results_path}/lora_results.json', 'w') as f:
    json.dump(lora_results, f, indent=2)

print(f"‚úÖ Results saved to: {results_path}/lora_results.json")

# Save LoRA adapter (small file - only trained parameters)
model.save_pretrained(f'{results_path}/lora_adapter')
tokenizer.save_pretrained(f'{results_path}/lora_adapter')
print(f"‚úÖ LoRA adapter saved to: {results_path}/lora_adapter")
print(f"üì¶ Adapter size: ~10-50 MB (vs 14 GB for full model)")

# Log to W&B
wandb.log({
    "final_test_accuracy": test_results['eval_accuracy'],
    "final_test_f1": test_results['eval_f1'],
    "training_time_hours": training_hours,
    "improvement_over_baseline": test_results['eval_accuracy'] - baseline_accuracy,
    "trainable_percent": trainable_percent
})

wandb.finish()
print("\n‚úÖ LoRA Fine-Tuning Complete!")

## 10. Next Steps

**✅ LoRA Fine-Tuning Complete!**

**Record your results:**
- Accuracy: ____%
- Training time: ___ hours
- Trainable parameters: ___% of total
- Adapter size: ~___ MB

**Compare with Full Fine-Tuning:**
- Speed improvement: ___x faster
- Accuracy difference: ___% points
- Memory savings: ___% less

**Next:**
- Run QLoRA fine-tuning (Day2_QLoRA_FineTuning.ipynb)
- Compare all three methods