# MLX_LM LoRA Training with CUDA (Google Colab)

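This notebook fine-tunes a causal language model with LoRA using GPU acceleration on Google Colab. The code cells below use Hugging Face Transformers, PEFT, and bitsandbytes 4-bit quantization.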

**Note:** This notebook is optimized for Google Colab with NVIDIA GPU support.

In [None]:
# System Information (GPU/CUDA Detection)
import platform
import os
import sys
import subprocess
from pathlib import Path

# Banner rules reused throughout this cell.
heavy_rule = "=" * 70
light_rule = "-" * 70

print(heavy_rule)
print("SYSTEM INFORMATION & GPU DETECTION")
print(heavy_rule)

# Interpreter details
print(f"Python Version:     {sys.version}")
print(f"Python Executable:  {sys.executable}")

# Host platform details
print(f"\nPlatform:           {platform.platform()}")
print(f"Machine Type:       {platform.machine()}")

# Query the NVIDIA driver tool for attached hardware.
print("\n" + light_rule)
print("GPU/CUDA INFORMATION")
print(light_rule)

try:
    smi = subprocess.run(['nvidia-smi'], capture_output=True, text=True, timeout=10)
    if smi.returncode != 0:
        print("❌ No NVIDIA GPU detected")
    else:
        print("✓ NVIDIA GPU detected")
        print("\nGPU Details:")
        # Only the first 20 lines of the report are considered; blank ones are dropped.
        for row in smi.stdout.split('\n')[:20]:
            if not row.strip():
                continue
            print(row)
except Exception as err:
    print(f"⚠ Could not detect GPU: {err}")

# Report what each installed deep-learning framework can see.
print("\n" + light_rule)
print("DEEP LEARNING FRAMEWORK CUDA SUPPORT")
print(light_rule)

try:
    import torch
    print(f"\n✓ PyTorch {torch.__version__} installed")
    cuda_ok = torch.cuda.is_available()
    print(f"  CUDA available:    {cuda_ok}")
    if cuda_ok:
        print(f"  CUDA version:      {torch.version.cuda}")
        n_gpus = torch.cuda.device_count()
        print(f"  GPU count:         {n_gpus}")
        for idx in range(n_gpus):
            print(f"  GPU {idx}:            {torch.cuda.get_device_name(idx)}")
        print(f"  Current GPU:       {torch.cuda.current_device()}")
except ImportError:
    print("PyTorch not installed")

try:
    import tensorflow as tf
    print(f"\n✓ TensorFlow {tf.__version__} installed")
    tf_gpus = tf.config.list_physical_devices('GPU')
    print(f"  GPUs detected:     {len(tf_gpus)}")
    # Iterating an empty list prints nothing, so no separate emptiness check is needed.
    for dev in tf_gpus:
        print(f"    - {dev}")
except ImportError:
    print("TensorFlow not installed")

print(f"\nCurrent Directory:  {os.getcwd()}")
print(heavy_rule)

In [None]:
# Install required packages for Colab training.
# Each package is installed with the kernel's own interpreter (`sys.executable -m pip`)
# so it lands in the environment this notebook is actually running in.
# NOTE(review): versions are unpinned, as in the original cell; pin them for reproducible runs.
import subprocess
import sys

print("Installing required packages...")

packages = [
    'torch',
    'transformers',
    'accelerate',  # Needed by Trainer and by device_map="auto" model loading
    'datasets',
    'peft',  # For LoRA
    'bitsandbytes',  # For quantization
    'psutil',
    'pandas',
    'scikit-learn',
    'matplotlib'
]

# Remember which installs failed so the final banner does not claim success unconditionally.
failed_packages = []

for package in packages:
    print(f"\nInstalling {package}...")
    try:
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', '-q', package])
    except Exception as e:
        # Best effort: keep installing the remaining packages, but record the failure.
        failed_packages.append(package)
        print(f"⚠ Error installing {package}: {e}")
    else:
        print(f"✓ {package} installed successfully")

print("\n" + "="*70)
if failed_packages:
    print(f"⚠ Package installation finished with errors: {', '.join(failed_packages)}")
    print("  Later cells that import these packages may fail until they are installed.")
else:
    print("Package installation complete!")
print("="*70)

In [None]:
# Mount Google Drive (for accessing training data)
from google.colab import drive
from pathlib import Path

print("=" * 70, "MOUNTING GOOGLE DRIVE", "=" * 70, sep="\n")

try:
    drive.mount('/content/drive')
    print("\n✓ Google Drive mounted successfully")

    # Preview at most the first ten entries of the Drive root.
    drive_path = Path('/content/drive/MyDrive')
    print("\nDrive contents:")
    entries = list(drive_path.iterdir())
    for entry in entries[:10]:
        print(f"  - {entry.name}")
except Exception as err:
    # Any failure (mounting or listing) is reported here and the notebook continues.
    print(f"⚠ Could not mount Google Drive: {err}")
    print("You can upload data manually or use a different method.")

print("\n" + "=" * 70)

In [None]:
# Define paths and configuration
from pathlib import Path

# For Colab: use /content directory
# You can modify these paths based on where your data is stored

# Directory layout: everything lives under one project root.
project_root = Path("/content/LLM_training")
data_dir = project_root / "data"
checkpoint_dir = project_root / "checkpoints"
model_cache_dir = project_root / "model_cache"

# The adapter location is kept as a plain string, unlike the other paths.
adapter_path = str(checkpoint_dir / "adapters")

# Model from Hugging Face
model_name = "mistralai/Mistral-7B-v0.1"  # or "meta-llama/Llama-2-7b-hf"

# Create the folders parent-first; existing folders are left untouched.
for folder in (project_root, data_dir, checkpoint_dir, model_cache_dir, Path(adapter_path)):
    folder.mkdir(exist_ok=True)

config_report = [
    "="*70,
    "PATHS CONFIGURATION",
    "="*70,
    f"Project Root:       {project_root}",
    f"Data Directory:     {data_dir}",
    f"Checkpoint Dir:     {checkpoint_dir}",
    f"Model Cache:        {model_cache_dir}",
    f"Adapter Path:       {adapter_path}",
    f"Model Name:         {model_name}",
    f"\nData directory exists: {data_dir.exists()}",
    f"Checkpoint directory exists: {checkpoint_dir.exists()}",
    "="*70,
]
print("\n".join(config_report))

In [None]:
# Upload or prepare training data
import json
from pathlib import Path

import shutil

print("="*70)
print("DATA PREPARATION")
print("="*70)


def _copy_jsonl_files(source_dir, target_dir):
    """Copy every ``*.jsonl`` file directly inside ``source_dir`` into ``target_dir``.

    Files are copied in sorted name order and overwrite same-named files in
    ``target_dir``. Returns the number of files copied.
    """
    copied = 0
    for file in sorted(source_dir.glob("*.jsonl")):
        print(f"  Copying {file.name}...")
        shutil.copy(file, target_dir / file.name)
        copied += 1
    return copied


# Option 1: Upload from Google Drive
print("\n[Option 1] Check for data in Google Drive...")
drive_data = Path("/content/drive/MyDrive/LLM_training/data")
if drive_data.exists():
    print(f"✓ Found data in Drive: {drive_data}")
    # Copy to local storage
    _copy_jsonl_files(drive_data, data_dir)
else:
    # The original cell had a stray `except:` here (no matching `try`), which made
    # the whole cell a SyntaxError; this is the missing-folder branch of the `if`.
    print(f"✗ No data found in Drive")

# Option 2: Check local /content/data
print("\n[Option 2] Check for uploaded data...")
local_data = Path("/content/data")
if local_data.exists():
    print(f"✓ Found local data: {local_data}")
    _copy_jsonl_files(local_data, data_dir)
else:
    print(f"✗ No local data found")

# Show available data
print(f"\nAvailable training data in {data_dir}:")
data_files = sorted(data_dir.glob("*.jsonl"))
if data_files:
    for file in data_files:
        size_mb = file.stat().st_size / (1024*1024)
        with open(file, 'r', encoding='utf-8') as f:
            # One JSON object per line; blank lines are not samples.
            lines = sum(1 for row in f if row.strip())
        print(f"  ✓ {file.name:<30} {size_mb:>8.2f} MB ({lines} samples)")
else:
    print("  No JSONL files found. Upload data to continue.")

print("\n" + "="*70)

In [None]:
# Define training parameters for CUDA/GPU
import torch

print("="*70)
print("TRAINING PARAMETERS (GPU-Optimized)")
print("="*70)

# Pick the per-device batch size from the total memory of GPU 0 (or fall back for CPU).
if not torch.cuda.is_available():
    batch_size = 2
    print("No GPU detected, using CPU (slow!)")
else:
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / (1024**3)
    print(f"Available GPU Memory: {gpu_memory:.2f} GB")

    # Thresholds: >= 16 GB -> 16, >= 8 GB -> 8, anything smaller -> 4.
    batch_size = 16 if gpu_memory >= 16 else (8 if gpu_memory >= 8 else 4)

# --- Optimisation schedule ---
iters = 100
num_train_epochs = 3
learning_rate = 2e-4
warmup_steps = 500
weight_decay = 0.01
seed = 42

# --- LoRA adapter shape ---
lora_rank = 8
lora_alpha = 16
lora_dropout = 0.05

# --- Sequence length, validation and checkpoint cadence ---
max_seq_length = 512  # Adjust based on your needs and GPU memory
val_batches = 25
val_interval = 10
save_every = 10

# --- Memory savers ---
use_gradient_checkpointing = True
use_mixed_precision = True  # fp16 training for memory efficiency

# Emit the whole summary in one call; each entry is one output line.
parameter_report = [
    f"\nBatch Size:              {batch_size}",
    f"Iterations:              {iters}",
    f"Epochs:                  {num_train_epochs}",
    f"Learning Rate:           {learning_rate}",
    f"LoRA Rank:               {lora_rank}",
    f"LoRA Alpha:              {lora_alpha}",
    f"LoRA Dropout:            {lora_dropout}",
    f"Max Seq Length:          {max_seq_length}",
    f"Validation Batches:      {val_batches}",
    f"Validation Interval:     {val_interval}",
    f"Save Every:              {save_every}",
    f"Gradient Checkpointing:  {use_gradient_checkpointing}",
    f"Mixed Precision (fp16):  {use_mixed_precision}",
    f"Warmup Steps:            {warmup_steps}",
    f"Weight Decay:            {weight_decay}",
    f"Seed:                    {seed}",
    "="*70,
]
print("\n".join(parameter_report))

In [None]:
# Load model and prepare for training with LoRA.
# Steps: choose the device, load tokenizer + 4-bit quantized base model, prepare the
# quantized model for training, then wrap it with LoRA adapters.
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import get_peft_model, LoraConfig, TaskType, prepare_model_for_kbit_training
import torch

print("\n" + "="*70)
print("LOADING MODEL AND PREPARING LoRA")
print("="*70)

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"\n[1/3] Using device: {device}")

# Quantization config for memory efficiency
print(f"\n[2/3] Loading model: {model_name}")
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    # NOTE(review): the Trainer is later configured with fp16 mixed precision while the
    # 4-bit compute dtype is bfloat16 — confirm this pairing suits the target GPU.
    bnb_4bit_compute_dtype=torch.bfloat16
)

try:
    # Load tokenizer (same cache directory as the model so downloads stay in one place)
    tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=str(model_cache_dir))
    if tokenizer.pad_token is None:
        # No dedicated pad token: reuse EOS so padded batches can be built.
        tokenizer.pad_token = tokenizer.eos_token
    print(f"✓ Tokenizer loaded")

    # Load model with quantization
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config=bnb_config,
        device_map="auto",
        cache_dir=str(model_cache_dir),
        # NOTE(review): trust_remote_code=True allows code shipped in the model repo to run;
        # consider dropping it for architectures built into Transformers.
        trust_remote_code=True,
    )
    print(f"✓ Model loaded and quantized (4-bit)")

    # Prepare the quantized base model for training before attaching adapters.
    # Per the PEFT docs this helper freezes the base weights and, when gradient
    # checkpointing is requested, enables it together with input gradients — without
    # that, training a k-bit model with gradient checkpointing can fail in backward.
    model = prepare_model_for_kbit_training(
        model,
        use_gradient_checkpointing=use_gradient_checkpointing,
    )
    print(f"✓ Model prepared for k-bit training")

    # Configure LoRA
    print(f"\n[3/3] Configuring LoRA...")
    peft_config = LoraConfig(
        task_type=TaskType.CAUSAL_LM,
        r=lora_rank,
        lora_alpha=lora_alpha,
        lora_dropout=lora_dropout,
        bias="none",
        target_modules=["q_proj", "v_proj"],
        inference_mode=False,
    )

    model = get_peft_model(model, peft_config)
    print(f"✓ LoRA configured")

    # Print trainable parameters
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    print(f"\nModel Parameters:")
    print(f"  Trainable: {trainable_params:,}")
    print(f"  Total:     {total_params:,}")
    print(f"  % Trainable: {100 * trainable_params / total_params:.2f}%")

except Exception as e:
    # Report, then re-raise so the notebook stops instead of continuing without a model.
    print(f"❌ Error loading model: {e}")
    raise

print("\n" + "="*70)

In [None]:
# Load training data and create DataLoader
from datasets import load_dataset
from torch.utils.data import DataLoader
import json

print("\n" + "="*70)
print("LOADING TRAINING DATA")
print("="*70)

try:
    # Expected JSONL inputs inside the project data directory.
    train_file = data_dir / "train.jsonl"
    val_file = data_dir / "valid.jsonl"

    # Training split: a missing file leaves `train_dataset` as None.
    print(f"\nLoading training data from {train_file}...")
    if not train_file.exists():
        print(f"❌ Training file not found: {train_file}")
        train_dataset = None
    else:
        train_dataset = load_dataset('json', data_files=str(train_file), split='train')
        print(f"✓ Training dataset loaded: {len(train_dataset)} samples")

    # Validation split: a missing file leaves `val_dataset` as None.
    print(f"\nLoading validation data from {val_file}...")
    if not val_file.exists():
        print(f"⚠ Validation file not found: {val_file}")
        val_dataset = None
    else:
        val_dataset = load_dataset('json', data_files=str(val_file), split='train')
        print(f"✓ Validation dataset loaded: {len(val_dataset)} samples")

    # Preview the first record, shortening long strings to 100 characters.
    if train_dataset:
        print(f"\nSample training example:")
        sample = train_dataset[0]
        for field, content in sample.items():
            is_long_text = isinstance(content, str) and len(content) > 100
            shown = f"{content[:100]}..." if is_long_text else content
            print(f"  {field}: {shown}")

except Exception as err:
    print(f"❌ Error loading data: {err}")
    raise

print("\n" + "="*70)

In [None]:
# Setup training with HuggingFace Trainer
from transformers import Trainer, TrainingArguments, DataCollatorForLanguageModeling
import json

print("\n" + "="*70)
print("TRAINING SETUP")
print("="*70)

# Tokenize function
def tokenize_function(examples):
    """Tokenize a batch of examples and attach labels for causal-LM training.

    The text is taken from the first non-empty field among ``text``,
    ``instruction`` and ``content`` (adjust this to your data structure).
    Sequences are padded/truncated to the notebook-level ``max_seq_length``
    using the notebook-level ``tokenizer``.

    Args:
        examples: Mapping of column name to value(s), as supplied by
            ``Dataset.map``.

    Returns:
        The tokenizer output with an extra ``labels`` entry that is a copy of
        ``input_ids``.

    Raises:
        ValueError: If none of the supported text fields is present.
    """
    texts = examples.get('text') or examples.get('instruction') or examples.get('content')
    if texts is None:
        # Fail with an actionable message instead of handing None to the tokenizer.
        raise ValueError(
            "No text field found in examples: expected one of 'text', 'instruction' "
            f"or 'content', got columns {list(examples.keys())}"
        )

    result = tokenizer(
        texts,
        padding="max_length",
        max_length=max_seq_length,
        truncation=True,
        return_tensors="pt"
    )
    # Causal LM objective: the targets are the inputs themselves.
    result["labels"] = result["input_ids"].clone()
    return result

import inspect

import torch

print("\nTokenizing datasets...")
# Skip datasets that already carry `input_ids` so re-running this cell does not try to
# tokenize data whose raw text columns were removed by a previous run.
if train_dataset:
    if "input_ids" in train_dataset.column_names:
        print(f"✓ Training data already tokenized")
    else:
        train_dataset = train_dataset.map(tokenize_function, batched=True, remove_columns=train_dataset.column_names)
        print(f"✓ Training data tokenized")

if val_dataset:
    if "input_ids" in val_dataset.column_names:
        print(f"✓ Validation data already tokenized")
    else:
        val_dataset = val_dataset.map(tokenize_function, batched=True, remove_columns=val_dataset.column_names)
        print(f"✓ Validation data tokenized")

# The data-loading cell allows a missing validation file (val_dataset is None).
# Step-based evaluation and best-checkpoint reloading both need an eval dataset,
# so they are only switched on when one is available.
has_validation = bool(val_dataset)
if not has_validation:
    print("⚠ No validation data: step evaluation and best-model reloading are disabled")

# fp16 mixed precision is only requested when a CUDA device is present.
fp16_enabled = bool(use_mixed_precision and torch.cuda.is_available())

# NOTE(review): warmup_steps (500 by default) may exceed the total number of optimizer
# steps on small datasets, in which case the learning rate never reaches its target.
training_kwargs = dict(
    output_dir=str(checkpoint_dir / "training_output"),
    overwrite_output_dir=True,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    warmup_steps=warmup_steps,
    weight_decay=weight_decay,
    logging_dir=str(checkpoint_dir / "logs"),
    logging_steps=10,
    save_steps=save_every,
    eval_steps=val_interval if has_validation else None,
    save_total_limit=3,
    load_best_model_at_end=has_validation,
    save_strategy="steps",
    learning_rate=learning_rate,
    seed=seed,
    fp16=fp16_enabled,
    gradient_checkpointing=use_gradient_checkpointing,
    gradient_accumulation_steps=1,
)

# Newer transformers releases name this argument `eval_strategy`; older ones use
# `evaluation_strategy`. Pick whichever the installed version accepts.
supported_args = inspect.signature(TrainingArguments.__init__).parameters
eval_strategy_arg = "eval_strategy" if "eval_strategy" in supported_args else "evaluation_strategy"
training_kwargs[eval_strategy_arg] = "steps" if has_validation else "no"

# Training arguments
training_args = TrainingArguments(**training_kwargs)

print("\n" + "="*70)
print("TRAINING CONFIGURATION")
print("="*70)
print(f"Output dir:          {training_args.output_dir}")
print(f"Epochs:              {training_args.num_train_epochs}")
print(f"Batch size:          {training_args.per_device_train_batch_size}")
print(f"Learning rate:       {training_args.learning_rate}")
print(f"FP16:                {training_args.fp16}")
print(f"Grad checkpointing:  {training_args.gradient_checkpointing}")
print("="*70)

In [None]:
# Execute training
from transformers import Trainer

print("\n" + "="*70, "STARTING TRAINING", "="*70 + "\n", sep="\n")

try:
    # Causal-LM collation (mlm=False).
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    # Empty or missing datasets are passed to the Trainer as None.
    trainer_kwargs = {
        "model": model,
        "args": training_args,
        "train_dataset": train_dataset or None,
        "eval_dataset": val_dataset or None,
        "data_collator": data_collator,
    }
    trainer = Trainer(**trainer_kwargs)

    print("Starting training loop...\n")
    train_result = trainer.train()

    print("\n" + "="*70, "TRAINING COMPLETE", "="*70, sep="\n")
    print(f"\nTraining Results:")
    print(f"  Final Loss:    {train_result.training_loss:.6f}")

    # Persist the LoRA weights to the configured adapter directory.
    print(f"\nSaving LoRA adapter...")
    model.save_pretrained(adapter_path)
    print(f"✓ Adapter saved to: {adapter_path}")

except Exception as err:
    # The error is reported with its traceback; it is not re-raised.
    print(f"\n❌ Training error: {err}")
    import traceback
    traceback.print_exc()

In [None]:
# Model Evaluation and Testing
import json
import numpy as np
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datetime import datetime

print("\n" + "="*70)
print("MODEL EVALUATION AND TESTING")
print("="*70)

# [1/4] + [2/4]: token statistics of the validation file (skipped if the file is missing).
print("\n[1/4] Loading validation dataset...")
val_data_path = data_dir / "valid.jsonl"

if not val_data_path.exists():
    print(f"❌ Validation data not found at {val_data_path}")
else:
    with open(val_data_path, 'r', encoding='utf-8') as f:
        # One JSON object per line; blank lines (e.g. a trailing newline) are skipped
        # instead of crashing json.loads.
        val_examples = [json.loads(line) for line in f if line.strip()]

    print(f"✓ Loaded {len(val_examples)} validation examples")

    # Calculate validation metrics
    print("\n[2/4] Computing validation metrics...")

    # Same field precedence as tokenize_function (text -> instruction -> content),
    # falling back to the string form of the whole record.
    text_lengths = np.array(
        [
            len(tokenizer.encode(
                example.get('text')
                or example.get('instruction')
                or example.get('content')
                or str(example)
            ))
            for example in val_examples
        ],
        dtype=np.int64,
    )
    total_tokens = int(text_lengths.sum())

    print(f"\nValidation Metrics:")
    print(f"  Total samples:       {len(val_examples)}")
    print(f"  Total tokens:        {total_tokens:,}")
    if text_lengths.size:
        print(f"  Mean tokens/sample:  {text_lengths.mean():.2f}")
        print(f"  Median tokens:       {np.median(text_lengths):.2f}")
        print(f"  Std Dev:             {text_lengths.std():.2f}")
        print(f"  Min tokens:          {text_lengths.min()}")
        print(f"  Max tokens:          {text_lengths.max()}")
    else:
        # min()/max() raise on an empty array, so report the situation instead.
        print("  ⚠ Validation file contains no examples; token statistics skipped")

# [3/4]: training history. Independent of the validation file, so it always runs.
print("\n[3/4] Loading training history...")
training_log_file = checkpoint_dir / "training_output" / "trainer_state.json"

if training_log_file.exists():
    with open(training_log_file, 'r') as f:
        trainer_state = json.load(f)

    print(f"✓ Training history loaded")
    if 'log_history' in trainer_state:
        print(f"\nTraining History Summary:")
        logs = trainer_state['log_history']

        train_losses = [log['loss'] for log in logs if 'loss' in log]
        eval_losses = [log['eval_loss'] for log in logs if 'eval_loss' in log]

        if train_losses:
            print(f"  Training Loss - Min: {min(train_losses):.6f}, Max: {max(train_losses):.6f}, Final: {train_losses[-1]:.6f}")
        if eval_losses:
            print(f"  Validation Loss - Min: {min(eval_losses):.6f}, Max: {max(eval_losses):.6f}, Final: {eval_losses[-1]:.6f}")
else:
    print(f"⚠ Training history not found")

# [4/4]: adapter files. The adapter directory is created up front by the configuration
# cell, so an existing-but-empty directory must not be reported as a saved adapter.
print("\n[4/4] Adapter Information...")
adapter_dir = Path(adapter_path)
adapter_files = list(adapter_dir.glob('*')) if adapter_dir.exists() else []
if adapter_files:
    print(f"✓ Adapter saved with {len(adapter_files)} files")
    total_size_mb = sum(f.stat().st_size for f in adapter_files if f.is_file()) / (1024*1024)
    print(f"  Total size: {total_size_mb:.2f} MB")
elif adapter_dir.exists():
    print(f"⚠ Adapter directory is empty: {adapter_dir}")
else:
    print(f"⚠ Adapter directory not found")

print("\n" + "="*70)
print("EVALUATION COMPLETE")
print("="*70)

In [None]:
# Detailed Quantitative Metrics Analysis
import pandas as pd
import matplotlib.pyplot as plt
import json
from datetime import datetime

import torch

print("\n" + "="*70)
print("DETAILED QUANTITATIVE METRICS ANALYSIS")
print("="*70)

# Run-level metadata shown above the statistics.
metrics_summary = {
    'Timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    'Model': model_name,
    'Training Framework': 'HuggingFace Transformers + PEFT (LoRA)',
    # Report the device that is actually available rather than assuming a GPU.
    'Device': 'CUDA GPU' if torch.cuda.is_available() else 'CPU',
    'Adapter Path': adapter_path,
}

print(f"\nMetrics Summary:")
for key, value in metrics_summary.items():
    print(f"  {key:<25}: {value}")

# Load training logs
print("\n" + "-"*70)
print("TRAINING HISTORY ANALYSIS")
print("-"*70)

training_log_file = checkpoint_dir / "training_output" / "trainer_state.json"

if training_log_file.exists():
    try:
        with open(training_log_file, 'r') as f:
            trainer_state = json.load(f)

        logs = trainer_state.get('log_history', [])

        # One row per logged record; columns are the union of keys found in the records.
        df_logs = pd.DataFrame(logs)
        print(f"\n✓ Loaded {len(df_logs)} training records")
        print(f"\nColumns: {list(df_logs.columns)}")

        # Per-column statistics for numeric columns that hold at least one value.
        numeric_cols = df_logs.select_dtypes(include=['float64', 'int64']).columns
        print(f"\nMetrics Statistics:")
        for col in numeric_cols:
            if df_logs[col].notna().any():
                print(f"\n  {col}:")
                print(f"    Min:    {df_logs[col].min():.6f}")
                print(f"    Max:    {df_logs[col].max():.6f}")
                print(f"    Mean:   {df_logs[col].mean():.6f}")
                print(f"    Std:    {df_logs[col].std():.6f}")
                print(f"    Final:  {df_logs[col].dropna().iloc[-1]:.6f}")

        # Display last few steps. Only select columns that exist: 'loss' or 'eval_loss'
        # can be absent (e.g. no evaluation ran), and selecting a missing column raises.
        print(f"\n  Last 5 training steps:")
        if 'step' in df_logs.columns:
            tail_cols = [c for c in ('step', 'loss', 'eval_loss') if c in df_logs.columns]
            print(df_logs[tail_cols].tail())
        else:
            print(df_logs.tail())

    except Exception as e:
        print(f"❌ Error loading training logs: {e}")
else:
    print(f"⚠ Training logs not found at {training_log_file}")

print("\n" + "="*70)

In [None]:
# Visualize Training Metrics
import matplotlib.pyplot as plt
import json
import pandas as pd

print("\n" + "="*70)
print("VISUALIZING TRAINING METRICS")
print("="*70)

training_log_file = checkpoint_dir / "training_output" / "trainer_state.json"

if not training_log_file.exists():
    print("⚠ No training metrics to visualize")
else:
    try:
        with open(training_log_file, 'r') as f:
            trainer_state = json.load(f)

        df_logs = pd.DataFrame(trainer_state.get('log_history', []))

        # 2x2 grid: train loss | eval loss / learning rate | both losses overlaid.
        fig, axes = plt.subplots(2, 2, figsize=(14, 10))
        fig.suptitle('Training Metrics Overview (CUDA/GPU)', fontsize=16, fontweight='bold')
        ax_train, ax_eval = axes[0, 0], axes[0, 1]
        ax_lr, ax_both = axes[1, 0], axes[1, 1]

        has_loss = 'loss' in df_logs.columns
        has_eval = 'eval_loss' in df_logs.columns

        # Top row: one loss series per panel, drawn only when its column was logged.
        loss_panels = (
            (ax_train, has_loss, 'loss', 'b-', 'o', 'Training Loss'),
            (ax_eval, has_eval, 'eval_loss', 'r-', 's', 'Validation Loss'),
        )
        for ax, present, column, fmt, marker, title in loss_panels:
            if not present:
                continue
            rows = df_logs[df_logs[column].notna()]
            ax.plot(rows['step'], rows[column], fmt, linewidth=2, marker=marker)
            ax.set_title(title)
            ax.set_xlabel('Step')
            ax.set_ylabel('Loss')
            ax.grid(True, alpha=0.3)

        # Bottom-left: learning-rate schedule, or a placeholder when it was not logged.
        if 'learning_rate' not in df_logs.columns:
            ax_lr.text(0.5, 0.5, 'Learning Rate\ndata not available',
                       ha='center', va='center', fontsize=12)
            ax_lr.set_title('Learning Rate')
        else:
            lr_rows = df_logs[df_logs['learning_rate'].notna()]
            ax_lr.plot(lr_rows['step'], lr_rows['learning_rate'], 'g-', linewidth=2)
            ax_lr.set_title('Learning Rate')
            ax_lr.set_xlabel('Step')
            ax_lr.set_ylabel('LR')
            ax_lr.grid(True, alpha=0.3)

        # Bottom-right: both losses on shared axes, only when both were logged.
        if has_loss and has_eval:
            train_rows = df_logs[df_logs['loss'].notna()]
            eval_rows = df_logs[df_logs['eval_loss'].notna()]
            ax_both.plot(train_rows['step'], train_rows['loss'], 'b-', label='Training', linewidth=2)
            ax_both.plot(eval_rows['step'], eval_rows['eval_loss'], 'r-', label='Validation', linewidth=2)
            ax_both.set_title('Training vs Validation Loss')
            ax_both.set_xlabel('Step')
            ax_both.set_ylabel('Loss')
            ax_both.legend()
            ax_both.grid(True, alpha=0.3)

        plt.tight_layout()

        # Write the figure next to the checkpoints, then display it inline.
        plot_path = checkpoint_dir / 'training_metrics_plot_cuda.png'
        plt.savefig(plot_path, dpi=150, bbox_inches='tight')
        print(f"✓ Training metrics plot saved to {plot_path}")
        plt.show()

    except Exception as err:
        print(f"❌ Error visualizing metrics: {err}")

print("\n" + "="*70)

In [None]:
# Save trained adapter and cleanup
import shutil
from pathlib import Path

print("\n" + "="*70)
print("POST-TRAINING TASKS")
print("="*70)

# The adapter directory is created by the configuration cell before training, so its
# existence proves nothing. Saving a PEFT adapter writes an adapter_config.json file;
# its presence is used as evidence that the training cell actually saved an adapter.
adapter_saved = (Path(adapter_path) / "adapter_config.json").is_file()

# Save to Google Drive
print("\n[1/2] Saving results to Google Drive...")
try:
    drive_output = Path("/content/drive/MyDrive/LLM_training_output")
    drive_output.mkdir(exist_ok=True)

    # Copy adapter — only when one was saved, so a failed run never deletes and
    # replaces a previous backup on Drive with an empty directory.
    adapter_drive = drive_output / "adapters"
    if adapter_saved:
        if adapter_drive.exists():
            shutil.rmtree(adapter_drive)
        shutil.copytree(adapter_path, adapter_drive)
        print(f"✓ Adapter saved to Google Drive: {adapter_drive}")
    else:
        print(f"⚠ No saved adapter in {adapter_path}; existing Drive copy left untouched")

    # Copy metrics and plots
    for file in checkpoint_dir.glob('*.png'):
        shutil.copy(file, drive_output / file.name)
        print(f"✓ Saved {file.name} to Drive")

except Exception as e:
    # Best effort: Drive may not be mounted; local results are unaffected.
    print(f"⚠ Could not save to Drive: {e}")

print("\n[2/2] Training Summary")
if adapter_saved:
    print(f"\n✓ Training completed successfully!")
    print(f"  Model: {model_name}")
    print(f"  LoRA Adapter saved at: {adapter_path}")
    print(f"  Checkpoint directory: {checkpoint_dir}")
    print(f"\nTo use the trained model:")
    print(f"  from peft import PeftModel")
    print(f"  model = PeftModel.from_pretrained(model, '{adapter_path}')")
else:
    print(f"\n❌ No trained adapter found at: {adapter_path}")
    print(f"  Model: {model_name}")
    print(f"  Checkpoint directory: {checkpoint_dir}")
    print(f"  Check the output of the training cell for errors and re-run it.")

print("\n" + "="*70)
print("ALL DONE!")
print("="*70)