# 🚀 Adaptive LoRA - Training Notebook

This notebook trains LoRA adapters for the Adaptive Multi-Adapter system.

**Requirements:**
- Google Colab Pro (A100 recommended)
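- Hugging Face account with an access token (stored as the Colab secret `HF_TOKEN`) and approved access to the gated `meta-llama/Llama-3.2-3B-Instruct` model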

**Training time:** ~2 hours per adapter on A100

In [None]:
# Check GPU
# Shell out to nvidia-smi to see which GPU (if any) is attached to this
# runtime before loading the model.
!nvidia-smi

In [None]:
# Install dependencies
# -q keeps pip's output quiet.
# NOTE: a `!` shell command does not raise on failure, so the message below
# only confirms that the cell finished, not that every package installed.
!pip install torch transformers accelerate bitsandbytes peft datasets wandb -q
print('Dependencies installed!')

In [None]:
# Configuration - Use Colab Secrets for tokens
import os
from huggingface_hub import login
from google.colab import userdata

# What to train: the base checkpoint and which adapter this run produces.
BASE_MODEL = 'meta-llama/Llama-3.2-3B-Instruct'
ADAPTER_NAME = 'code'  # Options: reasoning, code, creative, analysis

# Weights & Biases project name (optional; only used when W&B reporting is on)
os.environ.update({'WANDB_PROJECT': 'adaptive-lora'})

# Authenticate with the Hugging Face Hub. The token is read from Colab's
# secret store (key icon in the left sidebar) under the name 'HF_TOKEN',
# so it is never written into the notebook itself.
HF_TOKEN = userdata.get('HF_TOKEN')
login(token=HF_TOKEN)

print(f'Training {ADAPTER_NAME} adapter on {BASE_MODEL}')

In [None]:
# Load model with 4-bit quantization
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit NF4 quantization: base weights are stored in 4 bits while matrix
# multiplications are computed in bfloat16. Double quantization additionally
# quantizes the quantization constants to save a bit more memory.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True
)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
# Reuse EOS as the padding token so batches can be padded.
# NOTE(review): DataCollatorForLanguageModeling excludes every pad-token
# position from the loss, so with pad == eos any EOS token in the training
# text is masked as well - confirm this is acceptable for the adapters.
tokenizer.pad_token = tokenizer.eos_token

# trust_remote_code is deliberately left at its default (False): the Llama
# architecture is implemented in transformers itself, so there is no reason
# to allow code from the model repository to execute in this runtime.
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    quantization_config=bnb_config,
    device_map='auto'
)

print(f'Model loaded! GPU: {torch.cuda.memory_allocated()/1e9:.1f}GB')

In [None]:
# Configure LoRA
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# PEFT helper that readies the k-bit quantized base model for training.
model = prepare_model_for_kbit_training(model)

# Rank / scaling per adapter type. Every entry keeps lora_alpha at 2 * r;
# 'code' gets the largest rank and 'creative' the smallest.
lora_configs = {
    'reasoning': dict(r=32, lora_alpha=64),
    'code': dict(r=64, lora_alpha=128),
    'creative': dict(r=16, lora_alpha=32),
    'analysis': dict(r=32, lora_alpha=64),
}

# Adapt both the attention projections and the MLP projections.
attention_projections = ['q_proj', 'k_proj', 'v_proj', 'o_proj']
mlp_projections = ['gate_proj', 'up_proj', 'down_proj']

# A KeyError here means ADAPTER_NAME is not one of the keys of lora_configs.
config = LoraConfig(
    **lora_configs[ADAPTER_NAME],
    target_modules=attention_projections + mlp_projections,
    lora_dropout=0.05,
    bias='none',
    task_type='CAUSAL_LM',
)

model = get_peft_model(model, config)
model.print_trainable_parameters()

In [None]:
# Load training data
from datasets import load_dataset

# First 1000 rows of the Alpaca instruction dataset.
# NOTE(review): the same generic dataset is loaded whatever ADAPTER_NAME is -
# presumably a placeholder to be replaced with task-specific data per adapter.
dataset = load_dataset('tatsu-lab/alpaca', split='train[:1000]')

def format_prompt(example):
    """Render one instruction record as a single training string.

    Args:
        example: Mapping with 'instruction' and 'output' keys and an
            optional 'input' key.

    Returns:
        ``{'text': prompt}`` where the prompt is made of '### Instruction:',
        an optional '### Input:' and '### Response:' sections separated by
        blank lines. The input section is omitted when 'input' is missing
        or empty.
    """
    sections = [f"### Instruction:\n{example['instruction']}"]
    if example.get('input'):
        sections.append(f"### Input:\n{example['input']}")
    sections.append(f"### Response:\n{example['output']}")
    return {'text': '\n\n'.join(sections)}

# Add the rendered prompt as a 'text' column.
dataset = dataset.map(format_prompt)

def tokenize(example):
    """Tokenize a batch of rendered prompts, truncating to 512 tokens.

    No padding is applied here: padding every example to 512 tokens makes
    each training step process mostly pad tokens. The language-modeling data
    collator used by the Trainer pads each batch to its own longest sequence
    instead.

    Args:
        example: Batch mapping with a 'text' column (list of strings).

    Returns:
        The tokenizer output (input_ids, attention_mask) for the batch.
    """
    return tokenizer(example['text'], truncation=True, max_length=512)

# Drop the raw string columns so only model inputs reach the data collator.
dataset = dataset.map(tokenize, batched=True, remove_columns=dataset.column_names)
print(f'Dataset size: {len(dataset)}')

In [None]:
# Training configuration
from transformers import TrainingArguments, Trainer, DataCollatorForLanguageModeling

# The quantized base model computes in bfloat16, so train with bf16 mixed
# precision whenever the GPU supports it (e.g. A100). Fall back to fp16 only
# on GPUs without bfloat16 support.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir=f'./models/{ADAPTER_NAME}',
    num_train_epochs=3,
    # 4 sequences x 8 accumulation steps = 32 sequences per optimizer step
    # (per device).
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,
    learning_rate=2e-4,
    lr_scheduler_type='cosine',
    warmup_ratio=0.1,  # warm up over the first 10% of training steps
    logging_steps=10,
    save_strategy='epoch',  # one checkpoint per epoch under output_dir
    bf16=use_bf16,
    fp16=not use_bf16,
    optim='paged_adamw_8bit',
    report_to='none'  # no experiment tracker; change to 'wandb' to log runs
)

# mlm=False selects causal-LM collation: labels are a copy of input_ids with
# padding positions excluded from the loss.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)

print('Ready to train!')

In [None]:
# Train!
# Runs the fine-tuning loop with the Trainer built in the previous cell.
# Checkpoints go to the configured output_dir once per epoch
# (save_strategy='epoch').
trainer.train()

In [None]:
# Persist the trained adapter. For a PEFT model, save_pretrained writes the
# adapter weights and adapter config rather than the full base model.
adapter_dir = f'./models/{ADAPTER_NAME}'
model.save_pretrained(adapter_dir)
print(f'Adapter saved to {adapter_dir}')

# Push to Hub (optional)
# model.push_to_hub(f'your-username/adaptive-lora-{ADAPTER_NAME}')

In [None]:
# Test the adapter: generate one sampled completion as a quick smoke test.
model.eval()

# Same '### Instruction / ### Response' layout used for the training prompts.
test_prompt = '### Instruction:\nExplain what machine learning is.\n\n### Response:\n'
# Send the inputs to wherever device_map='auto' placed the model instead of
# assuming a hard-coded 'cuda' device.
inputs = tokenizer(test_prompt, return_tensors='pt').to(model.device)

with torch.no_grad():
    outputs = model.generate(**inputs, max_new_tokens=100, do_sample=True, temperature=0.7)

# The decoded text contains the prompt followed by the generated continuation.
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

## Next Steps

1. **Train other adapters** - Change `ADAPTER_NAME` in the configuration cell and rerun the notebook from that cell onward
2. **Download adapters** - Save to Google Drive or push to HuggingFace
3. **Train Router** - Use `experiments/train_router.py`
4. **Deploy** - Use the trained adapters with the serving infrastructure