# Kwanzaa Adapter Training on Google Colab (Free T4 GPU)

**Training Configuration:**
- Model: meta-llama/Llama-3.2-1B-Instruct
- Method: QLoRA (LoRA adapters trained on top of a 4-bit NF4-quantized base model)
- Training samples: 107
- Eval samples: 27
- Estimated time: 15-20 minutes on free T4

**Setup:**
1. Runtime > Change runtime type > T4 GPU
2. Upload `kwanzaa_train.jsonl` and `kwanzaa_eval.jsonl` to `/content/`
3. Run all cells in order
4. Download the adapter from `/content/outputs/kwanzaa-adapter-v1/`

In [None]:
# Install dependencies
# NOTE(review): `-U` without version pins installs whatever is newest at run
# time, so this notebook is not reproducible. Later cells import
# `DataCollatorForCompletionOnlyLM` from trl and pass `tokenizer=`,
# `dataset_text_field=` and `max_seq_length=` directly to `SFTTrainer`;
# presumably these require a TRL release that still provides them - confirm
# and pin a known-good set of versions.
!pip install -q -U transformers datasets peft bitsandbytes trl accelerate huggingface_hub

In [None]:
# Login to Hugging Face
# Accept Llama 3.2 license at: https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct
# The license has to be accepted with the same account used to log in here;
# presumably the model download in a later cell fails otherwise - confirm.
from huggingface_hub import notebook_login
notebook_login()

In [None]:
# Load and prepare data
import json
from datasets import Dataset

def load_jsonl(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Args:
        file_path: Path to a UTF-8 text file with one JSON document per line.

    Returns:
        A list holding one parsed object per non-blank line, in file order.
        An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON. The
            message is prefixed with the file path and 1-based line number.
    """
    records = []
    # Read as UTF-8 explicitly instead of relying on the platform locale.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line_number, line in enumerate(f, start=1):
            # Blank lines (typically a trailing newline at the end of the
            # file) hold no record and would make json.loads() raise.
            if not line.strip():
                continue
            try:
                records.append(json.loads(line))
            except json.JSONDecodeError as err:
                # Same exception type as before, but say where the bad
                # record is so a broken upload is easy to locate.
                raise json.JSONDecodeError(
                    f"{file_path}:{line_number}: {err.msg}", err.doc, err.pos
                ) from err
    return records

# Training split: raw records from the uploaded file, then a Hugging Face Dataset.
train_data = load_jsonl('/content/kwanzaa_train.jsonl')
train_dataset = Dataset.from_list(train_data)

# Evaluation split, handled the same way.
eval_data = load_jsonl('/content/kwanzaa_eval.jsonl')
eval_dataset = Dataset.from_list(eval_data)

# Report the split sizes, then show one raw record so the schema is visible.
print(
    f"Training samples: {len(train_dataset)}",
    f"Eval samples: {len(eval_dataset)}",
    "\nSample format:",
    sep="\n",
)
print(train_dataset[0])

In [None]:
# Configure model and tokenizer
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
from trl import SFTTrainer, DataCollatorForCompletionOnlyLM

MODEL_ID = "meta-llama/Llama-3.2-1B-Instruct"
OUTPUT_DIR = "/content/outputs/kwanzaa-adapter-v1"

# bfloat16 needs an Ampere-or-newer GPU (compute capability >= 8.0). The free
# Colab T4 is older than that, so fall back to float16 there.
COMPUTE_DTYPE = (
    torch.bfloat16
    if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
    else torch.float16
)

# 4-bit quantization config (NF4 weights with double quantization)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=COMPUTE_DTYPE,
    bnb_4bit_use_double_quant=True,
)

print("Loading model...")
# Llama is implemented inside transformers itself, so trust_remote_code (which
# allows code from the model repository to be executed) is not needed.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    quantization_config=bnb_config,
    device_map="auto",
)

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Do not reuse EOS as the padding token: the language-modeling collator sets
# the label of every pad-token position to -100, so with pad == eos the
# end-of-turn token would never contribute to the loss and the adapter would
# not learn when to stop. Prefer the dedicated padding token when the
# vocabulary has one; otherwise keep the previous EOS fallback.
DEDICATED_PAD_TOKEN = "<|finetune_right_pad_id|>"
if DEDICATED_PAD_TOKEN in tokenizer.get_vocab():
    tokenizer.pad_token = DEDICATED_PAD_TOKEN
else:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Prepare the quantized model for training before attaching LoRA adapters.
model = prepare_model_for_kbit_training(model)

# LoRA configuration: adapters on all attention and MLP projection layers
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)

model = get_peft_model(model, lora_config)
# print_trainable_parameters() prints its own summary and returns None, so it
# must be called directly rather than interpolated into an f-string.
print()
model.print_trainable_parameters()

In [None]:
# Prepare chat template
def format_chat_template(example):
    """Render one example's ``messages`` list into a single training string.

    Uses the module-level ``tokenizer``'s chat template without tokenizing,
    and returns the result under the ``"text"`` key so it can be merged into
    the dataset by ``Dataset.map``.
    """
    rendered = tokenizer.apply_chat_template(example["messages"], tokenize=False)
    return {"text": rendered}

# Add the rendered "text" column to both splits.
train_dataset, eval_dataset = (
    split.map(format_chat_template) for split in (train_dataset, eval_dataset)
)

# Show the first 500 characters of one rendered example as a sanity check.
print("Sample formatted text:", train_dataset[0]["text"][:500], sep="\n")

In [None]:
# Training configuration
# bf16 and tf32 both require an Ampere-or-newer GPU (compute capability >= 8.0);
# TrainingArguments rejects tf32=True on older cards such as the free Colab T4.
# Enable them only where supported and use fp16 mixed precision otherwise.
HAS_CUDA = torch.cuda.is_available()
AMPERE_OR_NEWER = HAS_CUDA and torch.cuda.get_device_capability()[0] >= 8

training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=8,  # effective batch size of 8
    gradient_checkpointing=True,
    optim="paged_adamw_8bit",
    learning_rate=2e-4,
    lr_scheduler_type="cosine",
    warmup_steps=10,
    logging_steps=10,
    # With 107 samples and an effective batch of 8 there are only ~14 optimizer
    # steps per epoch (~40 in total), so a step-based schedule with
    # eval_steps=50 would never evaluate. Evaluate once per epoch instead,
    # matching the checkpoint schedule.
    eval_strategy="epoch",
    save_strategy="epoch",
    save_total_limit=2,
    bf16=AMPERE_OR_NEWER,
    fp16=HAS_CUDA and not AMPERE_OR_NEWER,
    tf32=AMPERE_OR_NEWER,
    max_grad_norm=0.3,
    group_by_length=True,
    report_to="none",
    push_to_hub=False,
)

# Data collator for completion-only training
# The template is the header that introduces an assistant turn in the text
# rendered by the chat template. Presumably the collator uses it to find where
# the assistant reply starts and excludes everything before it from the loss -
# confirm against the TRL documentation for the installed version.
response_template = "<|start_header_id|>assistant<|end_header_id|>"
collator = DataCollatorForCompletionOnlyLM(
    response_template=response_template,
    tokenizer=tokenizer,
)

print("Training configuration ready!")

In [None]:
# Build the supervised fine-tuning trainer over the pre-rendered "text" column
# (one example per sequence, no packing), then run training.
_rule = "=" * 50

trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=collator,
    dataset_text_field="text",
    max_seq_length=2048,
    packing=False,
)

# Opening banner (followed by a blank line).
print(f"\n{_rule}\nStarting training...\n{_rule}\n")

trainer.train()

# Closing banner.
print(f"\n{_rule}\nTraining complete!\n{_rule}")

In [None]:
# Save adapter
print("\nSaving adapter...")
trainer.model.save_pretrained(OUTPUT_DIR)
tokenizer.save_pretrained(OUTPUT_DIR)

print(f"\nAdapter saved to: {OUTPUT_DIR}")
print("\nFiles created:")
!ls -lh {OUTPUT_DIR}

print("\n" + "="*50)
print("TRAINING COMPLETE!")
print("="*50)
print(f"\nDownload the adapter files from: {OUTPUT_DIR}")
print("\nAdapter files to download:")
print("  - adapter_config.json")
print("  - adapter_model.safetensors")
print("  - tokenizer files")

In [None]:
# Test the adapter
from transformers import pipeline

# Trainer.train() puts the model in train mode. Switch to eval mode before
# generating so LoRA dropout (lora_dropout=0.05) is disabled and the sample
# output reflects the adapter's actual inference behavior.
model.eval()

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,
    # temperature only takes effect when sampling; request it explicitly
    # instead of relying on the model's default generation config.
    do_sample=True,
    temperature=0.7,
)

# One retrieval-style prompt: a system role plus a user turn that carries a
# retrieved document followed by the query.
test_messages = [
    {
        "role": "system",
        "content": "You are a researcher assistant specializing in Kwanzaa and African American culture."
    },
    {
        "role": "user",
        "content": '''Retrieved Documents:

[1] Title: "Kwanzaa Principles"
Content: "The seven principles of Kwanzaa are called Nguzo Saba..."

Query: What are the seven principles of Kwanzaa?'''
    }
]

print("\nTesting adapter:")
print("="*50)
result = pipe(test_messages)
# For chat-style input the pipeline returns the whole conversation; the last
# message is the newly generated assistant reply.
print(result[0]['generated_text'][-1]['content'])

In [None]:
# (Optional) Push to Hugging Face Hub
# Uncomment to push to your HF account
# HF_REPO_NAME = "your-username/kwanzaa-adapter-v1"
#
# trainer.model.push_to_hub(HF_REPO_NAME)
# tokenizer.push_to_hub(HF_REPO_NAME)
#
# print(f"Adapter pushed to: https://huggingface.co/{HF_REPO_NAME}")