# 🦙 Fine-Tuning Meta LLaMA 2 7B with QLoRA on Google Colab
This notebook demonstrates **step-by-step fine-tuning of Meta's LLaMA 2 7B model using QLoRA**, an efficient low-memory adaptation method.

---

In [None]:

# ========================================
# STEP 1: INSTALL DEPENDENCIES
# ========================================
!pip install -q bitsandbytes==0.43.1
!pip install -q transformers==4.38.2
!pip install -q peft==0.10.0
!pip install -q accelerate==0.28.0
!pip install -q datasets==2.17.1
!pip install -q trl==0.8.1


In [None]:

# ========================================
# STEP 2: IMPORT LIBRARIES
# ========================================
import torch
from datasets import load_dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments
from trl import SFTTrainer

# Use the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Running on device:", device)


In [None]:

# ========================================
# STEP 3: LOAD LLAMA 2 MODEL AND TOKENIZER (IN 4-BIT MODE)
# ========================================
from huggingface_hub import login
# login()  # Uncomment to authenticate; prompts for the token instead of storing it in the notebook.

model_name = "meta-llama/Llama-2-7b-hf"

# bfloat16 matmuls are only native on GPUs with compute capability >= 8.0
# (Ampere and newer). Older cards such as the T4 fall back to float16.
supports_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8
compute_dtype = torch.bfloat16 if supports_bf16 else torch.float16

# QLoRA quantization: 4-bit NF4 weights with double quantization; the
# de-quantized matmuls run in `compute_dtype`.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=compute_dtype
)

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
# The tokenizer is given the EOS token as its padding token.
tokenizer.pad_token = tokenizer.eos_token
# Right-padding is what SFTTrainer expects for half-precision training.
tokenizer.padding_side = "right"

# `trust_remote_code` is intentionally not enabled: the Llama architecture is
# implemented in transformers itself, so no repository code has to be executed.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto"
)

print("Model loaded successfully in 4-bit mode!")


In [None]:

# ========================================
# STEP 4: CONFIGURE LORA ADAPTER
# ========================================
# Prepare the quantized base model for training before attaching adapters.
# This PEFT helper freezes the base weights, upcasts the remaining
# half-precision parameters (e.g. layer norms) to float32 for stability, and
# enables gradient checkpointing with input gradients.
model = prepare_model_for_kbit_training(model)

# Low-rank adapters on the attention query/value projections only.
lora_config = LoraConfig(
    r=64,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
# Show how many parameters are actually trainable after wrapping.
model.print_trainable_parameters()
print("LoRA adapters added successfully!")


In [None]:

# ========================================
# STEP 5: LOAD TRAINING DATASET
# ========================================
# Request the "train" split directly, then keep a fixed-seed sample of
# 200 examples so the demo trains quickly and reproducibly.
alpaca_train = load_dataset("tatsu-lab/alpaca", split="train")
dataset = alpaca_train.shuffle(seed=42).select(range(200))
print(dataset[0])


In [None]:

# ========================================
# STEP 6: DEFINE TRAINING PARAMETERS
# ========================================
# Pick the mixed-precision mode the GPU supports natively: bf16 on compute
# capability >= 8.0 (Ampere and newer), fp16 otherwise (e.g. Colab's T4).
use_bf16 = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8

training_args = TrainingArguments(
    output_dir="./llama2-7b-qlora-finetuned",
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,  # effective batch size of 8 per device
    warmup_steps=5,
    max_steps=50,
    learning_rate=2e-4,
    fp16=not use_bf16,
    bf16=use_bf16,
    logging_steps=10,
    save_steps=50,
    optim="paged_adamw_8bit",
    lr_scheduler_type="linear"
)


In [None]:

# ========================================
# STEP 7: SET UP THE TRAINER
# ========================================
# Collect the trainer inputs in one place, then build the SFTTrainer from them.
sft_kwargs = {
    "model": model,
    "train_dataset": dataset,
    "dataset_text_field": "text",  # dataset column passed as the training text field
    "tokenizer": tokenizer,
    "args": training_args,
    "max_seq_length": 512,
}
trainer = SFTTrainer(**sft_kwargs)
print("Trainer initialized successfully!")


In [None]:

# ========================================
# STEP 8: START TRAINING
# ========================================
# Run the fine-tuning loop; the returned summary is kept for later inspection.
train_result = trainer.train()
print("Training complete! Model and adapters saved to ./llama2-7b-qlora-finetuned")


In [None]:

# ========================================
# STEP 9: SAVE LORA ADAPTER
# ========================================
# Save the model via `save_pretrained` into a dedicated adapter directory.
adapter_dir = "./llama2-7b-qlora-adapter"
model.save_pretrained(adapter_dir)
print("LoRA adapter saved successfully!")


In [None]:

# ========================================
# STEP 10: TEST THE FINE-TUNED MODEL
# ========================================
instruction = "Explain quantum computing to a 12-year-old."
# Wrap the instruction in the Alpaca no-input prompt template so the prompt has
# the same shape as the `text` examples used for fine-tuning.
# NOTE(review): template taken from the tatsu-lab/alpaca dataset card — compare
# with the sample printed in Step 5.
prompt = (
    "Below is an instruction that describes a task. "
    "Write a response that appropriately completes the request.\n\n"
    f"### Instruction:\n{instruction}\n\n### Response:\n"
)

# Switch to eval mode so LoRA dropout is disabled during generation.
model.eval()

# Send the inputs to wherever the model was placed by `device_map="auto"`.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# No gradients are needed for inference; skipping them saves memory.
with torch.no_grad():
    outputs = model.generate(
        **inputs,
        max_new_tokens=100,
        pad_token_id=tokenizer.pad_token_id,  # explicit, avoids the "Setting pad_token_id" warning
    )
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
