## QLoRA Demonstration

### Setup 

In [None]:
# Run this only if needed (if you get error in the next cell)
!pip install -U bitsandbytes transformers accelerate peft datasets

In [None]:
# Sanity check: can PyTorch see a CUDA device? (displays True or False)
import torch

gpu_available = torch.cuda.is_available()
gpu_available

In [None]:
# Run nvidia-smi in a shell to see which GPU this runtime has
!nvidia-smi

### Load Model

In [None]:
# Load distilgpt2 in 4-bit precision (the quantized half of QLoRA)
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

model_name = "distilgpt2"

# 4-bit quantization settings: NF4 quant type, float16 compute dtype,
# double quantization enabled.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True
)

# Reuse the EOS token as the padding token so batches can be padded later.
tokenizer = AutoTokenizer.from_pretrained(model_name)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config   # this is what makes it QLoRA
)


In [None]:
prompt = "Answer politely: How do I reset my password?"

# Tokenize the prompt and move the tensors onto the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Seed the sampler so this cell prints the same text on every Restart & Run All.
torch.manual_seed(0)

out = model.generate(
    **inputs,
    max_new_tokens=60,
    do_sample=True,
    pad_token_id=tokenizer.pad_token_id  # explicit pad id (same token as EOS here)
)

# Keep only the newly generated tokens, dropping the echoed prompt.
generated = out[0][inputs["input_ids"].shape[-1]:]

print("BEFORE TRAINING (QLoRA):\n")
print(tokenizer.decode(generated, skip_special_tokens=True))


### Training


In [None]:
# Five short customer-support examples that all share one layout:
# a fixed instruction line, then "Question: ..." and "Answer: ...".
train_texts = [
    f"You are a polite customer support agent.\nQuestion: {question}\nAnswer: {answer}"
    for question, answer in [
        ("How do I reset my password?", "Please click 'Forgot Password' and follow the email instructions."),
        ("How can I contact support?", "You can email us at support@company.com."),
        ("Where is my order?", "I will gladly check your order status for you."),
        ("Can I return a product?", "Yes, returns are accepted within 30 days."),
        ("What are your hours?", "We are open Monday to Friday, 9 AM to 5 PM."),
    ]
]

from datasets import Dataset

# Wrap the raw strings in a Dataset with a single "text" column.
dataset = Dataset.from_dict(mapping={"text": train_texts})

def tokenize(batch):
    """Tokenize a batch of examples and attach causal-LM labels.

    Args:
        batch: Mapping with a "text" entry holding a list of strings
            (the shape `Dataset.map(..., batched=True)` passes in).

    Returns:
        The tokenizer output with an extra "labels" entry. Labels mirror
        "input_ids" on real tokens and are -100 on padded positions.
    """
    tokens = tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=128
    )
    # Every example is padded to 128 tokens and the pad token is the EOS token.
    # Copying input_ids verbatim would make the loss mostly about predicting
    # padding, so padded positions (attention_mask == 0) are set to -100, the
    # label value the loss ignores.
    tokens["labels"] = [
        [token_id if keep else -100 for token_id, keep in zip(ids, mask)]
        for ids, mask in zip(tokens["input_ids"], tokens["attention_mask"])
    ]
    return tokens

# Tokenize every example in batches, then have the dataset return PyTorch tensors.
dataset = dataset.map(function=tokenize, batched=True)
dataset.set_format(type="torch")



In [None]:
# QLoRA = Quantized model + LoRA
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training

# Prepare the quantized base model for training before attaching adapters.
# This is PEFT's recommended step for k-bit models (it freezes the base weights
# and upcasts the remaining half-precision parameters for stability).
# Gradient checkpointing is left off here; the model is small.
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=False)

lora_config = LoraConfig(
    r=8,                                  # adapter rank
    lora_alpha=16,                        # LoRA scaling parameter
    target_modules=["c_attn", "c_proj"],  # GPT-2 style
    lora_dropout=0.05,
    task_type="CAUSAL_LM"
)

# Wrap the base model so only the LoRA adapter weights are trained.
model = get_peft_model(model, lora_config)


In [None]:
from transformers import TrainingArguments, Trainer

# Very short demo run: max_steps=10 with batch size 1, logging every step,
# and no external experiment tracker.
training_args = TrainingArguments(
    report_to="none",
    logging_steps=1,
    max_steps=10,
    per_device_train_batch_size=1,
    output_dir="./demo_qlora"
)

trainer = Trainer(train_dataset=dataset, args=training_args, model=model)

trainer.train()


### Test again

In [None]:
# Training leaves the model in training mode; switch to eval mode so dropout
# (including lora_dropout) is disabled while generating.
model.eval()

# Seed the sampler so the printed text is reproducible across runs.
torch.manual_seed(0)

out = model.generate(
    **inputs,
    max_new_tokens=60,
    do_sample=True,
    pad_token_id=tokenizer.pad_token_id  # explicit pad id (same token as EOS here)
)

# Keep only the newly generated tokens, dropping the echoed prompt.
generated = out[0][inputs["input_ids"].shape[-1]:]

print("AFTER TRAINING (QLoRA):\n")
print(tokenizer.decode(generated, skip_special_tokens=True))
