In [None]:
!pip install -q transformers datasets peft accelerate bitsandbytes torch


In [None]:
# Sanity-check that an NVIDIA GPU is attached before the quantized model load
# and fp16 training further down.
!nvidia-smi


In [None]:
import torch

from datasets import Dataset
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    DataCollatorForLanguageModeling,
    Trainer,
    TrainingArguments,
)


In [None]:
# Toy instruction-tuning corpus: each entry is one complete
# "Instruction: ...\nResponse: ..." training string.
example_texts = (
    "Instruction: Say hello\nResponse: Hello!",
    "Instruction: What is AI?\nResponse: AI stands for Artificial Intelligence.",
    "Instruction: Capital of France?\nResponse: Paris.",
)

# One record per example, stored under the single "text" column.
data = [{"text": example} for example in example_texts]

dataset = Dataset.from_list(data)


In [None]:
# Hub identifier of the base checkpoint; reused below when loading the model.
model_name = "gpt2-medium"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token for padding so that the fixed-length padding
# requested in the tokenization step has a token to pad with.
tokenizer.pad_token = tokenizer.eos_token


In [None]:
def tokenize(batch, max_length=128):
    """Tokenize a batch of examples to a fixed sequence length.

    Args:
        batch: Mapping with a "text" entry holding the strings to encode
            (the batched form passed in by ``Dataset.map``).
        max_length: Number of tokens every sequence is truncated or padded
            to. Defaults to 128.

    Returns:
        Whatever the module-level ``tokenizer`` returns for the batch.
    """
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=max_length,
    )

# Encode the whole dataset in batches and drop the raw "text" column so only
# the tokenizer's outputs remain.
tokenized_ds = dataset.map(
    tokenize,
    remove_columns=["text"],
    batched=True,
)


In [None]:
# 4-bit quantization settings applied when the base model is loaded.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # store weights in 4-bit
    bnb_4bit_use_double_quant=True,        # double quantization enabled
    bnb_4bit_quant_type="nf4",             # NF4 quantization type
    bnb_4bit_compute_dtype=torch.float16,  # compute dtype is fp16
)


In [None]:
# Load the base causal LM with the 4-bit quantization config defined above;
# device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=bnb_config,
)


In [None]:
# LoRA adapter settings: rank-8 adapters on the fused attention projection
# ("c_attn"), no bias training, causal-LM task.
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["c_attn"],
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM"
)

# Prepare the quantized base model for training before attaching adapters.
# This is the PEFT-recommended step for k-bit (QLoRA) fine-tuning.
model = prepare_model_for_kbit_training(model)

# NOTE: this cell rebinds `model`; re-running it without reloading the base
# model would wrap an already-wrapped model.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()


In [None]:
# Training hyperparameters for the short demo run.
training_args = TrainingArguments(
    output_dir="./qlora_out",
    # Schedule: 3 epochs, batches of 2 per device, no gradient accumulation.
    num_train_epochs=3,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=1,
    # Mixed-precision (fp16) training.
    fp16=True,
    # Log every step, report to no external tracker, write no checkpoints.
    logging_steps=1,
    report_to="none",
    save_strategy="no",
)


In [None]:
# Causal-LM collation: mlm=False turns off masked-language-model masking.
data_collator = DataCollatorForLanguageModeling(
    mlm=False,
    tokenizer=tokenizer,
)

# Wire the LoRA-wrapped model, the tokenized dataset and the collator together.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_ds,
)


In [None]:
# Run the fine-tuning loop configured above; as the cell's last expression,
# the value returned by train() is displayed as the cell output.
trainer.train()


In [None]:
# Smoke-test the fine-tuned model on one of the training prompts.
prompt = "Instruction: Capital of France?\nResponse:"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# trainer.train() does not switch the model back to evaluation mode, so
# dropout (base model and LoRA) would still be active during generation.
model.eval()

with torch.no_grad():
    output = model.generate(
        **inputs,
        max_new_tokens=20,
        # GPT-2 has no dedicated pad token; pass EOS explicitly rather than
        # relying on generate()'s implicit fallback (and its warning).
        pad_token_id=tokenizer.eos_token_id,
    )

print(tokenizer.decode(output[0], skip_special_tokens=True))
