<a href="https://colab.research.google.com/github/IsuruMahakumara/microsoft-ai-ml-engineering/blob/main/Practice_activity_Applying_QLoRA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install -U bitsandbytes



### Load Sample Dataset

IMDB with 2000 random rows

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorWithPadding

# --- Mock Dataset Preparation (Replace with your actual data loading) ---

# 1. Load a raw dataset and tokenizer
raw_datasets = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")

# 2. Define tokenization function
def tokenize_function(examples):
    return tokenizer(examples['text'], truncation=True)

# 3. Create dummy splits and tokenize (similar to your previous steps)
# We will use the 'test' split for our mock validation set for simplicity.
tokenized_train = raw_datasets['train'].shuffle(seed=42).select(range(200)).map(tokenize_function, batched=True)
tokenized_val = raw_datasets['test'].shuffle(seed=42).select(range(50)).map(tokenize_function, batched=True)

# 4. Remove unnecessary columns and rename 'label' (if needed)
tokenized_train = tokenized_train.remove_columns(["text"]).rename_column("label", "labels")
tokenized_val = tokenized_val.remove_columns(["text"]).rename_column("label", "labels")

# 5. Define Data Collator
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Map:   0%|          | 0/50 [00:00<?, ? examples/s]

### GPT2ForSequenceClassification

In [2]:
import torch
from transformers import GPT2ForSequenceClassification, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model, TaskType

# 1. Define the Quantization Configuration (Equivalent to QuantizeModel)
# For 8-bit quantization (or QLoRA's 4-bit)
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,  # Set to True for 8-bit quantization
    # For QLoRA (4-bit), you would use: load_in_4bit=True
)

# 2. Load the GPT-2 model with Quantization
# The quantization is handled during model loading by passing bnb_config
model = GPT2ForSequenceClassification.from_pretrained(
    'gpt2',
    num_labels=3,  # Assuming a 3-class classification task
    quantization_config=bnb_config,
    device_map="auto"
)

# 3. Define the LoRA Configuration (Equivalent to applying LoRALayer)
# PEFT automatically determines the correct layers for GPT-2
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,       # Sequence Classification
    r=4,
    lora_alpha=16,
    lora_dropout=0.1,
    # target_modules: For GPT-2/Llama, 'c_attn' is the key module for attention weights.
    # PEFT often infers this, but specifying it is good practice.
    target_modules=["c_attn"],
    bias="none",
)

# 4. Inject the LoRA adapters into the Quantized Model
# The model is now ready for training
lora_model = get_peft_model(model, lora_config)

print("✅ Model loaded, quantized, and LoRA adapters injected using PEFT.")
lora_model.print_trainable_parameters()

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


✅ Model loaded, quantized, and LoRA adapters injected using PEFT.
trainable params: 149,760 || all params: 124,591,872 || trainable%: 0.1202


In [3]:
from transformers import Trainer, TrainingArguments

# Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=1,
    per_device_train_batch_size=16,
    eval_strategy="epoch",
    report_to="none",
)

# Fine-tune the QLoRA-enhanced model
trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_val,
    data_collator=data_collator,
)

# Train the model
trainer.train()



KeyboardInterrupt: 

In [None]:
# Evaluate the model on the test set
results = trainer.evaluate(eval_dataset=test_data)
print(f"Test Accuracy: {results['eval_accuracy']}")