# Lightweight Fine-Tuning Project

* PEFT technique: LoRA (Low-Rank Adaptation)
* Model: GPT-2 fine-tuned for sequence classification (via AutoModelForSequenceClassification)
* Evaluation approach: Hugging Face Trainer with the “accuracy” metric
* Fine-tuning dataset: SST‑2 from the GLUE benchmark (binary sentiment classification)

## Loading and Evaluating a Foundation Model

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import numpy as np
import torch

# preprocess function: tokenize and pad/truncate
def preprocess_function(examples):
    """Tokenize the ``"sentence"`` field of a batch of examples.

    Relies on the module-level ``tokenizer``, which must be defined before
    this function is called (it is looked up at call time).

    Args:
        examples: A batch mapping that contains a ``"sentence"`` entry.

    Returns:
        Whatever the tokenizer returns for those sentences, with every
        sequence truncated and padded to exactly 128 tokens.
    """
    # Fixed-length settings: cut long inputs and pad short ones to 128 tokens.
    tokenize_kwargs = {
        "truncation": True,
        "padding": "max_length",
        "max_length": 128,
    }
    return tokenizer(examples["sentence"], **tokenize_kwargs)

def compute_metrics(eval_pred):
    """Compute classification accuracy for the Hugging Face ``Trainer``.

    Args:
        eval_pred: A ``(logits, labels)`` pair. The predicted class for each
            example is the argmax of ``logits`` over the last axis.

    Returns:
        A dict ``{"accuracy": value}`` where ``value`` is the fraction of
        predictions equal to their label, as a Python ``float``.
    """
    logits, labels = eval_pred
    preds = np.argmax(logits, axis=-1)
    # Average the boolean matches directly: NumPy accumulates in float64, so
    # the metric is not polluted by float32 rounding (e.g. 0.522 rather than
    # 0.5220000147819519).
    accuracy = float(np.mean(preds == labels))
    return {"accuracy": accuracy}


# load the SST-2 dataset
raw_datasets = load_dataset("glue", "sst2")
# for quicker iteration, take fixed (seeded) subsets:
# 10,000 training examples and 500 validation examples
train_dataset = raw_datasets["train"].shuffle(seed=42).select(range(10000))
eval_dataset  = raw_datasets["validation"].shuffle(seed=42).select(range(500))

# load GPT-2 tokenizer & model for sequence classification
model_name = "gpt2"
tokenizer  = AutoTokenizer.from_pretrained(model_name)

# GPT-2 has no pad token by default; use eos_token as pad
tokenizer.pad_token = tokenizer.eos_token

# The classification head (`score.weight`) is not in the gpt2 checkpoint and
# is randomly initialized, so seed torch first to make the baseline
# evaluation below reproducible from run to run.
torch.manual_seed(42)
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)

# make sure the model config knows about that pad token
model.config.pad_token_id = tokenizer.pad_token_id


# tokenize datasets
tokenized_train = train_dataset.map(preprocess_function, batched=True)
tokenized_eval  = eval_dataset.map(preprocess_function,  batched=True)

# rename label column for Trainer compatibility
tokenized_train = tokenized_train.rename_column("label", "labels")
tokenized_eval  = tokenized_eval.rename_column("label",  "labels")

# set torch format, exposing only the columns the model consumes
tokenized_train.set_format("torch", columns=["input_ids", "attention_mask", "labels"])
tokenized_eval.set_format("torch", columns=["input_ids", "attention_mask", "labels"])


# evaluate the original model (no training) to get a baseline;
# its classification head is still untrained at this point
eval_args = TrainingArguments(
    output_dir="/tmp/original_eval",
    per_device_eval_batch_size=8,
    do_train=False,
    logging_dir="/tmp/original_eval/logs",
)
trainer = Trainer(
    model=model,
    args=eval_args,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

orig_metrics = trainer.evaluate()
print("Original model evaluation:", orig_metrics)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Map:   0%|          | 0/10000 [00:00<?, ? examples/s]

You're using a GPT2TokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Original model evaluation: {'eval_loss': 3.7848360538482666, 'eval_accuracy': 0.5220000147819519, 'eval_runtime': 4.4033, 'eval_samples_per_second': 113.552, 'eval_steps_per_second': 14.308}


## Performing Parameter-Efficient Fine-Tuning


In [2]:
from peft import LoraConfig, get_peft_model

# enable gradient checkpointing to save memory during training
model.gradient_checkpointing_enable()

# create a LoRA config
lora_config = LoraConfig(
    task_type="SEQ_CLS",      # sequence classification
    inference_mode=False,     # training mode
    r=8,                      # LoRA rank
    lora_alpha=32,            # LoRA alpha (scaling numerator)
    lora_dropout=0.1,         # dropout applied on the LoRA branch
    # GPT-2 uses Conv1D layers that store weights as (fan_in, fan_out);
    # the PEFT docs say this flag should be True for such layers.
    fan_in_fan_out=True,
)

# wrap the model in PEFT
peft_model = get_peft_model(model, lora_config)

# print out the fraction of trainable parameters
peft_model.print_trainable_parameters()



trainable params: 297,984 || all params: 124,737,792 || trainable%: 0.23888830740245906


In [3]:
# training arguments for PEFT fine-tuning
# NOTE: no evaluation-strategy argument is passed. The default is "no"
# (no evaluation during training), and leaving it out avoids the
# `evaluation_strategy` keyword, which newer transformers releases
# renamed to `eval_strategy`.
peft_train_args = TrainingArguments(
    output_dir="/tmp/peft_training",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    learning_rate=2e-5,
    logging_steps=1000,
    save_strategy="no",       # no intermediate checkpoints; adapter is saved below
    logging_dir="/tmp/peft_training/logs",
)

# create a Trainer for fine-tuning
peft_trainer = Trainer(
    model=peft_model,
    args=peft_train_args,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_eval,      # not used during training; available for peft_trainer.evaluate()
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

# run training
peft_trainer.train()

# save the PEFT adapter to disk (not a full copy of the base model)
peft_model.save_pretrained("/tmp/peft_gpt2")

`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`...


Step,Training Loss
1000,0.9714
2000,0.6773
3000,0.6332


## Performing Inference with a PEFT Model

In [4]:
from peft import PeftModel

# reload the base model architecture
base_model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=2)
# make sure GPT-2 knows its pad token
base_model.config.pad_token_id = tokenizer.pad_token_id
# NOTE: gradient checkpointing is deliberately not enabled here. It is a
# training-time memory optimization and this model is only evaluated.

# load the LoRA adapter into the base model
peft_inference_model = PeftModel.from_pretrained(base_model, "/tmp/peft_gpt2")

# evaluate the fine-tuned model on the same evaluation set as the baseline
inference_args = TrainingArguments(
    output_dir="/tmp/peft_eval",
    per_device_eval_batch_size=8,
    do_train=False,
    logging_dir="/tmp/peft_eval/logs",
)
inference_trainer = Trainer(
    model=peft_inference_model,
    args=inference_args,
    eval_dataset=tokenized_eval,
    compute_metrics=compute_metrics,
    tokenizer=tokenizer,
)

finetuned_metrics = inference_trainer.evaluate()
print("Fine-tuned model evaluation:", finetuned_metrics)

Some weights of GPT2ForSequenceClassification were not initialized from the model checkpoint at gpt2 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Fine-tuned model evaluation: {'eval_loss': 0.5494799613952637, 'eval_accuracy': 0.7260000109672546, 'eval_runtime': 4.0868, 'eval_samples_per_second': 122.344, 'eval_steps_per_second': 15.415}


## Conclusion

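On the 500-example SST-2 validation subset, the foundation model (whose classification head is randomly initialized) achieved 52.2 % accuracy, roughly chance level for a binary task. After LoRA fine-tuning, accuracy rose to 72.6 % and the evaluation loss dropped from 3.78 to 0.55.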