# Parameter-Efficient Fine-Tuning with LoRA: Replication and Extension

**Course:** COMP8240 – Final Submission  
**Author:** Muhammad Zaran Zahir (47997222)  
**Notebook Purpose:** Replication of LoRA (SST-2) + Extensions (IMDB, AG News, IMDB-Mini)

This notebook reproduces the key results of *Hu et al. (2022), “LoRA: Low-Rank Adaptation of Large Language Models”* and extends them to new datasets.  
We use `roberta-base` with LoRA adapters on the attention **query/value** projections, training **<1%** of parameters.


In [None]:
# Installs the required packages (needed on a fresh Colab runtime); comment this line out if they are already installed:
!pip install -q torch transformers datasets peft accelerate scikit-learn evaluate sentencepiece matplotlib pandas tqdm

import torch, transformers, datasets, peft, sklearn, accelerate
# Report the installed version of each core dependency so a run can be reproduced.
for _label, _module in (
    ("torch", torch),
    ("transformers", transformers),
    ("datasets", datasets),
    ("peft", peft),
    ("scikit-learn", sklearn),
    ("accelerate", accelerate),
):
    print(f"{_label}:", _module.__version__)


In [None]:
import os, json, random, pathlib, numpy as np, pandas as pd
from dataclasses import dataclass

@dataclass
class Config:
    """Default hyper-parameters and paths shared by the experiments in this notebook."""

    # Hugging Face checkpoint loaded for both the tokenizer and the classifier.
    model_name: str = "roberta-base"
    # Default maximum sequence length.
    max_length: int = 256
    # Per-device batch size passed to the experiment runs.
    batch_size: int = 16
    # Number of training epochs passed to the experiment runs.
    epochs: int = 3
    # Learning rate passed to the experiment runs.
    lr: float = 2e-5
    # Default LoRA rank.
    rank: int = 8
    # Value used for `lora_alpha` in the LoRA configuration.
    alpha: int = 16
    # Value used for `lora_dropout` in the LoRA configuration.
    dropout: float = 0.05
    # Seed handed to set_seed().
    seed: int = 42
    # Root directory under which every run writes its outputs.
    out_dir: str = "results/lora_out"


# Single shared configuration instance used by all cells below.
CFG = Config()

def set_seed(seed=42):
    """Seed the Python, NumPy and PyTorch (CPU and CUDA) random generators with `seed`."""
    import torch

    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

# Create the results directory (including parents; no error if it already exists)
# and seed all random generators once before any experiment runs.
pathlib.Path(CFG.out_dir).mkdir(parents=True, exist_ok=True)
set_seed(CFG.seed)


In [None]:
from datasets import load_dataset

# Registry of supported tasks, keyed by short dataset name. Each entry holds:
#   "hf"    - positional arguments forwarded to load_dataset(...)
#   "text"  - candidate text column names, tried in order
#   "label" - candidate label column names, tried in order
#   "test"  - name of the split used for the final evaluation
DATASETS = {
    "sst2":   { "hf": ("glue","sst2"),    "text": ["sentence","text"], "label": ["label"],           "test": "validation" },
    "imdb":   { "hf": ("imdb",),          "text": ["text","sentence"], "label": ["label"],           "test": "test" },
    "ag_news":{ "hf": ("ag_news",),       "text": ["text","sentence"], "label": ["label"],           "test": "test" },
}

def get_columns(ds, pref_list):
    """Pick a column of ``ds["train"]`` by preference, falling back to a string column.

    Args:
        ds: Mapping with a ``"train"`` split that exposes ``column_names`` and
            row access by integer index (``split[0]`` returning a row mapping).
        pref_list: Candidate column names, tried in order.

    Returns:
        The first name in ``pref_list`` present in the training split; otherwise
        the first column whose value in the first row is a ``str``.

    Raises:
        ValueError: If no preferred column exists and no string column is found
            (including when the training split is empty).
    """
    train = ds["train"]
    available = train.column_names

    for name in pref_list:
        if name in available:
            return name

    # Fallback: inspect only the first row instead of loading whole columns,
    # and skip the probe entirely when there is no row to look at.
    if len(train) > 0:
        first_row = train[0]
        for name in available:
            if isinstance(first_row[name], str):
                return name

    raise ValueError(
        f"Suitable column not found. Tried {list(pref_list)}; "
        f"available columns: {list(available)}"
    )

def load_task(name):
    """Load a registered dataset and resolve the columns and split the pipeline needs.

    Args:
        name: Key into the module-level ``DATASETS`` registry.

    Returns:
        Tuple ``(ds, text_col, label_col, test_split, num_labels)`` where ``ds``
        is whatever ``load_dataset(*meta["hf"])`` returned, the two column names
        come from ``get_columns``, ``test_split`` is the evaluation split name
        and ``num_labels`` is the number of target classes.

    Raises:
        KeyError: If ``name`` is not registered, or neither the configured test
            split nor a ``"validation"`` split exists in the loaded dataset.
    """
    meta = DATASETS[name]
    ds = load_dataset(*meta["hf"])
    text_col = get_columns(ds, meta["text"])
    label_col = get_columns(ds, meta["label"])

    test_split = meta["test"] if meta["test"] in ds else "validation"
    if test_split not in ds:
        # Fail here with a clear message rather than later inside the trainer.
        raise KeyError(
            f"Dataset '{name}' has neither a '{meta['test']}' nor a 'validation' split; "
            f"available splits: {list(ds)}"
        )

    # Prefer the class count declared in the dataset schema (ClassLabel features
    # expose `num_classes`): it stays correct even if a class is missing from the
    # training rows. Otherwise count the distinct labels seen in training.
    label_feature = ds["train"].features[label_col]
    num_labels = getattr(label_feature, "num_classes", None)
    if num_labels is None:
        num_labels = len(set(ds["train"][label_col]))

    return ds, text_col, label_col, test_split, num_labels


In [30]:
from transformers import (AutoTokenizer, AutoModelForSequenceClassification,
                          DataCollatorWithPadding, Trainer, TrainingArguments)
from peft import LoraConfig, get_peft_model
from sklearn.metrics import accuracy_score, f1_score

def run_lora_experiment(dataset_name, rank=8, epochs=3, lr=2e-5, batch_size=16, max_length=256):
    """Fine-tune ``CFG.model_name`` with LoRA adapters on one registered dataset.

    The run seeds all RNGs, loads and tokenises the dataset, wraps the
    sequence-classification model with LoRA on the ``query``/``value`` modules,
    trains with the Hugging Face ``Trainer``, evaluates on the task's test split
    and writes the model, ``metrics.json`` and ``summary.csv`` to
    ``<CFG.out_dir>/<dataset_name>_r<rank>``.

    Args:
        dataset_name: Key understood by ``load_task``.
        rank: LoRA rank ``r``.
        epochs: Number of training epochs.
        lr: Learning rate.
        batch_size: Per-device batch size for training and evaluation.
        max_length: Maximum token length; longer inputs are truncated.

    Returns:
        Dict with keys ``dataset``, ``rank``, ``accuracy``, ``f1`` and
        ``trainable_params_pct`` (percentage of model parameters that require
        gradients after the LoRA wrapping).
    """
    import inspect

    set_seed(CFG.seed)
    ds, text_col, label_col, test_split, num_labels = load_task(dataset_name)

    # Keep only the splits this function reads, so unused ones (e.g. unlabeled
    # extras) are not tokenised. A new container is built to avoid mutating the
    # object returned by load_task.
    needed_splits = {"train", "validation", test_split}
    ds = type(ds)({split: ds[split] for split in ds if split in needed_splits})

    # Limit training samples for faster runs (reduced due to hardware limitations).
    # Done before tokenisation so only the retained rows are processed.
    if dataset_name == "sst2":  # only apply for replication test, change as desired
        ds["train"] = ds["train"].select(range(min(5000, len(ds["train"]))))

    tok = AutoTokenizer.from_pretrained(CFG.model_name, use_fast=True)

    def tokenize(batch):
        return tok(batch[text_col], truncation=True, max_length=max_length)

    ds = ds.map(tokenize, batched=True)
    ds = ds.rename_column(label_col, "labels")
    keep = ["input_ids", "attention_mask", "labels"]
    ds = ds.remove_columns([c for c in ds["train"].column_names if c not in keep])

    model = AutoModelForSequenceClassification.from_pretrained(CFG.model_name, num_labels=num_labels)

    lora_cfg = LoraConfig(
        r=rank, lora_alpha=CFG.alpha, lora_dropout=CFG.dropout,
        bias="none", task_type="SEQ_CLS",
        target_modules=["query", "value"],
    )
    model = get_peft_model(model, lora_cfg)

    # Measure the trainable share instead of reporting a fixed placeholder.
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    total_params = sum(p.numel() for p in model.parameters())
    trainable_pct = 100.0 * trainable_params / total_params if total_params else 0.0

    collator = DataCollatorWithPadding(tokenizer=tok)

    def compute_metrics(eval_pred):
        logits, labels = eval_pred
        preds = np.argmax(logits, axis=-1)
        acc = accuracy_score(labels, preds)
        f1 = f1_score(labels, preds, average="macro")
        return {"accuracy": acc, "f1": f1}

    out = os.path.join(CFG.out_dir, f"{dataset_name}_r{rank}")
    os.makedirs(out, exist_ok=True)

    args = TrainingArguments(
        output_dir=out,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        eval_strategy="epoch",        # was evaluation_strategy
        save_strategy="epoch",
        learning_rate=lr,
        num_train_epochs=epochs,
        weight_decay=0.01,
        logging_steps=50,
        save_total_limit=2,
        load_best_model_at_end=True,
        metric_for_best_model="accuracy",
        seed=CFG.seed,                # keep the Trainer's seed in step with set_seed
        report_to=[],                 # disable W&B etc.
    )

    # Newer transformers releases take the tokenizer as `processing_class`;
    # older ones only know `tokenizer`. Use whichever this version accepts.
    trainer_params = inspect.signature(Trainer.__init__).parameters
    tok_kwarg = "processing_class" if "processing_class" in trainer_params else "tokenizer"

    # NOTE(review): when the dataset has no "validation" split, the per-epoch
    # evaluation (and therefore best-checkpoint selection) uses the same split
    # that is reported below, so the reported score is optimistically biased.
    trainer = Trainer(
        model=model, args=args,
        train_dataset=ds["train"],
        eval_dataset=ds["validation"] if "validation" in ds else ds[test_split],
        data_collator=collator,
        compute_metrics=compute_metrics,
        **{tok_kwarg: tok},
    )
    trainer.train()

    metrics = trainer.evaluate(ds[test_split])
    print(f"[{dataset_name}] test metrics:", metrics)

    # Save adapter + metrics + summary row for report
    trainer.save_model(out)
    with open(os.path.join(out, "metrics.json"), "w") as f:
        json.dump({k: float(v) for k, v in metrics.items()}, f, indent=2)

    row = {
        "dataset": dataset_name,
        "rank": rank,
        "accuracy": metrics.get("eval_accuracy"),
        "f1": metrics.get("eval_f1"),
        "trainable_params_pct": trainable_pct,
    }
    pd.DataFrame([row]).to_csv(os.path.join(out, "summary.csv"), index=False)
    return row


## Replication on Original Dataset (SST-2)
This section reproduces the LoRA results on the **SST-2** dataset to verify reproducibility of the original paper.  


In [None]:
# Replication run: rank-8 LoRA on SST-2 with the shared defaults from CFG.
# run_lora_experiment trains on only the first 5,000 SST-2 training examples.
rep_sst2 = run_lora_experiment("sst2", rank=8, epochs=CFG.epochs, lr=CFG.lr, batch_size=CFG.batch_size)
# Show the result row as a one-line table.
pd.DataFrame([rep_sst2])


##  Extension to New Datasets
We extend LoRA fine-tuning to new text classification datasets (**IMDB**, **AG News**)  
and a synthetic **IMDB-Mini-Paraphrased** dataset generated via back-translation.


## 🌍 Extension: IMDB, AG News (rank = 8)


In [None]:
# Run the rank-8 LoRA pipeline on each extension dataset and tabulate the result rows.
extension_datasets = ("imdb", "ag_news")
rows = [
    run_lora_experiment(
        task_name,
        rank=8,
        epochs=CFG.epochs,
        lr=CFG.lr,
        batch_size=CFG.batch_size,
    )
    for task_name in extension_datasets
]

ext_df = pd.DataFrame(rows)
ext_df


In [None]:
import torch, pandas as pd, pathlib, time
from datasets import load_dataset, Dataset, DatasetDict
from transformers import MarianMTModel, MarianTokenizer
from tqdm.auto import tqdm

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Device:", device)

#  Load IMDB
# The IMDB splits are stored grouped by label, so taking the first rows would
# give a subset containing a single class. Shuffle with a fixed seed first so
# the 2,000/500-row subsets contain both classes and stay reproducible.
imdb = load_dataset("imdb")
train_subset = imdb["train"].shuffle(seed=CFG.seed).select(range(2000))
test_subset = imdb["test"].shuffle(seed=CFG.seed).select(range(500))
# Plain Python lists, so the slicing and `+` concatenation below work as written.
train_texts = list(train_subset["text"])
train_labels = list(train_subset["label"])
test_texts  = list(test_subset["text"])
test_labels = list(test_subset["label"])

#  Translation models + tokenizers (English->French and French->English)
src_model_id = "Helsinki-NLP/opus-mt-en-fr"
tgt_model_id = "Helsinki-NLP/opus-mt-fr-en"
tok_en_fr = MarianTokenizer.from_pretrained(src_model_id)
tok_fr_en = MarianTokenizer.from_pretrained(tgt_model_id)
# Both models are moved to the selected device and put in eval mode (inference only).
mod_en_fr = MarianMTModel.from_pretrained(src_model_id).to(device).eval()
mod_fr_en = MarianMTModel.from_pretrained(tgt_model_id).to(device).eval()

def backtranslate(texts, batch_size=16, max_len=128, max_new=96):
    """Paraphrase `texts` by translating English -> French -> English.

    Inputs are processed in chunks of `batch_size`. Each leg truncates its
    input to `max_len` tokens and generates at most `max_new` new tokens with
    greedy decoding. Returns the list of back-translated strings in input order.
    """

    def _translate(tokenizer, model, sentences):
        # Encode, generate without gradient tracking, and decode with the same tokenizer.
        encoded = tokenizer(
            sentences,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=max_len,
        ).to(device)
        with torch.no_grad():
            generated = model.generate(
                **encoded, max_new_tokens=max_new, num_beams=1, do_sample=False
            )
        return tokenizer.batch_decode(generated, skip_special_tokens=True)

    results = []
    for start in tqdm(range(0, len(texts), batch_size), desc="Back-translation"):
        chunk = texts[start:start + batch_size]

        french = _translate(tok_en_fr, mod_en_fr, chunk)      # EN -> FR
        results.extend(_translate(tok_fr_en, mod_fr_en, french))  # FR -> EN

        # optional: free cached GPU memory between batches
        if device == "cuda":
            torch.cuda.empty_cache()
    return results

# Number of training reviews to paraphrase (kept small for faster runs).
# Set N = None to use every loaded training review; N = 0 would slice to an
# empty list and produce an empty training set.
N = 300
originals = list(train_texts[:N])
paraphrased = backtranslate(originals, batch_size=16, max_len=128, max_new=96)

# Training set = paraphrases followed by their originals; both halves share the same labels.
train_texts_paraphrased = paraphrased + originals
train_labels_paraphrased = list(train_labels[:N]) + list(train_labels[:N])

mini_df  = pd.DataFrame({"text": train_texts_paraphrased, "label": train_labels_paraphrased})
test_df  = pd.DataFrame({"text": test_texts, "label": test_labels})
mini_ds  = Dataset.from_pandas(mini_df)
test_ds  = Dataset.from_pandas(test_df)
imdb_mini = DatasetDict({"train": mini_ds, "test": test_ds})

# Persist both splits as CSV so the synthetic dataset can be inspected or reused.
out_dir = pathlib.Path("data_notes/imdb_mini")
out_dir.mkdir(parents=True, exist_ok=True)
mini_df.to_csv(out_dir/"train.csv", index=False)
test_df.to_csv(out_dir/"test.csv", index=False)

print(f"✅ Synthetic IMDB-Mini-Paraphrased created: {len(mini_ds)} training examples")


In [None]:
# Reuse the same run_lora_experiment on the in-memory IMDB-Mini-Paraphrased data.
from datasets import DatasetDict
# NOTE(review): "validation" and "test" are the same rows here, so the
# checkpoint chosen during training is selected on the reported split.
imdb_dict = DatasetDict({"train": imdb_mini["train"], "validation": imdb_mini["test"], "test": imdb_mini["test"]})

# Register the task. The "hf" value is only a placeholder: pointing it at
# ("imdb",) would make load_task download and train on the full IMDB dataset
# instead of the paraphrased subset built above.
DATASETS["imdb_mini"] = {"hf": ("imdb_mini",), "text": ["text"], "label": ["label"], "test": "test"}

# load_task looks up the notebook-level name `load_dataset` when it is called,
# so rebinding that name for the duration of this run makes it receive
# imdb_dict. The real loader is restored afterwards, even if the run fails.
_hub_load_dataset = load_dataset


def _load_imdb_mini(*args, **kwargs):
    """Stand-in loader: ignore the arguments and return a shallow copy of imdb_dict."""
    return DatasetDict(imdb_dict)


load_dataset = _load_imdb_mini
try:
    # Call the experiment
    res_imdb_mini = run_lora_experiment("imdb_mini", rank=8, epochs=CFG.epochs, lr=CFG.lr, batch_size=CFG.batch_size)
finally:
    load_dataset = _hub_load_dataset

pd.DataFrame([res_imdb_mini])


##  Rank Ablation (r = 4, 8, 16) on SST-2


In [None]:
# Collect all summary.csv files into one dataframe
# Gather every per-run summary.csv under CFG.out_dir into one table.
summary_paths = sorted(pathlib.Path(CFG.out_dir).glob("*_r*/summary.csv"))
all_rows = [pd.read_csv(path) for path in summary_paths]

# Imported at cell level because the plotting cell further down also uses `plt`.
import matplotlib.pyplot as plt

if all_rows:
    summary = pd.concat(all_rows, ignore_index=True)
    summary.sort_values(["dataset","rank"], inplace=True)
    display(summary)

    # The figure is titled "r=8", so plot only the rank-8 runs; otherwise runs
    # at other ranks would draw several bars on the same dataset label.
    rank8 = summary[summary["rank"] == 8]
    plt.figure(figsize=(6,4))
    plt.bar(rank8["dataset"], rank8["accuracy"])
    plt.ylabel("Accuracy")
    plt.title("LoRA (r=8) accuracy across datasets")
    plt.xticks(rotation=15)
    plt.show()
else:
    # Sorting or plotting an empty frame would fail on the missing columns.
    summary = pd.DataFrame()
    print(f"No summary.csv files found under {CFG.out_dir}; run the experiments first.")


In [None]:
# Rank ablation: repeat the SST-2 run at several LoRA ranks and compare accuracy.
ablation_ranks = [4, 8, 16]
abl_rows = [
    run_lora_experiment(
        "sst2",
        rank=lora_rank,
        epochs=CFG.epochs,
        lr=CFG.lr,
        batch_size=CFG.batch_size,
    )
    for lora_rank in ablation_ranks
]
abl_df = pd.DataFrame(abl_rows)
display(abl_df)

# Line plot of accuracy against rank, one marker per run.
rank_axis = abl_df["rank"]
accuracy_axis = abl_df["accuracy"]
plt.figure(figsize=(5,3))
plt.plot(rank_axis, accuracy_axis, marker="o")
plt.xlabel("LoRA rank (r)")
plt.ylabel("Accuracy")
plt.title("SST-2 accuracy vs LoRA rank")
plt.show()


All cell outputs were removed because my free compute allowance was exhausted during the run; the outputs are included in the report.