First, we installed all the necessary libraries, and imported them into our environment.

In [None]:
!pip install transformers datasets scikit-learn pandas torch peft bitsandbytes matplotlib

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from datasets import Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments, EvalPrediction
from peft import LoraConfig, TaskType, get_peft_model
from peft.optimizers import create_loraplus_optimizer
import matplotlib.pyplot as plt
import bitsandbytes as bnb
import torch
import time

Next, we set up our device, leveraging the GPU if available and the CPU if not.
We also load in our dataset and split it between training and testing sets.

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Load the labelled data and coerce the "humor" label column to integers.
df = pd.read_csv("data/dataset.csv")
df["humor"] = df["humor"].astype(int)
print(df.head())

# Hold out 20% of the rows for evaluation. The split is stratified on the
# label column and seeded so that it is repeatable between runs.
train_df, test_df = train_test_split(
    df,
    test_size=0.2,
    random_state=42,
    stratify=df["humor"],
)

# Wrap both pandas frames as Hugging Face datasets.
train_ds, test_ds = (Dataset.from_pandas(frame) for frame in (train_df, test_df))

Using the BERT tokenizer, we tokenized our text data to prepare it for model training.

In [None]:
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")


def tokenize_fn(batch):
    """Tokenize the "text" field of a batch, truncating/padding each example to 64 tokens."""
    return tokenizer(
        batch["text"],
        truncation=True,
        padding="max_length",
        max_length=64,
    )


def _prepare_split(split):
    """Tokenize one split, rename its label column and expose torch tensors."""
    split = split.map(tokenize_fn, batched=True)
    # The label column is renamed from "humor" to "labels" before formatting.
    split = split.rename_column("humor", "labels")
    # Only these three columns are exposed as torch tensors.
    split.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])
    return split


train_ds = _prepare_split(train_ds)
test_ds = _prepare_split(test_ds)

We then load a pre-trained BERT model for sequence classification and define a function that computes our evaluation metrics.

In [None]:
# Pre-trained BERT with a two-label classification head, moved to the selected device.
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=2)
model = model.to(device)


def compute_metrics(p: EvalPrediction):
    """Score one evaluation pass.

    Args:
        p: Evaluation output; ``p.predictions`` is reduced with ``argmax`` over
            its last axis and compared against ``p.label_ids``.

    Returns:
        Dict with "accuracy", "f1", "precision" and "recall", each computed by
        the matching scikit-learn function with its default settings.
    """
    predicted = p.predictions.argmax(-1)
    expected = p.label_ids
    scorers = {
        "accuracy": accuracy_score,
        "f1": f1_score,
        "precision": precision_score,
        "recall": recall_score,
    }
    return {metric: scorer(expected, predicted) for metric, scorer in scorers.items()}

Afterwards, we create helper functions for the LoRA and LoRA+ setups so that each one receives the correct inputs.

In [None]:
def get_lora_model(model, r=4):
    """Wrap ``model`` with LoRA adapters configured for sequence classification.

    Args:
        model: Model handed to ``get_peft_model``.
        r: LoRA rank passed to ``LoraConfig``.

    Returns:
        The object returned by ``get_peft_model`` for ``model`` and the config.

    NOTE(review): PEFT documents ``get_peft_model`` as modifying the model it
    receives in place, so callers that still need the unwrapped model should
    pass a copy -- confirm against the installed PEFT version.
    """
    config_kwargs = {
        "task_type": TaskType.SEQ_CLS,
        "r": r,
        "lora_alpha": 1,
        "lora_dropout": 0.1,
    }
    return get_peft_model(model, LoraConfig(**config_kwargs))

def get_lora_plus_inputs(lora_model):
    
    optimizer = create_loraplus_optimizer(
        model=model,
        optimizer_cls=bnb.optim.Adam8bit,
        lr=5e-5,
        loraplus_lr_ratio=16,
    )
    scheduler = None

    return(optimizer,scheduler)

We create 3 instances of our model. We use the default 4 value for r in the LoRA model, as we found no significant difference in performance when changing it.

In [None]:
base_model = model
lora_model = get_lora_model(model)
lora_plus_inputs = get_lora_plus_inputs(lora_model)

We create our training arguments.

In [None]:
# Hyper-parameters for training, collected by concern and then unpacked into
# TrainingArguments.
_training_kwargs = {
    # Where checkpoints and logs are written.
    "output_dir": "./bert-humor",
    "logging_dir": "./logs",
    "logging_steps": 50,
    "report_to": "none",
    # Evaluate and checkpoint once per epoch.
    "eval_strategy": "epoch",
    "save_strategy": "epoch",
    # Optimisation settings.
    "learning_rate": 2e-5,
    "weight_decay": 0.01,
    "num_train_epochs": 1,
    "per_device_train_batch_size": 16,
    "per_device_eval_batch_size": 32,
    "fp16": False,
}

training_args = TrainingArguments(**_training_kwargs)

We then create a Trainer for each model and add it to a list of trainers.

In [None]:
# Settings that are identical for every trainer below.
_shared_trainer_kwargs = {
    "args": training_args,
    "train_dataset": train_ds,
    "eval_dataset": test_ds,
    "compute_metrics": compute_metrics,
}

# Baseline: the model as held in `model`, with the Trainer's default optimizer.
base_trainer = Trainer(model=model, **_shared_trainer_kwargs)

# LoRA: the PEFT-wrapped model, again with the default optimizer.
lora_trainer = Trainer(model=lora_model, **_shared_trainer_kwargs)

# LoRA+: the same PEFT-wrapped model, but with the custom (optimizer, scheduler) pair.
# NOTE(review): this trainer and `lora_trainer` are given the same `lora_model`
# object, so whichever of the two is trained second starts from weights the
# first one already updated -- confirm this is intended for the comparison.
lora_plus_trainer = Trainer(
    model=lora_model,
    optimizers=lora_plus_inputs,
    **_shared_trainer_kwargs,
)

# (label, trainer) pairs in the order in which they are listed for running.
all_trainers = [
    ("base", base_trainer),
    ("lora", lora_trainer),
    ("lora_plus", lora_plus_trainer),
]

For each trainer, we train the model and evaluate it, then store the results in a list.

In [None]:
# One metrics dict per trainer, in the order the trainers were run.
results_list = []

# CUDA availability does not change while the loop runs, so check it once.
cuda_available = torch.cuda.is_available()

for name, trainer in all_trainers:
    print(f"\nRunning {name}")

    # Reset the peak counter so that the reading taken after training reflects
    # this run rather than an earlier one.
    if cuda_available:
        torch.cuda.reset_peak_memory_stats()

    # perf_counter is a monotonic, high-resolution clock intended for measuring
    # elapsed time; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    trainer.train()
    training_time_sec = time.perf_counter() - start_time

    # Peak allocated GPU memory in GiB; None when no GPU is present.
    if cuda_available:
        peak_gpu_memory_gb = torch.cuda.max_memory_allocated() / 1024**3
    else:
        peak_gpu_memory_gb = None

    # Evaluation happens outside the timed section, so it does not count
    # towards the reported training time.
    metrics = trainer.evaluate()

    metrics.update({
        "trainer": name,
        "training_time_sec": training_time_sec,
        "peak_gpu_memory_gb": peak_gpu_memory_gb,
    })

    results_list.append(metrics)

Finally, we print out the results for each model and create graphs for comparison.

In [None]:
df_results = pd.DataFrame(results_list)
display(df_results)

print("")


def _plot_metric(column, title, ylabel, color, ylim=None):
    """Draw one bar per trainer for ``column`` of ``df_results``.

    Args:
        column: Name of the results column to plot.
        title: Figure title.
        ylabel: Label for the y axis.
        color: Bar colour.
        ylim: Optional ``(low, high)`` limits for the y axis.

    The chart is skipped, with a message, when the column is missing or holds
    no numeric values. That is the case for peak GPU memory on a CPU-only run,
    where every entry is None and cannot be drawn as a bar height.
    """
    if column not in df_results:
        print(f"Skipping '{title}': column '{column}' is missing.")
        return

    values = pd.to_numeric(df_results[column], errors="coerce")
    if values.isna().all():
        print(f"Skipping '{title}': no numeric data in '{column}'.")
        return

    plt.figure(figsize=(7, 5))
    plt.bar(df_results["trainer"], values, color=color)
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel("Trainer")
    if ylim is not None:
        plt.ylim(*ylim)
    plt.grid(axis="y", linestyle="--", alpha=0.7)
    plt.show()


_plot_metric(
    "peak_gpu_memory_gb",
    "Peak GPU Memory Usage by Trainer",
    "Peak GPU Memory (GB)",
    "skyblue",
)
_plot_metric(
    "training_time_sec",
    "Training Time by Trainer",
    "Training Time (seconds)",
    "lightgreen",
)
# F1 is a score between 0 and 1, so the axis is pinned to that range.
_plot_metric("eval_f1", "F1 Score by Trainer", "F1 Score", "salmon", ylim=(0, 1))
