In [24]:
import gc
import os
import subprocess
import time
import warnings

import bitsandbytes as bnb
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from peft import IA3Config, LoraConfig, PeftConfig, PeftModel, TaskType, get_peft_model
from sklearn.metrics import accuracy_score
from transformers import (
    BitsAndBytesConfig,
    EvalPrediction,
    RobertaForSequenceClassification,
    RobertaTokenizer,
    Trainer,
    TrainingArguments,
)
warnings.filterwarnings("ignore")

The installed version of bitsandbytes was compiled without GPU support. 8-bit optimizers, 8-bit multiplication, and GPU quantization are unavailable.


# PART 1: Setup and Data Preparation

In [25]:
# Configuration: every tunable value for the experiment is set in this cell.

# Dataset subset sizes.
TRAIN_SIZE = 3000
TEST_SIZE = 2000
NUM_SAMPLES = TRAIN_SIZE + TEST_SIZE  # 5000 examples in total

# Model and tokenization.
MODEL_NAME = "roberta-base"
MAX_LENGTH = 128

# Optimization.
EPOCHS = 3
BATCH_SIZE = 16
LEARNING_RATE = 2e-5

# Directory for everything the notebook writes to disk.
OUTPUT_DIR = "./results"

In [26]:
# Set random seed for reproducibility
# Seeds PyTorch's and NumPy's global random number generators with the same fixed value.
torch.manual_seed(42)
np.random.seed(42)

In [27]:
# Create output directory if it doesn't exist
# exist_ok=True keeps this cell safe to re-run when the directory is already there.
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [28]:
# Function to measure GPU memory usage
def get_gpu_memory():
    """Return the GPU memory currently in use, as reported by ``nvidia-smi``.

    ``nvidia-smi`` prints one value per visible GPU (one per line). The values
    are summed, so the function also works on multi-GPU machines; on a
    single-GPU machine the result is the single reported value.

    Returns:
        int: Used memory summed over all reported GPUs, in the megabyte units
        printed by ``nvidia-smi``. Returns 0 when ``nvidia-smi`` is missing,
        exits with an error, or prints something that is not an integer.
    """
    query = ['nvidia-smi', '--query-gpu=memory.used', '--format=csv,nounits,noheader']
    try:
        output = subprocess.check_output(query, encoding='utf-8')
        per_gpu = [int(line) for line in output.splitlines() if line.strip()]
    except (OSError, subprocess.SubprocessError, ValueError):
        # Best-effort probe: a machine without a usable nvidia-smi reports 0
        # instead of aborting the notebook. Only the expected failure modes
        # are caught so that e.g. KeyboardInterrupt still propagates.
        return 0
    return sum(per_gpu)

# Function to compute metrics
def compute_metrics(pred: EvalPrediction):
    """Compute classification accuracy for one evaluation pass.

    Args:
        pred: Evaluation output exposing ``predictions`` (scores with the
            classes along axis 1) and ``label_ids`` (reference labels).

    Returns:
        dict: ``{"accuracy": <score>}`` as computed by ``accuracy_score``.
    """
    predicted_classes = np.argmax(pred.predictions, axis=1)
    score = accuracy_score(pred.label_ids, predicted_classes)
    return {"accuracy": score}

In [29]:
# Load IMDb dataset
# The "train" and "test" splits of the returned object are subsampled in the next cell.
dataset = load_dataset("imdb")

In [30]:
# Shuffle each split with a fixed seed, then keep the first TRAIN_SIZE / TEST_SIZE
# examples so the same subsets are selected on every run.
train_dataset = dataset["train"].shuffle(seed=42).select(range(TRAIN_SIZE))
test_dataset = dataset["test"].shuffle(seed=42).select(range(TEST_SIZE))

In [31]:
# Load tokenizer
# Uses the tokenizer published for the MODEL_NAME checkpoint.
tokenizer = RobertaTokenizer.from_pretrained(MODEL_NAME)

In [32]:
def tokenize_function(examples):
    """Tokenize the ``"text"`` field of ``examples`` with the notebook's tokenizer.

    Every sequence is truncated and padded to exactly ``MAX_LENGTH`` tokens.

    Args:
        examples: Mapping with a ``"text"`` entry holding the raw review text.

    Returns:
        The tokenizer's output for the given text.
    """
    return tokenizer(
        examples["text"],
        max_length=MAX_LENGTH,
        truncation=True,
        padding="max_length",
    )

In [33]:
# Apply tokenization
# batched=True hands tokenize_function a batch of examples per call instead of one at a time.
train_tokenized = train_dataset.map(tokenize_function, batched=True)
test_tokenized = test_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2000 [00:00<?, ? examples/s]

In [34]:
# Have both datasets return PyTorch tensors for the three listed columns
# (token ids, attention mask and label).
train_tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])
test_tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "label"])

# PART 2: Fine-Tuning Methods

In [35]:
# Results dictionary to store metrics: one (initially empty) list per reported
# column; each fine-tuning method later appends one value to every list.
results = {
    column: []
    for column in (
        "Method",
        "Accuracy",
        "Trainable Parameters",
        "Training Time (s)",
        "GPU Memory (MB)",
    )
}

In [36]:
# Training arguments
# This single TrainingArguments object is passed to every Trainer in the notebook,
# so all methods share the same schedule, batch sizes and learning rate.
# NOTE(review): every run therefore also writes its per-epoch checkpoints under the
# same OUTPUT_DIR — confirm that one method cannot pick up another's checkpoint.
# NOTE(review): recent transformers releases spell `evaluation_strategy` as
# `eval_strategy` — confirm against the installed version.
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    warmup_steps=50,
    weight_decay=0.01,
    logging_dir=f"{OUTPUT_DIR}/logs",
    logging_steps=10,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
    learning_rate=LEARNING_RATE,
)

## METHOD 1: Full Fine-Tuning

In [38]:
# Baseline model for full fine-tuning: pretrained RoBERTa with a 2-label classification head.
model_full = RobertaForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [40]:
# Count only the parameters that require gradients, i.e. those training will update.
trainable_params_full = sum(p.numel() for p in model_full.parameters() if p.requires_grad)
print(f"Number of trainable parameters: {trainable_params_full:,}")

Number of trainable parameters: 124,647,170


In [41]:
# Record initial GPU memory
# Taken before training so it can be compared with the reading taken afterwards.
initial_memory_full = get_gpu_memory()

In [42]:
# Trainer for full fine-tuning; the tokenized test subset is used as the evaluation set.
trainer_full = Trainer(
    model=model_full,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
    compute_metrics=compute_metrics,
)

In [None]:
# Train and measure time
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# elapsed time cannot be distorted by system clock adjustments during training.
start_time = time.perf_counter()
trainer_full.train()
training_time_full = time.perf_counter() - start_time

Epoch,Training Loss,Validation Loss


In [None]:
# Evaluate
# Runs the Trainer's evaluation and prints the metrics it returns.
eval_results_full = trainer_full.evaluate()
print(f"Evaluation results: {eval_results_full}")

In [None]:
# Record final GPU memory and keep the larger of the before/after readings.
# NOTE(review): both readings are point-in-time snapshots, so a higher peak
# reached during training may not be captured — confirm this is acceptable.
final_memory_full = get_gpu_memory()
gpu_memory_full = max(final_memory_full, initial_memory_full)

In [None]:
# Save results: append this method's values to each column list.
# NOTE: re-running this cell appends a duplicate row.
results["Method"].append("Full Fine-Tuning")
results["Accuracy"].append(eval_results_full["eval_accuracy"])
results["Trainable Parameters"].append(trainable_params_full)
results["Training Time (s)"].append(training_time_full)
results["GPU Memory (MB)"].append(gpu_memory_full)

In [None]:
# Clean up to free memory
del model_full
del trainer_full
# Collect first so objects kept alive only by reference cycles release their
# tensors; empty_cache() can then return those freed blocks to the GPU driver.
gc.collect()
torch.cuda.empty_cache()

## METHOD 2: LoRA Fine-Tuning

In [None]:
# Fresh copy of the pretrained RoBERTa classifier, to be wrapped with LoRA adapters.
model_lora = RobertaForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

In [None]:
# Configure LoRA
# Sequence-classification task; adapters (r=8, lora_alpha=16, dropout 0.1) are
# attached to the modules matched by target_modules.
lora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"]
)

In [None]:
# Get PEFT model
# Rebinds model_lora to the PEFT-wrapped model, then prints PEFT's own
# trainable-parameter summary.
model_lora = get_peft_model(model_lora, lora_config)
model_lora.print_trainable_parameters()

In [None]:
# Count only the parameters that require gradients, i.e. those training will update.
trainable_params_lora = sum(p.numel() for p in model_lora.parameters() if p.requires_grad)
print(f"Number of trainable parameters: {trainable_params_lora:,}")

In [None]:
# Record initial GPU memory
# Taken before training so it can be compared with the reading taken afterwards.
initial_memory_lora = get_gpu_memory()

In [None]:
# Trainer for the LoRA model; same arguments and datasets as the other methods.
trainer_lora = Trainer(
    model=model_lora,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
    compute_metrics=compute_metrics,
)

In [None]:
# Train and measure time
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# elapsed time cannot be distorted by system clock adjustments during training.
start_time = time.perf_counter()
trainer_lora.train()
training_time_lora = time.perf_counter() - start_time

In [None]:
# Evaluate
# Runs the Trainer's evaluation and prints the metrics it returns.
eval_results_lora = trainer_lora.evaluate()
print(f"Evaluation results: {eval_results_lora}")

In [None]:
# Record final GPU memory and keep the larger of the before/after readings.
# NOTE(review): both readings are point-in-time snapshots, so a higher peak
# reached during training may not be captured — confirm this is acceptable.
final_memory_lora = get_gpu_memory()
gpu_memory_lora = max(final_memory_lora, initial_memory_lora)

In [None]:
# Save results: append this method's values to each column list.
# NOTE: re-running this cell appends a duplicate row.
results["Method"].append("LoRA Fine-Tuning")
results["Accuracy"].append(eval_results_lora["eval_accuracy"])
results["Trainable Parameters"].append(trainable_params_lora)
results["Training Time (s)"].append(training_time_lora)
results["GPU Memory (MB)"].append(gpu_memory_lora)

In [None]:
# Clean up to free memory
del model_lora
del trainer_lora
# Collect first so objects kept alive only by reference cycles release their
# tensors; empty_cache() can then return those freed blocks to the GPU driver.
gc.collect()
torch.cuda.empty_cache()

## METHOD 3: QLoRA Fine-Tuning

In [23]:
# QLoRA base model: pretrained RoBERTa classifier loaded with 4-bit NF4 weights.
# The 4-bit settings are passed only through quantization_config; transformers
# raises an error when load_in_4bit is also given as a separate keyword.
model_qlora = RobertaForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=2,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
    ),
)

In [None]:
# Configure LoRA for QLoRA
# Same adapter settings as the plain LoRA run (r=8, lora_alpha=16, dropout 0.1,
# query/key/value modules); only the base model differs.
qlora_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=8,
    lora_alpha=16,
    lora_dropout=0.1,
    target_modules=["query", "key", "value"]
)

In [None]:
# Get PEFT model
# Rebinds model_qlora to the PEFT-wrapped model, then prints PEFT's own
# trainable-parameter summary.
# NOTE(review): PEFT's k-bit training examples call prepare_model_for_kbit_training
# on the quantized model before wrapping it — confirm whether that is needed here.
model_qlora = get_peft_model(model_qlora, qlora_config)
model_qlora.print_trainable_parameters()

In [None]:
# Count only the parameters that require gradients, i.e. those training will update.
trainable_params_qlora = sum(p.numel() for p in model_qlora.parameters() if p.requires_grad)
print(f"Number of trainable parameters: {trainable_params_qlora:,}")

In [None]:
# Record initial GPU memory
# Taken before training so it can be compared with the reading taken afterwards.
initial_memory_qlora = get_gpu_memory()

In [None]:
# Trainer for the QLoRA model; same arguments and datasets as the other methods.
trainer_qlora = Trainer(
    model=model_qlora,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
    compute_metrics=compute_metrics,
)

In [None]:
# Train and measure time
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# elapsed time cannot be distorted by system clock adjustments during training.
start_time = time.perf_counter()
trainer_qlora.train()
training_time_qlora = time.perf_counter() - start_time

In [None]:
# Evaluate
# Runs the Trainer's evaluation and prints the metrics it returns.
eval_results_qlora = trainer_qlora.evaluate()
print(f"Evaluation results: {eval_results_qlora}")

In [None]:
# Record final GPU memory and keep the larger of the before/after readings.
# NOTE(review): both readings are point-in-time snapshots, so a higher peak
# reached during training may not be captured — confirm this is acceptable.
final_memory_qlora = get_gpu_memory()
gpu_memory_qlora = max(final_memory_qlora, initial_memory_qlora)

In [None]:
# Save results: append this method's values to each column list.
# NOTE: re-running this cell appends a duplicate row.
results["Method"].append("QLoRA Fine-Tuning")
results["Accuracy"].append(eval_results_qlora["eval_accuracy"])
results["Trainable Parameters"].append(trainable_params_qlora)
results["Training Time (s)"].append(training_time_qlora)
results["GPU Memory (MB)"].append(gpu_memory_qlora)

In [None]:
# Clean up to free memory
del model_qlora
del trainer_qlora
# Collect first so objects kept alive only by reference cycles release their
# tensors; empty_cache() can then return those freed blocks to the GPU driver.
gc.collect()
torch.cuda.empty_cache()

## METHOD 4: IA3 Adapter Tuning

In [None]:
# Fresh copy of the pretrained RoBERTa classifier, to be wrapped with IA3.
# Uses the same model class as the other methods.
model_ia3 = RobertaForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=2)

In [None]:
# Configure IA3
# PEFT requires feedforward_modules to be a subset of target_modules and raises
# a ValueError otherwise, so only "output.dense" (which is targeted) is listed
# as a feed-forward module.
ia3_config = IA3Config(
    task_type=TaskType.SEQ_CLS,
    target_modules=["query", "key", "value", "output.dense"],
    feedforward_modules=["output.dense"]
)

In [None]:
# Get PEFT model
# Rebinds model_ia3 to the PEFT-wrapped model, then prints PEFT's own
# trainable-parameter summary.
model_ia3 = get_peft_model(model_ia3, ia3_config)
model_ia3.print_trainable_parameters()

In [None]:
# Count only the parameters that require gradients, i.e. those training will update.
trainable_params_ia3 = sum(p.numel() for p in model_ia3.parameters() if p.requires_grad)
print(f"Number of trainable parameters: {trainable_params_ia3:,}")

In [None]:
# Record initial GPU memory
# Taken before training so it can be compared with the reading taken afterwards.
initial_memory_ia3 = get_gpu_memory()

In [None]:
# Trainer for the IA3 model; same arguments and datasets as the other methods.
trainer_ia3 = Trainer(
    model=model_ia3,
    args=training_args,
    train_dataset=train_tokenized,
    eval_dataset=test_tokenized,
    compute_metrics=compute_metrics,
)

In [None]:
# Train and measure time
# perf_counter() is a monotonic clock meant for measuring intervals, so the
# elapsed time cannot be distorted by system clock adjustments during training.
start_time = time.perf_counter()
trainer_ia3.train()
training_time_ia3 = time.perf_counter() - start_time

In [None]:
# Evaluate
# Runs the Trainer's evaluation and prints the metrics it returns.
eval_results_ia3 = trainer_ia3.evaluate()
print(f"Evaluation results: {eval_results_ia3}")

In [None]:
# Record final GPU memory and keep the larger of the before/after readings.
# NOTE(review): both readings are point-in-time snapshots, so a higher peak
# reached during training may not be captured — confirm this is acceptable.
final_memory_ia3 = get_gpu_memory()
gpu_memory_ia3 = max(final_memory_ia3, initial_memory_ia3)

In [None]:
# Save results: append this method's values to each column list.
# NOTE: re-running this cell appends a duplicate row.
results["Method"].append("IA3 Adapter Tuning")
results["Accuracy"].append(eval_results_ia3["eval_accuracy"])
results["Trainable Parameters"].append(trainable_params_ia3)
results["Training Time (s)"].append(training_time_ia3)
results["GPU Memory (MB)"].append(gpu_memory_ia3)

In [None]:
# Clean up to free memory
del model_ia3
del trainer_ia3
# Collect first so objects kept alive only by reference cycles release their
# tensors; empty_cache() can then return those freed blocks to the GPU driver.
gc.collect()
torch.cuda.empty_cache()

# PART 3: Evaluation Metrics Summary

In [None]:
# Build a DataFrame from the collected metrics (one row per method) and print it.
print("\n===== Part 3: Evaluation Metrics Summary =====")
results_df = pd.DataFrame(results)
print(results_df)

In [None]:
# Display comparison of results
# One line per method, accuracy shown with four decimal places.
print("\nAccuracy Comparison:")
for method, accuracy in zip(results["Method"], results["Accuracy"]):
    print(f"{method}: {accuracy:.4f}")

In [None]:
# One line per method, parameter count shown with thousands separators.
print("\nTrainable Parameters Comparison:")
for method, params in zip(results["Method"], results["Trainable Parameters"]):
    print(f"{method}: {params:,}")

In [None]:
# One line per method, training time in seconds with two decimal places.
print("\nTraining Time Comparison:")
for method, time_taken in zip(results["Method"], results["Training Time (s)"]):
    print(f"{method}: {time_taken:.2f} seconds")

In [None]:
# One line per method, showing the recorded GPU memory value.
print("\nGPU Memory Usage Comparison:")
for method, memory in zip(results["Method"], results["GPU Memory (MB)"]):
    print(f"{method}: {memory} MB")

In [None]:
# Save results to CSV
# Written under OUTPUT_DIR; index=False leaves the DataFrame's row index out of the file.
results_df.to_csv(f"{OUTPUT_DIR}/fine_tuning_comparison_results.csv", index=False)

# PART 4: Visualizations

In [None]:
# Create visualizations
def create_bar_chart(data, column, title, ylabel, filename):
    """Save a bar chart of one metric, one bar per method, as a PNG under OUTPUT_DIR.

    Args:
        data: Table with a "Method" column (bar labels) and ``column``.
        column: Name of the column whose values set the bar heights.
        title: Figure title.
        ylabel: Y-axis label.
        filename: Output file name without the ".png" extension.
    """
    bar_colors = ['blue', 'green', 'red', 'purple']
    destination = f"{OUTPUT_DIR}/{filename}.png"

    plt.figure(figsize=(10, 6))
    plt.bar(data["Method"], data[column], color=bar_colors)
    plt.ylabel(ylabel)
    plt.title(title)
    # Slanted, right-aligned labels keep the method names readable.
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.savefig(destination)
    # The figure is closed once it has been written to disk.
    plt.close()

In [None]:
# Save the accuracy chart as accuracy_comparison.png under OUTPUT_DIR.
create_bar_chart(results_df, "Accuracy", "Accuracy Comparison", "Accuracy", "accuracy_comparison")

In [None]:
# Save the trainable-parameter chart as parameters_comparison.png under OUTPUT_DIR.
create_bar_chart(results_df, "Trainable Parameters", "Trainable Parameters Comparison", "Number of Parameters", "parameters_comparison")

In [None]:
# Save the training-time chart as time_comparison.png under OUTPUT_DIR.
create_bar_chart(results_df, "Training Time (s)", "Training Time Comparison", "Time (seconds)", "time_comparison")

In [None]:
# Save the GPU-memory chart as memory_comparison.png under OUTPUT_DIR.
create_bar_chart(results_df, "GPU Memory (MB)", "GPU Memory Usage Comparison", "Memory (MB)", "memory_comparison")