In [1]:
!pip install -q torch transformers datasets evaluate scikit-learn pandas matplotlib

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m39.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m102.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m60.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m12.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m44.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m19.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
import torch
import torch.nn as nn
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os
import random
from sklearn.metrics import accuracy_score
from transformers import (
    DistilBertTokenizerFast, DistilBertForSequenceClassification,
    Trainer, TrainingArguments, TrainerCallback
)
from datasets import load_dataset
from peft import get_peft_model, TaskType, AdaLoraConfig
import evaluate
import gc

In [4]:
# Set Seed for reproducability
def set_seed(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)
set_seed(42)

In [5]:
tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
dataset = load_dataset("glue", "sst2")
dataset = dataset.rename_column("label", "labels")
dataset = dataset.map(lambda e: tokenizer(e["sentence"], truncation=True, padding="max_length", max_length=128), batched=True)
dataset.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/35.3k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/3.11M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/72.8k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/148k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/67349 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/872 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1821 [00:00<?, ? examples/s]

Map:   0%|          | 0/67349 [00:00<?, ? examples/s]

Map:   0%|          | 0/872 [00:00<?, ? examples/s]

Map:   0%|          | 0/1821 [00:00<?, ? examples/s]

In [6]:
os.makedirs("rank_logs", exist_ok=True)

In [7]:
metric = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)
    return metric.compute(predictions=predictions, references=labels)

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [8]:
def log_phase(phase_name, run_id=None):
    ts = time.time()
    with open("adalora_gpu_phase_timestamps.log", "a") as f:
        f.write(f"{run_id},{phase_name},{ts}\n")

In [9]:
adalora_config = AdaLoraConfig(
    init_r=4,
    target_r=2,
    beta1=0.85,
    beta2=0.85,
    tinit=100,
    tfinal=500,
    deltaT=10,
    lora_alpha=16,
    lora_dropout=0.05,
    orth_reg_weight=0.5,
    target_modules=["q_lin", "v_lin"]
)

In [11]:
all_results = []

for run_id in range(6):
    print(f"\n\nStarting Run {run_id}")
    gc.collect()
    torch.cuda.empty_cache()

    set_seed(42+run_id)

    # initialize base model
    model = DistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=2)

    # set LoRA ranks
    model = get_peft_model(model, adalora_config)


    # training arguments
    training_args = TrainingArguments(
        output_dir="./adalora_distilbert_sst2",
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        num_train_epochs=3,
        eval_strategy="epoch",
        learning_rate=5e-4,
        weight_decay=0.01,
        logging_steps=50,
        save_strategy="no",
        report_to="none",  # No wandb, tensorboard
        fp16=True,
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"],
        compute_metrics=compute_metrics,
    )

    #------------------Training Phase START------------------
    log_phase("training_start", run_id)
    start_time = time.time()
    trainer.train()
    end_time = time.time()
    log_phase("training_end", run_id)
    #------------------Training Phase END------------------


    # final_memory = torch.cuda.memory_allocated() / 1e9
    outputs = trainer.predict(dataset["validation"])
    logits = outputs.predictions[1] if isinstance(outputs.predictions, tuple) else outputs.predictions
    preds = np.argmax(logits, axis=-1)
    labels = dataset["validation"]["labels"]
    acc = accuracy_score(labels.cpu().numpy(), preds)

    print(f"\nFinal Eval Accuracy: {acc:.4f}")

    training_time = round((end_time - start_time) / 60, 2)

    all_results.append({
        "Run ID": run_id,
        "Final Accuracy": round(acc, 4),
        "Training Time (min)": training_time,
    })



Starting Run 0


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss
1,0.2729,No log
2,0.226,No log
3,0.2246,No log



Final Eval Accuracy: 0.8865


Starting Run 1


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss
1,0.2744,No log
2,0.2282,No log
3,0.2269,No log



Final Eval Accuracy: 0.8911


Starting Run 2


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss
1,0.2709,No log
2,0.2209,No log
3,0.214,No log



Final Eval Accuracy: 0.8991


Starting Run 3


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss
1,0.2732,No log
2,0.2195,No log
3,0.2159,No log



Final Eval Accuracy: 0.8865


Starting Run 4


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss
1,0.2786,No log
2,0.2272,No log
3,0.224,No log



Final Eval Accuracy: 0.8933


Starting Run 5


Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
No label_names provided for model class `PeftModel`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss
1,0.2679,No log
2,0.2302,No log
3,0.2126,No log



Final Eval Accuracy: 0.8945


In [12]:
# --- Save all results to CSV ---
df = pd.DataFrame(all_results)
df.to_csv("adalora_sst2_benchmark.csv", index=False)

# --- Display nicely formatted results ---
from tabulate import tabulate
print("\nFinal Results Across All Runs:\n")
print(tabulate(df, headers='keys', tablefmt='pretty'))



Final Results Across All Runs:

+---+--------+----------------+---------------------+
|   | Run ID | Final Accuracy | Training Time (min) |
+---+--------+----------------+---------------------+
| 0 |  0.0   |     0.8865     |        6.33         |
| 1 |  1.0   |     0.8911     |        6.36         |
| 2 |  2.0   |     0.8991     |         6.4         |
| 3 |  3.0   |     0.8865     |        6.41         |
| 4 |  4.0   |     0.8933     |        6.41         |
| 5 |  5.0   |     0.8945     |        6.42         |
+---+--------+----------------+---------------------+
