In [None]:
!pip install transformers datasets peft accelerate -q

In [None]:
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, Trainer, TrainingArguments
from datasets import load_dataset
from peft import IA3Config, get_peft_model
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import itertools
from tqdm import tqdm

In [None]:
def count_trainable_parameters(model):
    """Return the total element count of the model's trainable parameters.

    A parameter is counted only when its ``requires_grad`` flag is truthy.
    """
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

In [None]:
def load_glue_dataset(dataset_name, tokenizer, max_len=512):
    """Load a GLUE task and tokenize it into torch-formatted model inputs.

    Args:
        dataset_name: GLUE configuration name. Supported values are
            "mrpc", "rte", "qqp" and "sst2".
        tokenizer: Callable applied to each batch of raw text columns.
        max_len: Length every sequence is truncated and padded to
            (defaults to 512, the value previously hard-coded).

    Returns:
        The mapped dataset with "label" renamed to "labels" and its format set
        to torch tensors for the input/label columns.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported tasks.
    """
    # Raw text column(s) fed to the tokenizer for each task. QQP stores its
    # sentence pair as question1/question2 rather than sentence1/sentence2.
    text_columns = {
        "mrpc": ("sentence1", "sentence2"),
        "rte": ("sentence1", "sentence2"),
        "qqp": ("question1", "question2"),
        "sst2": ("sentence",),
    }

    # Reject unknown tasks before attempting to load anything.
    if dataset_name not in text_columns:
        raise ValueError(f"Unsupported GLUE dataset: {dataset_name}")
    columns = text_columns[dataset_name]

    def encode(examples):
        # One positional argument per text column: single sentence or pair.
        return tokenizer(
            *(examples[column] for column in columns),
            truncation=True,
            padding='max_length',
            max_length=max_len
        )

    dataset = load_dataset("glue", dataset_name)
    encoded = dataset.map(encode, batched=True)
    encoded = encoded.rename_column("label", "labels")
    encoded.set_format("torch", columns=["input_ids", "token_type_ids", "attention_mask", "labels"])
    return encoded

In [None]:
def load_text_dataset(dataset_name, tokenizer):
    """Load a single-text classification dataset and tokenize it for torch.

    Supported names are "imdb" and "tweet" (the "sentiment" configuration of
    "tweet_eval"). The "text" column is truncated/padded to 512 tokens, the
    "label" column is renamed to "labels", and the result is formatted as
    torch tensors.

    Raises:
        ValueError: If ``dataset_name`` is neither "imdb" nor "tweet".
    """
    # Resolve the load_dataset arguments first; unknown names never get further.
    if dataset_name == "imdb":
        hub_args = ("imdb",)
    elif dataset_name == "tweet":
        hub_args = ("tweet_eval", "sentiment")
    else:
        raise ValueError(f"Unsupported dataset: {dataset_name}")

    sequence_length = 512

    def tokenize_batch(batch):
        # Both datasets are tokenized from the same "text" column.
        return tokenizer(batch["text"], truncation=True, padding='max_length', max_length=sequence_length)

    raw_splits = load_dataset(*hub_args)
    tokenized = raw_splits.map(tokenize_batch, batched=True).rename_column("label", "labels")
    tokenized.set_format("torch", columns=["input_ids", "token_type_ids", "attention_mask", "labels"])
    return tokenized

In [None]:
def create_peft_model(base_model, method, target_modules=None, fan_in_fan_out=False):
    """Wrap ``base_model`` with a PEFT adapter for sequence classification.

    Args:
        base_model: Model handed to ``get_peft_model``.
        method: Adapter type; only "ia3" is accepted.
        target_modules: Forwarded unchanged to ``IA3Config``.
        fan_in_fan_out: Forwarded unchanged to ``IA3Config``.

    Returns:
        Whatever ``get_peft_model`` returns for the built IA3 configuration.

    Raises:
        ValueError: If ``method`` is anything other than "ia3".
    """
    if method != "ia3":
        raise ValueError("Only IA3 method is implemented")

    ia3_settings = dict(
        task_type="SEQ_CLS",
        target_modules=target_modules,
        fan_in_fan_out=fan_in_fan_out,
    )
    return get_peft_model(base_model, IA3Config(**ia3_settings))

In [None]:
# Datasets grouped by the loader that handles them.
glue_datasets = ["sst2", "mrpc", "rte"]
extra_datasets = ["imdb", "tweet"]

# Only the GLUE tasks are swept. To also run IMDB and TweetEval, use
# `glue_datasets + extra_datasets` here. A copy is taken so that editing the
# sweep list never mutates `glue_datasets`.
all_datasets = list(glue_datasets)

# Module-name sets passed as `target_modules` to the adapter config, from the
# smallest set (query only) to the largest (attention plus dense layers).
target_module_options = [["query"], ["query", "value"], ["query", "value",  "intermediate.dense", "output.dense"]]

fan_options = [False]
# NOTE(review): the recorded MRPC/RTE runs at 1e-4 show flat or near-chance
# accuracy; a larger learning rate may be needed. TODO confirm.
lr_options = [1e-4]

In [None]:
# Sweep every (dataset, target_modules, fan_in_fan_out, learning_rate)
# combination: fine-tune an IA3-adapted BERT classifier, evaluate it, and
# append one record per run to `all_results`.
all_results = []

MODEL_NAME = "bert-base-uncased"
SEED = 42


def compute_accuracy(eval_pred):
    """Return {"accuracy": fraction of rows whose arg-max logit equals the label}."""
    predicted = np.argmax(eval_pred.predictions, axis=1)
    return {"accuracy": (predicted == eval_pred.label_ids).mean()}


# Neither the tokenizer nor the hyper-parameter grid depends on the dataset,
# so build them once instead of once per dataset.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
config_combos = list(itertools.product(target_module_options, fan_options, lr_options))

for dataset_name in all_datasets:
    print(f"\n===== Running on {dataset_name.upper()} =====")

    # Load appropriate dataset
    if dataset_name in glue_datasets:
        dataset_encoded = load_glue_dataset(dataset_name, tokenizer)
    else:
        dataset_encoded = load_text_dataset(dataset_name, tokenizer)

    # Fall back to the test split when a dataset has no validation split.
    eval_split = "validation" if "validation" in dataset_encoded else "test"
    num_labels = 3 if dataset_name == "tweet" else 2

    for tm, fan, lr in tqdm(config_combos, desc=f"{dataset_name.upper()} Sweep"):
        print(f"Config: target_modules={tm}, fan_in_out={fan}, lr={lr}")

        # Seed before the model is built so the freshly initialised classifier
        # head is the same on every re-run of a given configuration.
        torch.manual_seed(SEED)
        base_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME, num_labels=num_labels)
        ia3_model = create_peft_model(
            base_model=base_model,
            method="ia3",
            target_modules=tm,
            fan_in_fan_out=fan
        )

        trainable_params = count_trainable_parameters(ia3_model)

        # One output directory per configuration; sharing a directory across
        # configurations makes their checkpoint folders collide.
        run_tag = "-".join(tm) + f"_fan{fan}_lr{lr}"

        training_args = TrainingArguments(
            output_dir=f"./ia3_{dataset_name}/{run_tag}",
            eval_strategy="epoch",
            learning_rate=lr,
            per_device_train_batch_size=16,
            per_device_eval_batch_size=16,
            num_train_epochs=3,
            logging_steps=50,
            save_strategy="epoch",
            load_best_model_at_end=True,
            report_to="none",
            seed=SEED,
            # Named explicitly because Trainer warns that it cannot infer
            # label names for a PEFT-wrapped model.
            label_names=["labels"]
        )

        trainer = Trainer(
            model=ia3_model,
            args=training_args,
            train_dataset=dataset_encoded["train"],
            eval_dataset=dataset_encoded[eval_split],
            compute_metrics=compute_accuracy
        )

        trainer.train()
        metrics = trainer.evaluate()

        all_results.append({
            "dataset": dataset_name,
            "target_modules": ",".join(tm),
            "fan_in_fan_out": fan,
            "learning_rate": lr,
            "trainable_params": trainable_params,
            "accuracy": metrics["eval_accuracy"]
        })


===== Running on SST2 =====


Map:   0%|          | 0/872 [00:00<?, ? examples/s]

SST2 Sweep:   0%|          | 0/1 [00:00<?, ?it/s]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Config: target_modules=['query', 'value', 'intermediate.dense', 'output.dense'], fan_in_out=False, lr=0.0001


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.352,0.302649,0.873853
2,0.2903,0.284948,0.886468
3,0.2871,0.284176,0.888761


SST2 Sweep: 100%|██████████| 1/1 [2:00:42<00:00, 7242.94s/it]



===== Running on MRPC =====


train-00000-of-00001.parquet:   0%|          | 0.00/649k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/75.7k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/308k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/3668 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/408 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1725 [00:00<?, ? examples/s]

Map:   0%|          | 0/3668 [00:00<?, ? examples/s]

Map:   0%|          | 0/408 [00:00<?, ? examples/s]

Map:   0%|          | 0/1725 [00:00<?, ? examples/s]

MRPC Sweep:   0%|          | 0/1 [00:00<?, ?it/s]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Config: target_modules=['query', 'value', 'intermediate.dense', 'output.dense'], fan_in_out=False, lr=0.0001


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6311,0.620417,0.683824
2,0.632,0.61781,0.683824
3,0.6402,0.616172,0.683824


MRPC Sweep: 100%|██████████| 1/1 [07:00<00:00, 420.85s/it]



===== Running on RTE =====


train-00000-of-00001.parquet:   0%|          | 0.00/584k [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/69.0k [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/621k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2490 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/277 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/3000 [00:00<?, ? examples/s]

Map:   0%|          | 0/2490 [00:00<?, ? examples/s]

Map:   0%|          | 0/277 [00:00<?, ? examples/s]

Map:   0%|          | 0/3000 [00:00<?, ? examples/s]

RTE Sweep:   0%|          | 0/1 [00:00<?, ?it/s]Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Config: target_modules=['query', 'value', 'intermediate.dense', 'output.dense'], fan_in_out=False, lr=0.0001


No label_names provided for model class `PeftModelForSequenceClassification`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Epoch,Training Loss,Validation Loss,Accuracy
1,0.6934,0.71239,0.472924
2,0.6947,0.693057,0.469314
3,0.6963,0.690836,0.472924


RTE Sweep: 100%|██████████| 1/1 [04:46<00:00, 286.35s/it]


In [None]:
# Display the per-run result records collected by the sweep.
all_results

[{'dataset': 'sst2',
  'target_modules': 'query,value,intermediate.dense,output.dense',
  'fan_in_fan_out': False,
  'learning_rate': 0.0001,
  'trainable_params': 102914,
  'accuracy': 0.8887614678899083},
 {'dataset': 'mrpc',
  'target_modules': 'query,value,intermediate.dense,output.dense',
  'fan_in_fan_out': False,
  'learning_rate': 0.0001,
  'trainable_params': 102914,
  'accuracy': 0.6838235294117647},
 {'dataset': 'rte',
  'target_modules': 'query,value,intermediate.dense,output.dense',
  'fan_in_fan_out': False,
  'learning_rate': 0.0001,
  'trainable_params': 102914,
  'accuracy': 0.4729241877256318}]

In [None]:
# Tabulate the sweep records and write them to a CSV file without the index column.
df_all = pd.DataFrame(data=all_results)
df_all.to_csv(path_or_buf="ia3_full_results.csv", index=False)
