In [None]:
!pip install datasets
!pip install transformers
!pip install peft
!pip install evaluate
!pip install -U datasets==2.20.0 pyarrow==15.0.2 transformers==4.44.2 evaluate==0.4.2 --no-cache-dir
!pip install -q datasets evaluate accelerate scikit-learn pandas matplotlib
!pip install -U "transformers>=4.41" accelerate safetensors
!pip install -U bitsandbytes


In [None]:
!pip install --upgrade --no-cache-dir git+https://github.com/Shannu3766/bi_influence.git@version_1

In [None]:
import warnings
import os
import logging
os.environ["WANDB_DISABLED"] = "true"
os.environ["TRANSFORMERS_NO_ADVISORY_WARNINGS"] = "true"
os.environ["HF_HUB_DISABLE_PROGRESS_BARS"] = "1"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings("ignore")
# logging.set_verbosity_error()

In [None]:
# from datasets import load_dataset, DatasetDict, Dataset

# from transformers import (
#     AutoTokenizer,
#     AutoConfig,
#     AutoModelForSequenceClassification,
#     DataCollatorWithPadding,
#     TrainingArguments,
#     Trainer)

# from peft import PeftModel, PeftConfig, get_peft_model, LoraConfig
# import evaluate
# import torch
# import numpy as np
# import os
# from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
# from peft import get_peft_model, LoraConfig, TaskType
# from torch.utils.data import DataLoader
# from adaptive_lora.callbacks import AdaptiveLoRACallback
# import numpy as np
# print("torch:", torch.__version__)
# print("cuda devices:", torch.cuda.device_count())

In [4]:
import os
import warnings
import torch
import numpy as np
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    DataCollatorWithPadding,
    TrainingArguments,
    Trainer,
    logging
)
from peft import get_peft_model, LoraConfig
import evaluate

In [5]:
# Silence all unnecessary outputs
os.environ["WANDB_DISABLED"] = "true"
os.environ["TOKENIZERS_PARALLELISM"] = "false"
warnings.filterwarnings("ignore")
logging.set_verbosity_error()


In [6]:
from datasets import load_dataset
import time

def load_dataset_with_retry(path, name=None, max_retries=None, wait=2):

    attempt = 0

    while True:
        try:
            if name is not None:
                ds = load_dataset(path, name)
            else:
                ds = load_dataset(path)

            print(f"Dataset loaded successfully after {attempt} attempts.")
            return ds

        except Exception as e:
            attempt += 1
            wait_time = wait * min(5, attempt)  

            print(f"[Attempt {attempt}] Failed to load dataset: {e}")
            print(f"Retrying in {wait_time} seconds...\n")

            if max_retries is not None and attempt >= max_retries:
                print("Max retries reached. Raising error.")
                raise e

            time.sleep(wait_time)
dataset = load_dataset_with_retry("glue", "sst2")


Dataset loaded successfully after 0 attempts.


In [7]:
model_checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
id2label = {0: "Negative", 1: "Positive"}
label2id = {"Negative": 0, "Positive": 1}

model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=2,
    id2label=id2label,
    label2id=label2id,
    device_map="auto"
)

In [8]:
from transformers import AutoTokenizer, DataCollatorWithPadding

tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
    model.resize_token_embeddings(len(tokenizer))

model.config.pad_token_id = tokenizer.pad_token_id
model.config.use_cache = False

MAX_LEN = 128  


def clean_text(s):
    if s is None:
        return ""
    return " ".join(str(s).strip().split())


def tokenize_function(examples):

    texts = [
        f"Review: {clean_text(x)}\nSentiment:"
        for x in examples["sentence"]
    ]

    return tokenizer(
        texts,
        truncation=True,
        padding=False,   
        max_length=MAX_LEN,
    )


tokenized_dataset = dataset.map(
    tokenize_function,
    batched=True,
    remove_columns=["sentence", "idx"],
)

if "label" in tokenized_dataset["train"].column_names:
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

tokenized_dataset.set_format("torch")


data_collator = DataCollatorWithPadding(
    tokenizer=tokenizer,
    pad_to_multiple_of=8,  
)


Map:   0%|          | 0/872 [00:00<?, ? examples/s]

In [9]:
accuracy = evaluate.load("accuracy")
rank = 6
def compute_metrics(eval_pred):
    preds, labels = eval_pred
    preds = np.argmax(preds, axis=1)
    return {"accuracy": accuracy.compute(predictions=preds, references=labels)}

peft_config = LoraConfig(
    task_type="SEQ_CLS",
    r=rank,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=[   
        "q_proj", "k_proj", "v_proj", "o_proj", 
        "gate_proj", "up_proj", "down_proj"
    ]
)
model = get_peft_model(model, peft_config)


In [10]:
from peft.tuners.lora import LoraLayer

def get_lora_module_names(peft_model):
    names = []
    for name, module in peft_model.named_modules():
        if isinstance(module, LoraLayer):
            names.append(name)
    return names

lora_names = get_lora_module_names(model)   
print(f"🔢 Number of LoRA modules: {len(lora_names)}\n")

num_lora_modules = len(lora_names)
TOTAL_RANK_BUDGET = num_lora_modules*rank  

avg_rank_per_module = TOTAL_RANK_BUDGET / num_lora_modules
print(
    f" With TOTAL_RANK_BUDGET={TOTAL_RANK_BUDGET} over"
    f"{num_lora_modules} modules, avg rank ≈ {avg_rank_per_module:.2f}"
)


🔢 Number of LoRA modules: 154

 With TOTAL_RANK_BUDGET=924 over154 modules, avg rank ≈ 6.00


In [11]:
training_args = TrainingArguments(
    disable_tqdm=False,
    output_dir="./SST2_adaptive_results",
    num_train_epochs=3,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=8,
    logging_steps=500,
    eval_strategy="epoch",
    eval_steps=500,
    bf16=True,              
    report_to="none",
    save_strategy="no",
)


In [12]:
tokenized_dataset

DatasetDict({
    train: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 67349
    })
    validation: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 872
    })
    test: Dataset({
        features: ['labels', 'input_ids', 'attention_mask'],
        num_rows: 1821
    })
})

In [13]:

from torch.utils.data import DataLoader
train_dataset = tokenized_dataset["train"].select(range(10000))
eval_dataset = tokenized_dataset["validation"]

val_dataloader = DataLoader(
    eval_dataset,
    batch_size=8,
    shuffle=False,
    collate_fn=data_collator
)

In [14]:
from adaptive_lora.callbacks import AdaptiveLoRACallback

adaptive_callback = AdaptiveLoRACallback(
    val_dataloader=val_dataloader,
    total_rank=TOTAL_RANK_BUDGET,
    tau=0.9,      
    verbose=True
)

In [15]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[adaptive_callback],
)

In [16]:
trainer.train()


--- AdaptiveLoRA: Preparing ranks for Epoch 1 ---
Computing BI importance scores (pre-training)...


Computing BI scores:   0%|          | 0/109 [00:00<?, ?it/s]

Allocating new ranks based on BI scores...
Applying new ranks to LoRA modules for this epoch...
  - base_model.model.model.layers.0.self_attn.q_proj: r=6 (Unchanged, Score: 0.9897)
  - base_model.model.model.layers.0.self_attn.k_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.0.self_attn.v_proj: r=6 (Unchanged, Score: 0.9963)
  - base_model.model.model.layers.0.self_attn.o_proj: r=6 (Unchanged, Score: 0.9941)
  - base_model.model.model.layers.0.mlp.gate_proj: r=6 (Unchanged, Score: 0.9943)
  - base_model.model.model.layers.0.mlp.up_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.0.mlp.down_proj: r=6 (Unchanged, Score: 0.9992)
  - base_model.model.model.layers.1.self_attn.q_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.1.self_attn.k_proj: r=6 (Unchanged, Score: 0.9921)
  - base_model.model.model.layers.1.self_attn.v_proj: r=6 (Unchanged, Score: 0.9870)
  - base_model.model.model.layers.1.self_attn.o_proj: r=6 (Unchan

Epoch,Training Loss,Validation Loss,Accuracy
1,0.2311,0.238146,{'accuracy': 0.9438073394495413}
2,0.1552,0.242103,{'accuracy': 0.948394495412844}
3,0.1221,0.32553,{'accuracy': 0.9426605504587156}


📄 Epoch 1: Rank allocations logged to ./logs/adaptive_lora_epoch_logs.csv


--- AdaptiveLoRA: Preparing ranks for Epoch 2 ---
Computing BI importance scores (pre-training)...


Computing BI scores:   0%|          | 0/109 [00:00<?, ?it/s]

Allocating new ranks based on BI scores...
Applying new ranks to LoRA modules for this epoch...
  - base_model.model.model.layers.0.self_attn.q_proj: r=6 (Unchanged, Score: 0.9897)
  - base_model.model.model.layers.0.self_attn.k_proj: r=6 → 7 (Score: 1.0000)
  - base_model.model.model.layers.0.self_attn.v_proj: r=6 (Unchanged, Score: 0.9989)
  - base_model.model.model.layers.0.self_attn.o_proj: r=6 (Unchanged, Score: 0.9886)
  - base_model.model.model.layers.0.mlp.gate_proj: r=6 (Unchanged, Score: 0.9934)
  - base_model.model.model.layers.0.mlp.up_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.0.mlp.down_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.1.self_attn.q_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.1.self_attn.k_proj: r=6 (Unchanged, Score: 0.9936)
  - base_model.model.model.layers.1.self_attn.v_proj: r=6 (Unchanged, Score: 0.9888)
  - base_model.model.model.layers.1.self_attn.o_proj: r=6 (Unchanged, Sc

Computing BI scores:   0%|          | 0/109 [00:00<?, ?it/s]

Allocating new ranks based on BI scores...
Applying new ranks to LoRA modules for this epoch...
  - base_model.model.model.layers.0.self_attn.q_proj: r=6 (Unchanged, Score: 0.9897)
  - base_model.model.model.layers.0.self_attn.k_proj: r=7 → 6 (Score: 1.0000)
  - base_model.model.model.layers.0.self_attn.v_proj: r=6 (Unchanged, Score: 0.9997)
  - base_model.model.model.layers.0.self_attn.o_proj: r=6 (Unchanged, Score: 0.9883)
  - base_model.model.model.layers.0.mlp.gate_proj: r=6 (Unchanged, Score: 0.9928)
  - base_model.model.model.layers.0.mlp.up_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.0.mlp.down_proj: r=6 (Unchanged, Score: 0.9994)
  - base_model.model.model.layers.1.self_attn.q_proj: r=6 (Unchanged, Score: 1.0000)
  - base_model.model.model.layers.1.self_attn.k_proj: r=6 (Unchanged, Score: 0.9930)
  - base_model.model.model.layers.1.self_attn.v_proj: r=6 (Unchanged, Score: 0.9906)
  - base_model.model.model.layers.1.self_attn.o_proj: r=6 (Unchanged, Sc

TrainOutput(global_step=1875, training_loss=0.1506740997314453, metrics={'train_runtime': 3364.6706, 'train_samples_per_second': 8.916, 'train_steps_per_second': 0.557, 'total_flos': 8718560988168192.0, 'train_loss': 0.1506740997314453, 'epoch': 3.0})

In [17]:
met = trainer.evaluate()
print(met)

{'eval_loss': 0.32552972435951233, 'eval_accuracy': {'accuracy': 0.9426605504587156}, 'eval_runtime': 48.795, 'eval_samples_per_second': 17.871, 'eval_steps_per_second': 2.234, 'epoch': 3.0}
