In [None]:
# Environment setup: install the Hugging Face stack (datasets, transformers,
# peft, evaluate) and the supporting packages used by this notebook.
# NOTE(review): the install lines below overlap and partly contradict each
# other. One line pins transformers==4.44.2, but a later line runs
# `pip install -U "transformers>=4.41"`, which may replace that pin with a newer
# release. Consolidate into a single pinned install for reproducible runs.
!pip install datasets
!pip install transformers
!pip install peft
!pip install evaluate
!pip install -U datasets==2.20.0 pyarrow==15.0.2 transformers==4.44.2 evaluate==0.4.2 --no-cache-dir
!pip install -q datasets evaluate accelerate scikit-learn pandas matplotlib
!pip install -U "transformers>=4.41" accelerate safetensors
!pip install -U bitsandbytes


In [None]:
# Install the adaptive-LoRA helper package directly from GitHub (unpinned).
# NOTE(review): presumably this provides the `adaptive_lora_gradient` module
# imported further down -- confirm, and consider pinning to a commit hash.
# Alternative source repository, kept for reference:
# !pip install --upgrade --no-cache-dir git+https://github.com/Shannu3766/bi_influence.git
!pip install --upgrade --no-cache-dir git+https://github.com/Shannu3766/Cosine-similarity.git

In [5]:
import os
from typing import Optional

import torch
from datasets import load_dataset
from transformers import (
    AutoTokenizer,
    AutoModelForSequenceClassification,
    TrainingArguments,
    Trainer,
    DataCollatorWithPadding,
)
from peft import LoraConfig, get_peft_model, TaskType
import evaluate
from adaptive_lora_gradient.callbacks import AdaptiveLoRACallback

In [6]:
# Run-level configuration: base checkpoint, output location and RNG seed.
model_checkpoint = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
output_dir = "./tinyllama-qnli-lora"
seed = 42

# Report which accelerator is visible to PyTorch.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(device)

cuda


In [7]:
# --- Optimisation hyperparameters ---
batch_size, eval_batch_size = 16, 32
num_train_epochs = 3
max_length = 128  # token limit applied when prompts are truncated
learning_rate = 3e-5
weight_decay = 0.01

# --- LoRA adapter configuration ---
rank = 8  # starting rank given to every adapted module
peft_config = LoraConfig(
    task_type=TaskType.SEQ_CLS,
    r=rank,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    # attention projections followed by the MLP projections
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"]
    + ["gate_proj", "up_proj", "down_proj"],
)

In [8]:

def clean_text(s: Optional[str]) -> str:
    """Normalise whitespace in ``s``.

    ``None`` becomes the empty string. Any other value is converted with
    ``str()``; leading/trailing whitespace is removed and every internal run
    of whitespace is collapsed to a single space.
    """
    # str.split() with no separator already ignores leading/trailing whitespace.
    tokens = [] if s is None else str(s).split()
    return " ".join(tokens)

def build_one_shot_demo(example: dict) -> str:
    """Render one demonstration in the prompt format used by this notebook.

    ``example`` must contain ``"question"`` and ``"context"``; both are passed
    through ``clean_text``. ``"label_text"`` is optional and defaults to
    ``"Yes"``. The returned block ends with a blank line so that the real
    question can follow it directly.
    """
    question = clean_text(example["question"])
    context = clean_text(example["context"])
    answer = example.get("label_text", "Yes")
    return (
        "Example:\n"
        f"Question: {question}\n"
        f"Context: {context}\n"
        f"Answer (Yes/No): {answer}\n\n"
    )

In [9]:
# Hand-written demonstration that can be prepended to prompts (see add_demo).
ONE_SHOT_EXAMPLE = dict(
    question="Who wrote Hamlet?",
    context="Hamlet was written by William Shakespeare and first performed in the early 17th century.",
    label_text="Yes",  # "Yes" means the context contains the answer
)

from datasets import load_dataset
import time

def load_dataset_with_retry(path, name=None, max_retries=None, wait=2):
    """Load a dataset with ``load_dataset``, retrying whenever it raises.

    Args:
        path: Dataset identifier forwarded to ``load_dataset``.
        name: Optional configuration name; forwarded only when not ``None``.
        max_retries: Number of failed attempts tolerated before the last
            exception is re-raised. ``None`` means retry indefinitely.
        wait: Base delay in seconds. The pause after the k-th failure is
            ``wait * min(5, k)``: a linear back-off capped at ``5 * wait``.

    Returns:
        Whatever ``load_dataset`` returns on the first successful call.

    Raises:
        Exception: The error from the final failed attempt once ``max_retries``
            failures have occurred.
    """
    load_args = (path,) if name is None else (path, name)
    attempt = 0  # number of failed attempts so far

    while True:
        try:
            ds = load_dataset(*load_args)
        except Exception as e:
            attempt += 1
            print(f"[Attempt {attempt}] Failed to load dataset: {e}")

            # Check the retry budget BEFORE announcing a retry, so the log
            # never promises a retry that will not happen.
            if max_retries is not None and attempt >= max_retries:
                print("Max retries reached. Raising error.")
                raise  # bare raise re-raises the active exception unchanged

            wait_time = wait * min(5, attempt)  # linear back-off, capped at 5x
            print(f"Retrying in {wait_time} seconds...\n")
            time.sleep(wait_time)
        else:
            print(f"Dataset loaded successfully after {attempt} attempts.")
            return ds
# Load GLUE/QNLI. No max_retries is passed, so a failing load is retried
# indefinitely until it succeeds.
dataset = load_dataset_with_retry("glue", "qnli")
# Direct, no-retry alternative:
# dataset = load_dataset("glue", "qnli")

Dataset loaded successfully after 0 attempts.


In [10]:
# Load the fast tokenizer that belongs to the chosen checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

# Batches are padded later by the data collator, which needs a pad token;
# fall back to the end-of-sequence token when the tokenizer defines none.
if tokenizer.pad_token is None:
    # safe default: use eos_token as pad
    tokenizer.pad_token = tokenizer.eos_token

In [11]:

# Build prompts and tokenize the dataset (the tokenizer is loaded in the previous cell)


def preprocess_function(examples, *, max_length: int = max_length, add_demo: bool = False):
    """Turn a batch of rows into tokenized instruction-style prompts.

    Args:
        examples: Batch mapping with ``"question"`` and ``"sentence"`` sequences
            and, optionally, ``"label"`` or ``"labels"``.
        max_length: Token limit used when truncating each prompt.
        add_demo: When true, the demonstration built from ``ONE_SHOT_EXAMPLE``
            is inserted before every question (can help decoder-only models).

    Returns:
        The tokenizer output (no padding applied), with a ``"labels"`` entry
        copied from the batch when the batch carries labels.
    """
    demo_prefix = build_one_shot_demo(ONE_SHOT_EXAMPLE) if add_demo else ""

    # One prompt per (question, sentence) pair, each ending with the answer cue.
    prompts = [
        (
            "You are a helpful assistant.\n"
            f"{demo_prefix}"
            f"Question: {clean_text(question)}\n"
            f"Context: {clean_text(sentence)}\n"
            "Answer (Yes/No):"
        )
        for question, sentence in zip(examples["question"], examples["sentence"])
    ]

    encoded = tokenizer(prompts, truncation=True, max_length=max_length, padding=False)

    # Prefer the "label" column; fall back to "labels" if that is what the batch has.
    for label_key in ("label", "labels"):
        if label_key in examples:
            encoded["labels"] = examples[label_key]
            break

    return encoded

# Tokenize the dataset in batches and drop the raw text/index columns that the
# model does not consume. The one-shot demonstration is disabled here.
print("Tokenizing dataset...")
tokenized = dataset.map(
    lambda ex: preprocess_function(ex, max_length=max_length, add_demo=False),
    batched=True,
    remove_columns=["question", "sentence", "idx"],
)

# Fallback: if preprocessing did not already produce a "labels" column, rename
# the original "label" column so the Trainer can find it.
if "label" in tokenized["train"].column_names and "labels" not in tokenized["train"].column_names:
    tokenized = tokenized.rename_column("label", "labels")

# Expose only the model inputs and labels, returned as torch tensors.
tokenized.set_format(type="torch", columns=["input_ids", "attention_mask", "labels"])

# Pad each batch to its longest sequence, rounded up to a multiple of 8.
data_collator = DataCollatorWithPadding(tokenizer=tokenizer, padding="longest", pad_to_multiple_of=8)
# Work on fixed subsets: the first 10,000 training rows and the first 1,500
# validation rows, taken in stored order (no shuffling before selection).
train_dataset = tokenized["train"].select(range(10000))
eval_dataset = tokenized["validation"].select(range(1500))

Tokenizing dataset...


Map:   0%|          | 0/5463 [00:00<?, ? examples/s]

In [12]:
print("Loading model...")

# Load the base checkpoint with a freshly initialised 2-way classification head
# in bfloat16, letting `device_map="auto"` place the weights.
# `trust_remote_code` is deliberately not enabled: this checkpoint resolves to
# the library's built-in Llama sequence-classification class, so nothing needs
# to be executed from the Hub repository, and leaving the flag off avoids
# running repo-supplied Python code.
# NOTE(review): newer transformers releases warn that `torch_dtype` is
# deprecated in favour of `dtype`; switch once the installed version is pinned.
model = AutoModelForSequenceClassification.from_pretrained(
    model_checkpoint,
    num_labels=2,
    device_map="auto",
    torch_dtype=torch.bfloat16,
)

print("Model Loaded")

`torch_dtype` is deprecated! Use `dtype` instead!


Loading model...


Some weights of LlamaForSequenceClassification were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v1.0 and are newly initialized: ['score.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Model Loaded


In [13]:
# Prepare the base model for fine-tuning, then wrap it with LoRA adapters.
# NOTE(review): this cell rebinds `model` to the PEFT wrapper; re-running it
# without reloading the base model would call get_peft_model on an already
# wrapped model.
print("Model Loaded")
# Resize token embeddings if tokenizer changed
model.resize_token_embeddings(len(tokenizer))
# Keep the model config's pad token id in sync with the tokenizer's.
model.config.pad_token_id = tokenizer.pad_token_id
# Presumably disabled because the generation cache is not needed for training.
model.config.use_cache = False

# -------------------------
# Apply LoRA (PEFT)
# -------------------------
print("Applying LoRA (PEFT)...")
model = get_peft_model(model, peft_config)

Model Loaded
Applying LoRA (PEFT)...


In [14]:
# Validation loader handed to the adaptive-rank callback.
# Order is fixed (shuffle=False) and batches are padded by the shared data_collator.
# NOTE(review): this wraps the same `eval_dataset` that the Trainer evaluates
# on, so rank allocation and the reported accuracy draw on the same examples;
# consider a separate held-out split for one of them.
# NOTE(review): the "process just got forked" tokenizers warnings in the
# training log likely stem from num_workers=2; setting
# TOKENIZERS_PARALLELISM=false beforehand should silence them -- confirm.
val_dataloader = torch.utils.data.DataLoader(
    eval_dataset,
    batch_size=eval_batch_size,
    shuffle=False,
    collate_fn=data_collator,
    pin_memory=torch.cuda.is_available(),
    num_workers=2,
)

In [15]:
from peft.tuners.lora import LoraLayer

def get_lora_module_names(peft_model):
    """Return the qualified names of every ``LoraLayer`` submodule.

    Names are listed in the order produced by ``peft_model.named_modules()``.
    """
    return [
        module_name
        for module_name, submodule in peft_model.named_modules()
        if isinstance(submodule, LoraLayer)
    ]

# Collect the LoRA-adapted modules of the PEFT-wrapped `model` and derive the
# total rank budget passed to the adaptive callback.
lora_names = get_lora_module_names(model)   # `model` is your PEFT-wrapped model
print(f"ðŸ”¢ Number of LoRA modules: {len(lora_names)}\n")

# Uncomment to list every adapted module:
# for n in lora_names:
    # print(" ", n)
num_lora_modules = len(lora_names)
# Budget = starting rank x number of adapted modules, so the total matches the
# uniform-rank configuration the model was created with.
TOTAL_RANK_BUDGET = rank * num_lora_modules

# By construction this equals `rank`; it is printed as a sanity check.
# NOTE(review): raises ZeroDivisionError if no LoRA modules were found.
avg_rank_per_module = TOTAL_RANK_BUDGET / num_lora_modules
print(
    f"ðŸ’¡ With TOTAL_RANK_BUDGET={TOTAL_RANK_BUDGET} over "
    f"{num_lora_modules} modules, avg rank â‰ˆ {avg_rank_per_module:.2f}"
)


ðŸ”¢ Number of LoRA modules: 154

ðŸ’¡ With TOTAL_RANK_BUDGET=1232 over 154 modules, avg rank â‰ˆ 8.00


In [16]:
# Trainer callback that, per the training log, recomputes importance scores and
# re-assigns LoRA ranks to the modules before each epoch.
# NOTE(review): the parameter notes below are inferred from names and log
# output only -- confirm against the adaptive_lora_gradient package.
adaptive_callback = AdaptiveLoRACallback(
    val_dataloader=val_dataloader,  # presumably the batches used for importance scoring
    total_rank=TOTAL_RANK_BUDGET,  # rank * number of LoRA modules
    tau=0.9,  # TODO confirm meaning of tau
    min_rank=4,  # presumably the lower bound on any module's rank
    validate_batch_size=eval_batch_size,
    verbose=True,
)


In [17]:
# Accuracy metric object from the `evaluate` library; compute_metrics calls it.
accuracy = evaluate.load("accuracy")

def compute_metrics(eval_pred):
    """Compute classification accuracy for the Trainer.

    ``eval_pred`` unpacks into ``(logits, labels)``. The predicted class is the
    index of the largest logit along the last axis, and the result is returned
    as ``{"accuracy": value}``.
    """
    scores, references = eval_pred
    predicted_classes = scores.argmax(axis=-1)
    result = accuracy.compute(predictions=predicted_classes, references=references)
    return dict(accuracy=result["accuracy"])

# -------------------------
# TrainingArguments + Trainer
# -------------------------
# Evaluate and checkpoint once per epoch, keep at most two checkpoints, log
# every 100 steps, and train in bf16.
# NOTE(review): `load_best_model_at_end` is not set, so the weights saved at
# the end of this cell should be those of the final epoch rather than the
# best-accuracy epoch -- confirm this is intended. Enabling it may not be
# compatible with a callback that changes LoRA ranks between epochs.
training_args = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=eval_batch_size,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    num_train_epochs=num_train_epochs,
    eval_strategy="epoch",
    save_strategy="epoch",
    logging_strategy="steps",
    logging_steps=100,
    save_total_limit=2,
    metric_for_best_model="accuracy",
    seed=seed,
    fp16=False,
    bf16=True,
    push_to_hub=False,
    report_to="none",
)

# Wire together the PEFT model, the data subsets, the padding collator, the
# accuracy metric and the adaptive-rank callback.
# NOTE(review): newer transformers releases deprecate the `tokenizer` argument
# in favour of `processing_class` -- check against the installed version.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
    callbacks=[adaptive_callback]
)

print("Starting training...")
trainer.train()

# Persist the trained model to output_dir.
print("Saving model and adapters...")
trainer.save_model(output_dir)
print("Saved model to", output_dir)

  trainer = Trainer(


Starting training...

--- AdaptiveLoRA: Preparing ranks for Epoch 1 ---
Computing BI importance scores (pre-training)...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Computing Importance:   0%|          | 0/47 [00:00<?, ?it/s]

Allocating new ranks based on BI scores...
Applying new ranks to LoRA modules for this epoch...
  - base_model.model.model.layers.0.self_attn.q_proj: r=8 â†’ 6 (Score: 0.2348)
  - base_model.model.model.layers.0.self_attn.k_proj: r=8 â†’ 9 (Score: 0.4964)
  - base_model.model.model.layers.0.self_attn.v_proj: r=8 â†’ 7 (Score: 0.3562)
  - base_model.model.model.layers.0.self_attn.o_proj: r=8 â†’ 11 (Score: 0.7024)
  - base_model.model.model.layers.0.mlp.gate_proj: r=8 (Unchanged, Score: 0.4105)
  - base_model.model.model.layers.0.mlp.up_proj: r=8 (Unchanged, Score: 0.4128)
  - base_model.model.model.layers.0.mlp.down_proj: r=8 (Unchanged, Score: 0.4128)
  - base_model.model.model.layers.1.self_attn.q_proj: r=8 â†’ 7 (Score: 0.3473)
  - base_model.model.model.layers.1.self_attn.k_proj: r=8 â†’ 15 (Score: 1.0000)
  - base_model.model.model.layers.1.self_attn.v_proj: r=8 â†’ 9 (Score: 0.4820)
  - base_model.model.model.layers.1.self_attn.o_proj: r=8 (Unchanged, Score: 0.3940)
  - base_mode

Epoch,Training Loss,Validation Loss,Accuracy
1,0.4152,0.392715,0.830667
2,0.4013,0.411046,0.812
3,0.4086,0.408037,0.82


ðŸ“„ Epoch 1: Rank allocations logged to ./logs/adaptive_lora_epoch_logs.csv


--- AdaptiveLoRA: Preparing ranks for Epoch 2 ---
Computing BI importance scores (pre-training)...


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Computing Importance:   0%|          | 0/47 [00:00<?, ?it/s]

Allocating new ranks based on BI scores...
Applying new ranks to LoRA modules for this epoch...
  - base_model.model.model.layers.0.self_attn.q_proj: r=6 â†’ 7 (Score: 0.2362)
  - base_model.model.model.layers.0.self_attn.k_proj: r=9 â†’ 8 (Score: 0.4294)
  - base_model.model.model.layers.0.self_attn.v_proj: r=7 (Unchanged, Score: 0.3175)
  - base_model.model.model.layers.0.self_attn.o_proj: r=11 â†’ 10 (Score: 0.5648)
  - base_model.model.model.layers.0.mlp.gate_proj: r=8 (Unchanged, Score: 0.4285)
  - base_model.model.model.layers.0.mlp.up_proj: r=8 â†’ 9 (Score: 0.4561)
  - base_model.model.model.layers.0.mlp.down_proj: r=8 â†’ 9 (Score: 0.4561)
  - base_model.model.model.layers.1.self_attn.q_proj: r=7 â†’ 8 (Score: 0.3515)
  - base_model.model.model.layers.1.self_attn.k_proj: r=15 â†’ 13 (Score: 0.8518)
  - base_model.model.model.layers.1.self_attn.v_proj: r=9 â†’ 8 (Score: 0.4226)
  - base_model.model.model.layers.1.self_attn.o_proj: r=8 (Unchanged, Score: 0.4016)
  - base_model.m

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Computing Importance:   0%|          | 0/47 [00:00<?, ?it/s]

Allocating new ranks based on BI scores...
Applying new ranks to LoRA modules for this epoch...
  - base_model.model.model.layers.0.self_attn.q_proj: r=7 (Unchanged, Score: 0.3163)
  - base_model.model.model.layers.0.self_attn.k_proj: r=8 â†’ 9 (Score: 0.5673)
  - base_model.model.model.layers.0.self_attn.v_proj: r=7 â†’ 8 (Score: 0.4278)
  - base_model.model.model.layers.0.self_attn.o_proj: r=10 â†’ 11 (Score: 0.6781)
  - base_model.model.model.layers.0.mlp.gate_proj: r=8 â†’ 9 (Score: 0.5537)
  - base_model.model.model.layers.0.mlp.up_proj: r=9 (Unchanged, Score: 0.5705)
  - base_model.model.model.layers.0.mlp.down_proj: r=9 (Unchanged, Score: 0.5704)
  - base_model.model.model.layers.1.self_attn.q_proj: r=8 (Unchanged, Score: 0.4607)
  - base_model.model.model.layers.1.self_attn.k_proj: r=13 â†’ 15 (Score: 0.9799)
  - base_model.model.model.layers.1.self_attn.v_proj: r=8 â†’ 9 (Score: 0.5041)
  - base_model.model.model.layers.1.self_attn.o_proj: r=8 â†’ 9 (Score: 0.5054)
  - base_mo

In [18]:
# Final evaluation on eval_dataset using the model state left after training.
results=trainer.evaluate()
print(results)

{'eval_loss': 0.4080371856689453, 'eval_accuracy': 0.82, 'eval_runtime': 171.907, 'eval_samples_per_second': 8.726, 'eval_steps_per_second': 0.273, 'epoch': 3.0}
