In [1]:
from huggingface_hub import login

# Replace 'your-hf-token-here' with your actual Hugging Face token
login(token="")

/home/sa5u24/agent/AgentTuning/eval_heldout/rewoo


In [2]:
import torch
from transformers import LlamaForCausalLM, LlamaTokenizer, AutoTokenizer, BitsAndBytesConfig
from peft import PeftModel
from datasets import load_dataset
import random
import numpy as np


# Set a seed for reproducibility
random.seed(42)

ds = load_dataset("rewoo/planner_instruction_tuning_2k")
dataset = ds['train']

# Shuffle and select 1000 samples for training
train_dataset = dataset.shuffle(seed=42).select(range(1500))

# Shuffle again (independently) and select 500 samples for testing
validation_dataset = dataset.shuffle(seed=123).select(range(200))

sampled_data = validation_dataset
len(train_dataset), len(validation_dataset), sampled_data[0]

(1500,
 200,
 {'instruction': 'For the following tasks, make plans that can solve the problem step-by-step. For each plan, indicate which external tool together with tool input to retrieve evidence. You can store the evidence into a variable #E that can be called by later tools. (Plan, #E1, Plan, #E2, Plan, ...)\n\nTools can be one of the following:\nWikipedia[input]: Worker that search for similar page contents from Wikipedia. Useful when you need to get holistic knowledge about people, places, companies, historical events, or other subjects. The response are long and might contain some irrelevant information. Input should be a search query.\nLLM[input]: A pretrained LLM like yourself. Useful when you need to act with general world knowledge and common sense. Prioritize it when you are confident in solving the problem yourself. Input can be any instruction.',
  'input': 'Which model of Ford car took its name from an Italian alpine resort?',
  'output': 'Plan: Search for more informati

In [3]:
# Configure 4-bit quantization using bitsandbytes
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",  # Normalized Float 4 (better than standard FP4)
    bnb_4bit_use_double_quant=True,  # Uses secondary quantization for better precision
    bnb_4bit_compute_dtype=torch.float16  # Keeps computation in FP16 for stability
)

# Meta-Llama/Llama-2-7b-chat-hf
# meta-llama/Llama-3.2-1B-Instruct
# Load the base LLaMA 2.7B model
base_model = LlamaForCausalLM.from_pretrained(
    "Meta-Llama/Llama-2-7b-chat-hf",
    quantization_config=bnb_config,
    device_map="auto"
)

best_model_path = "./lora/bestmodel"
# # Load the LoRA fine-tuned adapter
model_peft = PeftModel.from_pretrained(base_model, best_model_path)


# Load the fine-tuned tokenizer
tokenizer = AutoTokenizer.from_pretrained(best_model_path, use_fast=False)
tokenizer.pad_token = tokenizer.eos_token  # Ensure correct padding

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
pmodel = model_peft
proj_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj']
adapter_name = 'default'  # Replace if using another adapter

specs, all_singular_values, vh_matrices = [], [], []

for layer_idx, layer in enumerate(pmodel.base_model.model.model.layers):
    for proj_name in proj_modules:
        proj_module = getattr(layer.self_attn, proj_name, None)
        if proj_module is not None:
            if adapter_name in proj_module.lora_A and adapter_name in proj_module.lora_B:
                lora_A = proj_module.lora_A[adapter_name].weight  # [r, in_dim]
                lora_B = proj_module.lora_B[adapter_name].weight  # [out_dim, r]
                vec = lora_B @ lora_A  # [out_dim, in_dim], e.g., [4096, 4096]

                try:
                    # Step 1: Row normalization
                    row_norms = torch.norm(vec, dim=1, keepdim=True) + 1e-8
                    X_row = vec / row_norms  # [out_dim, in_dim]

                    # Step 2: Column normalization
                    col_norms = torch.norm(vec, dim=0, keepdim=True) + 1e-8
                    X_col = vec / col_norms

                    # Step 3: SVD on row-normalized matrix (to get Vh)
                    _, _, Vh_row = torch.linalg.svd(X_row, full_matrices=False)

                    # Step 4: SVD on column-normalized matrix (to get U)
                    U_col, _, _ = torch.linalg.svd(X_col, full_matrices=False)

                    # Step 5: Search for best (u, v) pair (rank-1 approximation)
                    best_score = float('inf')
                    best_u, best_v, best_d = None, None, None

                    for u in U_col[:5]:  # top-5 vectors only
                        for v in Vh_row[:5]:
                            d = torch.median((vec @ v) * u)  # robust scalar
                            recon = d * torch.ger(u, v)
                            score = torch.norm(vec - recon, p=1)
                            if score < best_score:
                                best_score = score
                                best_u, best_v, best_d = u, v, d

                    # Step 6: Restore magnitude using row norms 
                    recon_rank1_normalized = best_d * torch.ger(best_u, best_v)
                    recon_rank1_rescaled = row_norms * recon_rank1_normalized  # [out_dim, in_dim]

                    # Step 7: Final SVD on rescaled rank-1 matrix
                    _, S, Vh_final = torch.linalg.svd(recon_rank1_rescaled, full_matrices=False)

                    # Step 8: Store metrics
                    specs.append(S[0].item())  # top singular value
                    all_singular_values.append(S.cpu().numpy())
                    vh_matrices.append(Vh_final)

                    print(f"✅ Layer {layer_idx} | {proj_name} | SpSVD successful | shape: {vec.shape}")

                except Exception as e:
                    specs.append(float('nan'))
                    print(f"⚠️ SpSVD failed at layer {layer_idx} {proj_name}: {e}")


✅ Layer 0 | q_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 0 | k_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 0 | v_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 0 | o_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 1 | q_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 1 | k_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 1 | v_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 1 | o_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 2 | q_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 2 | k_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 2 | v_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 2 | o_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 3 | q_proj | SpSVD successful | shape: torch.Size([4096, 4096])
✅ Layer 3 | k_proj | SpSVD successful | shape: torch.Size([4096,

In [5]:
len(specs), len(all_singular_values),all_singular_values[0].shape, len(vh_matrices), vh_matrices[0].shape

(128, 128, (4096,), 128, torch.Size([4096, 4096]))

In [None]:
# Step 1: Compute SharpIndex for each layer from singular values
sharp_indices = []
for svals in all_singular_values:
    if len(svals) == 0 or np.sum(svals) == 0:
        sharp_indices.append(np.nan)
    else:
        sharp_index = svals[0] / (np.sum(svals) + 1e-6)
        sharp_indices.append(sharp_index)

sharp_indices = np.array(sharp_indices)

# Sort indices of sharpness scores in descending order
sorted_indices = np.argsort(-sharp_indices)  # minus sign for descending

# Select top-k most "sharp" layers
k = 5  # or 10, or any value you want
outlier_indices = sorted_indices[:k]


# Print detected layers
print("\n🚨 Detected Outlier Layers by SharpIndex:")
for idx in outlier_indices:
    print(f"⚠️  Layer Index {idx} | SharpIndex = {sharp_indices[idx]:.4f} ")
    
# Final return or export
outlier_indices  # can use in your pipeline

In [8]:
outlier_indices = set(outlier_indices)
proj_modules = ['q_proj', 'k_proj', 'v_proj', 'o_proj']
adapter_name = 'default'

global_idx = 0  # to track module position across all layers

for layer_idx, layer in enumerate(pmodel.base_model.model.model.layers):
    for proj_name in proj_modules:
        if global_idx in outlier_indices:
            proj_module = getattr(layer.self_attn, proj_name, None)
            if proj_module is not None:
                if adapter_name in proj_module.lora_B:
                    lora_B = proj_module.lora_B[adapter_name].weight
                    lora_B.data.zero_()
                    print(f"Zeroed lora_B in layer {layer_idx} | {proj_name} | global idx {global_idx}")
        global_idx += 1

Zeroed lora_B in layer 1 | v_proj | global idx 6
Zeroed lora_B in layer 19 | q_proj | global idx 76
Zeroed lora_B in layer 20 | q_proj | global idx 80
Zeroed lora_B in layer 29 | v_proj | global idx 118
Zeroed lora_B in layer 31 | v_proj | global idx 126


In [9]:
# Function to count zero parameters in each layer
def count_zero_params(model):
    zero_params = {}
    for name, param in model.named_parameters():
        num_zeros = torch.sum(param == 0).item()
        total_params = param.numel()
        sparsity = num_zeros / total_params * 100
        zero_params[name] = (num_zeros, total_params, sparsity)
    return zero_params

zero_params_info = count_zero_params(pmodel)

# Print layers with high sparsity
for layer, (num_zeros, total, sparsity) in zero_params_info.items():
    if sparsity > 50:  # Layers with more than 50% zeros
        print(f"Layer: {layer}, Sparsity: {sparsity:.2f}%")

Layer: base_model.model.model.layers.1.self_attn.v_proj.lora_B.default.weight, Sparsity: 100.00%
Layer: base_model.model.model.layers.19.self_attn.q_proj.lora_B.default.weight, Sparsity: 100.00%
Layer: base_model.model.model.layers.20.self_attn.q_proj.lora_B.default.weight, Sparsity: 100.00%
Layer: base_model.model.model.layers.29.self_attn.v_proj.lora_B.default.weight, Sparsity: 100.00%
Layer: base_model.model.model.layers.31.self_attn.v_proj.lora_B.default.weight, Sparsity: 100.00%


In [10]:
import torch.nn.utils.prune as prune

# Function to prune layers with high sparsity and return pruned model
def prune_zero_params(model, threshold=50):
    pruned_model = model  # Work on a copy of the model

    for name, module in pruned_model.named_modules():
        if hasattr(module, "weight") and module.weight is not None:
            num_zeros = torch.sum(module.weight == 0).item()
            total_params = module.weight.numel()
            sparsity = (num_zeros / total_params) * 100

            if sparsity > threshold:
                # print(f"Pruning {name} with sparsity {sparsity:.2f}%")
                prune.l1_unstructured(module, name="weight", amount=1.0)  # Fully prune
                prune.remove(module, "weight")  # Remove redundant params

    return pruned_model  # Return the pruned model

# Apply pruning and get the new model
model_prune = prune_zero_params(pmodel)


In [11]:
from evaluate import load
import time
import torch.nn.functional as F

BATCH_SIZE = 8
tokenizer.pad_token = tokenizer.eos_token
eos_token_id = tokenizer.eos_token_id 

model_prune = model_prune.eval()

def generate_batch(model,dialogues):
    """
    Generate plan using LLaMA2.
    """

    input_texts = [
            f"Follow the instruction: {inst}, input: {inp}, and Output:"
            for inst, inp in zip(dialogues['instruction'], dialogues['input'])
        ]
    # Tokenize in batch
    inputs = tokenizer(input_texts, return_tensors="pt", truncation=True, max_length=512, padding=True).to(model.device)

    # Generate summaries
    with torch.no_grad():
        generated_ids = model.generate(**inputs, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id,
                                       return_dict_in_generate=True,
                                        output_scores=True,
                                        output_hidden_states=True,
                                        do_sample=True)
    
    # Trim the generated ids to remove the input ids
    trimmed_generated_ids = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids[0])
    ]
    
    scores = [score.cpu() for score in generated_ids["scores"]] 
    
    # Decode and clean up summaries
    summaries = tokenizer.batch_decode(generated_ids['sequences'], skip_special_tokens=True)
    summaries_final = [s.split("Output:")[-1].strip() for s in summaries]

    summaries_trimmed = tokenizer.batch_decode(trimmed_generated_ids, skip_special_tokens=True)
    

    return summaries_final, summaries_trimmed, trimmed_generated_ids, scores




meteor = load("meteor")
bleu = load("bleu")
rouge = load("rouge")
sampled_data = validation_dataset


# Process the entire test dataset in batches
test_dialogues = sampled_data
num_samples = len(test_dialogues)

generated_summaries = []
samplewise_rouge = []
all_scores = []
unc_all = []
start_time = time.time()
for i in range(0, num_samples, BATCH_SIZE):
    batch_dialogues = test_dialogues[i : i + BATCH_SIZE]
    batch_summaries, batch_summaries_trimmed, trimmed_generated_ids, scores = generate_batch(model_peft,batch_dialogues)
    
    trimmed_generated_ids_ = torch.stack(trimmed_generated_ids)
    # Find positions where EOS token appears (first occurrence per sequence)
    eos_positions = (trimmed_generated_ids_ == eos_token_id).int()  # Create a mask
    first_eos_indices = torch.argmax(eos_positions, dim=1)  # Find first EOS position in each row
    
    # Handle cases where EOS is not found (replace 0 with sequence length)
    no_eos_mask = (eos_positions.sum(dim=1) == 0)  # Mask sequences without EOS
    first_eos_indices[no_eos_mask] = trimmed_generated_ids_.shape[1]  # Assign full length if no EOS found

    scores_ = torch.stack(scores).permute(1,0,2)
    for i in range(scores_.shape[0]):
        valid_scores= scores_[i,:first_eos_indices[i],:]
        probs = F.softmax(valid_scores, dim=-1)
        entropy = -torch.sum(probs * torch.log(probs + 1e-9), dim=-1) 
        unc = sum(entropy) / len(entropy) if len(entropy) > 0 else 0.0
        unc_all.append(unc)
    
    # samplewise_rouge.extend(each_rouge)
    generated_summaries.extend(batch_summaries_trimmed)
    
end_time = time.time()

# Compute Inference Time
total_time = end_time - start_time
avg_time_per_sample = total_time / num_samples

print(f"Total inference time: {total_time:.2f} seconds")
print(f"Average time per sample: {avg_time_per_sample:.4f} seconds")

# Load the metrics

test_ans = sampled_data["output"]
all_pred = generated_summaries
all_ans = test_ans

rouge_results = rouge.compute(predictions=all_pred, references=all_ans)
each_rouge = rouge.compute(predictions=all_pred, references=all_ans, use_aggregator=False)

bleu_result = bleu.compute(predictions=all_pred, references=all_ans)
meteor_result = meteor.compute(predictions=all_pred, references=all_ans)

print("ROUGE:", rouge_results)
print("BLEU:", bleu_result)
print("METEOR:", meteor_result)
print("unc", len(unc_all))

[nltk_data] Downloading package wordnet to /home/sa5u24/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /home/sa5u24/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /home/sa5u24/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
A decoder-only architecture is being used, but right-padding was detected! For correct generation results, plea

Total inference time: 242.32 seconds
Average time per sample: 1.2116 seconds
ROUGE: {'rouge1': 0.6905914941948734, 'rouge2': 0.5716438218605611, 'rougeL': 0.6082275010307869, 'rougeLsum': 0.6820733395424499}
BLEU: {'bleu': 0.5567200191237582, 'precisions': [0.6794777859188703, 0.5906298521568892, 0.521453500307945, 0.45903004232290295], 'brevity_penalty': 1.0, 'length_ratio': 1.0996923527688252, 'translation_length': 15013, 'reference_length': 13652}
METEOR: {'meteor': 0.6914506247633411}
unc 200


In [13]:
# proxy accuracy
# each_rouge = rouge.compute(predictions=all_pred, references=all_ans, use_aggregator=False)
all_acc = []
for i in range(len(each_rouge['rouge1'])):
    if each_rouge['rouge1'][i]>0.5:
        acc = 1
    else:
        acc = 0
    all_acc.append(acc)

overall_acc = sum(all_acc)/len(all_acc)
overall_acc

0.89

In [14]:
from sklearn.metrics import roc_auc_score, average_precision_score
unc_all_ = unc_all[::-1]
auroc = roc_auc_score(all_acc, unc_all_)
auarc = average_precision_score(all_acc, unc_all_)
auroc,auarc

(0.5919305413687437, 0.9054302668971186)