In [2]:
import torch
from transformers import AutoTokenizer, AutoModel
from pathlib import Path
from einops import rearrange, repeat
import os
import numpy as np
import matplotlib
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
num_hidden_layers = 40
layers_arr = np.arange(num_hidden_layers)

In [4]:
def load_and_stack_layers(num_layers: int, reporter_type: 'str', prefix_path: str) -> torch.Tensor:
    layers = []
    if reporter_type == 'lr' or reporter_type == 'lr2':
        for i in range(num_layers):
            layer_path = f"{prefix_path}/layer_{i}.pt"
            layer = torch.load(layer_path)
            layers.append(layer.linear.weight.detach().cpu())
        stacked = torch.cat(layers, dim=0)
        return stacked
    elif reporter_type == 'vinc':
        for i in range(num_layers):
            layer_path = f"{prefix_path}/layer_{i}.pt"
            layer = torch.load(layer_path)
            layers.append(layer.weight.detach().cpu())
        stacked = torch.cat(layers, dim=0)
        return stacked
    elif reporter_type == 'ccs':
        for i in range(num_layers):
            layer_path = f"{prefix_path}/layer_{i}.pt"
            layer = torch.load(layer_path)
            layers.append(layer.probe[0].weight.detach().cpu())
        stacked = torch.cat(layers, dim=0)
        return stacked

def get_norm_weights(data: dict, dataset: str, ccs_path: str, vinc_path: str, lr_path: str, lr2_path: str):
    
    #for reporter,path in zip(['ccs','vinc', 'lr', 'lr2'], [ccs_path,vinc_path,lr_path,lr2_path]):
    for reporter,path in zip(['ccs','vinc'], [ccs_path,vinc_path]):
        weights = load_and_stack_layers(num_hidden_layers, reporter, path)
        magnitude = torch.norm(weights, dim=-1, keepdim=True)
        data[(dataset, reporter)] = weights/magnitude
    

def similarity(rep1_normalized, rep2_normalized):
    cosine_sim = torch.einsum("ij,ij->i", rep1_normalized, rep2_normalized)
    return cosine_sim


def similarity_single_layer(rep1_normalized, rep1_layer: int, rep2_normalized):
    cosine_sim = torch.einsum("j,ij->i", rep1_normalized[rep1_layer], rep2_normalized)
    return cosine_sim

In [5]:
data = {}

for d in [1,5,10]:

    dataset = f"arith{d}"

    lr_path = f'/home/waree/elk-reporters/test-huggyllama/llama-13b/sethapun/arithmetic_2as_1to{d}/ccs/lr_models'
    ccs_path = f'/home/waree/elk-reporters/test-huggyllama/llama-13b/sethapun/arithmetic_2as_1to{d}/ccs/reporters'
    lr2_path = f'/home/waree/elk-reporters/test-huggyllama/llama-13b/sethapun/arithmetic_2as_1to{d}/vinc/lr_models'
    vinc_path = f'/home/waree/elk-reporters/test-huggyllama/llama-13b/sethapun/arithmetic_2as_1to{d}/vinc/reporters'

    get_norm_weights(data, dataset, ccs_path, vinc_path, lr_path, lr2_path)

RuntimeError: CUDA error: out of memory
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1.
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
data2 = {}

for d in [0,5,10,30,50]:

    dataset = f"imdb_misspelled_{d}"

    lr_path = f'/home/waree/elk-reporters/arith-huggyllama/llama-13b/sethapun/imdb_misspelled_{d}/ccs/lr_models'
    ccs_path = f'/home/waree/elk-reporters/arith-huggyllama/llama-13b/sethapun/imdb_misspelled_{d}/ccs/reporters'
    lr2_path = f'/home/waree/elk-reporters/arith-huggyllama/llama-13b/sethapun/imdb_misspelled_{d}/vinc/lr_models'
    vinc_path = f'/home/waree/elk-reporters/arith-huggyllama/llama-13b/sethapun/imdb_misspelled_{d}/vinc/reporters'

    get_norm_weights(data2, dataset, ccs_path, vinc_path, lr_path, lr2_path)