In [1]:
import torch
import itertools
from transformers import AutoModelForCausalLM, AutoTokenizer
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Checkpoint to analyse. The revision hash pins the exact commit so that the
# remote modelling code pulled in by `trust_remote_code=True` cannot change
# underneath this notebook.
model_id = "microsoft/phi-1_5"
model_revision = "349cf8b5e81fd5f791d1740da5de1313a0419bbd"  # latest as of Feb 1st

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    # Half precision halves memory use; be careful with this -- downstream
    # numerics should up-cast before doing any arithmetic on the weights.
    torch_dtype=torch.float16,
    trust_remote_code=True,
    revision=model_revision,
    # Optional faster attention kernel, left disabled:
    # attn_implementation="flash_attention_2",
)

  from .autonotebook import tqdm as notebook_tqdm
  return self.fget.__get__(instance, owner)()


In [2]:
def _sorted_pairwise_cosine(rows, pair_mask):
    """Return cosine similarities of all distinct row pairs, sorted descending.

    Args:
        rows: 2-D NumPy array of shape (n, d); each row is one vector.
        pair_mask: boolean (n, n) array that is True exactly on the strict
            upper triangle, i.e. one entry per unordered pair (i, j), i < j.

    Returns:
        1-D array with n * (n - 1) / 2 similarities in descending order.
        A row with zero norm yields NaN for every pair it takes part in.
    """
    unit_rows = rows / np.linalg.norm(rows, axis=1, keepdims=True)
    cosine = unit_rows @ unit_rows.T
    # Select by position rather than by value: filtering with `!= 0` would
    # also throw away genuinely orthogonal pairs.
    return np.sort(cosine[pair_mask])[::-1]


def get_cosine_sim(A, seed=None):
    """Compare row-wise cosine similarities of A against random baselines.

    Every unordered pair of distinct rows of ``A`` contributes one cosine
    similarity. The same statistic is computed for two random matrices of the
    same shape: one with entries drawn uniformly from [-1, 1) and one with
    standard-normal entries.

    Args:
        A: 2-D ``torch.Tensor`` (any dtype/device, may require grad) or any
            array-like accepted by ``np.asarray``. It is converted to float32.
        seed: optional integer seed for the random baselines. When ``None``
            (the default) the global ``np.random`` state is used, so a prior
            ``np.random.seed(...)`` call still controls the draws.

    Returns:
        Tuple ``(sorted_sim, sorted_rand_uni, sorted_rand_n)`` of 1-D arrays,
        each of length ``n * (n - 1) / 2`` and sorted in descending order.
    """
    if hasattr(A, "detach"):
        # Torch path: drop autograd tracking, up-cast (e.g. from float16) and
        # move to host memory before handing the data to NumPy.
        weights = A.detach().float().cpu().numpy()
    else:
        weights = np.asarray(A, dtype=np.float32)

    height, width = weights.shape

    # Strict upper triangle: excludes self-similarities (the diagonal) and the
    # mirrored duplicates below it. Built once and shared by all three matrices.
    pair_mask = ~np.tri(height, dtype=bool)

    # Draw order (uniform first, then normal) is kept fixed so that seeding
    # produces the same baselines from run to run.
    rng = np.random if seed is None else np.random.RandomState(seed)
    rand_uni = rng.uniform(-1, 1, (height, width))
    rand_n = rng.randn(height, width)

    sorted_sim = _sorted_pairwise_cosine(weights, pair_mask)
    sorted_rand_uni = _sorted_pairwise_cosine(rand_uni, pair_mask)
    sorted_rand_n = _sorted_pairwise_cosine(rand_n, pair_mask)

    return sorted_sim, sorted_rand_uni, sorted_rand_n

In [3]:
# Weight matrix of the first transformer layer's MLP up-projection (fc1),
# addressed with a single dotted module path.
A = model.get_submodule("model.layers.0.mlp.fc1").weight

In [4]:
# The random baselines are drawn from NumPy's global RNG; seed it so that the
# comparison (and the figure built from it) is reproducible on a fresh kernel.
np.random.seed(42)
sorted_sim, sorted_rand_uni, sorted_rand_normal = get_cosine_sim(A)

: 

In [None]:
def select_entries(vector, step=10):
    """Subsample ``vector`` for plotting: every ``step``-th entry plus the last.

    Args:
        vector: 1-D NumPy array (or any sequence supporting integer-array
            indexing and slicing).
        step: stride between kept entries; must be a positive integer.

    Returns:
        The entries at positions ``0, step, 2*step, ...`` followed by the final
        entry. When ``len(vector) - 1`` is a multiple of ``step`` the final
        entry therefore appears twice. An empty input yields an empty result.

    Raises:
        ValueError: if ``step`` is smaller than 1.
    """
    if step < 1:
        raise ValueError("step must be a positive integer")

    length = len(vector)
    if length == 0:
        # Without this guard the appended index -1 would raise IndexError.
        return vector[:0]

    positions = np.append(np.arange(0, length, step), length - 1)
    return vector[positions]

# Subsample each sorted similarity curve (every 10th value plus the last one)
# so the plot stays light.
selected_sorted = select_entries(sorted_sim)
selected_sorted_rand_normal = select_entries(sorted_rand_normal)
selected_sorted_rand_uni = select_entries(sorted_rand_uni)


def _sample_positions(vector):
    """Original positions of the entries `select_entries` keeps from `vector`."""
    return np.append(np.arange(0, len(vector), 10), len(vector) - 1)


# x-axis positions are computed per series: the three curves are not guaranteed
# to have the same length, and reusing one index array for all of them would
# make the plot call fail on a shape mismatch.
indices = _sample_positions(sorted_rand_uni)
curves = [
    ('real', _sample_positions(sorted_sim), selected_sorted),
    ('rand_normal', _sample_positions(sorted_rand_normal), selected_sorted_rand_normal),
    ('rand_uni', indices, selected_sorted_rand_uni),
]

fig, ax = plt.subplots(figsize=(10, 6))
for label, positions, values in curves:
    ax.plot(positions, values, label=label)
ax.set_title('Cosine Similarities (No Duplicates or Self-Similarities)')
ax.set_xlabel('Index')
ax.set_ylabel('Cosine Similarity')
ax.set_ylim(-1, 1)
# Zero line for reference: similarities above it point the same way, below it
# point in opposing directions.
ax.axhline(y=0, color='brown', linestyle='-')
ax.legend()
plt.show()