In [None]:


from transformers import AutoModel, AutoTokenizer
import torch
import torch.nn.functional as F
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-en", trust_remote_code = True, use_fast = False)

model = AutoModel.from_pretrained("jinaai/jina-embeddings-v2-base-en", trust_remote_code = True)

# import json 

# with open("test.tok.json","r") as infile: 
#     infile.readline()
#     lines = infile.readlines()

# data = [json.loads(line.strip("\n").rstrip(',').rstrip("]")) for line in lines]
model.eval()

JinaBertModel(
  (embeddings): JinaBertEmbeddings(
    (word_embeddings): Embedding(30528, 768, padding_idx=0)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): JinaBertEncoder(
    (layer): ModuleList(
      (0-11): 12 x JinaBertLayer(
        (attention): JinaBertAttention(
          (self): JinaBertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.0, inplace=False)
          )
          (output): JinaBertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
        )

In [13]:
num_layers = len(model.encoder.layer)
emb_dim = model.config.hidden_size
num_heads = model.config.num_attention_heads
head_dim = emb_dim // num_heads

In [14]:
import json 

with open("test.tok.json","r") as infile: 
    infile.readline()
    lines = infile.readlines()

data = [json.loads(line.strip("\n").rstrip(',').rstrip("]")) for line in lines]



In [15]:
embedding_Q = [model.encode(line["question"]) for line in data]
embedding_A = [model.encode(line["rationale"]) for line in data]



In [12]:
from sklearn.manifold import TSNE
import plotly.express as px

tsne = TSNE(n_components=2, perplexity=30, random_state=42, learning_rate="auto", early_exaggeration=5.0, n_iter=5000, init="pca")
embedding = embedding_Q + embedding_A
show_dims2 = tsne.fit_transform(np.array(embedding))
color_Q = ["question" for i in range(len(embedding_Q))]
color_A = ["answer" for i in range(len(embedding_A))]
category_Q = ["question " + str(i) for i in range(len(embedding_Q))]
category_A = ["answer " + str(i) for i in range(len(embedding_A))]

color = color_Q + color_A
category = category_Q + category_A

x = [x for x, y in show_dims2]
y = [y for x, y in show_dims2]
fig = px.scatter(
    show_dims2, x=0, y=1,
    width=600, height=500,
    color = color,
    hover_name=category,
)
fig.update_traces(
hovertemplate=None,
hoverinfo='text'
)
fig.show()


'n_iter' was renamed to 'max_iter' in version 1.5 and will be removed in 1.7.



In [None]:
#Function for doing an ablation. 

#zero ablation: set activation to zero
#random ablation: set to random value (eg. Gauss with mean 0 and std 1)
#mean ablation: mean value of all activations in a given layer

def ablation(layer, neuron_idx, data, type = "zero"):
    original_forward = model.encoder.layer[layer].mlp.wo.forward

    if type == "zero": 
        new_activation = 0.0
    if type == "mean": 
        new_activation = np.sum(np.array([original_forward[:,:,i]] for i in range(len(emb_dim))))
    if type == "random": 
        new_activation = np.random.normal(0.0,1.0)
    if type != "zero" and type != "mean" and type != "random": 
        raise ValueError("The type you wrote does not exist! Use either zero, mean or random for the ablation type")

    def change_forward(x):
        our = original_forward(x)
        our[:,:,neuron_idx] = new_activation
        return our     #torch.zeros_like(original_wo(*args, **kwargs)
    
    model.encoder.layer[layer].mlp.wo.forward = change_forward
    new_embeddings = model.encode(text for text in data)
    model.encoder.layer[layer].mlp.wo.forward = original_forward

    return new_embeddings



    
