Loading the model: 

In [1]:
from transformers import AutoModel, AutoTokenizer
import torch
import torch.nn.functional as F
import numpy as np

tokenizer = AutoTokenizer.from_pretrained("jinaai/jina-embeddings-v2-base-en", trust_remote_code = True, use_fast = False)

model = AutoModel.from_pretrained("jinaai/jina-embeddings-v2-base-en", trust_remote_code = True)

prompt = "Natural language processing tasks, such as question answering, machine translation, reading comprehension, and summarization, are typically approached with supervised learning on taskspecific datasets."

input = tokenizer(prompt, return_tensors = "pt", padding = "max_length", max_length = 60, truncation = True)


In [4]:
prompt = ["hei","på"]

input = tokenizer(prompt, return_tensors = "pt", padding = "max_length", max_length = 60, truncation = True)

embeddings = model(**input,output_attentions=True).last_hidden_state

print(embeddings.shape)

new_embeddings = embeddings

print(model.encoder(embeddings,attention_mask = input["attention_mask"]).last_hidden_state)

def mean_pooling(token_embeddings, attention_mask):
                                     #token_embeddings = model_output[0] # First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp(input_mask_expanded.sum(1), min=1e-9)
print(F.normalize(mean_pooling(embeddings[0], input['attention_mask']),p=2,dim=1).shape)



#print(model.encoder(prompt))

torch.Size([2, 60, 768])


RuntimeError: The size of tensor a (60) must match the size of tensor b (2) at non-singleton dimension 2

Then we output some model specific parameter values. 


In [None]:
num_layers = len(model.encoder.layer)
emb_dim = model.config.hidden_size
num_heads = model.config.num_attention_heads
head_dim = emb_dim // num_heads

Then we need some code to "hook" specific activations from the model (such as the output from MLPs, attention heads). We collect the hooks in dictionaries. 

In [None]:
#Hook attention heads

attn_act = {}
attn_hooks = {}

def hook_atthead(layer_index,head_index): 
    def hook(module, inp, out):
        batch, seq_len, embed_dim = out.shape
        out = out.view(batch, seq_len, num_heads, head_dim)
        attn_act[f"{layer_index}.{head_index}"] = out[:, :, j, :].detach()
    return hook

for i in range(num_layers):
    for j in range(num_heads): 
        attn_hooks[f"{i}.{j}"] = model.encoder.layer[i].attention.output.dense.register_forward_hook(hook_atthead(i,j))

#Hook MLPs

mlp_act = {}
mlp_hooks = {}

def hook_mlp(layer_index):
    def hook(module, inp, out):
        mlp_act[f"{layer_index}"] = out.detach()
    return hook

for i in range(num_layers): 
    mlp_hooks[f"{i}"] = model.encoder.layer[i].mlp.wo.register_forward_hook(hook_mlp(i))                                                      


original_wo = model.encoder.layer[6].mlp.wo.forward


Next we run the input text in the model and test ablations. We ablate one of the activations. 

In [None]:
embeddings1 = model(**input, output_hidden_states=False)

def zero_mlp_wo_output(*args, **kwargs):
    return -original_wo(*args, **kwargs)*100     #torch.zeros_like(original_wo(*args, **kwargs))

model.encoder.layer[6].mlp.wo.forward = zero_mlp_wo_output

embeddings2 = model(**input, output_hidden_states=False)

cosine_similarity = F.cosine_similarity(embeddings1.pooler_output.detach(), embeddings2.pooler_output.detach()) 
print(cosine_similarity)

diff = torch.norm(embeddings1.pooler_output - embeddings2.pooler_output, p=2)
print("L2 distance:", diff.item())

tensor([0.8447])
L2 distance: 4.934483051300049


Next we will attempt to do an activation patching. 

In [None]:
prompt_corrupted = "Computer vision tasks, such as object detection, image segmentation, and scene understanding, are typically approached with supervised learning on domain-specific datasets."

input_corrupted = tokenizer(prompt_corrupted, return_tensors = "pt", padding = "max_length", max_length = 60, truncation = True)

mlp_hooks["6"].remove()

def patch_hook(module, input, output):
    return mlp_act["6"]

patch_handle = model.encoder.layer[6].mlp.wo.register_forward_hook(patch_hook)
embeddings_corrupted = model(**input_corrupted, output_hidden_states=False)
patch_handle.remove()

print(F.cosine_similarity(embeddings1.pooler_output.detach(), embeddings_corrupted.pooler_output.detach()))

tensor([0.2406])


In [None]:
import json 

with open("test.tok.json","r") as infile: 
    infile.readline()
    lines = infile.readlines()

print(f"Line content: {repr(lines[-1].strip("\n").rstrip(","))}")
data = [json.loads(line.strip("\n").rstrip(',').rstrip("]")) for line in lines]





Line content: '{"question": "A grocery sells a bag of ice for $ 1.25 , and makes 20 % profit . If it sells 500 bags of ice , how much total profit does it make ?", "correct": "A", "rationale": "Profit per bag = 1.25 * 0.20 = 0.25\\nTotal profit = 500 * 0.25 = 125\\nAnswer is A .", "options": ["A ) 125", "B ) 150", "C ) 225", "D ) 250", "E ) 275"]}]'
{'question': '20 marbles were pulled out of a bag of only white marbles , painted black , and then put back in . Then , another 20 marbles were pulled out , of which 1 was black , after which they were all returned to the bag . If the percentage of black marbles pulled out the second time represents their percentage in the bag , how many marbles in total Q does the bag currently hold ?', 'correct': 'D', 'rationale': 'We know that there are 20 black marbles in the bag and this number represent 1 / 20 th of the number of all marbles in the bag , thus there are total Q of 20 * 20 = 400 marbles .\nAnswer : D .', 'options': ['A ) 40', 'B ) 200',