### Imports & Definitions

In [1]:
import sys
sys.path.append("..")
from definitions import *
from llm_helpers import calculate_probs_and_get_answer
from huggingface_helpers import get_tokenizer, get_model

### View OpenBookQA structure

In [2]:
with open(DIPLOMA_DIR_PATH.joinpath("OpenBookQA-V1-Sep2018/Data/Main/dev.jsonl")) as f:
    quiz = [json.loads(x) for x in f.readlines()]
quiz[0]

{'id': '8-376',
 'question': {'stem': 'Frilled sharks and angler fish live far beneath the surface of the ocean, which is why they are known as',
  'choices': [{'text': 'Deep sea animals', 'label': 'A'},
   {'text': 'fish', 'label': 'B'},
   {'text': 'Long Sea Fish', 'label': 'C'},
   {'text': 'Far Sea Animals', 'label': 'D'}]},
 'answerKey': 'A'}

In [3]:
df = pd.read_csv(DIPLOMA_DIR_PATH.joinpath("OpenBookQA-V1-Sep2018/Data/Main/dev.tsv"), sep='\t')
df.iloc[0]

ID                                                               8-376
Question Stem        Frilled sharks and angler fish live far beneat...
Choices              (A) Deep sea animals (B) fish (C) Long Sea Fis...
Complete Question    Frilled sharks and angler fish live far beneat...
Answer Key                                                           A
Name: 0, dtype: object

### Evalute Llama 2

In [4]:
! ollama list

NAME                    	ID          	SIZE  	MODIFIED   
dolphin-phi:latest      	c5761fc77240	1.6 GB	7 days ago	
gemma:2b                	b50d6c999e59	1.7 GB	7 days ago	
gemma:7b                	a72c7f4d0a15	5.0 GB	7 days ago	
llama2:13b              	d475bf4c50bc	7.4 GB	7 days ago	
llama2:70b              	e7f6c06ffef4	38 GB 	7 days ago	
llama2:latest           	78e26419b446	3.8 GB	7 days ago	
llama2-uncensored:latest	44040b922233	3.8 GB	7 days ago	
llama3:70b              	bcfb190ca3a7	39 GB 	7 days ago	
llama3:latest           	71a106a91016	4.7 GB	7 days ago	
llava:latest            	8dd30f6b0cb1	4.7 GB	7 days ago	
mistral:latest          	61e88e884507	4.1 GB	7 days ago	
neural-chat:latest      	89fa737d3b85	4.1 GB	7 days ago	
orca-mini:latest        	2dbd9f439647	2.0 GB	7 days ago	
phi:latest              	e2fd6321a5fe	1.6 GB	7 days ago	
phi3:latest             	a2c89ceaed85	2.3 GB	3 days ago	
solar:latest            	059fdabbe6e6	6.1 GB	7 days ago	
starling-lm:latest      	39153f

In [53]:
model_name = LLAMA_2_7B
tokenizer = get_tokenizer(model_name, HUGGINGFACE_MODEL_TO_REPO[model_name])
model2 = get_model(model_name, HUGGINGFACE_MODEL_TO_REPO[model_name])

In [62]:
input_prompt = df.iloc[0][3]
input_ids = tokenizer(input_prompt, return_tensors="pt").input_ids.to(model.device)

In [54]:
with torch.no_grad():
    outputs = model2(input_ids=input_ids)

In [55]:
outputs.keys()

odict_keys(['last_hidden_state', 'past_key_values'])

In [49]:
from transformers import LlamaForCausalLM
model = LlamaForCausalLM.from_pretrained(HUGGINGFACE_MODEL_TO_REPO[model_name])
model

model.safetensors.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.98G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.50G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/188 [00:00<?, ?B/s]

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()
  )
  (lm_head):

In [63]:
with torch.no_grad():
    outputs2 = model(input_ids)

In [96]:
tokenizer.vocab.keys().__len__()

32000

In [97]:
tokens_all = [
    tokenizer(x, add_special_tokens=False).input_ids[-1] for x in tokenizer.vocab.keys()
]

In [86]:
tokens_of_interest = [
    tokenizer("A", add_special_tokens=False).input_ids[-1],
    tokenizer("B", add_special_tokens=False).input_ids[-1],
    tokenizer("C", add_special_tokens=False).input_ids[-1],
    tokenizer("D", add_special_tokens=False).input_ids[-1],
    tokenizer(" ", add_special_tokens=False).input_ids[-1],
]

In [87]:
logits = outputs2.logits  # shape (batch_size, sequence_length, vocab_size)
next_token_logits = logits[:, -1, :]  # shape (batch_size, vocab_size)

next_token_logits = next_token_logits.flatten()
next_token_probs = torch.nn.functional.softmax(next_token_logits, dim=-1).cpu()  # all probs over vocab

In [105]:
probs = next_token_probs[tokens_all].tolist()
res = dict(zip(tokenizer.vocab.keys(), probs))

In [100]:
tokenizer.decode(token_ids=input_ids[0])

'<s> Frilled sharks and angler fish live far beneath the surface of the ocean, which is why they are known as (A) Deep sea animals (B) fish (C) Long Sea Fish (D) Far Sea Animals'

In [110]:
entries = list(res.items())

In [112]:
entries = sorted(entries, key=lambda x: -x[1])

In [114]:
entries[:100]

[('/.', 0.3039078116416931),
 ('?.', 0.3039078116416931),
 ('=.', 0.3039078116416931),
 ('__.', 0.3039078116416931),
 ('\\.', 0.3039078116416931),
 ('>.', 0.3039078116416931),
 ('-.', 0.3039078116416931),
 ('_.', 0.3039078116416931),
 (',.', 0.3039078116416931),
 (')..', 0.3039078116416931),
 ('!.', 0.3039078116416931),
 ('“.', 0.3039078116416931),
 ('}.', 0.3039078116416931),
 ('{.', 0.3039078116416931),
 ('</s>', 0.057264987379312515),
 ('"?', 0.026555825024843216),
 ('`?', 0.026555825024843216),
 (')?', 0.026555825024843216),
 ('$?', 0.026555825024843216),
 ('/?', 0.026555825024843216),
 ('▁(', 0.019481388852000237),
 ('(', 0.019481388852000237),
 ('▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁', 0.006530789192765951),
 ('Answer', 0.003926899749785662),
 ('▁Answer', 0.003926899749785662),
 ('▁.', 0.002654710318893194),
 ('.', 0.002654710318893194),
 ('▁?', 0.0019710473716259003),
 ('?', 0.0019710473716259003),
 ('and', 0.0016641627298668027),
 ('▁and', 0.0016641627298668027),
 ('(...', 0.0016302078729495406),
 

In [44]:
outputs.keys()

odict_keys(['last_hidden_state', 'past_key_values'])

In [43]:
type(outputs)

transformers.modeling_outputs.BaseModelOutputWithPast

In [41]:
outputs[0].shape, outputs[1][1][1][0][1][1].shape

(torch.Size([1, 3, 4096]), torch.Size([128]))

In [19]:
outputs.keys()

odict_keys(['last_hidden_state', 'past_key_values'])

In [24]:
len(tokenizer.get_vocab())

32000

In [22]:
outputs.last_hidden_state.shape, len(outputs.past_key_values)

(torch.Size([1, 3, 4096]), 32)

### Evaluate Mistral

In [None]:
model_name = MISTRAL_7B
tokenizer = get_tokenizer(model_name, HUGGINGFACE_MODEL_TO_REPO[model_name])
model2 = get_model(model_name, HUGGINGFACE_MODEL_TO_REPO[model_name])

In [7]:
from transformers import AutoModelForCausalLM
casual_mistral = AutoModelForCausalLM.from_pretrained(HUGGINGFACE_MODEL_TO_REPO[MISTRAL_7B])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [6]:
from llm_helpers import calculate_token_interest_probs

In [11]:
df.iloc[0]

ID                                                               8-376
Question Stem        Frilled sharks and angler fish live far beneat...
Choices              (A) Deep sea animals (B) fish (C) Long Sea Fis...
Complete Question    Frilled sharks and angler fish live far beneat...
Answer Key                                                           A
Name: 0, dtype: object

In [8]:
for i in trange(len(df)):
    q = df.iloc[i, 3]
    print(q)
    a = calculate_probs_and_get_answer(q, tokenizer, casual_mistral)
    print(a)
    break

  0%|          | 0/500 [00:00<?, ?it/s]

Frilled sharks and angler fish live far beneath the surface of the ocean, which is why they are known as (A) Deep sea animals (B) fish (C) Long Sea Fish (D) Far Sea Animals


  0%|          | 0/500 [01:02<?, ?it/s]

A





In [10]:
calculate_token_interest_probs(q, tokenizer, casual_mistral)

{'A': 0.0009505083435215056,
 'B': 0.00021837258827872574,
 'C': 0.00032975224894471467,
 'D': 0.00031869541271589696}

### Evaluate Llama 2 again

In [4]:
model_name = LLAMA_2_7B
tokenizer = get_tokenizer(model_name, HUGGINGFACE_MODEL_TO_REPO[model_name])
from transformers import AutoModelForCausalLM
casual_llama_2 = AutoModelForCausalLM.from_pretrained(HUGGINGFACE_MODEL_TO_REPO[model_name])

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [8]:
for i in trange(len(df)):
    q = df.iloc[i, 3]
    print(q)
    probs = calculate_token_interest_probs(q, tokenizer, casual_llama_2)
    a = calculate_probs_and_get_answer(q, tokenizer, casual_llama_2)
    print(a)
    break

  0%|          | 0/500 [00:00<?, ?it/s]

Frilled sharks and angler fish live far beneath the surface of the ocean, which is why they are known as (A) Deep sea animals (B) fish (C) Long Sea Fish (D) Far Sea Animals


  0%|          | 0/500 [02:05<?, ?it/s]

A



