# Experiment 01
## with textual encoding (UP, DOWN, LEFT, RIGHT)
### Richardson et al. (2002) Experiment 01 with Language Models instead of Humans:

The subjects were presented with a single page,
containing a list of the verbs and four pictures, labelled A to
D. Each one contained a circle and a square aligned along a
vertical or horizontal axis, connected by an arrow pointing
up, down, left or right. Since we didn't expect any
interesting item variation between left or right placement of
the circle or square, the horizontal schemas differed only in
the direction of the arrow.
For each sentence, subjects were asked to select one of
the four sparse images that best depicted the event described
by the sentence (Figure 1).
The items were randomised in three different orders, and
crossed with two different orderings of the images. The six
lists were then distributed randomly to subjects.



### Setup of Experimental Environment

In [1]:
import torch
import os
from transformers import GPT2Tokenizer, OPTForCausalLM, AutoTokenizer, AutoModelForCausalLM
from transformers import GPTNeoXForCausalLM, GPTNeoXTokenizerFast, GPTNeoForCausalLM, GPT2Tokenizer
import torch.nn.functional as F
import pandas as pd
import numpy as np
import seaborn as sns
from tqdm import tqdm
from scipy import stats
from collections import Counter
import subprocess
import random
random.seed(1337)

# Take the first line of the key file as the Hugging Face token and hand it
# to `huggingface-cli login`.
# NOTE(review): the CLI's return code is not inspected, so a failed login
# goes unnoticed at this point.
with open("../../hf.key", "r") as key_file:
    hf_key = key_file.readline().strip()
login_command = ["huggingface-cli", "login", "--token", hf_key]
subprocess.run(login_command)

def convert_to_float(value):
    """Return ``value`` as a float when it parses as one, else unchanged.

    Args:
        value: The object to convert, e.g. one whitespace-separated token
            of a data file.

    Returns:
        ``float(value)`` if that conversion succeeds; otherwise the original
        ``value`` object (only ``ValueError`` is intercepted).
    """
    try:
        number = float(value)
    except ValueError:
        # Not a numeric literal: hand the input back untouched.
        return value
    else:
        return number

# Directory prefix that the model-loading cells prepend to
# "<model_type>/<model_name>" when reading the model weights.
server_model_path = "/mounts/data/corp/huggingface/"


Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /mounts/data/corp/huggingface/token
Login successful


### Loading preprocessed data by Richardson

Creates three dictionaries:
 * `richardson_data`
 
 All choices as vectors, e.g. `{'fled': [7.2, 4.2, 80.8, 7.8], 'pointed at': [7.2, 3.6, 0.0, 89.2] ...`
 
 * `richardson_categorial`
 
 Maximum choice as binary choice, e.g. `{'fled': [0, 0, 1, 0], 'pointed at': [0, 0, 0, 1] ...`
 
 * `richardson_normed`
 
 Each choice divided by the sum of all choices for that verb, so every vector sums to 1 (up to rounding), e.g.  `{'fled': [0.072, 0.042, 0.808, 0.078], 'pointed at': [0.072, 0.036, 0.0, 0.892] ...`
 

In [2]:
# Parse the whitespace-separated table: every row is a verb label (possibly
# several words, e.g. "pointed at") followed by its numeric scores.
# Blank lines are skipped so they cannot produce empty rows.
with open("../../data/richardson_actions.txt", "r") as d_in:
    lines = [line.split() for line in d_in if line.strip()]

output = []
for entry in lines:
    new_entry = [convert_to_float(item) for item in entry]

    # Fold every non-numeric token that directly follows the first one into
    # the label, so labels of any word count (not only two) are merged.
    label_len = 1
    while label_len < len(new_entry) and isinstance(new_entry[label_len], str):
        label_len += 1
    if label_len > 1:
        new_entry[:label_len] = [" ".join(new_entry[:label_len])]
    output.append(new_entry)

# label -> list of raw scores
richardson_data = {row[0]: list(row[1:]) for row in output}

# Randomizing Richardson's data
# (uses the global `random` state, so the order depends on the seed set
# before this cell runs and on how often this cell has been executed)
action_words = list(richardson_data.keys())
random.shuffle(action_words)

richardson_categorial = dict()
richardson_normed = dict()
for action, values in richardson_data.items():
    # NOTE(review): labels are strings in the printed output, so this guard
    # looks like a leftover; kept to preserve the original filtering.
    if action == 0:
        continue

    # 1 at the position of the (first) maximum score, 0 elsewhere
    one_hot = [0] * len(values)
    one_hot[values.index(max(values))] = 1
    richardson_categorial[action] = one_hot

    # Every score as a share of the row total, rounded to 4 decimals
    total = sum(values)
    richardson_normed[action] = [round(val/total,4) for val in values]

print(richardson_normed)

{'fled': [0.072, 0.042, 0.808, 0.078], 'pointed at': [0.072, 0.036, 0.0, 0.892], 'pulled': [0.06, 0.054, 0.754, 0.132], 'pushed': [0.072, 0.036, 0.012, 0.88], 'walked': [0.0905, 0.0362, 0.2412, 0.6322], 'hunted': [0.0959, 0.2038, 0.018, 0.6823], 'impacted': [0.072, 0.371, 0.03, 0.527], 'perched': [0.12, 0.76, 0.066, 0.054], 'showed': [0.1499, 0.0899, 0.1019, 0.6583], 'smashed': [0.036, 0.665, 0.012, 0.287], 'bombed': [0.048, 0.868, 0.018, 0.066], 'flew': [0.377, 0.443, 0.15, 0.03], 'floated': [0.329, 0.563, 0.078, 0.03], 'lifted': [0.874, 0.096, 0.024, 0.006], 'sank': [0.2218, 0.7183, 0.042, 0.018], 'argued with': [0.1139, 0.1379, 0.1259, 0.6224], 'gave to': [0.084, 0.096, 0.012, 0.808], 'offended': [0.09, 0.317, 0.246, 0.347], 'rushed': [0.1025, 0.1085, 0.2352, 0.5538], 'warned': [0.1079, 0.2218, 0.0599, 0.6104], 'owned': [0.0539, 0.5564, 0.1858, 0.2038], 'regretted': [0.1978, 0.2398, 0.4126, 0.1499], 'rested': [0.144, 0.365, 0.401, 0.09], 'tempted': [0.168, 0.114, 0.455, 0.263], 'wan

## Choosing model

In [3]:
# The two components of the model identifier; the loading cells join them
# as "<model_type>/<model_name>".
model_type = "meta-llama"
model_name = "Llama-2-7b-hf"

## Choose specific GPU for model

In [None]:
# Index of the CUDA device that should host the model
gpu_id = 6

# Make `gpu_id` the current CUDA device for the duration of the load, then
# move the freshly loaded model onto the "cuda" device from inside that context.
with torch.cuda.device(gpu_id):
    tokenizer = AutoTokenizer.from_pretrained(
        model_type+"/"+model_name,
        use_auth_token=True,
    )
    model = AutoModelForCausalLM.from_pretrained(
        server_model_path+model_type+"/"+model_name,
        use_auth_token=True,
    )
    model = model.to(torch.device("cuda"))

## Loading model

In [4]:
# No single GPU is pinned in this path; device placement of the weights is
# delegated to `device_map="auto"` on the model load below.
gpu_id = None
# `device_map` is a model-loading option, so it is passed to the model only
# and no longer to the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_type+"/"+model_name, use_auth_token=True)
model = AutoModelForCausalLM.from_pretrained(server_model_path+model_type+"/"+model_name, use_auth_token=True, device_map="auto")

The model weights are not tied. Please use the `tie_weights` method before using the `infer_auto_device` function.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

### Run experiment (includes prompt)


In [9]:
# For every action word: prompt the model, generate a short continuation and
# record, per direction, whether its name occurs in that continuation.
model_choices = dict()
arrows = ['UP', 'DOWN', 'LEFT', 'RIGHT']
max_new_tokens = 20  # number of tokens the model may generate after the prompt

for action_word in tqdm(action_words):

    # NOTE(review): action words are strings in the stored results, so this
    # guard looks like a leftover; kept to preserve the original filtering.
    if action_word == 0:
        continue

    ### PROMPT DEFINED HERE
    friendly_prompt = "Select the CONCEPT that best represents the event described by the sentence: "+action_word+". CONCEPTS: UP, DOWN, LEFT, RIGHT.\nThe best representation is CONCEPT:"

    # Send the prompt to the device the model reports as its own. This covers
    # both loading paths (pinned GPU, including index 0, and device_map="auto")
    # without branching on `gpu_id` or relying on the default CUDA device.
    input_ids = tokenizer.encode(friendly_prompt, return_tensors="pt").to(model.device)
    output = model.generate(input_ids, max_new_tokens=max_new_tokens, num_return_sequences=1)

    # Decode only the newly generated tokens, so the check below never depends
    # on the decoded text reproducing the prompt character for character.
    generated_answer = tokenizer.decode(output[0][input_ids.size(1):], skip_special_tokens=True)

    # One 0/1 flag per entry of `arrows`; several flags may be set if the
    # continuation mentions more than one direction.
    model_choices[action_word] = [1 if arrow in generated_answer else 0 for arrow in arrows]

# Llama-70b: 4m:15s
# Llama-13b: 1m:50s
# Llama-7b:  40s

100%|██████████| 30/30 [00:51<00:00,  1.73s/it]


### Storing experimental results from free-form generation as `exp01a_`

In [6]:
# Ask before writing so an earlier result file is not overwritten by accident.
to_store = input("Should the result for "+model_name+" be stored? (y/n):")

# Accept "y" regardless of case or surrounding whitespace.
if to_store.strip().lower() == "y":

    # Create the output directory on first use instead of failing on open().
    os.makedirs("results", exist_ok=True)

    # NOTE(review): the markdown heading of this section names the prefix
    # `exp01a_`, while the file is written with `exp01b_` — confirm which
    # one is intended.
    with open("results/exp01b_"+model_name+".txt", "w") as f_out:
        # Tab-separated table: action word followed by one 0/1 flag per direction
        f_out.write("Action\tUP\tDOWN\tLEFT\tRIGHT\n")
        for k,v in model_choices.items():
            f_out.write(k+"\t"+"\t".join([str(x) for x in v])+"\n")


    

In [7]:
# Display the recorded 0/1 vector per action word (positions follow the
# order UP, DOWN, LEFT, RIGHT used when the vectors were built).
model_choices

{'walked': [1, 0, 0, 0],
 'perched': [1, 0, 0, 0],
 'respected': [1, 0, 0, 0],
 'fled': [1, 0, 0, 0],
 'pulled': [0, 1, 0, 0],
 'wanted': [1, 0, 0, 0],
 'pushed': [0, 1, 0, 0],
 'argued with': [1, 0, 0, 0],
 'obeyed': [1, 0, 0, 0],
 'showed': [1, 0, 0, 0],
 'sank': [0, 1, 0, 0],
 'lifted': [1, 0, 0, 0],
 'regretted': [1, 0, 0, 0],
 'gave to': [1, 0, 0, 0],
 'pointed at': [1, 0, 0, 0],
 'succeeded': [1, 0, 0, 0],
 'impacted': [1, 0, 0, 0],
 'owned': [1, 0, 0, 0],
 'smashed': [1, 0, 0, 0],
 'increased': [1, 0, 0, 0],
 'floated': [1, 0, 0, 0],
 'bombed': [0, 1, 0, 0],
 'hunted': [1, 0, 0, 0],
 'tempted': [1, 0, 0, 0],
 'hoped': [1, 0, 0, 0],
 'rushed': [1, 0, 0, 0],
 'flew': [1, 0, 0, 0],
 'rested': [1, 0, 0, 0],
 'offended': [1, 0, 0, 0],
 'warned': [1, 0, 0, 0]}