In [1]:
import torch
import transformers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from unsloth import FastLanguageModel

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
from os.path import join as pjoin
import os
from functools import partial
import re
from tqdm.notebook import tqdm

class Unsloth:
    """Load a model through Unsloth in 4-bit and expose it as a text-generation pipeline.

    Parameters
    ----------
    llm_info : tuple
        ``(engine, max_tokens, temperature)``. ``engine`` is the model name
        (a leading ``unsloth_`` is rewritten to ``unsloth/``), ``max_tokens``
        is passed to Unsloth as ``max_seq_length``, and ``temperature`` is
        passed to the pipeline.
    max_new_tokens : int, optional
        Number of new tokens the pipeline generates per call. Defaults to 20,
        the value that used to be hard-coded.
    """

    def __init__(self, llm_info, max_new_tokens=20):
        engine, max_tokens, temperature = llm_info
        engine = engine.replace('unsloth_', 'unsloth/')

        print(engine)
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name=engine,
            max_seq_length=max_tokens,
            dtype=None,
            load_in_4bit=True,
        )
        # Required before generating: without it generation fails with
        # `Cache only has 0 layers...`.
        FastLanguageModel.for_inference(model)
        self.pipe = transformers.pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            temperature=temperature,
            max_new_tokens=max_new_tokens,
        )

    def _generate(self, text, temp, max_tokens):
        """Return only the text the pipeline generated after the prompt ``text``.

        ``temp`` and ``max_tokens`` are accepted for interface compatibility
        but are not used: temperature and generation length are fixed when the
        pipeline is built in ``__init__``.
        """
        result = self.pipe(text)[0]
        # The pipeline output starts with the prompt itself; strip it off.
        return result['generated_text'][len(text):]

# llm_info tuple consumed by Unsloth.__init__: (engine, max_tokens, temperature).
model_id = (
    "meta-llama/Llama-3.1-8B-Instruct",
    32768,
    1e-6,
)
model = Unsloth(llm_info=model_id)

# take out from_pretrained etc. ... code Milena
- For consistent responses, use the 70B model.
- How to request 2 GPUs: see Can's wiki.
- With these two GPUs, the 70B model can be run (both are required to fit the model in memory).
- 

In [3]:
# Model size in billions of parameters; selects the Llama 3.1 Instruct checkpoint.
nbilly = 8
engine = f"meta-llama/Llama-3.1-{nbilly}B-Instruct"

# NOTE: the recorded run reports that the GPU does not support bfloat16,
# so Unsloth falls back to float16 there.
load_kwargs = dict(
    model_name=engine,
    dtype=torch.bfloat16,
    load_in_4bit=True,
    device_map="auto",
)
model, tokenizer = FastLanguageModel.from_pretrained(**load_kwargs)

# Use the BOS token as the padding token.
tokenizer.pad_token = tokenizer.bos_token

# Token ids that stop generation: the tokenizer's EOS id and the "<|eot_id|>" token.
eot_token_id = tokenizer.convert_tokens_to_ids("<|eot_id|>")
terminators = [tokenizer.eos_token_id, eot_token_id]

==((====))==  Unsloth 2025.1.8: Fast Llama patching. Transformers: 4.49.0.dev0.
   \\   /|    GPU: Tesla V100-SXM3-32GB. Max memory: 31.733 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1. CUDA: 7.0. CUDA Toolkit: 12.1. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Device does not support bfloat16. Will change to float16.


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

meta-llama/Llama-3.1-8B-Instruct does not have a padding token! Will use pad_token = <|finetune_right_pad_id|>.


In [413]:
# Hand the loaded model to Unsloth's `for_inference` before calling `model.generate` below.
# NOTE(review): this presumably enables Unsloth's inference path — confirm in the Unsloth docs.
FastLanguageModel.for_inference(model)

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128004)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSN

In [545]:
# System prompt sent as the first chat message of every trial.
# Adjacent string literals are concatenated by Python; no `+` is needed.
system_input = (
    "You are a cognitive scientist and you are given a by-trial data set from a participant "
    "in the odd-one-out task. For each trial, you are presented with the three words and "
    "the participant's response, i.e., the word the participant thinks is the odd one out of the three. "
    "Note that in the very first trial, you are only presented with the three words, but no previous responses."
    "\nYour goal is to predict, for a new triplet of words, "
    "which word the person is most likely to identify as the odd one out."
)

## Load Triplet Responses

In [7]:
# PyTorch device string for the first CUDA GPU.
device = "cuda:0"

In [8]:
# Load the training triplets from the text file as a CPU int64 tensor.
# The earlier `.to(device).type(torch.LongTensor)` chain copied the data to the GPU
# and straight back (torch.LongTensor is a CPU tensor type); `.long()` yields the
# same CPU int64 tensor without needing CUDA.
train_triplets = torch.from_numpy(np.loadtxt(
    pjoin("./semantic-representations-ID/data/", 'train_90_ID.txt'))).long()

In [9]:
# Convert the triplet tensor to a NumPy array so rows can be filtered with boolean masks below.
triplet_ids = train_triplets.detach().numpy()

In [10]:
# Read the label file without a header row.
# NOTE(review): the backslash delimiter presumably never occurs in the file, so that
# each line ends up as a single value in column 0 — confirm against the data.
tbl_labels = pd.read_csv("./semantic-representations-ID/data/unique_id.txt", delimiter="\\", header=None)

In [11]:
# Strip every run of digits from the labels in column 0 (modifies tbl_labels in place).
tbl_labels[0] = tbl_labels[0].str.replace(r'\d+', '', regex=True)

In [650]:
# Rows whose fourth column equals 1.
# NOTE(review): this variable is not used by the later cells shown here, which filter
# on `== 0` instead — confirm which subset is intended.
current_triplets = triplet_ids[triplet_ids[:, 3] == 1]

In [651]:
def randomize_and_extract_trial(tbl_labels, l_ids, rng=None):
    """Build a one-row trial frame with the triplet's words in random order.

    Parameters
    ----------
    tbl_labels : pd.DataFrame
        Label table; the word for id ``i`` is read from row position ``i``,
        column position 0.
    l_ids : sequence of int
        Row positions into ``tbl_labels``. Only the first three entries are used.
    rng : optional
        Object with a ``choice(a, size, replace=...)`` method, e.g.
        ``np.random.default_rng(seed)``, for reproducible shuffling. When
        ``None`` (default) the global ``np.random`` state is used, as before.

    Returns
    -------
    pd.DataFrame
        One row with columns ``"1"``, ``"2"``, ``"3"`` (the shuffled words) and
        ``"selected"``, the 1-based position at which the word that was third
        in ``l_ids`` ended up.
        NOTE(review): presumably the third id is the participant's odd-one-out
        choice — confirm against the data description.
    """
    sampler = np.random if rng is None else rng
    words = tbl_labels.iloc[l_ids[0:3], 0].tolist()

    # rand_order[k] is the original position of the word shown at position k.
    rand_order = np.asarray(sampler.choice([0, 1, 2], 3, replace=False))

    return pd.DataFrame({
        "1": [words[rand_order[0]]],
        "2": [words[rand_order[1]]],
        "3": [words[rand_order[2]]],
        "selected": np.where(rand_order == 2)[0] + 1,
    })

In [652]:
# Bind the label table so the function can be mapped over triplet rows with a single argument.
randomize_and_extract_partial = partial(randomize_and_extract_trial, tbl_labels)

In [653]:
# Build one randomized single-row trial frame per triplet row whose fourth column equals 0.
# NOTE(review): the meaning of the fourth column is not visible here — confirm that 0 is the intended subset.
l_trials = list(map(randomize_and_extract_partial, triplet_ids[triplet_ids[:,3]==0]))

In [654]:
# Stack the single-row trial frames into one table. Each input frame has index label 0,
# so renumber the rows (0..n-1) to avoid a table whose index labels are all identical.
df_trials = pd.concat(l_trials, ignore_index=True)

In [655]:
# Number of trials (taken from the top of df_trials) that are sent to the model.
n_trials = 100

# Format every trial exactly once:
#   task text     -> "1. w1, 2. w2, 3. w3"
#   response text -> "<selected>. <selected word> in: <task text>"
l_all_current_tasks = []
l_formatted_responses = []
for current_trial_id in range(n_trials):
    current_trial = df_trials.iloc[current_trial_id, :]
    current_task = ", ".join(
        f"{index+1}. {item}" for index, item in enumerate(current_trial.iloc[0:3])
    )
    selected = current_trial["selected"]
    l_all_current_tasks.append(current_task)
    l_formatted_responses.append(
        f"{selected}. {current_trial.iloc[selected - 1]} in: " + current_task
    )

# The history for trial i is the formatted responses of trials 0..i-1 (empty for the
# first trial). Slicing replaces re-formatting all earlier trials for every trial.
l_all_previous_trials = [
    l_formatted_responses[:current_trial_id] for current_trial_id in range(n_trials)
]

In [656]:
def _extract_choice(decoded_response):
    """Return the first digit 1-3 in `decoded_response` as an int, or NaN if there is none."""
    match = re.search("[1-3]", decoded_response)
    return int(match.group()) if match else np.nan


# Task description shared by all trials; it does not depend on the trial, so build it once.
input_text = (
    "In an odd-one-out task, a participant is presented with three objects in each trial. "
    "The participant is instructed to select the object they think is most dissimilar from the other presented objects. "
    "A participant's response is neither correct nor incorrect. The response should simply reflect the participant's "
    "perceived semantic similarity between the three objects."
)

l_responses_int = []
for trial_id in tqdm(range(0, n_trials)):
    l_prev_concat = l_all_previous_trials[trial_id]
    current = l_all_current_tasks[trial_id]

    # NOTE(review): on the first trial `l_prev_concat` is empty, so the prompt contains
    # the "previous responses" sentence followed by no responses.
    question_text = (
        " Here are the previous responses from one participant:\n"
        + "\n".join(l_prev_concat)
        + "\nWhich of the following three words is the person to denote as the odd-one-out?\n"
        + f"{current}?"
    )
    user_input = input_text + question_text

    # The last message is left open-ended (continue_final_message=True) so that the
    # model's first generated tokens are the answer number.
    # NOTE(review): the open-ended message uses role "system"; response prefill is
    # normally done with role "assistant" — confirm which is intended.
    messages = [
        {"role": "system", "content": system_input},
        {"role": "user", "content": user_input},
        {"role": "system", "content": "For the newest triplet, the participant responds with Nr. "},
    ]

    input_ids = tokenizer.apply_chat_template(
        messages,
        return_tensors="pt",
        continue_final_message=True,
    ).to(model.device)

    # Greedy decoding of at most two new tokens.
    outputs = model.generate(
        input_ids,
        max_new_tokens=2,
        eos_token_id=terminators,
        do_sample=False,
    )

    # Keep only the newly generated tokens (everything after the prompt).
    response = outputs[0][input_ids.shape[-1]:]
    decoded_response = tokenizer.decode(response, skip_special_tokens=True)

    # A reply without a digit 1-3 is recorded as NaN; it no longer aborts the loop.
    l_responses_int.append(_extract_choice(decoded_response))

  0%|          | 0/100 [00:00<?, ?it/s]

In [657]:
# Copy the first n_trials rows so that adding predictions below does not modify df_trials.
df_trials_pred = df_trials.head(n_trials).copy()

In [658]:
# Attach the parsed model choices, one per trial, in trial order.
df_trials_pred["predicted"] = l_responses_int

In [659]:
# Share of trials on which the model's prediction equals the participant's choice.
n_correct = len(df_trials_pred.query("selected == predicted"))
n_correct / len(df_trials_pred)

0.33

TODO: use the OpenAI embedding submodule for embeddings?