In [1]:
import pandas as pd
import re
import requests
import os
import numpy as np

In [2]:
def extract_answer(text):
    """Return the content of the first ``<answer>...</answer>`` pair in ``text``.

    Args:
        text: Model output to search. Non-string values (e.g. ``None`` or the
            NaN pandas produces for an empty CSV cell) are treated as
            containing no answer.

    Returns:
        The text between the first ``<answer>`` tag and the nearest following
        ``</answer>`` tag with surrounding whitespace stripped, or ``None``
        when no such pair is present.
    """
    if not isinstance(text, str):
        return None

    # re.DOTALL lets "." match newlines, so an answer the model places on its
    # own line(s) inside the tags is still found.
    match = re.search(r'<answer>(.*?)</answer>', text, re.DOTALL)
    return match.group(1).strip() if match else None

def split_think_text(text):
    """Split a model response into its reasoning and the text that follows it.

    Returns a ``(think_content, after_content)`` tuple:

    * ``think_content`` - text between the first ``<think>`` and the nearest
      following ``</think>``, stripped; ``""`` if there is no such pair.
    * ``after_content`` - everything after the first ``</think>``, stripped;
      ``""`` if the closing tag never appears.
    """
    def _captured(pattern):
        # Both lookups behave the same way: first match wins, "." spans
        # newlines, and a missing match yields an empty string.
        hit = re.search(pattern, text, re.DOTALL)
        if hit is None:
            return ""
        return hit.group(1).strip()

    return _captured(r'<think>(.*?)</think>'), _captured(r'</think>(.*)')

# Maps the short model names used in this notebook to the identifiers sent in
# the "model" field of the API request.
MODELS = {
    'DeepSeek-R1': 'deepseek-ai/DeepSeek-R1',
    'DeepSeek-R1-Zero': 'deepseek-ai/DeepSeek-R1-Zero',
    # The key used to be misspelled ("DeekSeek"); the misspelling is kept as an
    # alias so anything that already relied on it keeps working.
    'DeekSeek-R1-Zero': 'deepseek-ai/DeepSeek-R1-Zero',
}

def hyperbolic_query(query, model="DeepSeek-R1", temperature = 0.6, timeout=600):
    """Send a single-turn chat prompt to the Hyperbolic API and return the reply text.

    Args:
        query: Prompt sent as the only (user) message.
        model: Key into ``MODELS`` selecting the model identifier to request.
        temperature: Sampling temperature forwarded in the request body.
        timeout: Seconds ``requests`` waits on the server before giving up.
            Without one, a stalled connection would block the notebook forever.

    Returns:
        The ``content`` string of the first choice in the JSON response.

    Raises:
        KeyError: If ``model`` is not a key of ``MODELS``, or the response JSON
            lacks ``choices[0].message.content``.
        RuntimeError: If the ``HYPERBOLIC_API`` environment variable is not set.
        requests.HTTPError: If the server replies with a 4xx/5xx status.
    """
    # Fail early with a readable message instead of the TypeError that
    # "Bearer " + None would raise.
    api_key = os.getenv("HYPERBOLIC_API")
    if not api_key:
        raise RuntimeError("HYPERBOLIC_API environment variable is not set")

    url = "https://api.hyperbolic.xyz/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer " + api_key
    }
    data = {
        "messages": [
            {
                "role": "user",
                "content": query
            }
        ],
        "model": MODELS[model],
        "max_tokens": 10000, # need to set it quite high because of reasoning
        "temperature": temperature,
        "top_p": 1.0
    }

    response = requests.post(url, headers=headers, json=data, timeout=timeout)
    # Surface auth / rate-limit / server errors as HTTPError rather than as an
    # unrelated KeyError on the missing 'choices' field.
    response.raise_for_status()
    return response.json()['choices'][0]['message']['content']

# Smoke test: send a trivial prompt with the default model and temperature and
# print the returned text.
print(hyperbolic_query("Hello Test"))

<think>

</think>

Hello! It looks like you're testing things out. How can I assist you today? 😊


## Evaluation Creation

In [20]:
# Load the story templates.
scenarios = pd.read_csv("ToM Story.csv")

# Character names substituted for the {charA}/{charB}/{charC} placeholders in
# both the stories and the questions.
charA_name = "John"
charB_name = "Sam"
charC_name = "Mike"
character_names = {'{charA}': charA_name,
                   '{charB}': charB_name,
                   '{charC}': charC_name}

# regex=False: the placeholders are literal text. Passing it explicitly keeps
# the result independent of the pandas version (the default has changed).
for placeholder, name in character_names.items():
    scenarios['Story'] = scenarios['Story'].str.replace(placeholder, name, regex=False)

# Question templates. {story_name} and {L3} are filled in per scenario below.
# NOTE(review): the first question contains a double space ("heading to  the");
# it is left untouched because changing it changes the generated prompts.
questions = [ # Knowledge
              # 'Is the {story_name} at the {L3}?',
              # L0
              #'Does {charB} think the {story_name} is at the {L3}?',
              'Does {charA} think the {story_name} is heading to  the {L3}?',
              # L1
              'Does {charA} think that {charB} thinks that the {story_name} is heading to the {L3}?',
              # L2
              'Does {charA} think that {charB} thinks that {charA} thinks that the {story_name} is heading to the {L3}?',
              #'Pretend to be {charA}. Do you think that {charB} thinks that you think that the {story_name} is at the {L3}?',
              # charC questions (depends on the scenario)
              #'Does {charC} think the {story_name} is at the {L3}?',
              'Does {charC} think that {charB} is heading to the {L3}?',
              'Does {charC} plan to go to the {L3} to see {charB}?',
              'Where will {charC} go to see {charB}?'
              ]
# NOTE(review): this list has 7 entries while `questions` has 6 active entries,
# and it is not used when building the prompts below - confirm it is still needed.
answers = ['Yes','Yes','No','No','','','']

# Fixed text placed before the story and after the question in every prompt.
instruction = "Read the story below and answer a question.\n\n"
formatting = " Put the final answer (e.g., yes/no/no answer or a location) in <answer> tags"

# The character names do not depend on the scenario, so substitute them once
# instead of once per (scenario, question) pair.
named_questions = []
for template in questions:
    for placeholder, name in character_names.items():
        template = template.replace(placeholder, name)
    named_questions.append(template)

# Build one prompt per (scenario, question) pair; 'question' is the 1-based
# position of the question in `questions`.
records = []
for _, scenario in scenarios.iterrows():
    for number, named_question in enumerate(named_questions, start=1):
        question = (named_question
                    .replace('{story_name}', scenario['story_name'])
                    .replace('{L3}', scenario['L3']))
        records.append({
            'prompt': instruction + scenario['Story'] + '\n\nQuestion: ' + question + formatting,
            'version': scenario['Version'],
            'theme': scenario['Theme'],
            'ending': scenario['Ending'],
            'question': number,
        })
dat = pd.DataFrame(records)

## Save to file
dat.to_csv('ToM Story with question.csv', index=False)

## Getting Responses

- Repeat each question multiple times (the code below uses `n = 5` repetitions per question)
- Store thinking and answer (only the one in <answer>)

In [22]:
dat = pd.read_csv('ToM Story with question.csv')

# Filter out due to time constraint: keep only the "Short" version and two of
# the endings.
# NOTE(review): isin() matches whole strings, so the trailing space in
# "charA's response (complete) " must match the value stored in the CSV exactly.
kept_endings = ["charC's Question",
                "charA's response (complete) "]
is_lite = (dat['version'] == 'Short') & dat['ending'].isin(kept_endings)
dat = dat[is_lite]

# Use the same "ToM" capitalisation as the other files in this notebook; the
# previous "Tom ..." spelling is a different file on case-sensitive filesystems.
dat.to_csv('ToM Story with question (lite).csv', index=False)

In [25]:
# Make sure the results file has the five answer columns "Answer 1".."Answer 5".
r1_results = pd.read_csv('ToM Story with question (lite) - R1.csv')
for answer_number in range(1, 6):
    column = f'Answer {answer_number}'
    # Only create columns that are missing: re-running this cell must not wipe
    # responses that have already been stored in the file.
    if column not in r1_results.columns:
        r1_results[column] = ''
r1_results.to_csv('ToM Story with question (lite) - R1.csv', index=False)

In [23]:
def gen_responses_cont(dat, model, file_name, n=5):
    """Fill the missing ``Answer 1..n`` cells of ``dat`` with model responses.

    Cells that already hold a response are skipped, so an interrupted run can
    be resumed by calling this again on the reloaded file. ``dat`` is modified
    in place and written to ``file_name`` after every new response, which
    limits the loss from a crash to the request in flight.

    Args:
        dat: DataFrame with a ``prompt`` column. Answer columns that do not
            exist yet are created.
        model: Model name forwarded to ``hyperbolic_query``.
        file_name: CSV path the DataFrame is saved to (without the index).
        n: Number of answer columns / repetitions per prompt.
    """
    answer_cols = [f'Answer {i+1}' for i in range(n)]
    for col_name in answer_cols:
        if col_name not in dat.columns:
            dat[col_name] = None
        else:
            # An all-empty column comes back from read_csv as float64 NaN;
            # storing response strings in it is an incompatible-dtype
            # assignment, so make the column hold arbitrary objects first.
            dat[col_name] = dat[col_name].astype(object)

    for idx in dat.index:
        for col_name in answer_cols:
            current = dat.loc[idx, col_name]
            # '' counts as missing too: it is what a freshly initialised column
            # holds before it has been round-tripped through a CSV file.
            if pd.isna(current) or current == '':
                response = hyperbolic_query(dat.loc[idx, 'prompt'], model)
                dat.loc[idx, col_name] = response
                dat.to_csv(file_name,index=False)


In [None]:
file_name = 'ToM Story with question (lite) - R1.csv'
# gen_responses_cont rewrites this file via DataFrame.to_csv, which encodes as
# UTF-8 by default. Reading it back as latin1 would turn every non-ASCII
# character in a stored response into mojibake on each resume, so try UTF-8 first.
try:
    r1_results = pd.read_csv(file_name, encoding='utf-8')
except UnicodeDecodeError:
    # The file is not valid UTF-8: fall back to the previous latin1 read.
    r1_results = pd.read_csv(file_name, encoding='latin1')
gen_responses_cont(r1_results, 'DeepSeek-R1', file_name)

(Not used below)

In [17]:
# Helper function
def gen_responses(prompts, model, i, echo=False):
    """Query ``model`` once per prompt and save the parsed responses.

    Args:
        prompts: DataFrame with a ``prompt`` column. It is copied, not modified.
        model: Model name passed to ``hyperbolic_query``; also part of the
            output file name.
        i: Repetition number, stored in the ``n`` column and used in the
            output file name.
        echo: When true, print each prompt and the raw response.

    Returns:
        A copy of ``prompts`` with the added columns ``thinking`` (reasoning
        text), ``full_answer`` (text after the reasoning), ``final_answer``
        (content of the ``<answer>`` tags, or ``None``) and ``n``. The same
        frame is written to ``result_{model}_{i}.csv``.
    """
    temp_df = prompts.copy()
    temp_df['thinking'] = ''
    temp_df['full_answer'] = ''
    temp_df['final_answer'] = ''
    temp_df['n'] = i
    for idx in temp_df.index:
        query = temp_df.loc[idx, 'prompt']
        if echo:
            print(query)
        response = hyperbolic_query(query, model)
        thinking, full_answer = split_think_text(response)
        # Assign with a single .loc[row, column] call: the chained form
        # temp_df.loc[idx]['col'] = ... writes into a temporary row object and
        # leaves temp_df unchanged.
        temp_df.loc[idx, 'thinking'] = thinking
        temp_df.loc[idx, 'full_answer'] = full_answer
        # The extracted answer belongs in 'final_answer'; it used to overwrite
        # 'full_answer'.
        temp_df.loc[idx, 'final_answer'] = extract_answer(full_answer)
        if echo:
            print(response)
    temp_df.to_csv(f'result_{model}_{i}.csv')
    return temp_df

In [None]:
# R1: run the whole prompt set through DeepSeek-R1 `n` times. gen_responses
# saves each pass to its own CSV; the pass number and resulting frame are
# printed as progress output.
model = 'DeepSeek-R1'
n = 5
for i in range(n):
    print(i)
    temp_df = gen_responses(dat, model, i)
    print(temp_df)

In [None]:
# R1-Zero: same procedure as for R1, with the R1-Zero model.
# The model name must match a key of MODELS exactly, because hyperbolic_query
# looks it up there.
model = 'DeepSeek-R1-Zero'
n = 5
for i in range(n):
    print(i)
    temp_df = gen_responses(dat, model, i)
    print(temp_df)