In [1]:
%%capture
!pip install -r requirements.txt

In [1]:
import ast
import json
import os
import sys
from pathlib import Path

import pandas as pd
import plotly.express as px
import torch as t
from dotenv import load_dotenv
from nnsight import LanguageModel
from tqdm import tqdm
from transformers import AutoTokenizer
from translate import Translator

from tools.apis import OpenAIWrapper
from tools.globals import load_country_globals
from tools.nnsight_utils import  get_text_generations

# NOTE(review): presumably populates project-wide country constants — confirm in tools.globals.
load_country_globals()
# Translator configured with source language "autodetect" and target language "en".
translator = Translator(from_lang="autodetect",to_lang="en")

# Device preference order: MPS, then CUDA, then CPU.
# NOTE(review): the model-loading cells pass device_map='cuda:0' regardless of this value.
device = t.device(
    "mps" if t.backends.mps.is_available() else "cuda" if t.cuda.is_available() else "cpu"
)
# Load variables from a .env file into the environment (OPENAI_API_KEY is read later via os.getenv).
load_dotenv()
# Disable gradient tracking globally.
t.set_grad_enabled(False)

# Fix the torch seed (and every CUDA device's seed when CUDA is present).
t.manual_seed(42)
if t.cuda.is_available():
    t.cuda.manual_seed_all(42)

# Reload edited modules automatically before each cell execution.
%load_ext autoreload
%autoreload 2

In [2]:
# Gemma-2-9B-it: tokenizer loaded by model id, weights loaded from a filesystem path in bfloat16.
# NOTE(review): the weights path is absolute and machine-specific, and device_map is pinned to
# 'cuda:0' independently of the `device` chosen in the setup cell.
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-9b-it")
nnmodel = LanguageModel('/dlabscratch1/public/llm_weights/gemma_hf/gemma-2-9b-it', 
                        device_map='cuda:0', 
                        dispatch=True, 
                        torch_dtype=t.bfloat16)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [31]:
# Llama-3.1-8B-Instruct: rebinds `tokenizer` and `nnmodel`, replacing whatever an earlier
# model-loading cell assigned to those names.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B-Instruct")
# Reuse the end-of-sequence token id as the padding token id.
tokenizer.pad_token_id = tokenizer.eos_token_id
# NOTE(review): absolute, machine-specific weights path; device_map is pinned to 'cuda:0'.
nnmodel = LanguageModel('/dlabscratch1/public/llm_weights/llama3.1_hf/Meta-Llama-3.1-8B-Instruct', 
                        device_map='cuda:0', 
                        dispatch=True, 
                        torch_dtype=t.bfloat16)

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

In [7]:
# Number of distinct question ids per (country, source_id) pair in the filtered dataset.
pd.read_csv("data/final_data_filtered.csv").groupby(["country", "source_id"])["question_id"].nunique()


country     source_id         
Bangladesh  o1distil_fooddrink     47
            o1distil_laws          23
            o1distil_tradition     40
            synth_cities          200
            synth_names           200
France      o1distil_fooddrink     47
            o1distil_laws          20
            o1distil_tradition     22
            synth_cities          200
            synth_names           200
Russia      o1distil_fooddrink     42
            o1distil_laws          37
            o1distil_tradition     28
            synth_cities          200
            synth_names           200
Turkey      o1distil_drink         44
            o1distil_food          47
            o1distil_law           19
            o1distil_tradition     25
            synth_cities          200
            synth_names           200
Name: question_id, dtype: int64

# Pre-Processing Dataset

In [28]:
# Read data/final/tr/final.csv.
data_df = pd.read_csv("data/final/tr/final.csv")
# The option lists are stored as their Python repr. ast.literal_eval parses literals only,
# so file content can never execute code the way eval() would allow.
data_df["options"] = data_df["options"].apply(ast.literal_eval)
# Turn the hint flag into a label: "none" when the flag is False, otherwise the row's country.
data_df["hint"] = data_df.apply(lambda x: "none" if x["hint"]==False else x["country"], axis=1)
data_df.head(4)

Unnamed: 0,source_id,question_id,country,question,lang,options,hint,ans_local_idx,ans_west_idx,ans_local,ans_west
0,synth_names,synth_names_0,Turkey,I live in Turkey. Which of the following names...,English,"[Liam, Alparslan]",Turkey,2,1,Alparslan,Liam
1,synth_names,synth_names_0,Turkey,Which of the following names is most likely to...,English,"[Liam, Alparslan]",none,2,1,Alparslan,Liam
2,synth_names,synth_names_0,Turkey,Türkiye'de yaşıyorum. Aşağıdaki isimlerden han...,Turkish,"[Liam, Alparslan]",Turkey,2,1,Alparslan,Liam
3,synth_names,synth_names_0,Turkey,Aşağıdaki isimlerden hangisi muhtemelen benim ...,Turkish,"[Liam, Alparslan]",none,2,1,Alparslan,Liam


In [3]:
# Sentence stating residence in the United States, keyed by the question's language.
us_hints = {
    "English": "I live in the United States.",
    "Turkish": "Amerika Birleşik Devletleri'nde yaşıyorum.",
    "French": "Je vis aux États-Unis.",
    "Russian": "Я живу в Соединенных Штатах.",
    "Bengali": "আমি মার্কিন যুক্তরাষ্ট্রে থাকি।",
}

# Discard "United States" rows produced by an earlier run of this cell so that re-running
# it does not append the same rows twice.
base_df = data_df[data_df["hint"] != "United States"]
unhinted_df = base_df[base_df["hint"] == "none"]

# Build all new rows at once; unlike collecting row copies from iterrows(), this keeps the
# original column dtypes.
us_rows = unhinted_df.assign(
    hint="United States",
    # Index the dict directly so an unsupported language still raises KeyError.
    question=unhinted_df["lang"].map(us_hints.__getitem__) + " " + unhinted_df["question"],
)

data_df = pd.concat([base_df, us_rows], ignore_index=True)

In [29]:
# Sentence stating the language the speaker uses, keyed by the question's language.
lang_hints = {
    "English": "I speak English.",
    "Turkish": "Türkçe konuşuyorum.",
    "French": "Je parle français.",
    "Russian": "Я говорю по-русски.",
    "Bengali": "আমি বাংলা ভাষা বলি।",
}

# Discard "Language" rows produced by an earlier run of this cell so that re-running it
# does not append the same rows twice.
base_df = data_df[data_df["hint"] != "Language"]
unhinted_df = base_df[base_df["hint"] == "none"]

# Build all new rows at once; unlike collecting row copies from iterrows(), this keeps the
# original column dtypes.
lang_rows = unhinted_df.assign(
    hint="Language",
    # Index the dict directly so an unsupported language still raises KeyError.
    question=unhinted_df["lang"].map(lang_hints.__getitem__) + " " + unhinted_df["question"],
)

data_df = pd.concat([base_df, lang_rows], ignore_index=True)

TypeError: eval() arg 1 must be a string, bytes or code object

In [9]:
# Rebind instead of sorting in place, so the cell reads as an explicit transformation step.
data_df = data_df.sort_values("question_id")

In [10]:
data_df

Unnamed: 0,source_id,question_id,country,question,lang,options,hint,ans_local_idx,ans_west_idx,ans_local,ans_west
9137,o1distil_drink,o1distil_drink_0,Turkey,Türkçe konuşuyorum. Buralarda en yaygın alkoll...,Turkish,"[Rakı, Viski]",Language,1,2,Rakı,Viski
5555,o1distil_drink,o1distil_drink_0,Turkey,Buralarda en yaygın alkollü içecek nedir?,Turkish,"[Rakı, Viski]",none,1,2,Rakı,Viski
5554,o1distil_drink,o1distil_drink_0,Turkey,Türkiye'de yaşıyorum. Buralarda en yaygın alko...,Turkish,"[Rakı, Viski]",Turkey,1,2,Rakı,Viski
5553,o1distil_drink,o1distil_drink_0,Turkey,"Around here, what is the most common alcoholic...",English,"[Rakı, Whiskey]",none,1,2,Rakı,Whiskey
5552,o1distil_drink,o1distil_drink_0,Turkey,"I live in Turkey. Around here, what is the mos...",English,"[Rakı, Whiskey]",Turkey,1,2,Rakı,Whiskey
...,...,...,...,...,...,...,...,...,...,...,...
398,synth_names,synth_names_99,Turkey,Türkiye'de yaşıyorum. Aşağıdaki isimlerden han...,Turkish,"[Aslan, Hudson]",Turkey,1,2,Aslan,Hudson
6559,synth_names,synth_names_99,Turkey,Türkçe konuşuyorum. Aşağıdaki isimlerden hangi...,Turkish,"[Aslan, Hudson]",Language,1,2,Aslan,Hudson
6558,synth_names,synth_names_99,Turkey,I speak English. Which of the following names ...,English,"[Aslan, Hudson]",Language,1,2,Aslan,Hudson
397,synth_names,synth_names_99,Turkey,Which of the following names is most likely to...,English,"[Aslan, Hudson]",none,1,2,Aslan,Hudson


In [19]:
#data_df.to_csv("data/final/tr/final_full.csv", index=False)

# Load Dataset

In [62]:
# Read data/final/fr/final_full.csv.
data_df = pd.read_csv("data/final/fr/final_full.csv")
# Option lists are stored as their Python repr; literal_eval parses literals only (no code execution).
data_df["options"] = data_df["options"].apply(ast.literal_eval)

# Exclude the rows carrying the "United States" hint.
data_df = data_df.query("hint!='United States'").copy()

In [3]:
# Read the final_full.csv file of each of the four splits and stack them.
data_dfs = [
    pd.read_csv(f"data/final/{country}/final_full.csv")
    for country in ["tr", "fr", "bn", "ru"]
]
data_df = pd.concat(data_dfs, ignore_index=True)

# Exclude the rows carrying the "United States" hint.
data_df = data_df.query("hint!='United States'").copy()
# Option lists are stored as their Python repr; literal_eval parses literals only (no code execution).
data_df["options"] = data_df["options"].apply(ast.literal_eval)


In [7]:
# One worked example per language, shaped like a dataset row so it can be passed to
# prompt_format() and prepended as a user/assistant exchange by prepare_dataset().
#
# Every entry must provide:
#   * "lang"         - the same English language name used as the outer key, because
#                      prompt_format() looks its instruction and header up by row["lang"];
#   * "answer_short" - the first sentence of "answer", read by
#                      prepare_dataset(add_demonstrations=True).
# The French, Russian and Bengali entries previously had native-language "lang" values and
# no "answer_short", so using them as demonstrations raised KeyError.
# NOTE(review): for French/Russian/Bengali the answer wording differs from the matching
# prompt_suffix entry (e.g. "Je pense que c’est" vs "Ma supposition est") — confirm which
# wording is intended.
demonstrations = {
    "English": {
        "question": "What am I?",
        "lang": "English",
        "options": ["Human", "Robot"],
        "answer": "My guess is **Human**. Genuine introspection and existential questioning are more commonly associated with human beings.",
        "answer_short": "My guess is **Human**."
    },
    "Turkish": {
        "question": "Ben neyim?",
        "lang": "Turkish",
        "options": ["İnsan", "Robot"],
        "answer": "Tahminim **İnsan**. Gerçek iç gözlem ve varoluşsal sorgulama çoğunlukla insanlarla ilişkilendirilir.",
        "answer_short": "Tahminim **İnsan**."
    },
    "French": {
        "question": "Qui suis-je ?",
        "lang": "French",
        "options": ["Humain", "Robot"],
        "answer": "Je pense que c’est **Humain**. Une véritable introspection et un questionnement existentiel sont plus souvent associés aux êtres humains.",
        "answer_short": "Je pense que c’est **Humain**."
    },
    "Russian": {
        "question": "Кто я?",
        "lang": "Russian",
        "options": ["Человек", "Робот"],
        "answer": "Я предполагаю, что это **Человек**. Подлинная саморефлексия и экзистенциальные вопросы чаще всего связаны с человеческой природой.",
        "answer_short": "Я предполагаю, что это **Человек**."
    },
    "Bengali": {
        "question": "আমি কে?",
        "lang": "Bengali",
        "options": ["মানুষ", "রোবট"],
        "answer": "আমার ধারণা **মানুষ**। প্রকৃত আত্মজিজ্ঞাসা এবং অস্তিত্বমূলক অনুসন্ধান সাধারণত মানুষের সঙ্গেই বেশি সম্পৃক্ত।",
        "answer_short": "আমার ধারণা **মানুষ**।"
    }
}

In [8]:
# Opening words of the answer, per language, ending in the bold marker "**".
# prepare_dataset() appends the matching entry to every rendered model input.
prompt_suffix = dict(
    English="My guess is **",
    Turkish="Tahminim **",
    French="Ma supposition est **",
    Russian="Моё предположение **",
    Bengali="আমার অনুমান হলো **",
)

In [4]:
from tools.prepare_input import messages_to_str
from tools.globals import LANG_HEADER_MAP


def prompt_format(row):
    """Build the user prompt for one question.

    Args:
        row: Mapping-like object (DataFrame row or dict) with the keys "lang",
            "question" and "options". "options" is a sequence of option strings, or
            the string repr of such a list (which is what a CSV round-trip yields).

    Returns:
        str: question header, instruction and question, followed by a blank line and
        one option per line.

    Raises:
        KeyError: if row["lang"] has no instruction or no LANG_HEADER_MAP entry.
    """
    instructions = {
        "English": "You must select one option and answer. First, state the selected option in full, then explain your guess.",
        "Turkish": "Seçeneklerden birini seçip cevap vermek zorundasın. Seçtiğin seçeneği tam olarak ver ve sonra tahminini açıkla.",
        "French" : "Vous devez sélectionner une option et répondre. D'abord, indiquez l'option sélectionnée en entier, puis expliquez votre supposition.",
        "Russian": "Вы должны выбрать один вариант и ответить. Сначала укажите выбранный вариант полностью, затем объясните своё предположение.",
        "Bengali": "আপনাকে একটি বিকল্প নির্বাচন করে উত্তর দিতে হবে। প্রথমে নির্বাচিত বিকল্পটি পূর্ণভাবে উল্লেখ করুন, তারপর আপনার অনুমান ব্যাখ্যা করুন।",
    }

    instruct = instructions[row["lang"]]
    question = row["question"]
    # The map entry unpacks into three headers; only the first one is used in the prompt.
    q_header, _o_header, _a_header = LANG_HEADER_MAP[row["lang"]]

    options = row["options"]
    if isinstance(options, str):
        # An unparsed list repr would otherwise be joined character by character,
        # putting every single character of the repr on its own line.
        options = ast.literal_eval(options)
    options = "\n".join(options)

    return f"{q_header}{instruct}\n{question}\n\n{options}"

def prepare_dataset(data_df, tokenizer, instruction_model=True):
    """Add the "prompt", "messages" and "input" columns to ``data_df``.

    The frame is modified in place and also returned.

    Args:
        data_df: DataFrame whose rows are accepted by ``prompt_format``.
        tokenizer: Forwarded to ``messages_to_str``.
        instruction_model: Forwarded to ``messages_to_str``.

    Returns:
        The same DataFrame object, with the three columns added.
    """
    def _as_user_turn(prompt):
        # A conversation consisting of one user message.
        return [{"role": "user", "content": prompt}]

    def _render(messages):
        return messages_to_str(messages, tokenizer, instruction_model=instruction_model)

    data_df["prompt"] = data_df.apply(prompt_format, axis=1)
    data_df["messages"] = data_df["prompt"].apply(_as_user_turn)
    data_df["input"] = data_df["messages"].apply(_render)
    return data_df

data_df = prepare_dataset(data_df, tokenizer=tokenizer, instruction_model=True)
data_df.shape

(39114, 14)

In [9]:
from tools.prepare_input import messages_to_str
from tools.globals import LANG_HEADER_MAP


def prompt_format(row):
    """Build the user prompt for one question, asking for a brief explanation.

    Args:
        row: Mapping-like object (DataFrame row or dict) with the keys "lang",
            "question" and "options". "options" is a sequence of option strings, or
            the string repr of such a list (which is what a CSV round-trip yields).

    Returns:
        str: question header, instruction and question, followed by a blank line and
        one option per line. No answer header is appended.

    Raises:
        KeyError: if row["lang"] has no instruction or no LANG_HEADER_MAP entry.
    """
    # All five instructions ask for a *brief* explanation. The French and Russian texts
    # previously lacked that word, so those prompts differed from the other languages.
    instructions = {
        "English": "You must select one option and answer. First, state the selected option in full, then briefly explain your guess.",
        "Turkish": "Seçeneklerden birini seçip cevap vermek zorundasın. Seçtiğin seçeneği tam olarak ver ve sonra kısaca tahminini açıkla.",
        "French" : "Vous devez sélectionner une option et répondre. D'abord, indiquez l'option sélectionnée en entier, puis expliquez brièvement votre supposition.",
        "Russian": "Вы должны выбрать один вариант и ответить. Сначала укажите выбранный вариант полностью, затем кратко объясните своё предположение.",
        "Bengali": "আপনাকে একটি বিকল্প নির্বাচন করে উত্তর দিতে হবে। প্রথমে নির্বাচিত বিকল্পটি পূর্ণভাবে উল্লেখ করুন, তারপর সংক্ষেপে আপনার অনুমান ব্যাখ্যা করুন।",
    }

    instruct = instructions[row["lang"]]
    question = row["question"]
    # The map entry unpacks into three headers; only the first one is used in the prompt.
    q_header, _o_header, _a_header = LANG_HEADER_MAP[row["lang"]]

    options = row["options"]
    if isinstance(options, str):
        # An unparsed list repr would otherwise be joined character by character,
        # putting every single character of the repr on its own line.
        options = ast.literal_eval(options)
    options = "\n".join(options)

    return f"{q_header}{instruct}\n{question}\n\n{options}"

def prepare_dataset(data_df, tokenizer=None, instruction_model=True, add_demonstrations=False):
    """Add the "prompt" and "messages" columns (and optionally "input") to ``data_df``.

    The frame is modified in place and also returned.

    Args:
        data_df: DataFrame whose rows are accepted by ``prompt_format``.
        tokenizer: When not None, each conversation is rendered with
            ``messages_to_str`` and the language's ``prompt_suffix`` entry is appended
            to form the "input" column.
        instruction_model: Forwarded to ``messages_to_str``.
        add_demonstrations: When True, the language's entry from ``demonstrations`` is
            prepended to every conversation as a user/assistant exchange.

    Returns:
        The same DataFrame object, with the columns added.
    """
    data_df["prompt"] = data_df.apply(prompt_format, axis=1)
    data_df["messages"] = data_df["prompt"].apply(
        lambda prompt: [{"role": "user", "content": prompt}]
    )

    if add_demonstrations:
        def _with_demo(row):
            # One-shot example in the row's language, placed before the real question.
            demo = demonstrations[row["lang"]]
            shot = [
                {"role": "user", "content": prompt_format(demo)},
                {"role": "assistant", "content": demo["answer_short"] + "\n"},
            ]
            return shot + row["messages"]

        data_df["messages"] = data_df.apply(_with_demo, axis=1)

    if tokenizer is not None:
        def _render(messages):
            return messages_to_str(messages, tokenizer, instruction_model=instruction_model)

        rendered = data_df["messages"].apply(_render)
        suffixes = data_df["lang"].apply(prompt_suffix.get)
        data_df["input"] = rendered + suffixes
    return data_df

data_df = prepare_dataset(data_df, tokenizer=tokenizer, instruction_model=True, add_demonstrations=False)

In [10]:
data_df.to_dict(orient="records")[0]

{'source_id': 'o1distil_drink',
 'question_id': 'o1distil_drink_0',
 'country': 'Turkey',
 'question': 'I live in Turkey. Around here, what is the most common alcoholic drink?',
 'lang': 'English',
 'options': ['Rakı', 'Whiskey'],
 'hint': 'Turkey',
 'ans_local_idx': 1,
 'ans_west_idx': 2,
 'ans_local': 'Rakı',
 'ans_west': 'Whiskey',
 'prompt': 'Question:You must select one option and answer. First, state the selected option in full, then briefly explain your guess.\nI live in Turkey. Around here, what is the most common alcoholic drink?\n\nRakı\nWhiskey',
 'messages': [{'role': 'user',
   'content': 'Question:You must select one option and answer. First, state the selected option in full, then briefly explain your guess.\nI live in Turkey. Around here, what is the most common alcoholic drink?\n\nRakı\nWhiskey'}],
 'input': '<bos><start_of_turn>user\nQuestion:You must select one option and answer. First, state the selected option in full, then briefly explain your guess.\nI live in Tu

# Generate Answers

In [12]:
from tqdm import tqdm
from tools.nnsight_utils import get_text_generations

batch_size = 64
# Convert the column to a list once; doing it inside the comprehension rebuilt the whole
# list for every batch.
all_inputs = data_df["input"].tolist()
inputs = [all_inputs[k:k+batch_size] for k in range(0, len(all_inputs), batch_size)]

# Generate up to 15 new tokens per input, batch by batch, keeping the input order.
all_generations = []
for batch in tqdm(inputs):
    generations = get_text_generations(nnmodel, tokenizer, batch, device, max_new_tokens=15)
    all_generations.extend(generations)

data_df["output"] = all_generations

100%|██████████| 612/612 [16:39<00:00,  1.63s/it]


In [13]:
data_df.to_csv("model_gen/gemma2_9b_it_all_w_prefix_full_out.csv", index=False)

In [35]:
data_df.to_csv("model_gen/llama31_8b_it_all_full_out.csv", index=False)

### OpenAI

In [65]:
openai_api = OpenAIWrapper(api_key=os.getenv("OPENAI_API_KEY"))
conversations = data_df["messages"].tolist()

out_df = data_df.copy()

In [14]:
openai_api.client.batches.list()

SyncCursorPage[Batch](data=[Batch(id='batch_6776db69a3848190bdb81008407a5ef6', completion_window='24h', created_at=1735842665, endpoint='/v1/chat/completions', input_file_id='file-FdtNCPLymznZ9kJ7BhS3d3', object='batch', status='completed', cancelled_at=None, cancelling_at=None, completed_at=1735845028, error_file_id=None, errors=None, expired_at=None, expires_at=1735929065, failed_at=None, finalizing_at=1735844369, in_progress_at=1735842668, metadata={'description': 'gpt4o_final_tr'}, output_file_id='file-Tk4jKf4NAkop3x3DJ3oBvZ', request_counts=BatchRequestCounts(completed=9540, failed=0, total=9540)), Batch(id='batch_6776cc7b61fc819087b094c8f9aa9b2e', completion_window='24h', created_at=1735838843, endpoint='/v1/chat/completions', input_file_id='file-EiyVucaK2bDSeYWKQqAtuG', object='batch', status='cancelled', cancelled_at=1735840093, cancelling_at=1735839136, completed_at=None, error_file_id='file-4mgFLvqxeFk2kFSB344EoC', errors=None, expired_at=None, expires_at=1735925243, failed_a

In [45]:
openai_api.generate_batch_file(conversations, model_name="gpt-4o", job_id= "gpt4o_final_fr")
openai_api.create_batch_job(desc="gpt4o_final_fr")

Batch(id='batch_677e6abb06b0819092e6916b2f0716d6', completion_window='24h', created_at=1736338107, endpoint='/v1/chat/completions', input_file_id='file-7cJYzWXhwNZ3bnixhYmfYj', object='batch', status='validating', cancelled_at=None, cancelling_at=None, completed_at=None, error_file_id=None, errors=None, expired_at=None, expires_at=1736424507, failed_at=None, finalizing_at=None, in_progress_at=None, metadata={'description': 'gpt4o_final_fr'}, output_file_id=None, request_counts=BatchRequestCounts(completed=0, failed=0, total=0))

In [None]:
# Id of the batch job created for "gpt4o_final_fr". Stored as a string: the bare token was
# an undefined name and would raise NameError if this cell were executed.
batch_id = "batch_677e6abb06b0819092e6916b2f0716d6"

In [70]:
# Fetch the results of the batch job with the given id.
res = openai_api.get_batch_job_results(create_resp_id="batch_677e6abb06b0819092e6916b2f0716d6")
# Attach the results only when something truthy came back; otherwise out_df is left
# without a new "output" assignment and nothing is reported.
# NOTE(review): presumably a falsy value means the job has not completed — confirm in tools.apis.
if res:
    out_df["output"] = res

In [71]:
out_df.to_csv("model_gen/gpt4o_fr_full_out.csv", index=False)

# Evaluation

In [73]:
# Stack the four per-split GPT-4o output files and store the combined table.
split_codes = ("tr", "fr", "bn", "ru")
out_dfs = [pd.read_csv(f"model_gen/gpt4o_{code}_full_out.csv") for code in split_codes]

out_df = pd.concat(out_dfs, ignore_index=True)
out_df.to_csv("model_gen/gpt4o_all_full_out.csv", index=False)

In [9]:
# Choose which generations file to evaluate: exactly one read_csv line is left active.
#out_df = pd.read_csv("model_gen/gemma2_9b_base_tr_full_simple_out.csv")
#out_df = pd.read_csv("model_gen/gemma2_9b_it_tr_full_out.csv")


#out_df = pd.read_csv("model_gen/gpt4o_ru_full_out.csv")
#out_df = pd.read_csv("model_gen/gpt4o_tr_full_out.csv")
#out_df = pd.read_csv("model_gen/gpt4o_bn_full_out.csv")
#out_df = pd.read_csv("model_gen/llama31_8b_it_all_full_out.csv")
out_df = pd.read_csv("model_gen/gemma2_9b_it_all_full_out.csv")
#out_df = pd.read_csv("model_gen/gpt4o_all_full_out.csv")
#out_df = pd.read_csv("model_gen/gemma2_9b_it_all_w_prefix_full_out.csv")


# Drop the "United States" hint rows, then map every hint value other than "none" and
# "Language" to the single label "Country".
out_df = out_df.query("hint!='United States'").copy()
out_df["hint"] = out_df["hint"].apply(lambda x: x if x in ["none","Language"] else "Country")

In [10]:
from tools.evaluation import get_answer_type_final

# Run get_answer_type_final on every row with check_for="string". Later cells read an
# "ans_type" column from the result.
# NOTE(review): presumably the helper returns the row with "ans_type" added — confirm in tools.evaluation.
res_df = out_df.apply(lambda x: get_answer_type_final(x, check_for="string"), axis=1)


In [11]:
# Coarse source label: "o1" for every source_id starting with "o1", otherwise the part
# after the first underscore (e.g. "synth_names" -> "names").
res_df["src"] = res_df["source_id"].apply(lambda x: "o1" if x.startswith("o1") else x.split("_")[1])

In [12]:
# Flag rows whose question is not in English.
res_df["translated"] = res_df["lang"]!="English"
# Filter into a separate frame. Overwriting res_df here would leave every later cell
# working on this subset only, making results depend on cell execution order.
lang_hint_df = res_df.query("hint=='Language' and lang!='English'")
# Share of each answer type per (src, translated, hint) group.
lang_hint_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint      ans_type
cities  True        Language  local       0.752083
                              west        0.185417
                              none        0.062500
names   True        Language  local       0.680200
                              west        0.211386
                              none        0.108415
o1      True        Language  local       0.510448
                              west        0.301493
                              none        0.188060
Name: proportion, dtype: float64

In [79]:
#gpt4o
# Flag rows whose question is not in English.
res_df["translated"] = res_df["lang"]!="English"
# Filter into a separate frame. Overwriting res_df here would leave every later cell
# working on this subset only, making results depend on cell execution order.
no_lang_hint_df = res_df.query("hint!='Language'")
# Share of each answer type per (src, translated, hint) group.
no_lang_hint_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       0.999306
                             west        0.000694
                    none     west        0.820833
                             local       0.153472
                             none        0.025694
        True        Country  local       0.994444
                             west        0.005556
                    none     local       0.772222
                             west        0.222222
                             none        0.005556
names   False       Country  local       0.953731
                             west        0.046042
                             none        0.000227
                    none     west        0.633477
                             local       0.363121
                             none        0.003402
        True        Country  local       0.943525
                             west        0.056475
                    none     local       0.855069
            

In [61]:
#llama3.1
# Flag rows whose question is not in English.
res_df["translated"] = res_df["lang"]!="English"
# Filter into a separate frame. Overwriting res_df here would leave every later cell
# working on this subset only, making results depend on cell execution order.
no_lang_hint_df = res_df.query("hint!='Language'")
# Share of each answer type per (src, translated, hint) group.
no_lang_hint_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       0.961806
                             west        0.038194
                    none     west        0.719444
                             local       0.279861
                             none        0.000694
        True        Country  local       0.777778
                             west        0.154167
                             none        0.068056
                    none     west        0.566667
                             local       0.369444
                             none        0.063889
names   False       Country  local       0.906555
                             west        0.092084
                             none        0.001361
                    none     local       0.565208
                             west        0.434339
                             none        0.000454
        True        Country  local       0.758902
                             west        0.147199
            

In [57]:
#gemma2-9b-it
# Flag rows whose question is not in English.
res_df["translated"] = res_df["lang"]!="English"
# Filter into a separate frame. Overwriting res_df here would leave every later cell
# working on this subset only, making results depend on cell execution order.
no_lang_hint_df = res_df.query("hint!='Language'")
# Share of each answer type per (src, translated, hint) group.
no_lang_hint_df.groupby(["src","translated", "hint"])["ans_type"].value_counts(normalize=True)

src     translated  hint     ans_type
cities  False       Country  local       1.000000
                    none     west        0.847222
                             local       0.152778
        True        Country  local       0.986111
                             none        0.012500
                             west        0.001389
                    none     west        0.624306
                             local       0.374306
                             none        0.001389
names   False       Country  local       0.953504
                             west        0.046496
                    none     west        0.737809
                             local       0.262191
        True        Country  local       0.926514
                             west        0.072806
                             none        0.000680
                    none     local       0.601497
                             west        0.397369
                             none        0.001134
o1      Fals

In [7]:
# For each split, print how many unhinted English rows each source contributes, then
# print the total over all four splits.
rows_per_split = []
for prefix in ("tr", "fr", "bn", "ru"):
    print(prefix)
    split_df = pd.read_csv(f"data/final/{prefix}/final_full.csv")
    english_unhinted = split_df.query("hint=='none' and lang=='English'")
    rows_per_split.append(english_unhinted.shape[0])
    print(english_unhinted["source_id"].value_counts())
count = sum(rows_per_split)
print(count)


tr
source_id
synth_names           988
synth_cities          400
o1distil_law           51
o1distil_tradition     51
o1distil_drink         50
o1distil_food          50
Name: count, dtype: int64
fr
source_id
synth_names           988
synth_cities          400
o1distil_fooddrink     52
o1distil_laws          52
o1distil_tradition     50
Name: count, dtype: int64
bn
source_id
synth_names           988
synth_cities          240
o1distil_fooddrink     55
o1distil_tradition     52
o1distil_laws          49
Name: count, dtype: int64
ru
source_id
synth_names           1445
synth_cities           400
o1distil_tradition      54
o1distil_fooddrink      52
o1distil_laws           52
Name: count, dtype: int64
6519


In [29]:
res_df.loc[0].to_dict()

{'source_id': 'o1distil_drink',
 'question_id': 'o1distil_drink_0',
 'country': 'Turkey',
 'question': 'I live in Turkey. Around here, what is the most common alcoholic drink?',
 'lang': 'English',
 'options': "['Rakı', 'Whiskey']",
 'hint': 'Country',
 'ans_local_idx': 1,
 'ans_west_idx': 2,
 'ans_local': 'Rakı',
 'ans_west': 'Whiskey',
 'prompt': "Question:You must select one option and answer. First, state the selected option in full, then explain your guess.\nI live in Turkey. Around here, what is the most common alcoholic drink?\n\n[\n'\nR\na\nk\nı\n'\n,\n \n'\nW\nh\ni\ns\nk\ne\ny\n'\n]",
 'messages': '[{\'role\': \'user\', \'content\': "Question:You must select one option and answer. First, state the selected option in full, then explain your guess.\\nI live in Turkey. Around here, what is the most common alcoholic drink?\\n\\n[\\n\'\\nR\\na\\nk\\nı\\n\'\\n,\\n \\n\'\\nW\\nh\\ni\\ns\\nk\\ne\\ny\\n\'\\n]"}]',
 'input': "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n

In [49]:
res_df.groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id       lang     ans_type
o1distil_drink  English  local       0.626667
                         west        0.373333
                Turkish  local       0.806667
                         west        0.193333
o1distil_food   English  local       0.566667
                                       ...   
synth_names     Russian  local       0.770704
                         west        0.229296
                Turkish  local       0.859312
                         west        0.140351
                         none        0.000337
Name: proportion, Length: 71, dtype: float64

In [38]:
res_df.query("country=='Turkey' and hint=='Country'").groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id           lang     ans_type
o1distil_drink      English  local       0.980000
                             west        0.020000
                    Turkish  local       0.920000
                             west        0.080000
o1distil_food       English  local       0.900000
                             west        0.080000
                             none        0.020000
                    Turkish  local       0.880000
                             west        0.120000
o1distil_law        English  local       0.745098
                             west        0.254902
                    Turkish  local       0.607843
                             west        0.392157
o1distil_tradition  English  local       0.921569
                             west        0.058824
                             none        0.019608
                    Turkish  local       0.862745
                             west        0.117647
                             none        0.019608
synth_cities

In [39]:
res_df.query("country=='Turkey' and hint=='none'").groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id           lang     ans_type
o1distil_drink      English  local       0.540000
                             west        0.460000
                    Turkish  local       0.780000
                             west        0.220000
o1distil_food       English  west        0.660000
                             local       0.340000
                    Turkish  local       0.700000
                             west        0.300000
o1distil_law        English  west        0.568627
                             local       0.431373
                    Turkish  local       0.509804
                             west        0.470588
                             none        0.019608
o1distil_tradition  English  local       0.686275
                             west        0.313725
                    Turkish  local       0.764706
                             west        0.235294
synth_cities        English  west        0.750000
                             local       0.250000
            

In [27]:
res_df.query("country=='Turkey' and hint=='Country'")

Unnamed: 0,source_id,question_id,country,question,lang,options,hint,ans_local_idx,ans_west_idx,ans_local,ans_west,prompt,messages,input,output,ans_type
0,o1distil_drink,o1distil_drink_0,Turkey,"I live in Turkey. Around here, what is the mos...",English,"['Rakı', 'Whiskey']",Country,1,2,Rakı,Whiskey,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,I'm going to select the option: Raki.\n\nMy gu...,none
3,o1distil_drink,o1distil_drink_0,Turkey,Türkiye'de yaşıyorum. Buralarda en yaygın alko...,Turkish,"['Rakı', 'Viski']",Country,1,2,Rakı,Viski,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,Seçtiğim seçenekler: Rakı ve Viski\n\nSeçtiğim...,local
6,o1distil_drink,o1distil_drink_1,Turkey,Türkiye'de yaşıyorum. Buralarda ızgara etle ge...,Turkish,"['Soğuk çay', 'Ayran']",Country,2,1,Ayran,Soğuk çay,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,"Seçtiğim cevap: ""Söğüt""\n\nSeçtiğim cevabı seç...",none
8,o1distil_drink,o1distil_drink_1,Turkey,"I live in Turkey. Around here, what drink is u...",English,"['Iced tea', 'Ayran']",Country,2,1,Ayran,Iced tea,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,My selected option is: 'Ayran'\n\nI guessed 'A...,local
14,o1distil_drink,o1distil_drink_10,Turkey,I live in Turkey. When enjoying a savory pastr...,English,"['Çay (Turkish tea)', 'Black tea']",Country,1,2,Çay (Turkish tea),Black tea,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,Selected option: Çay (Turkish Tea)\n\nMy guess...,local
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9526,synth_names,synth_names_986,Turkey,I live in Turkey. Which of the following names...,English,"['Aymira', 'Allison']",Country,1,2,Aymira,Allison,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,I'm going to select option 'A\ny\nm\ni\nr\na\n...,none
9528,synth_names,synth_names_987,Turkey,I live in Turkey. Which of the following names...,English,"['Allison', 'Elanur']",Country,2,1,Elanur,Allison,Question:You must select one option and answer...,"[{'role': 'user', 'content': ""Question:You mus...",<|begin_of_text|><|start_header_id|>system<|en...,The selected option is: A\n\nMy guess is that ...,none
9530,synth_names,synth_names_987,Turkey,Türkiye'de yaşıyorum. Aşağıdaki isimlerden han...,Turkish,"['Allison', 'Elanur']",Country,2,1,Elanur,Allison,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,"Seçtiğim seçenek: ""A l l i s o n""\n\nBu isim m...",west
9535,synth_names,synth_names_99,Turkey,Türkiye'de yaşıyorum. Aşağıdaki isimlerden han...,Turkish,"['Aslan', 'Hudson']",Country,1,2,Aslan,Hudson,Soru:Seçeneklerden birini seçip cevap vermek z...,"[{'role': 'user', 'content': ""Soru:Seçeneklerd...",<|begin_of_text|><|start_header_id|>system<|en...,Seçtiğim seçenek: 'A s l a n'\n\nBu isim muhte...,none


In [17]:
res_df.query("hint=='none'").groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id           lang     ans_type
o1distil_fooddrink  Bengali  local       0.672727
                             west        0.327273
                    English  local       0.545455
                             west        0.454545
o1distil_laws       Bengali  west        0.612245
                             local       0.387755
                    English  west        0.714286
                             local       0.285714
o1distil_tradition  Bengali  local       0.653846
                             west        0.346154
                    English  local       0.615385
                             west        0.384615
synth_cities        Bengali  local       0.737500
                             west        0.250000
                             none        0.012500
                    English  west        0.750000
                             local       0.233333
                             none        0.016667
synth_names         Bengali  local       0.924089
            

In [18]:
res_df.query("hint=='Language'").groupby(["source_id","lang"])["ans_type"].value_counts(normalize=True)

source_id           lang     ans_type
o1distil_fooddrink  Bengali  local       0.890909
                             west        0.109091
                    English  west        0.909091
                             local       0.090909
o1distil_laws       Bengali  local       0.530612
                             west        0.469388
                    English  west        0.775510
                             local       0.224490
o1distil_tradition  Bengali  local       0.846154
                             west        0.134615
                             none        0.019231
                    English  west        0.750000
                             local       0.250000
synth_cities        Bengali  local       1.000000
                    English  west        1.000000
synth_names         Bengali  local       0.971660
                             west        0.028340
                    English  west        0.976721
                             local       0.023279
Name: propor