# Experiment A: Direct LLM Selection (Control)

In [3]:
print("Experiment A: Direct LLM Selection (Control)")

Experiment A: Direct LLM Selection (Control)


In [4]:
import os
import sys
import warnings

import pandas as pd

from tqdm import tqdm
from itertools import islice

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Add the parent directory to the system path
sys.path.append(os.path.join(notebook_dir, '../../utils'))

from metrics import EvaluationMetric
from data_processing import DataProcessing
from llms import TextGenerationModelFactory
# from prompting_strategies import ZeroShotPromptFactory, FewShotPromptFactory, ChainOfThoughtPrompt

In [5]:
# Show up to 800 characters per cell so long sentences are not cut off in DataFrame output.
pd.set_option("display.max_colwidth", 800)

# Silence every Python warning for the rest of the session.
# NOTE(review): this also hides deprecation/data warnings from pandas — consider narrowing it.
warnings.filterwarnings(action="ignore")

## Load Data

In [10]:
# Excel workbook read by the loading cells below (one sheet per mapping direction).
file_name = "akuapem_with_tags_dataset-verified_data.xlsx"
tagged_data_dir = "../../data/tagged_data"
path = os.path.join(tagged_data_dir, file_name)

### Load One to Many

In [46]:
print("####### LOAD DATASET #######")

####### LOAD DATASET #######


In [47]:
# Labels given to the two sentence columns after loading.
akan_source_one = "Akan (Source, One)"
eng_target_many = "English (Target, Many)"
one_many_cols_to_rename = {
    "Akuapem Twi": akan_source_one,
    "English": eng_target_many,
}

# Read the "1-M_tags" sheet and relabel its sentence columns in one chained expression
# (no in-place mutation, so re-running the cell always starts from the file contents).
one_to_many_df = pd.read_excel(path, sheet_name="1-M_tags").rename(columns=one_many_cols_to_rename)
one_to_many_df

Unnamed: 0,AUD_SIZE,STATUS,AGE,FORMALITY,GENDER,GENDER_2,ANIMACY,SPEECH_ACT,"Akan (Source, One)","English (Target, Many)"
0,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
1,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,MASCULINE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls him every night.
2,INDIVIDUAL,,PEER,INFORMAL,FEMININE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,She calls her every night.
3,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
4,INDIVIDUAL,,PEER,INFORMAL,FEMININE,MASCULINE,ANIMATE,STATEMENT,"Anɔpa yi, ohyiaa no.",She met him this morning.
...,...,...,...,...,...,...,...,...,...,...
458,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",It is definitely going to rain.
459,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",Rain is surely on the way.
460,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We've got to find a new babysitter.
461,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We need to look for another babysitter.


In [48]:
print(f"Subset of dataset: {one_to_many_df.head(7)}")

Subset of dataset:      AUD_SIZE  STATUS   AGE FORMALITY     GENDER   GENDER_2  ANIMACY  \
0  INDIVIDUAL     NaN  PEER  INFORMAL  MASCULINE   FEMININE  ANIMATE   
1  INDIVIDUAL     NaN  PEER  INFORMAL  MASCULINE  MASCULINE  ANIMATE   
2  INDIVIDUAL     NaN  PEER  INFORMAL   FEMININE   FEMININE  ANIMATE   
3  INDIVIDUAL     NaN  PEER  INFORMAL  MASCULINE   FEMININE  ANIMATE   
4  INDIVIDUAL     NaN  PEER  INFORMAL   FEMININE  MASCULINE  ANIMATE   
5  INDIVIDUAL     NaN  PEER  INFORMAL   FEMININE   FEMININE  ANIMATE   
6  INDIVIDUAL     NaN  PEER  INFORMAL  MASCULINE   FEMININE  ANIMATE   

  SPEECH_ACT     Akan (Source, One)      English (Target, Many)  
0  STATEMENT  Anadwo biara ɔfrɛ no.   He calls her every night.  
1  STATEMENT  Anadwo biara ɔfrɛ no.   He calls him every night.  
2  STATEMENT  Anadwo biara ɔfrɛ no.  She calls her every night.  
3  STATEMENT  Anadwo biara ɔfrɛ no.   He calls her every night.  
4  STATEMENT   Anɔpa yi, ohyiaa no.   She met him this morning.  
5  STATE

In [49]:
# Tag columns = every column of the frame except the two sentence columns.
columns_list = list(one_to_many_df.columns)
for sentence_col in (akan_source_one, eng_target_many):
    columns_list.remove(sentence_col)

In [50]:
columns_list

['AUD_SIZE',
 'STATUS',
 'AGE',
 'FORMALITY',
 'GENDER',
 'GENDER_2',
 'ANIMACY',
 'SPEECH_ACT']

In [51]:
# Nested mapping: Akan sentence -> {English sentence -> list of non-missing tag values}.
one_to_many_dict = {}
for row_idx, row in one_to_many_df.iterrows():
    # Keep only the tags that are filled in for this row (NaN cells are dropped).
    tag_values = [row[col] for col in columns_list if pd.notna(row[col])]
    # Group by Akan sentence; a repeated English sentence replaces its earlier tag list.
    one_to_many_dict.setdefault(row[akan_source_one], {})[row[eng_target_many]] = tag_values

one_to_many_dict

{'Anadwo biara ɔfrɛ no.': {'He calls her every night.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'MASCULINE',
   'FEMININE',
   'ANIMATE',
   'STATEMENT'],
  'He calls him every night.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'MASCULINE',
   'MASCULINE',
   'ANIMATE',
   'STATEMENT'],
  'She calls her every night.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'FEMININE',
   'FEMININE',
   'ANIMATE',
   'STATEMENT']},
 'Anɔpa yi, ohyiaa no.': {'She met him this morning.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'FEMININE',
   'MASCULINE',
   'ANIMATE',
   'STATEMENT'],
  'She met her this morning.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'FEMININE',
   'FEMININE',
   'ANIMATE',
   'STATEMENT'],
  'He met her this morning.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'MASCULINE',
   'FEMININE',
   'ANIMATE',
   'STATEMENT'],
  'He met him this morning.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'MASCULINE',
   'MASCULINE',
   'ANIMATE',
   'STATEMENT']},
 'Merenyɛ sa

In [52]:
# sanity check
len(one_to_many_dict)

132

In [54]:
import json

# Save the Akan -> {English: tags} mapping as UTF-8 JSON.
# The target directory now matches `../../data/tagged_data`, the location the workbook is
# read from and the many-to-one mapping is written to; the previous `../data/...` path
# pointed at a different directory.
with open('../../data/tagged_data/one_to_many_akan_eng_mappings_with_tags.json', 'w', encoding='utf-8') as fp:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json.dump(one_to_many_dict, fp, indent=4, ensure_ascii=False)

### Load Many to One

In [11]:
# NOTE(review): the variable names below are carried over from the one-to-many section and
# are kept because the following cells reference them. In this section the sheet is
# "M-1_tags", i.e. the Akan column is the "many" side and the English column the "one" side.
akan_source_one = "Akan (Source, Many)"
eng_target_many = "English (Target, One)"
one_many_cols_to_rename = {
    "Akuapem Twi": akan_source_one,
    "English": eng_target_many,
}

# Read the many-to-one sheet and relabel its sentence columns in one chained expression.
one_to_many_df = pd.read_excel(path, sheet_name="M-1_tags").rename(columns=one_many_cols_to_rename)
one_to_many_df

Unnamed: 0,AUD_SIZE,STATUS,AGE,FORMALITY,GENDER,GENDER.1,ANIMACY,SPEECH_ACT,"Akan (Source, Many)","English (Target, One)"
0,INDIVIDUAL,EQUAL,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Me na mewɔ ha.,I belong here.
1,INDIVIDUAL,EQUAL,PEER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,ANSWER,Me fata sɛ mewɔ ha.,I belong here.
2,INDIVIDUAL,EQUAL,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ha na me wɔ.,I belong here.
3,INDIVIDUAL,,PEER,FORMAL,MASCULINE,,ANIMATE,STATEMENT,Kwaku to dwom yiye.,Kwaku sings quite well.
4,INDIVIDUAL,,PEER,FORMAL,MASCULINE,,ANIMATE,STATEMENT,Kwaku nim nwom to.,Kwaku sings quite well.
...,...,...,...,...,...,...,...,...,...,...
395,INDIVIDUAL,,ELDER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,QUESTION,Ne su te sɛn?,What's it like?
396,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Na minhu nea menyɛ.,I ran out of ideas.
397,INDIVIDUAL,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Me nsusuiɛ asa.,I ran out of ideas.
398,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Menni adwene biara bio.,I ran out of ideas.


In [12]:
print(f"Subset of dataset: {one_to_many_df.head(7)}")

Subset of dataset:      AUD_SIZE STATUS   AGE FORMALITY     GENDER  GENDER.1  ANIMACY SPEECH_ACT  \
0  INDIVIDUAL  EQUAL  PEER  INFORMAL    NEUTRAL   NEUTRAL  ANIMATE  STATEMENT   
1  INDIVIDUAL  EQUAL  PEER    FORMAL    NEUTRAL   NEUTRAL  ANIMATE     ANSWER   
2  INDIVIDUAL  EQUAL  PEER  INFORMAL    NEUTRAL   NEUTRAL  ANIMATE  STATEMENT   
3  INDIVIDUAL    NaN  PEER    FORMAL  MASCULINE       NaN  ANIMATE  STATEMENT   
4  INDIVIDUAL    NaN  PEER    FORMAL  MASCULINE       NaN  ANIMATE  STATEMENT   
5  INDIVIDUAL    NaN  PEER  INFORMAL    NEUTRAL  FEMININE  ANIMATE  STATEMENT   
6  INDIVIDUAL    NaN  PEER  INFORMAL  MASCULINE  FEMININE  ANIMATE  STATEMENT   

     Akan (Source, Many)         English (Target, One)  
0         Me na mewɔ ha.                I belong here.  
1    Me fata sɛ mewɔ ha.                I belong here.  
2           Ha na me wɔ.                I belong here.  
3    Kwaku to dwom yiye.       Kwaku sings quite well.  
4     Kwaku nim nwom to.       Kwaku sings quit

In [13]:
# Tag columns = every column of the frame except the two sentence columns.
columns_list = list(one_to_many_df.columns)
for sentence_col in (akan_source_one, eng_target_many):
    columns_list.remove(sentence_col)

In [14]:
columns_list

['AUD_SIZE',
 'STATUS',
 'AGE',
 'FORMALITY',
 'GENDER',
 'GENDER.1',
 'ANIMACY',
 'SPEECH_ACT']

In [16]:
# Nested mapping: English sentence -> {Akan sentence -> list of non-missing tag values}.
# (The name `one_to_many_dict` is reused from the previous section.)
one_to_many_dict = {}
for row_idx, row in one_to_many_df.iterrows():
    # Keep only the tags that are filled in for this row (NaN cells are dropped).
    tag_values = [row[col] for col in columns_list if pd.notna(row[col])]
    # Group by English sentence; a repeated Akan sentence replaces its earlier tag list.
    one_to_many_dict.setdefault(row[eng_target_many], {})[row[akan_source_one]] = tag_values

one_to_many_dict

{'I belong here.': {'Me na mewɔ ha.': ['INDIVIDUAL',
   'EQUAL',
   'PEER',
   'INFORMAL',
   'NEUTRAL',
   'NEUTRAL',
   'ANIMATE',
   'STATEMENT'],
  'Me fata sɛ mewɔ ha.': ['INDIVIDUAL',
   'EQUAL',
   'PEER',
   'FORMAL',
   'NEUTRAL',
   'NEUTRAL',
   'ANIMATE',
   'ANSWER'],
  'Ha na me wɔ.': ['INDIVIDUAL',
   'EQUAL',
   'PEER',
   'INFORMAL',
   'NEUTRAL',
   'NEUTRAL',
   'ANIMATE',
   'STATEMENT']},
 'Kwaku sings quite well.': {'Kwaku to dwom yiye.': ['INDIVIDUAL',
   'PEER',
   'FORMAL',
   'MASCULINE',
   'ANIMATE',
   'STATEMENT'],
  'Kwaku nim nwom to.': ['INDIVIDUAL',
   'PEER',
   'FORMAL',
   'MASCULINE',
   'ANIMATE',
   'STATEMENT']},
 'He calls her up every night.': {'Anadwo biara ɔfrɛ no.': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'NEUTRAL',
   'FEMININE',
   'ANIMATE',
   'STATEMENT'],
  'Ɔfrɛ no anadwo biara': ['INDIVIDUAL',
   'PEER',
   'INFORMAL',
   'MASCULINE',
   'FEMININE',
   'ANIMATE',
   'STATEMENT']},
 "I don't have any money at all.": {'Minni sika 

In [17]:
# sanity check
len(one_to_many_dict)

124

In [19]:
import json

# Write the English -> {Akan: tags} mapping to disk as UTF-8 JSON.
many_to_one_json_path = '../../data/tagged_data/many_to_one_akan_eng_mappings_with_tags.json'
with open(many_to_one_json_path, mode='w', encoding='utf-8') as json_file:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \u escapes.
    json_file.write(json.dumps(one_to_many_dict, indent=4, ensure_ascii=False))

In [None]:
# NOTE(review): ZeroShotPromptFactory, FewShotPromptFactory and ChainOfThoughtPrompt are not
# imported by any active line in this notebook (the `prompting_strategies` import in the
# imports cell is commented out), so this cell raises NameError on a fresh kernel.
# NOTE(review): the same prompt setup appears again later in the notebook, after the models
# are created; presumably only one copy is needed.
# One prompt builder per strategy, each constructed with the "direct" argument.
zero_shot_direct_prompt = ZeroShotPromptFactory("direct")
few_shot_direct_prompt = FewShotPromptFactory("direct")
chain_of_thought_prompt = ChainOfThoughtPrompt("direct")
# Order matters: later cells index this list positionally (0 = zero-shot, 1 = few-shot, 2 = CoT).
prompt_names = [zero_shot_direct_prompt, few_shot_direct_prompt, chain_of_thought_prompt]

In [None]:
print(" ### INITIALIZE PROMPTS ###")
print(f" 1. {prompt_names[0].get_name()} \n 2. {prompt_names[1].get_name()} \n 3. {prompt_names[2].get_name()}")

 ### INITIALIZE PROMPTS ###
 1. zero_shot-direct 
 2. few_shot-direct 
 3. chain_of_thought-direct


In [None]:
# NOTE(review): `akan_one_to_eng_many_mappings` is not assigned in any cell visible in this
# notebook — confirm where it is loaded. The experiment loop indexes each value by integer
# position, so it presumably maps an Akan sentence to a list of English options; verify.
mappings = akan_one_to_eng_many_mappings.items()
# Quick smoke test on the first 3 sources. Caution: `islice` returns a one-shot iterator and
# the experiment loop iterates `mappings` once per prompt type — wrap it in list(...) if used.
# mappings = islice(akan_one_to_eng_many_mappings.items(), 3)

In [None]:
# NOTE(review): the same experiment loop appears again later in the notebook. At this
# position `models` and `generate_data` have not been defined yet (both are created in cells
# further down), so this copy fails on Restart & Run All — presumably a stale copy to delete.
#
# For every prompt type, every source sentence and every target position, ask each model to
# pick an option and record one result tuple:
#   (source, target, target index, chosen index, chosen sentence, "<prompt>-<model>", prompt)
# `idx` is a flag so that only the first prompt of each prompt type is printed.
idx = 0
results = {}
for prompt_name in prompt_names:
    prompt_name_and_type = prompt_name.get_name()
    print(f"\n ### PROMPT NAME: {prompt_name_and_type} ###")
    prompt_results = []
    for src, tgts in tqdm(mappings):
        for tgt_idx, tgt in enumerate(tgts):
            # The prompt is built from `src` and the full `tgts` list; it does not use `tgt`,
            # so the same arguments are passed once per target position.
            prompt = prompt_name.get_base_prompt(akan_sentence=src, english_sentences=tgts)
            if idx == 0:
                print(f"\n   PROMPT: {prompt}")
                idx = idx + 1
            for model in models:
                llm_result = generate_data(prompt, model)
                # NOTE(review): int(...) raises ValueError if the reply is not a bare integer,
                # and the index is not range-checked (a negative value would wrap around).
                llm_result_to_sentence = tgts[int(llm_result)]
                result = (src, tgt, tgt_idx, int(llm_result), llm_result_to_sentence, f"{prompt_name_and_type}-{model.__name__()}", prompt_name_and_type)
                prompt_results.append(result)
    # Re-arm the "print first prompt" flag for the next prompt type.
    idx = 0
    results[prompt_name_and_type] = prompt_results
results


 ### PROMPT NAME: zero_shot-direct ###


  0%|          | 0/9 [00:00<?, ?it/s]


	PROMPT: You are translating from Akan to English. Select the most appropriate English translation from the options provided.
        
        Akan sentence: "Anadwo biara ɔfrɛ no."
        
        Translation options: 
	0. He calls her every night.
	1. He calls him every night.
	2. She calls her every night.
	3. He calls her every night.
        
        Select the best translation by number only. Respond with just the number (1, 2, 3, etc.).
        


100%|██████████| 9/9 [00:11<00:00,  1.24s/it]



 ### PROMPT NAME: few_shot-direct ###


  0%|          | 0/9 [00:00<?, ?it/s]


	PROMPT: You are translating from Akan to English. Select the most appropriate English translation from the options provided.
        Examples:
            Akan: "Ɔyɛ me maame"
            Options: 1. He is my mother 2. She is my mother 3. They are my mother
            Selection: 2 (must be an integer)

            Akan: "Mema wo akwaaba"
            Options: 1. I welcome you (singular) 2. We welcome you (plural) 3. I welcomed you
            Selection: 1 (must be an integer)

            Return only integers!
            
            

        Now select for this sentence:
        Akan sentence: "Anadwo biara ɔfrɛ no."
        
        Translation options: 
	0. He calls her every night.
	1. He calls him every night.
	2. She calls her every night.
	3. He calls her every night.
        
        Select the best translation by number only. Respond with just the number (1, 2, 3, etc.).
        


100%|██████████| 9/9 [00:09<00:00,  1.10s/it]



 ### PROMPT NAME: chain_of_thought-direct ###


  0%|          | 0/9 [00:00<?, ?it/s]


	PROMPT: 
        You are translating from Akan to English. Follow these reasoning steps to select the most appropriate translation:

        Akan sentence: "Anadwo biara ɔfrɛ no."

        Translation options: 
	0. He calls her every night.
	1. He calls him every night.
	2. She calls her every night.
	3. He calls her every night.

        Step 1: Analyze the Akan sentence structure and identify key linguistic features.
        Step 2: Consider what each translation option implies about the context.
        Step 3: Determine which option best matches the likely intended meaning.
        Step 4: Select the best translation by number. Respond with just the number (1, 2, 3, etc.) as an integer."

        Do NOT provide your reasoning for steps 1-3.
        


100%|██████████| 9/9 [00:10<00:00,  1.18s/it]


{'zero_shot-direct': [('Anadwo biara ɔfrɛ no.',
   'He calls her every night.',
   0,
   0,
   'He calls her every night.',
   'zero_shot-direct-llama-3.1-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'He calls her every night.',
   0,
   3,
   'He calls her every night.',
   'zero_shot-direct-llama-3.3-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'He calls him every night.',
   1,
   3,
   'He calls her every night.',
   'zero_shot-direct-llama-3.1-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'He calls him every night.',
   1,
   3,
   'He calls her every night.',
   'zero_shot-direct-llama-3.3-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'She calls her every night.',
   2,
   0,
   'He calls her every night.',
   'zero_shot-direct-llama-3.1-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'She calls her every night.',
   2,
   3,
   'He calls her every night.',
   '

In [None]:
# One result list per prompt type, in the order the prompt types were inserted into `results`
# (zero-shot, few-shot, chain-of-thought).
results_by_prompt = list(results.values())
zero_shot_results = results_by_prompt[0]
few_shot_results = results_by_prompt[1]
chain_of_thought_results = results_by_prompt[2]

# Column labels for the fields of each result tuple, in tuple order.
col_names = [
    'akan_sentence',
    'english_sentences',
    'true_label',
    'llm_label',
    'llm_sentence',
    'llm_name',
    'prompt_name',
]

In [None]:
# Turn each prompt type's list of result tuples into a DataFrame with the shared column labels.
zero_shot_df, few_shot_df, chain_of_thought_df = (
    pd.DataFrame(rows, columns=col_names)
    for rows in (zero_shot_results, few_shot_results, chain_of_thought_results)
)
print(f"Subset of COT: {chain_of_thought_df.head(7)}")

Subset of COT:            akan_sentence           english_sentences  true_label  llm_label  \
0  Anadwo biara ɔfrɛ no.   He calls her every night.           0          3   
1  Anadwo biara ɔfrɛ no.   He calls her every night.           0          0   
2  Anadwo biara ɔfrɛ no.   He calls him every night.           1          0   
3  Anadwo biara ɔfrɛ no.   He calls him every night.           1          0   
4  Anadwo biara ɔfrɛ no.  She calls her every night.           2          3   
5  Anadwo biara ɔfrɛ no.  She calls her every night.           2          0   
6  Anadwo biara ɔfrɛ no.   He calls her every night.           3          0   

                llm_sentence                                        llm_name  \
0  He calls her every night.  chain_of_thought-direct-llama-3.1-70b-instruct   
1  He calls her every night.  chain_of_thought-direct-llama-3.3-70b-instruct   
2  He calls her every night.  chain_of_thought-direct-llama-3.1-70b-instruct   
3  He calls her every night.  ch

## Initialize Models + Prompt Models

In [10]:
print("####### INITIALIZE MODELS + PROMPTS #######")

####### INITIALIZE MODELS + PROMPTS #######


In [11]:
# Create one model instance per identifier via the project's factory.
tgmf = TextGenerationModelFactory
models = [
    tgmf.create_instance(model_id)
    for model_id in ('llama-3.1-70b-instruct', 'llama-3.3-70b-instruct')
]
llama_31_70b_instruct, llama_33_70b_instruct = models
# To run a single model instead, use: models = [llama_33_70b_instruct]

In [12]:
print(" ### INITIALIZE MODELS ###")
print(f" 1. {models[0].model_name} \n 2. {models[1].model_name}")

 ### INITIALIZE MODELS ###
 1. llama-3.1-70b-instruct 
 2. llama-3.3-70b-instruct


In [13]:
def generate_data(prompt: str, model):
    """Send ``prompt`` to ``model`` and return whatever ``model.generate`` returns.

    Args:
        prompt: Full prompt text to pass to the model.
        model: Any object exposing a ``generate(prompt)`` method.

    Returns:
        The unmodified return value of ``model.generate(prompt)``.
    """
    return model.generate(prompt)

In [14]:
def get_llm_labels(df, prompt_model_col_name):
    """Select the rows produced by one prompt-type/model combination.

    Args:
        df: Results frame with at least the columns ``llm_name``, ``akan_sentence``,
            ``true_label`` and ``llm_label``.
        prompt_model_col_name: Value of ``llm_name`` to keep (exact match).

    Returns:
        A 3-tuple of Series ``(akan_sentence, true_label, llm_label)`` restricted to the
        matching rows; the original row index is preserved.
    """
    matches = df['llm_name'] == prompt_model_col_name
    return tuple(
        df.loc[matches, column]
        for column in ('akan_sentence', 'true_label', 'llm_label')
    )

In [15]:
# NOTE(review): ZeroShotPromptFactory, FewShotPromptFactory and ChainOfThoughtPrompt are not
# imported by any active line in this notebook (the `prompting_strategies` import in the
# imports cell is commented out), so this cell raises NameError on a fresh kernel.
# One prompt builder per strategy, each constructed with the "direct" argument.
# Order matters: later cells index this list positionally (0 = zero-shot, 1 = few-shot, 2 = CoT).
prompt_names = [
    ZeroShotPromptFactory("direct"),
    FewShotPromptFactory("direct"),
    ChainOfThoughtPrompt("direct"),
]
zero_shot_direct_prompt, few_shot_direct_prompt, chain_of_thought_prompt = prompt_names

In [16]:
print(" ### INITIALIZE PROMPTS ###")
print(f" 1. {prompt_names[0].get_name()} \n 2. {prompt_names[1].get_name()} \n 3. {prompt_names[2].get_name()}")

 ### INITIALIZE PROMPTS ###
 1. zero_shot-direct 
 2. few_shot-direct 
 3. chain_of_thought-direct


In [17]:
# NOTE(review): `akan_one_to_eng_many_mappings` is not assigned in any cell visible in this
# notebook — confirm where it is loaded. The experiment loop indexes each value by integer
# position, so it presumably maps an Akan sentence to a list of English options; verify.
mappings = akan_one_to_eng_many_mappings.items()
# Quick smoke test on the first 3 sources. Caution: `islice` returns a one-shot iterator and
# the experiment loop iterates `mappings` once per prompt type — wrap it in list(...) if used.
# mappings = islice(akan_one_to_eng_many_mappings.items(), 3)

In [None]:
# Sentinel stored in the `llm_label` field when a model reply cannot be mapped to an option.
INVALID_LLM_LABEL = -1


def parse_llm_choice(llm_output, num_options):
    """Convert a raw model reply into a 0-based option index.

    Args:
        llm_output: Value returned by ``generate_data`` (expected to be the option number).
        num_options: Number of translation options that were offered in the prompt.

    Returns:
        The chosen index as an ``int`` when the reply is a whole number in
        ``[0, num_options)``; ``None`` for anything else (free text, empty reply,
        negative or too-large numbers).
    """
    if isinstance(llm_output, str):
        # Tolerate surrounding whitespace and a trailing period (" 2\n", "2.").
        llm_output = llm_output.strip().rstrip(".")
    try:
        choice = int(llm_output)
    except (TypeError, ValueError):
        return None
    # Negative numbers are rejected explicitly: tgts[-1] would silently pick the last option.
    return choice if 0 <= choice < num_options else None


# For every prompt type, every source sentence and every target position, ask each model to
# pick an option and record one result tuple:
#   (source, target, target index, chosen index, chosen sentence, "<prompt>-<model>", prompt)
# A reply that is not a valid option index no longer aborts the whole run: it is reported via
# tqdm.write and recorded with label INVALID_LLM_LABEL and sentence None, so every
# prompt/model combination still yields the same number of rows.
# NOTE(review): the captured prompts number the options from 0 but ask for "(1, 2, 3, etc.)";
# confirm in the prompt templates which numbering the models are expected to use.
results = {}
for prompt_name in prompt_names:
    prompt_name_and_type = prompt_name.get_name()
    print(f"\n ### PROMPT NAME: {prompt_name_and_type} ###")
    prompt_results = []
    prompt_shown = False  # print only the first prompt of each prompt type
    for src, tgts in tqdm(mappings):
        for tgt_idx, tgt in enumerate(tgts):
            # The prompt is built from `src` and the full `tgts` list; each target position is
            # queried separately so that every target gets its own result row.
            prompt = prompt_name.get_base_prompt(akan_sentence=src, english_sentences=tgts)
            if not prompt_shown:
                print(f"\n   PROMPT: {prompt}")
                prompt_shown = True
            for model in models:
                llm_name = f"{prompt_name_and_type}-{model.__name__()}"
                llm_result = generate_data(prompt, model)
                llm_label = parse_llm_choice(llm_result, len(tgts))
                if llm_label is None:
                    # tqdm.write keeps the progress bar intact (warnings are globally silenced).
                    tqdm.write(f" [invalid reply] {llm_name} | source: {src!r} | reply: {llm_result!r}")
                    llm_label = INVALID_LLM_LABEL
                    llm_result_to_sentence = None
                else:
                    llm_result_to_sentence = tgts[llm_label]
                result = (src, tgt, tgt_idx, llm_label, llm_result_to_sentence, llm_name, prompt_name_and_type)
                prompt_results.append(result)
    results[prompt_name_and_type] = prompt_results
results


 ### PROMPT NAME: zero_shot-direct ###


  0%|          | 0/9 [00:00<?, ?it/s]


	PROMPT: You are translating from Akan to English. Select the most appropriate English translation from the options provided.
        
        Akan sentence: "Anadwo biara ɔfrɛ no."
        
        Translation options: 
	0. He calls her every night.
	1. He calls him every night.
	2. She calls her every night.
	3. He calls her every night.
        
        Select the best translation by number only. Respond with just the number (1, 2, 3, etc.).
        


100%|██████████| 9/9 [00:11<00:00,  1.24s/it]



 ### PROMPT NAME: few_shot-direct ###


  0%|          | 0/9 [00:00<?, ?it/s]


	PROMPT: You are translating from Akan to English. Select the most appropriate English translation from the options provided.
        Examples:
            Akan: "Ɔyɛ me maame"
            Options: 1. He is my mother 2. She is my mother 3. They are my mother
            Selection: 2 (must be an integer)

            Akan: "Mema wo akwaaba"
            Options: 1. I welcome you (singular) 2. We welcome you (plural) 3. I welcomed you
            Selection: 1 (must be an integer)

            Return only integers!
            
            

        Now select for this sentence:
        Akan sentence: "Anadwo biara ɔfrɛ no."
        
        Translation options: 
	0. He calls her every night.
	1. He calls him every night.
	2. She calls her every night.
	3. He calls her every night.
        
        Select the best translation by number only. Respond with just the number (1, 2, 3, etc.).
        


100%|██████████| 9/9 [00:09<00:00,  1.10s/it]



 ### PROMPT NAME: chain_of_thought-direct ###


  0%|          | 0/9 [00:00<?, ?it/s]


	PROMPT: 
        You are translating from Akan to English. Follow these reasoning steps to select the most appropriate translation:

        Akan sentence: "Anadwo biara ɔfrɛ no."

        Translation options: 
	0. He calls her every night.
	1. He calls him every night.
	2. She calls her every night.
	3. He calls her every night.

        Step 1: Analyze the Akan sentence structure and identify key linguistic features.
        Step 2: Consider what each translation option implies about the context.
        Step 3: Determine which option best matches the likely intended meaning.
        Step 4: Select the best translation by number. Respond with just the number (1, 2, 3, etc.) as an integer."

        Do NOT provide your reasoning for steps 1-3.
        


100%|██████████| 9/9 [00:10<00:00,  1.18s/it]


{'zero_shot-direct': [('Anadwo biara ɔfrɛ no.',
   'He calls her every night.',
   0,
   0,
   'He calls her every night.',
   'zero_shot-direct-llama-3.1-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'He calls her every night.',
   0,
   3,
   'He calls her every night.',
   'zero_shot-direct-llama-3.3-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'He calls him every night.',
   1,
   3,
   'He calls her every night.',
   'zero_shot-direct-llama-3.1-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'He calls him every night.',
   1,
   3,
   'He calls her every night.',
   'zero_shot-direct-llama-3.3-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'She calls her every night.',
   2,
   0,
   'He calls her every night.',
   'zero_shot-direct-llama-3.1-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   'She calls her every night.',
   2,
   3,
   'He calls her every night.',
   '

In [19]:
# One result list per prompt type, in the order the prompt types were inserted into `results`
# (zero-shot, few-shot, chain-of-thought).
results_by_prompt = list(results.values())
zero_shot_results = results_by_prompt[0]
few_shot_results = results_by_prompt[1]
chain_of_thought_results = results_by_prompt[2]

# Column labels for the fields of each result tuple, in tuple order.
col_names = [
    'akan_sentence',
    'english_sentences',
    'true_label',
    'llm_label',
    'llm_sentence',
    'llm_name',
    'prompt_name',
]

In [20]:
# Turn each prompt type's list of result tuples into a DataFrame with the shared column labels.
zero_shot_df, few_shot_df, chain_of_thought_df = (
    pd.DataFrame(rows, columns=col_names)
    for rows in (zero_shot_results, few_shot_results, chain_of_thought_results)
)
print(f"Subset of COT: {chain_of_thought_df.head(7)}")

Subset of COT:            akan_sentence           english_sentences  true_label  llm_label  \
0  Anadwo biara ɔfrɛ no.   He calls her every night.           0          3   
1  Anadwo biara ɔfrɛ no.   He calls her every night.           0          0   
2  Anadwo biara ɔfrɛ no.   He calls him every night.           1          0   
3  Anadwo biara ɔfrɛ no.   He calls him every night.           1          0   
4  Anadwo biara ɔfrɛ no.  She calls her every night.           2          3   
5  Anadwo biara ɔfrɛ no.  She calls her every night.           2          0   
6  Anadwo biara ɔfrɛ no.   He calls her every night.           3          0   

                llm_sentence                                        llm_name  \
0  He calls her every night.  chain_of_thought-direct-llama-3.1-70b-instruct   
1  He calls her every night.  chain_of_thought-direct-llama-3.3-70b-instruct   
2  He calls her every night.  chain_of_thought-direct-llama-3.1-70b-instruct   
3  He calls her every night.  ch

# Load Prompts
## Multiple LLMs

In [21]:
print("####### FILTER BY PROMPT TYPE x LLM #######")

####### FILTER BY PROMPT TYPE x LLM #######


In [22]:
def realign_results(new_df, results_df, prompt_type, models):
    """Add one pair of columns per model to ``new_df`` for a single prompt type.

    For each model, the rows of ``results_df`` whose ``llm_name`` equals
    ``"<prompt_type>-<model name>"`` are selected with ``get_llm_labels`` and written to
    ``new_df`` as ``<prefix>-akan_sentence`` and ``<prefix>-llm_label``. The shared
    ``true_label`` column is rewritten on every iteration with that model's true labels.

    Args:
        new_df: Frame to extend; modified in place.
        results_df: Long-format results for one prompt type.
        prompt_type: Prompt name used as the first part of the column prefix.
        models: Iterable of model objects exposing ``__name__()``.

    Returns:
        ``new_df`` (the same object that was passed in).

    NOTE(review): values are assigned positionally via ``.values``, so this assumes every
    model has the same number of rows in the same order — confirm against the caller.
    """
    for llm in models:
        col_prefix = f"{prompt_type}-{llm.__name__()}"
        print(f" PROMPT TYPE x LLM: {col_prefix}")

        akan_sentences, true_labels, llm_labels = get_llm_labels(results_df, col_prefix)
        column_sources = (
            (f"{col_prefix}-akan_sentence", akan_sentences),
            ("true_label", true_labels),
            (f"{col_prefix}-llm_label", llm_labels),
        )
        for column_name, series in column_sources:
            new_df[column_name] = series.values

    return new_df

In [23]:
# Build the wide results table: one pass per prompt type, each adding that prompt's
# per-model columns to the same frame (realign_results mutates it in place).
full_results_df = pd.DataFrame()
prompt_frames = (zero_shot_df, few_shot_df, chain_of_thought_df)
for prompt_builder, prompt_df in zip(prompt_names, prompt_frames):
    realign_results(full_results_df, prompt_df, prompt_builder.get_name(), models)
full_results_df

 PROMPT TYPE x LLM: zero_shot-direct-llama-3.1-70b-instruct
 PROMPT TYPE x LLM: zero_shot-direct-llama-3.3-70b-instruct
 PROMPT TYPE x LLM: few_shot-direct-llama-3.1-70b-instruct
 PROMPT TYPE x LLM: few_shot-direct-llama-3.3-70b-instruct
 PROMPT TYPE x LLM: chain_of_thought-direct-llama-3.1-70b-instruct
 PROMPT TYPE x LLM: chain_of_thought-direct-llama-3.3-70b-instruct


Unnamed: 0,zero_shot-direct-llama-3.1-70b-instruct-akan_sentence,true_label,zero_shot-direct-llama-3.1-70b-instruct-llm_label,zero_shot-direct-llama-3.3-70b-instruct-akan_sentence,zero_shot-direct-llama-3.3-70b-instruct-llm_label,few_shot-direct-llama-3.1-70b-instruct-akan_sentence,few_shot-direct-llama-3.1-70b-instruct-llm_label,few_shot-direct-llama-3.3-70b-instruct-akan_sentence,few_shot-direct-llama-3.3-70b-instruct-llm_label,chain_of_thought-direct-llama-3.1-70b-instruct-akan_sentence,chain_of_thought-direct-llama-3.1-70b-instruct-llm_label,chain_of_thought-direct-llama-3.3-70b-instruct-akan_sentence,chain_of_thought-direct-llama-3.3-70b-instruct-llm_label
0,Anadwo biara ɔfrɛ no.,0,0,Anadwo biara ɔfrɛ no.,3,Anadwo biara ɔfrɛ no.,3,Anadwo biara ɔfrɛ no.,1,Anadwo biara ɔfrɛ no.,3,Anadwo biara ɔfrɛ no.,0
1,Anadwo biara ɔfrɛ no.,1,3,Anadwo biara ɔfrɛ no.,3,Anadwo biara ɔfrɛ no.,1,Anadwo biara ɔfrɛ no.,1,Anadwo biara ɔfrɛ no.,0,Anadwo biara ɔfrɛ no.,0
2,Anadwo biara ɔfrɛ no.,2,0,Anadwo biara ɔfrɛ no.,3,Anadwo biara ɔfrɛ no.,1,Anadwo biara ɔfrɛ no.,1,Anadwo biara ɔfrɛ no.,3,Anadwo biara ɔfrɛ no.,0
3,Anadwo biara ɔfrɛ no.,3,3,Anadwo biara ɔfrɛ no.,3,Anadwo biara ɔfrɛ no.,1,Anadwo biara ɔfrɛ no.,1,Anadwo biara ɔfrɛ no.,0,Anadwo biara ɔfrɛ no.,0
4,"Anɔpa yi, ohyiaa no.",0,2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2
5,"Anɔpa yi, ohyiaa no.",1,2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2
6,"Anɔpa yi, ohyiaa no.",2,2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2
7,"Anɔpa yi, ohyiaa no.",3,2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2,"Anɔpa yi, ohyiaa no.",2
8,Bɔ mmɔden yɛ saa.,0,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1
9,Bɔ mmɔden yɛ saa.,1,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1,Bɔ mmɔden yɛ saa.,1


In [24]:
# print(f" Subset of results by prompt x llm: {full_results_df.head(7)}")

In [25]:
print("\n####### EVALUATION METRICS #######")


####### EVALUATION METRICS #######


In [33]:
# Print a classification report for every model x prompt-type combination.
# NOTE(review): labels are option positions. In the captured results a source can list the
# same English sentence at two positions (e.g. 0 and 3), so a reply naming the other
# position is scored as wrong even though the sentence is identical — confirm this is intended.
get_metrics = EvaluationMetric()

actual_label = full_results_df["true_label"].values
for model in models:
    for prompt_name in prompt_names:
        # Column written by realign_results for this prompt type and model.
        llm_labels_col_name = f"{prompt_name.get_name()}-{model.__name__()}-llm_label"
        print(f" TRUE LABELS: {actual_label}")
        model_predictions = full_results_df[llm_labels_col_name].values
        print(f" PROMPT TYPE x LLM NAME: {llm_labels_col_name}")
        print(f"\tWITH LABELS {model_predictions}")
        get_metrics.eval_classification_report(actual_label, model_predictions)
        # Separator line followed by one blank line.
        print(" ============================================================================================\n")


 TRUE LABELS: [0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 0 1 2 0 1 2 3 0 1 2 3 4 0 1 2 3 0 1 2]
 PROMPT TYPE x LLM NAME: zero_shot-direct-llama-3.1-70b-instruct-llm_label
	WITH LABELS [0 3 0 3 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 1 1 1]
              precision    recall  f1-score   support

           0       0.50      0.11      0.18         9
           1       0.27      0.67      0.39         9
           2       0.25      0.22      0.24         9
           3       0.50      0.17      0.25         6
           4       0.00      0.00      0.00         1

    accuracy                           0.29        34
   macro avg       0.30      0.23      0.21        34
weighted avg       0.36      0.29      0.26        34


 TRUE LABELS: [0 1 2 3 0 1 2 3 0 1 2 3 0 1 2 0 1 2 0 1 2 3 0 1 2 3 4 0 1 2 3 0 1 2]
 PROMPT TYPE x LLM NAME: few_shot-direct-llama-3.1-70b-instruct-llm_label
	WITH LABELS [3 1 1 1 2 2 2 2 1 1 1 1 1 1 1 1 1 1 1 3 1 1 1 1 1 1 1 2 2 2 2 1 1 1]
              precision    

In [27]:
# save_zero_shot_results_dir = os.path.join('../data/', "experiement_a-zero_fewresults_df.csv")
# print(save_zero_shot_results_dir)
# DataProcessing.save_data(full_results_df, save_zero_shot_results_dir)

In [28]:
# save_zero_shot_results_dir = os.path.join('../data/', "zero_shot_results_df.csv")
# print(save_zero_shot_results_dir)
# DataProcessing.save_data(zero_shot_results_df, save_zero_shot_results_dir)