# LLM Text Generation

In [1]:
import os
import sys


import pandas as pd

from tqdm import tqdm
from itertools import islice

# Extend the import search path with ``../utils`` (relative to the directory the
# notebook is launched from) so the local modules imported below can presumably
# be resolved from there.
notebook_dir = os.getcwd()
utils_dir = os.path.join(notebook_dir, '../utils')
sys.path.append(utils_dir)

from llms import TextGenerationModelFactory
from prompting_strategies import ZeroShotPromptFactory, FewShotPromptFactory

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Widen text columns so long sentences and candidate lists are not truncated
# when frames are displayed. The fully qualified option name is used because
# pandas only resolves abbreviated names by pattern matching, which can become
# ambiguous if another option with a similar name is introduced.
pd.set_option('display.max_colwidth', 800)
# pd.set_option('display.max_columns', None)
# pd.set_option('display.max_rows', None)

## Load Data

In [3]:
# Location of the Excel workbook, relative to the notebook's working directory.
data_dir = "../data/"
file_name = "akuapem_with_tags_dataset-verified_data.xlsx"
path = os.path.join(data_dir, file_name)

### Load One to Many

In [4]:
# Column names used for the one-to-many (one Akan sentence, several English
# translations) view of the data.
akan_source_one = "Akan (Source, One)"
eng_target_many = "English (Target, Many)"
one_many_cols_to_rename = {
    "Akuapem Twi": akan_source_one,
    "English": eng_target_many,
}

# Read the "1-M_tags" sheet and rename the two sentence columns in one chain.
one_to_many_df = (
    pd.read_excel(path, sheet_name="1-M_tags")
    .rename(columns=one_many_cols_to_rename)
)
one_to_many_df

Unnamed: 0,AUD_SIZE,STATUS,AGE,FORMALITY,GENDER,GENDER_2,ANIMACY,SPEECH_ACT,"Akan (Source, One)","English (Target, Many)"
0,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
1,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,MASCULINE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls him every night.
2,INDIVIDUAL,,PEER,INFORMAL,FEMININE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,She calls her every night.
3,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
4,INDIVIDUAL,,PEER,INFORMAL,FEMININE,MASCULINE,ANIMATE,STATEMENT,"Anɔpa yi, ohyiaa no.",She met him this morning.
...,...,...,...,...,...,...,...,...,...,...
458,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",It is definitely going to rain.
459,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",Rain is surely on the way.
460,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We've got to find a new babysitter.
461,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We need to look for another babysitter.


In [5]:
# Build {Akan sentence: [English translations, in row order]} by walking the
# groups of the one-to-many frame.
akan_one_to_eng_many_mappings = {
    akan_sentence: list(english_group)
    for akan_sentence, english_group in one_to_many_df.groupby(akan_source_one)[eng_target_many]
}

In [6]:
# The distinct Akan source sentences, in the mapping's key order.
akan_sentences = [*akan_one_to_eng_many_mappings]
akan_sentences

['Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.',
 'Anadwo biara ɔfrɛ no.',
 'Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.',
 'Anɔpa yi, ohyiaa no.',
 'Asamaoh ye nipa kese.',
 'Asamoah anhu sɛnea na wobu Esi anim animtiaa no.',
 'Asamoah betumi abua no.',
 'Asamoah yɛ ɔnokwafo.',
 'Awarefo no de sika pii na ɛyɛɛ wɔn fie.',
 'Bere a ade tɔɔ ne so ara pɛ na yɛde no kɔɔ ayaresabea hɔ.',
 'Bere a Ɔhemmaa no soo nkuruwa no mu no, ɔde guu adaka no mu.',
 'Bere bɛn na wiase no bɛba awiei?',
 'Bɔ mmɔden yɛ saa.',
 'Dɛn na wobɛka sɛ ɛsɛ sɛ wokɔ nnwonto no ase?',
 'Efi saa bere no, minni adagyew koraa.',
 'Egye obi na wahu emu biako.',
 'Esi bɔɔ ne tirim sɛ ɔbɛyɛ ne fie ho mfonini.',
 'Esiane ahum no nti, hyɛn no antumi amfi po so hyɛn gyinabea hɔ.',
 "Esiane sɛ na m'ani so atan me nti, anka mereyɛ akyere me.",
 'Hena na ɔka kyerɛɛ wo sɛ mintumi nnom nsu?',
 'Kwaku anyɛ bɔne biara.',
 'Kwaku dii akɔneaba.',
 'Kwaku ntumi ntu ntɛm sɛnea Abena betumi ayɛ no.',
 "M'ani begye ho s

## Initialize Models + Prompt Models

In [7]:
# Create one text-generation model instance per model identifier via the factory.
tgmf = TextGenerationModelFactory
llama_31_70b_instruct, llama_33_70b_instruct = (
    tgmf.create_instance(model_id)
    for model_id in ('llama-3.1-70b-instruct', 'llama-3.3-70b-instruct')
)
# To run with a single model, shorten this list (e.g. [llama_33_70b_instruct]).
models = [llama_31_70b_instruct, llama_33_70b_instruct]

In [8]:
def generate_data(prompt: str, model):
    """Run ``prompt`` through ``model`` and hand back whatever it produced.

    Args:
        prompt: The prompt text passed to the model.
        model: Any object exposing a ``generate(prompt)`` method.

    Returns:
        The value returned by ``model.generate(prompt)``, unmodified.
    """
    return model.generate(prompt)

In [9]:
def get_llm_labels(df, prompt_model_col_name):
    """Select the label and sentence columns for one prompt/model combination.

    Args:
        df: Frame with at least the columns ``llm_name``, ``llm_label`` and
            ``llm_sentence``.
        prompt_model_col_name: Value of ``llm_name`` identifying the rows to keep.

    Returns:
        A ``(llm_label, llm_sentence)`` pair of Series restricted to the
        matching rows; both keep the index labels of ``df``.
    """
    matching_rows = df.loc[df['llm_name'].eq(prompt_model_col_name)]
    return matching_rows['llm_label'], matching_rows['llm_sentence']

### Zero-Shot and Few-Shot Prompts

In [10]:
# Both prompting strategies are built with the same "direct" variant.
prompt_style = "direct"
zero_shot_direct_prompt = ZeroShotPromptFactory(prompt_style)
few_shot_direct_prompt = FewShotPromptFactory(prompt_style)

# Strategies to evaluate, in the order their results are consumed later.
# Chain-of-thought prompting is not wired in yet.
prompt_names = [zero_shot_direct_prompt, few_shot_direct_prompt]

In [11]:
# (Akan sentence, [English candidates]) pairs to feed to the models. A dict
# items view can be iterated repeatedly, which matters because the generation
# loop walks `mappings` once per prompt strategy.
mappings = akan_one_to_eng_many_mappings.items()
# mappings
# Smoke-test variant limited to the first 3 sentences. NOTE: islice returns a
# one-shot iterator, so wrap it in list(...) or only the first prompt strategy
# will receive any data.
# mappings = islice(akan_one_to_eng_many_mappings.items(), 3)
# type(mappings)

In [12]:
# Ask every model to pick a translation for every Akan sentence under every
# prompt strategy. `results` maps each prompt-strategy object to a list of
# tuples: (akan sentence, candidate list, chosen index, chosen sentence,
# "<prompt name>-<model name>", prompt name).
#
# NOTE(review): the printed prompt numbers the options from 0 while its final
# instruction gives "(1, 2, 3, etc.)" as example answers; replies are used as
# 0-based indices below -- confirm the prompt wording in prompting_strategies.
idx = 0  # becomes 1 once the very first prompt has been echoed for inspection
results = {}
for prompt_name in prompt_names:
    prompt_name_and_type = prompt_name.get_name()
    prompt_results = []
    for key, values in tqdm(mappings):
        prompt = prompt_name.get_base_prompt(akan_sentence=key, english_sentences=values)
        if idx == 0:
            print(prompt_name_and_type)
            print(f"Prompt: {prompt}")
            idx = idx + 1

        for model in models:
            llm_name = f"{prompt_name_and_type}-{model.__name__()}"
            llm_result = generate_data(prompt, model)

            # Parse the reply once and fail with enough context to find the
            # offending call, instead of a bare int()/indexing error.
            try:
                llm_label = int(llm_result)
            except (TypeError, ValueError) as err:
                raise ValueError(
                    f"{llm_name} returned a non-numeric reply {llm_result!r} for sentence {key!r}"
                ) from err
            # Reject negative values explicitly: Python would otherwise index
            # from the end of the list and silently record a wrong choice.
            if not 0 <= llm_label < len(values):
                raise ValueError(
                    f"{llm_name} returned option {llm_label} for sentence {key!r}, "
                    f"but only options 0..{len(values) - 1} exist"
                )

            llm_result_to_sentence = values[llm_label]
            result = (key, values, llm_label, llm_result_to_sentence, llm_name, prompt_name_and_type)
            prompt_results.append(result)
    results[prompt_name] = prompt_results
results

  0%|          | 0/132 [00:00<?, ?it/s]

zero_shot-direct
Prompt: You are translating from Akan to English. Select the most appropriate English translation from the options provided.
        
        Akan sentence: "Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu."
        
        Translation options: 
	0. The first American colonists arrived in the 17th century.
	1. The first settlers came to America in the 1600s.
	2. America was first colonized in the 17th century.
	3. The first American colonists landed in the 17th century.
        
        Select the best translation by number only. Respond with just the number (1, 2, 3, etc.).
        


100%|██████████| 132/132 [00:37<00:00,  3.56it/s]
100%|██████████| 132/132 [00:37<00:00,  3.54it/s]


{<prompting_strategies.ZeroShotPromptFactory at 0x1407c5250>: [('Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.',
   ['The first American colonists arrived in the 17th century.',
    'The first settlers came to America in the 1600s.',
    'America was first colonized in the 17th century.',
    'The first American colonists landed in the 17th century.'],
   1,
   'The first settlers came to America in the 1600s.',
   'zero_shot-direct-llama-3.1-70b-instruct',
   'zero_shot-direct'),
  ('Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.',
   ['The first American colonists arrived in the 17th century.',
    'The first settlers came to America in the 1600s.',
    'America was first colonized in the 17th century.',
    'The first American colonists landed in the 17th century.'],
   2,
   'America was first colonized in the 17th century.',
   'zero_shot-direct-llama-3.3-70b-instruct',
   'zero_shot-direct'),
  ('Anadwo biara ɔfrɛ no.',
   ['He calls her eve

In [13]:
# `results` keeps insertion order, so its values follow the order of
# `prompt_names`: zero-shot first, few-shot second.
results_by_prompt = list(results.values())
zero_shot_results = results_by_prompt[0]
few_shot_results = results_by_prompt[1]

# Column names matching the layout of each result tuple.
col_names = [
    'akan_sentence',
    'english_sentences',
    'llm_label',
    'llm_sentence',
    'llm_name',
    'prompt_name',
]

In [14]:
# Long-format zero-shot results: one row per (Akan sentence, model).
zero_shot_df = pd.DataFrame(data=zero_shot_results, columns=col_names)
zero_shot_df

Unnamed: 0,akan_sentence,english_sentences,llm_label,llm_sentence,llm_name,prompt_name
0,Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.,"[The first American colonists arrived in the 17th century., The first settlers came to America in the 1600s., America was first colonized in the 17th century., The first American colonists landed in the 17th century.]",1,The first settlers came to America in the 1600s.,zero_shot-direct-llama-3.1-70b-instruct,zero_shot-direct
1,Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.,"[The first American colonists arrived in the 17th century., The first settlers came to America in the 1600s., America was first colonized in the 17th century., The first American colonists landed in the 17th century.]",2,America was first colonized in the 17th century.,zero_shot-direct-llama-3.3-70b-instruct,zero_shot-direct
2,Anadwo biara ɔfrɛ no.,"[He calls her every night., He calls him every night., She calls her every night., He calls her every night.]",0,He calls her every night.,zero_shot-direct-llama-3.1-70b-instruct,zero_shot-direct
3,Anadwo biara ɔfrɛ no.,"[He calls her every night., He calls him every night., She calls her every night., He calls her every night.]",3,He calls her every night.,zero_shot-direct-llama-3.3-70b-instruct,zero_shot-direct
4,"Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.","[ Fortunately, no passengers were injured., Luckily, none of the passengers were hurt., Thankfully, no passengers were harmed., Thankfully, all passengers were safe.]",1,"Luckily, none of the passengers were hurt.",zero_shot-direct-llama-3.1-70b-instruct,zero_shot-direct
...,...,...,...,...,...,...
259,Ɛyɛ nwonwa yiye.,"[It's quite remarkable., It is truly amazing., It is quite impressive.]",1,It is truly amazing.,zero_shot-direct-llama-3.3-70b-instruct,zero_shot-direct
260,Ɛyɛ pɛ!,"[Perfect!, Spot on!, Exactly!, Just right!]",2,Exactly!,zero_shot-direct-llama-3.1-70b-instruct,zero_shot-direct
261,Ɛyɛ pɛ!,"[Perfect!, Spot on!, Exactly!, Just right!]",2,Exactly!,zero_shot-direct-llama-3.3-70b-instruct,zero_shot-direct
262,ɔyarehwɛfo no bɛkyerɛ wo ɔkwan a wobɛfa so ayɛ saa.,"[The nurse will tell you how to do it., The nurse will explain the procedure to you., The nurse will show you what to do.]",1,The nurse will explain the procedure to you.,zero_shot-direct-llama-3.1-70b-instruct,zero_shot-direct


In [15]:
# Long-format few-shot results: one row per (Akan sentence, model).
few_shot_df = pd.DataFrame(data=few_shot_results, columns=col_names)
few_shot_df

Unnamed: 0,akan_sentence,english_sentences,llm_label,llm_sentence,llm_name,prompt_name
0,Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.,"[The first American colonists arrived in the 17th century., The first settlers came to America in the 1600s., America was first colonized in the 17th century., The first American colonists landed in the 17th century.]",0,The first American colonists arrived in the 17th century.,few_shot-direct-llama-3.1-70b-instruct,few_shot-direct
1,Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.,"[The first American colonists arrived in the 17th century., The first settlers came to America in the 1600s., America was first colonized in the 17th century., The first American colonists landed in the 17th century.]",0,The first American colonists arrived in the 17th century.,few_shot-direct-llama-3.3-70b-instruct,few_shot-direct
2,Anadwo biara ɔfrɛ no.,"[He calls her every night., He calls him every night., She calls her every night., He calls her every night.]",3,He calls her every night.,few_shot-direct-llama-3.1-70b-instruct,few_shot-direct
3,Anadwo biara ɔfrɛ no.,"[He calls her every night., He calls him every night., She calls her every night., He calls her every night.]",1,He calls him every night.,few_shot-direct-llama-3.3-70b-instruct,few_shot-direct
4,"Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.","[ Fortunately, no passengers were injured., Luckily, none of the passengers were hurt., Thankfully, no passengers were harmed., Thankfully, all passengers were safe.]",1,"Luckily, none of the passengers were hurt.",few_shot-direct-llama-3.1-70b-instruct,few_shot-direct
...,...,...,...,...,...,...
259,Ɛyɛ nwonwa yiye.,"[It's quite remarkable., It is truly amazing., It is quite impressive.]",1,It is truly amazing.,few_shot-direct-llama-3.3-70b-instruct,few_shot-direct
260,Ɛyɛ pɛ!,"[Perfect!, Spot on!, Exactly!, Just right!]",2,Exactly!,few_shot-direct-llama-3.1-70b-instruct,few_shot-direct
261,Ɛyɛ pɛ!,"[Perfect!, Spot on!, Exactly!, Just right!]",2,Exactly!,few_shot-direct-llama-3.3-70b-instruct,few_shot-direct
262,ɔyarehwɛfo no bɛkyerɛ wo ɔkwan a wobɛfa so ayɛ saa.,"[The nurse will tell you how to do it., The nurse will explain the procedure to you., The nurse will show you what to do.]",1,The nurse will explain the procedure to you.,few_shot-direct-llama-3.1-70b-instruct,few_shot-direct


In [16]:
# chain_of_thought_df = pd.DataFrame(chain_of_thought_results, columns=['akan_sentence', 'english_sentences', 'llm_label', 'llm_name', 'prompt_name'])
# chain_of_thought_df

In [17]:
# chain_of_thought_df['llm_label'].values

In [18]:
# chain_of_thought_df['llm_name'].values
# filt_llm_name = (chain_of_thought_df['llm_name'] == f"{prompt_names[2]}_{ model.__name__()}")
# filt_llm_name

In [19]:
# Sanity check: the name of the first prompt strategy, which is later passed
# to realign_results as `prompt_type` to build the per-model column prefixes.
prompt_names[0].get_name()

'zero_shot-direct'

In [20]:
def realign_results(new_df, results_df, prompt_type, models):
    """Spread long-format LLM results into per-model columns of ``new_df``.

    For each model, the rows of ``results_df`` whose ``llm_name`` equals
    ``f"{prompt_type}-{model.__name__()}"`` are selected, and their
    ``llm_label`` / ``llm_sentence`` values are written to two new columns of
    ``new_df`` named ``"<prefix>-llm_label"`` and ``"<prefix>-llm_sentence"``.

    Values are matched to the rows of ``new_df`` through the ``akan_sentence``
    column rather than by position, so the outcome does not depend on
    ``results_df`` being in the same row order as ``new_df``.

    Args:
        new_df: Frame with an ``akan_sentence`` column. It is modified in place.
        results_df: Frame with the columns ``akan_sentence``, ``llm_name``,
            ``llm_label`` and ``llm_sentence``.
        prompt_type: Prompt name used as the first part of the column prefix.
        models: Iterable of objects exposing a callable ``__name__()``.

    Returns:
        The same ``new_df`` object, with two columns added per model.

    Raises:
        ValueError: If, for some model, ``results_df`` holds more than one row
            for a sentence or no row for a sentence present in ``new_df``.
    """
    for model in models:
        col_prefix = f"{prompt_type}-{model.__name__()}"
        print(f"col_prefix: {col_prefix}")

        model_rows = results_df[results_df['llm_name'] == col_prefix]

        # A sentence-keyed lookup is only well defined with unique sentences.
        if model_rows['akan_sentence'].duplicated().any():
            raise ValueError(f"{col_prefix}: multiple result rows for the same akan_sentence")
        by_sentence = model_rows.set_index('akan_sentence')

        # Fail loudly rather than leaving NaN holes when a result is missing.
        has_result = new_df['akan_sentence'].isin(by_sentence.index)
        if not has_result.all():
            missing = new_df.loc[~has_result, 'akan_sentence'].tolist()
            raise ValueError(f"{col_prefix}: no result for {len(missing)} sentence(s), e.g. {missing[0]!r}")

        new_df[f"{col_prefix}-llm_label"] = new_df['akan_sentence'].map(by_sentence['llm_label'])
        new_df[f"{col_prefix}-llm_sentence"] = new_df['akan_sentence'].map(by_sentence['llm_sentence'])

    return new_df

In [21]:
# Start a wide frame with one row per Akan sentence, then attach the zero-shot
# label/sentence columns for every model.
new_df = pd.DataFrame({'akan_sentence': akan_sentences})
zero_shot_prompt_type = prompt_names[0].get_name()
zero_shot_results_df = realign_results(new_df, zero_shot_df, zero_shot_prompt_type, models)
zero_shot_results_df

col_prefix: zero_shot-direct-llama-3.1-70b-instruct
col_prefix: zero_shot-direct-llama-3.3-70b-instruct


Unnamed: 0,akan_sentence,zero_shot-direct-llama-3.1-70b-instruct-llm_label,zero_shot-direct-llama-3.1-70b-instruct-llm_sentence,zero_shot-direct-llama-3.3-70b-instruct-llm_label,zero_shot-direct-llama-3.3-70b-instruct-llm_sentence
0,Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.,1,The first settlers came to America in the 1600s.,2,America was first colonized in the 17th century.
1,Anadwo biara ɔfrɛ no.,0,He calls her every night.,3,He calls her every night.
2,"Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.",1,"Luckily, none of the passengers were hurt.",1,"Luckily, none of the passengers were hurt."
3,"Anɔpa yi, ohyiaa no.",2,He met her this morning.,2,He met her this morning.
4,Asamaoh ye nipa kese.,3,Asamoah is an amazing person.,3,Asamoah is an amazing person.
...,...,...,...,...,...
127,Ɛsɛ sɛ yɛma yɛn ani da hɔ.,1,We have to stay alert.,2,We must keep our eyes open.
128,Ɛyɛ nwonwa sɛ ebetumi aba saa.,1,How could this be possible?,1,How could this be possible?
129,Ɛyɛ nwonwa yiye.,1,It is truly amazing.,1,It is truly amazing.
130,Ɛyɛ pɛ!,2,Exactly!,2,Exactly!


In [22]:
# Attach the few-shot columns for every model. NOTE: realign_results writes
# into `new_df` and returns that same object, so `few_shot_results_df`,
# `zero_shot_results_df` and `new_df` all refer to one frame that now holds
# both the zero-shot and the few-shot columns.
few_shot_results_df = realign_results(new_df, few_shot_df, prompt_names[1].get_name(), models)
few_shot_results_df

col_prefix: few_shot-direct-llama-3.1-70b-instruct
col_prefix: few_shot-direct-llama-3.3-70b-instruct


Unnamed: 0,akan_sentence,zero_shot-direct-llama-3.1-70b-instruct-llm_label,zero_shot-direct-llama-3.1-70b-instruct-llm_sentence,zero_shot-direct-llama-3.3-70b-instruct-llm_label,zero_shot-direct-llama-3.3-70b-instruct-llm_sentence,few_shot-direct-llama-3.1-70b-instruct-llm_label,few_shot-direct-llama-3.1-70b-instruct-llm_sentence,few_shot-direct-llama-3.3-70b-instruct-llm_label,few_shot-direct-llama-3.3-70b-instruct-llm_sentence
0,Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.,1,The first settlers came to America in the 1600s.,2,America was first colonized in the 17th century.,0,The first American colonists arrived in the 17th century.,0,The first American colonists arrived in the 17th century.
1,Anadwo biara ɔfrɛ no.,0,He calls her every night.,3,He calls her every night.,3,He calls her every night.,1,He calls him every night.
2,"Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.",1,"Luckily, none of the passengers were hurt.",1,"Luckily, none of the passengers were hurt.",1,"Luckily, none of the passengers were hurt.",1,"Luckily, none of the passengers were hurt."
3,"Anɔpa yi, ohyiaa no.",2,He met her this morning.,2,He met her this morning.,2,He met her this morning.,2,He met her this morning.
4,Asamaoh ye nipa kese.,3,Asamoah is an amazing person.,3,Asamoah is an amazing person.,2,Asamoah is wonderful.,3,Asamoah is an amazing person.
...,...,...,...,...,...,...,...,...,...
127,Ɛsɛ sɛ yɛma yɛn ani da hɔ.,1,We have to stay alert.,2,We must keep our eyes open.,1,We have to stay alert.,2,We must keep our eyes open.
128,Ɛyɛ nwonwa sɛ ebetumi aba saa.,1,How could this be possible?,1,How could this be possible?,1,How could this be possible?,0,One wonders how it's possible.
129,Ɛyɛ nwonwa yiye.,1,It is truly amazing.,1,It is truly amazing.,1,It is truly amazing.,1,It is truly amazing.
130,Ɛyɛ pɛ!,2,Exactly!,2,Exactly!,2,Exactly!,2,Exactly!


In [23]:
# results_df = pd.concat([zero_shot_results_df, few_shot_results_df])
# results_df