# LLM Text Generation

In [1]:
import os
import sys


import pandas as pd

from tqdm import tqdm

# Current working directory of the kernel process
# (assumed to be the directory this notebook lives in — TODO confirm)
notebook_dir = os.getcwd()
# Put the sibling `utils` directory (one level up from the working directory)
# on the module search path; this must happen before the `llms` import below.
# NOTE(review): presumably `llms` lives in ../utils — confirm against the repo layout.
sys.path.append(os.path.join(notebook_dir, '../utils'))

from llms import TextGenerationModelFactory

  from .autonotebook import tqdm as notebook_tqdm


## Load Data

In [2]:
# Excel workbook that the loading cells below read sheet by sheet.
file_name = "akuapem_with_tags_dataset-verified_data.xlsx"
# Relative path, so it resolves against the kernel's current working directory.
path = os.path.join("../data/", file_name)

### Load One to Many

In [3]:
# Column labels that spell out the translation direction for this sheet:
# one Akan source sentence paired with several English targets.
akan_source_one = "Akan (Source, One)"
eng_target_many = "English (Target, Many)"
one_many_cols_to_rename = {
    "Akuapem Twi": akan_source_one,
    "English": eng_target_many,
}

# Read the "1-M_tags" sheet and relabel its two language columns in one chain.
one_to_many_df = pd.read_excel(path, sheet_name="1-M_tags").rename(
    columns=one_many_cols_to_rename
)
one_to_many_df

Unnamed: 0,AUD_SIZE,STATUS,AGE,FORMALITY,GENDER,GENDER_2,ANIMACY,SPEECH_ACT,"Akan (Source, One)","English (Target, Many)"
0,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
1,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,MASCULINE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls him every night.
2,INDIVIDUAL,,PEER,INFORMAL,FEMININE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,She calls her every night.
3,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
4,INDIVIDUAL,,PEER,INFORMAL,FEMININE,MASCULINE,ANIMATE,STATEMENT,"Anɔpa yi, ohyiaa no.",She met him this morning.
...,...,...,...,...,...,...,...,...,...,...
458,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",It is definitely going to rain.
459,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",Rain is surely on the way.
460,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We've got to find a new babysitter.
461,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We need to look for another babysitter.


In [4]:
# For every distinct Akan sentence, gather the English sentences listed with it.
english_by_akan = one_to_many_df.groupby(akan_source_one)[eng_target_many].apply(list)
akan_one_to_eng_many_mappings = english_by_akan.to_dict()

# Echo each grouping; the trailing "\n" leaves a blank line between entries.
for akan, e_list in akan_one_to_eng_many_mappings.items():
    print(f"Key: {akan}", f"Values: {e_list}\n", sep="\n")

Key: Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.
Values: ['The first American colonists arrived in the 17th century.', 'The first settlers came to America in the 1600s.', 'America was first colonized in the 17th century.', 'The first American colonists landed in the 17th century.']

Key: Anadwo biara ɔfrɛ no.
Values: ['He calls her every night.', 'He calls him every night.', 'She calls her every night.', 'He calls her every night.']

Key: Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.
Values: [' Fortunately, no passengers were injured.', ' Luckily, none of the passengers were hurt.', ' Thankfully, no passengers were harmed.', ' Thankfully, all passengers were safe.']

Key: Anɔpa yi, ohyiaa no.
Values: ['She met him this morning.', 'She met her this morning.', 'He met her this morning.', 'He met him this morning.']

Key: Asamaoh ye nipa kese.
Values: ["Asamoah's great.", 'Asamoah is fantastic.', 'Asamoah is wonderful.', 'Asamoah is an amazing person.']

Key: Asamoah an

### Load Many to One

In [5]:
# Column labels that spell out the translation direction for this sheet:
# several Akan source sentences paired with one English target.
akan_source_many = "Akan (Source, Many)"
eng_target_one = "English (Target, One)"
many_to_1_cols_to_rename = {
    "Akuapem Twi": akan_source_many,
    "English": eng_target_one,
}

# Read the "M-1_tags" sheet and relabel its two language columns in one chain.
many_to_one_df = pd.read_excel(path, sheet_name="M-1_tags").rename(
    columns=many_to_1_cols_to_rename
)
many_to_one_df

Unnamed: 0,AUD_SIZE,STATUS,AGE,FORMALITY,GENDER,GENDER.1,ANIMACY,SPEECH_ACT,"Akan (Source, Many)","English (Target, One)"
0,INDIVIDUAL,EQUAL,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Me na mewɔ ha.,I belong here.
1,INDIVIDUAL,EQUAL,PEER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,ANSWER,Me fata sɛ mewɔ ha.,I belong here.
2,INDIVIDUAL,EQUAL,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ha na me wɔ.,I belong here.
3,INDIVIDUAL,,PEER,FORMAL,MASCULINE,,ANIMATE,STATEMENT,Kwaku to dwom yiye.,Kwaku sings quite well.
4,INDIVIDUAL,,PEER,FORMAL,MASCULINE,,ANIMATE,STATEMENT,Kwaku nim nwom to.,Kwaku sings quite well.
...,...,...,...,...,...,...,...,...,...,...
395,INDIVIDUAL,,ELDER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,QUESTION,Ne su te sɛn?,What's it like?
396,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Na minhu nea menyɛ.,I ran out of ideas.
397,INDIVIDUAL,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Me nsusuiɛ asa.,I ran out of ideas.
398,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Menni adwene biara bio.,I ran out of ideas.


In [6]:
# For every distinct English sentence, gather the Akan sentences listed with it.
akan_by_english = many_to_one_df.groupby(eng_target_one)[akan_source_many].apply(list)
akan_man_to_eng_one_mappings = akan_by_english.to_dict()

# Echo each grouping; the trailing "\n" leaves a blank line between entries.
for eng, a_list in akan_man_to_eng_one_mappings.items():
    print(f"Key: {eng}", f"Values: {a_list}\n", sep="\n")

    

Key: Act like a man.
Values: ["Yɛ w'ade te sɛ onipa.", 'Di wo dwuma sɛ ɔbarima.', 'Ma mmarisɛm mmra wo mu.', 'Yɛ ɔkokoɔdurufoɔ.']

Key: Almost three thousand people died.
Values: ['Nnipa bɛyɛ mpensa wuwui.', 'Nnipa a wɔwui no bɛduru mpem mmiɛnsa.', 'Nnipa a wɔwui no bɛyɛ mpem mmiɛnsa.']

Key: Asamoah didn't tell Abena his secret.
Values: ['Asamoah anka ne kokoam asɛm ankyerɛ Abena.', "Asamoah anka n'ahintasɛm ankyerɛ Abena.", 'Asamoah de nsɛm sumaa Abena.']

Key: Asamoah is not serious about his studies.
Values: [' "Sɛ Yalah no, n\'adesua no ho nhia no kɛse.', "Asamoah ani nku n'adesua ho.", "Asamoah atoto n'adesua ase.", 'Asamoah adesua nyɛ adeɛ a ɛho hia no.']

Key: Asamoah isn't my father.
Values: ['Me papa a wɔfrɛ no ulah no nyɛ me papa.', 'Asamoah nyɛ me papa.', 'Ɛnyɛ me papa ne Asamoah.', 'Me papa nyɛ Asamoah.']

Key: Be respectful to your superiors, if you have any.
Values: [' Sɛ wowɔ bi a,  bu wo mpanyimfo.', ' Sɛ wowɔ akannifoɔ a,  fa anidie ma wɔn.', ' Sɛ wowɔ mpanimfoɔ a,  b

## Initialize Models + Prompt Models

In [7]:
# Alias for the imported factory; `create_instance` is called on it directly,
# without constructing a factory object first.
tgmf = TextGenerationModelFactory
# Only 'llama-3.3-70b-instruct' is active in this run. An earlier configuration
# also created 'llama-3.1-70b-instruct' the same way and listed both in `models`.
llama_33_70b_instruct = tgmf.create_instance('llama-3.3-70b-instruct')
# Every entry in this list is queried once per prompt by the cells below.
models = [llama_33_70b_instruct]

In [8]:
def generate_data(prompt: str, models: list) -> dict:
    """Send one prompt to every model and collect the responses.

    Args:
        prompt: Text handed unchanged to each model's ``generate`` method.
        models: Objects exposing a ``model_name`` attribute and a
            ``generate(prompt)`` method.

    Returns:
        A dict mapping each model's ``model_name`` to the value returned by
        its ``generate`` call, in the order the models were given. The dict
        is empty when ``models`` is empty; if two models share a name, the
        later one's output replaces the earlier one's.
    """
    model_outputs = {}
    for model in models:
        model_outputs[model.model_name] = model.generate(prompt)
    return model_outputs

### One to Many

In [9]:
idx = 0  # not read in this cell; kept in case other cells expect the name
direct_llm_selection = {}
for key, value in tqdm(akan_one_to_eng_many_mappings.items()):
    # `value` ends with a model-output dict if this cell has already run once.
    # Leave those dicts out so the prompt only ever lists the English
    # candidates and a re-run does not stack a second result onto the list.
    candidates = [item for item in value if not isinstance(item, dict)]

    # Prompt fixes: space restored after the quoted Akan sentence, and the
    # closing instruction now names the Akan sentence as the thing to match.
    prompt = (
        "You are a linguist that knows both Akan and English languages. "
        f"I need you to take this Akan sentence: '{key}' alone without surrounding context "
        f"and translate it to one of the following English sentences: '{candidates}'. "
        "Do NOT generate anything other than the best English sentence that fits the Akan sentence."
    )
    generated_data = generate_data(prompt, models)
    direct_llm_selection[key] = generated_data
    # Update the list in place (same list object as before) so the mapping
    # still ends with exactly one {model_name: output} entry per key.
    value[:] = candidates + [generated_data]

100%|██████████| 132/132 [00:43<00:00,  3.02it/s]


In [10]:
# Show the selections: Akan source sentence -> {model_name: model output}.
direct_llm_selection

{'Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.': {'llama-3.3-70b-instruct': 'America was first colonized in the 17th century.'},
 'Anadwo biara ɔfrɛ no.': {'llama-3.3-70b-instruct': 'He calls her every night.'},
 'Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.': {'llama-3.3-70b-instruct': 'Thankfully, no passengers were harmed.'},
 'Anɔpa yi, ohyiaa no.': {'llama-3.3-70b-instruct': 'He met her this morning.'},
 'Asamaoh ye nipa kese.': {'llama-3.3-70b-instruct': 'Asamoah is wonderful.'},
 'Asamoah anhu sɛnea na wobu Esi anim animtiaa no.': {'llama-3.3-70b-instruct': "Asamoah didn't notice the look of disdain on Esi's face."},
 'Asamoah betumi abua no.': {'llama-3.3-70b-instruct': 'Asamoah is able to answer.'},
 'Asamoah yɛ ɔnokwafo.': {'llama-3.3-70b-instruct': 'Asamoah is an honest person.'},
 'Awarefo no de sika pii na ɛyɛɛ wɔn fie.': {'llama-3.3-70b-instruct': 'They spent a lot of money on furnishing their house.'},
 'Bere a ade tɔɔ ne so ara pɛ na yɛde no kɔɔ ayare

In [11]:
# Show the English candidate lists; the prompting loop added a
# {model_name: model output} dict as the last element of each list.
akan_one_to_eng_many_mappings

{'Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.': ['The first American colonists arrived in the 17th century.',
  'The first settlers came to America in the 1600s.',
  'America was first colonized in the 17th century.',
  'The first American colonists landed in the 17th century.',
  {'llama-3.3-70b-instruct': 'America was first colonized in the 17th century.'}],
 'Anadwo biara ɔfrɛ no.': ['He calls her every night.',
  'He calls him every night.',
  'She calls her every night.',
  'He calls her every night.',
  {'llama-3.3-70b-instruct': 'He calls her every night.'}],
 'Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.': [' Fortunately, no passengers were injured.',
  ' Luckily, none of the passengers were hurt.',
  ' Thankfully, no passengers were harmed.',
  ' Thankfully, all passengers were safe.',
  {'llama-3.3-70b-instruct': 'Thankfully, no passengers were harmed.'}],
 'Anɔpa yi, ohyiaa no.': ['She met him this morning.',
  'She met her this morning.',
  'He met her t

### Many to One

In [12]:
idx = 0  # not read in this cell; kept in case other cells expect the name
m_to_o_direct_llm_selection = {}
for key, value in tqdm(akan_man_to_eng_one_mappings.items()):
    # `value` ends with a model-output dict if this cell has already run once.
    # Leave those dicts out so the prompt only ever lists the Akan
    # candidates and a re-run does not stack a second result onto the list.
    candidates = [item for item in value if not isinstance(item, dict)]

    # Prompt fix: the closing instruction now names the English sentence as
    # the thing the chosen Akan sentence must match.
    prompt = (
        "You are a linguist that knows both Akan and English languages. "
        f"I need you to take these Akan sentences: '{candidates}' alone without surrounding context "
        f"and choose which translation most match the one English sentences: '{key}'. "
        "Do NOT generate anything other than the best Akan sentence that fits the English sentence."
    )
    generated_data = generate_data(prompt, models)
    m_to_o_direct_llm_selection[key] = generated_data
    # Update the list in place (same list object as before) so the mapping
    # still ends with exactly one {model_name: output} entry per key.
    value[:] = candidates + [generated_data]

100%|██████████| 124/124 [01:04<00:00,  1.91it/s]


In [13]:
# Show the selections: English target sentence -> {model_name: model output}.
m_to_o_direct_llm_selection

{'Act like a man.': {'llama-3.3-70b-instruct': 'Di wo dwuma sɛ ɔbarima.'},
 'Almost three thousand people died.': {'llama-3.3-70b-instruct': 'Nnipa bɛyɛ mpensa wuwui.'},
 "Asamoah didn't tell Abena his secret.": {'llama-3.3-70b-instruct': 'Asamoah anka ne kokoam asɛm ankyerɛ Abena.'},
 'Asamoah is not serious about his studies.': {'llama-3.3-70b-instruct': "Asamoah ani nku n'adesua ho."},
 "Asamoah isn't my father.": {'llama-3.3-70b-instruct': 'Ɛnyɛ me papa ne Asamoah.'},
 'Be respectful to your superiors, if you have any.': {'llama-3.3-70b-instruct': 'Sɛ wowɔ bi a,  bu wo mpanyimfo.'},
 "Both of my parents aren't alive.": {'llama-3.3-70b-instruct': "M'awofoɔ awu."},
 'Cain was evil.': {'llama-3.3-70b-instruct': 'Ná Kain yɛ ɔbɔnefo.'},
 'Can I go in there now?': {'llama-3.3-70b-instruct': 'Mɛtumi awura mu seesei?'},
 'Children mostly lack patience.': {'llama-3.3-70b-instruct': 'Adeɛ a mmɔfra nni ne abotare.'},
 'Could I borrow a pencil?': {'llama-3.3-70b-instruct': 'Mɛtumi afɛm wo pɛns

In [14]:
# Show the Akan candidate lists; the prompting loop added a
# {model_name: model output} dict as the last element of each list.
akan_man_to_eng_one_mappings

{'Act like a man.': ["Yɛ w'ade te sɛ onipa.",
  'Di wo dwuma sɛ ɔbarima.',
  'Ma mmarisɛm mmra wo mu.',
  'Yɛ ɔkokoɔdurufoɔ.',
  {'llama-3.3-70b-instruct': 'Di wo dwuma sɛ ɔbarima.'}],
 'Almost three thousand people died.': ['Nnipa bɛyɛ mpensa wuwui.',
  'Nnipa a wɔwui no bɛduru mpem mmiɛnsa.',
  'Nnipa a wɔwui no bɛyɛ mpem mmiɛnsa.',
  {'llama-3.3-70b-instruct': 'Nnipa bɛyɛ mpensa wuwui.'}],
 "Asamoah didn't tell Abena his secret.": ['Asamoah anka ne kokoam asɛm ankyerɛ Abena.',
  "Asamoah anka n'ahintasɛm ankyerɛ Abena.",
  'Asamoah de nsɛm sumaa Abena.',
  {'llama-3.3-70b-instruct': 'Asamoah anka ne kokoam asɛm ankyerɛ Abena.'}],
 'Asamoah is not serious about his studies.': [' "Sɛ Yalah no, n\'adesua no ho nhia no kɛse.',
  "Asamoah ani nku n'adesua ho.",
  "Asamoah atoto n'adesua ase.",
  'Asamoah adesua nyɛ adeɛ a ɛho hia no.',
  {'llama-3.3-70b-instruct': "Asamoah ani nku n'adesua ho."}],
 "Asamoah isn't my father.": ['Me papa a wɔfrɛ no ulah no nyɛ me papa.',
  'Asamoah nyɛ me 