# LLM Text Generation

In [1]:
import os
import sys


import pandas as pd

from tqdm import tqdm

# Get the current working directory of the notebook
notebook_dir = os.getcwd()
# Append the sibling '../utils' directory (resolved against the working
# directory, so the notebook must be launched from its own folder) to the
# module search path -- presumably where the `llms` module imported below lives.
sys.path.append(os.path.join(notebook_dir, '../utils'))

from llms import TextGenerationModelFactory

  from .autonotebook import tqdm as notebook_tqdm


## Load Data

In [2]:
# Column labels used for the two sentence columns once the sheet is loaded.
akan_source_many = "Akan (Source, Many)"
eng_target_one = "English (Target, One)"
cols_to_rename = {
    "Akuapem Twi": akan_source_many,
    "English": eng_target_one,
}

# The workbook is looked up in ../data relative to the working directory.
file_name = "akuapem_with_tags_dataset-verified_data.xlsx"
path = os.path.join("../data/", file_name)

# Read the "1-M_tags" sheet and relabel the sentence columns in a single chain.
one_to_many_df = pd.read_excel(path, sheet_name="1-M_tags").rename(columns=cols_to_rename)
# many_to_one_df = pd.read_excel(path, sheet_name="M-1_tags")
one_to_many_df

Unnamed: 0,AUD_SIZE,STATUS,AGE,FORMALITY,GENDER,GENDER_2,ANIMACY,SPEECH_ACT,"Akan (Source, Many)","English (Target, One)"
0,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
1,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,MASCULINE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls him every night.
2,INDIVIDUAL,,PEER,INFORMAL,FEMININE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,She calls her every night.
3,INDIVIDUAL,,PEER,INFORMAL,MASCULINE,FEMININE,ANIMATE,STATEMENT,Anadwo biara ɔfrɛ no.,He calls her every night.
4,INDIVIDUAL,,PEER,INFORMAL,FEMININE,MASCULINE,ANIMATE,STATEMENT,"Anɔpa yi, ohyiaa no.",She met him this morning.
...,...,...,...,...,...,...,...,...,...,...
458,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",It is definitely going to rain.
459,INDIVIDUAL,,PEER,FORMAL,NEUTRAL,NEUTRAL,INANIMATE,STATEMENT,"Nokwarem no, osu bɛtɔ.",Rain is surely on the way.
460,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We've got to find a new babysitter.
461,SMALL GROUP,,PEER,INFORMAL,NEUTRAL,NEUTRAL,ANIMATE,STATEMENT,Ɛsɛ sɛ yehu akokoaa foforo bi a wɔawo no foforo.,We need to look for another babysitter.


In [3]:
# one_to_many_sentences_only_df = one_to_many_df.loc[:, [eng_target_one, akan_source_many]]
# one_to_many_sentences_only_df.head(33)

In [4]:
# Collapse the frame into {Akan sentence: [all of its English renderings]}.
akan_one_to_eng_many_mappings = (
    one_to_many_df
    .groupby(akan_source_many)[eng_target_one]
    .apply(list)
    .to_dict()
)

# Echo every source sentence together with its candidate translations.
for akan in akan_one_to_eng_many_mappings:
    e_list = akan_one_to_eng_many_mappings[akan]
    print(f"Key: {akan}")
    print(f"Values: {e_list}\n")

Key: Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.
Values: ['The first American colonists arrived in the 17th century.', 'The first settlers came to America in the 1600s.', 'America was first colonized in the 17th century.', 'The first American colonists landed in the 17th century.']

Key: Anadwo biara ɔfrɛ no.
Values: ['He calls her every night.', 'He calls him every night.', 'She calls her every night.', 'He calls her every night.']

Key: Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.
Values: [' Fortunately, no passengers were injured.', ' Luckily, none of the passengers were hurt.', ' Thankfully, no passengers were harmed.', ' Thankfully, all passengers were safe.']

Key: Anɔpa yi, ohyiaa no.
Values: ['She met him this morning.', 'She met her this morning.', 'He met her this morning.', 'He met him this morning.']

Key: Asamaoh ye nipa kese.
Values: ["Asamoah's great.", 'Asamoah is fantastic.', 'Asamoah is wonderful.', 'Asamoah is an amazing person.']

Key: Asamoah an

## Initialize Models + Prompt Models

In [5]:
# Short alias for the factory; it is bound without being called, so
# `create_instance` is invoked directly on `TextGenerationModelFactory` itself.
tgmf = TextGenerationModelFactory
# llama_31_70b_instruct = tgmf.create_instance('llama-3.1-70b-instruct')
llama_33_70b_instruct = tgmf.create_instance('llama-3.3-70b-instruct')
# models = [llama_31_70b_instruct, llama_33_70b_instruct]
# Only the llama-3.3 model is active; the 3.1 variant above is commented out.
models = [llama_33_70b_instruct]

In [6]:
# llama_31_70b_instruct.model_name

In [7]:
def generate_data(prompt: str, models: list) -> dict:
    """Send one prompt to every model and collect the outputs by model name.

    Args:
        prompt: Text passed unchanged to each model's ``generate`` method.
        models: Objects exposing ``generate(prompt)`` and a ``model_name``
            attribute.

    Returns:
        A dict mapping each ``model.model_name`` to whatever that model's
        ``generate`` call returned, in the order the models were given.
        An empty ``models`` list yields an empty dict. If two models share
        a ``model_name``, the later one's output overwrites the earlier.
    """
    model_outputs = {}

    for model in models:
        # Query first, then read the name, so a failing model raises before
        # anything is recorded for it.
        model_output = model.generate(prompt)
        model_outputs[model.model_name] = model_output
    return model_outputs

In [8]:
idx = 0  # not read in this cell; kept in case another cell relies on it
direct_llm_selection = {}
for key, value in tqdm(akan_one_to_eng_many_mappings.items()):
    # `value` holds the English candidates for this Akan sentence. A previous
    # run of this cell leaves the model-output dict at the end of the list, so
    # strip any dict entries: otherwise a re-run would offer the old model
    # answer as a "candidate" in the prompt and then append a second copy.
    candidates = [entry for entry in value if not isinstance(entry, dict)]

    # NOTE(review): the closing instruction says "fits the English sentence";
    # "fits the Akan sentence" looks like the intent -- confirm before rewording,
    # since changing the prompt changes the model outputs.
    prompt = f"You are a linguist that knows both Akan and English languages. I need you to take this Akan sentence: '{key}' alone without surrounding context and translate it to one of the following English sentences: '{candidates}'. Do NOT generate anything other than the best English sentence that fits the English sentence."
    generated_data = generate_data(prompt, models)
    direct_llm_selection[key] = generated_data
    # Update the list in place (same object the mapping holds) so that it ends
    # with exactly one model-output dict no matter how often the cell is run.
    value[:] = candidates + [generated_data]

100%|██████████| 132/132 [00:46<00:00,  2.83it/s]


In [9]:
# Display the per-model outputs collected by the prompting loop, keyed by Akan sentence.
direct_llm_selection

{'Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.': {'llama-3.3-70b-instruct': 'America was first colonized in the 17th century.'},
 'Anadwo biara ɔfrɛ no.': {'llama-3.3-70b-instruct': 'He calls her every night.'},
 'Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.': {'llama-3.3-70b-instruct': 'Thankfully, no passengers were harmed.'},
 'Anɔpa yi, ohyiaa no.': {'llama-3.3-70b-instruct': 'He met her this morning.'},
 'Asamaoh ye nipa kese.': {'llama-3.3-70b-instruct': 'Asamoah is wonderful.'},
 'Asamoah anhu sɛnea na wobu Esi anim animtiaa no.': {'llama-3.3-70b-instruct': "Asamoah didn't notice the look of disdain on Esi's face."},
 'Asamoah betumi abua no.': {'llama-3.3-70b-instruct': 'Asamoah is able to answer.'},
 'Asamoah yɛ ɔnokwafo.': {'llama-3.3-70b-instruct': 'Asamoah is an honest person.'},
 'Awarefo no de sika pii na ɛyɛɛ wɔn fie.': {'llama-3.3-70b-instruct': 'They spent a lot of money on furnishing their house.'},
 'Bere a ade tɔɔ ne so ara pɛ na yɛde no kɔɔ ayare

In [10]:
# Display the candidate lists; after the prompting loop each list ends with the
# dict of model outputs for that Akan sentence.
akan_one_to_eng_many_mappings

{'Amerika atubrafo a wodi kan no duu hɔ wɔ afeha a ɛto so 17 no mu.': ['The first American colonists arrived in the 17th century.',
  'The first settlers came to America in the 1600s.',
  'America was first colonized in the 17th century.',
  'The first American colonists landed in the 17th century.',
  {'llama-3.3-70b-instruct': 'America was first colonized in the 17th century.'}],
 'Anadwo biara ɔfrɛ no.': ['He calls her every night.',
  'He calls him every night.',
  'She calls her every night.',
  'He calls her every night.',
  {'llama-3.3-70b-instruct': 'He calls her every night.'}],
 'Anigyesɛm ne sɛ, hyɛn no mufo biara anhaw.': [' Fortunately, no passengers were injured.',
  ' Luckily, none of the passengers were hurt.',
  ' Thankfully, no passengers were harmed.',
  ' Thankfully, all passengers were safe.',
  {'llama-3.3-70b-instruct': 'Thankfully, no passengers were harmed.'}],
 'Anɔpa yi, ohyiaa no.': ['She met him this morning.',
  'She met her this morning.',
  'He met her t