In [1]:
from parrot import Parrot
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline,T5Tokenizer, T5ForConditionalGeneration
import pandas as pd
from numpy import average, round, nan
import re
import warnings
warnings.filterwarnings("ignore")


#uncomment to get reproducable paraphrase generations
def random_state(seed):
  torch.manual_seed(seed)
  if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

random_state(1234)

In [2]:
#Init models (make sure you init ONLY once if you integrate this to your code)
parrot = Parrot(model_tag="prithivida/parrot_paraphraser_on_T5", use_gpu=False)

device = "cuda"

tokenizer_gpt = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")

model_gpt = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")#.to(device)


tokenizer_t5 = T5Tokenizer.from_pretrained("t5-base")
model_t5 = T5ForConditionalGeneration.from_pretrained("t5-base")

# Translator
translator_en_fr = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr", max_length=512)
translator_fr_en = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-fr-en", max_length=512)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [67]:
def paraphrase(
    question,
    num_beams=4,
    num_beam_groups=4,
    num_return_sequences=4,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.5,
    max_length=128
):
    input_ids = tokenizer_gpt(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids#.to(device)
    
    outputs = model_gpt.generate(
        input_ids, temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    res = tokenizer_gpt.batch_decode(outputs, skip_special_tokens=True)

    return res

In [68]:
def translate_t5flan(text):
    list_de = []

    for t in text:
        # You can also use "translate English to French" and "translate English to Romanian"
        input_ids = tokenizer_t5("translate English to French: "+t, return_tensors="pt").input_ids  # Batch size 1
        list_de.append(tokenizer_t5.decode(model_t5.generate(input_ids)[0], skip_special_tokens=True, clean_up_tokenization_spaces=True))
    
    list_eng = []
    translator_fr = translator_fr_en(list_de)
    for t in translator_fr:
        list_eng.append(t['translation_text'])
    
    return text,  list_de, list_eng

In [69]:
def translate(text):
    list_fr = []
    translator_fr = translator_en_fr(text)
    for t in translator_fr:
        list_fr.append(t['translation_text'])
    
    list_eng = []
    translator_fr = translator_fr_en(list_fr)
    for t in translator_fr:
        list_eng.append(t['translation_text'])
    
    return text,  list_fr, list_eng

In [70]:
frase = ['Yves Mirande (Bagneux (Maine-et-Loire), Mai 8, 1876 – Paris, March 17, 1957) was a France screenwriter, director, actor, and producer.',]
print("PARROT")
for phrase in frase:
  print("-"*100)
  print("Input_phrase: ", phrase)
  print("-"*100)
  para_phrases = parrot.augment(input_phrase=phrase, use_gpu=False)
  for para_phrase in para_phrases:
   print(para_phrase)

PARROT
----------------------------------------------------------------------------------------------------
Input_phrase:  Yves Mirande (Bagneux (Maine-et-Loire), Mai 8, 1876 – Paris, March 17, 1957) was a France screenwriter, director, actor, and producer.
----------------------------------------------------------------------------------------------------
('Yves Mirande (Bagneux (Maine-et-Loire), Mai 8, 1876 – Paris, March 17, 1957) was a France screenwriter, director, actor, and producer.', 0)


In [71]:
print("GPT")
paraphrase(frase)

GPT


['France was home to Yves Mirande, who worked as a screenwriter, director, actor, and producer from 1876 to 1957.',
 'Yves Mirande, an actor, director, screenwriter, and producer, was born in France on May 8, 1876.',
 "A screenwriter, director, actor and producer, France's Yves Mirande (Bagneux (Maine-et-Loire), born on May 8, 1876, but lived in Paris on March 17, 1957.",
 'The career of Yves Mirande, a French screenwriter, director and actor, was established in the mid-1800s.']

In [72]:
print("T5 and Helsinki-NLP/opus-mt")
translate_t5flan(frase)

T5 and Helsinki-NLP/opus-mt


(['Yves Mirande (Bagneux (Maine-et-Loire), Mai 8, 1876 – Paris, March 17, 1957) was a France screenwriter, director, actor, and producer.'],
 ['Yves Mirande (Bagneux (Maine-et-L'],
 ['Yves Mirande (Bagneux (Maine-et-L)'])

In [73]:
print("Helsinki-NLP/opus-mt")
translate(frase)

Helsinki-NLP/opus-mt


(['Yves Mirande (Bagneux (Maine-et-Loire), Mai 8, 1876 – Paris, March 17, 1957) was a France screenwriter, director, actor, and producer.'],
 ['Yves Mirande est un scénariste, réalisateur, acteur et producteur français, né le 8 mai 1876 à Paris (France).'],
 ['Yves Mirande (born 8 May 1876 in Paris, France) is a French screenwriter, director, actor and producer.'])

In [74]:
frase = ['Sangaste is also the birthplace of the Estonia writer August Gailit (1891-1960) and Estonian opera singer Aarne Viisimaa (1898-1989).',]
print("PARROT")
for phrase in frase:
  print("-"*100)
  print("Input_phrase: ", phrase)
  print("-"*100)
  para_phrases = parrot.augment(input_phrase=phrase, use_gpu=False)
  if para_phrases == None:
    print("No paraphrases found")
  else:
    for para_phrase in para_phrases:
     print(para_phrase)

PARROT
----------------------------------------------------------------------------------------------------
Input_phrase:  Sangaste is also the birthplace of the Estonia writer August Gailit (1891-1960) and Estonian opera singer Aarne Viisimaa (1898-1989).
----------------------------------------------------------------------------------------------------


No paraphrases found


In [75]:
print("GPT")
paraphrase(frase)

GPT


["'Aarne Viisimaa (1898-1989) and August Gailit, an Estonian writer from 1891 to 1960, were born in the same sangaste location.'",
 'The birthplace of August Gailit, Estonian writer, and Aarne Viisimaa, opera singer, is situated in the city of Sanaste.',
 "August Gailit, Estonia's writer from 1891 to 1960 and Aarne Viisimaa, an Estonian opera singer from the same era, were born in Singapore.",
 'In the same region as Sangaste are born August Gailit, (from 1891 to 1960) and Aarne Viisimaa, from 1898 to 1989.']

In [76]:
print("T5 and Helsinki-NLP/opus-mt")
translate_t5flan(frase)

T5 and Helsinki-NLP/opus-mt


(['Sangaste is also the birthplace of the Estonia writer August Gailit (1891-1960) and Estonian opera singer Aarne Viisimaa (1898-1989).'],
 ["Sangaste est également le lieu de naissance de l'écrivain estonien August"],
 ['Sangaste is also the birthplace of the Estonian writer August.'])

In [77]:
print("Helsinki-NLP/opus-mt")
translate(frase)

Helsinki-NLP/opus-mt


(['Sangaste is also the birthplace of the Estonia writer August Gailit (1891-1960) and Estonian opera singer Aarne Viisimaa (1898-1989).'],
 ["Sangaste est également le lieu de naissance de l'écrivain estonien August Gailit (1891-1960) et de la chanteuse d'opéra estonienne Aarne Viisimaa (1898-1989)."],
 ['Sangaste is also the birthplace of Estonian writer August Gailit (1891-1960) and Estonian opera singer Aarne Viisimaa (1898-1989).'])

## Utilizando

### Funções

In [3]:
def generate_output_phrases_parrot(df, input_column, output_column):
    """
    Generate output phrases by paraphrasing the input phrases.

    Parameters:
    - df (pandas.DataFrame): The input DataFrame containing the input phrases.
    - input_column (str): The name of the column in the DataFrame that contains the input phrases.
    - output_column (str): The name of the column in the DataFrame to store the output phrases.
    - paraphrase (function): The function used to paraphrase the input phrases.

    Returns:
    - df (pandas.DataFrame): The DataFrame with the original input phrases and the generated output phrases.
    """
    
    # Create an empty list to store data
    data = []

    # Iterate over each input phrase
    for sentence in df[input_column]:
        para_phrases = parrot.augment(input_phrase=sentence)
        if para_phrases == None:
           data.append({input_column: sentence, output_column: para_phrases})
        else: 
            para_frases = list(list(zip(*para_phrases))[0])
            data.append({input_column: sentence, output_column: para_frases})

    # Convert list of dictionaries to DataFrame
    new_df = pd.DataFrame(data)

    # Merge new_df with the original DataFrame df
    outro_df = pd.concat([df, new_df[output_column]], axis=1)

    return outro_df


def generate_output_phrases_t5(df, input_column, output_column):
    """
    Generate output phrases by paraphrasing the input phrases.

    Parameters:
    - df (pandas.DataFrame): The input DataFrame containing the input phrases.
    - input_column (str): The name of the column in the DataFrame that contains the input phrases.
    - output_column (str): The name of the column in the DataFrame to store the output phrases.
    - paraphrase (function): The function used to paraphrase the input phrases.

    Returns:
    - df (pandas.DataFrame): The DataFrame with the original input phrases and the generated output phrases.
    """
    
    # Create an empty list to store data
    data = []

    # Iterate over each input phrase
    for sentence in df[input_column]:
        para_phrases = paraphrase(sentence)
        data.append({input_column: sentence, output_column: para_phrases})

    # Convert list of dictionaries to DataFrame
    new_df = pd.DataFrame(data)

    # Merge new_df with the original DataFrame df
    outro_df = pd.concat([df, new_df[output_column]], axis=1)
    outro_df = pd.concat([outro_df, pd.DataFrame(outro_df[output_column].values.tolist())], axis=1)
    return outro_df



def translate_and_generate(df, input_column, output_columnn):
    # Traduzindo para francês e gerando a lista de frases em francês
    list_fr = [t['translation_text'] for sentence in df[input_column] for t in translator_en_fr(sentence)]

    # Traduzindo de volta para inglês e gerando a lista de frases em inglês
    list_eng = [t['translation_text'] for fr_phrase in list_fr for t in translator_fr_en([fr_phrase])]

    # Convertendo a lista de frases em inglês em DataFrame
    new_df = pd.DataFrame({output_columnn: list_eng})

    # Concatenando o novo DataFrame com o original
    outro_df = pd.concat([df, new_df], axis=1)
    return outro_df

def verificar_e_substituir2(df, coluna_palavras, coluna_salvar, indices_substituicao=None):
    for index, row in df.iterrows():
        if indices_substituicao is not None and index not in indices_substituicao:
            continue
        palavra_alvo = str(row[coluna_palavras])  # Convert to string
        encontrada = False
        for coluna in df.columns:
            if isinstance(row[coluna], str):  # Check if the value is a string
                if palavra_alvo in str(row[coluna]):
                    # Substituir a palavra-alvo por '[MASK]' na primeira coluna onde é encontrada
                    df.at[index, coluna_salvar] = df.at[index, coluna].replace(palavra_alvo, '[MASK]')
                    encontrada = True
                    break  # Pula para a próxima linha após substituir
            elif isinstance(row[coluna], int):  # Check if the value is an integer
                if str(palavra_alvo) in str(row[coluna]):
                    df.at[index, coluna_salvar] = str(df.at[index, coluna]).replace(str(palavra_alvo), '[MASK]')
                    encontrada = True
                    break
        if not encontrada:
            df.at[index, coluna_salvar] = None  # Se a palavra-alvo não for encontrada, atribui None
    return df


def replace_masked_sentence(df, mask_column, label_column, new_column):
    df[new_column] = df.apply(lambda row: str(row[mask_column]).replace("[MASK]", str(row[label_column])), axis=1)
    return df

def substituir_x(row):
    return row['template'].replace('[X]', row['sub_label'])

def substituir_y(row):
    return row['triple_NL'].replace('[Y]', row['obj_label'])

def rename_numeric_columns(df, replacement_prefix='Column_'):
    """
    Renomeia as colunas que consistem apenas de valores numéricos.

    Parâmetros:
    - df (pandas.DataFrame): O DataFrame que contém as colunas a serem renomeadas.
    - replacement_prefix (str): O prefixo a ser usado para os novos nomes das colunas.

    Retorna:
    - df (pandas.DataFrame): O DataFrame com as colunas renomeadas.
    """

    new_columns = []
    for col in df.columns:
        # Verificar se o nome da coluna pode ser convertido para um número inteiro
        try:
            col_int = int(col)
            new_columns.append(replacement_prefix + str(col_int))
        except ValueError:
            new_columns.append(col)

    # Renomear as colunas do DataFrame
    df.columns = new_columns

    return df

def substituir_palavras(texto, palavras_substituir, substituto="[MASK]"):
    if isinstance(texto, str) and texto is not None:
        for palavra in palavras_substituir:
            # Usando expressão regular para substituir todas as ocorrências da palavra
            texto = re.sub(r'\b' + re.escape(palavra) + r'\b', substituto, texto)
    return texto if isinstance(texto, str) else None


## Citizen

In [97]:
file = 'D:/tese/code/data/novos_data/filtrado_one_citizen.csv'
data_citizen = pd.read_csv(file).drop(columns=['Unnamed: 0'])
data_citizen = data_citizen[:10]
data_citizen = replace_masked_sentence(data_citizen, 'masked_sentence', 'obj_label', 
                               'filled_sentence')

# Aplicando a função a cada linha do DataFrame
data_citizen['triple_NL'] = data_citizen.apply(substituir_x, axis=1)
data_citizen['triple_NL'] = data_citizen.apply(substituir_y, axis=1)


data_citizen = data_citizen[['sub_label', 'template', 'obj_label', 'triple_NL',
             'masked_sentence', 'filled_sentence']]

print('DADOS ORIGINAIS')
display(data_citizen)

translate_citizen = translate_and_generate(data_citizen, 'triple_NL', 'translated_triple_NL')
translate_citizen = translate_and_generate(translate_citizen, 'filled_sentence', 'translated_filled_sentence')


parafrase_t5_citizen = generate_output_phrases_t5(data_citizen, 'triple_NL', 't5_triple_NL')
parafrase_t5_citizen = rename_numeric_columns(parafrase_t5_citizen, 't5_triple_NL_')
parafrase_t5_citizen = generate_output_phrases_t5(parafrase_t5_citizen, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_citizen = rename_numeric_columns(parafrase_t5_citizen, 'T5_paraphrased_filled_sentence_')

parafrase_parrot_citizen = generate_output_phrases_parrot(data_citizen, 'triple_NL', 'parrot_triple_NL')
parafrase_parrot_citizen = generate_output_phrases_parrot(parafrase_parrot_citizen, 'filled_sentence', 'parrot_paraphrased_filled_sentence')


DADOS ORIGINAIS


Unnamed: 0,sub_label,template,obj_label,triple_NL,masked_sentence,filled_sentence
0,Rubens Barrichello,[X] is [Y] citizen .,Brazil,Rubens Barrichello is Brazil citizen .,"It was the final race of the 2004 season, and ...","It was the final race of the 2004 season, and ..."
1,Yves Mirande,[X] is [Y] citizen .,France,Yves Mirande is France citizen .,"Yves Mirande (Bagneux (Maine-et-Loire), Mai 8,...","Yves Mirande (Bagneux (Maine-et-Loire), Mai 8,..."
2,August Gailit,[X] is [Y] citizen .,Estonia,August Gailit is Estonia citizen .,Sangaste is also the birthplace of the [MASK] ...,Sangaste is also the birthplace of the Estonia...
3,Princess Elisabeth of Denmark,[X] is [Y] citizen .,Denmark,Princess Elisabeth of Denmark is Denmark citiz...,"Princess Elisabeth of [MASK], RE (Elisabeth Ca...","Princess Elisabeth of Denmark, RE (Elisabeth C..."
4,Tue West,[X] is [Y] citizen .,Denmark,Tue West is Denmark citizen .,Tue West (born 1977) is a [MASK] composer.,Tue West (born 1977) is a Denmark composer.
5,Bertil Lindblad,[X] is [Y] citizen .,Sweden,Bertil Lindblad is Sweden citizen .,"Bertil Lindblad (Örebro, 26 November 1895 – Sa...","Bertil Lindblad (Örebro, 26 November 1895 – Sa..."
6,Prafulla Chandra Ghosh,[X] is [Y] citizen .,India,Prafulla Chandra Ghosh is India citizen .,Prafulla Chandra Ghosh (Bengali: প্রফুল্ল চন্দ...,Prafulla Chandra Ghosh (Bengali: প্রফুল্ল চন্দ...
7,Noriyasu Hirata,[X] is [Y] citizen .,Japan,Noriyasu Hirata is Japan citizen .,"Noriyasu Hirata (平田 典靖 Hirata Noriyasu, born N...","Noriyasu Hirata (平田 典靖 Hirata Noriyasu, born N..."
8,Woodrow Lloyd,[X] is [Y] citizen .,Canada,Woodrow Lloyd is Canada citizen .,After Douglas left to lead the federal NDP in ...,After Douglas left to lead the federal NDP in ...
9,Namadi Sambo,[X] is [Y] citizen .,Nigeria,Namadi Sambo is Nigeria citizen .,Mohammed Namadi Sambo (born 2 August 1954) is ...,Mohammed Namadi Sambo (born 2 August 1954) is ...


In [98]:
countries = ['Brazil', 'France', 'Estonia', 'Denmark', 'Sweden', 'India',
             'Japan', 'Canada', 'Nigeria', 'Australia', 'England', 'Austria',
             'Israel', 'Ecuador', 'Mexico', 'Argentina', 'Venezuela', 'Belgium',
             'Tanzania', 'Malaysia', 'Albania', 'Switzerland', 'Poland',
             'Spain', 'Norway', 'Lebanon', 'Ireland', 'Ukraine', 'Laos',
             'Bulgaria', 'Afghanistan', 'Paraguay', 'Germany', 'Bolivia',
             'Senegal', 'Egypt', 'Romania', 'Italy', 'Slovakia', 'Singapore',
             'Philippines', 'Thailand', 'Bangladesh', 'Iran', 'Ghana', 'Iraq',
             'Chile']

nationalities = ['Brazilian', 'French', 'Estonian', 'Danish', 'Swedish', 'Indian',
                 'Japanese', 'Canadian', 'Nigerian', 'Australian', 'English', 'Austrian',
                 'Israeli', 'Ecuadorian', 'Mexican', 'Argentine', 'Venezuelan', 'Belgian',
                 'Tanzanian', 'Malaysian', 'Albanian', 'Swiss', 'Polish',
                 'Spanish', 'Norwegian', 'Lebanese', 'Irish', 'Ukrainian', 'Laotian',
                 'Bulgarian', 'Afghan', 'Paraguayan', 'German', 'Bolivian',
                 'Senegalese', 'Egyptian', 'Romanian', 'Italian', 'Slovak', 'Singaporean',
                 'Filipino', 'Thai', 'Bangladeshi', 'Iranian', 'Ghanaian', 'Iraqi',
                 'Chilean']

# Mapeando países para nacionalidades
country_nationality_dict = dict(zip(countries, nationalities))

# Função para obter a nacionalidade
def get_nationality(country):
    return country_nationality_dict.get(country)

In [None]:
# Exibindo os triples em NL
triple_to_nl_citizen = pd.concat([data_citizen[['sub_label', 'template', 'obj_label']],
                          translate_citizen['translated_triple_NL'], 
                          parafrase_t5_citizen[['t5_triple_NL', 't5_triple_NL_0', 't5_triple_NL_1', 
                                                't5_triple_NL_2', 't5_triple_NL_3']], 
                          parafrase_parrot_citizen['parrot_triple_NL']], axis=1)
triple_to_nl_citizen.dropna(inplace=True)
triple_to_nl_citizen['nationality'] = triple_to_nl_citizen['obj_label'].apply(get_nationality)
triple_to_nl_citizen.reset_index(drop=True, inplace=True)
triple_to_nl_citizen['parrot_triple_NL'] = triple_to_nl_citizen['parrot_triple_NL'].apply(lambda x: x[0] if x != None else None)

# Exibindo as frases preenchidas
filled_sentences_citizen = pd.concat([data_citizen[['sub_label', 'template', 'obj_label']],
                          translate_citizen['translated_filled_sentence'], 
                          parafrase_t5_citizen[['T5_paraphrased_filled_sentence', 'T5_paraphrased_filled_sentence_0', 
                                                'T5_paraphrased_filled_sentence_1', 'T5_paraphrased_filled_sentence_2', 
                                                'T5_paraphrased_filled_sentence_3']], 
                          parafrase_parrot_citizen['parrot_paraphrased_filled_sentence']], axis=1)
filled_sentences_citizen.dropna(inplace=True)
filled_sentences_citizen['nationality'] = filled_sentences_citizen['obj_label'].apply(get_nationality)
filled_sentences_citizen.reset_index(drop=True, inplace=True)
#triple_to_nl_citizen['parrot_paraphrased_filled_sentence'] = triple_to_nl_citizen['parrot_paraphrased_filled_sentence'].apply(lambda x: x[0] if x != None else None)

In [100]:
print('Triples NL')
display(triple_to_nl_citizen)

Triples NL


Unnamed: 0,sub_label,template,obj_label,translated_triple_NL,t5_triple_NL,t5_triple_NL_0,t5_triple_NL_1,t5_triple_NL_2,t5_triple_NL_3,parrot_triple_NL,nationality
0,Rubens Barrichello,[X] is [Y] citizen .,Brazil,Rubens Barrichello is a Brazilian citizen.,[Rubens Barrichello holds the title of a citiz...,Rubens Barrichello holds the title of a citize...,The citizenship of Rubens Barrichello is that ...,Brazil's citizenship is held by Rubens Barrich...,Rubens Barrichello is a citizen of Brazil.,"[rubens barrichello is a citizen of brazil, ru...",Brazilian
1,Yves Mirande,[X] is [Y] citizen .,France,Yves Mirande is a French citizen.,"[Yves Mirande holds a French citizenship., Fra...",Yves Mirande holds a French citizenship.,France is the nationality of Yves Mirande.,French national Yves Mirande.,The citizenship of Yves Mirande is France.,[yves mirande is a french citizen],French
2,Princess Elisabeth of Denmark,[X] is [Y] citizen .,Denmark,Princess Elisabeth of Denmark is a Danish citi...,[Princess Elisabeth of Denmark holds Danish ci...,Princess Elisabeth of Denmark holds Danish cit...,The citizen of Denmark is Princess Elisabeth o...,The citizenship of Princess Elisabeth of Denma...,Denmark has awarded citizenship to Princess El...,[princess elisabeth of denmark is a national o...,Danish
3,Tue West,[X] is [Y] citizen .,Denmark,Tue West is a Danish citizen.,[The citizenship of Tue West is that of a citi...,The citizenship of Tue West is that of a citiz...,Tue West is a citizen of Denmark.,Denmark's citizenship is given to Tue West.,"A citizen of Denmark, Tue West.",[Tue West is Denmark citizen .],Danish
4,Bertil Lindblad,[X] is [Y] citizen .,Sweden,Bertil Lindblad is a Swedish citizen.,[Bertil Lindblad holds the citizenship of Swed...,Bertil Lindblad holds the citizenship of Sweden.,The citizenship of Bertil Lindblad is that of ...,Sweden's citizenship is Bertil Lindblad.,A citizen of Sweden is Bertil Lindblad.,"[bertil lindblad is a citizen of sweden, berti...",Swedish
5,Prafulla Chandra Ghosh,[X] is [Y] citizen .,India,Prafulla Chandra Ghosh is an Indian citizen.,[The person known as Prafulla Chandra Ghosh is...,The person known as Prafulla Chandra Ghosh is ...,Prafulla Chandra Ghosh holds a citizenship tha...,India is represented by the identity of Praful...,"An Indian citizen, Prafulla Chandra Ghosh.","[chandra ghosh is a citizen of india, prafulla...",Indian
6,Noriyasu Hirata,[X] is [Y] citizen .,Japan,Noriyasu Hirata is a Japanese citizen.,[Noriyasu Hirata holds the status of a Japanes...,Noriyasu Hirata holds the status of a Japanese...,The citizenship of Noriyasu Hirata is Japanese.,Noriyasu Hirata is a citizen of Japan.,Japan recognizes Noriyashi Hirata as a citizen.,"[noriyasu hirata is a citizen of japan, hirata...",Japanese
7,Woodrow Lloyd,[X] is [Y] citizen .,Canada,Woodrow Lloyd is a Canadian citizen.,"[Woodrow Lloyd holds Canadian citizenship., Ca...",Woodrow Lloyd holds Canadian citizenship.,Canada is where Woodrow Lloyd's citizenship co...,The citizenship of Canada is given to Woodrow ...,"A Canadian citizen, Woodrow Lloyd.",[woodrow lloyd is a canadian citizen],Canadian
8,Namadi Sambo,[X] is [Y] citizen .,Nigeria,Namadi Sambo is a Nigerian citizen.,"[Namadi Sambo holds a citizenship in Nigeria.,...",Namadi Sambo holds a citizenship in Nigeria.,The citizenship of Namadi Sambo is that of Nig...,Nigeria's citizenship is held by Namadi Sambo.,A citizen of Nigerian nationality is Namadi Sa...,[namadi sambo is a nigerian citizen],Nigerian


In [101]:
print('Filled Sentences')
display(filled_sentences_citizen)

Filled Sentences


Unnamed: 0,sub_label,template,obj_label,translated_filled_sentence,T5_paraphrased_filled_sentence,T5_paraphrased_filled_sentence_0,T5_paraphrased_filled_sentence_1,T5_paraphrased_filled_sentence_2,T5_paraphrased_filled_sentence_3,parrot_paraphrased_filled_sentence,nationality
0,Rubens Barrichello,[X] is [Y] citizen .,Brazil,It was the last race of the 2004 season and lo...,[The 2004 season's final race saw Rubens Barri...,The 2004 season's final race saw Rubens Barric...,Rubens Barrichello of Brazil claimed pole for ...,"In 2004, the last race of the season saw Ruben...",Local fans were thrilled to see Rubens Barrich...,[it was the final race of the 2004 season and ...,Brazilian
1,Yves Mirande,[X] is [Y] citizen .,France,"Yves Mirande (born 8 May 1876 in Paris, France...","[France screenwriter, director, actor and prod...","France screenwriter, director, actor and produ...","Yves Mirande, who lived in France from Mai 8, ...","A French actor, screenwriter, director, and pr...","The career of Yves Mirande, a screenwriter, di...","[Yves Mirande (Bagneux (Maine-et-Loire), Mai 8...",French
2,Tue West,[X] is [Y] citizen .,Denmark,Tue West (born 1977) is a Danish composer.,"[Denmark's Tue West, born in 1977, is a compos...","Denmark's Tue West, born in 1977, is a composer.","Tue West, a composer from Denmark, was born in...","The composer Tue West, who was born in Denmark...","A composer hailing from Denmark is Tue West, w...",[Tue West (born 1977) is a Denmark composer.],Danish
3,Bertil Lindblad,[X] is [Y] citizen .,Sweden,"Bertil Lindblad (rebro, 26 November 1895 – Sal...","[Bertil Lindblad, an astronomer from Sweden, w...","Bertil Lindblad, an astronomer from Sweden, wa...","The famous astronomer Bertil Lindblad, who liv...","A renowned astronomer from Sweden, Bertil Lind...","Originally from Sweden, Bertil Lindblad was an...","[Bertil Lindblad (Örebro, 26 November 1895 – S...",Swedish
4,Prafulla Chandra Ghosh,[X] is [Y] citizen .,India,Prafulla Chandra Ghosh (1891-1983) was the pri...,"[Prafulla Chandra Ghosh, also known as Prophul...","Prafulla Chandra Ghosh, also known as Prophull...","The first Chief Minister of West Bengal, India...","During the period of 1891 to 1980, Prafulla Ch...",The initial Chief Minister of India from Kolka...,[Prafulla Chandra Ghosh (Bengali: প্রফুল্ল চন্...,Indian
5,Noriyasu Hirata,[X] is [Y] citizen .,Japan,"Noriyasu Hirata (born November 17, 1983) is a ...","[Born on November 17, 1983, Noriyasu Hirata is...","Born on November 17, 1983, Noriyasu Hirata is ...","Noriyasu Hirata, a Japanese badminton player, ...","A Japanese player, Noriyasu Hirata (born Novem...","The person known as Noriyasu Hirata, also know...",[noriyasu hirata hirata noriyasu born 17 novem...,Japanese
6,Woodrow Lloyd,[X] is [Y] citizen .,Canada,After Douglas left to lead the federal NDP in ...,[The first universal medical care plan in Cana...,The first universal medical care plan in Canad...,"In 1961, Woodrow Lloyd became the premier afte...",Following Douglas' departure to lead the feder...,Woodrow Lloyd became the premier in 1961 after...,[woodrow lloyd became prime minister after dou...,Canadian
7,Namadi Sambo,[X] is [Y] citizen .,Nigeria,"Mohammed Namadi Sambo (born August 2, 1954) is...","[Mohammed Namadi Sambo, born in August 1954, s...","Mohammed Namadi Sambo, born in August 1954, se...",During the period of 19 May 2010 to 29 May 201...,"The Vice President of Nigeria, who was born on...","From 19 May 2010 to 29 May 2015, Mohammed Nama...",[Mohammed Namadi Sambo (born 2 August 1954) is...,Nigerian


## Works for

In [102]:
file = 'D:/tese/code/data/novos_data/filtrado_one_worksfor.csv'
data_worksfor = pd.read_csv(file).drop(columns=['Unnamed: 0'])
data_worksfor = data_worksfor[:10]
data_worksfor = replace_masked_sentence(data_worksfor, 'masked_sentence', 'obj_label', 
                               'filled_sentence')

# Aplicando a função a cada linha do Data_worksforFrame
data_worksfor['triple_NL'] = data_worksfor.apply(substituir_x, axis=1)
data_worksfor['triple_NL'] = data_worksfor.apply(substituir_y, axis=1)

data_worksfor = data_worksfor[['sub_label', 'template', 'obj_label', 'triple_NL',
             'masked_sentence', 'filled_sentence']]

print('DADOS ORIGINAIS')
display(data_worksfor)

translate_worksfor = translate_and_generate(data_worksfor, 'triple_NL', 'translated_triple_NL')
translate_worksfor = translate_and_generate(translate_worksfor, 'filled_sentence', 'translated_filled_sentence')

parafrase_t5_worksfor = generate_output_phrases_t5(data_worksfor, 'triple_NL', 't5_triple_NL')
parafrase_t5_worksfor = rename_numeric_columns(parafrase_t5_worksfor, 't5_triple_NL_')
parafrase_t5_worksfor = generate_output_phrases_t5(parafrase_t5_worksfor, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_worksfor = rename_numeric_columns(parafrase_t5_worksfor, 'T5_paraphrased_filled_sentence_')

parafrase_parrot_worksfor = generate_output_phrases_parrot(data_worksfor, 'triple_NL', 'parrot_triple_NL')
parafrase_parrot_worksfor = generate_output_phrases_parrot(parafrase_parrot_worksfor, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')


DADOS ORIGINAIS


Unnamed: 0,sub_label,template,obj_label,triple_NL,masked_sentence,filled_sentence
0,Steve Jobs,[X] works for [Y] .,Apple,Steve Jobs works for Apple .,"[MASK] was founded by Steve Jobs, Steve Woznia...","Apple was founded by Steve Jobs, Steve Wozniak..."
1,Steve Wozniak,[X] works for [Y] .,Apple,Steve Wozniak works for Apple .,"[MASK] was founded by Steve Jobs, Steve Woznia...","Apple was founded by Steve Jobs, Steve Wozniak..."
2,Paul Allen,[X] works for [Y] .,Microsoft,Paul Allen works for Microsoft .,"In 1975, Gates and Paul Allen co-founded [MASK...","In 1975, Gates and Paul Allen co-founded Micro..."
3,Satya Nadella,[X] works for [Y] .,Microsoft,Satya Nadella works for Microsoft .,He stepped down as Chairman of [MASK] in Febru...,He stepped down as Chairman of Microsoft in Fe...
4,Jeremy Paxman,[X] works for [Y] .,BBC,Jeremy Paxman works for BBC .,"The election was broadcast live on the [MASK],...","The election was broadcast live on the BBC, an..."
5,Andrew Marr,[X] works for [Y] .,BBC,Andrew Marr works for BBC .,"The election was broadcast live on the [MASK],...","The election was broadcast live on the BBC, an..."
6,David Dimbleby,[X] works for [Y] .,BBC,David Dimbleby works for BBC .,"The election was broadcast live on the [MASK],...","The election was broadcast live on the BBC, an..."
7,Grady Booch,[X] works for [Y] .,IBM,Grady Booch works for IBM .,"It was developed by Grady Booch, while at Rati...","It was developed by Grady Booch, while at Rati..."
8,Horst Feistel,[X] works for [Y] .,IBM,Horst Feistel works for IBM .,Developed in the early 1970s at [MASK] and bas...,Developed in the early 1970s at IBM and based ...
9,Philip Don Estridge,[X] works for [Y] .,IBM,Philip Don Estridge works for IBM .,It was created by a team of engineers and desi...,It was created by a team of engineers and desi...


In [103]:
triple_to_nl_worksfor = pd.concat([data_worksfor[['sub_label', 'template', 'obj_label']],
                          translate_worksfor['translated_triple_NL'], 
                          parafrase_t5_worksfor[['t5_triple_NL', 't5_triple_NL_0', 't5_triple_NL_1', 
                                                't5_triple_NL_2', 't5_triple_NL_3']], 
                          parafrase_parrot_worksfor['parrot_triple_NL']], axis=1)
triple_to_nl_worksfor.dropna(inplace=True)
triple_to_nl_worksfor.reset_index(drop=True, inplace=True)
triple_to_nl_worksfor['parrot_triple_NL'] = triple_to_nl_worksfor['parrot_triple_NL'].apply(lambda x: x[0] if x != None else None)

filled_sentences_worksfor = pd.concat([data_worksfor[['sub_label', 'template', 'obj_label']],
                          translate_worksfor['translated_filled_sentence'], 
                          parafrase_t5_worksfor[['T5_paraphrased_filled_sentence', 'T5_paraphrased_filled_sentence_0', 
                                                'T5_paraphrased_filled_sentence_1', 'T5_paraphrased_filled_sentence_2', 
                                                'T5_paraphrased_filled_sentence_3']], 
                          parafrase_parrot_worksfor['Parrot_paraphrased_filled_sentence']], axis=1)
filled_sentences_worksfor.dropna(inplace=True)
filled_sentences_worksfor.reset_index(drop=True, inplace=True)

In [104]:
print('Triples NL')
display(triple_to_nl_worksfor)

Triples NL


Unnamed: 0,sub_label,template,obj_label,translated_triple_NL,t5_triple_NL,t5_triple_NL_0,t5_triple_NL_1,t5_triple_NL_2,t5_triple_NL_3,parrot_triple_NL
0,Steve Jobs,[X] works for [Y] .,Apple,Steve Jobs works for Apple.,"[Steve Jobs is a staff member at Apple., Apple...",Steve Jobs is a staff member at Apple.,Apple employs Steve Jobs.,The job of Steve Jobs is at Apple.,Steve Jobs holds a position at Apple.,"[steve jobs works at apple, steve jobs works f..."
1,Steve Wozniak,[X] works for [Y] .,Apple,Steve Wozniak works for Apple.,"[Steve Wozniak is a staff member at Apple., Ap...",Steve Wozniak is a staff member at Apple.,Apple has hired Steve Wozniak.,The job of Steve Wozniak is at Apple.,Steve Wozniak holds a position at Apple.,[steve wozniak works with apple]
2,Paul Allen,[X] works for [Y] .,Microsoft,Paul Allen works for Microsoft.,"[Paul Allen is employed by Microsoft., The pos...",Paul Allen is employed by Microsoft.,The position of Microsoft's human resources ma...,Microsoft has a representative named Paul Allen.,A Microsoft employee is Paul Allen.,"[allen works for microsoft, paul allen works a..."
3,Satya Nadella,[X] works for [Y] .,Microsoft,Satya Nadella works for Microsoft.,"[Satya Nadella is employed by Microsoft., Micr...",Satya Nadella is employed by Microsoft.,Microsoft has employed Satya Nadella.,Satya Nadella holds a position at Microsoft.,"At Microsoft, Satya Nadella is employed.","[satya nadella is working for microsoft, satya..."
4,Jeremy Paxman,[X] works for [Y] .,BBC,Jeremy Paxman works for the BBC.,"[Jeremy Paxman is employed by the BBC., The BB...",Jeremy Paxman is employed by the BBC.,The BBC employs Jeremy Paxman.,The job of Jeremy Paxman is associated with th...,"Jeremy Paxman, who works for the BBC.","[jeremy paxman is working for the bbc, jeremy ..."
5,Andrew Marr,[X] works for [Y] .,BBC,Andrew Marr works for BBC.,"[The BBC employs Andrew Marr., Andrew Marr is ...",The BBC employs Andrew Marr.,Andrew Marr is a member of the BBC.,The job of Andrew Marr is within the scope of ...,Andrew Marr holds a position at the BBC.,"[andrew marr works for the bbc truce, andrew ..."
6,David Dimbleby,[X] works for [Y] .,BBC,David Dimbleby works for the BBC.,"[The BBC employs David Dimbleby., David Dimble...",The BBC employs David Dimbleby.,David Dimbleby is a member of the BBC team.,A member of the BBC's team is David Dimbleby.,BBC staff member David Dimbleby.,[david dimbleby works for the bbc]
7,Grady Booch,[X] works for [Y] .,IBM,Grady Booch works for IBM.,"[Grady Booch is employed at IBM., IBM is where...",Grady Booch is employed at IBM.,IBM is where Grady Booch is currently working.,The occupation of Grady Booch involves working...,"At IBM, Grady is employed.","[grady booch works at ibm, grady booch works f..."
8,Horst Feistel,[X] works for [Y] .,IBM,Horst Feistel works for IBM.,"[Horst Feistel is employed at IBM., IBM employ...",Horst Feistel is employed at IBM.,IBM employs Horst Feistel.,Horst Feistel holds a position at IBM.,The occupation of Horst Feistel is for IBM.,[horst feistel works for ibm]
9,Philip Don Estridge,[X] works for [Y] .,IBM,Philip Don Estridge works for IBM.,[Philip Don Estridge is a member of the IBM wo...,Philip Don Estridge is a member of the IBM wor...,IBM is where Philip Don Estridge presently works.,The job of Philip Don Estridge is at IBM.,"Philip Don Estridge, an IBM employee.","[philip don estridge works at ibm, philip don ..."


In [105]:
print('Filled Sentences')
display(filled_sentences_worksfor)

Filled Sentences


Unnamed: 0,sub_label,template,obj_label,translated_filled_sentence,T5_paraphrased_filled_sentence,T5_paraphrased_filled_sentence_0,T5_paraphrased_filled_sentence_1,T5_paraphrased_filled_sentence_2,T5_paraphrased_filled_sentence_3,Parrot_paraphrased_filled_sentence
0,Steve Jobs,[X] works for [Y] .,Apple,"Apple was founded by Steve Jobs, Steve Wozniak...","[Apple was founded on April 1, 1976, by Steve ...","Apple was founded on April 1, 1976, by Steve J...","The founding of Apple on April 1, 1976, was ma...","Steve Jobs, Steve Wozniak, and Ronald Wayne es...","On April 1, 1976, Steve Jobs joined with fello...",[steve wozniak and ronald wayne founded apple ...
1,Steve Wozniak,[X] works for [Y] .,Apple,"Apple was founded by Steve Jobs, Steve Wozniak...","[Apple was founded on April 1, 1976, by Steve ...","Apple was founded on April 1, 1976, by Steve J...","The founding of Apple on April 1, 1976, was ma...","Steve Jobs, Steve Wozniak, and Ronald Wayne es...","On April 1, 1976, Steve Jobs joined with fello...",[on 1 april 1976 steve wozniak and ronald wayn...
2,Paul Allen,[X] works for [Y] .,Microsoft,"In 1975, Gates and Paul Allen co-founded Micro...",[The co-founding of Gates and Paul Allen in 19...,The co-founding of Gates and Paul Allen in 197...,Gates and Paul Allen established Microsoft in ...,"Microsoft, which was founded in 1975 by Gates ...","In 1975, Gates and Paul Allen established Micr...",[in 1975 gates and paul allen co-founded micro...
3,Satya Nadella,[X] works for [Y] .,Microsoft,He resigned as president of Microsoft in Febru...,[He resigned as Chairman of Microsoft in Febru...,He resigned as Chairman of Microsoft in Februa...,February 2014 saw him relinquishing his positi...,"In February 2014, he announced his resignation...","As of February 2014, he was no longer Chairman...",[he stepped down from microsoft as chairman in...
4,Jeremy Paxman,[X] works for [Y] .,BBC,"The election was broadcast live on the BBC, an...","[Jeremy Paxman, Andrew Marr, Peter Snow, and D...","Jeremy Paxman, Andrew Marr, Peter Snow, and Da...","The election was broadcasted live on the BBC, ...",The BBC's live broadcast of the election featu...,"Jeremy Paxman, Andrew Marr and the party's own...","[The election was broadcast live on the BBC, a..."
5,Andrew Marr,[X] works for [Y] .,BBC,"The election was broadcast live on the BBC, an...","[Jeremy Paxman, Andrew Marr, Peter Snow, and D...","Jeremy Paxman, Andrew Marr, Peter Snow, and Da...","The election was broadcasted live on the BBC, ...",The BBC's live broadcast of the election featu...,"Jeremy Paxman, Andrew Marr and the party's own...",[the vote was broadcast live on the bbc and pr...
6,David Dimbleby,[X] works for [Y] .,BBC,"The election was broadcast live on the BBC, an...","[Jeremy Paxman, Andrew Marr, Peter Snow, and D...","Jeremy Paxman, Andrew Marr, Peter Snow, and Da...","The election was broadcasted live on the BBC, ...",The BBC's live broadcast of the election featu...,"Jeremy Paxman, Andrew Marr and the party's own...","[The election was broadcast live on the BBC, a..."
7,Grady Booch,[X] works for [Y] .,IBM,"It was developed by Grady Booch, while at Rati...",[Grady Booch created it while working at Ratio...,Grady Booch created it while working at Ration...,At Rational Software (now a subsidiary of IBM)...,While at Rational Software (now a subsidiary o...,It was developed by Grady Booch while he was a...,[it was developed by grady booch during ration...
8,Horst Feistel,[X] works for [Y] .,IBM,Developed in the early 1970s at IBM and based ...,[The algorithm was created at IBM in the early...,The algorithm was created at IBM in the early ...,Developed at IBM during the early 1970s and ba...,Based on a previous algorithm designed by Hors...,"IBM developed an algorithm in the early 1970s,...",[the algorithm based on an earlier design by h...
9,Philip Don Estridge,[X] works for [Y] .,IBM,It was created by a team of engineers and desi...,[The creation of it was overseen by a group of...,The creation of it was overseen by a group of ...,"Don Estridge, an engineer and designer, led th...",It was designed and developed by a group of en...,"A team of engineers and designers, led by Don ...",[it was created by a team of engineers and des...


## Capital

In [106]:

file = 'D:/tese/code/data/novos_data/filtrado_one_theCapital_is.csv'
data_capital = pd.read_csv(file).drop(columns=['Unnamed: 0'])
data_capital = data_capital[:10]
data_capital = replace_masked_sentence(data_capital, 'masked_sentence', 'obj_label', 
                               'filled_sentence')

# Aplicando a função a cada linha do DataFrame
data_capital['triple_NL'] = data_capital.apply(substituir_x, axis=1)
data_capital['triple_NL'] = data_capital.apply(substituir_y, axis=1)

data_capital = data_capital[['sub_label', 'template', 'obj_label', 'triple_NL',
             'masked_sentence', 'filled_sentence']]

print('DADOS ORIGINAIS')
display(data_capital)

translate_capital = translate_and_generate(data_capital, 'triple_NL', 'translated_triple_NL')
translate_capital = translate_and_generate(translate_capital, 'filled_sentence', 'translated_filled_sentence')


parafrase_t5_capital = generate_output_phrases_t5(data_capital, 'triple_NL', 't5_triple_NL')
parafrase_t5_capital = rename_numeric_columns(parafrase_t5_capital, 't5_triple_NL_')
parafrase_t5_capital = generate_output_phrases_t5(parafrase_t5_capital, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_capital = rename_numeric_columns(parafrase_t5_capital, 'T5_paraphrased_filled_sentence_')


parafrase_parrot_capital = generate_output_phrases_parrot(data_capital, 'triple_NL', 'parrot_triple_NL')
parafrase_parrot_capital = generate_output_phrases_parrot(parafrase_parrot_capital, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')


DADOS ORIGINAIS


Unnamed: 0,sub_label,template,obj_label,triple_NL,masked_sentence,filled_sentence
0,Azerbaijan,The capital of [X] is [Y] .,Baku,The capital of Azerbaijan is Baku .,The capital of Azerbaijan is the ancient city ...,The capital of Azerbaijan is the ancient city ...
1,Brody Raion,The capital of [X] is [Y] .,Brody,The capital of Brody Raion is Brody .,The Brody Raion Museum of History and Ethnogra...,The Brody Raion Museum of History and Ethnogra...
2,Cook County,The capital of [X] is [Y] .,Chicago,The capital of Cook County is Chicago .,[MASK] is the seat of Cook County.,Chicago is the seat of Cook County.
3,Fort Bend County,The capital of [X] is [Y] .,Richmond,The capital of Fort Bend County is Richmond .,"Moore's son, John Jr., served as a two-term Ma...","Moore's son, John Jr., served as a two-term Ma..."
4,Cayuga County,The capital of [X] is [Y] .,Auburn,The capital of Cayuga County is Auburn .,Hulbert was elected as a Federalist to the Thi...,Hulbert was elected as a Federalist to the Thi...
5,City of Hawkesbury,The capital of [X] is [Y] .,Windsor,The capital of City of Hawkesbury is Windsor .,The Hawkesbury Advocate was an English languag...,The Hawkesbury Advocate was an English languag...
6,Grand Est,The capital of [X] is [Y] .,Strasbourg,The capital of Grand Est is Strasbourg .,"[MASK] (/ˈstræzbɜːrɡ/, [stʁaz.buʁ, stʁas.buʁ];...","Strasbourg (/ˈstræzbɜːrɡ/, [stʁaz.buʁ, stʁas.b..."
7,Caddo Parish,The capital of [X] is [Y] .,Shreveport,The capital of Caddo Parish is Shreveport .,"(born November 24, 1983 in Caddo Parish [MASK]...","(born November 24, 1983 in Caddo Parish Shreve..."
8,Nebraska,The capital of [X] is [Y] .,Lincoln,The capital of Nebraska is Lincoln .,Urban Search and Rescue Nebraska Task Force 1 ...,Urban Search and Rescue Nebraska Task Force 1 ...
9,Hampshire,The capital of [X] is [Y] .,Winchester,The capital of Hampshire is Winchester .,"The county town of Hampshire is [MASK], the fo...","The county town of Hampshire is Winchester, th..."


In [109]:
triple_to_nl_capital = pd.concat([data_capital[['sub_label', 'template', 'obj_label']],
                          translate_capital['translated_triple_NL'], 
                          parafrase_t5_capital[['t5_triple_NL', 't5_triple_NL_0', 't5_triple_NL_1', 
                                                't5_triple_NL_2', 't5_triple_NL_3']], 
                          parafrase_parrot_capital['parrot_triple_NL']], axis=1)
triple_to_nl_capital.dropna(inplace=True)
triple_to_nl_capital.reset_index(drop=True, inplace=True)
triple_to_nl_capital['parrot_triple_NL'] = triple_to_nl_capital['parrot_triple_NL'].apply(lambda x: x[0] if x != None else None)


filled_sentences_capital = pd.concat([data_capital[['sub_label', 'template', 'obj_label']],
                          translate_capital['translated_filled_sentence'], 
                          parafrase_t5_capital[['T5_paraphrased_filled_sentence', 'T5_paraphrased_filled_sentence_0', 
                                                'T5_paraphrased_filled_sentence_1', 'T5_paraphrased_filled_sentence_2', 
                                                'T5_paraphrased_filled_sentence_3']], 
                          parafrase_parrot_capital['Parrot_paraphrased_filled_sentence']], axis=1)
filled_sentences_capital.dropna(inplace=True)
filled_sentences_capital.reset_index(drop=True, inplace=True)


In [110]:
print('Triples NL')
display(triple_to_nl_capital)

Triples NL


Unnamed: 0,sub_label,template,obj_label,translated_triple_NL,t5_triple_NL,t5_triple_NL_0,t5_triple_NL_1,t5_triple_NL_2,t5_triple_NL_3,parrot_triple_NL
0,Azerbaijan,The capital of [X] is [Y] .,Baku,The capital of Azerbaijan is Baku.,"[Baku is the capital of Azerbaijan., Azerbaija...",Baku is the capital of Azerbaijan.,Azerbaijan's capital is located in Baku.,The city of Baku is the capital of Azerbaijan.,Baku serves as the capital of Azerbaijan.,"[azerbaijan's capital is baku, baku is the cap..."
1,Brody Raion,The capital of [X] is [Y] .,Brody,The capital of Brody Raion is Brody.,[Brody Raion is a city that has its capital in...,Brody Raion is a city that has its capital in ...,"The city of Brody Raion is located in Brough, ...",Brody is the capital of Brodia Raion.,The capital city of Brody Raion is Brough.,[the brody raion capital is brody]
2,Cook County,The capital of [X] is [Y] .,Chicago,The capital of Cook County is Chicago.,[The city of Chicago serves as the capital of ...,The city of Chicago serves as the capital of C...,Cook County's capital is located in Chicago.,Chicago serves as the political center of Cook...,The capital of Cook County is Chicago.,"[cook county's capital is chicago, the county'..."
3,Fort Bend County,The capital of [X] is [Y] .,Richmond,The capital of Fort Bend County is Richmond.,[Richmond serves as the capital of Fort Bend C...,Richmond serves as the capital of Fort Bend Co...,The capital of Fort Bend County is located in ...,The city of Richmond serves as the capital of ...,Fort Bend County's capital is situated in Rich...,"[fort bend county's capital is richmond, richm..."
4,Cayuga County,The capital of [X] is [Y] .,Auburn,The capital of Cayuga County is Auburn.,[Auburn serves as the capital of Cayuga County...,Auburn serves as the capital of Cayuga County.,Cayuga County's capital is located in Auburn.,The capital of Cayuga County is Auburn.,Auburn is the capital of Cayuga County.,"[the county's capital is auburn, auburn is the..."
5,City of Hawkesbury,The capital of [X] is [Y] .,Windsor,The capital city of Hawkesbury is Windsor.,[Windsor serves as the capital of the City of ...,Windsor serves as the capital of the City of H...,The City of Hawkesbury is headquartered in Win...,Windsor is the capital city of Hawkesbury.,The city of Windsor serves as the capital.,"[windsor is the capital of hawkesbury, windsor..."
6,Grand Est,The capital of [X] is [Y] .,Strasbourg,The capital of the Grand Est is Strasbourg.,"[Strasbourg is the capital of the Grand Est., ...",Strasbourg is the capital of the Grand Est.,Strasbourg serves as the capital of the Grand ...,The Grand Est's capital is Strasbourg.,The city of Strasbourg is the capital of the G...,"[the capital of the grand est is strasbourg, t..."
7,Caddo Parish,The capital of [X] is [Y] .,Shreveport,The capital of Caddo Parish is Shreveport.,[Shreveport serves as the capital of Caddo Par...,Shreveport serves as the capital of Caddo Parish.,The city of Shreveport is the capital of Caddo...,Caddo Parish's capital city is Shreveport.,Shreveport is the main location in Caddo Parish.,[the capital of the parish of caddo is shrevep...
8,Nebraska,The capital of [X] is [Y] .,Lincoln,The capital of Nebraska is Lincoln.,[The state of Nebraska's capital is located in...,The state of Nebraska's capital is located in ...,Lincoln serves as the capital of the state.,"Nebraska's capital is located in Lincoln, not ...",The capital city of Nebraska is Lincoln.,"[nebraska's capital is lincoln, nebraska's cap..."
9,Hampshire,The capital of [X] is [Y] .,Winchester,The capital of Hampshire is Winchester.,[Winchester serves as the capital of Hampshire...,Winchester serves as the capital of Hampshire.,Hampshire's capital is situated in Winchester.,The city of Winchester is the capital of Hamps...,Winchester is the political center of Hampshir...,"[winchester is hampshire's capital, winchester..."


In [111]:
print('Filled Sentences')
display(filled_sentences_capital)

Filled Sentences


Unnamed: 0,sub_label,template,obj_label,translated_filled_sentence,T5_paraphrased_filled_sentence,T5_paraphrased_filled_sentence_0,T5_paraphrased_filled_sentence_1,T5_paraphrased_filled_sentence_2,T5_paraphrased_filled_sentence_3,Parrot_paraphrased_filled_sentence
0,Azerbaijan,The capital of [X] is [Y] .,Baku,The capital of Azerbaijan is the ancient city ...,"[Baku, the ancient city with the largest and b...","Baku, the ancient city with the largest and be...","The capital of Azerbaijan is Baku, an ancient ...","Azerbaijan's capital, Baku, is the ancient cit...","Baku, an ancient city, has been the capital of...",[the capital of azerbaijan is the ancient city...
1,Brody Raion,The capital of [X] is [Y] .,Brody,The Brody Raion Museum of History and Ethnogra...,[The Brody Raion Museum of History and Ethnogr...,The Brody Raion Museum of History and Ethnogra...,"Brody city, Ukraine is home to the Brolin Raio...",A museum named Brody Raion Museum of History a...,"In Brody city, Ukraine, the Brolli Raion Museu...",[The Brody Raion Museum of History and Ethnogr...
2,Cook County,The capital of [X] is [Y] .,Chicago,Chicago is the seat of Cook County.,[The city of Chicago is the capital and admini...,The city of Chicago is the capital and adminis...,Chicago is the city that governs Cook County.,Cook County is situated in Chicago.,The city of Chicago serves as the capital of C...,[chicago is the capital of cook county]
3,Fort Bend County,The capital of [X] is [Y] .,Richmond,"Moore's son, John Jr., was a two-term mayor of...","[John Jr., the son of Moore, held the position...","John Jr., the son of Moore, held the positions...","In Fort Bend County, Texas, John Jr. was elect...",Moore's son John Jr. served two terms as Mayor...,"The son of Moore, John Jr. was elected Mayor o...",[moore's son john jr served as mayor of richmo...
4,Cayuga County,The capital of [X] is [Y] .,Auburn,Hulbert was elected federalist at the Thirteen...,[In response to the vacancy caused by Daniel D...,In response to the vacancy caused by Daniel De...,"After the resignation of Daniel Dewey, Hulbert...","Following Daniel Dewey's resignation, Hulbert ...",Hulbert was elected as a Federalist to the Thi...,[hulbert was elected to the thirteenth congres...
5,City of Hawkesbury,The capital of [X] is [Y] .,Windsor,The Hawkesbury Advocate is an English-language...,"[Windsor, Australia was home to the Hawkesbury...","Windsor, Australia was home to the Hawkesbury ...","The Hawkesbury Advocate, a broadsheet newspape...","In Windsor, Australia, the Hawkesbury Advocate...",An English language broadsheet newspaper named...,[the hawkesbury advocate was an english-langua...
6,Grand Est,The capital of [X] is [Y] .,Strasbourg,Strasbourg is the capital and largest city of ...,[The capital and largest city of the Alsatian-...,The capital and largest city of the Alsatian-C...,"Strasbourg, which is also known as Straßburg i...",The seat of the European Parliament is located...,"Strasbourg (/strzbrd/, [stig.bu, stag.luj]) is...","[Strasbourg (/ˈstræzbɜːrɡ/, [stʁaz.buʁ, stʁas...."
7,Caddo Parish,The capital of [X] is [Y] .,Shreveport,"(born November 24, 1983 in Caddo Parish Shreve...","[Born on November 24, 1983, in Caddo Parish Sh...","Born on November 24, 1983, in Caddo Parish Shr...",A former American football linebacker was born...,"The individual who was born on November 24, 19...",Born in Caddo Parish Shreveport on November 24...,"[(born November 24, 1983 in Caddo Parish Shrev..."
8,Nebraska,The capital of [X] is [Y] .,Lincoln,Nebraska Task Force 1 or NE-TF1 is a FEMA urba...,"[The FEMA Urban Search and Rescue Task Force, ...","The FEMA Urban Search and Rescue Task Force, a...","Lincoln is the location of NE-TF1, which stand...",NE-TF1 is the name given to the Urban Search a...,"Urban Search and Rescue Nebraska Task Force 1,...",[urban search and rescue nebraska task force 1...
9,Hampshire,The capital of [X] is [Y] .,Winchester,"The county town of Hampshire is Winchester, th...","[The county town of Winchester, which was once...","The county town of Winchester, which was once ...","Hampshire is the county town of Winchester, wh...","Winchester, the former capital city of England...","Hampshire's county town is Winchester, the for...","[The county town of Hampshire is Winchester, t..."


## Died In

In [112]:

file = 'D:/tese/code/data/novos_data/filtrado_one_died_in.csv'
data_diedIn = pd.read_csv(file).drop(columns=['Unnamed: 0'])
data_diedIn = data_diedIn[:10]
data_diedIn = replace_masked_sentence(data_diedIn, 'masked_sentence', 'obj_label', 
                               'filled_sentence')

# Aplicando a função a cada linha do DataFrame
data_diedIn['triple_NL'] = data_diedIn.apply(substituir_x, axis=1)
data_diedIn['triple_NL'] = data_diedIn.apply(substituir_y, axis=1)

data_diedIn = data_diedIn[['sub_label', 'template', 'obj_label', 'triple_NL',
             'masked_sentence', 'filled_sentence']]

print('DADOS ORIGINAIS')
display(data_diedIn)

translate_diedIn = translate_and_generate(data_diedIn, 'triple_NL', 'translated_triple_NL')
translate_diedIn = translate_and_generate(translate_diedIn, 'filled_sentence', 'translated_filled_sentence')


parafrase_t5_diedIn = generate_output_phrases_t5(data_diedIn, 'triple_NL', 't5_triple_NL')
parafrase_t5_diedIn = rename_numeric_columns(parafrase_t5_diedIn, 't5_triple_NL_')
parafrase_t5_diedIn = generate_output_phrases_t5(parafrase_t5_diedIn, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_diedIn = rename_numeric_columns(parafrase_t5_diedIn, 'T5_paraphrased_filled_sentence_')

parafrase_parrot_diedIn = generate_output_phrases_parrot(data_diedIn, 'triple_NL', 'parrot_triple_NL')
parafrase_parrot_diedIn = generate_output_phrases_parrot(parafrase_parrot_diedIn, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')


DADOS ORIGINAIS


Unnamed: 0,sub_label,template,obj_label,triple_NL,masked_sentence,filled_sentence
0,Akihiko Saito,[X] died in [Y] .,Iraq,Akihiko Saito died in Iraq .,"Akihiko Saito (斎藤昭彦 Saitō Akihiko, born Januar...","Akihiko Saito (斎藤昭彦 Saitō Akihiko, born Januar..."
1,Charles Nodier,[X] died in [Y] .,Paris,Charles Nodier died in Paris .,It is usually said that he studied in [MASK] w...,It is usually said that he studied in Paris wi...
2,Jack Laurence Chalker,[X] died in [Y] .,Baltimore,Jack Laurence Chalker died in Baltimore .,"The first edition, entitled The Languages of M...","The first edition, entitled The Languages of M..."
3,Laurel Aitken,[X] died in [Y] .,Leicester,Laurel Aitken died in Leicester .,Fox also appeared later that year with members...,Fox also appeared later that year with members...
4,Georgios Roilos,[X] died in [Y] .,Athens,Georgios Roilos died in Athens .,He studied painting at the [MASK] School of Fi...,He studied painting at the Athens School of Fi...
5,Salomon Sulzer,[X] died in [Y] .,Vienna,Salomon Sulzer died in Vienna .,From the age of 10 he sang in the choir of Sal...,From the age of 10 he sang in the choir of Sal...
6,Michelangelo,[X] died in [Y] .,Rome,Michelangelo died in Rome .,"As a young woman, Anguissola traveled to [MASK...","As a young woman, Anguissola traveled to Rome ..."
7,Louis Vola,[X] died in [Y] .,Paris,Louis Vola died in Paris .,"Louis Vola (La Seyne-sur-Mer, France, 6 July 1...","Louis Vola (La Seyne-sur-Mer, France, 6 July 1..."
8,Mark Lothar,[X] died in [Y] .,Munich,Mark Lothar died in Munich .,"Mark Lothar [ló:tar] (born Lothar Hundertmark,...","Mark Lothar [ló:tar] (born Lothar Hundertmark,..."
9,Friedrich August Theodor Winnecke,[X] died in [Y] .,Bonn,Friedrich August Theodor Winnecke died in Bonn .,Jean Louis Pons (Marseille) originally discove...,Jean Louis Pons (Marseille) originally discove...


In [113]:
triple_to_nl_diedIn = pd.concat([data_diedIn[['sub_label', 'template', 'obj_label']],
                          translate_diedIn['translated_triple_NL'], 
                          parafrase_t5_diedIn[['t5_triple_NL', 't5_triple_NL_0', 't5_triple_NL_1', 
                                                't5_triple_NL_2', 't5_triple_NL_3']], 
                          parafrase_parrot_diedIn['parrot_triple_NL']], axis=1)
triple_to_nl_diedIn.dropna(inplace=True)
triple_to_nl_diedIn.reset_index(drop=True, inplace=True)
triple_to_nl_diedIn['parrot_triple_NL'] = triple_to_nl_diedIn['parrot_triple_NL'].apply(lambda x: x[0] if x != None else None)

filled_sentences_diedIn = pd.concat([data_diedIn[['sub_label', 'template', 'obj_label']],
                          translate_diedIn['translated_filled_sentence'], 
                          parafrase_t5_diedIn[['T5_paraphrased_filled_sentence', 'T5_paraphrased_filled_sentence_0', 
                                                'T5_paraphrased_filled_sentence_1', 'T5_paraphrased_filled_sentence_2', 
                                                'T5_paraphrased_filled_sentence_3']], 
                          parafrase_parrot_diedIn['Parrot_paraphrased_filled_sentence']], axis=1)
filled_sentences_diedIn.dropna(inplace=True)
filled_sentences_diedIn.reset_index(drop=True, inplace=True)



In [114]:
print('Triples NL')
display(triple_to_nl_diedIn)

Triples NL


Unnamed: 0,sub_label,template,obj_label,translated_triple_NL,t5_triple_NL,t5_triple_NL_0,t5_triple_NL_1,t5_triple_NL_2,t5_triple_NL_3,parrot_triple_NL
0,Akihiko Saito,[X] died in [Y] .,Iraq,Akihiko Saito has died in Iraq.,"[Akihiko Saito lost his life in Iraq., In Iraq...",Akihiko Saito lost his life in Iraq.,"In Iraq, Akihiko Saito lost his life.",Akihiko Saitô perished in Iraq.,The death of Akihiko Saikito occurred in Iraq.,[akihiko saito died in iraq]
1,Charles Nodier,[X] died in [Y] .,Paris,Charles Nodier died in Paris.,"[Charles Nodier's death occurred in Paris., Th...",Charles Nodier's death occurred in Paris.,The demise of Charles Nodier took place in Paris.,"In Paris, Charles Nodier passed away.",Paris was the place where Charles Nodier passe...,[charles nodier died in paris]
2,Jack Laurence Chalker,[X] died in [Y] .,Baltimore,Jack Laurence Chalker died in Baltimore.,[The death of Jack Laurence Chalker occurred i...,The death of Jack Laurence Chalker occurred in...,"In Baltimore, the life of Jack Laurence Chalke...",Baltimore was the location where Jack Laurence...,Jack Laurence Chalker met his demise in Baltim...,"[laurence chalker died in baltimore, jack laur..."
3,Laurel Aitken,[X] died in [Y] .,Leicester,Laurel Aitken died in Leicester.,"[Laurel Aitken met his demise in Leicester., L...",Laurel Aitken met his demise in Leicester.,Leicester was where Laurel Aitken met his demise.,"In Leicester, the late Laurel Aitken passed away.",The death of Laurel Aitken occurred in Leicester.,"[aitken died in leicester, laurel aitken died ..."
4,Georgios Roilos,[X] died in [Y] .,Athens,Georgios Roilos died in Athens.,"[In Athens, Georgios Roilos passed away., Geor...","In Athens, Georgios Roilos passed away.",Georgios Roilos met his demise in Athens.,The death of Georgios Roilos occurred in Athena.,Athens was where GeorgioS Roillos died.,[georgios roilos died in athens]
5,Salomon Sulzer,[X] died in [Y] .,Vienna,Solomon Sulzer died in Vienna.,"[In Vienna, Salomon Sulzer passed away., Salom...","In Vienna, Salomon Sulzer passed away.",Salomon Sulzer passed away in Vienna.,Vienna was the place where Salomon Sulzer pass...,The death of Salomon Sulzer occurred in Vienna.,"[in vienna salomon sulzer died, salomon sulzer..."
6,Michelangelo,[X] died in [Y] .,Rome,Michelangelo died in Rome.,"[The death of Michelangelo occurred in Rome., ...",The death of Michelangelo occurred in Rome.,Rome was the place where Michelangelo lost his...,Michelangelo's death occurred in the city of R...,"In Rome, Michelangelo passed away.",[michelangelo died in rome]
7,Louis Vola,[X] died in [Y] .,Paris,Louis Vola died in Paris.,"[Louis Vola passed away in Paris., In Paris, L...",Louis Vola passed away in Paris.,"In Paris, Louis Vola passed away.",The demise of Louis Vola took place in Paris.,Paris was the place where Louis Vola passed away.,"[louis vola died at paris, louis vola died in ..."
8,Mark Lothar,[X] died in [Y] .,Munich,Mark Lothar died in Munich.,"[In Munich, Mark Lothar passed away., Mark Lot...","In Munich, Mark Lothar passed away.",Mark Lothar's demise took place in Munich.,Munich was the place where Mark Lothar passed ...,The death of Mark Lothar occurred in Munich.,[mark lothar died in munich]
9,Friedrich August Theodor Winnecke,[X] died in [Y] .,Bonn,Friedrich August Theodor Winnecke died in Bonn.,[The death of Friedrich August Theodor Winneck...,The death of Friedrich August Theodor Winnecke...,"In Bonn, Friedrich August Theodor Winnecke pas...",Friedrich August Theodor Winnecke's death took...,Bonn was where Friedrich August Theodor Winnec...,[friedrich august theodor winnecke died in bonn]


In [115]:
print('Filled Sentences')
display(filled_sentences_diedIn)

Filled Sentences


Unnamed: 0,sub_label,template,obj_label,translated_filled_sentence,T5_paraphrased_filled_sentence,T5_paraphrased_filled_sentence_0,T5_paraphrased_filled_sentence_1,T5_paraphrased_filled_sentence_2,T5_paraphrased_filled_sentence_3,Parrot_paraphrased_filled_sentence
0,Charles Nodier,[X] died in [Y] .,Paris,It is generally said that he studied in Paris ...,[Charles Nodier and he were said to have atten...,Charles Nodier and he were said to have attend...,He was said to have studied in Paris with Char...,"According to tradition, he attended school in ...",It is commonly believed that he attended schoo...,[it is usually said he studied with charles no...
1,Jack Laurence Chalker,[X] died in [Y] .,Baltimore,"The first edition, entitled The Languages of M...","[Mirage Press, Baltimore, published The Langua...","Mirage Press, Baltimore, published The Languag...","In 1974, Mirage Press, Baltimore published The...","The first edition of Middle-earth, titled The ...","Mirage Press, Baltimore published The Language...",[the first edition the languages of middle ear...
2,Laurel Aitken,[X] died in [Y] .,Leicester,Fox also appeared later that year with members...,"[Fox joined The Riffs, his son Lawrence, and G...","Fox joined The Riffs, his son Lawrence, and Ge...",At a Leicester memorial concert for Laurel Ait...,"During a Leicester memorial concert, Fox joine...","The following year, Fox joined The Riffs membe...",[Fox also appeared later that year with member...
3,Georgios Roilos,[X] died in [Y] .,Athens,He studied painting at the School of Fine Arts...,[He received instruction in painting from teac...,He received instruction in painting from teach...,"At the Athens School of Fine Arts, he received...",The Athens School of Fine Arts provided him wi...,Painting was taught to him by Nikiphoros Lytra...,[he studied painting at the athens school of f...
4,Salomon Sulzer,[X] died in [Y] .,Vienna,From the age of 10 he sang in the choir of Sal...,"[As a 10-year-old, he sang in Salomon Sulzer's...","As a 10-year-old, he sang in Salomon Sulzer's ...",He began his singing career at the age of 10 i...,"At the age of 10, he sang in Salomon Sulzer's ...","From the age of 10, he sang in Salomon Sulzer'...",[from the age of 10 he sang in the choir of sa...
5,Michelangelo,[X] died in [Y] .,Rome,"As a young woman, Anguissola travels to Rome w...","[As a young woman, Anguissola traveled to Rome...","As a young woman, Anguissola traveled to Rome ...",Anguissola traveled with Michelangelo to Rome ...,"During her youth, Anguissola journeyed to Rome...","In her youth, Anguissola journeyed to Rome whe...",[as a young woman anguissola traveled to rome ...
6,Louis Vola,[X] died in [Y] .,Paris,"Louis Vola (La Seyne-sur-Mer, France, July 6, ...",[The godfather of guitarist Francois Vola is L...,The godfather of guitarist Francois Vola is Lo...,"Louis Vola, a French double-bassist who worked...","The French double-bassist Louis Vola, who gain...","French double-bassist Louis Vola, who played w...","[Louis Vola (La Seyne-sur-Mer, France, 6 July ..."
7,Mark Lothar,[X] died in [Y] .,Munich,"Mark Lothar [l:tar] (born Lothar Hundertmark, ...","[Mark Lothar, a German composer who was born i...","Mark Lothar, a German composer who was born in...",A German composer named Mark Lothar (pronounce...,"Lothar Hundertmark, who was born in 1902 and l...","The composer Mark Lothar, who was born in 1902...",[Mark Lothar [ló:tar] (born Lothar Hundertmark...
8,Friedrich August Theodor Winnecke,[X] died in [Y] .,Bonn,Jean Louis Pons (Marseille) discovered the com...,[After being discovered by Jean Louis Pons on ...,After being discovered by Jean Louis Pons on J...,The comet was first detected by Jean Louis Pon...,Jean Louis Pons (Marseille) first discovered t...,"On June 12, 1819, Jean Louis Pons (Marseille) ...",[the comet originally discovered by jean louis...


## Born In

In [4]:

file = 'D:/tese/code/data/novos_data/filtrado_one_born_in.csv'
data_bornIn = pd.read_csv(file).drop(columns=['Unnamed: 0'])
data_bornIn = data_bornIn[:10]
data_bornIn = replace_masked_sentence(data_bornIn, 'masked_sentence', 'obj_label', 
                               'filled_sentence')
data_bornIn['obj_label'].str.strip()
# Aplicando a função a cada linha do Data_bornInFrame
data_bornIn['triple_NL'] = data_bornIn.apply(substituir_x, axis=1)
data_bornIn['triple_NL'] = data_bornIn.apply(substituir_y, axis=1)

data_bornIn = data_bornIn[['sub_label', 'template', 'obj_label', 'triple_NL',
             'masked_sentence', 'filled_sentence']]

print('DADOS ORIGINAIS')
display(data_bornIn)

translate_bornIn = translate_and_generate(data_bornIn, 'triple_NL', 'translated_triple_NL')
translate_bornIn = translate_and_generate(translate_bornIn, 'filled_sentence', 'translated_filled_sentence')


parafrase_t5_bornIn = generate_output_phrases_t5(data_bornIn, 'triple_NL', 't5_triple_NL')
parafrase_t5_bornIn = rename_numeric_columns(parafrase_t5_bornIn, 't5_triple_NL_')
parafrase_t5_bornIn = generate_output_phrases_t5(parafrase_t5_bornIn, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_bornIn = rename_numeric_columns(parafrase_t5_bornIn, 'T5_paraphrased_filled_sentence_')


parafrase_parrot_bornIn = generate_output_phrases_parrot(data_bornIn, 'triple_NL', 'parrot_triple_NL')
parafrase_parrot_bornIn = generate_output_phrases_parrot(parafrase_parrot_bornIn, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')


DADOS ORIGINAIS


Unnamed: 0,sub_label,template,obj_label,triple_NL,masked_sentence,filled_sentence
0,Allan Peiper,[X] was born in [Y] .,Alexandra,Allan Peiper was born in Alexandra .,"Allan Peiper (born 26 April 1960 in [MASK], Au...","Allan Peiper (born 26 April 1960 in Alexandra,..."
1,Anthony Barber,[X] was born in [Y] .,Doncaster,Anthony Barber was born in Doncaster .,It was won by the Conservative candidate Antho...,It was won by the Conservative candidate Antho...
2,Paul Mounsey,[X] was born in [Y] .,Scotland,Paul Mounsey was born in Scotland .,NahooToo is the second album by [MASK] musicia...,NahooToo is the second album by Scotland music...
3,Moe Koffman,[X] was born in [Y] .,Toronto,Moe Koffman was born in Toronto .,He has performed with many of [MASK]'s foremos...,He has performed with many of Toronto's foremo...
4,Kurt Schwertsik,[X] was born in [Y] .,Vienna,Kurt Schwertsik was born in Vienna .,"Kurt Schwertsik (born 25 June 1935, [MASK]) is...","Kurt Schwertsik (born 25 June 1935, Vienna) is..."
5,Claude Arrieu,[X] was born in [Y] .,Paris,Claude Arrieu was born in Paris .,"Claude Arrieu (born [MASK], November 30, 1903 ...","Claude Arrieu (born Paris, November 30, 1903 -..."
6,Ryō Kase,[X] was born in [Y] .,Yokohama,Ryō Kase was born in Yokohama .,"Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974...","Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974..."
7,Frans Floris I,[X] was born in [Y] .,Antwerp,Frans Floris I was born in Antwerp .,"1228 – [MASK], March 26, 1258) ""the guardian""[...","1228 – Antwerp, March 26, 1258) ""the guardian""..."
8,Henry Heras,[X] was born in [Y] .,Barcelona,Henry Heras was born in Barcelona .,"Henry Heras (11 September 1888, [MASK] - 14 De...","Henry Heras (11 September 1888, Barcelona - 14..."
9,Daniele Franceschini,[X] was born in [Y] .,Rome,Daniele Franceschini was born in Rome .,Daniele Franceschini (born 13 January 1976 in ...,Daniele Franceschini (born 13 January 1976 in ...


NameError: name 'paraphrase' is not defined

In [157]:
# Função para expandir uma coluna específica do DataFrame
def expand_column(df, column_name):
    # Copiar o DataFrame original para evitar modificá-lo diretamente
    df_expanded = df.copy()
    
    # Verificar se a coluna especificada existe no DataFrame
    if column_name in df.columns:
        # Expandir a coluna especificada
        expanded_df = df[column_name].apply(lambda x: x[:6] if isinstance(x, list) else [None]*6).apply(pd.Series)
        expanded_df.columns = [f"{column_name}_{i}" for i in range(1, 7)]
        
        # Concatenar as colunas expandidas com o DataFrame original
        df_expanded = pd.concat([df_expanded, expanded_df], axis=1)
    
    return df_expanded

In [160]:
triple_to_nl_bornIn = pd.concat([data_bornIn[['sub_label', 'template', 'obj_label', 'triple_NL']],
                          translate_bornIn['translated_triple_NL'], 
                          parafrase_t5_bornIn[['t5_triple_NL_0', 't5_triple_NL_1', 
                                                't5_triple_NL_2', 't5_triple_NL_3']], 
                          parafrase_parrot_bornIn['parrot_triple_NL']], axis=1)
triple_to_nl_bornIn.dropna(inplace=True)
triple_to_nl_bornIn.reset_index(drop=True, inplace=True)
triple_to_nl_bornIn['parrot_triple_NL'] = triple_to_nl_bornIn['parrot_triple_NL'].apply(lambda x: x[0] if x != None else None)


filled_sentences_bornIn = pd.concat([data_bornIn[['sub_label', 'template', 'obj_label', 'masked_sentence']],
                          translate_bornIn['translated_filled_sentence'], 
                          parafrase_t5_bornIn[['T5_paraphrased_filled_sentence_0', 
                                                'T5_paraphrased_filled_sentence_1', 'T5_paraphrased_filled_sentence_2', 
                                                'T5_paraphrased_filled_sentence_3']], 
                          parafrase_parrot_bornIn['Parrot_paraphrased_filled_sentence']], axis=1)
filled_sentences_bornIn.dropna(inplace=True)
filled_sentences_bornIn.reset_index(drop=True, inplace=True)
filled_sentences_bornIn = expand_column(filled_sentences_bornIn, 'Parrot_paraphrased_filled_sentence')

In [81]:
def substituir_x(row):
    return row['template'].replace('[X]', row['sub_label'])

def substituir_by_MASK(row):
    return row['triple_NL'].replace('[Y]', '[MASK]')

In [185]:
# Triple Masked
triple_to_nl_bornIn['triple_NL'] = triple_to_nl_bornIn.apply(substituir_x, axis=1)
triple_to_nl_bornIn['triple_NL'] = triple_to_nl_bornIn.apply(substituir_by_MASK, axis=1)

triple_to_nl_bornIn['translated_triple_NL'] = triple_to_nl_bornIn['translated_triple_NL']\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

triple_to_nl_bornIn['t5_triple_NL_0'] = triple_to_nl_bornIn['t5_triple_NL_0']\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

triple_to_nl_bornIn['t5_triple_NL_1'] = triple_to_nl_bornIn['t5_triple_NL_1']\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

triple_to_nl_bornIn['t5_triple_NL_2'] = triple_to_nl_bornIn['t5_triple_NL_2']\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

triple_to_nl_bornIn['t5_triple_NL_3'] = triple_to_nl_bornIn['t5_triple_NL_3']\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

triple_to_nl_bornIn['parrot_triple_NL'] = triple_to_nl_bornIn['parrot_triple_NL']\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))


# Filled Masked
filled_sentences_bornIn["translated_filled_sentence"] = filled_sentences_bornIn["translated_filled_sentence"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["T5_paraphrased_filled_sentence_0"] = filled_sentences_bornIn["T5_paraphrased_filled_sentence_0"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["T5_paraphrased_filled_sentence_1"] = filled_sentences_bornIn["T5_paraphrased_filled_sentence_1"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["T5_paraphrased_filled_sentence_2"] = filled_sentences_bornIn["T5_paraphrased_filled_sentence_2"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["T5_paraphrased_filled_sentence_3"] = filled_sentences_bornIn["T5_paraphrased_filled_sentence_3"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_1"] = filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_1"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_2"] = filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_2"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_3"] = filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_3"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_4"] = filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_4"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_5"] = filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_5"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_6"] = filled_sentences_bornIn["Parrot_paraphrased_filled_sentence_6"]\
    .apply(lambda x: substituir_palavras(x, filled_sentences_bornIn["obj_label"]))

In [122]:
triple_to_nl_bornIn['triple_NL'] = triple_to_nl_bornIn.apply(substituir_by_MASK, axis=1)

In [173]:
def indices_sem_MASK(texto):
    indices_sem_MASK = []
    for idx, linha in enumerate(texto):
        if '[MASK]' not in linha:
            indices_sem_MASK.append(idx)
    return indices_sem_MASK


In [174]:
print(f'parrot_triple_NL index: {indices_sem_MASK(triple_to_nl_bornIn["parrot_triple_NL"])}')

parrot_triple_NL index: [0, 1, 2, 3, 4, 5, 7, 8]


In [264]:
teste = data_bornIn[['obj_label', "triple_NL"]]#.iloc[indices_sem_MASK(triple_to_nl_bornIn["parrot_triple_NL"])]
teste

Unnamed: 0,obj_label,triple_NL
0,Alexandra,Allan Peiper was born in Alexandra .
1,Doncaster,Anthony Barber was born in Doncaster .
2,Scotland,Paul Mounsey was born in Scotland .
3,Toronto,Moe Koffman was born in Toronto .
4,Vienna,Kurt Schwertsik was born in Vienna .
5,Paris,Claude Arrieu was born in Paris .
6,Yokohama,Ryō Kase was born in Yokohama .
7,Antwerp,Frans Floris I was born in Antwerp .
8,Barcelona,Henry Heras was born in Barcelona .
9,Rome,Daniele Franceschini was born in Rome .


In [265]:
lista_strings1 = ['Daniele Franceschini was born in Rome. \n Anthony Barber was born in [MASK] .',
 'Daniele Franceschini was born in Rome. \n Paul Mounsey was born in [MASK] .',
 'Claude Arrieu was born in Paris. \n Moe Koffman was born in [MASK] .',
 'Moe Koffman was born in Toronto. \n Kurt Schwertsik was born in [MASK] .',
 'Anthony Barber was born in Doncaster. \n Claude Arrieu was born in [MASK] .',
 'Moe Koffman was born in Toronto. \n Ryō Kase was born in [MASK] .',
 'Moe Koffman was born in Toronto. \n Frans Floris I was born in [MASK] .',
 'Frans Floris I was born in Antwerp. \n Henry Heras was born in [MASK] .',
 'Henry Heras was born in Barcelona. \n Daniele Franceschini was born in [MASK] .']
triple_to_nl_bornIn = triple_to_nl_bornIn.assign(triple_NL_w_help_1=lista_strings1)

lista_strings2 = ['Henry Heras was born in Barcelona. \n Ryō Kase was born in Yokohama.\n Daniele Franceschini was born in Rome. \n Anthony Barber was born in [MASK].',
 'Moe Koffman was born in Toronto. \n Daniele Franceschini was born in Rome. \n Paul Mounsey was born in [MASK].',
 'Daniele Franceschini was born in Rome. \n Claude Arrieu was born in Paris. \n Moe Koffman was born in [MASK].',
 'Paul Mounsey was born in Scotland. \n Moe Koffman was born in Toronto. \n Kurt Schwertsik was born in [MASK].',
 'Kurt Schwertsik was born in Vienna. \n Anthony Barber was born in Doncaster. \n Claude Arrieu was born in [MASK].',
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n Ryō Kase was born in [MASK].',
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n Frans Floris I was born in [MASK].',
 'Moe Koffman was born in Toronto. \n Frans Floris I was born in Antwerp. \n Henry Heras was born in [MASK].',
 'Paul Mounsey was born in Scotland. \n Henry Heras was born in Barcelona. \n Daniele Franceschini was born in [MASK].']
triple_to_nl_bornIn = triple_to_nl_bornIn.assign(triple_NL_w_help_2=lista_strings2)

In [182]:
triple_to_nl_bornIn.at[0, 'parrot_triple_NL'] = 'anthony barber was born in [MASK]'
triple_to_nl_bornIn.at[1, 'parrot_triple_NL'] = 'paul mounsey was born in [MASK]'
triple_to_nl_bornIn.at[2, 'parrot_triple_NL'] = 'born in toronto moe koffman was born in [MASK]'
triple_to_nl_bornIn.at[3, 'parrot_triple_NL'] = 'kurt schwertsik was born in [MASK]'
triple_to_nl_bornIn.at[4, 'parrot_triple_NL'] = 'claude arrieu was born in [MASK]'
triple_to_nl_bornIn.at[5, 'parrot_triple_NL'] = 'ry kase was born in [MASK]'
triple_to_nl_bornIn.at[7, 'parrot_triple_NL'] = 'henry heras was born in [MASK]'
triple_to_nl_bornIn.at[8, 'parrot_triple_NL'] = 'daniele franceschini was born in [MASK]'

In [266]:
print('Triples NL')
display(triple_to_nl_bornIn)

Triples NL


Unnamed: 0,sub_label,template,obj_label,triple_NL,translated_triple_NL,t5_triple_NL_0,t5_triple_NL_1,t5_triple_NL_2,t5_triple_NL_3,parrot_triple_NL,triple_NL_w_help_1,triple_NL_w_help_2
0,Anthony Barber,[X] was born in [Y] .,Doncaster,Anthony Barber was born in [MASK] .,Anthony Barber was born in [MASK].,[MASK] is the birthplace of Anthony Barber.,Anthony Barber was born in [MASK].,The birthplace of Anthony Barber was [MASK].,[MASK] was the birthplace of Anthony Barber.,anthony barber was born in [MASK],Daniele Franceschini was born in Rome. \n Anth...,Henry Heras was born in Barcelona. \n Ryō Kase...
1,Paul Mounsey,[X] was born in [Y] .,Scotland,Paul Mounsey was born in [MASK] .,Paul Mounsey was born in [MASK].,The birth of Paul Mounsey took place in [MASK].,[MASK] is the birthplace of Paul Mounsey.,Paul Mounsey was born in [MASK].,"In [MASK], there is someone named Paul Mounsey.",paul mounsey was born in [MASK],Daniele Franceschini was born in Rome. \n Paul...,Moe Koffman was born in Toronto. \n Daniele Fr...
2,Moe Koffman,[X] was born in [Y] .,Toronto,Moe Koffman was born in [MASK] .,Moe Koffman was born in [MASK].,Moe Koffman was born in [MASK].,The birthplace of Moe Koffman is [MASK].,"In [MASK], Moe Koffman was born.",[MASK] is the birthplace of Moe Koffman.,born in toronto moe koffman was born in [MASK],Claude Arrieu was born in Paris. \n Moe Koffma...,Daniele Franceschini was born in Rome. \n Clau...
3,Kurt Schwertsik,[X] was born in [Y] .,Vienna,Kurt Schwertsik was born in [MASK] .,Kurt Schwertsik was born in [MASK].,Kurt Schwertsik was born in [MASK].,The birthplace of Kurt Schwertsik was [MASK].,[MASK] was the birthplace of Kurt Schwertsik.,Kurt Schwertsik was brought up in [MASK].,kurt schwertsik was born in [MASK],Moe Koffman was born in Toronto. \n Kurt Schwe...,Paul Mounsey was born in Scotland. \n Moe Koff...
4,Claude Arrieu,[X] was born in [Y] .,Paris,Claude Arrieu was born in [MASK] .,Claude Arrieu was born in [MASK].,Claude Arrieu was born in [MASK].,The birthplace of Claude Arrieu is [MASK].,"In [MASK], Claude Arrieu was born.",[MASK] was the birthplace of Claude Arrieu.,claude arrieu was born in [MASK],Anthony Barber was born in Doncaster. \n Claud...,Kurt Schwertsik was born in Vienna. \n Anthony...
5,Ryō Kase,[X] was born in [Y] .,Yokohama,Ryō Kase was born in [MASK] .,Ry Kase was born in [MASK].,Ry Kase hails from [MASK].,"In [MASK], Ry Kase was born.",The birthplace of Ry Kase is [MASK].,Born in [MASK] is Ry Kase.,ry kase was born in [MASK],Moe Koffman was born in Toronto. \n Ryō Kase w...,Henry Heras was born in Barcelona. \n Moe Koff...
6,Frans Floris I,[X] was born in [Y] .,Antwerp,Frans Floris I was born in [MASK] .,Frans Floris I was born in [MASK].,The birthplace of Frans Floris I was in [MASK].,"In [MASK], Frans Floris I was born.",[MASK] was the birthplace of Frans Floris I.,Frans Floris I was born in the city of [MASK].,Frans Floris I was born in [MASK] .,Moe Koffman was born in Toronto. \n Frans Flor...,Henry Heras was born in Barcelona. \n Moe Koff...
7,Henry Heras,[X] was born in [Y] .,Barcelona,Henry Heras was born in [MASK] .,Henry Heras was born in [MASK].,Henry Heras was born in [MASK].,[MASK] is the birthplace of Henry Heras.,"In [MASK], Henry Heras was born.",The birthplace of Henry Heras was [MASK].,henry heras was born in [MASK],Frans Floris I was born in Antwerp. \n Henry H...,Moe Koffman was born in Toronto. \n Frans Flor...
8,Daniele Franceschini,[X] was born in [Y] .,Rome,Daniele Franceschini was born in [MASK] .,Daniele Franceschini was born in [MASK].,Daniele Franceschini was born in [MASK].,"In [MASK], Daniele Franceschini was born.",The birthplace of Daniele Franceschini was [MA...,"Born in [MASK], Daniele Franceschini is of Rom...",daniele franceschini was born in [MASK],Henry Heras was born in Barcelona. \n Daniele ...,Paul Mounsey was born in Scotland. \n Henry He...


In [89]:
print(f'translated_filled_sentence index: {indices_sem_MASK(filled_sentences_bornIn["translated_filled_sentence"])}')

print(f'T5_paraphrased_filled_sentence_0 index: {indices_sem_MASK(filled_sentences_bornIn["T5_paraphrased_filled_sentence_0"])}')

print(f'T5_paraphrased_filled_sentence_1 index: {indices_sem_MASK(filled_sentences_bornIn["T5_paraphrased_filled_sentence_1"])}')

print(f'T5_paraphrased_filled_sentence_2 index: {indices_sem_MASK(filled_sentences_bornIn["T5_paraphrased_filled_sentence_2"])}')

print(f'T5_paraphrased_filled_sentence_3 index: {indices_sem_MASK(filled_sentences_bornIn["T5_paraphrased_filled_sentence_3"])}')

translated_filled_sentence index: [2, 4, 6, 9, 11, 18, 25, 27, 28, 32, 33, 38, 45, 47, 48, 50, 51, 54, 59, 60, 64, 65, 67, 70, 71, 73, 79, 80, 82, 84, 86, 88, 94, 95, 97, 104, 108, 109, 111, 116, 117, 119, 120, 122, 123, 124, 130, 133, 134]
T5_paraphrased_filled_sentence_0 index: [10, 53, 76, 104, 119, 123]
T5_paraphrased_filled_sentence_1 index: [2, 3, 10, 15, 37, 49, 122, 123]
T5_paraphrased_filled_sentence_2 index: [30, 37, 40, 81, 91, 104, 106, 111, 122, 123, 135]
T5_paraphrased_filled_sentence_3 index: [1, 7, 8, 10, 41, 52, 59, 76, 91, 99, 102, 104, 105, 107, 126, 135]


In [90]:
teste = filled_sentences_bornIn[['obj_label', 'T5_paraphrased_filled_sentence_3']].iloc[indices_sem_MASK(filled_sentences_bornIn["T5_paraphrased_filled_sentence_3"])]
teste

Unnamed: 0,obj_label,T5_paraphrased_filled_sentence_3
1,Doncaster,Following his defeat in the 1964 General Elect...
7,Antwerp,"The guardian of Holland, who was the son and d..."
8,Barcelona,"During his time in India, Henry Heras worked a..."
10,Lahore,"Bilal Khan, Mizraab and Akhtar Chanal Zahari w..."
41,Scotland,The Scottish team was set to field him and Dav...
52,Lyon,"In the Yvelines department of France, Catherin..."
59,Essex,"An English record producer, engineer and mixer..."
76,Harlem,"During the Civil Rights Movement in the 1960s,..."
91,Glasgow,Roddy McMillan played the title character of T...
99,Utrecht,"The author, artist, illustrator, and graphic d..."


In [91]:
teste.index.tolist()

[1, 7, 8, 10, 41, 52, 59, 76, 91, 99, 102, 104, 105, 107, 126, 135]

In [92]:
filled_sentences_bornIn.at[2, 'translated_filled_sentence'] = 'NahooToo is the second album by [MASK] musician Paul Mounsey.'
filled_sentences_bornIn.at[4, 'translated_filled_sentence'] = 'Kurt Schwertsik (born 25 June 1935, in [MASK]) is an Austrian composer.'
filled_sentences_bornIn.at[6, 'translated_filled_sentence'] = 'Ryo Kase (born November 9, 1974, in [MASK]) is a Japanese actor.'
filled_sentences_bornIn.at[9, 'translated_filled_sentence'] = 'Daniele Franceschini (born 13 January 1976, in [MASK]) is an Italian footballer.'
filled_sentences_bornIn.at[11, 'translated_filled_sentence'] = 'Giovanni Maria Morandi (30 April 1622, in [MASK] – 18 February 1717) was an Italian painter.'
filled_sentences_bornIn.at[18, 'translated_filled_sentence'] = 'Masako Natsume (December 17, 1957, in [MASK] – September 11, 1985) was a Japanese model and actress.'
filled_sentences_bornIn.at[25, 'translated_filled_sentence'] = 'Sir Robert Lee, born in [MASK] (died 22 December 1605), was an English merchant.'
filled_sentences_bornIn.at[27, 'translated_filled_sentence'] = 'Hiroshi Takemura (born October 24, 1953, in [MASK]) is a Japanese actor.'
filled_sentences_bornIn.at[28, 'translated_filled_sentence'] = 'Cedric Boswell (born July 21, 1969, in [MASK]) is an American professional boxer.'
filled_sentences_bornIn.at[32, 'translated_filled_sentence'] = 'Gerard Lee (born 1951, in [MASK]) is an Australian novelist, screenwriter and director.'
filled_sentences_bornIn.at[33, 'translated_filled_sentence'] = 'Eddie Creatchman (February 27, 1928, in [MASK] – March 9, 1994) was a Canadian wrestler, known as Eddie "The Brain" Creatchman, who hated wrestler managers like The Sheik, The Great Samu and Steve Strong.'
filled_sentences_bornIn.at[38, 'translated_filled_sentence'] = 'Alla Pavlova (born July 13, 1952, in [MASK]) is a Russian composer of Ukrainian descent.'
filled_sentences_bornIn.at[40, 'translated_filled_sentence'] = 'Sverrir Gudnason (Icelandic: Sverrir Pll Gunason; born 12 September 1978, in [MASK]) is an Icelandic-born Swedish actor.'
filled_sentences_bornIn.at[46, 'translated_filled_sentence'] = 'Elise Broach (born September 20, 1963, in [MASK]) is an American writer.'
filled_sentences_bornIn.at[48, 'translated_filled_sentence'] = 'Kristine Johnson (born June 5, 1972, in [MASK]) is a fellow member of WCBS-TV in New York City, New York.'
filled_sentences_bornIn.at[49, 'translated_filled_sentence'] = 'Benjamin Cas Haley (born December 27, 1980, in [MASK]) is an American singer and guitarist.'
filled_sentences_bornIn.at[51, 'translated_filled_sentence'] = 'Nayef Hawatmeh (Arabic: [MASK]) is a Palestinian politician of Jordanian origin.'
filled_sentences_bornIn.at[52, 'translated_filled_sentence'] = 'Albert Allen Bartlett (March 21, 1923, in [MASK] – September 7, 2013) was a professor emeritus of physics at the University of Colorado Boulder.'
filled_sentences_bornIn.at[55, 'translated_filled_sentence'] = 'Kjell Olofsson (born 23 July 1965, in [MASK]) is a Swedish footballer.'
filled_sentences_bornIn.at[60, 'translated_filled_sentence'] = 'Haydn Bendall (born 13 April 1951, in [MASK]) is an English producer, engineer and mixer.'
filled_sentences_bornIn.at[61, 'translated_filled_sentence'] = 'Larry Stabbins (born 9 September 1949, in [MASK]) is a British saxophonist, flutist and composer.'
filled_sentences_bornIn.at[65, 'translated_filled_sentence'] = 'Bryan Crawford (born February 18, 1982, in [MASK]) is a Canadian former footballer.'
filled_sentences_bornIn.at[66, 'translated_filled_sentence'] = "Damian O'Flynn (born 29 January 1907, in [MASK]) is an Irish actor."
filled_sentences_bornIn.at[68, 'translated_filled_sentence'] = 'Wade Cunningham (born 19 August 1984, in [MASK]) is a New Zealand racing driver.'
filled_sentences_bornIn.at[71, 'translated_filled_sentence'] = 'Lawrence Bayne (born November 11, 1960, in [MASK]) is a Canadian actor and singer.'
filled_sentences_bornIn.at[72, 'translated_filled_sentence'] = 'Aaron King (born July 26, 1984, in [MASK]) is an American footballer.'
filled_sentences_bornIn.at[74, 'translated_filled_sentence'] = 'India Amanda Caroline Hicks (born 5 September 1967, in [MASK]) is a former fashion model.'
filled_sentences_bornIn.at[80, 'translated_filled_sentence'] = 'Pierre Braunberger (29 July 1905, in [MASk] – 17 November 1990) was a French producer and actor.'
filled_sentences_bornIn.at[81, 'translated_filled_sentence'] = 'Marie Georges Picquart (6 September 1854, in [MASK] – 19 January 1914) was a French army officer.'
filled_sentences_bornIn.at[83, 'translated_filled_sentence'] = 'Nikolaus Dumba (24 July 1830, in [MASK] – 23 March 1900) was an Austrian politician.'
filled_sentences_bornIn.at[85, 'translated_filled_sentence'] = 'Michael Bleekemolen (born 2 October 1949, in [MASK]) is a former racing driver.'
filled_sentences_bornIn.at[87, 'translated_filled_sentence'] = 'Phil Bentham (born 28 October 1971, in [MASK]) is an English referee.'
filled_sentences_bornIn.at[89, 'translated_filled_sentence'] = 'Michael McGoldrick (born 26 November 1971, in [MASK]) is an Irish flute player.'
filled_sentences_bornIn.at[95, 'translated_filled_sentence'] = 'Shritama Mukherjee (born 3 November 1993, in [MASK]) is an Indian television actress.'
filled_sentences_bornIn.at[96, 'translated_filled_sentence'] = 'Axel Whitehead (born 16 December 1980, in [MASK]) is an Australian singer, musician and actress.'
filled_sentences_bornIn.at[98, 'translated_filled_sentence'] = 'Tic Tac is a 1997 Swedish drama film directed by Daniel Alfredson, who was born in [MASK].'
filled_sentences_bornIn.at[106, 'translated_filled_sentence'] = 'Rosario Garibaldi Bosco (July 28, 1866, in [MASK] – December 2, 1936) was an Italian politician, socialist, and writer of republican inspiration.'
filled_sentences_bornIn.at[110, 'translated_filled_sentence'] = 'Randy Renfrow (born January 28, 1958, in [MASK]) is a former NASCAR driver.'
filled_sentences_bornIn.at[111, 'translated_filled_sentence'] = 'Marta Abba (25 June 1900 – 24 June 1988, in the city [MASK]) was an Italian actress.'
filled_sentences_bornIn.at[113, 'translated_filled_sentence'] = 'Carlos Roberto Baute Jiménez (born March 8, 1974, in the city [MASK]) is a Venezuelan singer.'
filled_sentences_bornIn.at[118, 'translated_filled_sentence'] = 'Josh Wise (born February 7, 1983, in the city [MASK]) is an American professional racing driver.'
filled_sentences_bornIn.at[119, 'translated_filled_sentence'] = 'Cao Yunding (born 22 November 1989, in the city [MASK]) is a Chinese footballer.'
filled_sentences_bornIn.at[121, 'translated_filled_sentence'] = 'Frederick Whymper (1838 – 26 November 1901, in the city [MASK]) was a British artist and explorer.'
filled_sentences_bornIn.at[122, 'translated_filled_sentence'] = 'David Harrower (born 1966, in the city [MASK]) is a Scottish playwright.'
filled_sentences_bornIn.at[124, 'translated_filled_sentence'] = 'Giuseppe Tellera (14 March 1882, in the city [MASK] – 7 February 1941) was an Italian general.'
filled_sentences_bornIn.at[125, 'translated_filled_sentence'] = 'In 2010, a Swiss man, Oliver Fricker, pleaded guilty to entering a Mass Rapid Transit depot and painting a train with an accomplice, and was sentenced to five months in prison and three cane shots. He was born in [MASK].'
filled_sentences_bornIn.at[126, 'translated_filled_sentence'] = 'Derek Kerswill (born January 1, 1971, in the city [MASK]) is an American musician.'
filled_sentences_bornIn.at[132, 'translated_filled_sentence'] = 'Ignazio Gardella (March 30, 1905, in the city [MASK] – March 16, 1999) was an Italian architect and designer.'
filled_sentences_bornIn.at[135, 'translated_filled_sentence'] = 'Eric Dover (born January 19, 1967, in the city [MASK]) is an American musician, guitarist and singer.'

In [93]:
filled_sentences_bornIn.at[10,'T5_paraphrased_filled_sentence_0'] = 'In the fourth season, Bilal Khan, Mizraab, Akhtar Chanal Zahri, Jal from [MASK] (a pop rock band), Kaavish, Fareed Ayaz & Abu Muhammad, Asif Hussain Samrak, Komal Rizvi, Sajjad Ali, Mole, QB, and Attaullah Khan Esakhelvi were among the artists featured in the show.'
filled_sentences_bornIn.at[35,'T5_paraphrased_filled_sentence_0'] = 'The English Voluntary Aid Detachment force was established by Dame Katharine Furse, GBE, RRC, who was born in [MASK] to John Addington Symonds and Janet Catherine North.'
filled_sentences_bornIn.at[54,'T5_paraphrased_filled_sentence_0'] = 'Professor Ludwig Marcuse, who was of Jewish origin, wrote and lived in Bad Wiessee from February 8, 1894 to August 2, 1971, and was born in [MASK].'
filled_sentences_bornIn.at[77,'T5_paraphrased_filled_sentence_0'] = 'Bob Moses, also known as Robert Parris Moses or Bob Mose, was an American civil rights leader and educator who led the Student Nonviolent Coordinating Committee on voter education and registration in Mississippi during the Civil Rights Movement of the 1960s. He was born in [MASK].'
filled_sentences_bornIn.at[103,'T5_paraphrased_filled_sentence_0'] = "Cesira Ferrani, an operatic soprano from [MASK], Italy, who died in Pollone on 4 May 1943, is known for her contributions to the history of opera, including the role of Mim in Giacomo Puccini's La bohème and the title role in Puccini's Manon Lescaut during its world premiere in 1893."
filled_sentences_bornIn.at[121,'T5_paraphrased_filled_sentence_0'] = 'The [MASK]-born artist and explorer Frederick Whymper, who lived in Britain from 1838 to 1901, was known for his artistic talent.'
filled_sentences_bornIn.at[125,'T5_paraphrased_filled_sentence_0'] = 'A [MASK]-born named Oliver Fricker was sentenced to five months in prison and three canines for trespassing into a Mass Rapid Transit depot and spray-painting the train with an accomplice in 2010.'


filled_sentences_bornIn.at[2,'T5_paraphrased_filled_sentence_1'] = "The second album from [MASK]-born Paul Mounsey's band NahooToo is released."
filled_sentences_bornIn.at[3,'T5_paraphrased_filled_sentence_1'] = "As a member of Toronto-born Bernie Senensky's Moe Koffman Tribute Band, he has been involved in the music industry for many years."
filled_sentences_bornIn.at[10,'T5_paraphrased_filled_sentence_1'] = "Among the artists featured in season four were Bilal Khan, Mizraab, Akhtar Chanal Zahri, Jal from [MASK] (a pop rock band), Kaavish, Fareed Ayaz & Abu Muhammad, Asif Hussain Samroat, Komal Rizvi, Sajjad Ali, Mole (the British version of Qan Bai) and Attaullah Khan Esakhelvi."
filled_sentences_bornIn.at[15,'T5_paraphrased_filled_sentence_1'] = "He was a trade union official, small business owner, and researcher who also served as an adviser to John Brumby, the Victorian State Leader of the Opposition before he entered politics."
filled_sentences_bornIn.at[35,'T5_paraphrased_filled_sentence_1'] = "The founder of the English Voluntary Aid Detachment (VAD) force was Dame Katharine Furse, GBE. RRC, who was born to John Addington Symonds and Janet Catherine North in 1875 and 1879 respectively. She was born in [MASK]"
filled_sentences_bornIn.at[37,'T5_paraphrased_filled_sentence_1'] = "Rodel Mayol, a Filipino boxer and former WBC Light Flyweight World Champion, was born in Mandaue City, [MASK], Philippines on August 9, 1981."
filled_sentences_bornIn.at[51,'T5_paraphrased_filled_sentence_1'] = "George Adams, a member of the Anson family and renowned politician from Britain's Whig family until 1773, was known as George, and was born in [MASK]"
filled_sentences_bornIn.at[124,'T5_paraphrased_filled_sentence_1'] = "In the Second World War, Giuseppe Tellera, born in the city of [MASK], a general in the Italian army, served from 14 March 1882 to 7 February 1941."
filled_sentences_bornIn.at[125, 'T5_paraphrased_filled_sentence_1'] = "The defendant, Oliver Fricker, a [MASK]-born, was sentenced to five months in prison and three canines for spray-painting the train with an accomplice and breaking into ten minutes of trespassing at Mass Rapid Transit depot in 2010."

filled_sentences_bornIn.at[30,'T5_paraphrased_filled_sentence_2'] = 'The Omaha tribe was the birthplace of Insata La Flesche, also known as Susette LaFlesche Tibbles or Inda Nutley, who wrote and illustrated stories from her childhood in America. She was born in [MASK]'
filled_sentences_bornIn.at[37,'T5_paraphrased_filled_sentence_2'] = 'The former WBC Light Flyweight World Champion and Filipino boxer Rodel Bryan Generalao Mayol, who was born on August 9, 1981 in Mandaue City, [MASK], Philippines, is more commonly known as Rodél Mayolin.'
filled_sentences_bornIn.at[41,'T5_paraphrased_filled_sentence_2'] = 'He was appointed Apostolic Protonotary in 1515 and served as ambassador to the Papal court under Duke Massimiliano Sforza, while also speaking as a speaker to be heard at the V Lateran Council in that year. He was born in [MASK]'
filled_sentences_bornIn.at[82,'T5_paraphrased_filled_sentence_2'] =  "Meyerbeer's musical career began as a pianist, but he ultimately pursued his passion for opera and spent several years studying and writing in Italy. HE was born in [MASK]"
filled_sentences_bornIn.at[92,'T5_paraphrased_filled_sentence_2'] = 'In the early 1970s, Roddy McMillan, born [MASK], played the title character of a tough detective called Daniel Pike in The View from Daniel (the first Scottish television drama starring him), with Edward Boyd writing the series.'
filled_sentences_bornIn.at[103,'T5_paraphrased_filled_sentence_2'] = "An Italian operatic soprano, Cesira Ferrani, born in [MASK], made her debut as Mim in the original production of Giacomo Puccini's La bohème in 1896 and as the title role in Pucci’Saut in its world premiere in 1913."
filled_sentences_bornIn.at[108,'T5_paraphrased_filled_sentence_2'] = 'Pakistani-born model, film and television actor Asad Malik, born in the city [MASK], has starred in several movies and TV shows.'
filled_sentences_bornIn.at[113,'T5_paraphrased_filled_sentence_2'] = 'Carlos Roberto Baute Jiménez, a Venezuelan singer and TV presenter, was born in Carres on March 8, 1974 in the city [MASK].'
filled_sentences_bornIn.at[124,'T5_paraphrased_filled_sentence_2'] = 'During World War II, Giuseppe Tellera, born in the city [MASK], served as a general in the Italian Army.'
filled_sentences_bornIn.at[125,'T5_paraphrased_filled_sentence_2'] = 'In 2010, a [MASK]-born named Oliver Fricker was sentenced to five months in prison and three canines for breaking into. and spray-painting, as well as trespassing into and fleeing from... with an accomplice.'
filled_sentences_bornIn.at[136,'T5_paraphrased_filled_sentence_2'] = "It was held on 31 December 2014 at COEX Hall D, South Korea's Samseong-dong, born in [MASK] with Lee Hwi-jae, Park Shin-hye andPark Seok–joon as hosts."

filled_sentences_bornIn.at[1,'T5_paraphrased_filled_sentence_3'] = 'Following his defeat in the 1964 General Election, Anthony Barber, born in [MASK],of the Conservative Party won the election.'
filled_sentences_bornIn.at[7,'T5_paraphrased_filled_sentence_3'] = 'The guardian of Holland, who was the son and daughter of Floris IV, Count of Netherlands, and Matilda of Brabant, was mentioned in court from 1228-[MASK] to March 26, 1258, under the title "Guardians."'
filled_sentences_bornIn.at[8,'T5_paraphrased_filled_sentence_3'] = 'During his time in India, Henry Heras, born in [MASK], worked as a Spanish Jesuit priest and archeologist before becoming renowned in the field of history.'
filled_sentences_bornIn.at[10,'T5_paraphrased_filled_sentence_3'] = 'Bilal Khan, Mizraab and Akhtar Chanal Zahari were among the featured artists in the fourth season. Pop rock band Jal from [MASK]'
filled_sentences_bornIn.at[35,'T5_paraphrased_filled_sentence_3'] = 'Dame Katharine Furse, born in [MASK], GBE and RRC, who was born to John Addington Symonds and Janet Catherine North, founded the English Voluntary Aid Detachment force.'
filled_sentences_bornIn.at[42,'T5_paraphrased_filled_sentence_3'] = 'The Scottish team was set to field him and David Coulthard, born in [MASK], for the first time.'
filled_sentences_bornIn.at[53,'T5_paraphrased_filled_sentence_3'] = 'In the Yvelines department of France, Catherine Tasca was born on 13 December 1941, in the city of [MASK], and serves as an official in the Senate.'
filled_sentences_bornIn.at[60,'T5_paraphrased_filled_sentence_3'] = 'An English record producer, engineer and mixer is Haydn Bendall, who was born in the city [MASK],England on 13 April 1951.'
filled_sentences_bornIn.at[77,'T5_paraphrased_filled_sentence_3'] = 'During the Civil Rights Movement in the 1960s, Bob Moses, also known as Robert Parris Moses or Bob Moseley, born in [MASK], was an educator and civil rights activist who led the Student Nonviolent Coordinating Committee on voter education.'
filled_sentences_bornIn.at[92,'T5_paraphrased_filled_sentence_3'] = 'Roddy McMillan, born in [MASK] played the title character of The View from Daniel Pike, a Scottish TV drama series set in the early 1970s, who is primarily based on Jeremy Clarkson and written by Edward Boyd.'
filled_sentences_bornIn.at[100,'T5_paraphrased_filled_sentence_3'] = 'The author, artist, illustrator, and graphic designer from the city [MASK], Netherlands is Dick Bruna.'
filled_sentences_bornIn.at[103,'T5_paraphrased_filled_sentence_3'] = "Cesira Ferrani, born in [MASK],an Italian operatic soprano, gained fame for her debut performances in two of the most iconic roles in opera history, namely Mim in Giacomo Puccini's La bohème from 1896 and the title role as Pucci in PuCcini."
filled_sentences_bornIn.at[104,'T5_paraphrased_filled_sentence_3'] = 'A former Secretary-General of the European Commission, Catherine Day was born on 16 June 1954 in Mount Merrion, [MASK], Ireland.'
filled_sentences_bornIn.at[106,'T5_paraphrased_filled_sentence_3'] = 'The author Rosario Garibaldi Bosco (1771–1806) was born in [MASK], Italy on July 28, 1866 and lived from December 2, 1936 to December 3, 1936. He was a socialist, politician, and writer who had Republican principles in Italy.'
filled_sentences_bornIn.at[107,'T5_paraphrased_filled_sentence_3'] = 'In Brooklyn, New York, in 2009, Deanne Reynolds (born 24 August 1985) and [MASK]-born Tim K (Tim Kvasnosky), an American dream pop and post-rock duo from Wisconsin, formed a group known as DEDE.'
filled_sentences_bornIn.at[109,'T5_paraphrased_filled_sentence_3'] = 'Ulysses Dove (January 17, 1947 in [MASK] – June 11, 1996 in Manhattan) was one of the most inventive choreographers to have emerged in contemporary times during the last 50 years.'
filled_sentences_bornIn.at[128,'T5_paraphrased_filled_sentence_3'] = 'After Husayn declared himself caliph in [MASK], the rebellion of Humayn ibn Ali el-Haytham began.'
filled_sentences_bornIn.at[136,'T5_paraphrased_filled_sentence_3'] = 'On December 31, 2014, a ceremony was held at the COEX Hall D in Samseong-don, born in [MASK], South Korea, with Lee Hwi-jae, Park Shin–hye and Park Seo-joon as guests.'

In [250]:
teste = filled_sentences_bornIn["parrot_"]#.iloc[indices_sem_MASK(filled_sentences_bornIn["T5_paraphrased_filled_sentence_3"])]
teste.tolist()

['Allan Peiper (born 26 April 1960 in [MASK], Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.',
 'It was won by the Conservative candidate Anthony Barber, who returned after losing his [MASK] constituency in the 1964 General Election.',
 'NahooToo is the second album by [MASK] musician Paul Mounsey.',
 "He has performed with many of [MASK]'s foremost musicians including as a longstanding and charter member of Bernie Senensky's Moe Koffman Tribute Band.",
 'Kurt Schwertsik (born 25 June 1935, [MASK]) is an Austrian contemporary composer.',
 'Claude Arrieu (born [MASK], November 30, 1903 - died Paris, March 7, 1990) was a prolific French composer.',
 'Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974 in [MASK]) is a Japanese actor.',
 '1228 – [MASK], March 26, 1258) "the guardian"[citation needed] of Holland, son of Floris IV, Count of Holland (1210–1234) and Matilda of Brabant (ca.',
 'Henry Heras (11 September 1888, [MASK] - 14 December 1955, 

In [272]:
lista_strings1 = ['Daniele Franceschini was born in Rome. \n Allan Peiper (born 26 April 1960 in [MASK], Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.',
 'Frans Floris I was born in Antwerp. \n Henry Heras was born in Barcelona. \n It was won by the Conservative candidate Anthony Barber, who returned after losing his [MASK] constituency in the 1964 General Election.',
 'Ryō Kase was born in Yokohama. \n NahooToo is the second album by [MASK] musician Paul Mounsey.',
 "Claude Arrieu was born in Paris. \n He has performed with many of [MASK]'s foremost musicians including as a longstanding and charter member of Bernie Senensky's Moe Koffman Tribute Band.",
 'Kurt Schwertsik was born in Vienna. \n Kurt Schwertsik (born 25 June 1935, [MASK]) is an Austrian contemporary composer.',
 'Moe Koffman was born in Toronto. \n Claude Arrieu (born [MASK], November 30, 1903 - died Paris, March 7, 1990) was a prolific French composer.',
 'Paul Mounsey was born in Scotland. \n Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974 in [MASK]) is a Japanese actor.',
 'Claude Arrieu was born in Paris. \n 1228 – [MASK], March 26, 1258) "the guardian"[citation needed] of Holland, son of Floris IV, Count of Holland (1210–1234) and Matilda of Brabant (ca.',
 'Allan Peiper was born in Alexandra. \n Henry Heras (11 September 1888, [MASK] - 14 December 1955, Bombay, India) was a Spanish Jesuit priest, archeologist and historian in India.',
 'Anthony Barber was born in Doncaster. \n Daniele Franceschini (born 13 January 1976 in [MASK]) is an Italian football midfielder.']
filled_sentences_bornIn = filled_sentences_bornIn.assign(masked_sentence_w_help=lista_strings1)

lista_strings2 = [ 'Ryō Kase was born in Yokohama.\n Daniele Franceschini was born in Rome. \n Allan Peiper (born 26 April 1960 in [MASK], Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.',
 'Moe Koffman was born in Toronto. \n Daniele Franceschini was born in Rome. \n It was won by the Conservative candidate Anthony Barber, who returned after losing his [MASK] constituency in the 1964 General Election.',
 'Daniele Franceschini was born in Rome. \n Claude Arrieu was born in Paris. \n NahooToo is the second album by [MASK] musician Paul Mounsey.',
 "Paul Mounsey was born in Scotland. \n Moe Koffman was born in Toronto. \n He has performed with many of [MASK]'s foremost musicians including as a longstanding and charter member of Bernie Senensky's Moe Koffman Tribute Band.",
 'Kurt Schwertsik was born in Vienna. \n Anthony Barber was born in Doncaster. \n Kurt Schwertsik (born 25 June 1935, [MASK]) is an Austrian contemporary composer.',
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n Claude Arrieu (born [MASK], November 30, 1903 - died Paris, March 7, 1990) was a prolific French composer.',
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974 in [MASK]) is a Japanese actor.',
 'Moe Koffman was born in Toronto. \n Frans Floris I was born in Antwerp. \n 1228 – [MASK], March 26, 1258) "the guardian"[citation needed] of Holland, son of Floris IV, Count of Holland (1210–1234) and Matilda of Brabant (ca.',
 'Paul Mounsey was born in Scotland. \n Henry Heras was born in Barcelona. \n Henry Heras (11 September 1888, [MASK] - 14 December 1955, Bombay, India) was a Spanish Jesuit priest, archeologist and historian in India.',
 "Anthony Barber was born in Doncaster. \n Paul Mounsey was born in Scotland. \n Daniele Franceschini (born 13 January 1976 in [MASK]) is an Italian football midfielder."]
filled_sentences_bornIn = filled_sentences_bornIn.assign(masked_sentence_w_help_2=lista_strings2)

In [None]:
lista_strings1 = ['Daniele Franceschini was born in Rome. \n Allan Peiper (born 26 April 1960 in [MASK], Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.',
 'Frans Floris I was born in Antwerp. \n Henry Heras was born in Barcelona. \n It was won by the Conservative candidate Anthony Barber, who returned after losing his [MASK] constituency in the 1964 General Election.',
 'Ryō Kase was born in Yokohama. \n NahooToo is the second album by [MASK] musician Paul Mounsey.',
 "Claude Arrieu was born in Paris. \n He has performed with many of [MASK]'s foremost musicians including as a longstanding and charter member of Bernie Senensky's Moe Koffman Tribute Band.",
 'Kurt Schwertsik was born in Vienna. \n Kurt Schwertsik (born 25 June 1935, [MASK]) is an Austrian contemporary composer.',
 'Moe Koffman was born in Toronto. \n Claude Arrieu (born [MASK], November 30, 1903 - died Paris, March 7, 1990) was a prolific French composer.',
 'Paul Mounsey was born in Scotland. \n Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974 in [MASK]) is a Japanese actor.',
 'Claude Arrieu was born in Paris. \n 1228 – [MASK], March 26, 1258) "the guardian"[citation needed] of Holland, son of Floris IV, Count of Holland (1210–1234) and Matilda of Brabant (ca.',
 'Allan Peiper was born in Alexandra. \n Henry Heras (11 September 1888, [MASK] - 14 December 1955, Bombay, India) was a Spanish Jesuit priest, archeologist and historian in India.',
 'Anthony Barber was born in Doncaster. \n Daniele Franceschini (born 13 January 1976 in [MASK]) is an Italian football midfielder.']
filled_sentences_bornIn = filled_sentences_bornIn.assign(masked_sentence_w_help=lista_strings1)

lista_strings2 = ['Henry Heras was born in Barcelona. \n Ryō Kase was born in Yokohama.\n Daniele Franceschini was born in Rome. \n Allan Peiper (born 26 April 1960 in [MASK], Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.',
 'Moe Koffman was born in Toronto. \n Daniele Franceschini was born in Rome. \n It was won by the Conservative candidate Anthony Barber, who returned after losing his [MASK] constituency in the 1964 General Election.',
 'Daniele Franceschini was born in Rome. \n Claude Arrieu was born in Paris. \n NahooToo is the second album by [MASK] musician Paul Mounsey.',
 "Paul Mounsey was born in Scotland. \n Moe Koffman was born in Toronto. \n He has performed with many of [MASK]'s foremost musicians including as a longstanding and charter member of Bernie Senensky's Moe Koffman Tribute Band.",
 'Kurt Schwertsik was born in Vienna. \n Anthony Barber was born in Doncaster. \n Kurt Schwertsik (born 25 June 1935, [MASK]) is an Austrian contemporary composer.',
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n Claude Arrieu (born [MASK], November 30, 1903 - died Paris, March 7, 1990) was a prolific French composer.',
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974 in [MASK]) is a Japanese actor.',
 'Moe Koffman was born in Toronto. \n Frans Floris I was born in Antwerp. \n 1228 – [MASK], March 26, 1258) "the guardian"[citation needed] of Holland, son of Floris IV, Count of Holland (1210–1234) and Matilda of Brabant (ca.',
 'Paul Mounsey was born in Scotland. \n Henry Heras was born in Barcelona. \n Henry Heras (11 September 1888, [MASK] - 14 December 1955, Bombay, India) was a Spanish Jesuit priest, archeologist and historian in India.',
 "Anthony Barber was born in Doncaster. \n Paul Mounsey was born in Scotland. \n Daniele Franceschini (born 13 January 1976 in [MASK]) is an Italian football midfielder."]
filled_sentences_bornIn = filled_sentences_bornIn.assign(masked_sentence_w_help_2=lista_strings2)

In [282]:
lista_strings1 = ['Daniele Franceschini was born in Rome. \n ',
 'Frans Floris I was born in Antwerp. \n ',
 'Ryō Kase was born in Yokohama. \n ',
 "Claude Arrieu was born in Paris. \n ",
 'Kurt Schwertsik was born in Vienna. \n ',
 'Moe Koffman was born in Toronto. \n ',
 'Paul Mounsey was born in Scotland. \n ',
 'Claude Arrieu was born in Paris. \n ',
 'Allan Peiper was born in Alexandra. \n ',
 'Anthony Barber was born in Doncaster. \n ']

lista_strings2 = ['Henry Heras was born in Barcelona. \n Ryō Kase was born in Yokohama.\n ',
 'Moe Koffman was born in Toronto. \n Daniele Franceschini was born in Rome. \n ',
 'Daniele Franceschini was born in Rome. \n Claude Arrieu was born in Paris. \n' ,
 "Paul Mounsey was born in Scotland. \n Moe Koffman was born in Toronto. \n" ,
 'Kurt Schwertsik was born in Vienna. \n Anthony Barber was born in Doncaster. \n' ,
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n' ,
 'Henry Heras was born in Barcelona. \n Moe Koffman was born in Toronto. \n' ,
 'Moe Koffman was born in Toronto. \n Frans Floris I was born in Antwerp. \n' ,
 'Paul Mounsey was born in Scotland. \n Henry Heras was born in Barcelona. \n' ,
 "Anthony Barber was born in Doncaster. \n Paul Mounsey was born in Scotland. \n "]

In [287]:
parrot_list_1 = [lista_strings1[0] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[0],
 lista_strings1[1] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[1],
 lista_strings1[2] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[2],
 lista_strings1[3] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[3],
 lista_strings1[4] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[4],
 lista_strings1[5] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[5],
 lista_strings1[6] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[6],
 lista_strings1[7] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[7],
 lista_strings1[8] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[8],
 lista_strings1[9] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(Parrot_paraphrased_filled_sentence_1_w_help=parrot_list_1)

parrot_list_2 = [lista_strings2[0] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[0],
 lista_strings2[1] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[1],
 lista_strings2[2] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[2],
 lista_strings2[3] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[3],
 lista_strings2[4] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[4],
 lista_strings2[5] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[5],
 lista_strings2[6] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[6],
 lista_strings2[7] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[7],
 lista_strings2[8] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[8],
 lista_strings2[9] + filled_sentences_bornIn.Parrot_paraphrased_filled_sentence_1[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(Parrot_paraphrased_filled_sentence_1_w_help_2=parrot_list_2)

In [288]:
T5_paraphrased_filled_sentence_0_list_1 = [lista_strings1[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[0],
 lista_strings1[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[1],
 lista_strings1[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[2],
 lista_strings1[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[3],
 lista_strings1[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[4],
 lista_strings1[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[5],
 lista_strings1[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[6],
 lista_strings1[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[7],
 lista_strings1[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[8],
 lista_strings1[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_0_w_help=T5_paraphrased_filled_sentence_0_list_1)

T5_paraphrased_filled_sentence_0_list_2 = [lista_strings2[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[0],
 lista_strings2[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[1],
 lista_strings2[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[2],
 lista_strings2[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[3],
 lista_strings2[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[4],
 lista_strings2[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[5],
 lista_strings2[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[6],
 lista_strings2[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[7],
 lista_strings2[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[8],
 lista_strings2[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_0[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_0_w_help_2=T5_paraphrased_filled_sentence_0_list_2)

In [289]:
T5_paraphrased_filled_sentence_1_list_1 = [lista_strings1[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[0],
 lista_strings1[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[1],
 lista_strings1[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[2],
 lista_strings1[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[3],
 lista_strings1[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[4],
 lista_strings1[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[5],
 lista_strings1[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[6],
 lista_strings1[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[7],
 lista_strings1[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[8],
 lista_strings1[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_1_w_help=T5_paraphrased_filled_sentence_1_list_1)

T5_paraphrased_filled_sentence_1_list_2 = [lista_strings2[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[0],
 lista_strings2[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[1],
 lista_strings2[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[2],
 lista_strings2[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[3],
 lista_strings2[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[4],
 lista_strings2[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[5],
 lista_strings2[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[6],
 lista_strings2[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[7],
 lista_strings2[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[8],
 lista_strings2[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_1[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_1_w_help_2=T5_paraphrased_filled_sentence_1_list_2)

In [290]:
T5_paraphrased_filled_sentence_2_list_1 = [lista_strings1[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[0],
 lista_strings1[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[1],
 lista_strings1[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[2],
 lista_strings1[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[3],
 lista_strings1[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[4],
 lista_strings1[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[5],
 lista_strings1[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[6],
 lista_strings1[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[7],
 lista_strings1[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[8],
 lista_strings1[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_2_w_help=T5_paraphrased_filled_sentence_2_list_1)

T5_paraphrased_filled_sentence_2_list_2 = [lista_strings2[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[0],
 lista_strings2[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[1],
 lista_strings2[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[2],
 lista_strings2[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[3],
 lista_strings2[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[4],
 lista_strings2[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[5],
 lista_strings2[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[6],
 lista_strings2[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[7],
 lista_strings2[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[8],
 lista_strings2[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_2[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_2_w_help_2=T5_paraphrased_filled_sentence_2_list_2)

In [291]:
T5_paraphrased_filled_sentence_3_list_1 = [lista_strings1[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[0],
 lista_strings1[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[1],
 lista_strings1[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[2],
 lista_strings1[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[3],
 lista_strings1[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[4],
 lista_strings1[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[5],
 lista_strings1[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[6],
 lista_strings1[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[7],
 lista_strings1[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[8],
 lista_strings1[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_3_w_help=T5_paraphrased_filled_sentence_3_list_1)

T5_paraphrased_filled_sentence_3_list_2 = [lista_strings2[0] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[0],
 lista_strings2[1] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[1],
 lista_strings2[2] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[2],
 lista_strings2[3] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[3],
 lista_strings2[4] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[4],
 lista_strings2[5] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[5],
 lista_strings2[6] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[6],
 lista_strings2[7] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[7],
 lista_strings2[8] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[8],
 lista_strings2[9] + filled_sentences_bornIn.T5_paraphrased_filled_sentence_3[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(T5_paraphrased_filled_sentence_3_w_help_2=T5_paraphrased_filled_sentence_3_list_2)

In [293]:
translated_filled_sentence_list_1 = [lista_strings1[0] + filled_sentences_bornIn.translated_filled_sentence[0],
 lista_strings1[1] + filled_sentences_bornIn.translated_filled_sentence[1],
 lista_strings1[2] + filled_sentences_bornIn.translated_filled_sentence[2],
 lista_strings1[3] + filled_sentences_bornIn.translated_filled_sentence[3],
 lista_strings1[4] + filled_sentences_bornIn.translated_filled_sentence[4],
 lista_strings1[5] + filled_sentences_bornIn.translated_filled_sentence[5],
 lista_strings1[6] + filled_sentences_bornIn.translated_filled_sentence[6],
 lista_strings1[7] + filled_sentences_bornIn.translated_filled_sentence[7],
 lista_strings1[8] + filled_sentences_bornIn.translated_filled_sentence[8],
 lista_strings1[9] + filled_sentences_bornIn.translated_filled_sentence[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(translated_filled_sentence_w_help=translated_filled_sentence_list_1)

translated_filled_sentence_list_2 = [lista_strings2[0] + filled_sentences_bornIn.translated_filled_sentence[0],
 lista_strings2[1] + filled_sentences_bornIn.translated_filled_sentence[1],
 lista_strings2[2] + filled_sentences_bornIn.translated_filled_sentence[2],
 lista_strings2[3] + filled_sentences_bornIn.translated_filled_sentence[3],
 lista_strings2[4] + filled_sentences_bornIn.translated_filled_sentence[4],
 lista_strings2[5] + filled_sentences_bornIn.translated_filled_sentence[5],
 lista_strings2[6] + filled_sentences_bornIn.translated_filled_sentence[6],
 lista_strings2[7] + filled_sentences_bornIn.translated_filled_sentence[7],
 lista_strings2[8] + filled_sentences_bornIn.translated_filled_sentence[8],
 lista_strings2[9] + filled_sentences_bornIn.translated_filled_sentence[9]]
filled_sentences_bornIn = filled_sentences_bornIn.assign(translated_filled_sentence_w_help_2=translated_filled_sentence_list_2)

In [210]:
filled_sentences_bornIn.at[0, 'Parrot_paraphrased_filled_sentence_1'] = "allan peiper born april 26 1960 in [MASK] australia is a former professional cyclist who participated in five tour de france cycling races"
filled_sentences_bornIn.at[1, 'Parrot_paraphrased_filled_sentence_1'] = "it was won by the conservative candidate anthony barber who returned in 1964 after losing his constituency [MASK]"
filled_sentences_bornIn.at[3, 'Parrot_paraphrased_filled_sentence_1'] = "he has played with many of [MASK]'s leading musicians including as a long-standing and charter member of the moe koffman tribute band of bernie senensky"
filled_sentences_bornIn.at[4, 'Parrot_paraphrased_filled_sentence_1'] = "kurt schwertsik born june 25 1935 in [MASK] is an austrian contemporary composer"
filled_sentences_bornIn.at[5, 'Parrot_paraphrased_filled_sentence_1'] = "Claude Arrieu (born [MASK], November 30, 1903 - died Paris, March 7, 1990) was a prolific French composer."

filled_sentences_bornIn.at[2, 'translated_filled_sentence'] = "NahooToo is the second album by [MASK]-born musician Paul Mounsey."
filled_sentences_bornIn.at[4, 'translated_filled_sentence'] = "Kurt Schwertsik (born 25 June 1935 in [MASK]) is an Austrian composer."
filled_sentences_bornIn.at[6, 'translated_filled_sentence'] = "Ryo Kase (born November 9, 1974 in [MASK]) is a Japanese actor."
filled_sentences_bornIn.at[9, 'translated_filled_sentence'] = "Daniele Franceschini (born 13 January 1976 in [MASK]) is an Italian footballer."

filled_sentences_bornIn.at[2, 'T5_paraphrased_filled_sentence_1'] = "The second album of Paul Mounsey's, born in [MASK], Scottish band, NahooToo, is released."
filled_sentences_bornIn.at[3, 'T5_paraphrased_filled_sentence_1'] = "'As a member of Bernie Senensky's, born in [MASK], Moe Koffman Tribute Band, he has been involved in the music industry for many years."

filled_sentences_bornIn.at[1, 'T5_paraphrased_filled_sentence_3'] = "Following his defeat in the 1964 General Election, Anthony Barber of the Conservative Party won the election. He was born in [MASK]."
filled_sentences_bornIn.at[7, 'T5_paraphrased_filled_sentence_3'] = 'The guardian of Holland, who was the son and daughter of Floris IV, Count of Netherlands, and Matilda of Brabant, was mentioned in court from 1228 to March 26, 1258, under the title "Guardians."She was born in [MASK].'
filled_sentences_bornIn.at[8, 'T5_paraphrased_filled_sentence_3'] = "During his time in India, Henry Heras, born in [MASK], worked as a Spanish Jesuit priest and archeologist before becoming renowned in the field of history."

In [294]:
print('Filled Sentences')
display(filled_sentences_bornIn)

Filled Sentences


Unnamed: 0,sub_label,template,obj_label,masked_sentence,translated_filled_sentence,T5_paraphrased_filled_sentence_0,T5_paraphrased_filled_sentence_1,T5_paraphrased_filled_sentence_2,T5_paraphrased_filled_sentence_3,Parrot_paraphrased_filled_sentence,...,T5_paraphrased_filled_sentence_0_w_help,T5_paraphrased_filled_sentence_0_w_help_2,T5_paraphrased_filled_sentence_1_w_help,T5_paraphrased_filled_sentence_1_w_help_2,T5_paraphrased_filled_sentence_2_w_help,T5_paraphrased_filled_sentence_2_w_help_2,T5_paraphrased_filled_sentence_3_w_help,T5_paraphrased_filled_sentence_3_w_help_2,translated_filled_sentence_w_help,translated_filled_sentence_w_help_2
0,Allan Peiper,[X] was born in [Y] .,Alexandra,"Allan Peiper (born 26 April 1960 in [MASK], Au...","Allan Peiper (born 26 April 1960 in [MASK], Au...","Allan Peiper, a professional cyclist from [MAS...","Formerly a Professional cyclist, Allan Peiper ...","Allan Peiper, who was born in [MASK], Australi...","A former professional cyclist, Allan Peiper, w...",[allan peiper born april 26 1960 in alexandra ...,...,Daniele Franceschini was born in Rome. \n Alla...,Henry Heras was born in Barcelona. \n Ryō Kase...,Daniele Franceschini was born in Rome. \n Form...,Henry Heras was born in Barcelona. \n Ryō Kase...,Daniele Franceschini was born in Rome. \n Alla...,Henry Heras was born in Barcelona. \n Ryō Kase...,Daniele Franceschini was born in Rome. \n A fo...,Henry Heras was born in Barcelona. \n Ryō Kase...,Daniele Franceschini was born in Rome. \n Alla...,Henry Heras was born in Barcelona. \n Ryō Kase...
1,Anthony Barber,[X] was born in [Y] .,Doncaster,It was won by the Conservative candidate Antho...,It was won by Conservative candidate Anthony B...,After losing his [MASK] constituency in the 19...,"Anthony Barber, the Conservative candidate who...","The Conservative candidate Anthony Barber, who...",Following his defeat in the 1964 General Elect...,[it was won by the conservative candidate anth...,...,Frans Floris I was born in Antwerp. \n After l...,Moe Koffman was born in Toronto. \n Daniele Fr...,Frans Floris I was born in Antwerp. \n Anthony...,Moe Koffman was born in Toronto. \n Daniele Fr...,Frans Floris I was born in Antwerp. \n The Con...,Moe Koffman was born in Toronto. \n Daniele Fr...,Frans Floris I was born in Antwerp. \n Followi...,Moe Koffman was born in Toronto. \n Daniele Fr...,Frans Floris I was born in Antwerp. \n It was ...,Moe Koffman was born in Toronto. \n Daniele Fr...
2,Paul Mounsey,[X] was born in [Y] .,Scotland,NahooToo is the second album by [MASK] musicia...,NahooToo is the second album by [MASK]-born mu...,"Paul Mounsey, a musician from [MASK], has rele...","The second album of Paul Mounsey's, born in [M...","NahooToo is the second album of Paul Mounsey, ...","During his time in India, Henry Heras, born in...",[NahooToo is the second album by Scotland musi...,...,Ryō Kase was born in Yokohama. \n Paul Mounsey...,Daniele Franceschini was born in Rome. \n Clau...,Ryō Kase was born in Yokohama. \n The second a...,Daniele Franceschini was born in Rome. \n Clau...,Ryō Kase was born in Yokohama. \n NahooToo is ...,Daniele Franceschini was born in Rome. \n Clau...,Ryō Kase was born in Yokohama. \n During his t...,Daniele Franceschini was born in Rome. \n Clau...,Ryō Kase was born in Yokohama. \n NahooToo is ...,Daniele Franceschini was born in Rome. \n Clau...
3,Moe Koffman,[X] was born in [Y] .,Toronto,He has performed with many of [MASK]'s foremos...,He has performed with many of [MASK]'s greates...,"Among the prominent musicians in [MASK], he ha...","'As a member of Bernie Senensky's, born in [MA...",He has collaborated with a number of prominent...,Many of [MASK]'s top musicians have collaborat...,[he has played with many of toronto's leading ...,...,Claude Arrieu was born in Paris. \n Among the ...,Paul Mounsey was born in Scotland. \n Moe Koff...,Claude Arrieu was born in Paris. \n 'As a memb...,Paul Mounsey was born in Scotland. \n Moe Koff...,Claude Arrieu was born in Paris. \n He has col...,Paul Mounsey was born in Scotland. \n Moe Koff...,Claude Arrieu was born in Paris. \n Many of [M...,Paul Mounsey was born in Scotland. \n Moe Koff...,Claude Arrieu was born in Paris. \n He has per...,Paul Mounsey was born in Scotland. \n Moe Koff...
4,Kurt Schwertsik,[X] was born in [Y] .,Vienna,"Kurt Schwertsik (born 25 June 1935, [MASK]) is...",Kurt Schwertsik (born 25 June 1935 in [MASK]) ...,The Austrian contemporary composer Kurt Schwer...,"Kurt Schwertsik, an Austrian composer born in ...","An Austrian composer of the modern era, Kurt S...","A contemporary composer of Austrian origin, Ku...",[kurt schwertsik born june 25 1935 in vienna i...,...,Kurt Schwertsik was born in Vienna. \n The Aus...,Kurt Schwertsik was born in Vienna. \n Anthony...,Kurt Schwertsik was born in Vienna. \n Kurt Sc...,Kurt Schwertsik was born in Vienna. \n Anthony...,Kurt Schwertsik was born in Vienna. \n An Aust...,Kurt Schwertsik was born in Vienna. \n Anthony...,Kurt Schwertsik was born in Vienna. \n A conte...,Kurt Schwertsik was born in Vienna. \n Anthony...,Kurt Schwertsik was born in Vienna. \n Kurt Sc...,Kurt Schwertsik was born in Vienna. \n Anthony...
5,Claude Arrieu,[X] was born in [Y] .,Paris,"Claude Arrieu (born [MASK], November 30, 1903 ...","Claude Arrieu, born on November 30, 1903 in [M...","Claude Arrieu, who was born in [MASK] on Novem...","The French composer Claude Arrieu, who was bor...","[MASK] native Claude Arrieu (born November 30,...","A prolific French composer, Claude Arrieu (bor...","[Claude Arrieu (born Paris, November 30, 1903 ...",...,Moe Koffman was born in Toronto. \n Claude Arr...,Henry Heras was born in Barcelona. \n Moe Koff...,Moe Koffman was born in Toronto. \n The French...,Henry Heras was born in Barcelona. \n Moe Koff...,Moe Koffman was born in Toronto. \n [MASK] nat...,Henry Heras was born in Barcelona. \n Moe Koff...,Moe Koffman was born in Toronto. \n A prolific...,Henry Heras was born in Barcelona. \n Moe Koff...,Moe Koffman was born in Toronto. \n Claude Arr...,Henry Heras was born in Barcelona. \n Moe Koff...
6,Ryō Kase,[X] was born in [Y] .,Yokohama,"Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 1974...","Ryo Kase (born November 9, 1974 in [MASK]) is ...","Born in [MASK] on November 9, 1974, Ryo Kase i...","The actor Ryo Kase, born in [MASK] on November...","Ryo Kase, born in [MASK] on November 9, 1974, ...","Born on November 9, 1974 in [MASK], Ryo Kase (...","[Ryo Kase (加瀬 亮 Kase Ryō, born November 9, 197...",...,Paul Mounsey was born in Scotland. \n Born in ...,Henry Heras was born in Barcelona. \n Moe Koff...,Paul Mounsey was born in Scotland. \n The acto...,Henry Heras was born in Barcelona. \n Moe Koff...,Paul Mounsey was born in Scotland. \n Ryo Kase...,Henry Heras was born in Barcelona. \n Moe Koff...,Paul Mounsey was born in Scotland. \n Born on ...,Henry Heras was born in Barcelona. \n Moe Koff...,Paul Mounsey was born in Scotland. \n Ryo Kase...,Henry Heras was born in Barcelona. \n Moe Koff...
7,Frans Floris I,[X] was born in [Y] .,Antwerp,"1228 – [MASK], March 26, 1258) ""the guardian""[...","1228 – [MASK], 26 March 1258) “the guardian” o...","Holland, the son of Floris IV, Count of Hollan...","""The protector"" was the name given to Holland,...","During the years 1228 to March 26, 1248, in [M...","The guardian of Holland, who was the son and d...","[1228 – Antwerp, March 26, 1258) ""the guardian...",...,"Claude Arrieu was born in Paris. \n Holland, t...",Moe Koffman was born in Toronto. \n Frans Flor...,"Claude Arrieu was born in Paris. \n ""The prote...",Moe Koffman was born in Toronto. \n Frans Flor...,Claude Arrieu was born in Paris. \n During the...,Moe Koffman was born in Toronto. \n Frans Flor...,Claude Arrieu was born in Paris. \n The guardi...,Moe Koffman was born in Toronto. \n Frans Flor...,Claude Arrieu was born in Paris. \n 1228 – [MA...,Moe Koffman was born in Toronto. \n Frans Flor...
8,Henry Heras,[X] was born in [Y] .,Barcelona,"Henry Heras (11 September 1888, [MASK] - 14 De...","Henry Heras (11 September 1888, [MASK] – 14 De...","In India, Henry Heras was a Spanish Jesuit pri...","A Spanish Jesuit priest, archeologist, and his...","Henry Heras, a Spanish Jesuit priest, archeolo...","During his time in India, Henry Heras, born in...","[Henry Heras (11 September 1888, Barcelona - 1...",...,Allan Peiper was born in Alexandra. \n In Indi...,Paul Mounsey was born in Scotland. \n Henry He...,Allan Peiper was born in Alexandra. \n A Spani...,Paul Mounsey was born in Scotland. \n Henry He...,Allan Peiper was born in Alexandra. \n Henry H...,Paul Mounsey was born in Scotland. \n Henry He...,Allan Peiper was born in Alexandra. \n During ...,Paul Mounsey was born in Scotland. \n Henry He...,Allan Peiper was born in Alexandra. \n Henry H...,Paul Mounsey was born in Scotland. \n Henry He...
9,Daniele Franceschini,[X] was born in [Y] .,Rome,Daniele Franceschini (born 13 January 1976 in ...,Daniele Franceschini (born 13 January 1976 in ...,"Daniele Franceschini, an Italian football midf...",The Italian football midfielder Daniele France...,"Born in [MASK] on January 13, 1976, Daniele Fr...","An Italian football player, Daniele Franceschi...",[Daniele Franceschini (born 13 January 1976 in...,...,Anthony Barber was born in Doncaster. \n Danie...,Anthony Barber was born in Doncaster. \n Paul ...,Anthony Barber was born in Doncaster. \n The I...,Anthony Barber was born in Doncaster. \n Paul ...,Anthony Barber was born in Doncaster. \n Born ...,Anthony Barber was born in Doncaster. \n Paul ...,Anthony Barber was born in Doncaster. \n An It...,Anthony Barber was born in Doncaster. \n Paul ...,Anthony Barber was born in Doncaster. \n Danie...,Anthony Barber was born in Doncaster. \n Paul ...


## Teste Modelo

In [193]:
def process_prompt_results(df_column, masked_model, with_socre=0):
    """
    Process the results of prompts using a masked language model.

    Parameters:
    df_column (list): A list of prompts to be processed.
    masked_model: The masked language model used for processing.

    Returns:
    list: A list of dictionaries for each prompt, where each dictionary contains 'tokens' and 'score'.
    """
    outputs = []
    if with_socre == 1:
        for prompt in df_column:
            result = masked_model(prompt)
            output_dict = {
                'tokens': [dictionary['token_str'].strip().lower() for dictionary in result],
                'score': [round(dictionary['score'],2) for dictionary in result]
            }
            outputs.append(output_dict)
    else:
        # Iterate through prompts and store the best output
        for prompt in df_column:
            outputs.append(
                [dictionary['token_str'].strip().lower() for dictionary in masked_model(prompt)]
                )
    return outputs

def create_results_list(data, true_labels, num_iterations):
    """
    Create a results list based on whether each corresponding true_label is present in the corresponding sublist of the data.

    Parameters:
    - data: List of sublists to search through
    - true_labels: List of true_labels to check for in each sublist
    - num_iterations: Number of iterations to consider for each sublist

    Returns:
    - results_list: List of 1s and 0s based on the presence of true_labels in the sublists

    results_list = []
    for sublist, true_label in zip(data, true_labels):
        results_list.append(1 if true_label in sublist[:num_iterations] else 0)
    """
    results_list = [1 if true_label in sublist[:num_iterations] else 0 for sublist, true_label in zip(data, true_labels)]
    indices = [sublist.index(true_label) if true_label in sublist[:num_iterations] else None for sublist, true_label in zip(data, true_labels)]
    return results_list, indices, round(average(results_list),3)

def verificar_e_substituir(df, coluna_palavras, coluna_salvar, indices_substituicao=None):
    for index, row in df.iterrows():
        if indices_substituicao is not None and index not in indices_substituicao:
            continue
        palavra_alvo = row[coluna_palavras]
        encontrada = False
        for coluna in df.columns:
            if palavra_alvo in row[coluna]:
                # Substituir a palavra-alvo por '[MASK]' na primeira coluna onde é encontrada
                df.at[index, coluna_salvar] = df.at[index, coluna].replace(palavra_alvo, '[MASK]')
                encontrada = True
                break  # Pula para a próxima linha após substituir
        if not encontrada:
            df.at[index, coluna_salvar] = nan  # Se a palavra-alvo não for encontrada, atribui NaN
    return df

### Chamando Modelos

In [194]:
unmasker_electra = pipeline('fill-mask', model='google/electra-large-generator', top_k=10)
unmasker_robertaLarge = pipeline('fill-mask', model='roberta-large', top_k=10)
unmasker_bertLarge = pipeline('fill-mask', model='bert-large-uncased', top_k=10)
unmasker_albertLarge = pipeline('fill-mask', model='albert/albert-large-v2', top_k=10)
unmasker_distbert = pipeline('fill-mask', model='distilbert/distilbert-base-uncased', top_k=10)

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of the model checkpoint at albert/albert-large-v2 were not used when initializing AlbertForMaskedLM: ['albert.pooler.bias', 'albert.pooler.weight']
- This IS expected if you are initializing AlbertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. 

#### Respostas

In [223]:
answer_triple_bornIn = triple_to_nl_bornIn['obj_label'].str.lower().tolist()
#answer_triple_diedIn = triple_to_nl_diedIn['obj_label'].str.lower().tolist()
#answer_triple_capital = triple_to_nl_capital['obj_label'].str.lower().tolist()
#answer_triple_citizen = triple_to_nl_citizen['obj_label'].str.lower().tolist()
#answer_triple_worksfor = triple_to_nl_worksfor['obj_label'].str.lower().tolist()


answer_filled_sentences_bornIn = filled_sentences_bornIn['obj_label'].str.lower().tolist()
#answer_filled_sentences_diedIn = filled_sentences_diedIn['obj_label'].str.lower().tolist()
#answer_filled_sentences_capital = filled_sentences_capital['obj_label'].str.lower().tolist()
#answer_filled_sentences_citizen = filled_sentences_citizen['obj_label'].str.lower().tolist()
#answer_filled_sentences_worksfor = filled_sentences_worksfor['obj_label'].str.lower().tolist()

#### Verificando

Por fatores de agilidade irei realizar apenas para o BornIn dataset os testes.

In [267]:
# Triples Originais
outputs_electra_bornIn_original_triple = process_prompt_results(triple_to_nl_bornIn['triple_NL'], unmasker_bertLarge)
outputs_electra_bornIn_original_w_help_1_triple = process_prompt_results(triple_to_nl_bornIn['triple_NL_w_help_1'], unmasker_bertLarge)
outputs_electra_bornIn_original_w_help_2_triple = process_prompt_results(triple_to_nl_bornIn['triple_NL_w_help_2'], unmasker_bertLarge)

# Triples Parafraseados
outputs_electra_bornIn_translated_triple = process_prompt_results(triple_to_nl_bornIn['translated_triple_NL'], unmasker_bertLarge)
outputs_electra_bornIn_t5_0_triple = process_prompt_results(triple_to_nl_bornIn['t5_triple_NL_0'], unmasker_bertLarge)
outputs_electra_bornIn_t5_1_triple = process_prompt_results(triple_to_nl_bornIn['t5_triple_NL_1'], unmasker_bertLarge)
outputs_electra_bornIn_t5_2_triple = process_prompt_results(triple_to_nl_bornIn['t5_triple_NL_2'], unmasker_bertLarge)
outputs_electra_bornIn_t5_3_triple = process_prompt_results(triple_to_nl_bornIn['t5_triple_NL_3'], unmasker_bertLarge)
outputs_electra_bornIn_parrot_triple = process_prompt_results(triple_to_nl_bornIn['parrot_triple_NL'], unmasker_bertLarge)

In [271]:
print('*'*19+' Triple - k=1 '+'*'*20)
print('*'*50)

results_list_bornIn_original_electra_top1_triple, _, average_bornIn_original_electra_top1_triple = create_results_list(
    outputs_electra_bornIn_original_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Original: {average_bornIn_original_electra_top1_triple}')

results_list_bornIn_original_w_help_1_electra_top1_triple, _, average_bornIn_original_w_help_1_electra_top1_triple = create_results_list(
    outputs_electra_bornIn_original_w_help_1_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Original with help 1: {average_bornIn_original_w_help_1_electra_top1_triple}')

results_list_bornIn_original_w_help_2_electra_top1_triple, _, average_bornIn_original_w_help_2_electra_top1_triple = create_results_list(
    outputs_electra_bornIn_original_w_help_2_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Original with help 2: {average_bornIn_original_w_help_2_electra_top1_triple}')

results_list_bornIn_translated_electra_top1_triple, _, average_bornIn_translated_electra_top1_triple = create_results_list(
    outputs_electra_bornIn_translated_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Translated: {average_bornIn_translated_electra_top1_triple}')


results_list_bornIn_t5_0_electra_top1_triple_para, _, average_bornIn_t5_0_electra_top1_triple_para = create_results_list(
    outputs_electra_bornIn_t5_0_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: {average_bornIn_t5_0_electra_top1_triple_para}')


results_list_bornIn_t5_1_electra_top1_triple_para, _, average_bornIn_t5_1_electra_top1_triple_para = create_results_list(
    outputs_electra_bornIn_t5_1_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 1: {average_bornIn_t5_1_electra_top1_triple_para}')


results_list_bornIn_t5_2_electra_top1_triple_para, _, average_bornIn_t5_2_electra_top1_triple_para = create_results_list(
    outputs_electra_bornIn_t5_2_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: {average_bornIn_t5_2_electra_top1_triple_para}')


results_list_bornIn_t5_3_electra_top1_triple_para, _, average_bornIn_t5_3_electra_top1_triple_para = create_results_list(
    outputs_electra_bornIn_t5_3_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: {average_bornIn_t5_3_electra_top1_triple_para}')


results_list_bornIn_parrot_electra_top1_triple, _, average_bornIn_parrot_electra_top1_triple = create_results_list(
    outputs_electra_bornIn_parrot_triple, answer_triple_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Parrot Paraphrased: {average_bornIn_parrot_electra_top1_triple}')

******************* Triple - k=1 ********************
**************************************************
Avg Precision at K = 1 of Born In Dataset Original: 0.333
Avg Precision at K = 1 of Born In Dataset Original with help 1: 0.111
Avg Precision at K = 1 of Born In Dataset Original with help 2: 0.111
Avg Precision at K = 1 of Born In Dataset Translated: 0.333
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: 0.333
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 1: 0.0
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: 0.0
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: 0.222
Avg Precision at K = 1 of Date of Birth Dataset Parrot Paraphrased: 0.111


In [270]:
print('*'*19+' Triple - k=5 '+'*'*20)
print('*'*50)

results_list_bornIn_original_electra_top5_triple, _, average_bornIn_original_electra_top5_triple = create_results_list(
    outputs_electra_bornIn_original_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Original: {average_bornIn_original_electra_top5_triple}')

results_list_bornIn_original_w_help_1_electra_top5_triple, _, average_bornIn_original_w_help_1_electra_top5_triple = create_results_list(
    outputs_electra_bornIn_original_w_help_1_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Original with help 1: {average_bornIn_original_w_help_1_electra_top5_triple}')

results_list_bornIn_original_w_help_2_electra_top5_triple, _, average_bornIn_original_w_help_2_electra_top5_triple = create_results_list(
    outputs_electra_bornIn_original_w_help_2_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Original with help 2: {average_bornIn_original_w_help_2_electra_top5_triple}')

results_list_bornIn_translated_electra_top5_triple, _, average_bornIn_translated_electra_top5_triple = create_results_list(
    outputs_electra_bornIn_translated_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Translated: {average_bornIn_translated_electra_top5_triple}')


results_list_bornIn_t5_0_electra_top5_triple_para, _, average_bornIn_t5_0_electra_top5_triple_para = create_results_list(
    outputs_electra_bornIn_t5_0_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: {average_bornIn_t5_0_electra_top5_triple_para}')


results_list_bornIn_t5_5_electra_top5_triple_para, _, average_bornIn_t5_5_electra_top5_triple_para = create_results_list(
    outputs_electra_bornIn_t5_1_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 5: {average_bornIn_t5_5_electra_top5_triple_para}')


results_list_bornIn_t5_2_electra_top5_triple_para, _, average_bornIn_t5_2_electra_top5_triple_para = create_results_list(
    outputs_electra_bornIn_t5_2_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: {average_bornIn_t5_2_electra_top5_triple_para}')


results_list_bornIn_t5_3_electra_top5_triple_para, _, average_bornIn_t5_3_electra_top5_triple_para = create_results_list(
    outputs_electra_bornIn_t5_3_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: {average_bornIn_t5_3_electra_top5_triple_para}')


results_list_bornIn_parrot_electra_top5_triple, _, average_bornIn_parrot_electra_top5_triple = create_results_list(
    outputs_electra_bornIn_parrot_triple, answer_triple_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Parrot Paraphrased: {average_bornIn_parrot_electra_top5_triple}')

******************* Triple - k=5 ********************
**************************************************
Avg Precision at K = 5 of Born In Dataset Original: 0.444
Avg Precision at K = 5 of Born In Dataset Original with help 1: 0.111
Avg Precision at K = 5 of Born In Dataset Original with help 2: 0.333
Avg Precision at K = 5 of Born In Dataset Translated: 0.444
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: 0.444
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 5: 0.0
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: 0.111
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: 0.333
Avg Precision at K = 5 of Date of Birth Dataset Parrot Paraphrased: 0.444


In [269]:
print('*'*19+' Triple - k=10 '+'*'*20)
print('*'*50)

results_list_bornIn_original_electra_top10_triple, _, average_bornIn_original_electra_top10_triple = create_results_list(
    outputs_electra_bornIn_original_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Original: {average_bornIn_original_electra_top10_triple}')

results_list_bornIn_original_w_help_1_electra_top10_triple, _, average_bornIn_original_w_help_1_electra_top10_triple = create_results_list(
    outputs_electra_bornIn_original_w_help_1_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Original with help 1: {average_bornIn_original_w_help_1_electra_top10_triple}')

results_list_bornIn_original_w_help_2_electra_top10_triple, _, average_bornIn_original_w_help_2_electra_top10_triple = create_results_list(
    outputs_electra_bornIn_original_w_help_2_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Original with help 2: {average_bornIn_original_w_help_2_electra_top10_triple}')

results_list_bornIn_translated_electra_top10_triple, _, average_bornIn_translated_electra_top10_triple = create_results_list(
    outputs_electra_bornIn_translated_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Translated: {average_bornIn_translated_electra_top10_triple}')


results_list_bornIn_t5_0_electra_top10_triple_para, _, average_bornIn_t5_0_electra_top10_triple_para = create_results_list(
    outputs_electra_bornIn_t5_0_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: {average_bornIn_t5_0_electra_top10_triple_para}')


results_list_bornIn_t5_10_electra_top10_triple_para, _, average_bornIn_t5_10_electra_top10_triple_para = create_results_list(
    outputs_electra_bornIn_t5_1_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 10: {average_bornIn_t5_10_electra_top10_triple_para}')


results_list_bornIn_t5_2_electra_top10_triple_para, _, average_bornIn_t5_2_electra_top10_triple_para = create_results_list(
    outputs_electra_bornIn_t5_2_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: {average_bornIn_t5_2_electra_top10_triple_para}')


results_list_bornIn_t5_3_electra_top10_triple_para, _, average_bornIn_t5_3_electra_top10_triple_para = create_results_list(
    outputs_electra_bornIn_t5_3_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: {average_bornIn_t5_3_electra_top10_triple_para}')


results_list_bornIn_parrot_electra_top10_triple, _, average_bornIn_parrot_electra_top10_triple = create_results_list(
    outputs_electra_bornIn_parrot_triple, answer_triple_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Parrot Paraphrased: {average_bornIn_parrot_electra_top10_triple}')

******************* Triple - k=10 ********************
**************************************************
Avg Precision at K = 10 of Born In Dataset Original: 0.778
Avg Precision at K = 10 of Born In Dataset Original with help 1: 0.222
Avg Precision at K = 10 of Born In Dataset Original with help 2: 0.556
Avg Precision at K = 10 of Born In Dataset Translated: 0.667
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: 0.778
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 10: 0.0
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: 0.111
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: 0.556
Avg Precision at K = 10 of Date of Birth Dataset Parrot Paraphrased: 0.556


In [298]:
# Masked Sentences Originais
outputs_bert_bornIn_original_filled = process_prompt_results(filled_sentences_bornIn['masked_sentence'], unmasker_bertLarge)
outputs_bert_bornIn_original_wHelp_filled = process_prompt_results(filled_sentences_bornIn['masked_sentence_w_help'], unmasker_bertLarge)
outputs_bert_bornIn_original_wHelp_2_filled = process_prompt_results(filled_sentences_bornIn['masked_sentence_w_help_2'], unmasker_bertLarge)

# Masked Sentences Parafraseados
## Translated
outputs_bert_bornIn_translated_filled = process_prompt_results(filled_sentences_bornIn['translated_filled_sentence'], unmasker_bertLarge)
outputs_bert_bornIn_translated_wHelp_filled = process_prompt_results(filled_sentences_bornIn['translated_filled_sentence_w_help'], unmasker_bertLarge)
outputs_bert_bornIn_translated_wHelp_2_filled = process_prompt_results(filled_sentences_bornIn['translated_filled_sentence_w_help_2'], unmasker_bertLarge)

## T5
outputs_bert_bornIn_t5_0_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_0'], unmasker_bertLarge)
outputs_bert_bornIn_t5_0_wHelp_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_0_w_help'], unmasker_bertLarge)
outputs_bert_bornIn_t5_0_wHelp_2_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_0_w_help_2'], unmasker_bertLarge)

outputs_bert_bornIn_t5_1_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_1'], unmasker_bertLarge)
outputs_bert_bornIn_t5_1_wHelp_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_1_w_help'], unmasker_bertLarge)
outputs_bert_bornIn_t5_1_wHelp_2_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_1_w_help_2'], unmasker_bertLarge)

outputs_bert_bornIn_t5_2_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_2'], unmasker_bertLarge)
outputs_bert_bornIn_t5_2_wHelp_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_2_w_help'], unmasker_bertLarge)
outputs_bert_bornIn_t5_2_wHelp_2_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_2_w_help_2'], unmasker_bertLarge)

outputs_bert_bornIn_t5_3_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_3'], unmasker_bertLarge)
outputs_bert_bornIn_t5_3_wHelp_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_3_w_help'], unmasker_bertLarge)
outputs_bert_bornIn_t5_3_wHelp_2_filled = process_prompt_results(filled_sentences_bornIn['T5_paraphrased_filled_sentence_3_w_help_2'], unmasker_bertLarge)

## Parrot
outputs_bert_bornIn_parrot_filled = process_prompt_results(filled_sentences_bornIn['Parrot_paraphrased_filled_sentence_1'], unmasker_bertLarge)
outputs_bert_bornIn_parrot_wHelp_filled = process_prompt_results(filled_sentences_bornIn['Parrot_paraphrased_filled_sentence_1_w_help'], unmasker_bertLarge)
outputs_bert_bornIn_parrot_wHelp_2_filled = process_prompt_results(filled_sentences_bornIn['Parrot_paraphrased_filled_sentence_1_w_help_2'], unmasker_bertLarge)

In [299]:
print('*'*19+' Filled Sentence k=1 '+'*'*20)
print('*'*50)
## Original
print('Original')
results_list_bornIn_original_bert_top1_filled, _, average_bornIn_original_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_original_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Original: {average_bornIn_original_bert_top1_filled}')

results_list_bornIn_original_wHelp_bert_top1_filled, _, average_bornIn_original_wHelp_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_original_wHelp_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Original with Help 1: {average_bornIn_original_wHelp_bert_top1_filled}')

results_list_bornIn_original_wHelp_2_bert_top1_filled, _, average_bornIn_original_wHelp_2_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_original_wHelp_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Original with Help 2: {average_bornIn_original_wHelp_2_bert_top1_filled}')


## Translated
print('\n\n')
print('Translated')
results_list_bornIn_translated_bert_top1_filled, _, average_bornIn_translated_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_translated_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Translated: {average_bornIn_translated_bert_top1_filled}')

results_list_bornIn_translated_wHelp_bert_top1_filled, _, average_bornIn_translated_wHelp_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_translated_wHelp_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Translated with Help 1: {average_bornIn_translated_wHelp_bert_top1_filled}')

results_list_bornIn_translated_wHelp_2_bert_top1_filled, _, average_bornIn_translated_wHelp_2_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_translated_wHelp_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Born In Dataset Translated with Help 2: {average_bornIn_translated_wHelp_2_bert_top1_filled}')

## T5
### Paraphrase 0
print('\n\n')
print('T5 Column Paraphrase 0')
results_list_bornIn_t5_0_bert_top1_filled_para, _, average_bornIn_t5_0_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: {average_bornIn_t5_0_bert_top1_filled_para}')

results_list_bornIn_t5_0_wHelp_bert_top1_filled_para, _, average_bornIn_t5_0_wHelp_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_wHelp_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 1: {average_bornIn_t5_0_wHelp_bert_top1_filled_para}')

results_list_bornIn_t5_0_wHelp_2_bert_top1_filled_para, _, average_bornIn_t5_0_wHelp_2_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_wHelp_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 2: {average_bornIn_t5_0_wHelp_2_bert_top1_filled_para}')

### Paraphrase 1
print('\n\n')
print('T5 Column Paraphrase 1')
results_list_bornIn_t5_1_bert_top1_filled_para, _, average_bornIn_t5_1_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 1: {average_bornIn_t5_1_bert_top1_filled_para}')

results_list_bornIn_t5_1_wHelp_bert_top1_filled_para, _, average_bornIn_t5_1_wHelp_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_wHelp_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 1 with Help 1: {average_bornIn_t5_1_wHelp_bert_top1_filled_para}')

results_list_bornIn_t5_1_wHelp_2_bert_top1_filled_para, _, average_bornIn_t5_1_wHelp_2_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_wHelp_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 1 with Help 2: {average_bornIn_t5_1_wHelp_2_bert_top1_filled_para}')

### Paraphrase 2
print('\n\n')
print('T5 Column Paraphrase 2')
results_list_bornIn_t5_2_bert_top1_filled_para, _, average_bornIn_t5_2_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: {average_bornIn_t5_2_bert_top1_filled_para}')

results_list_bornIn_t5_2_wHelp_bert_top1_filled_para, _, average_bornIn_t5_2_wHelp_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_wHelp_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2 with Help 1: {average_bornIn_t5_2_wHelp_bert_top1_filled_para}')

results_list_bornIn_t5_2_wHelp_2_bert_top1_filled_para, _, average_bornIn_t5_2_wHelp_2_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_wHelp_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2 with Help 2: {average_bornIn_t5_2_wHelp_2_bert_top1_filled_para}')

### Paraphrase 3
print('\n\n')
print('T5 Column Paraphrase 3')
results_list_bornIn_t5_3_bert_top1_filled_para, _, average_bornIn_t5_3_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: {average_bornIn_t5_3_bert_top1_filled_para}')

results_list_bornIn_t5_3_wHelp_bert_top1_filled_para, _, average_bornIn_t5_3_wHelp_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_wHelp_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3 with Help 1: {average_bornIn_t5_3_wHelp_bert_top1_filled_para}')

results_list_bornIn_t5_3_wHelp_2_bert_top1_filled_para, _, average_bornIn_t5_3_wHelp_2_bert_top1_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_wHelp_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3 with Help 2: {average_bornIn_t5_3_wHelp_2_bert_top1_filled_para}')

## Parrot
print('\n\n')
print('Parrot')
results_list_bornIn_parrot_bert_top1_filled, _, average_bornIn_parrot_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_parrot_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Parrot Paraphrased: {average_bornIn_parrot_bert_top1_filled}')

results_list_bornIn_parrot_wHelp_bert_top1_filled, _, average_bornIn_parrot_wHelp_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_parrot_wHelp_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Parrot Paraphrased with Help 1: {average_bornIn_parrot_wHelp_bert_top1_filled}')

results_list_bornIn_parrot_wHelp_2_bert_top1_filled, _, average_bornIn_parrot_wHelp_2_bert_top1_filled = create_results_list(
    outputs_bert_bornIn_parrot_wHelp_2_filled, answer_filled_sentences_bornIn, 1
    ) # top-1 accuracy
print(f'Avg Precision at K = 1 of Date of Birth Dataset Parrot Paraphrased with Help 2: {average_bornIn_parrot_wHelp_2_bert_top1_filled}')

******************* Filled Sentence k=1 ********************
**************************************************
Original
Avg Precision at K = 1 of Born In Dataset Original: 0.3
Avg Precision at K = 1 of Born In Dataset Original with Help 1: 0.1
Avg Precision at K = 1 of Born In Dataset Original with Help 2: 0.4



Translated
Avg Precision at K = 1 of Born In Dataset Translated: 0.3
Avg Precision at K = 1 of Born In Dataset Translated with Help 1: 0.1
Avg Precision at K = 1 of Born In Dataset Translated with Help 2: 0.4



T5 Column Paraphrase 0
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: 0.3
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 1: 0.1
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 2: 0.3



T5 Column Paraphrase 1
Avg Precision at K = 1 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 1: 0.3
Avg Precision at K = 1 of Date of Birth Dataset

In [300]:
print('*'*19+' Filled Sentence k=5 '+'*'*20)
print('*'*50)
## Original
print('Original')
results_list_bornIn_original_bert_top5_filled, _, average_bornIn_original_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_original_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Original: {average_bornIn_original_bert_top5_filled}')

results_list_bornIn_original_wHelp_bert_top5_filled, _, average_bornIn_original_wHelp_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_original_wHelp_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Original with Help 1: {average_bornIn_original_wHelp_bert_top5_filled}')

results_list_bornIn_original_wHelp_2_bert_top5_filled, _, average_bornIn_original_wHelp_2_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_original_wHelp_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Original with Help 2: {average_bornIn_original_wHelp_2_bert_top5_filled}')

## Translated
print('\n\n')
print('Translated')
results_list_bornIn_translated_bert_top5_filled, _, average_bornIn_translated_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_translated_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Translated: {average_bornIn_translated_bert_top5_filled}')

results_list_bornIn_translated_wHelp_bert_top5_filled, _, average_bornIn_translated_wHelp_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_translated_wHelp_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Translated with Help 1: {average_bornIn_translated_wHelp_bert_top5_filled}')

results_list_bornIn_translated_wHelp_2_bert_top5_filled, _, average_bornIn_translated_wHelp_2_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_translated_wHelp_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Born In Dataset Translated with Help 2: {average_bornIn_translated_wHelp_2_bert_top5_filled}')

## T5
### Paraphrase 0
print('\n\n')
print('T5 Column Paraphrase 0')
results_list_bornIn_t5_0_bert_top5_filled_para, _, average_bornIn_t5_0_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: {average_bornIn_t5_0_bert_top5_filled_para}')

results_list_bornIn_t5_0_wHelp_bert_top5_filled_para, _, average_bornIn_t5_0_wHelp_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_wHelp_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 1: {average_bornIn_t5_0_wHelp_bert_top5_filled_para}')

results_list_bornIn_t5_0_wHelp_2_bert_top5_filled_para, _, average_bornIn_t5_0_wHelp_2_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_wHelp_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 2: {average_bornIn_t5_0_wHelp_2_bert_top5_filled_para}')

### Paraphrase 1
print('\n\n')
print('T5 Column Paraphrase 1')
results_list_bornIn_t5_5_bert_top5_filled_para, _, average_bornIn_t5_5_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 5: {average_bornIn_t5_5_bert_top5_filled_para}')

results_list_bornIn_t5_5_wHelp_bert_top5_filled_para, _, average_bornIn_t5_5_wHelp_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_wHelp_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 5 with Help 1: {average_bornIn_t5_5_wHelp_bert_top5_filled_para}')

results_list_bornIn_t5_5_wHelp_2_bert_top5_filled_para, _, average_bornIn_t5_5_wHelp_2_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_wHelp_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 5 with Help 2: {average_bornIn_t5_5_wHelp_2_bert_top5_filled_para}')

### Paraphrase 2
print('\n\n')
print('T5 Column Paraphrase 2')
results_list_bornIn_t5_2_bert_top5_filled_para, _, average_bornIn_t5_2_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: {average_bornIn_t5_2_bert_top5_filled_para}')

results_list_bornIn_t5_2_wHelp_bert_top5_filled_para, _, average_bornIn_t5_2_wHelp_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_wHelp_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2 with Help 1: {average_bornIn_t5_2_wHelp_bert_top5_filled_para}')

results_list_bornIn_t5_2_wHelp_2_bert_top5_filled_para, _, average_bornIn_t5_2_wHelp_2_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_wHelp_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2 with Help 2: {average_bornIn_t5_2_wHelp_2_bert_top5_filled_para}')

### Paraphrase 3
print('\n\n')
print('T5 Column Paraphrase 3')
results_list_bornIn_t5_3_bert_top5_filled_para, _, average_bornIn_t5_3_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: {average_bornIn_t5_3_bert_top5_filled_para}')

results_list_bornIn_t5_3_wHelp_bert_top5_filled_para, _, average_bornIn_t5_3_wHelp_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_wHelp_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3 with Help 1: {average_bornIn_t5_3_wHelp_bert_top5_filled_para}')

results_list_bornIn_t5_3_wHelp_2_bert_top5_filled_para, _, average_bornIn_t5_3_wHelp_2_bert_top5_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_wHelp_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3 with Help 2: {average_bornIn_t5_3_wHelp_2_bert_top5_filled_para}')

## Parrot
print('\n\n')
print('Parrot')
results_list_bornIn_parrot_bert_top5_filled, _, average_bornIn_parrot_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_parrot_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Parrot Paraphrased: {average_bornIn_parrot_bert_top5_filled}')

results_list_bornIn_parrot_wHelp_bert_top5_filled, _, average_bornIn_parrot_wHelp_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_parrot_wHelp_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Parrot Paraphrased with Help 1: {average_bornIn_parrot_wHelp_bert_top5_filled}')

results_list_bornIn_parrot_wHelp_2_bert_top5_filled, _, average_bornIn_parrot_wHelp_2_bert_top5_filled = create_results_list(
    outputs_bert_bornIn_parrot_wHelp_2_filled, answer_filled_sentences_bornIn, 5
    ) # top-5 accuracy
print(f'Avg Precision at K = 5 of Date of Birth Dataset Parrot Paraphrased with Help 2: {average_bornIn_parrot_wHelp_2_bert_top5_filled}')

******************* Filled Sentence k=5 ********************
**************************************************
Original
Avg Precision at K = 5 of Born In Dataset Original: 0.4
Avg Precision at K = 5 of Born In Dataset Original with Help 1: 0.5
Avg Precision at K = 5 of Born In Dataset Original with Help 2: 0.7



Translated
Avg Precision at K = 5 of Born In Dataset Translated: 0.4
Avg Precision at K = 5 of Born In Dataset Translated with Help 1: 0.4
Avg Precision at K = 5 of Born In Dataset Translated with Help 2: 0.7



T5 Column Paraphrase 0
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: 0.3
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 1: 0.2
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 2: 0.5



T5 Column Paraphrase 1
Avg Precision at K = 5 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 5: 0.6
Avg Precision at K = 5 of Date of Birth Dataset

In [301]:
print('*'*19+' Filled Sentence k=10 '+'*'*20)
print('*'*50)
## Original
print('Original')
results_list_bornIn_original_bert_top10_filled, _, average_bornIn_original_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_original_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Original: {average_bornIn_original_bert_top10_filled}')

results_list_bornIn_original_wHelp_bert_top10_filled, _, average_bornIn_original_wHelp_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_original_wHelp_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Original with Help 1: {average_bornIn_original_wHelp_bert_top10_filled}')

results_list_bornIn_original_wHelp_2_bert_top10_filled, _, average_bornIn_original_wHelp_2_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_original_wHelp_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Original with Help 2: {average_bornIn_original_wHelp_2_bert_top10_filled}')

## Translated
print('\n\n')
print('Translated')
results_list_bornIn_translated_bert_top10_filled, _, average_bornIn_translated_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_translated_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Translated: {average_bornIn_translated_bert_top10_filled}')

results_list_bornIn_translated_wHelp_bert_top10_filled, _, average_bornIn_translated_wHelp_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_translated_wHelp_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Translated with Help 1: {average_bornIn_translated_wHelp_bert_top10_filled}')

results_list_bornIn_translated_wHelp_2_bert_top10_filled, _, average_bornIn_translated_wHelp_2_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_translated_wHelp_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Born In Dataset Translated with Help 2: {average_bornIn_translated_wHelp_2_bert_top10_filled}')

## T5
### Paraphrase 0
print('\n\n')
print('T5 Column Paraphrase 0')
results_list_bornIn_t5_0_bert_top10_filled_para, _, average_bornIn_t5_0_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: {average_bornIn_t5_0_bert_top10_filled_para}')

results_list_bornIn_t5_0_wHelp_bert_top10_filled_para, _, average_bornIn_t5_0_wHelp_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_wHelp_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 1: {average_bornIn_t5_0_wHelp_bert_top10_filled_para}')

results_list_bornIn_t5_0_wHelp_2_bert_top10_filled_para, _, average_bornIn_t5_0_wHelp_2_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_0_wHelp_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 2: {average_bornIn_t5_0_wHelp_2_bert_top10_filled_para}')

### Paraphrase 1
print('\n\n')
print('T5 Column Paraphrase 1')
results_list_bornIn_t5_10_bert_top10_filled_para, _, average_bornIn_t5_10_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 10: {average_bornIn_t5_10_bert_top10_filled_para}')

results_list_bornIn_t5_10_wHelp_bert_top10_filled_para, _, average_bornIn_t5_10_wHelp_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_wHelp_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 10 with Help 1: {average_bornIn_t5_10_wHelp_bert_top10_filled_para}')

results_list_bornIn_t5_10_wHelp_2_bert_top10_filled_para, _, average_bornIn_t5_10_wHelp_2_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_1_wHelp_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 10 with Help 2: {average_bornIn_t5_10_wHelp_2_bert_top10_filled_para}')

### Paraphrase 2
print('\n\n')
print('T5 Column Paraphrase 2')
results_list_bornIn_t5_2_bert_top10_filled_para, _, average_bornIn_t5_2_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2: {average_bornIn_t5_2_bert_top10_filled_para}')

results_list_bornIn_t5_2_wHelp_bert_top10_filled_para, _, average_bornIn_t5_2_wHelp_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_wHelp_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2 with Help 1: {average_bornIn_t5_2_wHelp_bert_top10_filled_para}')

results_list_bornIn_t5_2_wHelp_2_bert_top10_filled_para, _, average_bornIn_t5_2_wHelp_2_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_2_wHelp_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 2 with Help 2: {average_bornIn_t5_2_wHelp_2_bert_top10_filled_para}')

### Paraphrase 3
print('\n\n')
print('T5 Column Paraphrase 3')
results_list_bornIn_t5_3_bert_top10_filled_para, _, average_bornIn_t5_3_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3: {average_bornIn_t5_3_bert_top10_filled_para}')

results_list_bornIn_t5_3_wHelp_bert_top10_filled_para, _, average_bornIn_t5_3_wHelp_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_wHelp_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3 with Help 1: {average_bornIn_t5_3_wHelp_bert_top10_filled_para}')

results_list_bornIn_t5_3_wHelp_2_bert_top10_filled_para, _, average_bornIn_t5_3_wHelp_2_bert_top10_filled_para = create_results_list(
    outputs_bert_bornIn_t5_3_wHelp_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 3 with Help 2: {average_bornIn_t5_3_wHelp_2_bert_top10_filled_para}')

## Parrot
print('\n\n')
print('Parrot')
results_list_bornIn_parrot_bert_top10_filled, _, average_bornIn_parrot_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_parrot_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Parrot Paraphrased: {average_bornIn_parrot_bert_top10_filled}')

results_list_bornIn_parrot_wHelp_bert_top10_filled, _, average_bornIn_parrot_wHelp_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_parrot_wHelp_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Parrot Paraphrased with Help 1: {average_bornIn_parrot_wHelp_bert_top10_filled}')

results_list_bornIn_parrot_wHelp_2_bert_top10_filled, _, average_bornIn_parrot_wHelp_2_bert_top10_filled = create_results_list(
    outputs_bert_bornIn_parrot_wHelp_2_filled, answer_filled_sentences_bornIn, 10
    ) # top-10 accuracy
print(f'Avg Precision at K = 10 of Date of Birth Dataset Parrot Paraphrased with Help 2: {average_bornIn_parrot_wHelp_2_bert_top10_filled}')

******************* Filled Sentence k=10 ********************
**************************************************
Original
Avg Precision at K = 10 of Born In Dataset Original: 0.5
Avg Precision at K = 10 of Born In Dataset Original with Help 1: 0.5
Avg Precision at K = 10 of Born In Dataset Original with Help 2: 0.7



Translated
Avg Precision at K = 10 of Born In Dataset Translated: 0.5
Avg Precision at K = 10 of Born In Dataset Translated with Help 1: 0.5
Avg Precision at K = 10 of Born In Dataset Translated with Help 2: 0.7



T5 Column Paraphrase 0
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0: 0.6
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 1: 0.4
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 0 with Help 2: 0.5



T5 Column Paraphrase 1
Avg Precision at K = 10 of Date of Birth Dataset Paraphrased T5 Column Paraphrase 10: 0.6
Avg Precision at K = 10 of Date of 