In [1]:
from parrot import Parrot
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline,T5Tokenizer, T5ForConditionalGeneration
import pandas as pd
from numpy import average, round, nan
import re
import warnings
warnings.filterwarnings("ignore")
import random
import itertools

# Run on the GPU when torch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


def random_state(seed):
    """Seed torch's random number generators so generations are reproducible.

    Parameters:
    - seed (int): Value passed to `torch.manual_seed` and, when CUDA is
      available, to `torch.cuda.manual_seed_all`.
    """
    torch.manual_seed(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)


# Fixed seed so paraphrase generations are reproducible between runs.
random_state(1234)

In [2]:
# Init models (make sure you init ONLY once if you integrate this to your code).
# Ask Parrot for the GPU only when torch can actually see one, so this cell
# agrees with how `device` is chosen for the other models instead of
# unconditionally requesting CUDA.
parrot = Parrot(
    model_tag="prithivida/parrot_paraphraser_on_T5",
    use_gpu=torch.cuda.is_available(),
)

# ChatGPT-paraphraser (T5 base) used by `paraphrase`.
tokenizer_gpt = AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
model_gpt = AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)

# Plain T5 base checkpoint (left on the default device, as before).
tokenizer_t5 = T5Tokenizer.from_pretrained("t5-base")
model_t5 = T5ForConditionalGeneration.from_pretrained("t5-base")

# Translator
translator_en_fr = pipeline("translation", model="Helsinki-NLP/opus-mt-en-fr", max_length=512)
translator_fr_en = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-fr-en", max_length=512)

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [3]:
def paraphrase(
    question,
    num_beams=4,
    num_beam_groups=4,
    num_return_sequences=4,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.5,
    max_length=128
):
    """
    Paraphrase `question` with the `model_gpt` / `tokenizer_gpt` pair.

    The text is prefixed with 'paraphrase: ', tokenized (truncated to
    `max_length` tokens), moved to `device`, and passed to
    `model_gpt.generate` together with the generation settings below.

    Parameters:
    - question (str): Sentence to paraphrase.
    - num_beams, num_beam_groups, num_return_sequences, repetition_penalty,
      diversity_penalty, no_repeat_ngram_size, temperature: Forwarded
      unchanged to `model_gpt.generate`.
    - max_length (int): Used both as the tokenizer truncation length and as
      the generation `max_length`.

    Returns:
    - The result of `tokenizer_gpt.batch_decode` on the generated sequences
      with special tokens skipped.
    """
    encoded_prompt = tokenizer_gpt(
        f'paraphrase: {question}',
        return_tensors="pt",
        padding="longest",
        max_length=max_length,
        truncation=True,
    )
    prompt_ids = encoded_prompt.input_ids.to(device)

    generation_settings = {
        "temperature": temperature,
        "repetition_penalty": repetition_penalty,
        "num_return_sequences": num_return_sequences,
        "no_repeat_ngram_size": no_repeat_ngram_size,
        "num_beams": num_beams,
        "num_beam_groups": num_beam_groups,
        "max_length": max_length,
        "diversity_penalty": diversity_penalty,
    }
    generated = model_gpt.generate(prompt_ids, **generation_settings)

    return tokenizer_gpt.batch_decode(generated, skip_special_tokens=True)

In [4]:
def generate_output_phrases_t5(df, input_column, output_column):
    """
    Paraphrase every phrase in `df[input_column]` with `paraphrase`.

    Parameters:
    - df (pandas.DataFrame): The input DataFrame containing the input phrases.
    - input_column (str): Name of the column that contains the input phrases.
    - output_column (str): Name of the column that receives, per row, the
      list of paraphrases returned by `paraphrase`.

    Returns:
    - pandas.DataFrame: A new frame with the columns of `df`, then
      `output_column` (one list per row), then one integer-named column
      (0, 1, ...) per paraphrase position. `df` itself is not modified.
    """
    paraphrases = [paraphrase(sentence) for sentence in df[input_column]]

    # Build every new piece on df's own index: concatenating frames that carry
    # a fresh 0..n-1 index would misalign rows whenever df was filtered or
    # otherwise does not have a default RangeIndex.
    result = df.copy()
    result[output_column] = pd.Series(paraphrases, index=df.index, dtype=object)

    # One column per paraphrase position; shorter lists are padded by pandas.
    expanded = pd.DataFrame(paraphrases, index=df.index)
    return pd.concat([result, expanded], axis=1)

def generate_output_phrases_parrot(df, input_column, output_column, max_return_phrases=3):
    """
    Paraphrase every phrase in `df[input_column]` with `parrot.augment`.

    Parameters:
    - df (pandas.DataFrame): The input DataFrame containing the input phrases.
    - input_column (str): Name of the column that contains the input phrases.
    - output_column (str): Name of the column that receives, per row, the
      list of paraphrases (or '' when Parrot returned nothing).
    - max_return_phrases (int): Forwarded to `parrot.augment`; defaults to 3.

    Returns:
    - pandas.DataFrame: A new frame with the columns of `df`, then
      `output_column`, then one integer-named column (0, 1, ...) per
      paraphrase position. Rows without paraphrases have missing values in
      the integer-named columns. `df` itself is not modified.
    """
    phrase_lists = []
    for sentence in df[input_column]:
        candidates = parrot.augment(input_phrase=sentence,
                                    max_return_phrases=max_return_phrases)
        # Each candidate is indexed with [0] to take the paraphrase text, as
        # the original zip(*...)[0] did; a missing result becomes an empty list.
        phrase_lists.append([candidate[0] for candidate in candidates] if candidates else [])

    result = df.copy()
    # Keep '' as the placeholder for "no paraphrase" in the list column.
    result[output_column] = pd.Series(
        [phrases if phrases else '' for phrases in phrase_lists],
        index=df.index,
        dtype=object,
    )

    # Expand from the lists (never from the '' placeholder) so the expansion
    # always yields one column per position, and align on df's own index.
    expanded = pd.DataFrame(phrase_lists, index=df.index)
    return pd.concat([result, expanded], axis=1)

def rename_numeric_columns(df, replacement_prefix='Column_'):
    """
    Rename the columns whose label can be converted to an integer.

    Parameters:
    - df (pandas.DataFrame): The DataFrame whose columns are renamed. Its
      `columns` attribute is reassigned in place.
    - replacement_prefix (str): Prefix prepended to the integer value to
      form the new column name.

    Returns:
    - df (pandas.DataFrame): The same DataFrame object, with renamed columns.
    """
    renamed = []
    for col in df.columns:
        try:
            renamed.append(replacement_prefix + str(int(col)))
        except (TypeError, ValueError):
            # ValueError: non-numeric strings; TypeError: labels int() cannot
            # accept at all (e.g. None). Either way keep the label untouched.
            renamed.append(col)

    df.columns = renamed
    return df

def replace_masked_sentence(df, mask_column, label_column, new_column):
    """
    Add `new_column` to `df` with every "[MASK]" in `mask_column` replaced
    by the row's `label_column` value (both converted with `str`).

    The column is added to `df` in place and the same `df` is returned.
    """
    def _fill(row):
        template = str(row[mask_column])
        return template.replace("[MASK]", str(row[label_column]))

    df[new_column] = df.apply(_fill, axis=1)
    return df


def substituir_palavras(texto, palavras_substituir, substituto="[MASK]"):
    """
    Replace the first whole-word occurrence of each word with `substituto`.

    Parameters:
    - texto: Text to process. Anything that is not a `str` yields None.
    - palavras_substituir (iterable): Words to replace. Entries that are not
      strings, or that are empty / whitespace-only, are skipped.
    - substituto (str): Replacement text; defaults to "[MASK]".

    Returns:
    - str or None: The processed text, or None when `texto` is not a string.
    """
    if not isinstance(texto, str):
        return None

    for palavra in palavras_substituir:
        # An empty or blank word would turn into a pattern that matches at a
        # bare word boundary and inject a spurious substitute; a non-string
        # (e.g. NaN) would make re.escape raise.
        if not isinstance(palavra, str) or not palavra.strip():
            continue
        # count=1: only the first occurrence of each word is replaced.
        texto = re.sub(r'\b' + re.escape(palavra) + r'\b', substituto, texto, count=1)
    return texto

#### Born In

In [9]:
file = '/home/rafael/tese/code/data/novos_data/filtrado_one_born_in.csv'
data_bornIn = pd.read_csv(file).drop(columns=['Unnamed: 0'])
# Uncomment to smoke-test on a small sample:
#data_bornIn = data_bornIn[:10]

# Strip the labels and KEEP the result, and do it before the sentences are
# filled: the bare `.str.strip()` call used to discard its output, so padded
# labels leaked into `filled_sentence`.
data_bornIn['obj_label'] = data_bornIn['obj_label'].str.strip()
data_bornIn = replace_masked_sentence(data_bornIn, 'masked_sentence', 'obj_label',
                                      'filled_sentence')
data_bornIn = data_bornIn[['sub_label', 'template', 'obj_label',
                           'masked_sentence', 'filled_sentence']]

# T5 paraphrases: expand to one column per paraphrase, then drop the source
# sentence and the list column.
parafrase_t5_bornIn = generate_output_phrases_t5(data_bornIn, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_bornIn = rename_numeric_columns(parafrase_t5_bornIn, 'T5_paraphrased_filled_sentence_')
parafrase_t5_bornIn = parafrase_t5_bornIn.drop(columns=['filled_sentence', 'T5_paraphrased_filled_sentence'])

# Parrot paraphrases, same post-processing.
parafrase_parrot_bornIn = generate_output_phrases_parrot(data_bornIn, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')
parafrase_parrot_bornIn = rename_numeric_columns(parafrase_parrot_bornIn, 'Parrot_paraphrased_filled_sentence_')
parafrase_parrot_bornIn = parafrase_parrot_bornIn.drop(columns=['filled_sentence', 'Parrot_paraphrased_filled_sentence'])

Token indices sequence length is longer than the specified maximum sequence length for this model (906 > 512). Running this sequence through the model will result in indexing errors


KeyboardInterrupt: 

In [None]:
# Mask each paraphrase with the label of ITS OWN row. Passing the whole
# `obj_label` column masked every other row's label too (and cost one regex
# per label per cell).
colunas = [coluna for coluna in parafrase_t5_bornIn.columns if re.match(r'T5_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas:
    parafrase_t5_bornIn[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_t5_bornIn[coluna], parafrase_t5_bornIn['obj_label'])
    ]

# The Parrot columns are matched against the lower-cased label, as before.
colunas = [coluna for coluna in parafrase_parrot_bornIn.columns if re.match(r'Parrot_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas:
    parafrase_parrot_bornIn[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_parrot_bornIn[coluna], parafrase_parrot_bornIn['obj_label'].str.lower())
    ]


#### Died In

In [7]:
file = '/home/rafael/tese/code/data/novos_data/filtrado_one_died_in.csv'
data_diedIn = pd.read_csv(file).drop(columns=['Unnamed: 0'])
# Uncomment to smoke-test on a small sample:
#data_diedIn = data_diedIn[:200]

# Strip the labels and KEEP the result, and do it before the sentences are
# filled: the bare `.str.strip()` call used to discard its output.
data_diedIn['obj_label'] = data_diedIn['obj_label'].str.strip()
data_diedIn = replace_masked_sentence(data_diedIn, 'masked_sentence', 'obj_label',
                                      'filled_sentence')
data_diedIn = data_diedIn[['sub_label', 'template', 'obj_label',
                           'masked_sentence', 'filled_sentence']]

# T5 paraphrases, expanded to one column per paraphrase.
parafrase_t5_diedIn = generate_output_phrases_t5(data_diedIn, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_diedIn = rename_numeric_columns(parafrase_t5_diedIn, 'T5_paraphrased_filled_sentence_')

# Parrot paraphrases, expanded the same way.
parafrase_parrot_diedIn = generate_output_phrases_parrot(data_diedIn, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')
parafrase_parrot_diedIn = rename_numeric_columns(parafrase_parrot_diedIn, 'Parrot_paraphrased_filled_sentence_')

In [8]:
# Mask each paraphrase with the label of ITS OWN row. Passing the whole
# `obj_label` column masked every other row's label too.
colunas = [coluna for coluna in parafrase_t5_diedIn.columns if re.match(r'T5_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas:
    parafrase_t5_diedIn[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_t5_diedIn[coluna], parafrase_t5_diedIn["obj_label"])
    ]

# Fixed: this loop used to search and rewrite `parafrase_t5_diedIn`, which has
# no Parrot_* columns, so the Parrot paraphrases were never masked.
colunas = [coluna for coluna in parafrase_parrot_diedIn.columns if re.match(r'Parrot_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas:
    parafrase_parrot_diedIn[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_parrot_diedIn[coluna], parafrase_parrot_diedIn["obj_label"].str.lower())
    ]

#### Capital

In [9]:
file = '/home/rafael/tese/code/data/novos_data/filtrado_one_theCapital_is.csv'
data_capital = pd.read_csv(file).drop(columns=['Unnamed: 0'])
# Uncomment to smoke-test on a small sample:
#data_capital = data_capital[:200]

# Strip the labels and KEEP the result, and do it before the sentences are
# filled: the bare `.str.strip()` call used to discard its output.
data_capital['obj_label'] = data_capital['obj_label'].str.strip()
data_capital = replace_masked_sentence(data_capital, 'masked_sentence', 'obj_label',
                                       'filled_sentence')
data_capital = data_capital[['sub_label', 'template', 'obj_label',
                             'masked_sentence', 'filled_sentence']]

# T5 paraphrases, expanded to one column per paraphrase.
parafrase_t5_capital = generate_output_phrases_t5(data_capital, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_capital = rename_numeric_columns(parafrase_t5_capital, 'T5_paraphrased_filled_sentence_')

# Parrot paraphrases, expanded the same way.
parafrase_parrot_capital = generate_output_phrases_parrot(data_capital, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')
parafrase_parrot_capital = rename_numeric_columns(parafrase_parrot_capital, 'Parrot_paraphrased_filled_sentence_')

In [10]:
# Mask each paraphrase with the label of ITS OWN row. Passing the whole
# `obj_label` column masked every other row's label too.
colunas_t5 = [coluna for coluna in parafrase_t5_capital.columns if re.match(r'T5_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_t5:
    parafrase_t5_capital[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_t5_capital[coluna], parafrase_t5_capital["obj_label"])
    ]

# The Parrot columns are matched against the lower-cased label, as before.
colunas_parrot = [coluna for coluna in parafrase_parrot_capital.columns if re.match(r'Parrot_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_parrot:
    parafrase_parrot_capital[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_parrot_capital[coluna], parafrase_parrot_capital["obj_label"].str.lower())
    ]

#### Citizen

In [11]:
file = '/home/rafael/tese/code/data/novos_data/filtrado_one_citizen.csv'
data_citizen = pd.read_csv(file).drop(columns=['Unnamed: 0'])
# Uncomment to smoke-test on a small sample:
#data_citizen = data_citizen[:200]

# Strip the labels and KEEP the result, and do it before the sentences are
# filled: the bare `.str.strip()` call used to discard its output.
data_citizen['obj_label'] = data_citizen['obj_label'].str.strip()
data_citizen = replace_masked_sentence(data_citizen, 'masked_sentence', 'obj_label',
                                       'filled_sentence')
data_citizen = data_citizen[['sub_label', 'template', 'obj_label',
                             'masked_sentence', 'filled_sentence']]

# T5 paraphrases, expanded to one column per paraphrase.
parafrase_t5_citizen = generate_output_phrases_t5(data_citizen, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_citizen = rename_numeric_columns(parafrase_t5_citizen, 'T5_paraphrased_filled_sentence_')

# Parrot paraphrases, expanded the same way.
parafrase_parrot_citizen = generate_output_phrases_parrot(data_citizen, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')
parafrase_parrot_citizen = rename_numeric_columns(parafrase_parrot_citizen, 'Parrot_paraphrased_filled_sentence_')

In [12]:
# Mask each paraphrase with the label of ITS OWN row. Passing the whole
# `obj_label` column masked every other row's label too.
colunas_t5 = [coluna for coluna in parafrase_t5_citizen.columns if re.match(r'T5_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_t5:
    parafrase_t5_citizen[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_t5_citizen[coluna], parafrase_t5_citizen["obj_label"])
    ]

# The Parrot columns are matched against the lower-cased label, as before.
colunas_parrot = [coluna for coluna in parafrase_parrot_citizen.columns if re.match(r'Parrot_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_parrot:
    parafrase_parrot_citizen[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_parrot_citizen[coluna], parafrase_parrot_citizen["obj_label"].str.lower())
    ]

#### Works For

In [13]:
file = '/home/rafael/tese/code/data/novos_data/filtrado_one_worksfor.csv'
data_worksfor = pd.read_csv(file).drop(columns=['Unnamed: 0'])
# Uncomment to smoke-test on a small sample:
#data_worksfor = data_worksfor[:200]

# Strip the labels and KEEP the result, and do it before the sentences are
# filled: the bare `.str.strip()` call used to discard its output.
data_worksfor['obj_label'] = data_worksfor['obj_label'].str.strip()
data_worksfor = replace_masked_sentence(data_worksfor, 'masked_sentence', 'obj_label',
                                        'filled_sentence')
data_worksfor = data_worksfor[['sub_label', 'template', 'obj_label',
                               'masked_sentence', 'filled_sentence']]

# T5 paraphrases, expanded to one column per paraphrase.
parafrase_t5_worksfor = generate_output_phrases_t5(data_worksfor, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_worksfor = rename_numeric_columns(parafrase_t5_worksfor, 'T5_paraphrased_filled_sentence_')

# Parrot paraphrases, expanded the same way.
parafrase_parrot_worksfor = generate_output_phrases_parrot(data_worksfor, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')
parafrase_parrot_worksfor = rename_numeric_columns(parafrase_parrot_worksfor, 'Parrot_paraphrased_filled_sentence_')

In [14]:
# Mask each paraphrase with the label of ITS OWN row. Passing the whole
# `obj_label` column masked every other row's label too.
colunas_t5 = [coluna for coluna in parafrase_t5_worksfor.columns if re.match(r'T5_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_t5:
    parafrase_t5_worksfor[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_t5_worksfor[coluna], parafrase_t5_worksfor["obj_label"])
    ]

# The Parrot columns are matched against the lower-cased label, as before.
colunas_parrot = [coluna for coluna in parafrase_parrot_worksfor.columns if re.match(r'Parrot_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_parrot:
    parafrase_parrot_worksfor[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_parrot_worksfor[coluna], parafrase_parrot_worksfor["obj_label"].str.lower())
    ]

#### Language

In [5]:
file = '/home/rafael/tese/code/data/novos_data/filtrado_one_language.csv'
data_language = pd.read_csv(file).drop(columns=['Unnamed: 0'])
# Uncomment to smoke-test on a small sample:
#data_language = data_language[:20]

# Strip the labels and KEEP the result, and do it before the sentences are
# filled: the bare `.str.strip()` call used to discard its output.
data_language['obj_label'] = data_language['obj_label'].str.strip()
data_language = replace_masked_sentence(data_language, 'masked_sentence', 'obj_label',
                                        'filled_sentence')
data_language = data_language[['sub_label', 'template', 'obj_label',
                               'masked_sentence', 'filled_sentence']]

# T5 paraphrases, expanded to one column per paraphrase.
parafrase_t5_language = generate_output_phrases_t5(data_language, 'filled_sentence', 'T5_paraphrased_filled_sentence')
parafrase_t5_language = rename_numeric_columns(parafrase_t5_language, 'T5_paraphrased_filled_sentence_')

# Parrot paraphrases, expanded the same way.
parafrase_parrot_language = generate_output_phrases_parrot(data_language, 'filled_sentence', 'Parrot_paraphrased_filled_sentence')
parafrase_parrot_language = rename_numeric_columns(parafrase_parrot_language, 'Parrot_paraphrased_filled_sentence_')

In [6]:
# Mask each paraphrase with the label of ITS OWN row. Passing the whole
# `obj_label` column masked every other row's label too.
colunas_t5 = [coluna for coluna in parafrase_t5_language.columns if re.match(r'T5_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_t5:
    parafrase_t5_language[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_t5_language[coluna], parafrase_t5_language["obj_label"])
    ]

# The Parrot columns are matched against the lower-cased label, as before.
colunas_parrot = [coluna for coluna in parafrase_parrot_language.columns if re.match(r'Parrot_paraphrased_filled_sentence_[^_]*\d+', coluna)]
for coluna in colunas_parrot:
    parafrase_parrot_language[coluna] = [
        substituir_palavras(text, [label])
        for text, label in zip(parafrase_parrot_language[coluna], parafrase_parrot_language["obj_label"].str.lower())
    ]

#

## Substituir e deixar apenas uma [MASK]

In [7]:
def deixa_uma_mask(sentence, word):
    """
    Leave at most one '[MASK]' in `sentence`.

    When the sentence contains several '[MASK]' tokens, all but the LAST one
    are replaced (left to right) by `word`.

    Parameters:
    - sentence: Text to process. Values that are not strings (None, NaN,
      lists, ...) are returned unchanged.
    - word: Replacement for the surplus masks; converted with `str`.

    Returns:
    - The sentence with at most one '[MASK]', or the untouched input when it
      is not a string or has zero or one mask.
    """
    if not isinstance(sentence, str):
        return sentence

    surplus_masks = sentence.count('[MASK]') - 1
    if surplus_masks <= 0:
        return sentence
    # str.replace with a count replaces the first `surplus_masks` occurrences,
    # which leaves exactly the final '[MASK]' in place.
    return sentence.replace('[MASK]', str(word), surplus_masks)

##### Born In

In [16]:
# Born In: keep a single [MASK] per cell, from the 4th column onwards.
for i in parafrase_t5_bornIn.columns[3:]:
    parafrase_t5_bornIn[i] = parafrase_t5_bornIn.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

for i in parafrase_parrot_bornIn.columns[3:]:
    parafrase_parrot_bornIn[i] = parafrase_parrot_bornIn.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

##### Died In

In [17]:
# Died In: keep a single [MASK] per cell, from the 4th column onwards.
for i in parafrase_t5_diedIn.columns[3:]:
    parafrase_t5_diedIn[i] = parafrase_t5_diedIn.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

for i in parafrase_parrot_diedIn.columns[3:]:
    parafrase_parrot_diedIn[i] = parafrase_parrot_diedIn.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

##### Capital

In [18]:
# Capital: keep a single [MASK] per cell, from the 4th column onwards.
for i in parafrase_t5_capital.columns[3:]:
    parafrase_t5_capital[i] = parafrase_t5_capital.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

for i in parafrase_parrot_capital.columns[3:]:
    parafrase_parrot_capital[i] = parafrase_parrot_capital.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

##### Citizen

In [19]:
# Citizen: keep a single [MASK] per cell, from the 4th column onwards.
for i in parafrase_t5_citizen.columns[3:]:
    parafrase_t5_citizen[i] = parafrase_t5_citizen.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

for i in parafrase_parrot_citizen.columns[3:]:
    parafrase_parrot_citizen[i] = parafrase_parrot_citizen.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

##### Works For

In [20]:
# Works For: keep a single [MASK] per cell, from the 4th column onwards.
for i in parafrase_t5_worksfor.columns[3:]:
    parafrase_t5_worksfor[i] = parafrase_t5_worksfor.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

for i in parafrase_parrot_worksfor.columns[3:]:
    parafrase_parrot_worksfor[i] = parafrase_parrot_worksfor.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

##### Language

In [8]:
# Language: keep a single [MASK] per cell, from the 4th column onwards.
for i in parafrase_t5_language.columns[3:]:
    parafrase_t5_language[i] = parafrase_t5_language.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

for i in parafrase_parrot_language.columns[3:]:
    parafrase_parrot_language[i] = parafrase_parrot_language.apply(
        lambda linha: deixa_uma_mask(linha[i], linha['obj_label']),
        axis=1,
    )

In [21]:
# Longest `masked_sentence` (in characters) that is kept.
# NOTE(review): 512 looks chosen to mirror the models' 512-token limit, but
# this filter counts characters, not tokens - confirm that is intended.
MAX_SENTENCE_CHARS = 512


def drop_long_sentences(df, column="masked_sentence", max_chars=MAX_SENTENCE_CHARS):
    """
    Return a copy of `df` without the rows whose `column` text is too long.

    Parameters:
    - df (pandas.DataFrame): Frame to filter; it is not modified.
    - column (str): Text column whose length is measured with `.str.len()`.
    - max_chars (int): Rows are kept when the length is <= this value.

    Returns:
    - pandas.DataFrame: The filtered copy, keeping the original index labels.
      Rows whose `column` value is missing are dropped as well, because a
      missing length never satisfies the comparison.
    """
    keep = df[column].str.len() <= max_chars
    return df[keep].copy()


## Born In
parafrase_t5_bornIn = drop_long_sentences(parafrase_t5_bornIn)
parafrase_parrot_bornIn = drop_long_sentences(parafrase_parrot_bornIn)

## Died In
parafrase_t5_diedIn = drop_long_sentences(parafrase_t5_diedIn)
parafrase_parrot_diedIn = drop_long_sentences(parafrase_parrot_diedIn)

## Capital
parafrase_t5_capital = drop_long_sentences(parafrase_t5_capital)
parafrase_parrot_capital = drop_long_sentences(parafrase_parrot_capital)

## Citizen
parafrase_t5_citizen = drop_long_sentences(parafrase_t5_citizen)
parafrase_parrot_citizen = drop_long_sentences(parafrase_parrot_citizen)

## Works For
parafrase_t5_worksfor = drop_long_sentences(parafrase_t5_worksfor)
parafrase_parrot_worksfor = drop_long_sentences(parafrase_parrot_worksfor)

## Language
parafrase_t5_language = drop_long_sentences(parafrase_t5_language)
parafrase_parrot_language = drop_long_sentences(parafrase_parrot_language)

In [9]:
## Language
# Keep only the rows whose masked sentence is shorter than 513 characters.
# The length lives in a scratch column that is dropped again after filtering.
parafrase_t5_language = (
    parafrase_t5_language
    .assign(sentence_length=parafrase_t5_language["masked_sentence"].str.len())
    .query('sentence_length < 513')
    .drop(columns=['sentence_length'])
)

parafrase_parrot_language = (
    parafrase_parrot_language
    .assign(sentence_length=parafrase_parrot_language["masked_sentence"].str.len())
    .query('sentence_length < 513')
    .drop(columns=['sentence_length'])
)

In [22]:
# Persist the masked paraphrase tables, one CSV per relation and paraphraser.
# `to_csv` is called without `index=False`, so the index is written as the
# first column; the cells that reload these files drop it as 'Unnamed: 0'.
parafrase_parrot_bornIn.to_csv('/home/rafael/tese/code/data/novos_data/dados/parrot_bornIn.csv')
parafrase_t5_bornIn.to_csv('/home/rafael/tese/code/data/novos_data/dados/t5_bornIn.csv')

parafrase_parrot_diedIn.to_csv('/home/rafael/tese/code/data/novos_data/dados/parrot_diedIn.csv')
parafrase_t5_diedIn.to_csv('/home/rafael/tese/code/data/novos_data/dados/t5_diedIn.csv')

parafrase_parrot_capital.to_csv('/home/rafael/tese/code/data/novos_data/dados/parrot_capital.csv')
parafrase_t5_capital.to_csv('/home/rafael/tese/code/data/novos_data/dados/t5_capital.csv')

parafrase_parrot_citizen.to_csv('/home/rafael/tese/code/data/novos_data/dados/parrot_citizen.csv')
parafrase_t5_citizen.to_csv('/home/rafael/tese/code/data/novos_data/dados/t5_citizen.csv')

parafrase_parrot_worksfor.to_csv('/home/rafael/tese/code/data/novos_data/dados/parrot_worksfor.csv')
parafrase_t5_worksfor.to_csv('/home/rafael/tese/code/data/novos_data/dados/t5_worksfor.csv')

parafrase_parrot_language.to_csv('/home/rafael/tese/code/data/novos_data/dados/parrot_language.csv')
parafrase_t5_language.to_csv('/home/rafael/tese/code/data/novos_data/dados/t5_language.csv')

In [10]:
# Language only: writes the same two files as the all-relations export cell
# (identical statements and paths), so it overwrites them when run.
parafrase_parrot_language.to_csv('/home/rafael/tese/code/data/novos_data/dados/parrot_language.csv')
parafrase_t5_language.to_csv('/home/rafael/tese/code/data/novos_data/dados/t5_language.csv')

# Utilizando

In [11]:
def process_prompt_results(df_column, masked_model):
    """
    Collect the predicted tokens of a masked language model in one flat list.

    Parameters:
    df_column (iterable): Prompts to be processed.
    masked_model (callable): Called as `masked_model(prompt)`; expected to
        return an iterable of dicts with a 'token_str' key.

    Returns:
    list: A FLAT list. Each usable prompt contributes one stripped,
        lower-cased token string per prediction; each unusable prompt (not a
        string, or without '[MASK]') contributes a single `nan`. The result
        is therefore not aligned one-to-one with the prompts; use
        `process_prompt_results2` for per-prompt grouping.
    """
    outputs = []
    for prompt in df_column:
        # Missing cells read back from CSV are float NaN, not None, so test
        # for "is a string" instead of "is not None".
        if not isinstance(prompt, str) or '[MASK]' not in prompt:
            outputs.append(nan)
            continue
        outputs.extend(
            prediction['token_str'].strip().lower()
            for prediction in masked_model(prompt)
        )
    return outputs

def process_prompt_results2(df_column, masked_model, with_socre=0, mask_token='[MASK]'):
    """
    Run a masked language model on each prompt and keep tokens with scores.

    Parameters:
    df_column (iterable): Prompts to be processed.
    masked_model (callable): Called as `masked_model(prompt)`; expected to
        return an iterable of dicts with 'token_str' and 'score' keys.
    with_socre: Unused; kept so existing calls keep working.
    mask_token (str): Token a prompt must contain to be processed.

    Returns:
    list: One entry per prompt, in order. A usable prompt yields a list of
        {'token': <stripped, lower-cased token>, 'score': <score rounded to
        3 decimals>} dicts; a prompt that is not a string or lacks
        `mask_token` yields `nan`.
    """
    outputs = []
    for prompt in df_column:
        # Missing cells read back from CSV are float NaN, not None, so test
        # for "is a string" instead of "is not None".
        if not isinstance(prompt, str) or mask_token not in prompt:
            outputs.append(nan)
            continue
        outputs.append([
            {'token': item['token_str'].strip().lower(),
             'score': round(item['score'], 3)}
            for item in masked_model(prompt)
        ])
    return outputs

def process_prompt_results_roberta2(df_column, masked_model, with_socre=0, mask_token='<mask>'):
    """
    Run a masked language model on each '<mask>'-style prompt.

    Parameters:
    df_column (iterable): Prompts to be processed.
    masked_model (callable): Called as `masked_model(prompt)`; expected to
        return an iterable of dicts with 'token_str' and 'score' keys.
    with_socre: Unused; kept so existing calls keep working.
    mask_token (str): Token a prompt must contain to be processed.

    Returns:
    list: One entry per prompt, in order. A usable prompt yields a list of
        {'token': <stripped, lower-cased token>, 'score': <score rounded to
        3 decimals>} dicts; a prompt that is not a string or lacks
        `mask_token` yields `nan`.
    """
    outputs = []
    for prompt in df_column:
        # Missing cells read back from CSV are float NaN, not None, so test
        # for "is a string" instead of "is not None".
        if not isinstance(prompt, str) or mask_token not in prompt:
            outputs.append(nan)
            continue
        outputs.append([
            {'token': item['token_str'].strip().lower(),
             'score': round(item['score'], 3)}
            for item in masked_model(prompt)
        ])
    return outputs

def create_results_list(data, true_labels, num_iterations):
    """
    Score whether each true label appears among the first predictions.

    Parameters:
    - data: Iterable of prediction sequences, one per example. Entries that
      cannot be sliced (for example `nan` placeholders) count as misses.
    - true_labels: Iterable of expected labels, paired with `data` by position.
    - num_iterations: How many leading predictions of each sequence to check.

    Returns:
    - results_list: List of 1s and 0s (1 when the label is among the first
      `num_iterations` predictions).
    - indices: For each example, the position of the label in its sequence,
      or None on a miss.
    - The mean of `results_list` rounded to 3 decimals (`nan` when there are
      no examples).
    """
    results_list = []
    indices = []
    for sublist, true_label in zip(data, true_labels):
        try:
            candidates = sublist[:num_iterations]
        except TypeError:
            # Not a sequence (e.g. the nan stored for prompts without a mask).
            candidates = ()

        if true_label in candidates:
            results_list.append(1)
            indices.append(sublist.index(true_label))
        else:
            results_list.append(0)
            indices.append(None)

    accuracy = round(average(results_list), 3) if results_list else nan
    return results_list, indices, accuracy

def get_first(seq):
    """Descend into the first element of nested tuples/lists and return the
    first value that is neither a tuple nor a list."""
    current = seq
    while isinstance(current, (tuple, list)):
        current = current[0]
    return current

def get_zero_list(seq):
    """Apply `get_first` to every item of `seq` and return the results as a list."""
    return list(map(get_first, seq))

In [25]:
# Reload the saved T5 tables. 'Unnamed: 0' is the index column written by
# to_csv. The Born In table is the only one read without dropping
# 'filled_sentence'.
bornIn = pd.read_csv('/home/rafael/tese/code/data/novos_data/dados/t5_bornIn.csv').drop(
    columns=['Unnamed: 0'])
capital = pd.read_csv('/home/rafael/tese/code/data/novos_data/dados/t5_capital.csv').drop(
    columns=['Unnamed: 0', 'filled_sentence'])
diedIn = pd.read_csv('/home/rafael/tese/code/data/novos_data/dados/t5_diedIn.csv').drop(
    columns=['Unnamed: 0', 'filled_sentence'])
worksfor = pd.read_csv('/home/rafael/tese/code/data/novos_data/dados/t5_worksfor.csv').drop(
    columns=['Unnamed: 0', 'filled_sentence'])
citizen = pd.read_csv('/home/rafael/tese/code/data/novos_data/dados/t5_citizen.csv').drop(
    columns=['Unnamed: 0', 'filled_sentence'])

In [12]:
# Reload the saved T5 Language table without the written index column and
# without 'filled_sentence'.
language = pd.read_csv('/home/rafael/tese/code/data/novos_data/dados/t5_language.csv').drop(
    columns=['Unnamed: 0', 'filled_sentence'])

In [13]:
def mask_substitution(text, source='[MASK]', target='<mask>'):
    """
    Replace every '[MASK]' with '<mask>' in the input text.

    NOTE: a later cell in this notebook defines `mask_substitution` again with
    the opposite direction; whichever definition ran last is the active one.

    Parameters:
    - text: Input text. Values that are not strings (e.g. NaN cells) are
      returned unchanged.
    - source (str): Token to look for; defaults to '[MASK]'.
    - target (str): Token to write instead; defaults to '<mask>'.

    Returns:
    - Transformed text
    """
    if not isinstance(text, str):
        return text
    return text.replace(source, target)

def mask_substitution_use(df, columns):
    """Apply `mask_substitution` to every value of the given columns.

    The columns are overwritten on `df` itself, and that same `df` is returned.
    """
    for column_name in columns:
        converted = df[column_name].apply(mask_substitution)
        df[column_name] = converted

    return df

In [14]:
def mask_substitution(text, source='<mask>', target='[MASK]'):
    """
    Replace every '<mask>' with '[MASK]' in the input text.

    NOTE: this redefines the `mask_substitution` from the previous cell, which
    converts in the opposite direction ('[MASK]' -> '<mask>'); whichever
    definition ran last is the active one.

    Parameters:
    - text: Input text. Values that are not strings (e.g. NaN cells) are
      returned unchanged.
    - source (str): Token to look for; defaults to '<mask>'.
    - target (str): Token to write instead; defaults to '[MASK]'.

    Returns:
    - Transformed text
    """
    if not isinstance(text, str):
        return text
    return text.replace(source, target)

def mask_substitution_use(df, columns):
    for coluna in columns:
        df[coluna] = df[coluna].apply(mask_substitution)
    
    return df

In [34]:
# Build the frames used for RoBERTa by rewriting the mask token in every column from
# index 3 onward. Each call receives an explicit copy so that the source frame (used
# by the BERT and ELECTRA cells) is never the same object as its *_roberta counterpart.

# Born In
bornIn_roberta = mask_substitution_use(bornIn.copy(), bornIn.columns[3:])

# Capital
capital_roberta = mask_substitution_use(capital.copy(), capital.columns[3:])

# Died In
diedIn_roberta = mask_substitution_use(diedIn.copy(), diedIn.columns[3:])

# Works For
worksfor_roberta = mask_substitution_use(worksfor.copy(), worksfor.columns[3:])

# Citizen
citizen_roberta = mask_substitution_use(citizen.copy(), citizen.columns[3:])

# Language
language_roberta = mask_substitution_use(language.copy(), language.columns[3:])

NameError: name 'bornIn' is not defined

In [35]:
# Language: rewrite the mask token on a copy so `language` and `language_roberta`
# stay two independent frames.
language_roberta = mask_substitution_use(language.copy(), language.columns[3:])

In [52]:
# Re-run the mask rewrite on `language` itself (columns from index 3 onward); the direction
# of the rewrite is the one of the mask_substitution definition currently active in the kernel.
language = mask_substitution_use(df=language, columns=language.columns[3:])

In [28]:
# Lower-cased gold answers ('obj_label') of every relation, as plain Python lists.
# (The Parrot and "with help" variants of these lists are currently disabled.)
(
    answer_filled_sentences_bornIn_t5,    # Born in
    answer_filled_sentences_diedIn_t5,    # Died In
    answer_filled_sentences_capital_t5,   # The Capital is
    answer_filled_sentences_citizen_t5,   # Citizen
    answer_filled_sentences_worksfor_t5,  # Works For
    answer_filled_sentences_language_t5,  # Language
) = [
    relation_frame['obj_label'].str.lower().tolist()
    for relation_frame in (bornIn, diedIn, capital, citizen, worksfor, language)
]

In [15]:
# Language: lower-cased gold answers ('obj_label') as a plain Python list.
lowercase_language_labels = language['obj_label'].str.lower()
answer_filled_sentences_language_t5 = lowercase_language_labels.tolist()

In [16]:
# Fill-mask pipelines for the three masked language models; each one is configured with top_k=10.
unmasker_bertLarge = pipeline(task='fill-mask', model='bert-large-uncased', top_k=10)
unmasker_robertaLarge = pipeline(task='fill-mask', model='FacebookAI/roberta-large', top_k=10)
unmasker_electraLarge = pipeline(task='fill-mask', model='google/electra-large-generator', top_k=10)

Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


#### Born In

##### Bert

In [30]:
# Run the bert-large unmasker over the bornIn prompts: the original masked sentence
# first, then the four T5 paraphrases, in that order.
(
    outputs_bert_bornIn_original,
    outputs_bert_bornIn_t5_0,
    outputs_bert_bornIn_t5_1,
    outputs_bert_bornIn_t5_2,
    outputs_bert_bornIn_t5_3,
) = [
    process_prompt_results2(bornIn[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_bornIn_t5_bert = pd.DataFrame({
    'answers': answer_filled_sentences_bornIn_t5,
    'original': outputs_bert_bornIn_original,
    't5_0': outputs_bert_bornIn_t5_0,
    't5_1': outputs_bert_bornIn_t5_1,
    't5_2': outputs_bert_bornIn_t5_2,
    't5_3': outputs_bert_bornIn_t5_3,
})
outputs_len_bornIn_t5 = len(outputs_bornIn_t5_bert.index)

In [31]:
# Save the bert-large bornIn results as CSV (default to_csv options).
outputs_bornIn_t5_bert.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_bert.csv')

##### Roberta

In [32]:
# Run the roberta-large unmasker over the bornIn_roberta prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_roberta_bornIn_original,
    outputs_roberta_bornIn_t5_0,
    outputs_roberta_bornIn_t5_1,
    outputs_roberta_bornIn_t5_2,
    outputs_roberta_bornIn_t5_3,
) = [
    process_prompt_results_roberta2(bornIn_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_bornIn_t5_roberta = pd.DataFrame({
    'answers': answer_filled_sentences_bornIn_t5,
    'original': outputs_roberta_bornIn_original,
    't5_0': outputs_roberta_bornIn_t5_0,
    't5_1': outputs_roberta_bornIn_t5_1,
    't5_2': outputs_roberta_bornIn_t5_2,
    't5_3': outputs_roberta_bornIn_t5_3,
})
outputs_len_bornIn_t5 = len(outputs_bornIn_t5_roberta.index)

In [33]:
# Save the roberta-large bornIn results as CSV (default to_csv options).
outputs_bornIn_t5_roberta.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_roberta.csv')

##### Electra

In [34]:
# Run the electra-large-generator unmasker over the bornIn prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_electra_bornIn_original,
    outputs_electra_bornIn_t5_0,
    outputs_electra_bornIn_t5_1,
    outputs_electra_bornIn_t5_2,
    outputs_electra_bornIn_t5_3,
) = [
    process_prompt_results2(bornIn[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_bornIn_t5_electra = pd.DataFrame({
    'answers': answer_filled_sentences_bornIn_t5,
    'original': outputs_electra_bornIn_original,
    't5_0': outputs_electra_bornIn_t5_0,
    't5_1': outputs_electra_bornIn_t5_1,
    't5_2': outputs_electra_bornIn_t5_2,
    't5_3': outputs_electra_bornIn_t5_3,
})
outputs_len_bornIn_t5 = len(outputs_bornIn_t5_electra.index)

In [35]:
# Save the electra-large-generator bornIn results as CSV (default to_csv options).
outputs_bornIn_t5_electra.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_electra.csv')

#### Died In

##### Bert

In [36]:
# Run the bert-large unmasker over the diedIn prompts: the original masked sentence
# first, then the four T5 paraphrases.
outputs_bert_diedIn_original = process_prompt_results2(diedIn['masked_sentence'],
                                                       unmasker_bertLarge)

outputs_bert_diedIn_t5_0 = process_prompt_results2(diedIn['T5_paraphrased_filled_sentence_0'],
                                                   unmasker_bertLarge)
outputs_bert_diedIn_t5_1 = process_prompt_results2(diedIn['T5_paraphrased_filled_sentence_1'],
                                                   unmasker_bertLarge)
outputs_bert_diedIn_t5_2 = process_prompt_results2(diedIn['T5_paraphrased_filled_sentence_2'],
                                                   unmasker_bertLarge)
outputs_bert_diedIn_t5_3 = process_prompt_results2(diedIn['T5_paraphrased_filled_sentence_3'],
                                                   unmasker_bertLarge)

# Gold answers side by side with the results of every prompt variant. The frame is
# named like every other outputs_<relation>_t5_<model> frame in this notebook.
outputs_diedIn_t5_bert = pd.DataFrame({'answers': answer_filled_sentences_diedIn_t5,
                        'original': outputs_bert_diedIn_original,
                        't5_0': outputs_bert_diedIn_t5_0,
                        't5_1': outputs_bert_diedIn_t5_1,
                        't5_2': outputs_bert_diedIn_t5_2,
                        't5_3': outputs_bert_diedIn_t5_3})
# Alias kept for existing code that refers to the frame by its original, suffix-less name.
outputs_diedIn_t5 = outputs_diedIn_t5_bert
outputs_len_diedIn_t5 = outputs_diedIn_t5_bert.shape[0]

In [37]:
# Save the bert-large diedIn results as CSV (default to_csv options).
outputs_diedIn_t5.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_bert.csv')

##### Roberta

In [38]:
# Run the roberta-large unmasker over the diedIn_roberta prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_roberta_diedIn_original,
    outputs_roberta_diedIn_t5_0,
    outputs_roberta_diedIn_t5_1,
    outputs_roberta_diedIn_t5_2,
    outputs_roberta_diedIn_t5_3,
) = [
    process_prompt_results_roberta2(diedIn_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_diedIn_t5_roberta = pd.DataFrame({
    'answers': answer_filled_sentences_diedIn_t5,
    'original': outputs_roberta_diedIn_original,
    't5_0': outputs_roberta_diedIn_t5_0,
    't5_1': outputs_roberta_diedIn_t5_1,
    't5_2': outputs_roberta_diedIn_t5_2,
    't5_3': outputs_roberta_diedIn_t5_3,
})
outputs_len_diedIn_t5 = len(outputs_diedIn_t5_roberta.index)

In [39]:
# Save the roberta-large diedIn results as CSV (default to_csv options).
outputs_diedIn_t5_roberta.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_roberta.csv')

##### Electra

In [40]:
# Run the electra-large-generator unmasker over the diedIn prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_electra_diedIn_original,
    outputs_electra_diedIn_t5_0,
    outputs_electra_diedIn_t5_1,
    outputs_electra_diedIn_t5_2,
    outputs_electra_diedIn_t5_3,
) = [
    process_prompt_results2(diedIn[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_diedIn_t5_electra = pd.DataFrame({
    'answers': answer_filled_sentences_diedIn_t5,
    'original': outputs_electra_diedIn_original,
    't5_0': outputs_electra_diedIn_t5_0,
    't5_1': outputs_electra_diedIn_t5_1,
    't5_2': outputs_electra_diedIn_t5_2,
    't5_3': outputs_electra_diedIn_t5_3,
})
outputs_len_diedIn_t5 = len(outputs_diedIn_t5_electra.index)

In [41]:
# Save the electra-large-generator diedIn results as CSV (default to_csv options).
outputs_diedIn_t5_electra.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_electra.csv')

#### Capital

##### Bert

In [42]:
# Run the bert-large unmasker over the capital prompts: the original masked sentence
# first, then the four T5 paraphrases, in that order.
(
    outputs_bert_capital_original,
    outputs_bert_capital_t5_0,
    outputs_bert_capital_t5_1,
    outputs_bert_capital_t5_2,
    outputs_bert_capital_t5_3,
) = [
    process_prompt_results2(capital[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_capital_t5_bert = pd.DataFrame({
    'answers': answer_filled_sentences_capital_t5,
    'original': outputs_bert_capital_original,
    't5_0': outputs_bert_capital_t5_0,
    't5_1': outputs_bert_capital_t5_1,
    't5_2': outputs_bert_capital_t5_2,
    't5_3': outputs_bert_capital_t5_3,
})
outputs_len_capital_t5 = len(outputs_capital_t5_bert.index)

In [43]:
# Save the bert-large capital results as CSV (default to_csv options).
outputs_capital_t5_bert.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_bert.csv')

##### Roberta

In [44]:
# Run the roberta-large unmasker over the capital_roberta prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_roberta_capital_original,
    outputs_roberta_capital_t5_0,
    outputs_roberta_capital_t5_1,
    outputs_roberta_capital_t5_2,
    outputs_roberta_capital_t5_3,
) = [
    process_prompt_results_roberta2(capital_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_capital_t5_roberta = pd.DataFrame({
    'answers': answer_filled_sentences_capital_t5,
    'original': outputs_roberta_capital_original,
    't5_0': outputs_roberta_capital_t5_0,
    't5_1': outputs_roberta_capital_t5_1,
    't5_2': outputs_roberta_capital_t5_2,
    't5_3': outputs_roberta_capital_t5_3,
})
outputs_len_capital_t5 = len(outputs_capital_t5_roberta.index)

In [45]:
# Save the roberta-large capital results as CSV (default to_csv options).
outputs_capital_t5_roberta.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_roberta.csv')

##### Electra

In [46]:
# Run the electra-large-generator unmasker over the capital prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_electra_capital_original,
    outputs_electra_capital_t5_0,
    outputs_electra_capital_t5_1,
    outputs_electra_capital_t5_2,
    outputs_electra_capital_t5_3,
) = [
    process_prompt_results2(capital[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_capital_t5_electra = pd.DataFrame({
    'answers': answer_filled_sentences_capital_t5,
    'original': outputs_electra_capital_original,
    't5_0': outputs_electra_capital_t5_0,
    't5_1': outputs_electra_capital_t5_1,
    't5_2': outputs_electra_capital_t5_2,
    't5_3': outputs_electra_capital_t5_3,
})
outputs_len_capital_t5 = len(outputs_capital_t5_electra.index)

In [47]:
# Save the electra-large-generator capital results as CSV (default to_csv options).
outputs_capital_t5_electra.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_electra.csv')

#### Works for

##### Bert

In [48]:
# Run the bert-large unmasker over the worksfor prompts: the original masked sentence
# first, then the four T5 paraphrases, in that order.
(
    outputs_bert_worksfor_original,
    outputs_bert_worksfor_t5_0,
    outputs_bert_worksfor_t5_1,
    outputs_bert_worksfor_t5_2,
    outputs_bert_worksfor_t5_3,
) = [
    process_prompt_results2(worksfor[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_worksfor_t5_bert = pd.DataFrame({
    'answers': answer_filled_sentences_worksfor_t5,
    'original': outputs_bert_worksfor_original,
    't5_0': outputs_bert_worksfor_t5_0,
    't5_1': outputs_bert_worksfor_t5_1,
    't5_2': outputs_bert_worksfor_t5_2,
    't5_3': outputs_bert_worksfor_t5_3,
})
outputs_len_worksfor_t5 = len(outputs_worksfor_t5_bert.index)

In [49]:
# Save the bert-large worksfor results as CSV (default to_csv options).
outputs_worksfor_t5_bert.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_bert.csv')

##### Roberta

In [50]:
# Run the roberta-large unmasker over the worksfor_roberta prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_roberta_worksfor_original,
    outputs_roberta_worksfor_t5_0,
    outputs_roberta_worksfor_t5_1,
    outputs_roberta_worksfor_t5_2,
    outputs_roberta_worksfor_t5_3,
) = [
    process_prompt_results_roberta2(worksfor_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_worksfor_t5_roberta = pd.DataFrame({
    'answers': answer_filled_sentences_worksfor_t5,
    'original': outputs_roberta_worksfor_original,
    't5_0': outputs_roberta_worksfor_t5_0,
    't5_1': outputs_roberta_worksfor_t5_1,
    't5_2': outputs_roberta_worksfor_t5_2,
    't5_3': outputs_roberta_worksfor_t5_3,
})
outputs_len_worksfor_t5 = len(outputs_worksfor_t5_roberta.index)

In [51]:
# Save the roberta-large worksfor results as CSV (default to_csv options).
outputs_worksfor_t5_roberta.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_roberta.csv')

##### Electra

In [52]:
# Run the electra-large-generator unmasker over the worksfor prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_electra_worksfor_original,
    outputs_electra_worksfor_t5_0,
    outputs_electra_worksfor_t5_1,
    outputs_electra_worksfor_t5_2,
    outputs_electra_worksfor_t5_3,
) = [
    process_prompt_results2(worksfor[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_worksfor_t5_electra = pd.DataFrame({
    'answers': answer_filled_sentences_worksfor_t5,
    'original': outputs_electra_worksfor_original,
    't5_0': outputs_electra_worksfor_t5_0,
    't5_1': outputs_electra_worksfor_t5_1,
    't5_2': outputs_electra_worksfor_t5_2,
    't5_3': outputs_electra_worksfor_t5_3,
})
outputs_len_worksfor_t5 = len(outputs_worksfor_t5_electra.index)

In [53]:
# Save the electra-large-generator worksfor results as CSV (default to_csv options).
outputs_worksfor_t5_electra.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_electra.csv')

#### Citizen

##### Bert

In [54]:
# Run the bert-large unmasker over the citizen prompts: the original masked sentence
# first, then the four T5 paraphrases, in that order.
(
    outputs_bert_citizen_original,
    outputs_bert_citizen_t5_0,
    outputs_bert_citizen_t5_1,
    outputs_bert_citizen_t5_2,
    outputs_bert_citizen_t5_3,
) = [
    process_prompt_results2(citizen[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_citizen_t5_bert = pd.DataFrame({
    'answers': answer_filled_sentences_citizen_t5,
    'original': outputs_bert_citizen_original,
    't5_0': outputs_bert_citizen_t5_0,
    't5_1': outputs_bert_citizen_t5_1,
    't5_2': outputs_bert_citizen_t5_2,
    't5_3': outputs_bert_citizen_t5_3,
})
outputs_len_citizen_t5 = len(outputs_citizen_t5_bert.index)

In [55]:
# Save the bert-large citizen results as CSV (default to_csv options).
outputs_citizen_t5_bert.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_bert.csv')

##### Roberta

In [56]:
# Run the roberta-large unmasker over the citizen_roberta prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_roberta_citizen_original,
    outputs_roberta_citizen_t5_0,
    outputs_roberta_citizen_t5_1,
    outputs_roberta_citizen_t5_2,
    outputs_roberta_citizen_t5_3,
) = [
    process_prompt_results_roberta2(citizen_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_citizen_t5_roberta = pd.DataFrame({
    'answers': answer_filled_sentences_citizen_t5,
    'original': outputs_roberta_citizen_original,
    't5_0': outputs_roberta_citizen_t5_0,
    't5_1': outputs_roberta_citizen_t5_1,
    't5_2': outputs_roberta_citizen_t5_2,
    't5_3': outputs_roberta_citizen_t5_3,
})
outputs_len_citizen_t5 = len(outputs_citizen_t5_roberta.index)

In [57]:
# Save the roberta-large citizen results as CSV (default to_csv options).
outputs_citizen_t5_roberta.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_roberta.csv')

##### Electra

In [58]:
# Run the electra-large-generator unmasker over the citizen prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_electra_citizen_original,
    outputs_electra_citizen_t5_0,
    outputs_electra_citizen_t5_1,
    outputs_electra_citizen_t5_2,
    outputs_electra_citizen_t5_3,
) = [
    process_prompt_results2(citizen[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_citizen_t5_electra = pd.DataFrame({
    'answers': answer_filled_sentences_citizen_t5,
    'original': outputs_electra_citizen_original,
    't5_0': outputs_electra_citizen_t5_0,
    't5_1': outputs_electra_citizen_t5_1,
    't5_2': outputs_electra_citizen_t5_2,
    't5_3': outputs_electra_citizen_t5_3,
})
outputs_len_citizen_t5 = len(outputs_citizen_t5_electra.index)

In [59]:
# Save the electra-large-generator citizen results as CSV (default to_csv options).
outputs_citizen_t5_electra.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_electra.csv')

#### Language

##### Bert

In [17]:
# Run the bert-large unmasker over the language prompts: the original masked sentence
# first, then the four T5 paraphrases, in that order.
(
    outputs_bert_language_original,
    outputs_bert_language_t5_0,
    outputs_bert_language_t5_1,
    outputs_bert_language_t5_2,
    outputs_bert_language_t5_3,
) = [
    process_prompt_results2(language[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_language_t5_bert = pd.DataFrame({
    'answers': answer_filled_sentences_language_t5,
    'original': outputs_bert_language_original,
    't5_0': outputs_bert_language_t5_0,
    't5_1': outputs_bert_language_t5_1,
    't5_2': outputs_bert_language_t5_2,
    't5_3': outputs_bert_language_t5_3,
})
outputs_len_language_t5 = len(outputs_language_t5_bert.index)

In [18]:
# Save the bert-large language results as CSV (default to_csv options).
outputs_language_t5_bert.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_bert.csv')

##### Roberta

In [36]:
# Run the roberta-large unmasker over the language_roberta prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_roberta_language_original,
    outputs_roberta_language_t5_0,
    outputs_roberta_language_t5_1,
    outputs_roberta_language_t5_2,
    outputs_roberta_language_t5_3,
) = [
    process_prompt_results_roberta2(language_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_language_t5_roberta = pd.DataFrame({
    'answers': answer_filled_sentences_language_t5,
    'original': outputs_roberta_language_original,
    't5_0': outputs_roberta_language_t5_0,
    't5_1': outputs_roberta_language_t5_1,
    't5_2': outputs_roberta_language_t5_2,
    't5_3': outputs_roberta_language_t5_3,
})
outputs_len_language_t5 = len(outputs_language_t5_roberta.index)

In [37]:
# Save the roberta-large language results as CSV (default to_csv options).
outputs_language_t5_roberta.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_roberta.csv')

##### Electra

In [19]:
# Run the electra-large-generator unmasker over the language prompts: the original masked
# sentence first, then the four T5 paraphrases, in that order.
(
    outputs_electra_language_original,
    outputs_electra_language_t5_0,
    outputs_electra_language_t5_1,
    outputs_electra_language_t5_2,
    outputs_electra_language_t5_3,
) = [
    process_prompt_results2(language[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# Gold answers side by side with the results of every prompt variant.
outputs_language_t5_electra = pd.DataFrame({
    'answers': answer_filled_sentences_language_t5,
    'original': outputs_electra_language_original,
    't5_0': outputs_electra_language_t5_0,
    't5_1': outputs_electra_language_t5_1,
    't5_2': outputs_electra_language_t5_2,
    't5_3': outputs_electra_language_t5_3,
})
outputs_len_language_t5 = len(outputs_language_t5_electra.index)

In [20]:
# Save the electra-large-generator language results as CSV (default to_csv options).
outputs_language_t5_electra.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_electra.csv')

## One and Few Shot

In [21]:
def substituir_x(row):
    """Return row['template'] with every '[X]' placeholder replaced by row['sub_label'].

    Parameters:
    - row: mapping-like object (e.g. a DataFrame row) with 'template' and 'sub_label' entries.
    """
    template_text, subject_label = row['template'], row['sub_label']
    return template_text.replace('[X]', subject_label)

def substituir_y(row):
    """
    Fill the object placeholder of a row's partially filled sentence.

    Parameters:
    - row: Record supporting `row[key]` lookup (e.g. a DataFrame row) with the
      keys 'triple_NL' and 'obj_label'

    Returns:
    - The 'triple_NL' text with every '[Y]' replaced by the object label
    """
    sentence, obj = row['triple_NL'], row['obj_label']
    return sentence.replace('[Y]', obj)

In [61]:
def _load_prompts_and_triples(csv_path):
    """
    Load one relation's prompt table and split off its first three columns.

    Parameters:
    - csv_path: Path of the relation's T5 prompt CSV

    Returns:
    - (prompts, triples): the full table without the 'Unnamed: 0' column, and an
      independent copy of its first three columns
    """
    prompts = pd.read_csv(csv_path).drop(columns=['Unnamed: 0'])
    # Explicit copy: a 'triple_NL' column is assigned to this frame later, and
    # writing into a bare column selection triggers pandas' SettingWithCopy
    # warning (hidden here by the global warnings filter).
    triples = prompts[prompts.columns[0:3]].copy()
    return prompts, triples


# Born In
bornIn_prompt, triple_bornIn = _load_prompts_and_triples('/home/rafael/tese/code/data/novos_data/dados/t5_bornIn.csv')

# Died In
diedIn_prompt, triple_diedIn = _load_prompts_and_triples('/home/rafael/tese/code/data/novos_data/dados/t5_diedIn.csv')

# Capital
capital_prompt, triple_capital = _load_prompts_and_triples('/home/rafael/tese/code/data/novos_data/dados/t5_capital.csv')

# Works For
worksfor_prompt, triple_worksfor = _load_prompts_and_triples('/home/rafael/tese/code/data/novos_data/dados/t5_worksfor.csv')

# Citizen
citizen_prompt, triple_citizen = _load_prompts_and_triples('/home/rafael/tese/code/data/novos_data/dados/t5_citizen.csv')

# Language
language_prompt, triple_language = _load_prompts_and_triples('/home/rafael/tese/code/data/novos_data/dados/t5_language.csv')

In [22]:
# Language
# Load the prompt table and discard the 'Unnamed: 0' column.
language_prompt = pd.read_csv('/home/rafael/tese/code/data/novos_data/dados/t5_language.csv').drop(columns=['Unnamed: 0'])
# Independent copy of the first three columns: a 'triple_NL' column is assigned
# to this frame later, and writing into a bare column selection triggers
# pandas' SettingWithCopy warning (hidden here by the global warnings filter).
triple_language = language_prompt[language_prompt.columns[0:3]].copy()

In [62]:
# Build the natural-language triple of every relation in two passes per frame:
# substituir_x writes the subject-filled template into 'triple_NL', then
# substituir_y fills the object into that intermediate text.
for _triple_frame in (
    triple_bornIn,    # Born In
    triple_diedIn,    # Died In
    triple_capital,   # Capital
    triple_worksfor,  # Works For
    triple_citizen,   # Citizen
    triple_language,  # Language
):
    for _fill_placeholder in (substituir_x, substituir_y):
        _triple_frame.loc[:, 'triple_NL'] = _triple_frame.apply(_fill_placeholder, axis=1)

In [23]:
# Language
# Two passes over the same frame: subject first ('[X]'), then object ('[Y]').
for _fill_placeholder in (substituir_x, substituir_y):
    triple_language.loc[:, 'triple_NL'] = triple_language.apply(_fill_placeholder, axis=1)

#### Born In

In [63]:
# Draw 10 demonstration sentences (filled-in triples) for this relation. The
# fixed seed keeps the draw reproducible; random.sample raises ValueError when
# fewer than 10 sentences are available.
random.seed(42)
itens_selecionados = random.sample(triple_bornIn['triple_NL'].tolist(), 10)

# Every unordered combination of 3 distinct demonstrations, each joined with
# single spaces into one three-sentence context.
combinacoes = [list(tupla) for tupla in itertools.combinations(itens_selecionados, 3)]
frases_unidas = [' '.join(lista) for lista in combinacoes]

# Prompt columns that receive a demonstration prefix: every column from the
# fourth one onwards.
colunas_para_concatenar = bornIn_prompt.columns[3:]

# NOTE(review): demonstrations are drawn from the same rows that are queried,
# so a row can receive its own filled-in triple as context -- confirm this is
# intended.

# One demonstration per prompt: start from an independent copy of the
# identifying columns, then prefix every prompt column with a demonstration
# picked at random for each row. Demonstration and prompt are joined with a
# single space so the two sentences do not run into each other.
filled_sentences_bornIn_witHelp_1 = bornIn_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(itens_selecionados) for _ in range(bornIn_prompt.shape[0])]
    filled_sentences_bornIn_witHelp_1[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, bornIn_prompt[coluna])
    ]

# Three demonstrations per prompt: reseed, then repeat the procedure with the
# joined three-sentence contexts as the pool of prefixes.
random.seed(42)
filled_sentences_bornIn_witHelp_3 = bornIn_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(frases_unidas) for _ in range(bornIn_prompt.shape[0])]
    filled_sentences_bornIn_witHelp_3[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, bornIn_prompt[coluna])
    ]

#### Died In

In [64]:
# Draw 10 demonstration sentences (filled-in triples) for this relation. The
# fixed seed keeps the draw reproducible; random.sample raises ValueError when
# fewer than 10 sentences are available.
random.seed(42)
itens_selecionados = random.sample(triple_diedIn['triple_NL'].tolist(), 10)

# Every unordered combination of 3 distinct demonstrations, each joined with
# single spaces into one three-sentence context.
combinacoes = [list(tupla) for tupla in itertools.combinations(itens_selecionados, 3)]
frases_unidas = [' '.join(lista) for lista in combinacoes]

# Prompt columns that receive a demonstration prefix: every column from the
# fourth one onwards.
colunas_para_concatenar = diedIn_prompt.columns[3:]

# NOTE(review): demonstrations are drawn from the same rows that are queried,
# so a row can receive its own filled-in triple as context -- confirm this is
# intended.

# One demonstration per prompt: start from an independent copy of the
# identifying columns, then prefix every prompt column with a demonstration
# picked at random for each row. Demonstration and prompt are joined with a
# single space so the two sentences do not run into each other.
filled_sentences_diedIn_witHelp_1 = diedIn_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(itens_selecionados) for _ in range(diedIn_prompt.shape[0])]
    filled_sentences_diedIn_witHelp_1[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, diedIn_prompt[coluna])
    ]

# Three demonstrations per prompt: reseed, then repeat the procedure with the
# joined three-sentence contexts as the pool of prefixes.
random.seed(42)
filled_sentences_diedIn_witHelp_3 = diedIn_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(frases_unidas) for _ in range(diedIn_prompt.shape[0])]
    filled_sentences_diedIn_witHelp_3[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, diedIn_prompt[coluna])
    ]

#### Capital

In [65]:
# Draw 10 demonstration sentences (filled-in triples) for this relation. The
# fixed seed keeps the draw reproducible; random.sample raises ValueError when
# fewer than 10 sentences are available.
random.seed(42)
itens_selecionados = random.sample(triple_capital['triple_NL'].tolist(), 10)

# Every unordered combination of 3 distinct demonstrations, each joined with
# single spaces into one three-sentence context.
combinacoes = [list(tupla) for tupla in itertools.combinations(itens_selecionados, 3)]
frases_unidas = [' '.join(lista) for lista in combinacoes]

# Prompt columns that receive a demonstration prefix: every column from the
# fourth one onwards.
colunas_para_concatenar = capital_prompt.columns[3:]

# NOTE(review): demonstrations are drawn from the same rows that are queried,
# so a row can receive its own filled-in triple as context -- confirm this is
# intended.

# One demonstration per prompt: start from an independent copy of the
# identifying columns, then prefix every prompt column with a demonstration
# picked at random for each row. Demonstration and prompt are joined with a
# single space so the two sentences do not run into each other.
filled_sentences_capital_witHelp_1 = capital_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(itens_selecionados) for _ in range(capital_prompt.shape[0])]
    filled_sentences_capital_witHelp_1[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, capital_prompt[coluna])
    ]

# Three demonstrations per prompt: reseed, then repeat the procedure with the
# joined three-sentence contexts as the pool of prefixes.
random.seed(42)
filled_sentences_capital_witHelp_3 = capital_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(frases_unidas) for _ in range(capital_prompt.shape[0])]
    filled_sentences_capital_witHelp_3[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, capital_prompt[coluna])
    ]

#### Works For

In [66]:
# Draw 10 demonstration sentences (filled-in triples) for this relation. The
# fixed seed keeps the draw reproducible; random.sample raises ValueError when
# fewer than 10 sentences are available.
random.seed(42)
itens_selecionados = random.sample(triple_worksfor['triple_NL'].tolist(), 10)

# Every unordered combination of 3 distinct demonstrations, each joined with
# single spaces into one three-sentence context.
combinacoes = [list(tupla) for tupla in itertools.combinations(itens_selecionados, 3)]
frases_unidas = [' '.join(lista) for lista in combinacoes]

# Prompt columns that receive a demonstration prefix: every column from the
# fourth one onwards.
colunas_para_concatenar = worksfor_prompt.columns[3:]

# NOTE(review): demonstrations are drawn from the same rows that are queried,
# so a row can receive its own filled-in triple as context -- confirm this is
# intended.

# One demonstration per prompt: start from an independent copy of the
# identifying columns, then prefix every prompt column with a demonstration
# picked at random for each row. Demonstration and prompt are joined with a
# single space so the two sentences do not run into each other.
filled_sentences_worksfor_witHelp_1 = worksfor_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(itens_selecionados) for _ in range(worksfor_prompt.shape[0])]
    filled_sentences_worksfor_witHelp_1[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, worksfor_prompt[coluna])
    ]

# Three demonstrations per prompt: reseed, then repeat the procedure with the
# joined three-sentence contexts as the pool of prefixes.
random.seed(42)
filled_sentences_worksfor_witHelp_3 = worksfor_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(frases_unidas) for _ in range(worksfor_prompt.shape[0])]
    filled_sentences_worksfor_witHelp_3[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, worksfor_prompt[coluna])
    ]

#### Citizen

In [67]:
# Draw 10 demonstration sentences (filled-in triples) for this relation. The
# fixed seed keeps the draw reproducible; random.sample raises ValueError when
# fewer than 10 sentences are available.
random.seed(42)
itens_selecionados = random.sample(triple_citizen['triple_NL'].tolist(), 10)

# Every unordered combination of 3 distinct demonstrations, each joined with
# single spaces into one three-sentence context.
combinacoes = [list(tupla) for tupla in itertools.combinations(itens_selecionados, 3)]
frases_unidas = [' '.join(lista) for lista in combinacoes]

# Prompt columns that receive a demonstration prefix: every column from the
# fourth one onwards.
colunas_para_concatenar = citizen_prompt.columns[3:]

# NOTE(review): demonstrations are drawn from the same rows that are queried,
# so a row can receive its own filled-in triple as context -- confirm this is
# intended.

# One demonstration per prompt: start from an independent copy of the
# identifying columns, then prefix every prompt column with a demonstration
# picked at random for each row. Demonstration and prompt are joined with a
# single space so the two sentences do not run into each other.
filled_sentences_citizen_witHelp_1 = citizen_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(itens_selecionados) for _ in range(citizen_prompt.shape[0])]
    filled_sentences_citizen_witHelp_1[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, citizen_prompt[coluna])
    ]

# Three demonstrations per prompt: reseed, then repeat the procedure with the
# joined three-sentence contexts as the pool of prefixes.
random.seed(42)
filled_sentences_citizen_witHelp_3 = citizen_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(frases_unidas) for _ in range(citizen_prompt.shape[0])]
    filled_sentences_citizen_witHelp_3[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, citizen_prompt[coluna])
    ]

#### Language

In [24]:
# Draw 10 demonstration sentences (filled-in triples) for this relation. The
# fixed seed keeps the draw reproducible; random.sample raises ValueError when
# fewer than 10 sentences are available.
random.seed(42)
itens_selecionados = random.sample(triple_language['triple_NL'].tolist(), 10)

# Every unordered combination of 3 distinct demonstrations, each joined with
# single spaces into one three-sentence context.
combinacoes = [list(tupla) for tupla in itertools.combinations(itens_selecionados, 3)]
frases_unidas = [' '.join(lista) for lista in combinacoes]

# Prompt columns that receive a demonstration prefix: every column from the
# fourth one onwards.
colunas_para_concatenar = language_prompt.columns[3:]

# NOTE(review): demonstrations are drawn from the same rows that are queried,
# so a row can receive its own filled-in triple as context -- confirm this is
# intended.

# One demonstration per prompt: start from an independent copy of the
# identifying columns, then prefix every prompt column with a demonstration
# picked at random for each row. Demonstration and prompt are joined with a
# single space so the two sentences do not run into each other.
filled_sentences_language_witHelp_1 = language_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(itens_selecionados) for _ in range(language_prompt.shape[0])]
    filled_sentences_language_witHelp_1[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, language_prompt[coluna])
    ]

# Three demonstrations per prompt: reseed, then repeat the procedure with the
# joined three-sentence contexts as the pool of prefixes.
random.seed(42)
filled_sentences_language_witHelp_3 = language_prompt[['sub_label', 'template', 'obj_label']].copy()
for coluna in colunas_para_concatenar:
    frases_aleatorias = [random.choice(frases_unidas) for _ in range(language_prompt.shape[0])]
    filled_sentences_language_witHelp_3[coluna] = [
        frase + ' ' + valor for frase, valor in zip(frases_aleatorias, language_prompt[coluna])
    ]

#### Roberta

In [25]:
def mask_substitution(text):
    """
    Replace every '[MASK]' with '<mask>' in the input text.

    Parameters:
    - text: Input text

    Returns:
    - Transformed text
    """
    return re.sub(r'\[MASK\]', '<mask>', text)

def mask_substitution_use(df, columns):
    """
    Apply `mask_substitution` to the given columns of a DataFrame.

    The conversion is done on a copy. Converting in place would make the
    returned frame and the caller's frame the same object, so prompts that
    still need the '[MASK]' token would silently be rewritten as well.

    Parameters:
    - df: DataFrame holding the prompt columns (left unmodified)
    - columns: Iterable of column labels to convert

    Returns:
    - A new DataFrame equal to `df` except that '[MASK]' is replaced by
      '<mask>' in every listed column
    """
    converted = df.copy()
    for coluna in columns:
        converted[coluna] = converted[coluna].apply(mask_substitution)

    return converted

In [69]:
# '<mask>' variants of the demonstration-prefixed prompts. Each call receives a
# copy, so the source frames keep their '[MASK]' token for the cells that read
# them afterwards; passing the frame itself would let mask_substitution_use
# rewrite it in place.
# NOTE(review): the `*_witHelp_2_roberta` names hold the converted
# `*_witHelp_3` frames; the names are kept because they may be used elsewhere.

# Born In
filled_sentences_bornIn_witHelp_1_roberta = mask_substitution_use(filled_sentences_bornIn_witHelp_1.copy(), filled_sentences_bornIn_witHelp_1.columns[3:])
filled_sentences_bornIn_witHelp_2_roberta = mask_substitution_use(filled_sentences_bornIn_witHelp_3.copy(), filled_sentences_bornIn_witHelp_3.columns[3:])

# Capital
filled_sentences_capital_witHelp_1_roberta = mask_substitution_use(filled_sentences_capital_witHelp_1.copy(), filled_sentences_capital_witHelp_1.columns[3:])
filled_sentences_capital_witHelp_2_roberta = mask_substitution_use(filled_sentences_capital_witHelp_3.copy(), filled_sentences_capital_witHelp_3.columns[3:])

# Died In
filled_sentences_diedIn_witHelp_1_roberta = mask_substitution_use(filled_sentences_diedIn_witHelp_1.copy(), filled_sentences_diedIn_witHelp_1.columns[3:])
filled_sentences_diedIn_witHelp_2_roberta = mask_substitution_use(filled_sentences_diedIn_witHelp_3.copy(), filled_sentences_diedIn_witHelp_3.columns[3:])

# Works For
filled_sentences_worksfor_witHelp_1_roberta = mask_substitution_use(filled_sentences_worksfor_witHelp_1.copy(), filled_sentences_worksfor_witHelp_1.columns[3:])
filled_sentences_worksfor_witHelp_2_roberta = mask_substitution_use(filled_sentences_worksfor_witHelp_3.copy(), filled_sentences_worksfor_witHelp_3.columns[3:])

# Citizen
filled_sentences_citizen_witHelp_1_roberta = mask_substitution_use(filled_sentences_citizen_witHelp_1.copy(), filled_sentences_citizen_witHelp_1.columns[3:])
filled_sentences_citizen_witHelp_2_roberta = mask_substitution_use(filled_sentences_citizen_witHelp_3.copy(), filled_sentences_citizen_witHelp_3.columns[3:])

In [38]:
# Language
# Convert copies, so the source frames keep their '[MASK]' token; passing the
# frame itself would let mask_substitution_use rewrite it in place.
# NOTE(review): `*_witHelp_2_roberta` holds the converted `*_witHelp_3` frame.
filled_sentences_language_witHelp_1_roberta = mask_substitution_use(filled_sentences_language_witHelp_1.copy(), filled_sentences_language_witHelp_1.columns[3:])
filled_sentences_language_witHelp_2_roberta = mask_substitution_use(filled_sentences_language_witHelp_3.copy(), filled_sentences_language_witHelp_3.columns[3:])

## Utilizando

### One Shot

#### Born In

##### Bert

In [70]:
# One-shot "born in" prompts with `unmasker_bertLarge`: one call of
# process_prompt_results2 for the original masked prompt and one for each of
# the four T5 paraphrase columns, evaluated in that order.
(
    outputs_bert_bornIn_original_one_shoot,
    outputs_bert_bornIn_t5_0_one_shoot,
    outputs_bert_bornIn_t5_1_one_shoot,
    outputs_bert_bornIn_t5_2_one_shoot,
    outputs_bert_bornIn_t5_3_one_shoot,
) = (
    process_prompt_results2(filled_sentences_bornIn_witHelp_1[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_bornIn_t5_bert_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_bornIn_t5,
    'original': outputs_bert_bornIn_original_one_shoot,
    't5_0': outputs_bert_bornIn_t5_0_one_shoot,
    't5_1': outputs_bert_bornIn_t5_1_one_shoot,
    't5_2': outputs_bert_bornIn_t5_2_one_shoot,
    't5_3': outputs_bert_bornIn_t5_3_one_shoot,
})

In [71]:
# Write the one-shot BERT / born-in result table to disk as CSV. The target is
# an absolute, machine-specific path, so the directory must already exist.
outputs_bornIn_t5_bert_one_shoot.to_csv('/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_bert_one_shoot.csv')

##### Roberta

In [72]:
# One-shot "born in" prompts ('<mask>' variant) with `unmasker_robertaLarge`:
# one call of process_prompt_results_roberta2 for the original masked prompt
# and one for each of the four T5 paraphrase columns, evaluated in that order.
(
    outputs_roberta_bornIn_original_one_shoot,
    outputs_roberta_bornIn_t5_0_one_shoot,
    outputs_roberta_bornIn_t5_1_one_shoot,
    outputs_roberta_bornIn_t5_2_one_shoot,
    outputs_roberta_bornIn_t5_3_one_shoot,
) = (
    process_prompt_results_roberta2(filled_sentences_bornIn_witHelp_1_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_bornIn_t5_roberta_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_bornIn_t5,
    'original': outputs_roberta_bornIn_original_one_shoot,
    't5_0': outputs_roberta_bornIn_t5_0_one_shoot,
    't5_1': outputs_roberta_bornIn_t5_1_one_shoot,
    't5_2': outputs_roberta_bornIn_t5_2_one_shoot,
    't5_3': outputs_roberta_bornIn_t5_3_one_shoot,
})

In [73]:
# Write the one-shot RoBERTa / born-in result table to disk as CSV. The target
# is an absolute, machine-specific path, so the directory must already exist.
outputs_bornIn_t5_roberta_one_shoot.to_csv('/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_roberta_one_shoot.csv')

##### Electra

In [74]:
# One-shot "born in" prompts with `unmasker_electraLarge`: one call of
# process_prompt_results2 for the original masked prompt and one for each of
# the four T5 paraphrase columns, evaluated in that order.
(
    outputs_electra_bornIn_original_one_shoot,
    outputs_electra_bornIn_t5_0_one_shoot,
    outputs_electra_bornIn_t5_1_one_shoot,
    outputs_electra_bornIn_t5_2_one_shoot,
    outputs_electra_bornIn_t5_3_one_shoot,
) = (
    process_prompt_results2(filled_sentences_bornIn_witHelp_1[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_bornIn_t5_electra_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_bornIn_t5,
    'original': outputs_electra_bornIn_original_one_shoot,
    't5_0': outputs_electra_bornIn_t5_0_one_shoot,
    't5_1': outputs_electra_bornIn_t5_1_one_shoot,
    't5_2': outputs_electra_bornIn_t5_2_one_shoot,
    't5_3': outputs_electra_bornIn_t5_3_one_shoot,
})

In [75]:
# Write the one-shot Electra / born-in result table to disk as CSV. The target
# is an absolute, machine-specific path, so the directory must already exist.
outputs_bornIn_t5_electra_one_shoot.to_csv('/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_electra_one_shoot.csv')

#### Died In

##### Bert

In [76]:
# One-shot "died in" prompts with `unmasker_bertLarge`: one call of
# process_prompt_results2 for the original masked prompt and one for each of
# the four T5 paraphrase columns, evaluated in that order.
(
    outputs_bert_diedIn_original_one_shoot,
    outputs_bert_diedIn_t5_0_one_shoot,
    outputs_bert_diedIn_t5_1_one_shoot,
    outputs_bert_diedIn_t5_2_one_shoot,
    outputs_bert_diedIn_t5_3_one_shoot,
) = (
    process_prompt_results2(filled_sentences_diedIn_witHelp_1[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_diedIn_t5_bert_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_diedIn_t5,
    'original': outputs_bert_diedIn_original_one_shoot,
    't5_0': outputs_bert_diedIn_t5_0_one_shoot,
    't5_1': outputs_bert_diedIn_t5_1_one_shoot,
    't5_2': outputs_bert_diedIn_t5_2_one_shoot,
    't5_3': outputs_bert_diedIn_t5_3_one_shoot,
})

In [77]:
# Write the one-shot BERT / died-in result table to disk as CSV. The target is
# an absolute, machine-specific path, so the directory must already exist.
outputs_diedIn_t5_bert_one_shoot.to_csv('/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_bert_one_shoot.csv')

##### Roberta

In [78]:
# One-shot "died in" prompts ('<mask>' variant) with `unmasker_robertaLarge`:
# one call of process_prompt_results_roberta2 for the original masked prompt
# and one for each of the four T5 paraphrase columns, evaluated in that order.
(
    outputs_roberta_diedIn_original_one_shoot,
    outputs_roberta_diedIn_t5_0_one_shoot,
    outputs_roberta_diedIn_t5_1_one_shoot,
    outputs_roberta_diedIn_t5_2_one_shoot,
    outputs_roberta_diedIn_t5_3_one_shoot,
) = (
    process_prompt_results_roberta2(filled_sentences_diedIn_witHelp_1_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_diedIn_t5_roberta_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_diedIn_t5,
    'original': outputs_roberta_diedIn_original_one_shoot,
    't5_0': outputs_roberta_diedIn_t5_0_one_shoot,
    't5_1': outputs_roberta_diedIn_t5_1_one_shoot,
    't5_2': outputs_roberta_diedIn_t5_2_one_shoot,
    't5_3': outputs_roberta_diedIn_t5_3_one_shoot,
})

In [79]:
# Write the one-shot RoBERTa / died-in result table to disk as CSV. The target
# is an absolute, machine-specific path, so the directory must already exist.
outputs_diedIn_t5_roberta_one_shoot.to_csv('/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_roberta_one_shoot.csv')

##### Electra

In [80]:
# One-shot "died in" prompts with `unmasker_electraLarge`: one call of
# process_prompt_results2 for the original masked prompt and one for each of
# the four T5 paraphrase columns, evaluated in that order.
(
    outputs_electra_diedIn_original_one_shoot,
    outputs_electra_diedIn_t5_0_one_shoot,
    outputs_electra_diedIn_t5_1_one_shoot,
    outputs_electra_diedIn_t5_2_one_shoot,
    outputs_electra_diedIn_t5_3_one_shoot,
) = (
    process_prompt_results2(filled_sentences_diedIn_witHelp_1[prompt_column], unmasker_electraLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_diedIn_t5_electra_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_diedIn_t5,
    'original': outputs_electra_diedIn_original_one_shoot,
    't5_0': outputs_electra_diedIn_t5_0_one_shoot,
    't5_1': outputs_electra_diedIn_t5_1_one_shoot,
    't5_2': outputs_electra_diedIn_t5_2_one_shoot,
    't5_3': outputs_electra_diedIn_t5_3_one_shoot,
})

In [81]:
# Write the one-shot Electra / died-in result table to disk as CSV. The target
# is an absolute, machine-specific path, so the directory must already exist.
outputs_diedIn_t5_electra_one_shoot.to_csv('/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_electra_one_shoot.csv')

#### Capital

##### Bert

In [82]:
# One-shot "capital" prompts with `unmasker_bertLarge`: one call of
# process_prompt_results2 for the original masked prompt and one for each of
# the four T5 paraphrase columns, evaluated in that order.
(
    outputs_bert_capital_original_one_shoot,
    outputs_bert_capital_t5_0_one_shoot,
    outputs_bert_capital_t5_1_one_shoot,
    outputs_bert_capital_t5_2_one_shoot,
    outputs_bert_capital_t5_3_one_shoot,
) = (
    process_prompt_results2(filled_sentences_capital_witHelp_1[prompt_column], unmasker_bertLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_capital_t5_bert_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_capital_t5,
    'original': outputs_bert_capital_original_one_shoot,
    't5_0': outputs_bert_capital_t5_0_one_shoot,
    't5_1': outputs_bert_capital_t5_1_one_shoot,
    't5_2': outputs_bert_capital_t5_2_one_shoot,
    't5_3': outputs_bert_capital_t5_3_one_shoot,
})

In [83]:
# Write the one-shot BERT / capital result table to disk as CSV. The target is
# an absolute, machine-specific path, so the directory must already exist.
outputs_capital_t5_bert_one_shoot.to_csv('/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_bert_one_shoot.csv')

##### Roberta

In [84]:
# One-shot "capital" prompts ('<mask>' variant) with `unmasker_robertaLarge`:
# one call of process_prompt_results_roberta2 for the original masked prompt
# and one for each of the four T5 paraphrase columns, evaluated in that order.
(
    outputs_roberta_capital_original_one_shoot,
    outputs_roberta_capital_t5_0_one_shoot,
    outputs_roberta_capital_t5_1_one_shoot,
    outputs_roberta_capital_t5_2_one_shoot,
    outputs_roberta_capital_t5_3_one_shoot,
) = (
    process_prompt_results_roberta2(filled_sentences_capital_witHelp_1_roberta[prompt_column], unmasker_robertaLarge)
    for prompt_column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
)

# Side-by-side table: the `answers` column next to the result of every prompt variant.
outputs_capital_t5_roberta_one_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_capital_t5,
    'original': outputs_roberta_capital_original_one_shoot,
    't5_0': outputs_roberta_capital_t5_0_one_shoot,
    't5_1': outputs_roberta_capital_t5_1_one_shoot,
    't5_2': outputs_roberta_capital_t5_2_one_shoot,
    't5_3': outputs_roberta_capital_t5_3_one_shoot,
})

In [85]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_capital_t5_roberta_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_roberta_one_shoot.csv'
)

##### Electra

In [86]:
# Run `process_prompt_results2` with `unmasker_electraLarge` over
# `filled_sentences_capital_witHelp_1`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_electra_capital_original_one_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_1['masked_sentence'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_0_one_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_1['T5_paraphrased_filled_sentence_0'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_1_one_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_1['T5_paraphrased_filled_sentence_1'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_2_one_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_1['T5_paraphrased_filled_sentence_2'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_3_one_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_1['T5_paraphrased_filled_sentence_3'],
    unmasker_electraLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_capital_t5_electra_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_capital_t5,
        'original': outputs_electra_capital_original_one_shoot,
        't5_0': outputs_electra_capital_t5_0_one_shoot,
        't5_1': outputs_electra_capital_t5_1_one_shoot,
        't5_2': outputs_electra_capital_t5_2_one_shoot,
        't5_3': outputs_electra_capital_t5_3_one_shoot,
    }
)

In [87]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_capital_t5_electra_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_electra_one_shoot.csv'
)

#### Works for

##### Bert

In [88]:
# Run `process_prompt_results2` with `unmasker_bertLarge` over
# `filled_sentences_worksfor_witHelp_1`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_bert_worksfor_original_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['masked_sentence'],
    unmasker_bertLarge,
)
outputs_bert_worksfor_t5_0_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_0'],
    unmasker_bertLarge,
)
outputs_bert_worksfor_t5_1_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_1'],
    unmasker_bertLarge,
)
outputs_bert_worksfor_t5_2_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_2'],
    unmasker_bertLarge,
)
outputs_bert_worksfor_t5_3_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_3'],
    unmasker_bertLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_worksfor_t5_bert_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_worksfor_t5,
        'original': outputs_bert_worksfor_original_one_shoot,
        't5_0': outputs_bert_worksfor_t5_0_one_shoot,
        't5_1': outputs_bert_worksfor_t5_1_one_shoot,
        't5_2': outputs_bert_worksfor_t5_2_one_shoot,
        't5_3': outputs_bert_worksfor_t5_3_one_shoot,
    }
)

In [89]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_worksfor_t5_bert_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_bert_one_shoot.csv'
)

##### Roberta

In [90]:
# Run `process_prompt_results_roberta2` with `unmasker_robertaLarge` over
# `filled_sentences_worksfor_witHelp_1_roberta`: the masked-sentence column
# first, then each of the four T5 paraphrase columns.
outputs_roberta_worksfor_original_one_shoot = process_prompt_results_roberta2(
    filled_sentences_worksfor_witHelp_1_roberta['masked_sentence'],
    unmasker_robertaLarge,
)
outputs_roberta_worksfor_t5_0_one_shoot = process_prompt_results_roberta2(
    filled_sentences_worksfor_witHelp_1_roberta['T5_paraphrased_filled_sentence_0'],
    unmasker_robertaLarge,
)
outputs_roberta_worksfor_t5_1_one_shoot = process_prompt_results_roberta2(
    filled_sentences_worksfor_witHelp_1_roberta['T5_paraphrased_filled_sentence_1'],
    unmasker_robertaLarge,
)
outputs_roberta_worksfor_t5_2_one_shoot = process_prompt_results_roberta2(
    filled_sentences_worksfor_witHelp_1_roberta['T5_paraphrased_filled_sentence_2'],
    unmasker_robertaLarge,
)
outputs_roberta_worksfor_t5_3_one_shoot = process_prompt_results_roberta2(
    filled_sentences_worksfor_witHelp_1_roberta['T5_paraphrased_filled_sentence_3'],
    unmasker_robertaLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_worksfor_t5_roberta_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_worksfor_t5,
        'original': outputs_roberta_worksfor_original_one_shoot,
        't5_0': outputs_roberta_worksfor_t5_0_one_shoot,
        't5_1': outputs_roberta_worksfor_t5_1_one_shoot,
        't5_2': outputs_roberta_worksfor_t5_2_one_shoot,
        't5_3': outputs_roberta_worksfor_t5_3_one_shoot,
    }
)

In [91]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_worksfor_t5_roberta_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_roberta_one_shoot.csv'
)

##### Electra

In [92]:
# Run `process_prompt_results2` with `unmasker_electraLarge` over
# `filled_sentences_worksfor_witHelp_1`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_electra_worksfor_original_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['masked_sentence'],
    unmasker_electraLarge,
)
outputs_electra_worksfor_t5_0_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_0'],
    unmasker_electraLarge,
)
outputs_electra_worksfor_t5_1_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_1'],
    unmasker_electraLarge,
)
outputs_electra_worksfor_t5_2_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_2'],
    unmasker_electraLarge,
)
outputs_electra_worksfor_t5_3_one_shoot = process_prompt_results2(
    filled_sentences_worksfor_witHelp_1['T5_paraphrased_filled_sentence_3'],
    unmasker_electraLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_worksfor_t5_electra_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_worksfor_t5,
        'original': outputs_electra_worksfor_original_one_shoot,
        't5_0': outputs_electra_worksfor_t5_0_one_shoot,
        't5_1': outputs_electra_worksfor_t5_1_one_shoot,
        't5_2': outputs_electra_worksfor_t5_2_one_shoot,
        't5_3': outputs_electra_worksfor_t5_3_one_shoot,
    }
)

In [93]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_worksfor_t5_electra_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_electra_one_shoot.csv'
)

#### Citizen

##### Bert

In [94]:
# Run `process_prompt_results2` with `unmasker_bertLarge` over
# `filled_sentences_citizen_witHelp_1`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_bert_citizen_original_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['masked_sentence'],
    unmasker_bertLarge,
)
outputs_bert_citizen_t5_0_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_0'],
    unmasker_bertLarge,
)
outputs_bert_citizen_t5_1_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_1'],
    unmasker_bertLarge,
)
outputs_bert_citizen_t5_2_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_2'],
    unmasker_bertLarge,
)
outputs_bert_citizen_t5_3_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_3'],
    unmasker_bertLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_citizen_t5_bert_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_citizen_t5,
        'original': outputs_bert_citizen_original_one_shoot,
        't5_0': outputs_bert_citizen_t5_0_one_shoot,
        't5_1': outputs_bert_citizen_t5_1_one_shoot,
        't5_2': outputs_bert_citizen_t5_2_one_shoot,
        't5_3': outputs_bert_citizen_t5_3_one_shoot,
    }
)

In [95]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_citizen_t5_bert_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_bert_one_shoot.csv'
)

##### Roberta

In [96]:
# Run `process_prompt_results_roberta2` with `unmasker_robertaLarge` over
# `filled_sentences_citizen_witHelp_1_roberta`: the masked-sentence column
# first, then each of the four T5 paraphrase columns.
outputs_roberta_citizen_original_one_shoot = process_prompt_results_roberta2(
    filled_sentences_citizen_witHelp_1_roberta['masked_sentence'],
    unmasker_robertaLarge,
)
outputs_roberta_citizen_t5_0_one_shoot = process_prompt_results_roberta2(
    filled_sentences_citizen_witHelp_1_roberta['T5_paraphrased_filled_sentence_0'],
    unmasker_robertaLarge,
)
outputs_roberta_citizen_t5_1_one_shoot = process_prompt_results_roberta2(
    filled_sentences_citizen_witHelp_1_roberta['T5_paraphrased_filled_sentence_1'],
    unmasker_robertaLarge,
)
outputs_roberta_citizen_t5_2_one_shoot = process_prompt_results_roberta2(
    filled_sentences_citizen_witHelp_1_roberta['T5_paraphrased_filled_sentence_2'],
    unmasker_robertaLarge,
)
outputs_roberta_citizen_t5_3_one_shoot = process_prompt_results_roberta2(
    filled_sentences_citizen_witHelp_1_roberta['T5_paraphrased_filled_sentence_3'],
    unmasker_robertaLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_citizen_t5_roberta_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_citizen_t5,
        'original': outputs_roberta_citizen_original_one_shoot,
        't5_0': outputs_roberta_citizen_t5_0_one_shoot,
        't5_1': outputs_roberta_citizen_t5_1_one_shoot,
        't5_2': outputs_roberta_citizen_t5_2_one_shoot,
        't5_3': outputs_roberta_citizen_t5_3_one_shoot,
    }
)

In [97]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_citizen_t5_roberta_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_roberta_one_shoot.csv'
)

##### Electra

In [98]:
# Run `process_prompt_results2` with `unmasker_electraLarge` over
# `filled_sentences_citizen_witHelp_1`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_electra_citizen_original_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['masked_sentence'],
    unmasker_electraLarge,
)
outputs_electra_citizen_t5_0_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_0'],
    unmasker_electraLarge,
)
outputs_electra_citizen_t5_1_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_1'],
    unmasker_electraLarge,
)
outputs_electra_citizen_t5_2_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_2'],
    unmasker_electraLarge,
)
outputs_electra_citizen_t5_3_one_shoot = process_prompt_results2(
    filled_sentences_citizen_witHelp_1['T5_paraphrased_filled_sentence_3'],
    unmasker_electraLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_citizen_t5_electra_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_citizen_t5,
        'original': outputs_electra_citizen_original_one_shoot,
        't5_0': outputs_electra_citizen_t5_0_one_shoot,
        't5_1': outputs_electra_citizen_t5_1_one_shoot,
        't5_2': outputs_electra_citizen_t5_2_one_shoot,
        't5_3': outputs_electra_citizen_t5_3_one_shoot,
    }
)

In [99]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_citizen_t5_electra_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_electra_one_shoot.csv'
)

#### Language

##### Bert

In [26]:
# Run `process_prompt_results2` with `unmasker_bertLarge` over
# `filled_sentences_language_witHelp_1`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_bert_language_original_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['masked_sentence'],
    unmasker_bertLarge,
)
outputs_bert_language_t5_0_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_0'],
    unmasker_bertLarge,
)
outputs_bert_language_t5_1_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_1'],
    unmasker_bertLarge,
)
outputs_bert_language_t5_2_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_2'],
    unmasker_bertLarge,
)
outputs_bert_language_t5_3_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_3'],
    unmasker_bertLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_language_t5_bert_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_language_t5,
        'original': outputs_bert_language_original_one_shoot,
        't5_0': outputs_bert_language_t5_0_one_shoot,
        't5_1': outputs_bert_language_t5_1_one_shoot,
        't5_2': outputs_bert_language_t5_2_one_shoot,
        't5_3': outputs_bert_language_t5_3_one_shoot,
    }
)

In [27]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_language_t5_bert_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_bert_one_shoot.csv'
)

##### Roberta

In [39]:
# Run `process_prompt_results_roberta2` with `unmasker_robertaLarge` over
# `filled_sentences_language_witHelp_1_roberta`: the masked-sentence column
# first, then each of the four T5 paraphrase columns.
outputs_roberta_language_original_one_shoot = process_prompt_results_roberta2(
    filled_sentences_language_witHelp_1_roberta['masked_sentence'],
    unmasker_robertaLarge,
)
outputs_roberta_language_t5_0_one_shoot = process_prompt_results_roberta2(
    filled_sentences_language_witHelp_1_roberta['T5_paraphrased_filled_sentence_0'],
    unmasker_robertaLarge,
)
outputs_roberta_language_t5_1_one_shoot = process_prompt_results_roberta2(
    filled_sentences_language_witHelp_1_roberta['T5_paraphrased_filled_sentence_1'],
    unmasker_robertaLarge,
)
outputs_roberta_language_t5_2_one_shoot = process_prompt_results_roberta2(
    filled_sentences_language_witHelp_1_roberta['T5_paraphrased_filled_sentence_2'],
    unmasker_robertaLarge,
)
outputs_roberta_language_t5_3_one_shoot = process_prompt_results_roberta2(
    filled_sentences_language_witHelp_1_roberta['T5_paraphrased_filled_sentence_3'],
    unmasker_robertaLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_language_t5_roberta_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_language_t5,
        'original': outputs_roberta_language_original_one_shoot,
        't5_0': outputs_roberta_language_t5_0_one_shoot,
        't5_1': outputs_roberta_language_t5_1_one_shoot,
        't5_2': outputs_roberta_language_t5_2_one_shoot,
        't5_3': outputs_roberta_language_t5_3_one_shoot,
    }
)

In [40]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_language_t5_roberta_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_roberta_one_shoot.csv'
)

##### Electra

In [28]:
# Run `process_prompt_results2` with `unmasker_electraLarge` over
# `filled_sentences_language_witHelp_1`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_electra_language_original_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['masked_sentence'],
    unmasker_electraLarge,
)
outputs_electra_language_t5_0_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_0'],
    unmasker_electraLarge,
)
outputs_electra_language_t5_1_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_1'],
    unmasker_electraLarge,
)
outputs_electra_language_t5_2_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_2'],
    unmasker_electraLarge,
)
outputs_electra_language_t5_3_one_shoot = process_prompt_results2(
    filled_sentences_language_witHelp_1['T5_paraphrased_filled_sentence_3'],
    unmasker_electraLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_language_t5_electra_one_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_language_t5,
        'original': outputs_electra_language_original_one_shoot,
        't5_0': outputs_electra_language_t5_0_one_shoot,
        't5_1': outputs_electra_language_t5_1_one_shoot,
        't5_2': outputs_electra_language_t5_2_one_shoot,
        't5_3': outputs_electra_language_t5_3_one_shoot,
    }
)

In [29]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_language_t5_electra_one_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_electra_one_shoot.csv'
)

### Few Shot

#### Born In

##### Bert

In [100]:
# Run `process_prompt_results2` with `unmasker_bertLarge` over
# `filled_sentences_bornIn_witHelp_3`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_bert_bornIn_original_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['masked_sentence'],
    unmasker_bertLarge,
)
outputs_bert_bornIn_t5_0_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_0'],
    unmasker_bertLarge,
)
outputs_bert_bornIn_t5_1_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_1'],
    unmasker_bertLarge,
)
outputs_bert_bornIn_t5_2_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_2'],
    unmasker_bertLarge,
)
outputs_bert_bornIn_t5_3_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_3'],
    unmasker_bertLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_bornIn_t5_bert_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_bornIn_t5,
        'original': outputs_bert_bornIn_original_few_shoot,
        't5_0': outputs_bert_bornIn_t5_0_few_shoot,
        't5_1': outputs_bert_bornIn_t5_1_few_shoot,
        't5_2': outputs_bert_bornIn_t5_2_few_shoot,
        't5_3': outputs_bert_bornIn_t5_3_few_shoot,
    }
)

In [101]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_bornIn_t5_bert_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_bert_few_shoot.csv'
)

##### Roberta

In [102]:
# Run `process_prompt_results_roberta2` with `unmasker_robertaLarge` over
# `filled_sentences_bornIn_witHelp_2_roberta`: the masked-sentence column
# first, then each of the four T5 paraphrase columns.
# NOTE(review): the BERT and ELECTRA few_shoot cells for bornIn read the
# `witHelp_3` frame, whereas this cell reads `witHelp_2_roberta` -- confirm
# that this is the intended few-shot input for RoBERTa.
outputs_roberta_bornIn_original_few_shoot = process_prompt_results_roberta2(
    filled_sentences_bornIn_witHelp_2_roberta['masked_sentence'],
    unmasker_robertaLarge,
)
outputs_roberta_bornIn_t5_0_few_shoot = process_prompt_results_roberta2(
    filled_sentences_bornIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_0'],
    unmasker_robertaLarge,
)
outputs_roberta_bornIn_t5_1_few_shoot = process_prompt_results_roberta2(
    filled_sentences_bornIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_1'],
    unmasker_robertaLarge,
)
outputs_roberta_bornIn_t5_2_few_shoot = process_prompt_results_roberta2(
    filled_sentences_bornIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_2'],
    unmasker_robertaLarge,
)
outputs_roberta_bornIn_t5_3_few_shoot = process_prompt_results_roberta2(
    filled_sentences_bornIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_3'],
    unmasker_robertaLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_bornIn_t5_roberta_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_bornIn_t5,
        'original': outputs_roberta_bornIn_original_few_shoot,
        't5_0': outputs_roberta_bornIn_t5_0_few_shoot,
        't5_1': outputs_roberta_bornIn_t5_1_few_shoot,
        't5_2': outputs_roberta_bornIn_t5_2_few_shoot,
        't5_3': outputs_roberta_bornIn_t5_3_few_shoot,
    }
)

In [103]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_bornIn_t5_roberta_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_roberta_few_shoot.csv'
)

##### Electra

In [104]:
# Run `process_prompt_results2` with `unmasker_electraLarge` over
# `filled_sentences_bornIn_witHelp_3`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_electra_bornIn_original_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['masked_sentence'],
    unmasker_electraLarge,
)
outputs_electra_bornIn_t5_0_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_0'],
    unmasker_electraLarge,
)
outputs_electra_bornIn_t5_1_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_1'],
    unmasker_electraLarge,
)
outputs_electra_bornIn_t5_2_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_2'],
    unmasker_electraLarge,
)
outputs_electra_bornIn_t5_3_few_shoot = process_prompt_results2(
    filled_sentences_bornIn_witHelp_3['T5_paraphrased_filled_sentence_3'],
    unmasker_electraLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_bornIn_t5_electra_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_bornIn_t5,
        'original': outputs_electra_bornIn_original_few_shoot,
        't5_0': outputs_electra_bornIn_t5_0_few_shoot,
        't5_1': outputs_electra_bornIn_t5_1_few_shoot,
        't5_2': outputs_electra_bornIn_t5_2_few_shoot,
        't5_3': outputs_electra_bornIn_t5_3_few_shoot,
    }
)

In [105]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_bornIn_t5_electra_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_bornIn_t5_electra_few_shoot.csv'
)

#### Died In

##### Bert

In [106]:
# Run `process_prompt_results2` with `unmasker_bertLarge` over
# `filled_sentences_diedIn_witHelp_3`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_bert_diedIn_original_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['masked_sentence'],
    unmasker_bertLarge,
)
outputs_bert_diedIn_t5_0_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_0'],
    unmasker_bertLarge,
)
outputs_bert_diedIn_t5_1_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_1'],
    unmasker_bertLarge,
)
outputs_bert_diedIn_t5_2_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_2'],
    unmasker_bertLarge,
)
outputs_bert_diedIn_t5_3_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_3'],
    unmasker_bertLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_diedIn_t5_bert_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_diedIn_t5,
        'original': outputs_bert_diedIn_original_few_shoot,
        't5_0': outputs_bert_diedIn_t5_0_few_shoot,
        't5_1': outputs_bert_diedIn_t5_1_few_shoot,
        't5_2': outputs_bert_diedIn_t5_2_few_shoot,
        't5_3': outputs_bert_diedIn_t5_3_few_shoot,
    }
)

In [107]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_diedIn_t5_bert_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_bert_few_shoot.csv'
)

##### Roberta

In [108]:
# Run `process_prompt_results_roberta2` with `unmasker_robertaLarge` over
# `filled_sentences_diedIn_witHelp_2_roberta`: the masked-sentence column
# first, then each of the four T5 paraphrase columns.
# NOTE(review): the BERT and ELECTRA few_shoot cells for diedIn read the
# `witHelp_3` frame, whereas this cell reads `witHelp_2_roberta` -- confirm
# that this is the intended few-shot input for RoBERTa.
outputs_roberta_diedIn_original_few_shoot = process_prompt_results_roberta2(
    filled_sentences_diedIn_witHelp_2_roberta['masked_sentence'],
    unmasker_robertaLarge,
)
outputs_roberta_diedIn_t5_0_few_shoot = process_prompt_results_roberta2(
    filled_sentences_diedIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_0'],
    unmasker_robertaLarge,
)
outputs_roberta_diedIn_t5_1_few_shoot = process_prompt_results_roberta2(
    filled_sentences_diedIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_1'],
    unmasker_robertaLarge,
)
outputs_roberta_diedIn_t5_2_few_shoot = process_prompt_results_roberta2(
    filled_sentences_diedIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_2'],
    unmasker_robertaLarge,
)
outputs_roberta_diedIn_t5_3_few_shoot = process_prompt_results_roberta2(
    filled_sentences_diedIn_witHelp_2_roberta['T5_paraphrased_filled_sentence_3'],
    unmasker_robertaLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_diedIn_t5_roberta_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_diedIn_t5,
        'original': outputs_roberta_diedIn_original_few_shoot,
        't5_0': outputs_roberta_diedIn_t5_0_few_shoot,
        't5_1': outputs_roberta_diedIn_t5_1_few_shoot,
        't5_2': outputs_roberta_diedIn_t5_2_few_shoot,
        't5_3': outputs_roberta_diedIn_t5_3_few_shoot,
    }
)

In [109]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_diedIn_t5_roberta_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_roberta_few_shoot.csv'
)

##### Electra

In [110]:
# Run `process_prompt_results2` with `unmasker_electraLarge` over
# `filled_sentences_diedIn_witHelp_3`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_electra_diedIn_original_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['masked_sentence'],
    unmasker_electraLarge,
)
outputs_electra_diedIn_t5_0_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_0'],
    unmasker_electraLarge,
)
outputs_electra_diedIn_t5_1_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_1'],
    unmasker_electraLarge,
)
outputs_electra_diedIn_t5_2_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_2'],
    unmasker_electraLarge,
)
outputs_electra_diedIn_t5_3_few_shoot = process_prompt_results2(
    filled_sentences_diedIn_witHelp_3['T5_paraphrased_filled_sentence_3'],
    unmasker_electraLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_diedIn_t5_electra_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_diedIn_t5,
        'original': outputs_electra_diedIn_original_few_shoot,
        't5_0': outputs_electra_diedIn_t5_0_few_shoot,
        't5_1': outputs_electra_diedIn_t5_1_few_shoot,
        't5_2': outputs_electra_diedIn_t5_2_few_shoot,
        't5_3': outputs_electra_diedIn_t5_3_few_shoot,
    }
)

In [111]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_diedIn_t5_electra_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_diedIn_t5_electra_few_shoot.csv'
)

#### Capital

##### Bert

In [112]:
# Run `process_prompt_results2` with `unmasker_bertLarge` over
# `filled_sentences_capital_witHelp_3`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_bert_capital_original_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['masked_sentence'],
    unmasker_bertLarge,
)
outputs_bert_capital_t5_0_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_0'],
    unmasker_bertLarge,
)
outputs_bert_capital_t5_1_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_1'],
    unmasker_bertLarge,
)
outputs_bert_capital_t5_2_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_2'],
    unmasker_bertLarge,
)
outputs_bert_capital_t5_3_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_3'],
    unmasker_bertLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_capital_t5_bert_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_capital_t5,
        'original': outputs_bert_capital_original_few_shoot,
        't5_0': outputs_bert_capital_t5_0_few_shoot,
        't5_1': outputs_bert_capital_t5_1_few_shoot,
        't5_2': outputs_bert_capital_t5_2_few_shoot,
        't5_3': outputs_bert_capital_t5_3_few_shoot,
    }
)

In [113]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_capital_t5_bert_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_bert_few_shoot.csv'
)

##### Roberta

In [114]:
# Run `process_prompt_results_roberta2` with `unmasker_robertaLarge` over
# `filled_sentences_capital_witHelp_2_roberta`: the masked-sentence column
# first, then each of the four T5 paraphrase columns.
# NOTE(review): the BERT and ELECTRA few_shoot cells for capital read the
# `witHelp_3` frame, whereas this cell reads `witHelp_2_roberta` -- confirm
# that this is the intended few-shot input for RoBERTa.
outputs_roberta_capital_original_few_shoot = process_prompt_results_roberta2(
    filled_sentences_capital_witHelp_2_roberta['masked_sentence'],
    unmasker_robertaLarge,
)
outputs_roberta_capital_t5_0_few_shoot = process_prompt_results_roberta2(
    filled_sentences_capital_witHelp_2_roberta['T5_paraphrased_filled_sentence_0'],
    unmasker_robertaLarge,
)
outputs_roberta_capital_t5_1_few_shoot = process_prompt_results_roberta2(
    filled_sentences_capital_witHelp_2_roberta['T5_paraphrased_filled_sentence_1'],
    unmasker_robertaLarge,
)
outputs_roberta_capital_t5_2_few_shoot = process_prompt_results_roberta2(
    filled_sentences_capital_witHelp_2_roberta['T5_paraphrased_filled_sentence_2'],
    unmasker_robertaLarge,
)
outputs_roberta_capital_t5_3_few_shoot = process_prompt_results_roberta2(
    filled_sentences_capital_witHelp_2_roberta['T5_paraphrased_filled_sentence_3'],
    unmasker_robertaLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_capital_t5_roberta_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_capital_t5,
        'original': outputs_roberta_capital_original_few_shoot,
        't5_0': outputs_roberta_capital_t5_0_few_shoot,
        't5_1': outputs_roberta_capital_t5_1_few_shoot,
        't5_2': outputs_roberta_capital_t5_2_few_shoot,
        't5_3': outputs_roberta_capital_t5_3_few_shoot,
    }
)

In [115]:
# Save the table as CSV.
# NOTE(review): absolute, machine-specific output path.
outputs_capital_t5_roberta_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_roberta_few_shoot.csv'
)

##### Electra

In [116]:
# Run `process_prompt_results2` with `unmasker_electraLarge` over
# `filled_sentences_capital_witHelp_3`: the masked-sentence column first,
# then each of the four T5 paraphrase columns.
outputs_electra_capital_original_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['masked_sentence'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_0_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_0'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_1_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_1'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_2_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_2'],
    unmasker_electraLarge,
)
outputs_electra_capital_t5_3_few_shoot = process_prompt_results2(
    filled_sentences_capital_witHelp_3['T5_paraphrased_filled_sentence_3'],
    unmasker_electraLarge,
)

# Gather the `answers` column and the five result columns into one table.
outputs_capital_t5_electra_few_shoot = pd.DataFrame(
    data={
        'answers': answer_filled_sentences_capital_t5,
        'original': outputs_electra_capital_original_few_shoot,
        't5_0': outputs_electra_capital_t5_0_few_shoot,
        't5_1': outputs_electra_capital_t5_1_few_shoot,
        't5_2': outputs_electra_capital_t5_2_few_shoot,
        't5_3': outputs_electra_capital_t5_3_few_shoot,
    }
)

In [117]:
# Save the ELECTRA / "capital" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_capital_t5_electra_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_capital_t5_electra_few_shoot.csv'
)

#### Works for

##### Bert

In [118]:
# "worksfor" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results2 together with unmasker_bertLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_bert_worksfor_original_few_shoot,
    outputs_bert_worksfor_t5_0_few_shoot,
    outputs_bert_worksfor_t5_1_few_shoot,
    outputs_bert_worksfor_t5_2_few_shoot,
    outputs_bert_worksfor_t5_3_few_shoot,
) = [
    process_prompt_results2(
        filled_sentences_worksfor_witHelp_3[column],
        unmasker_bertLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_worksfor_t5_bert_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_worksfor_t5,
    'original': outputs_bert_worksfor_original_few_shoot,
    't5_0': outputs_bert_worksfor_t5_0_few_shoot,
    't5_1': outputs_bert_worksfor_t5_1_few_shoot,
    't5_2': outputs_bert_worksfor_t5_2_few_shoot,
    't5_3': outputs_bert_worksfor_t5_3_few_shoot,
})

In [119]:
# Save the BERT / "worksfor" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_worksfor_t5_bert_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_bert_few_shoot.csv'
)

##### Roberta

In [120]:
# "worksfor" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results_roberta2 together with unmasker_robertaLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_roberta_worksfor_original_few_shoot,
    outputs_roberta_worksfor_t5_0_few_shoot,
    outputs_roberta_worksfor_t5_1_few_shoot,
    outputs_roberta_worksfor_t5_2_few_shoot,
    outputs_roberta_worksfor_t5_3_few_shoot,
) = [
    process_prompt_results_roberta2(
        filled_sentences_worksfor_witHelp_2_roberta[column],
        unmasker_robertaLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_worksfor_t5_roberta_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_worksfor_t5,
    'original': outputs_roberta_worksfor_original_few_shoot,
    't5_0': outputs_roberta_worksfor_t5_0_few_shoot,
    't5_1': outputs_roberta_worksfor_t5_1_few_shoot,
    't5_2': outputs_roberta_worksfor_t5_2_few_shoot,
    't5_3': outputs_roberta_worksfor_t5_3_few_shoot,
})

In [121]:
# Save the RoBERTa / "worksfor" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_worksfor_t5_roberta_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_roberta_few_shoot.csv'
)

##### Electra

In [122]:
# "worksfor" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results2 together with unmasker_electraLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_electra_worksfor_original_few_shoot,
    outputs_electra_worksfor_t5_0_few_shoot,
    outputs_electra_worksfor_t5_1_few_shoot,
    outputs_electra_worksfor_t5_2_few_shoot,
    outputs_electra_worksfor_t5_3_few_shoot,
) = [
    process_prompt_results2(
        filled_sentences_worksfor_witHelp_3[column],
        unmasker_electraLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_worksfor_t5_electra_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_worksfor_t5,
    'original': outputs_electra_worksfor_original_few_shoot,
    't5_0': outputs_electra_worksfor_t5_0_few_shoot,
    't5_1': outputs_electra_worksfor_t5_1_few_shoot,
    't5_2': outputs_electra_worksfor_t5_2_few_shoot,
    't5_3': outputs_electra_worksfor_t5_3_few_shoot,
})

In [123]:
# Save the ELECTRA / "worksfor" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_worksfor_t5_electra_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_worksfor_t5_electra_few_shoot.csv'
)

#### Citizen

##### Bert

In [124]:
# "citizen" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results2 together with unmasker_bertLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_bert_citizen_original_few_shoot,
    outputs_bert_citizen_t5_0_few_shoot,
    outputs_bert_citizen_t5_1_few_shoot,
    outputs_bert_citizen_t5_2_few_shoot,
    outputs_bert_citizen_t5_3_few_shoot,
) = [
    process_prompt_results2(
        filled_sentences_citizen_witHelp_3[column],
        unmasker_bertLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_citizen_t5_bert_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_citizen_t5,
    'original': outputs_bert_citizen_original_few_shoot,
    't5_0': outputs_bert_citizen_t5_0_few_shoot,
    't5_1': outputs_bert_citizen_t5_1_few_shoot,
    't5_2': outputs_bert_citizen_t5_2_few_shoot,
    't5_3': outputs_bert_citizen_t5_3_few_shoot,
})

In [125]:
# Save the BERT / "citizen" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_citizen_t5_bert_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_bert_few_shoot.csv'
)

##### Roberta

In [126]:
# "citizen" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results_roberta2 together with unmasker_robertaLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_roberta_citizen_original_few_shoot,
    outputs_roberta_citizen_t5_0_few_shoot,
    outputs_roberta_citizen_t5_1_few_shoot,
    outputs_roberta_citizen_t5_2_few_shoot,
    outputs_roberta_citizen_t5_3_few_shoot,
) = [
    process_prompt_results_roberta2(
        filled_sentences_citizen_witHelp_2_roberta[column],
        unmasker_robertaLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_citizen_t5_roberta_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_citizen_t5,
    'original': outputs_roberta_citizen_original_few_shoot,
    't5_0': outputs_roberta_citizen_t5_0_few_shoot,
    't5_1': outputs_roberta_citizen_t5_1_few_shoot,
    't5_2': outputs_roberta_citizen_t5_2_few_shoot,
    't5_3': outputs_roberta_citizen_t5_3_few_shoot,
})

In [127]:
# Save the RoBERTa / "citizen" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_citizen_t5_roberta_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_roberta_few_shoot.csv'
)

##### Electra

In [128]:
# "citizen" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results2 together with unmasker_electraLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_electra_citizen_original_few_shoot,
    outputs_electra_citizen_t5_0_few_shoot,
    outputs_electra_citizen_t5_1_few_shoot,
    outputs_electra_citizen_t5_2_few_shoot,
    outputs_electra_citizen_t5_3_few_shoot,
) = [
    process_prompt_results2(
        filled_sentences_citizen_witHelp_3[column],
        unmasker_electraLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_citizen_t5_electra_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_citizen_t5,
    'original': outputs_electra_citizen_original_few_shoot,
    't5_0': outputs_electra_citizen_t5_0_few_shoot,
    't5_1': outputs_electra_citizen_t5_1_few_shoot,
    't5_2': outputs_electra_citizen_t5_2_few_shoot,
    't5_3': outputs_electra_citizen_t5_3_few_shoot,
})

In [129]:
# Save the ELECTRA / "citizen" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_citizen_t5_electra_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_citizen_t5_electra_few_shoot.csv'
)

#### Language

##### Bert

In [30]:
# "language" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results2 together with unmasker_bertLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_bert_language_original_few_shoot,
    outputs_bert_language_t5_0_few_shoot,
    outputs_bert_language_t5_1_few_shoot,
    outputs_bert_language_t5_2_few_shoot,
    outputs_bert_language_t5_3_few_shoot,
) = [
    process_prompt_results2(
        filled_sentences_language_witHelp_3[column],
        unmasker_bertLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_language_t5_bert_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_language_t5,
    'original': outputs_bert_language_original_few_shoot,
    't5_0': outputs_bert_language_t5_0_few_shoot,
    't5_1': outputs_bert_language_t5_1_few_shoot,
    't5_2': outputs_bert_language_t5_2_few_shoot,
    't5_3': outputs_bert_language_t5_3_few_shoot,
})

In [31]:
# Save the BERT / "language" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_language_t5_bert_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_bert_few_shoot.csv'
)

##### Roberta

In [41]:
# "language" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results_roberta2 together with unmasker_robertaLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_roberta_language_original_few_shoot,
    outputs_roberta_language_t5_0_few_shoot,
    outputs_roberta_language_t5_1_few_shoot,
    outputs_roberta_language_t5_2_few_shoot,
    outputs_roberta_language_t5_3_few_shoot,
) = [
    process_prompt_results_roberta2(
        filled_sentences_language_witHelp_2_roberta[column],
        unmasker_robertaLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_language_t5_roberta_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_language_t5,
    'original': outputs_roberta_language_original_few_shoot,
    't5_0': outputs_roberta_language_t5_0_few_shoot,
    't5_1': outputs_roberta_language_t5_1_few_shoot,
    't5_2': outputs_roberta_language_t5_2_few_shoot,
    't5_3': outputs_roberta_language_t5_3_few_shoot,
})

In [42]:
# Save the RoBERTa / "language" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_language_t5_roberta_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_roberta_few_shoot.csv'
)

##### Electra

In [32]:
# "language" relation, "few_shoot" run: pass each prompt column through
# process_prompt_results2 together with unmasker_electraLarge.
# Order of evaluation: the original masked sentence first, then the four
# T5-paraphrased variants (0-3).
(
    outputs_electra_language_original_few_shoot,
    outputs_electra_language_t5_0_few_shoot,
    outputs_electra_language_t5_1_few_shoot,
    outputs_electra_language_t5_2_few_shoot,
    outputs_electra_language_t5_3_few_shoot,
) = [
    process_prompt_results2(
        filled_sentences_language_witHelp_3[column],
        unmasker_electraLarge,
    )
    for column in (
        'masked_sentence',
        'T5_paraphrased_filled_sentence_0',
        'T5_paraphrased_filled_sentence_1',
        'T5_paraphrased_filled_sentence_2',
        'T5_paraphrased_filled_sentence_3',
    )
]

# One column per prompt variant, placed next to the `answers` column.
outputs_language_t5_electra_few_shoot = pd.DataFrame({
    'answers': answer_filled_sentences_language_t5,
    'original': outputs_electra_language_original_few_shoot,
    't5_0': outputs_electra_language_t5_0_few_shoot,
    't5_1': outputs_electra_language_t5_1_few_shoot,
    't5_2': outputs_electra_language_t5_2_few_shoot,
    't5_3': outputs_electra_language_t5_3_few_shoot,
})

In [33]:
# Save the ELECTRA / "language" results table as CSV.
# NOTE(review): the destination is an absolute, machine-specific path.
outputs_language_t5_electra_few_shoot.to_csv(
    path_or_buf='/home/rafael/tese/code/data/novos_data/dados/outputs/outputs_language_t5_electra_few_shoot.csv'
)