In [138]:
import pandas as pd
import ast
import spacy
nlp = spacy.load("en_core_web_lg")

In [213]:
# Função para converter uma string para uma lista
def str_to_list(x):
    """
    Converte uma string em uma lista.

    Parâmetros:
    x (str): A string a ser convertida.

    Retorna:
    list: A lista resultante da conversão da string.
          Se a conversão falhar, retorna a string original.
    """
    try:
        return ast.literal_eval(x)
    except (SyntaxError, ValueError):
        return x
    
    # Função para substituir a máscara em uma string com um token específico
def substituir_prompt(sentence, token):
    return sentence.replace('[MASK]', token)

# Função para extrair tokens que são reconhecidos como entidades nomeadas
def extrair_tokens_gpe2(sentence, token_info, nlp):
    token = token_info['token']
    doc = nlp(substituir_prompt(sentence, token.capitalize()))
    #print(doc)
    for ent in doc.ents:
        print(ent)
        if str(ent).lower() == token.lower() and ent.label_ == 'GPE':
            return {'token': token.lower(), 'score': token_info['score']}
    return None

# Função para extrair tokens que são reconhecidos como entidades nomeadas
def extrair_tokens_org2(sentence, token_info, nlp):
    token = token_info['token']
    doc = nlp(substituir_prompt(sentence, token))
    #print(doc)
    for ent in doc.ents:
        if str(ent).lower() == token.lower() and ent.label_ == 'ORG':
            return {'token': token.lower(), 'score': token_info['score']}
    return None

# Função principal para aplicar extração de tokens em um DataFrame
def extrair_tokens_dataframe(dataframe, masked_columns, original_columns, nlp, extrator):
    # Inicializar um novo DataFrame para armazenar os tokens extraídos
    tokens_extraidos = {col: [] for col in original_columns}
    # Iterar sobre cada linha do DataFrame
    for _, row in dataframe.iterrows():
        for masked_col, original_col in zip(masked_columns, original_columns):
            tokens = []
            for token_info in row[original_col]:
                extracted_token = extrator(row[masked_col], token_info, nlp)
                if extracted_token:
                    tokens.append(extracted_token)
            tokens_extraidos[original_col].append(tokens)
    
    # Criar um novo DataFrame com os tokens extraídos
    tokens_dataframe = pd.DataFrame(tokens_extraidos)
    return tokens_dataframe


In [None]:
def mask_words_in_dataframe(df, columns, words_to_mask):
    """
    Substitui palavras específicas por '[MASK]' em colunas específicas de um DataFrame.
    
    :param df: DataFrame onde as palavras serão substituídas.
    :param columns: Lista de colunas onde as palavras serão substituídas.
    :param words_to_mask: Lista de palavras que serão substituídas por '[MASK]'. Deve ter o mesmo comprimento que o DataFrame.
    :return: DataFrame com as palavras substituídas por '[MASK]'.
    """
    df_copy = df.copy()  # Fazer uma cópia do DataFrame para evitar alterar o original
    for col, word in zip(columns, words_to_mask):
        df_copy[col] = df_copy[col].str.replace(word, '[MASK]', regex=False)
    return df_copy

# Spacy

## Zero-Shot

### Bert

In [214]:
prompt_bornIn = pd.read_csv('D:/tese/code/data/novos_data/dados/t5_bornIn.csv')
prompt_bornIn.drop(columns=['Unnamed: 0'], inplace=True)
bornIn_bert = pd.read_csv('D:/tese/code/data_v2/outputs/bert/outputs_bornIn_bert_v2.csv')
bornIn_bert.drop(columns=['Unnamed: 0'], inplace=True)
bornIn_bert = bornIn_bert.map(str_to_list)

prompt_diedIn = pd.read_csv('D:/tese/code/data/novos_data/dados/t5_diedIn.csv')
prompt_diedIn.drop(columns=['Unnamed: 0'], inplace=True)
diedIn_bert = pd.read_csv('D:/tese/code/data_v2/outputs/bert/outputs_diedIn_bert_v2.csv')
diedIn_bert.drop(columns=['Unnamed: 0'], inplace=True)
diedIn_bert = diedIn_bert.map(str_to_list)

prompt_capital = pd.read_csv('D:/tese/code/data/novos_data/dados/t5_capital.csv')
prompt_capital.drop(columns=['Unnamed: 0'], inplace=True)
capital_bert = pd.read_csv('D:/tese/code/data_v2/outputs/bert/outputs_capital_bert_v2.csv')
capital_bert.drop(columns=['Unnamed: 0'], inplace=True)
capital_bert = capital_bert.map(str_to_list)

prompt_citizen = pd.read_csv('D:/tese/code/data/novos_data/dados/t5_citizen.csv')
prompt_citizen.drop(columns=['Unnamed: 0'], inplace=True)
citizen_bert = pd.read_csv('D:/tese/code/data_v2/outputs/bert/outputs_citizen_bert_v2.csv')
citizen_bert.drop(columns=['Unnamed: 0'], inplace=True)
citizen_bert = citizen_bert.map(str_to_list)

prompt_worksfor = pd.read_csv('D:/tese/code/data/novos_data/dados/t5_worksfor.csv').iloc[:379]
prompt_worksfor.drop(columns=['Unnamed: 0'], inplace=True)
worksfor_bert = pd.read_csv('D:/tese/code/data_v2/outputs/bert/outputs_worksfor_bert_v2.csv')
worksfor_bert.drop(columns=['Unnamed: 0'], inplace=True)
worksfor_bert = worksfor_bert.map(str_to_list)

In [215]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_bert_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_bert_spacy.loc[:, 'original'] = bornIn_bert['original']
bornIn_bert_spacy.loc[:, 't5_0'] = bornIn_bert['t5_0']
bornIn_bert_spacy.loc[:, 't5_1'] = bornIn_bert['t5_1']
bornIn_bert_spacy.loc[:, 't5_2'] = bornIn_bert['t5_2']
bornIn_bert_spacy.loc[:, 't5_3'] = bornIn_bert['t5_3']
teste = bornIn_bert_spacy

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_bert_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_bert_spacy.loc[:, 'original'] = diedIn_bert['original']
diedIn_bert_spacy.loc[:, 't5_0'] = diedIn_bert['t5_0']
diedIn_bert_spacy.loc[:, 't5_1'] = diedIn_bert['t5_1']
diedIn_bert_spacy.loc[:, 't5_2'] = diedIn_bert['t5_2']
diedIn_bert_spacy.loc[:, 't5_3'] = diedIn_bert['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_bert_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_bert_spacy.loc[:, 'original'] = capital_bert['original']
capital_bert_spacy.loc[:, 't5_0'] = capital_bert['t5_0']
capital_bert_spacy.loc[:, 't5_1'] = capital_bert['t5_1']
capital_bert_spacy.loc[:, 't5_2'] = capital_bert['t5_2']
capital_bert_spacy.loc[:, 't5_3'] = capital_bert['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_bert_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_bert_spacy.loc[:, 'original'] = citizen_bert['original']
citizen_bert_spacy.loc[:, 't5_0'] = citizen_bert['t5_0']
citizen_bert_spacy.loc[:, 't5_1'] = citizen_bert['t5_1']
citizen_bert_spacy.loc[:, 't5_2'] = citizen_bert['t5_2']
citizen_bert_spacy.loc[:, 't5_3'] = citizen_bert['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_bert_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_bert_spacy.loc[:, 'original'] = worksfor_bert['original']
worksfor_bert_spacy.loc[:, 't5_0'] = worksfor_bert['t5_0']
worksfor_bert_spacy.loc[:, 't5_1'] = worksfor_bert['t5_1']
worksfor_bert_spacy.loc[:, 't5_2'] = worksfor_bert['t5_2']
worksfor_bert_spacy.loc[:, 't5_3'] = worksfor_bert['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_bert_spacy = extrair_tokens_dataframe(bornIn_bert_spacy[:10], masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_bert_spacy = extrair_tokens_dataframe(diedIn_bert_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_bert_spacy = extrair_tokens_dataframe(capital_bert_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_bert_spacy = extrair_tokens_dataframe(citizen_bert_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_bert_spacy = extrair_tokens_dataframe(worksfor_bert_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

In [216]:
# Função para substituir a máscara em uma string com um token específico
def substituir_prompt(sentence, token):
    return sentence.replace('[MASK]', token)

# Função para extrair tokens que são reconhecidos como entidades nomeadas
def extrair_tokens_gpe2(sentence, token_info, nlp):
    token = token_info['token']
    doc = nlp(substituir_prompt(sentence, token))
    print(doc)
    for ent in doc.ents:
        if str(ent).lower() == token.lower() and ent.label_ == 'GPE':
            return {'token': token.lower(), 'score': token_info['score']}
    return None

# Função principal para aplicar extração de tokens em um DataFrame
def extrair_tokens_dataframe(dataframe, masked_columns, original_columns, nlp):
    # Inicializar um novo DataFrame para armazenar os tokens extraídos
    tokens_extraidos = {col: [] for col in original_columns}
    
    # Iterar sobre cada linha do DataFrame
    for _, row in dataframe.iterrows():
        for masked_col, original_col in zip(masked_columns, original_columns):
            tokens = []
            for token_info in row[original_col]:
                extracted_token = extrair_tokens_gpe2(row[masked_col], token_info, nlp)
                if extracted_token:
                    tokens.append(extracted_token)
            tokens_extraidos[original_col].append(tokens)
    
    # Criar um novo DataFrame com os tokens extraídos
    tokens_dataframe = pd.DataFrame(tokens_extraidos)
    
    return tokens_dataframe

# Exemplo de uso
# Supondo que temos um DataFrame `df` com as colunas especificadas
columns_to_process2 = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns2 = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_bert_spacy = extrair_tokens_dataframe(bornIn_bert_spacy[:10], columns_to_process2,original_columns2, nlp)

# Visualizar o resultado
display(bornIn_bert_spacy)

Allan Peiper (born 26 April 1960 in melbourne, Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.
Allan Peiper (born 26 April 1960 in sydney, Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.
Allan Peiper (born 26 April 1960 in brisbane, Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.
Allan Peiper (born 26 April 1960 in adelaide, Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.
Allan Peiper (born 26 April 1960 in perth, Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.
Allan Peiper (born 26 April 1960 in canberra, Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.
Allan Peiper (born 26 April 1960 in victoria, Australia) is a former Professional cyclist, who competed in five Tour de France cycle races.
Allan Peiper (born 26 Ap

Unnamed: 0,original,t5_0,t5_1,t5_2,t5_3
0,"[{'token': 'melbourne', 'score': 0.439}, {'tok...",[],[],[],[]
1,[],[],"[{'token': 'birmingham', 'score': 0.008}]","[{'token': 'birmingham', 'score': 0.009}]",[]
2,[],"[{'token': 'toronto', 'score': 0.035}, {'token...",[],"[{'token': 'australia', 'score': 0.097}, {'tok...","[{'token': 'australia', 'score': 0.049}, {'tok..."
3,"[{'token': 'america', 'score': 0.28}, {'token'...","[{'token': 'chicago', 'score': 0.152}, {'token...",[],"[{'token': 'chicago', 'score': 0.052}, {'token...","[{'token': 'chicago', 'score': 0.117}, {'token..."
4,"[{'token': 'vienna', 'score': 0.831}, {'token'...","[{'token': 'vienna', 'score': 0.513}, {'token'...","[{'token': 'vienna', 'score': 0.813}, {'token'...","[{'token': 'vienna', 'score': 0.857}, {'token'...","[{'token': 'vienna', 'score': 0.517}, {'token'..."
5,"[{'token': 'paris', 'score': 0.685}, {'token':...","[{'token': 'paris', 'score': 0.71}, {'token': ...","[{'token': 'paris', 'score': 0.342}, {'token':...","[{'token': 'montreal', 'score': 0.147}, {'toke...","[{'token': 'paris', 'score': 0.685}, {'token':..."
6,"[{'token': 'tokyo', 'score': 0.787}, {'token':...","[{'token': 'tokyo', 'score': 0.797}, {'token':...","[{'token': 'tokyo', 'score': 0.544}, {'token':...","[{'token': 'tokyo', 'score': 0.763}, {'token':...","[{'token': 'tokyo', 'score': 0.778}, {'token':..."
7,"[{'token': 'paris', 'score': 0.057}, {'token':...","[{'token': 'england', 'score': 0.1}, {'token':...","[{'token': 'holland', 'score': 0.285}, {'token...","[{'token': 'holland', 'score': 0.118}, {'token...",[]
8,"[{'token': 'madrid', 'score': 0.496}, {'token'...",[],"[{'token': 'india', 'score': 0.998}, {'token':...","[{'token': 'barcelona', 'score': 0.309}, {'tok...","[{'token': 'spain', 'score': 0.312}, {'token':..."
9,"[{'token': 'rome', 'score': 0.335}, {'token': ...","[{'token': 'rome', 'score': 0.177}, {'token': ...","[{'token': 'rome', 'score': 0.171}, {'token': ...","[{'token': 'rome', 'score': 0.246}, {'token': ...","[{'token': 'rome', 'score': 0.224}, {'token': ..."


### Roberta

In [None]:
bornIn_roberta = pd.read_csv('D:/tese/code/data_v2/outputs/roberta/outputs_bornIn_roberta_v2.csv')
bornIn_roberta.drop(columns=['Unnamed: 0'], inplace=True)
bornIn_roberta = bornIn_roberta.map(str_to_list)


diedIn_roberta = pd.read_csv('D:/tese/code/data_v2/outputs/roberta/outputs_diedIn_roberta_v2.csv')
diedIn_roberta.drop(columns=['Unnamed: 0'], inplace=True)
diedIn_roberta = diedIn_roberta.map(str_to_list)


capital_roberta = pd.read_csv('D:/tese/code/data_v2/outputs/roberta/outputs_capital_roberta_v2.csv')
capital_roberta.drop(columns=['Unnamed: 0'], inplace=True)
capital_roberta = capital_roberta.map(str_to_list)


citizen_roberta = pd.read_csv('D:/tese/code/data_v2/outputs/roberta/outputs_citizen_roberta_v2.csv')
citizen_roberta.drop(columns=['Unnamed: 0'], inplace=True)
citizen_roberta = citizen_roberta.map(str_to_list)


worksfor_roberta = pd.read_csv('D:/tese/code/data_v2/outputs/roberta/outputs_worksfor_roberta_v2.csv')
worksfor_roberta.drop(columns=['Unnamed: 0'], inplace=True)
worksfor_roberta = worksfor_roberta.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_roberta_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_roberta_spacy.loc[:, 'original'] = bornIn_roberta['original']
bornIn_roberta_spacy.loc[:, 't5_0'] = bornIn_roberta['t5_0']
bornIn_roberta_spacy.loc[:, 't5_1'] = bornIn_roberta['t5_1']
bornIn_roberta_spacy.loc[:, 't5_2'] = bornIn_roberta['t5_2']
bornIn_roberta_spacy.loc[:, 't5_3'] = bornIn_roberta['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_roberta_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_roberta_spacy.loc[:, 'original'] = diedIn_roberta['original']
diedIn_roberta_spacy.loc[:, 't5_0'] = diedIn_roberta['t5_0']
diedIn_roberta_spacy.loc[:, 't5_1'] = diedIn_roberta['t5_1']
diedIn_roberta_spacy.loc[:, 't5_2'] = diedIn_roberta['t5_2']
diedIn_roberta_spacy.loc[:, 't5_3'] = diedIn_roberta['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_roberta_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_roberta_spacy.loc[:, 'original'] = capital_roberta['original']
capital_roberta_spacy.loc[:, 't5_0'] = capital_roberta['t5_0']
capital_roberta_spacy.loc[:, 't5_1'] = capital_roberta['t5_1']
capital_roberta_spacy.loc[:, 't5_2'] = capital_roberta['t5_2']
capital_roberta_spacy.loc[:, 't5_3'] = capital_roberta['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_roberta_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_roberta_spacy.loc[:, 'original'] = citizen_roberta['original']
citizen_roberta_spacy.loc[:, 't5_0'] = citizen_roberta['t5_0']
citizen_roberta_spacy.loc[:, 't5_1'] = citizen_roberta['t5_1']
citizen_roberta_spacy.loc[:, 't5_2'] = citizen_roberta['t5_2']
citizen_roberta_spacy.loc[:, 't5_3'] = citizen_roberta['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_roberta_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_roberta_spacy.loc[:, 'original'] = worksfor_roberta['original']
worksfor_roberta_spacy.loc[:, 't5_0'] = worksfor_roberta['t5_0']
worksfor_roberta_spacy.loc[:, 't5_1'] = worksfor_roberta['t5_1']
worksfor_roberta_spacy.loc[:, 't5_2'] = worksfor_roberta['t5_2']
worksfor_roberta_spacy.loc[:, 't5_3'] = worksfor_roberta['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_roberta_spacy = extrair_tokens_dataframe(bornIn_roberta_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_roberta_spacy = extrair_tokens_dataframe(diedIn_roberta_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_roberta_spacy = extrair_tokens_dataframe(capital_roberta_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_roberta_spacy = extrair_tokens_dataframe(citizen_roberta_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_roberta_spacy = extrair_tokens_dataframe(worksfor_roberta_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

### Electra

In [None]:
bornIn_electra = pd.read_csv('D:/tese/code/data_v2/outputs/electra/outputs_bornIn_electra_v2.csv')
bornIn_electra.drop(columns=['Unnamed: 0'], inplace=True)
bornIn_electra = bornIn_electra.map(str_to_list)


diedIn_electra = pd.read_csv('D:/tese/code/data_v2/outputs/electra/outputs_diedIn_electra_v2.csv')
diedIn_electra.drop(columns=['Unnamed: 0'], inplace=True)
diedIn_electra = diedIn_electra.map(str_to_list)


capital_electra = pd.read_csv('D:/tese/code/data_v2/outputs/electra/outputs_capital_electra_v2.csv')
capital_electra.drop(columns=['Unnamed: 0'], inplace=True)
capital_electra = capital_electra.map(str_to_list)


citizen_electra = pd.read_csv('D:/tese/code/data_v2/outputs/electra/outputs_citizen_electra_v2.csv')
citizen_electra.drop(columns=['Unnamed: 0'], inplace=True)
citizen_electra = citizen_electra.map(str_to_list)


worksfor_electra = pd.read_csv('D:/tese/code/data_v2/outputs/electra/outputs_worksfor_electra_v2.csv')
worksfor_electra.drop(columns=['Unnamed: 0'], inplace=True)
worksfor_electra = worksfor_electra.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_electra_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_electra_spacy.loc[:, 'original'] = bornIn_electra['original']
bornIn_electra_spacy.loc[:, 't5_0'] = bornIn_electra['t5_0']
bornIn_electra_spacy.loc[:, 't5_1'] = bornIn_electra['t5_1']
bornIn_electra_spacy.loc[:, 't5_2'] = bornIn_electra['t5_2']
bornIn_electra_spacy.loc[:, 't5_3'] = bornIn_electra['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_electra_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_electra_spacy.loc[:, 'original'] = diedIn_electra['original']
diedIn_electra_spacy.loc[:, 't5_0'] = diedIn_electra['t5_0']
diedIn_electra_spacy.loc[:, 't5_1'] = diedIn_electra['t5_1']
diedIn_electra_spacy.loc[:, 't5_2'] = diedIn_electra['t5_2']
diedIn_electra_spacy.loc[:, 't5_3'] = diedIn_electra['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_electra_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_electra_spacy.loc[:, 'original'] = capital_electra['original']
capital_electra_spacy.loc[:, 't5_0'] = capital_electra['t5_0']
capital_electra_spacy.loc[:, 't5_1'] = capital_electra['t5_1']
capital_electra_spacy.loc[:, 't5_2'] = capital_electra['t5_2']
capital_electra_spacy.loc[:, 't5_3'] = capital_electra['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_electra_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_electra_spacy.loc[:, 'original'] = citizen_electra['original']
citizen_electra_spacy.loc[:, 't5_0'] = citizen_electra['t5_0']
citizen_electra_spacy.loc[:, 't5_1'] = citizen_electra['t5_1']
citizen_electra_spacy.loc[:, 't5_2'] = citizen_electra['t5_2']
citizen_electra_spacy.loc[:, 't5_3'] = citizen_electra['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_electra_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_electra_spacy.loc[:, 'original'] = worksfor_electra['original']
worksfor_electra_spacy.loc[:, 't5_0'] = worksfor_electra['t5_0']
worksfor_electra_spacy.loc[:, 't5_1'] = worksfor_electra['t5_1']
worksfor_electra_spacy.loc[:, 't5_2'] = worksfor_electra['t5_2']
worksfor_electra_spacy.loc[:, 't5_3'] = worksfor_electra['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_electra_spacy = extrair_tokens_dataframe(bornIn_electra_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_electra_spacy = extrair_tokens_dataframe(diedIn_electra_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_electra_spacy = extrair_tokens_dataframe(capital_electra_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_electra_spacy = extrair_tokens_dataframe(citizen_electra_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_electra_spacy = extrair_tokens_dataframe(worksfor_electra_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

## One-Shot

### Bert

In [None]:
bornIn_bert_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/bert/v2/outputs_bornIn_t5_bert_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
bornIn_bert_one = bornIn_bert_one.map(str_to_list)


diedIn_bert_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/bert/v2/outputs_diedIn_t5_bert_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
diedIn_bert_one = diedIn_bert_one.map(str_to_list)


capital_bert_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/bert/v2/outputs_capital_t5_bert_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
capital_bert_one = capital_bert_one.map(str_to_list)


worksfor_bert_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/bert/v2/outputs_worksfor_t5_bert_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
worksfor_bert_one = worksfor_bert_one.map(str_to_list)


citizen_bert_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/bert/v2/outputs_citizen_t5_bert_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
citizen_bert_one = citizen_bert_one.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_bert_one_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_bert_one_spacy.loc[:, 'original'] = bornIn_bert_one['original']
bornIn_bert_one_spacy.loc[:, 't5_0'] = bornIn_bert_one['t5_0']
bornIn_bert_one_spacy.loc[:, 't5_1'] = bornIn_bert_one['t5_1']
bornIn_bert_one_spacy.loc[:, 't5_2'] = bornIn_bert_one['t5_2']
bornIn_bert_one_spacy.loc[:, 't5_3'] = bornIn_bert_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_bert_one_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_bert_one_spacy.loc[:, 'original'] = diedIn_bert_one['original']
diedIn_bert_one_spacy.loc[:, 't5_0'] = diedIn_bert_one['t5_0']
diedIn_bert_one_spacy.loc[:, 't5_1'] = diedIn_bert_one['t5_1']
diedIn_bert_one_spacy.loc[:, 't5_2'] = diedIn_bert_one['t5_2']
diedIn_bert_one_spacy.loc[:, 't5_3'] = diedIn_bert_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_bert_one_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_bert_one_spacy.loc[:, 'original'] = capital_bert_one['original']
capital_bert_one_spacy.loc[:, 't5_0'] = capital_bert_one['t5_0']
capital_bert_one_spacy.loc[:, 't5_1'] = capital_bert_one['t5_1']
capital_bert_one_spacy.loc[:, 't5_2'] = capital_bert_one['t5_2']
capital_bert_one_spacy.loc[:, 't5_3'] = capital_bert_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_bert_one_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_bert_one_spacy.loc[:, 'original'] = citizen_bert_one['original']
citizen_bert_one_spacy.loc[:, 't5_0'] = citizen_bert_one['t5_0']
citizen_bert_one_spacy.loc[:, 't5_1'] = citizen_bert_one['t5_1']
citizen_bert_one_spacy.loc[:, 't5_2'] = citizen_bert_one['t5_2']
citizen_bert_one_spacy.loc[:, 't5_3'] = citizen_bert_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_bert_one_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_bert_one_spacy.loc[:, 'original'] = worksfor_bert_one['original']
worksfor_bert_one_spacy.loc[:, 't5_0'] = worksfor_bert_one['t5_0']
worksfor_bert_one_spacy.loc[:, 't5_1'] = worksfor_bert_one['t5_1']
worksfor_bert_one_spacy.loc[:, 't5_2'] = worksfor_bert_one['t5_2']
worksfor_bert_one_spacy.loc[:, 't5_3'] = worksfor_bert_one['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_bert_one_spacy = extrair_tokens_dataframe(bornIn_bert_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_bert_one_spacy = extrair_tokens_dataframe(diedIn_bert_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_bert_one_spacy = extrair_tokens_dataframe(capital_bert_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_bert_one_spacy = extrair_tokens_dataframe(citizen_bert_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_bert_one_spacy = extrair_tokens_dataframe(worksfor_bert_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

### Roberta

In [None]:
bornIn_roberta_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/roberta/v2/outputs_bornIn_t5_roberta_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
bornIn_roberta_one = bornIn_roberta_one.map(str_to_list)


diedIn_roberta_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/roberta/v2/outputs_diedIn_t5_roberta_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
diedIn_roberta_one = diedIn_roberta_one.map(str_to_list)


capital_roberta_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/roberta/v2/outputs_capital_t5_roberta_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
capital_roberta_one = capital_roberta_one.map(str_to_list)


worksfor_roberta_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/roberta/v2/outputs_worksfor_t5_roberta_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
worksfor_roberta_one = worksfor_roberta_one.map(str_to_list)


citizen_roberta_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/roberta/v2/outputs_citizen_t5_roberta_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
citizen_roberta_one = citizen_roberta_one.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_roberta_one_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_roberta_one_spacy.loc[:, 'original'] = bornIn_roberta_one['original']
bornIn_roberta_one_spacy.loc[:, 't5_0'] = bornIn_roberta_one['t5_0']
bornIn_roberta_one_spacy.loc[:, 't5_1'] = bornIn_roberta_one['t5_1']
bornIn_roberta_one_spacy.loc[:, 't5_2'] = bornIn_roberta_one['t5_2']
bornIn_roberta_one_spacy.loc[:, 't5_3'] = bornIn_roberta_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_roberta_one_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_roberta_one_spacy.loc[:, 'original'] = diedIn_roberta_one['original']
diedIn_roberta_one_spacy.loc[:, 't5_0'] = diedIn_roberta_one['t5_0']
diedIn_roberta_one_spacy.loc[:, 't5_1'] = diedIn_roberta_one['t5_1']
diedIn_roberta_one_spacy.loc[:, 't5_2'] = diedIn_roberta_one['t5_2']
diedIn_roberta_one_spacy.loc[:, 't5_3'] = diedIn_roberta_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_roberta_one_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_roberta_one_spacy.loc[:, 'original'] = capital_roberta_one['original']
capital_roberta_one_spacy.loc[:, 't5_0'] = capital_roberta_one['t5_0']
capital_roberta_one_spacy.loc[:, 't5_1'] = capital_roberta_one['t5_1']
capital_roberta_one_spacy.loc[:, 't5_2'] = capital_roberta_one['t5_2']
capital_roberta_one_spacy.loc[:, 't5_3'] = capital_roberta_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_roberta_one_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_roberta_one_spacy.loc[:, 'original'] = citizen_roberta_one['original']
citizen_roberta_one_spacy.loc[:, 't5_0'] = citizen_roberta_one['t5_0']
citizen_roberta_one_spacy.loc[:, 't5_1'] = citizen_roberta_one['t5_1']
citizen_roberta_one_spacy.loc[:, 't5_2'] = citizen_roberta_one['t5_2']
citizen_roberta_one_spacy.loc[:, 't5_3'] = citizen_roberta_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_roberta_one_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_roberta_one_spacy.loc[:, 'original'] = worksfor_roberta_one['original']
worksfor_roberta_one_spacy.loc[:, 't5_0'] = worksfor_roberta_one['t5_0']
worksfor_roberta_one_spacy.loc[:, 't5_1'] = worksfor_roberta_one['t5_1']
worksfor_roberta_one_spacy.loc[:, 't5_2'] = worksfor_roberta_one['t5_2']
worksfor_roberta_one_spacy.loc[:, 't5_3'] = worksfor_roberta_one['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_roberta_one_spacy = extrair_tokens_dataframe(bornIn_roberta_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_roberta_one_spacy = extrair_tokens_dataframe(diedIn_roberta_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_roberta_one_spacy = extrair_tokens_dataframe(capital_roberta_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_roberta_one_spacy = extrair_tokens_dataframe(citizen_roberta_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_roberta_one_spacy = extrair_tokens_dataframe(worksfor_roberta_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

### Electra

In [None]:
bornIn_electra_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/electra/v2/outputs_bornIn_t5_electra_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
bornIn_electra_one = bornIn_electra_one.map(str_to_list)


diedIn_electra_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/electra/v2/outputs_diedIn_t5_electra_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
diedIn_electra_one = diedIn_electra_one.map(str_to_list)


capital_electra_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/electra/v2/outputs_capital_t5_electra_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
capital_electra_one = capital_electra_one.map(str_to_list)


worksfor_electra_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/electra/v2/outputs_worksfor_t5_electra_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
worksfor_electra_one = worksfor_electra_one.map(str_to_list)


citizen_electra_one = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/one/electra/v2/outputs_citizen_t5_electra_one_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
citizen_electra_one = citizen_electra_one.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_electra_one_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_electra_one_spacy.loc[:, 'original'] = bornIn_electra_one['original']
bornIn_electra_one_spacy.loc[:, 't5_0'] = bornIn_electra_one['t5_0']
bornIn_electra_one_spacy.loc[:, 't5_1'] = bornIn_electra_one['t5_1']
bornIn_electra_one_spacy.loc[:, 't5_2'] = bornIn_electra_one['t5_2']
bornIn_electra_one_spacy.loc[:, 't5_3'] = bornIn_electra_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_electra_one_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_electra_one_spacy.loc[:, 'original'] = diedIn_electra_one['original']
diedIn_electra_one_spacy.loc[:, 't5_0'] = diedIn_electra_one['t5_0']
diedIn_electra_one_spacy.loc[:, 't5_1'] = diedIn_electra_one['t5_1']
diedIn_electra_one_spacy.loc[:, 't5_2'] = diedIn_electra_one['t5_2']
diedIn_electra_one_spacy.loc[:, 't5_3'] = diedIn_electra_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_electra_one_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_electra_one_spacy.loc[:, 'original'] = capital_electra_one['original']
capital_electra_one_spacy.loc[:, 't5_0'] = capital_electra_one['t5_0']
capital_electra_one_spacy.loc[:, 't5_1'] = capital_electra_one['t5_1']
capital_electra_one_spacy.loc[:, 't5_2'] = capital_electra_one['t5_2']
capital_electra_one_spacy.loc[:, 't5_3'] = capital_electra_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_electra_one_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_electra_one_spacy.loc[:, 'original'] = citizen_electra_one['original']
citizen_electra_one_spacy.loc[:, 't5_0'] = citizen_electra_one['t5_0']
citizen_electra_one_spacy.loc[:, 't5_1'] = citizen_electra_one['t5_1']
citizen_electra_one_spacy.loc[:, 't5_2'] = citizen_electra_one['t5_2']
citizen_electra_one_spacy.loc[:, 't5_3'] = citizen_electra_one['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_electra_one_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_electra_one_spacy.loc[:, 'original'] = worksfor_electra_one['original']
worksfor_electra_one_spacy.loc[:, 't5_0'] = worksfor_electra_one['t5_0']
worksfor_electra_one_spacy.loc[:, 't5_1'] = worksfor_electra_one['t5_1']
worksfor_electra_one_spacy.loc[:, 't5_2'] = worksfor_electra_one['t5_2']
worksfor_electra_one_spacy.loc[:, 't5_3'] = worksfor_electra_one['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_electra_one_spacy = extrair_tokens_dataframe(bornIn_electra_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_electra_one_spacy = extrair_tokens_dataframe(diedIn_electra_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_electra_one_spacy = extrair_tokens_dataframe(capital_electra_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_electra_one_spacy = extrair_tokens_dataframe(citizen_electra_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_electra_one_spacy = extrair_tokens_dataframe(worksfor_electra_one_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

## Few-Shot

### Bert

In [None]:
bornIn_bert_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/bert/v2/outputs_bornIn_t5_bert_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
bornIn_bert_few = bornIn_bert_few.map(str_to_list)

diedIn_bert_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/bert/v2/outputs_diedIn_t5_bert_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
diedIn_bert_few = diedIn_bert_few.map(str_to_list)


capital_bert_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/bert/v2/outputs_capital_t5_bert_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
capital_bert_few = capital_bert_few.map(str_to_list)


worksfor_bert_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/bert/v2/outputs_worksfor_t5_bert_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
worksfor_bert_few = worksfor_bert_few.map(str_to_list)


citizen_bert_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/bert/v2/outputs_citizen_t5_bert_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
citizen_bert_few = citizen_bert_few.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_bert_few_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_bert_few_spacy.loc[:, 'original'] = bornIn_bert_few['original']
bornIn_bert_few_spacy.loc[:, 't5_0'] = bornIn_bert_few['t5_0']
bornIn_bert_few_spacy.loc[:, 't5_1'] = bornIn_bert_few['t5_1']
bornIn_bert_few_spacy.loc[:, 't5_2'] = bornIn_bert_few['t5_2']
bornIn_bert_few_spacy.loc[:, 't5_3'] = bornIn_bert_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_bert_few_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_bert_few_spacy.loc[:, 'original'] = diedIn_bert_few['original']
diedIn_bert_few_spacy.loc[:, 't5_0'] = diedIn_bert_few['t5_0']
diedIn_bert_few_spacy.loc[:, 't5_1'] = diedIn_bert_few['t5_1']
diedIn_bert_few_spacy.loc[:, 't5_2'] = diedIn_bert_few['t5_2']
diedIn_bert_few_spacy.loc[:, 't5_3'] = diedIn_bert_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_bert_few_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_bert_few_spacy.loc[:, 'original'] = capital_bert_few['original']
capital_bert_few_spacy.loc[:, 't5_0'] = capital_bert_few['t5_0']
capital_bert_few_spacy.loc[:, 't5_1'] = capital_bert_few['t5_1']
capital_bert_few_spacy.loc[:, 't5_2'] = capital_bert_few['t5_2']
capital_bert_few_spacy.loc[:, 't5_3'] = capital_bert_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_bert_few_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_bert_few_spacy.loc[:, 'original'] = citizen_bert_few['original']
citizen_bert_few_spacy.loc[:, 't5_0'] = citizen_bert_few['t5_0']
citizen_bert_few_spacy.loc[:, 't5_1'] = citizen_bert_few['t5_1']
citizen_bert_few_spacy.loc[:, 't5_2'] = citizen_bert_few['t5_2']
citizen_bert_few_spacy.loc[:, 't5_3'] = citizen_bert_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_bert_few_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_bert_few_spacy.loc[:, 'original'] = worksfor_bert_few['original']
worksfor_bert_few_spacy.loc[:, 't5_0'] = worksfor_bert_few['t5_0']
worksfor_bert_few_spacy.loc[:, 't5_1'] = worksfor_bert_few['t5_1']
worksfor_bert_few_spacy.loc[:, 't5_2'] = worksfor_bert_few['t5_2']
worksfor_bert_few_spacy.loc[:, 't5_3'] = worksfor_bert_few['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_bert_few_spacy = extrair_tokens_dataframe(bornIn_bert_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_bert_few_spacy = extrair_tokens_dataframe(diedIn_bert_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_bert_few_spacy = extrair_tokens_dataframe(capital_bert_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_bert_few_spacy = extrair_tokens_dataframe(citizen_bert_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_bert_few_spacy = extrair_tokens_dataframe(worksfor_bert_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

### Roberta

In [None]:
bornIn_roberta_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/roberta/v2/outputs_bornIn_t5_roberta_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
bornIn_roberta_few = bornIn_roberta_few.map(str_to_list)


diedIn_roberta_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/roberta/v2/outputs_diedIn_t5_roberta_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
diedIn_roberta_few = diedIn_roberta_few.map(str_to_list)


capital_roberta_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/roberta/v2/outputs_capital_t5_roberta_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
capital_roberta_few = capital_roberta_few.map(str_to_list)


worksfor_roberta_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/roberta/v2/outputs_worksfor_t5_roberta_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
worksfor_roberta_few = worksfor_roberta_few.map(str_to_list)


citizen_roberta_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/roberta/v2/outputs_citizen_t5_roberta_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
citizen_roberta_few = citizen_roberta_few.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_roberta_few_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_roberta_few_spacy.loc[:, 'original'] = bornIn_roberta_few['original']
bornIn_roberta_few_spacy.loc[:, 't5_0'] = bornIn_roberta_few['t5_0']
bornIn_roberta_few_spacy.loc[:, 't5_1'] = bornIn_roberta_few['t5_1']
bornIn_roberta_few_spacy.loc[:, 't5_2'] = bornIn_roberta_few['t5_2']
bornIn_roberta_few_spacy.loc[:, 't5_3'] = bornIn_roberta_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_roberta_few_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_roberta_few_spacy.loc[:, 'original'] = diedIn_roberta_few['original']
diedIn_roberta_few_spacy.loc[:, 't5_0'] = diedIn_roberta_few['t5_0']
diedIn_roberta_few_spacy.loc[:, 't5_1'] = diedIn_roberta_few['t5_1']
diedIn_roberta_few_spacy.loc[:, 't5_2'] = diedIn_roberta_few['t5_2']
diedIn_roberta_few_spacy.loc[:, 't5_3'] = diedIn_roberta_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_roberta_few_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_roberta_few_spacy.loc[:, 'original'] = capital_roberta_few['original']
capital_roberta_few_spacy.loc[:, 't5_0'] = capital_roberta_few['t5_0']
capital_roberta_few_spacy.loc[:, 't5_1'] = capital_roberta_few['t5_1']
capital_roberta_few_spacy.loc[:, 't5_2'] = capital_roberta_few['t5_2']
capital_roberta_few_spacy.loc[:, 't5_3'] = capital_roberta_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_roberta_few_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_roberta_few_spacy.loc[:, 'original'] = citizen_roberta_few['original']
citizen_roberta_few_spacy.loc[:, 't5_0'] = citizen_roberta_few['t5_0']
citizen_roberta_few_spacy.loc[:, 't5_1'] = citizen_roberta_few['t5_1']
citizen_roberta_few_spacy.loc[:, 't5_2'] = citizen_roberta_few['t5_2']
citizen_roberta_few_spacy.loc[:, 't5_3'] = citizen_roberta_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_roberta_few_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_roberta_few_spacy.loc[:, 'original'] = worksfor_roberta_few['original']
worksfor_roberta_few_spacy.loc[:, 't5_0'] = worksfor_roberta_few['t5_0']
worksfor_roberta_few_spacy.loc[:, 't5_1'] = worksfor_roberta_few['t5_1']
worksfor_roberta_few_spacy.loc[:, 't5_2'] = worksfor_roberta_few['t5_2']
worksfor_roberta_few_spacy.loc[:, 't5_3'] = worksfor_roberta_few['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_roberta_few_spacy = extrair_tokens_dataframe(bornIn_roberta_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_roberta_few_spacy = extrair_tokens_dataframe(diedIn_roberta_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_roberta_few_spacy = extrair_tokens_dataframe(capital_roberta_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_roberta_few_spacy = extrair_tokens_dataframe(citizen_roberta_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_roberta_few_spacy = extrair_tokens_dataframe(worksfor_roberta_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)

### Electra

In [None]:
bornIn_electra_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/electra/v2/outputs_bornIn_t5_electra_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
bornIn_electra_few = bornIn_electra_few.map(str_to_list)


diedIn_electra_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/electra/v2/outputs_diedIn_t5_electra_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
diedIn_electra_few = diedIn_electra_few.map(str_to_list)


capital_electra_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/electra/v2/outputs_capital_t5_electra_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
capital_electra_few = capital_electra_few.map(str_to_list)


worksfor_electra_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/electra/v2/outputs_worksfor_t5_electra_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
worksfor_electra_few = worksfor_electra_few.map(str_to_list)


citizen_electra_few = pd.read_csv('D:/tese/code/data/novos_data/dados/outputs/One_Few-Shoot/few/electra/v2/outputs_citizen_t5_electra_few_shoot_v2.csv')\
    .drop(columns=['Unnamed: 0'])
citizen_electra_few = citizen_electra_few.map(str_to_list)

In [None]:
# Creating a new DataFrame from 'prompt' with specified columns
bornIn_electra_few_spacy = prompt_bornIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
bornIn_electra_few_spacy.loc[:, 'original'] = bornIn_electra_few['original']
bornIn_electra_few_spacy.loc[:, 't5_0'] = bornIn_electra_few['t5_0']
bornIn_electra_few_spacy.loc[:, 't5_1'] = bornIn_electra_few['t5_1']
bornIn_electra_few_spacy.loc[:, 't5_2'] = bornIn_electra_few['t5_2']
bornIn_electra_few_spacy.loc[:, 't5_3'] = bornIn_electra_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
diedIn_electra_few_spacy = prompt_diedIn[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
diedIn_electra_few_spacy.loc[:, 'original'] = diedIn_electra_few['original']
diedIn_electra_few_spacy.loc[:, 't5_0'] = diedIn_electra_few['t5_0']
diedIn_electra_few_spacy.loc[:, 't5_1'] = diedIn_electra_few['t5_1']
diedIn_electra_few_spacy.loc[:, 't5_2'] = diedIn_electra_few['t5_2']
diedIn_electra_few_spacy.loc[:, 't5_3'] = diedIn_electra_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
capital_electra_few_spacy = prompt_capital[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
capital_electra_few_spacy.loc[:, 'original'] = capital_electra_few['original']
capital_electra_few_spacy.loc[:, 't5_0'] = capital_electra_few['t5_0']
capital_electra_few_spacy.loc[:, 't5_1'] = capital_electra_few['t5_1']
capital_electra_few_spacy.loc[:, 't5_2'] = capital_electra_few['t5_2']
capital_electra_few_spacy.loc[:, 't5_3'] = capital_electra_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
citizen_electra_few_spacy = prompt_citizen[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
citizen_electra_few_spacy.loc[:, 'original'] = citizen_electra_few['original']
citizen_electra_few_spacy.loc[:, 't5_0'] = citizen_electra_few['t5_0']
citizen_electra_few_spacy.loc[:, 't5_1'] = citizen_electra_few['t5_1']
citizen_electra_few_spacy.loc[:, 't5_2'] = citizen_electra_few['t5_2']
citizen_electra_few_spacy.loc[:, 't5_3'] = citizen_electra_few['t5_3']

# Creating a new DataFrame from 'prompt' with specified columns
worksfor_electra_few_spacy = prompt_worksfor[['masked_sentence', 'T5_paraphrased_filled_sentence_0', 
               'T5_paraphrased_filled_sentence_1','T5_paraphrased_filled_sentence_2', 
               'T5_paraphrased_filled_sentence_3']].copy()

# Using .loc to avoid SettingWithCopyWarning
worksfor_electra_few_spacy.loc[:, 'original'] = worksfor_electra_few['original']
worksfor_electra_few_spacy.loc[:, 't5_0'] = worksfor_electra_few['t5_0']
worksfor_electra_few_spacy.loc[:, 't5_1'] = worksfor_electra_few['t5_1']
worksfor_electra_few_spacy.loc[:, 't5_2'] = worksfor_electra_few['t5_2']
worksfor_electra_few_spacy.loc[:, 't5_3'] = worksfor_electra_few['t5_3']

In [None]:
# Exemplo de uso
# Supondo que temos um DataFrame `novo` com as colunas especificadas
masked_columns = [
    'masked_sentence', 
    'T5_paraphrased_filled_sentence_0', 
    'T5_paraphrased_filled_sentence_1',
    'T5_paraphrased_filled_sentence_2', 
    'T5_paraphrased_filled_sentence_3'
]

# As colunas que contêm os tokens originais
original_columns = [
    'original', 
    't5_0', 
    't5_1', 
    't5_2', 
    't5_3'
]

# Chamar a função para extrair tokens
bornIn_electra_few_spacy = extrair_tokens_dataframe(bornIn_electra_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
diedIn_electra_few_spacy = extrair_tokens_dataframe(diedIn_electra_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
capital_electra_few_spacy = extrair_tokens_dataframe(capital_electra_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
citizen_electra_few_spacy = extrair_tokens_dataframe(citizen_electra_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_gpe2)
worksfor_electra_few_spacy = extrair_tokens_dataframe(worksfor_electra_few_spacy, masked_columns, original_columns, nlp, extrair_tokens_org2)