In [5]:
import pandas as pd
import nltk
nltk.download('punkt')

[nltk_data] Error loading punkt: <urlopen error [SSL:
[nltk_data]     CERTIFICATE_VERIFY_FAILED] certificate verify failed:
[nltk_data]     unable to get local issuer certificate (_ssl.c:1010)>


False

In [6]:
import pandas as pd
import nltk
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
from bert_score import score as bert_score
from tqdm import tqdm
import os
import glob

# Inicializa avaliadores
rouge = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
smoothie = SmoothingFunction().method4

# Carrega a base completa
df_total = pd.read_csv('data/wikihow_results.csv')

# Define o tamanho do lote
batch_size = 200
num_batches = (len(df_total) + batch_size - 1) // batch_size  # Arredonda para cima

# Cria pasta de sa√≠da
output_dir = 'lotes_avaliados'
os.makedirs(output_dir, exist_ok=True)

# L√™ todos os CSVs existentes e junta os t√≠tulos processados
csvs_existentes = glob.glob(os.path.join(output_dir, '*.csv'))
titulos_processados = set()
for path in csvs_existentes:
    try:
        df_existente = pd.read_csv(path, usecols=['title'])
        titulos_processados.update(df_existente['title'].dropna().unique())
    except Exception as e:
        print(f"‚ö†Ô∏è Erro ao ler '{path}': {e}")

# Processa lote a lote
for i in range(num_batches):
    start = i * batch_size
    end = min((i + 1) * batch_size, len(df_total))
    df_batch = df_total.iloc[start:end].copy()

    # Verifica se todos os t√≠tulos desse lote j√° est√£o nos processados
    titulos_lote = set(df_batch['title'].dropna().unique())
    if titulos_lote.issubset(titulos_processados):
        print(f"‚è© Lote {i+1} j√° processado com base nos t√≠tulos. Pulando...\n")
        continue

    print(f"üîÑ Processando lote {i+1}/{num_batches}...")

    # TEXTOS
    textos = df_batch['text'].fillna('').astype(str).tolist()
    headlines = df_batch['headline'].fillna('').astype(str).tolist()
    resumos = df_batch['resumo'].fillna('').astype(str).tolist()

    # M√©tricas de HEADLINE vs TEXT
    rouge_h, bleu_h = [], []
    for ref, hyp in tqdm(zip(textos, headlines), total=len(textos), desc="‚Üí ROUGE/BLEU HEADLINE"):
        scores = rouge.score(ref, hyp)
        rouge_h.append((scores['rouge1'].fmeasure, scores['rouge2'].fmeasure, scores['rougeL'].fmeasure))

        ref_tokens = [nltk.word_tokenize(ref.lower())]
        hyp_tokens = nltk.word_tokenize(hyp.lower())
        bleu_h.append(sentence_bleu(ref_tokens, hyp_tokens, smoothing_function=smoothie))

    # BERTScore HEADLINE vs TEXT (em ingl√™s)
    P_h, R_h, F1_h = bert_score(headlines, textos, lang="en", rescale_with_baseline=False)

    # BERTScore RESUMO vs TEXT (pt vs en)
    P_r, R_r, F1_r = bert_score(resumos, textos, lang="multilingual", rescale_with_baseline=False)

    # Adiciona ao DataFrame
    df_batch['headline_ROUGE-1'] = [r[0] for r in rouge_h]
    df_batch['headline_ROUGE-2'] = [r[1] for r in rouge_h]
    df_batch['headline_ROUGE-L'] = [r[2] for r in rouge_h]
    df_batch['headline_BLEU'] = bleu_h
    df_batch['headline_BERTScore_F1'] = F1_h.tolist()
    df_batch['resumo_BERTScore_F1'] = F1_r.tolist()

    # Compara√ß√£o final
    df_batch['resumo_melhor_que_headline'] = df_batch['resumo_BERTScore_F1'] > df_batch['headline_BERTScore_F1']

    # Salva o lote
    output_path = os.path.join(output_dir, f'avaliacao_resumos_lote_{i}.csv')
    df_batch.to_csv(output_path, index=False)
    print(f"‚úÖ Lote {i+1} salvo em '{output_path}'\n")

print("üèÅ Todos os lotes foram processados com sucesso!")


‚è© Lote 1 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 2 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 3 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 4 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 5 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 6 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 7 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 8 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 9 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 10 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 11 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 12 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 13 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 14 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 15 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 16 j√° processado com base nos t√≠tulos. Pulando...

‚è© Lote 17 j√° p

‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 296.70it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 335 salvo em 'lotes_avaliados/avaliacao_resumos_lote_334.csv'

üîÑ Processando lote 336/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 274.53it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 336 salvo em 'lotes_avaliados/avaliacao_resumos_lote_335.csv'

üîÑ Processando lote 337/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 209.64it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 337 salvo em 'lotes_avaliados/avaliacao_resumos_lote_336.csv'

üîÑ Processando lote 338/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 711.50it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 338 salvo em 'lotes_avaliados/avaliacao_resumos_lote_337.csv'

üîÑ Processando lote 339/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 442.78it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 339 salvo em 'lotes_avaliados/avaliacao_resumos_lote_338.csv'

üîÑ Processando lote 340/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 395.71it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 340 salvo em 'lotes_avaliados/avaliacao_resumos_lote_339.csv'

üîÑ Processando lote 341/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 325.75it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 341 salvo em 'lotes_avaliados/avaliacao_resumos_lote_340.csv'

üîÑ Processando lote 342/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 239.54it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 342 salvo em 'lotes_avaliados/avaliacao_resumos_lote_341.csv'

üîÑ Processando lote 343/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 307.37it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 343 salvo em 'lotes_avaliados/avaliacao_resumos_lote_342.csv'

üîÑ Processando lote 344/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 152.81it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 344 salvo em 'lotes_avaliados/avaliacao_resumos_lote_343.csv'

üîÑ Processando lote 345/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 251.25it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 345 salvo em 'lotes_avaliados/avaliacao_resumos_lote_344.csv'

üîÑ Processando lote 346/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 350.60it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 346 salvo em 'lotes_avaliados/avaliacao_resumos_lote_345.csv'

üîÑ Processando lote 347/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 274.89it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 347 salvo em 'lotes_avaliados/avaliacao_resumos_lote_346.csv'

üîÑ Processando lote 348/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 280.58it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 348 salvo em 'lotes_avaliados/avaliacao_resumos_lote_347.csv'

üîÑ Processando lote 349/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 250.46it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 349 salvo em 'lotes_avaliados/avaliacao_resumos_lote_348.csv'

üîÑ Processando lote 350/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 272.72it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 350 salvo em 'lotes_avaliados/avaliacao_resumos_lote_349.csv'

üîÑ Processando lote 351/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 218.39it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 351 salvo em 'lotes_avaliados/avaliacao_resumos_lote_350.csv'

üîÑ Processando lote 352/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 274.61it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 352 salvo em 'lotes_avaliados/avaliacao_resumos_lote_351.csv'

üîÑ Processando lote 353/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 240.94it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 353 salvo em 'lotes_avaliados/avaliacao_resumos_lote_352.csv'

üîÑ Processando lote 354/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 228.91it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 354 salvo em 'lotes_avaliados/avaliacao_resumos_lote_353.csv'

üîÑ Processando lote 355/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 272.44it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 355 salvo em 'lotes_avaliados/avaliacao_resumos_lote_354.csv'

üîÑ Processando lote 356/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 245.89it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 356 salvo em 'lotes_avaliados/avaliacao_resumos_lote_355.csv'

üîÑ Processando lote 357/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 482.94it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 357 salvo em 'lotes_avaliados/avaliacao_resumos_lote_356.csv'

üîÑ Processando lote 358/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 426.70it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 358 salvo em 'lotes_avaliados/avaliacao_resumos_lote_357.csv'

üîÑ Processando lote 359/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 269.86it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 359 salvo em 'lotes_avaliados/avaliacao_resumos_lote_358.csv'

üîÑ Processando lote 360/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 286.71it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 360 salvo em 'lotes_avaliados/avaliacao_resumos_lote_359.csv'

üîÑ Processando lote 361/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 371.70it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 361 salvo em 'lotes_avaliados/avaliacao_resumos_lote_360.csv'

üîÑ Processando lote 362/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 137.21it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 362 salvo em 'lotes_avaliados/avaliacao_resumos_lote_361.csv'

üîÑ Processando lote 363/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 105.96it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 363 salvo em 'lotes_avaliados/avaliacao_resumos_lote_362.csv'

üîÑ Processando lote 364/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:02<00:00, 98.48it/s] 
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 364 salvo em 'lotes_avaliados/avaliacao_resumos_lote_363.csv'

üîÑ Processando lote 365/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 122.32it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 365 salvo em 'lotes_avaliados/avaliacao_resumos_lote_364.csv'

üîÑ Processando lote 366/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 105.91it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 366 salvo em 'lotes_avaliados/avaliacao_resumos_lote_365.csv'

üîÑ Processando lote 367/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 111.60it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 367 salvo em 'lotes_avaliados/avaliacao_resumos_lote_366.csv'

üîÑ Processando lote 368/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 110.20it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 368 salvo em 'lotes_avaliados/avaliacao_resumos_lote_367.csv'

üîÑ Processando lote 369/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 104.41it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 369 salvo em 'lotes_avaliados/avaliacao_resumos_lote_368.csv'

üîÑ Processando lote 370/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 162.71it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 370 salvo em 'lotes_avaliados/avaliacao_resumos_lote_369.csv'

üîÑ Processando lote 371/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:00<00:00, 235.56it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 371 salvo em 'lotes_avaliados/avaliacao_resumos_lote_370.csv'

üîÑ Processando lote 372/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 110.08it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 372 salvo em 'lotes_avaliados/avaliacao_resumos_lote_371.csv'

üîÑ Processando lote 373/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 121.76it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 373 salvo em 'lotes_avaliados/avaliacao_resumos_lote_372.csv'

üîÑ Processando lote 374/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 116.86it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 374 salvo em 'lotes_avaliados/avaliacao_resumos_lote_373.csv'

üîÑ Processando lote 375/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 119.97it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 375 salvo em 'lotes_avaliados/avaliacao_resumos_lote_374.csv'

üîÑ Processando lote 376/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 184.22it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 376 salvo em 'lotes_avaliados/avaliacao_resumos_lote_375.csv'

üîÑ Processando lote 377/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 195.06it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 377 salvo em 'lotes_avaliados/avaliacao_resumos_lote_376.csv'

üîÑ Processando lote 378/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 139.56it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 378 salvo em 'lotes_avaliados/avaliacao_resumos_lote_377.csv'

üîÑ Processando lote 379/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 122.86it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 379 salvo em 'lotes_avaliados/avaliacao_resumos_lote_378.csv'

üîÑ Processando lote 380/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 148.14it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 380 salvo em 'lotes_avaliados/avaliacao_resumos_lote_379.csv'

üîÑ Processando lote 381/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 114.30it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 381 salvo em 'lotes_avaliados/avaliacao_resumos_lote_380.csv'

üîÑ Processando lote 382/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 133.42it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 382 salvo em 'lotes_avaliados/avaliacao_resumos_lote_381.csv'

üîÑ Processando lote 383/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 104.92it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 383 salvo em 'lotes_avaliados/avaliacao_resumos_lote_382.csv'

üîÑ Processando lote 384/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 125.39it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 384 salvo em 'lotes_avaliados/avaliacao_resumos_lote_383.csv'

üîÑ Processando lote 385/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 200/200 [00:01<00:00, 114.32it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 385 salvo em 'lotes_avaliados/avaliacao_resumos_lote_384.csv'

üîÑ Processando lote 386/386...


‚Üí ROUGE/BLEU HEADLINE: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6/6 [00:00<00:00, 154.64it/s]
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


‚úÖ Lote 386 salvo em 'lotes_avaliados/avaliacao_resumos_lote_385.csv'

üèÅ Todos os lotes foram processados com sucesso!


In [9]:
import glob

all_files = glob.glob("lotes_avaliados/avaliacao_resumos_lote_*.csv")
df_final = pd.concat([pd.read_csv(f) for f in all_files], ignore_index=True)
df_final.to_csv("avaliacao_resumos_completa.csv", index=False)


In [10]:
df_final.head()

Unnamed: 0,headline,title,text,resumo,classificacao,metadata,max_length,language,nivel_ensino,status,erro,headline_ROUGE-1,headline_ROUGE-2,headline_ROUGE-L,headline_BLEU,headline_BERTScore_F1,resumo_BERTScore_F1,resumo_melhor_que_headline
0,"Understand what a prenuptial agreement does.,\...",How to Obtain a Prenuptial Agreement,"Put simply, a prenuptial agreement is a legal ...",,,,500,pt-BR,medio,erro,"HTTP 422: {""detail"":[{""type"":""string_too_long""...",0.117717,0.067817,0.086801,3.690934e-07,0.840141,0.0,False
1,"Calculate your budget for the wand.,\nChoose t...",How to Make a Wedding Ribbon Wand,How much money are you willing to spend on a w...,Este texto √© um guia para fazer varinhas perso...,,"{'tempo_processamento': 8.99620270729065, 'tam...",500,pt-BR,medio,ok,,0.140187,0.04918,0.086449,2.049932e-06,0.814617,0.618793,False
2,Buy or make a neat hold-all to keep everything...,How to Make a Bride Survival Kit,Ideally it should be compact and easily carrie...,Este texto fala sobre montar um kit de emerg√™n...,,"{'tempo_processamento': 5.824188232421875, 'ta...",500,pt-BR,medio,ok,,0.115385,0.034335,0.076923,1.192576e-06,0.816137,0.647328,False
3,Be prepared for your decision being seen by so...,How to Tell People You're Keeping Your Maiden ...,"However, traditions are not set in stone. In s...","Hoje em dia, n√£o √© mais obrigat√≥rio a mulher m...",,"{'tempo_processamento': 6.02925968170166, 'tam...",500,pt-BR,medio,ok,,0.160214,0.048193,0.101469,0.000107024,0.822365,0.68122,False
4,Pick a child that is three to seven years old....,How to Ask Someone to Be Your Ring Bearer,A general rule for the ring bearer is a child ...,Para escolher a crian√ßa que vai levar as alian...,,"{'tempo_processamento': 4.799262285232544, 'ta...",500,pt-BR,medio,ok,,0.180288,0.089157,0.120192,5.672125e-05,0.838326,0.64615,False
