In [6]:
!git clone https://github.com/PrimerAI/blanc.git

fatal: destination path 'blanc' already exists and is not an empty directory.


In [7]:
!pip install numpy==2.0.2, torch==2.6.0+cu124, transformers==4.55.0



To use blanc lib and perform blanc evaluation, it's needed to run these steps below:
- go to blanc/requirements.txt and delete:
  - torch;
  - transformers; and
  - numpy;

- go to blanc/blanc/blanc.py and do what follows:
  - Add this import "from torch.optim import AdamW"
  - Delete "AdamW" from transformers import

In [None]:
# Navigate into the cloned repository directory
%cd blanc

# Install the blanc package from the current directory
!pip install . -q

# Navigate back to the original directory if needed
%cd ..

import nltk
nltk.download('punkt_tab')
# from blanc import BlancHelp, BlancTune
import re
import os
import pandas as pd
from tqdm import tqdm

# from google.colab import drive
# drive.mount('/content/drive', force_remount=True)

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Mounted at /content/drive


In [None]:
blanc_help = BlancHelp(device='cuda', show_progress_bar=False, inference_batch_size=128)
blanc_tune = BlancTune(device='cuda', finetune_mask_evenly=False, show_progress_bar=False, inference_batch_size=24, finetune_batch_size=24)

In [None]:
# Remove patterns like "word/ em os" and maintain just the word before the backslash
def clean_opinosis_text(text):
  return re.sub(r'([\wãõáéíóúâêôç]+)\/_ em os', r'\1', text)

In [None]:

data_csv = pd.read_csv("/content/drive/MyDrive/opinosis_tilic/relatorio_analise_sentimentos.csv")

results = []

# Calculating scores for each subject
for index, row in tqdm(data_csv.iterrows(), total=data_csv.shape[0], desc="Processing subjects"):
    subject = row["Assunto"]

    summaries = {
        "opinosis": clean_opinosis_text(row["Texto Opinosis"]),
        "llm_abstrativo": row["Texto LLM Abstrativo"],
        "llm_extrativo": row["Texto LLM Extrativo"]
    }

    docs = []
    doc_path = f"/content/drive/MyDrive/opinosis_tilic/OpiSums-PT/Textos_Fontes/{subject}/"
    if os.path.isdir(doc_path):
        for file_name in os.listdir(doc_path):
            if file_name.endswith(".txt"):
                with open(os.path.join(doc_path, file_name), "r", encoding="utf-8", errors="ignore") as f:
                    docs.append(f.read())

    if not docs:
        print(f"Warning: Any document found for subject: '{subject}'.")
        continue

    line_result = {"assunto": subject}

    for sum_name, sum_text in summaries.items():
        if not isinstance(sum_text, str) or not sum_text:
            line_result[f"blanc_help_{sum_name}"] = 0.0
            line_result[f"blanc_tune_{sum_name}"] = 0.0
            continue

        docs_to_evaluate = docs

        # Creating a list that repeats the same summarie for each document.
        # Doing it to perform evaluating in batches.
        sums_to_evaluate = [sum_text] * len(docs_to_evaluate)

        scores_help = blanc_help.eval_pairs(docs_to_evaluate, sums_to_evaluate)
        scores_tune = blanc_tune.eval_pairs(docs_to_evaluate, sums_to_evaluate)

        avg_score_help = np.mean(scores_help) if scores_help else 0.0
        avg_score_tune = np.mean(scores_tune) if scores_tune else 0.0

        line_result[f"blanc_help_{sum_name}"] = avg_score_help
        line_result[f"blanc_tune_{sum_name}"] = avg_score_tune

    results.append(line_result)

results_df = pd.DataFrame(results)

In [None]:
results_df.to_csv("/content/drive/MyDrive/opinosis_tilic/analiseBlanc.csv")

In [4]:
results_df = pd.read_csv("/content/drive/MyDrive/opinosis_tilic/analiseBlanc.csv")
results_df.drop(columns='Unnamed: 0', inplace=True)

In [None]:
print("Average blanc tune opinosis:",round(results_df['blanc_tune_opinosis'].mean(),2), "Standard deviation:", round(results_df['blanc_tune_opinosis'].std(),2))
print("Average blanc tune extractive llm:",round(results_df['blanc_tune_llm_extrativo'].mean(),2), "Standard deviation:", round(results_df['blanc_tune_llm_extrativo'].std(),2))
print("Average blanc tune abstractive llm:",round(results_df['blanc_tune_llm_abstrativo'].mean(),2), "Standard deviation:", round(results_df['blanc_tune_llm_abstrativo'].std(),2))

print("Average blanc help opinosis:", round(results_df['blanc_help_opinosis'].mean(),2), "Standard deviation:", round(results_df['blanc_help_opinosis'].std(),2))
print("Average blanc help extractive llm:", round(results_df['blanc_help_llm_extrativo'].mean(),2), "Standard deviation:", round(results_df['blanc_help_llm_extrativo'].std(),2))
print("Average blanc help abstractive llm:", round(results_df['blanc_help_llm_abstrativo'].mean(),2), "Standard deviation:", round(results_df['blanc_help_llm_abstrativo'].std(),2))

Average blanc tune opinosis: 0.09 Standard deviation: 0.08
Average blanc tune extractive llm: 0.07 Standard deviation: 0.07
Average blanc tune abstractive llm: 0.06 Standard deviation: 0.07
Average blanc help opinosis: 0.13 Standard deviation: 0.05
Average blanc help extractive llm: 0.12 Standard deviation: 0.06
Average blanc help abstractive llm: 0.11 Standard deviation: 0.05


In [None]:
display(results_df)

Unnamed: 0.1,Unnamed: 0,assunto,blanc_help_opinosis,blanc_tune_opinosis,blanc_help_llm_abstrativo,blanc_tune_llm_abstrativo,blanc_help_llm_extrativo,blanc_tune_llm_extrativo
0,0,1984,0.106455,0.00623,0.06672,0.019284,0.079945,0.028887
1,1,Capitaes-da-Areia,0.153749,0.040521,0.103971,-0.001536,0.109876,0.044678
2,2,Crepusculo,0.103223,0.091632,0.08563,0.019971,0.094172,0.059978
3,3,Ensaio-Sobre-a-Cegueira,0.117916,0.112852,0.111695,0.034855,0.157121,0.107578
4,4,Fala-Serio-Amiga,0.22651,0.194747,0.175454,0.125713,0.139827,0.120023
5,5,Fala-Serio-Amor,0.188988,0.147794,0.246483,0.201985,0.273653,0.196255
6,6,Fala-Serio-Mae,0.074194,0.057414,0.116499,0.071561,0.125358,0.06355
7,7,Fala-Serio-Pai,0.250454,0.262762,0.177158,0.178837,0.218524,0.217044
8,8,Fala-Serio-Professor,0.189396,0.149181,0.105266,0.075044,0.083043,0.017393
9,9,Galaxy-SIII,0.094053,-0.027205,0.0446,-0.025009,0.062287,-0.032376
