In [None]:
# !pip install rouge_score
# !pip install absl-py
# !pip install nltk
# !pip install evaluate
# !pip install unbabel-comet
# !pip install bert_score
import evaluate
import pandas as pd
import torch
from datasets import load_dataset, load_from_disk
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, Seq2SeqTrainingArguments, Seq2SeqTrainer, BartForConditionalGeneration, BartTokenizer
rouge = evaluate.load('rouge')

In [None]:
from comet import download_model, load_from_checkpoint

# Resolve the "Unbabel/wmt22-comet-da" checkpoint through download_model and keep the returned path.
model_path = download_model("Unbabel/wmt22-comet-da")
# Build the COMET scorer from that checkpoint; a later cell calls comet_model.predict(...) on it.
comet_model = load_from_checkpoint(model_path)

In [None]:
# Evaluation data; later cells read the 'article' and 'highlights' columns of this frame.
df = pd.read_csv('datasets/testing.csv')

In [None]:
# Directory of the fine-tuned checkpoint. Naming it once guarantees the tokenizer
# and the model are always loaded from the same location.
MODEL_DIR = 'models/epoch4_lrate1e_b48_s15000v3000'

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_DIR)

In [None]:
# Token budget for the model input: summarize() passes this as max_length when tokenizing with truncation.
max_input = 1024

In [None]:
def summarize(summary):
    """Generate a summary for a single piece of text.

    Uses the notebook-level ``tokenizer``, ``model`` and ``max_input``.

    Args:
        summary: The source text to summarize. Despite its name this is the
            input document, not a summary; the name is kept so existing
            callers keep working.

    Returns:
        The decoded summary string with special tokens stripped.
    """
    # Only one sequence is encoded per call, so no padding is needed:
    # truncation alone caps the input at max_input tokens. Padding every
    # article to max_input just adds pad positions for the encoder to process.
    inputs = tokenizer(
        summary,
        truncation=True,
        max_length=max_input,
        return_tensors='pt',
    ).to(model.device)  # keep the tensors on the same device as the model

    # Pass the attention mask explicitly rather than leaving generate()
    # to infer it from the input ids.
    summary_ids = model.generate(
        input_ids=inputs['input_ids'],
        attention_mask=inputs['attention_mask'],
        max_length=150,
        min_length=20,
        length_penalty=1.0,
        num_beams=4,
        early_stopping=True,
    )

    # A single sequence was submitted, so row 0 holds its generated tokens.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
# Number of leading test rows to evaluate; stated once so the article and
# reference slices always cover the same rows.
N_EVAL_SAMPLES = 500

eval_rows = df.iloc[:N_EVAL_SAMPLES]
sources = eval_rows['article'].tolist()
references = eval_rows['highlights'].tolist()

# Run the summarizer over every source article, one at a time, in order.
predictions = [summarize(article) for article in sources]

In [None]:
# Prepare data for COMET: one record per example holding the source article ("src"),
# the generated summary ("mt") and the reference summary ("ref").
data = [
    {"src": src, "mt": pred, "ref": ref}
    for src, pred, ref in zip(sources, predictions, references)
]
# Compute COMET scores. Request a GPU only when CUDA is actually available and
# otherwise pass gpus=0, so the cell does not depend on a GPU being present.
model_output = comet_model.predict(
    data,
    batch_size=48,
    gpus=1 if torch.cuda.is_available() else 0,
)
model_output

In [5]:
from evaluate import load

# Load the three evaluation metrics used by compute_metrics().
# NOTE: this rebinds `rouge`, which the import cell already loaded via evaluate.load('rouge').
rouge = load("rouge")
# The bertscore metric needs the `bert_score` package; the recorded run of this cell
# ended with "ModuleNotFoundError: No module named 'bert_score'".
bertscore = load("bertscore")
meteor = load("meteor")
def compute_metrics(preds=None, refs=None):
    """Score generated summaries against references with ROUGE, BERTScore and METEOR.

    Uses the notebook-level ``rouge``, ``bertscore`` and ``meteor`` metric objects.

    Args:
        preds: Generated summaries. When omitted, the notebook-level
            ``predictions`` list is used, which is what the original
            zero-argument call did.
        refs: Reference summaries aligned with ``preds``. When omitted, the
            notebook-level ``references`` list is used.

    Returns:
        A 3-tuple ``(rouge_result, bert, met_res)``, not a dictionary:
        ``rouge_result`` is whatever ``rouge.compute`` returns, ``bert`` is the
        ``"f1"`` entry of the BERTScore result, and ``met_res`` is the
        ``"meteor"`` entry of the METEOR result.
    """
    # Fall back to the notebook globals so existing compute_metrics() calls
    # behave exactly as before.
    if preds is None:
        preds = predictions
    if refs is None:
        refs = references

    # Compute ROUGE scores
    rouge_result = rouge.compute(predictions=preds, references=refs)

    # Compute BERTScore and keep only the F1 values
    bertscore_result = bertscore.compute(predictions=preds, references=refs, lang="en")
    bert = bertscore_result["f1"]

    # Compute METEOR and keep only its score
    meteor_result = meteor.compute(predictions=preds, references=refs)
    met_res = meteor_result["meteor"]

    # Return the three results together as a tuple
    return rouge_result, bert, met_res

Using the latest cached version of the module from C:\Users\muldo\.cache\huggingface\modules\evaluate_modules\metrics\evaluate-metric--bertscore\cf4907b18f8f741f202232c0f8009a3bd49ff98802c245abcb6ea51a37a8c05b (last modified on Sat Apr  5 10:05:12 2025) since it couldn't be found locally at evaluate-metric--bertscore, or remotely on the Hugging Face Hub.


ModuleNotFoundError: No module named 'bert_score'

In [11]:
compute_metrics()

Using default tokenizer.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


({'rouge1': 0.36480658653911446,
  'rouge2': 0.1579440092910353,
  'rougeL': 0.2656104293710476,
  'rougeLsum': 0.2655514827967316},
 [0.8698649406433105,
  0.8559585213661194,
  0.8928570747375488,
  0.8718481659889221,
  0.8665789365768433,
  0.8759140372276306,
  0.9187641143798828,
  0.8953736424446106,
  0.8614893555641174,
  0.9071393013000488,
  0.8947478532791138,
  0.8992846608161926,
  0.9122779369354248,
  0.8621928095817566,
  0.8692147135734558,
  0.8714856505393982,
  0.881332278251648,
  0.909538984298706,
  0.8439680933952332,
  0.901391327381134,
  0.9162265658378601,
  0.8695036172866821,
  0.8883928060531616,
  0.8969877362251282,
  0.9375685453414917,
  0.8828003406524658,
  0.8735513091087341,
  0.8680959343910217,
  0.9022133350372314,
  0.8566632270812988,
  0.8635253310203552,
  0.9215579628944397,
  0.8867626190185547,
  0.8645318746566772,
  0.8745496273040771,
  0.8989417552947998,
  0.8212925791740417,
  0.8928385376930237,
  0.8998949527740479,
  0.90517455