In [38]:
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TrainingArguments, Trainer, Seq2SeqTrainingArguments, Seq2SeqTrainer, BartForConditionalGeneration, BartTokenizer
import evaluate
from datasets import load_dataset, load_from_disk
rouge = evaluate.load('rouge')
import pandas as pd

In [39]:
from comet import download_model, load_from_checkpoint

model_path = download_model("Unbabel/wmt22-comet-da")
comet_model = load_from_checkpoint(model_path)

Fetching 5 files:   0%|          | 0/5 [00:00<?, ?it/s]

Lightning automatically upgraded your loaded checkpoint from v1.8.3.post1 to v2.5.1.post0. To apply the upgrade to your files permanently, run `python -m pytorch_lightning.utilities.upgrade_checkpoint C:\Users\muldo\.cache\huggingface\hub\models--Unbabel--wmt22-comet-da\snapshots\2760a223ac957f30acfb18c8aa649b01cf1d75f2\checkpoints\model.ckpt`
Encoder model frozen.
C:\Users\muldo\GitHubRepos\BartModel\.venv\Lib\site-packages\pytorch_lightning\core\saving.py:195: Found keys that are not in the model state dict but in the checkpoint: ['encoder.model.embeddings.position_ids']


In [40]:
df = pd.read_csv('datasets/testing.csv')

In [41]:
tokenizer = AutoTokenizer.from_pretrained('../app/app_model/epoch3_lrate2e_b48_s15000v3000')
model = AutoModelForSeq2SeqLM.from_pretrained('../app/app_model/epoch3_lrate2e_b48_s15000v3000')

In [42]:
max_input = 1024

In [43]:
def summarize(summary):
    # Consistent preprocessing
    inputs = tokenizer(
        summary, 
        truncation=True, 
        padding="max_length", 
        max_length=max_input, 
        return_tensors='pt'
    )   


    summary_ids = model.generate(
    inputs['input_ids'],
    max_length=150,       
    min_length=20,        
    length_penalty=1.0,  
    num_beams=4,        
    early_stopping=True
)   

    # Decode and return summary
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
  

    return summary

In [44]:
sources = df['article'][:500].tolist() 
references = df['highlights'][:500].tolist()

# Generate summaries for the source texts
predictions = [summarize(text) for text in sources]

In [45]:
# Prepare data for COMET
data = [
    {"src": src, "mt": pred, "ref": ref}
    for src, pred, ref in zip(sources, predictions, references)
]
# Compute COMET scores
model_output = comet_model.predict(data, batch_size=48, gpus=1)
model_output

Using default `ModelCheckpoint`. Consider installing `litmodels` package to enable `LitModelCheckpoint` for automatic upload to the Lightning model registry.
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Predicting DataLoader 0: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████| 11/11 [08:27<00:00, 46.15s/it]


Prediction([('scores',
             [0.7731872200965881,
              0.6981433033943176,
              0.7796196937561035,
              0.729459822177887,
              0.6821228265762329,
              0.7763604521751404,
              0.7369359135627747,
              0.8001688718795776,
              0.6631919741630554,
              0.7241567373275757,
              0.7273765206336975,
              0.80901700258255,
              0.7297235131263733,
              0.6272565722465515,
              0.7475302815437317,
              0.6462031006813049,
              0.7588425278663635,
              0.7545480132102966,
              0.6088325381278992,
              0.6763309240341187,
              0.845853328704834,
              0.6184629797935486,
              0.6471197009086609,
              0.7619783282279968,
              0.8182158470153809,
              0.7064818143844604,
              0.747429370880127,
              0.7734741568565369,
              0.65111178159713

In [47]:
from evaluate import load

# Load all three evaluation metrics
rouge = load("rouge")
bertscore = load("bertscore")
meteor = load("meteor")
def compute_metrics():
    # Compute ROUGE scores
    rouge_result = rouge.compute(predictions=predictions, references=references)
    
    # Compute BERTScore
    bertscore_result = bertscore.compute(predictions=predictions, references=references, lang="en")
    bert = bertscore_result["f1"]
         
    # Compute METEOR
    meteor_result = meteor.compute(predictions=predictions, references=references)
    met_res = meteor_result["meteor"]
    # Combine all results in a dictionary
    return rouge_result, bert, met_res

[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\muldo\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\muldo\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\muldo\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [48]:
compute_metrics()

Using default tokenizer.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


({'rouge1': 0.3660217868410258,
  'rouge2': 0.15917668741895716,
  'rougeL': 0.265600360531973,
  'rougeLsum': 0.26549945270016706},
 [0.8795509338378906,
  0.8523111939430237,
  0.9090915322303772,
  0.8832314610481262,
  0.863692045211792,
  0.8799954652786255,
  0.900166392326355,
  0.8953736424446106,
  0.8614893555641174,
  0.9071393013000488,
  0.8959293961524963,
  0.8811298608779907,
  0.9079644083976746,
  0.8466359972953796,
  0.8692147135734558,
  0.8691572546958923,
  0.881332278251648,
  0.903672993183136,
  0.8439680933952332,
  0.901391327381134,
  0.9264350533485413,
  0.8692262172698975,
  0.9119788408279419,
  0.8993963003158569,
  0.9444679617881775,
  0.888576090335846,
  0.8767159581184387,
  0.8960230946540833,
  0.893300473690033,
  0.8608577847480774,
  0.8662177324295044,
  0.9085112810134888,
  0.8589503169059753,
  0.8771271109580994,
  0.8799881935119629,
  0.9088752269744873,
  0.8553120493888855,
  0.8939591646194458,
  0.8998949527740479,
  0.905174553394

In [7]:
def compute_rouge():
    # Compute ROUGE scores
    rouge_result = rouge.compute(perdictions=rouge_pred, references=rouge_ref)

    return rouge_result

In [141]:
input = df['article'][326]

In [142]:
print(input)

You probably never met Harry Stamps. I never met him either. And, sadly, now he's gone. Harry left this world last Saturday. He was 80. Of course, there's no real reason why any of us should know Harry. He's just some guy from Long Beach, Mississippi. Though I say that with complete reverence. "Just some guy" is usually the one who helps you fix the lawn mower. Or looks after your dog. Or loans you his truck so you can go to Costco and buy 80 cases of pudding and maybe some lobster dip. I have needs. A truck would be helpful. But while most of the world never got to meet the man, now, thanks to the Internet, countless thousands know his name. And it's all because of one of the greatest obituaries ever written. When Amanda Lewis sat down to eulogize her father there was no way she'd know her words would go viral. Generally speaking, obituaries don't get wildly passed around online, for they tend to lack cats. Which Harry hated. "He wouldn't know what going viral means,"Amanda told the l

In [143]:
output = df['highlights'][326]
print(output)

Harry Stamps passed away on Saturday, March 9. His daughter wrote an obituary that went viral. Obituary: Harry never lost in "competitive sickness." Harry hated daylight saving time.


In [144]:
print(summarize(input))


"Just some guy" is usually the one who helps you fix the lawn mower. Or loans you his truck so you can go to Costco and buy 80 cases of pudding and maybe some lobster dip. But thanks to the Internet, countless thousands know his name.


In [149]:
rouge_pred = [summarize(df['article'][326])]

In [150]:
rouge_ref = [df['highlights'][326]]

In [151]:
def compute_rouge():
    # Compute ROUGE scores
    rouge_result = rouge.compute(predictions=rouge_pred, references=rouge_ref)

    return rouge_result

In [152]:
compute_rouge()

Using default tokenizer.


{'rouge1': 0.027027027027027025,
 'rouge2': 0.0,
 'rougeL': 0.027027027027027025,
 'rougeLsum': 0.027027027027027025}