Import all dependencies

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import evaluate
# Run this notebook from the project's virtual environment (Windows: .\.venv\Scripts\activate)

  from .autonotebook import tqdm as notebook_tqdm


Load the CNN/DailyMail dataset (data source)

In [2]:
# Load the "cnn_dailymail" dataset (config "3.0.0") and take the test-split
# record at index 1 as the single example used in this notebook.
dataset = load_dataset("cnn_dailymail", "3.0.0")
test_split = dataset["test"]
sample = test_split[1]

# "article" holds the text to summarize; "highlights" holds the reference summary.
article, reference_summary = sample["article"], sample["highlights"]

# Preview only the first 500 characters of the article to keep the output short.
article_preview = article[:500]
print("ARTICLE:\n", article_preview, "...\n")
print("REFERENCE SUMMARY:\n", reference_summary, "\n")

ARTICLE:
 (CNN)Never mind cats having nine lives. A stray pooch in Washington State has used up at least three of her own after being hit by a car, apparently whacked on the head with a hammer in a misguided mercy killing and then buried in a field -- only to survive. That's according to Washington State University, where the dog -- a friendly white-and-black bully breed mix now named Theia -- has been receiving care at the Veterinary Teaching Hospital. Four days after her apparent death, the dog managed  ...

REFERENCE SUMMARY:
 Theia, a bully breed mix, was apparently hit by a car, whacked with a hammer and buried in a field .
"She's a true miracle dog and she deserves a good life," says Sara Mellado, who is looking for a home for Theia . 



Generate Summary

In [3]:
# Load the seq2seq checkpoint and its matching tokenizer.
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

# Instruction-based prompt: the instruction comes first, followed by the article text.
prompt = f"Please summarize the following article in 2–3 sentences:\n\n{article}"

# The encoded prompt is capped at 512 tokens (truncation=True), so the tail of
# a long article is dropped before it reaches the model.
inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)

# Pass the attention mask alongside the input ids instead of the ids alone, so
# generate() does not have to infer which positions are real tokens.
outputs = model.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=200,
)

# Only one prompt was encoded, so the first (and only) sequence is the summary.
summary = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("Instruction-based Summary:\n", summary, "\n")

Instruction-based Summary:
 A dog in Washington State has used up at least three of her own after being hit by a car, apparently whacked on the head with a hammer in a misguided mercy killing and then buried in a field -- only to survive. 



Evaluate (ROUGE)

In [4]:
# Score the generated summary against the reference summary with the ROUGE metric.
rouge = evaluate.load("rouge")
results = rouge.compute(
    references=[reference_summary],
    predictions=[summary],
)
print("ROUGE Score - Instruction-based:", results)

ROUGE Score - Instruction-based: {'rouge1': np.float64(0.4), 'rouge2': np.float64(0.1927710843373494), 'rougeL': np.float64(0.3294117647058823), 'rougeLsum': np.float64(0.3764705882352941)}
