Import all dependencies

In [1]:
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import evaluate
# .\.venv\Scripts\activate

  from .autonotebook import tqdm as notebook_tqdm


Load the CNN dataset (Data source)

In [2]:
dataset = load_dataset("cnn_dailymail", "3.0.0")
sample = dataset['test'][1]
article = sample['article']
reference_summary = sample['highlights']

Create Examples

In [3]:
example1_article = dataset['test'][1]['article'][:300]
example1_summary = dataset['test'][1]['highlights']

example2_article = dataset['test'][2]['article'][:300]
example2_summary = dataset['test'][2]['highlights']

prompt = (
    "Summarize the article based on the examples.\n\n"
    "Example 1:\n"
    f"Article: {example1_article}\n"
    f"Summary: {example1_summary}\n\n"
    "Example 2:\n"
    f"Article: {example2_article}\n"
    f"Summary: {example2_summary}\n\n"
    "Now summarize this article:\n"
    f"Article: {article}"
)

Generate Summary

In [6]:
model_name = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=1024)
outputs = model.generate(inputs.input_ids, max_new_tokens=200)

summary = tokenizer.decode(outputs[0], skip_special_tokens=True)
print("Few Shot Summary:\n", summary)

Few Shot Summary:
 A dog in Washington State has used up at least three of her own after being hit by a car, apparently whacked on the head with a hammer in a misguided mercy killing and then buried in a field -- only to survive.


Evaluate (ROUGE)

In [7]:
rouge = evaluate.load("rouge")
results = rouge.compute(predictions=[summary], references=[reference_summary])
print("\nROUGE SCORE:", results)


ROUGE SCORE: {'rouge1': np.float64(0.4), 'rouge2': np.float64(0.1927710843373494), 'rougeL': np.float64(0.3294117647058823), 'rougeLsum': np.float64(0.3764705882352941)}
