# **SummarizeAI: News Article Summarization using T5**

In [2]:
!pip install torch transformers datasets rouge-score nltk sentencepiece



In [3]:
import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
from rouge_score import rouge_scorer
from nltk.translate.bleu_score import sentence_bleu
from transformers import pipeline

In [4]:
dataset = load_dataset('cnn_dailymail', '3.0.0', split={'train': 'train[:1%]', 'validation': 'validation[:1%]', 'test': 'test[:1%]'})
print(dataset['train'][0])

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


{'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office char

In [5]:
tokenizer = T5Tokenizer.from_pretrained('t5-small')

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


In [6]:
def preprocess_data(examples):
    model_inputs = tokenizer(examples['article'], max_length=256, truncation=True, padding='max_length')
    labels = tokenizer(examples['highlights'], max_length=80, truncation=True, padding='max_length')
    model_inputs['labels'] = labels['input_ids']
    return model_inputs

In [7]:
tokenized_datasets = {split: dataset[split].map(preprocess_data, batched=True) for split in dataset}

Map:   0%|          | 0/2871 [00:00<?, ? examples/s]

Map:   0%|          | 0/134 [00:00<?, ? examples/s]

Map:   0%|          | 0/115 [00:00<?, ? examples/s]

In [8]:
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=4,  # Reduced batch size
    per_device_eval_batch_size=4,   # Reduced batch size
    num_train_epochs=1,  # Reduced epochs for faster training
    weight_decay=0.01,
    save_total_limit=2,
)



In [9]:
model = T5ForConditionalGeneration.from_pretrained('t5-small')

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

In [10]:
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets['train'],
    eval_dataset=tokenized_datasets['validation'],
)


In [11]:
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mdmaker401[0m ([33mdmaker401-graphic-era-university[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,2.7338,1.412271


TrainOutput(global_step=718, training_loss=2.560815412686064, metrics={'train_runtime': 3845.3008, 'train_samples_per_second': 0.747, 'train_steps_per_second': 0.187, 'total_flos': 194283156013056.0, 'train_loss': 2.560815412686064, 'epoch': 1.0})

In [12]:
# Save trained model and tokenizer
trainer.save_model("./results")
tokenizer.save_pretrained("./results")

('./results/tokenizer_config.json',
 './results/special_tokens_map.json',
 './results/spiece.model',
 './results/added_tokens.json')

In [13]:
# Load trained model for evaluation
model = T5ForConditionalGeneration.from_pretrained('./results')
summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)

Device set to use cpu


In [14]:
# Handle missing test set
dataset_key = 'test' if 'test' in tokenized_datasets else 'validation'
print(f"Using '{dataset_key}' dataset for evaluation.")

Using 'test' dataset for evaluation.


In [15]:
# Initialize metrics
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
rouge_scores, bleu_scores = [], []

In [16]:
# Evaluate on a small sample of 10 examples
for i in range(min(10, len(tokenized_datasets[dataset_key]))):
    example = tokenized_datasets[dataset_key][i]
    article = tokenizer.decode(example['input_ids'], skip_special_tokens=True)
    reference = tokenizer.decode(example['labels'], skip_special_tokens=True)

    # Generate summary
    summary = summarizer(article, max_length=80, min_length=30, do_sample=False)[0]['summary_text']

    # Compute ROUGE and BLEU scores
    rouge_score = scorer.score(reference, summary)
    rouge_scores.append(rouge_score)
    bleu_score = sentence_bleu([reference.split()], summary.split())
    bleu_scores.append(bleu_score)

The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [17]:
# Calculate average scores
avg_rouge1 = np.mean([score['rouge1'].fmeasure for score in rouge_scores])
avg_rouge2 = np.mean([score['rouge2'].fmeasure for score in rouge_scores])
avg_rougeL = np.mean([score['rougeL'].fmeasure for score in rouge_scores])
avg_bleu = np.mean(bleu_scores)

print(f"Average ROUGE-1: {avg_rouge1}")
print(f"Average ROUGE-2: {avg_rouge2}")
print(f"Average ROUGE-L: {avg_rougeL}")
print(f"Average BLEU: {avg_bleu}")

Average ROUGE-1: 0.3333212084288353
Average ROUGE-2: 0.11965005423031776
Average ROUGE-L: 0.24304145211726236
Average BLEU: 0.03808745197947644


In [18]:
# Calculate Perplexity
eval_results = trainer.evaluate()
perplexity = np.exp(eval_results['eval_loss'])
print(f"Perplexity: {perplexity}")

Perplexity: 4.1052660251606365


In [19]:
# Store results in a DataFrame
results = pd.DataFrame({
    'Metric': ['ROUGE-1', 'ROUGE-2', 'ROUGE-L', 'BLEU', 'Perplexity'],
    'Score': [avg_rouge1, avg_rouge2, avg_rougeL, avg_bleu, perplexity]
})

print(results)

       Metric     Score
0     ROUGE-1  0.333321
1     ROUGE-2  0.119650
2     ROUGE-L  0.243041
3        BLEU  0.038087
4  Perplexity  4.105266
