## Installing packages and importing Libraries

In [34]:
import warnings
warnings.filterwarnings("ignore")  # Suppress all warnings


In [36]:
!pip install torch transformers datasets rouge_score



In [38]:
import torch
import torch.nn as nn
from transformers import DataCollatorForSeq2Seq, AutoTokenizer, T5Tokenizer, T5ForConditionalGeneration
from torch.utils.data import DataLoader
from datasets import load_dataset
from peft import get_peft_model, LoraConfig, TaskType
from tqdm import tqdm
import torch.optim as optim
from rouge_score import rouge_scorer
import time

## Preprocessing Dataset

In [2]:
# Load CNN/DailyMail dataset
dataset = load_dataset("cnn_dailymail", "3.0.0")

# Train & test split
train_data = dataset["train"]
test_data = dataset["test"]

# Sample data
print(train_data[0])


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


README.md:   0%|          | 0.00/15.6k [00:00<?, ?B/s]

train-00000-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00001-of-00003.parquet:   0%|          | 0.00/257M [00:00<?, ?B/s]

train-00002-of-00003.parquet:   0%|          | 0.00/259M [00:00<?, ?B/s]

validation-00000-of-00001.parquet:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

{'article': 'LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported Â£20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won\'t cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don\'t plan to be one of those people who, as soon as they turn 18, suddenly buy themselves a massive sports car collection or something similar," he told an Australian interviewer earlier this month. "I don\'t think I\'ll be particularly extravagant. "The things I like buying are things that cost about 10 pounds -- books and CDs and DVDs." At 18, Radcliffe will be able to gamble in a casino, buy a drink in a pub or see the horror film "Hostel: Part II," currently six places below his number one movie on the UK box office cha

In [3]:
tokenizer = T5Tokenizer.from_pretrained("t5-small")

# Preprocessing Function
def preprocess_function(examples):
    inputs = ["summarize: " + str(doc) for doc in examples["article"]]

    model_inputs = tokenizer(
        inputs,
        max_length=512,
        truncation=True,
        padding="max_length",  # âœ… Ensure uniform size
        return_tensors="pt",  # âœ… Return PyTorch tensors
    )

    labels = tokenizer(
        examples["highlights"],
        max_length=128,
        truncation=True,
        padding="max_length",  # âœ… Ensure uniform size
        return_tensors="pt",
    )

    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Apply preprocessing correctly
train_dataset = train_data.map(preprocess_function, batched=True, remove_columns=["article", "highlights", "id"])
test_dataset = test_data.map(preprocess_function, batched=True, remove_columns=["article", "highlights", "id"])


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


Map:   0%|          | 0/287113 [00:00<?, ? examples/s]

Map:   0%|          | 0/11490 [00:00<?, ? examples/s]

In [4]:
# Collator dynamically pads inputs within a batch
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model="t5-small", return_tensors="pt")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

train_dataloader = DataLoader(
    train_dataset, batch_size=32, shuffle=True, collate_fn=data_collator, pin_memory=True, num_workers=2
)
test_dataloader = DataLoader(
    test_dataset, batch_size=32, shuffle=False, collate_fn=data_collator, pin_memory=True, num_workers=2
)


## Implementing PEFT LoRA ove T5-small for text summarization

In [6]:
# Load pre-trained T5 model
model1 = T5ForConditionalGeneration.from_pretrained("t5-small")

# Define LoRA configuration
lora_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=8,  # LoRA Rank
    lora_alpha=32,  # Scaling factor
    lora_dropout=0.05,  # Dropout rate
    target_modules=["q", "v"],  # Apply to attention layers
)

# Apply LoRA using `peft`
model1 = get_peft_model(model1, lora_config)
model1.print_trainable_parameters()


trainable params: 294,912 || all params: 60,801,536 || trainable%: 0.4850


In [13]:
# Optimizer
optimizer = optim.AdamW(model1.parameters(), lr=3e-4, weight_decay=0.01)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model1.to(device)

# Training loop
epochs = 2
scaler = torch.cuda.amp.GradScaler()  # Enable mixed precision

for epoch in range(epochs):
    model1.train()
    total_loss = 0
    batch_losses = []  # Store losses for real-time display

    progress_bar = tqdm(train_dataloader, desc=f"Epoch {epoch+1}", leave=True)

    for batch in progress_bar:
        inputs = {k: v.to(device) for k, v in batch.items()}

        optimizer.zero_grad()

        with torch.cuda.amp.autocast():  # Use mixed precision
            outputs = model1(**inputs)
            loss = outputs.loss

        # Check for NaN loss
        if torch.isnan(loss):
            print("NaN detected in loss, skipping batch")
            continue

        # Unscale before stepping
        scaler.scale(loss).backward()
        scaler.unscale_(optimizer)  # Prevent NaN gradients
        torch.nn.utils.clip_grad_norm_(model1.parameters(), max_norm=1.0)  # âœ… Gradient Clipping
        scaler.step(optimizer)
        scaler.update()

        batch_loss = loss.item()
        total_loss += batch_loss
        batch_losses.append(batch_loss)

        # Show live loss update in progress bar
        avg_loss = sum(batch_losses) / len(batch_losses)
        progress_bar.set_postfix({"Batch Loss": f"{batch_loss:.4f}", "Avg Loss": f"{avg_loss:.4f}"})

    epoch_loss = total_loss / len(train_dataloader)
    print(f"\nðŸ”¹ Epoch {epoch + 1}: Average Loss = {epoch_loss:.4f}\n")


  scaler = torch.cuda.amp.GradScaler()  # Enable mixed precision
  with torch.cuda.amp.autocast():  # Use mixed precision
Epoch 1: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8973/8973 [1:26:57<00:00,  1.72it/s, Batch Loss=1.1455, Avg Loss=1.1704]



ðŸ”¹ Epoch 1: Average Loss = 1.1704



Epoch 2: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 8973/8973 [1:26:55<00:00,  1.72it/s, Batch Loss=0.9536, Avg Loss=1.1598]


ðŸ”¹ Epoch 2: Average Loss = 1.1598






## Evaluation and Results

In [37]:
def evaluate_rouge(model, tokenizer, test_samples):
    model.eval()
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    total_scores = {"rouge1": 0, "rouge2": 0, "rougeL": 0}

    for sample in tqdm(test_samples, desc="Evaluating ROUGE", unit="sample"):
        input_text = "summarize: " + sample["article"]
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)

        with torch.no_grad():
            output = model.generate(**inputs, max_new_tokens=150)

        predicted_summary = tokenizer.decode(output[0], skip_special_tokens=True)
        reference_summary = sample["highlights"]

        scores = scorer.score(reference_summary, predicted_summary)
        for key in total_scores:
            total_scores[key] += scores[key].fmeasure

    # Average scores
    for key in total_scores:
        total_scores[key] /= len(test_samples)

    return total_scores

# Evaluate both models with tqdm progress bar
test_samples = list(test_data)[:100]
scores = evaluate_rouge(model1, tokenizer, test_samples)

# Print results with each score on a new line
print("Peft LoRA Results:")
for key, value in scores.items():
    print(f"{key}: {value:.4f}")



Evaluating ROUGE: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 100/100 [01:03<00:00,  1.58sample/s]

Peft LoRA Results:
rouge1: 0.3194
rouge2: 0.1227
rougeL: 0.2462





In [24]:
def measure_speed(model, tokenizer, test_samples, num_samples=50):
    start_time = time.time()
    for i in range(num_samples):
        input_text = "summarize: " + test_samples[i]["article"]
        inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)
        model.generate(**inputs, max_new_tokens=150)
    end_time = time.time()
    return (end_time - start_time) / num_samples  # Avg time per sample

print("Peft LoRA Inference Time:", measure_speed(model1, tokenizer, test_samples))


Peft LoRA Inference Time: 0.5928603363037109


In [26]:
def count_trainable_params(model):
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

print("Peft LoRA Trainable Params:", count_trainable_params(model1))


Peft LoRA Trainable Params: 294912


In [27]:
article_text = """The James Webb Space Telescope has captured stunning new images of the distant galaxy cluster SMACS 0723, revealing never-before-seen details of ancient galaxies.
These images, released by NASA, provide a glimpse into the universeâ€™s past, showcasing light that has traveled for billions of years.
Scientists believe that this breakthrough will help them better understand galaxy formation and evolution.
The telescopeâ€™s infrared capabilities allow it to peer through cosmic dust, uncovering structures that were previously hidden from view.
The new findings mark a significant step forward in humanityâ€™s quest to explore the cosmos."""


In [29]:
# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained("t5-small")

def generate_summary(model, text):
    input_text = "summarize: " + text
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, max_length=512).to(device)

    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=128)

    return tokenizer.decode(output[0], skip_special_tokens=True)

# Generate summaries
peft_lora_summary = generate_summary(model1, article_text)

# Print results
print("\nðŸ“œ **Original Article:**\n", article_text)
print("\nðŸ”¹ **Peft LoRA Summary:**\n", peft_lora_summary)



ðŸ“œ **Original Article:**
 The James Webb Space Telescope has captured stunning new images of the distant galaxy cluster SMACS 0723, revealing never-before-seen details of ancient galaxies.
These images, released by NASA, provide a glimpse into the universeâ€™s past, showcasing light that has traveled for billions of years.
Scientists believe that this breakthrough will help them better understand galaxy formation and evolution.
The telescopeâ€™s infrared capabilities allow it to peer through cosmic dust, uncovering structures that were previously hidden from view.
The new findings mark a significant step forward in humanityâ€™s quest to explore the cosmos.

ðŸ”¹ **Peft LoRA Summary:**
 James Webb Space Telescope has captured new images of the distant galaxy cluster SMACS 0723. The images provide a glimpse into the universe's past, revealing never-before-seen details of ancient galaxies. Scientists believe this breakthrough will help them better understand galaxy formation and evolut