<a href="https://colab.research.google.com/github/KrishnaPothula/Text-Summarization-and-Model-Comparison-with-ROUGE-Metrics/blob/main/Text_Summarization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install accelerate

In [None]:
pip install transformers[torch]

In [None]:
pip install datasets

In [None]:
pip install transformers

In [None]:
pip install rouge

In [None]:
pip install rouge-score

In [None]:
pip install evaluate

In [None]:
import inspect

import numpy as np
import pandas as pd
import torch
from datasets import load_dataset
from rouge_score import rouge_scorer
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import pipeline

## Setup and Data Loading

In [None]:
# Load the 'cnn_dailymail' dataset, configuration '3.0.0', with datasets.load_dataset.
# The result is indexed by split name ('train', 'test', 'validation') in the next cell.
dataset = load_dataset('cnn_dailymail', '3.0.0')

In [None]:
# Materialise every split as a pandas DataFrame in one pass:
#   df  <- 'train', df1 <- 'test', df2 <- 'validation'
df, df1, df2 = (
    pd.DataFrame(dataset[split_name])
    for split_name in ('train', 'test', 'validation')
)

In [None]:
dataframes = [df, df1, df2]

# Clean every split in place: remove the 'id' column and any row with a missing value.
for dataframe in dataframes:
    # errors='ignore' keeps the cell re-runnable: on a second execution the
    # 'id' column is already gone and a plain drop() would raise KeyError.
    dataframe.drop(columns=['id'], inplace=True, errors='ignore')
    dataframe.dropna(inplace=True)


In [None]:
# (rows, columns) of df1, the frame built from dataset['test']
df1.shape

In [None]:
# Preview the first rows of df, the frame built from dataset['train']
df.head()

In [None]:
# Give the cleaned splits descriptive names.
# df was built from dataset['train'], df1 from dataset['test'] and df2 from
# dataset['validation'], so validation must come from df2 and test from df1
# (the two were previously swapped).
df_train = pd.DataFrame(df)  # Training data
df_val = pd.DataFrame(df2)  # Validation data
df_test = pd.DataFrame(df1)  # Test data


In [None]:
# Define a function to create a dataset with a limited number of samples
def prepare_dataset(dataframe, n_samples, dataset_name, random_state=None):
    """Sample rows from a DataFrame and reload them as a CSV-backed dataset.

    The sample is written to ``<dataset_name>.csv`` in the current working
    directory (overwriting any existing file) and then read back with
    ``load_dataset("csv", ...)``.

    Args:
        dataframe: pandas DataFrame to draw rows from.
        n_samples: Number of rows to draw; ``DataFrame.sample`` raises if it
            exceeds the number of available rows.
        dataset_name: Stem of the CSV file that is written and reloaded.
        random_state: Optional seed forwarded to ``DataFrame.sample``. The
            default ``None`` keeps the original unseeded behaviour; pass an
            integer to make the sample reproducible across runs.

    Returns:
        The object returned by ``load_dataset`` for the written CSV file
        (callers in this notebook index it with ``['train']``).
    """
    csv_path = f"{dataset_name}.csv"
    sample = dataframe.sample(n_samples, ignore_index=True, random_state=random_state)
    # index=False: the row index carries no information after ignore_index=True.
    sample.to_csv(csv_path, index=False)
    return load_dataset("csv", data_files=csv_path)

## Data Sampling and Tokenization

In [None]:
# Sample a subset of your dataset for training and validation
train_samples = 1000  # Adjust as needed
val_samples = 500  # Adjust as needed
test_samples = 50  # Adjust as needed

raw_train = prepare_dataset(df_train, train_samples, 'train')
raw_val = prepare_dataset(df_val, val_samples, 'val')

# Define the tokenizer for each model
model_checkpoints = {
    "BERTSUM": "bert-base-uncased",
    "T5": "t5-small",
    "GPT-2": "gpt2",
}

tokenizers = {model: AutoTokenizer.from_pretrained(model_checkpoint) for model, model_checkpoint in model_checkpoints.items()}

# tokenize_data pads with padding="max_length", which raises for a tokenizer
# that defines no padding token (GPT-2 ships without one). Fall back to the
# end-of-sequence token so every tokenizer in the dict can pad.
for _tokenizer in tokenizers.values():
    if _tokenizer.pad_token is None:
        _tokenizer.pad_token = _tokenizer.eos_token


## Fine-tuning of the Models

In [None]:
# Function for tokenizing
def tokenize_data(batch, model):
    """Tokenize a batch of articles (and highlights) for the given model key.

    Args:
        batch: Mapping with an "article" list and a "highlights" list.
        model: Key into the module-level ``tokenizers`` dict ("BERTSUM", "T5"
            or "GPT-2").

    Returns:
        The tokenizer output for the batch, truncated/padded to 512 tokens.
        When targets are tokenized, its "labels" entry has every padding
        position replaced by -100.
    """
    tokenizer = tokenizers[model]

    # T5 expects a task prefix; the other models receive the raw article.
    input_texts = [f"summarize: {doc}" for doc in batch["article"]] if model == "T5" else batch["article"]
    # BERTSUM is tokenized without targets, so no "labels" are produced for it.
    target_texts = batch["highlights"] if model != "BERTSUM" else None

    # No return_tensors here: datasets.map stores plain lists either way.
    model_inputs = tokenizer(input_texts, max_length=512, truncation=True, padding="max_length", text_target=target_texts)

    if "labels" in model_inputs:
        # Padding positions in the labels must be -100 so the loss ignores
        # them; otherwise the model is trained to predict pad tokens.
        pad_id = tokenizer.pad_token_id
        model_inputs["labels"] = [
            [-100 if token_id == pad_id else token_id for token_id in label_ids]
            for label_ids in model_inputs["labels"]
        ]
    return model_inputs

# Fine-tuning the model for each model
trained_models = {}

# Two keyword names differ between transformers releases
# ("evaluation_strategy" -> "eval_strategy", "tokenizer" -> "processing_class").
# The install cells do not pin a version, so use whichever name the installed
# release actually accepts.
_training_arg_names = inspect.signature(Seq2SeqTrainingArguments.__init__).parameters
_eval_strategy_kwarg = "eval_strategy" if "eval_strategy" in _training_arg_names else "evaluation_strategy"
_trainer_arg_names = inspect.signature(Seq2SeqTrainer.__init__).parameters
_tokenizer_kwarg = "processing_class" if "processing_class" in _trainer_arg_names else "tokenizer"

for model, model_checkpoint in model_checkpoints.items():
    print(f"Fine-tuning {model} model...")

    # Initialize the model first: it is needed by the data collator, and a
    # checkpoint that cannot be loaded should not cost a tokenization pass.
    # NOTE(review): AutoModelForSeq2SeqLM only accepts encoder-decoder
    # configurations, so "bert-base-uncased" and "gpt2" are rejected with a
    # ValueError. Report and skip them instead of aborting the whole loop;
    # comparing those models needs a different model class (and BERTSUM also
    # needs labels, which tokenize_data does not produce for it).
    try:
        summarization_model = AutoModelForSeq2SeqLM.from_pretrained(model_checkpoint)
    except ValueError as err:
        print(f"Skipping {model}: '{model_checkpoint}' could not be loaded as a seq2seq model ({str(err)[:200]})")
        continue

    # Tokenize the data
    tokenized_train = raw_train.map(lambda batch: tokenize_data(batch, model), batched=True)
    tokenized_val = raw_val.map(lambda batch: tokenize_data(batch, model), batched=True)

    # Data Collator: `model` must be the model object (not the checkpoint
    # name) so the collator can build decoder inputs from the labels.
    data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizers[model], model=summarization_model)

    # Training Arguments
    training_args = Seq2SeqTrainingArguments(
        output_dir=f'{model.lower()}_summarization',
        learning_rate=2e-5,
        per_device_train_batch_size=2,  # Adjust batch size as needed
        per_device_eval_batch_size=2,  # Adjust batch size as needed
        weight_decay=0.01,
        save_total_limit=3,
        num_train_epochs=1,  # Reduced the number of epochs
        eval_steps=100,  # Evaluate more frequently
        save_steps=100,  # Save model more frequently
        disable_tqdm=True,  # Disable tqdm progress bar for speed
        **{_eval_strategy_kwarg: 'steps'},
    )

    # Trainer
    trainer = Seq2SeqTrainer(
        model=summarization_model,
        args=training_args,
        # prepare_dataset loads a single CSV, so both sets live under 'train'.
        train_dataset=tokenized_train['train'],
        eval_dataset=tokenized_val['train'],
        data_collator=data_collator,
        compute_metrics=None,  # Compute ROUGE metrics after training
        **{_tokenizer_kwarg: tokenizers[model]},
    )

    # Train the model
    trainer.train()

    # Store the trained model
    trained_models[model] = trainer.model


## Evaluation and ROUGE Metrics

In [None]:
# Evaluation and ROUGE metrics for each model
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)


# Function to compute ROUGE metrics on the test set
def compute_rouge_metrics_test(summaries=None, references=None):
    """Score generated summaries against reference summaries with ROUGE.

    Args:
        summaries: Generated summaries. Defaults to the module-level
            ``generated_summaries`` of the most recently evaluated model.
        references: Reference summaries aligned with ``summaries``. Defaults
            to the module-level ``test_references``.

    Returns:
        Dict mapping 'rouge1', 'rouge2' and 'rougeL' to a list with one
        F-measure per (summary, reference) pair.
    """
    if summaries is None:
        summaries = generated_summaries
    if references is None:
        references = test_references

    rouge_scores = {'rouge1': [], 'rouge2': [], 'rougeL': []}
    for generated_summary, reference_summary in zip(summaries, references):
        scores = scorer.score(reference_summary, generated_summary)
        for metric in rouge_scores:
            rouge_scores[metric].append(scores[metric].fmeasure)
    return rouge_scores


# Preparing the test dataset once, outside the model loop, so every model is
# scored on exactly the same articles.
raw_test = prepare_dataset(df_test, test_samples, 'test')  # Adjust test_samples as needed
test_articles = raw_test['train']['article']
test_references = raw_test['train']['highlights']

results_tables = []

for model, trained_model in trained_models.items():
    tokenizer = tokenizers[model]
    trained_model.eval()

    # Generate summaries for the test set
    generated_summaries = []
    if model == "GPT-2":
        # Build the pipeline once per model, from the fine-tuned weights.
        # NOTE(review): only reachable if the training cell is changed to load
        # GPT-2 with a suitable model class — confirm the 'summarization'
        # pipeline accepts that class.
        summarizer = pipeline('summarization', model=trained_model, tokenizer=tokenizer)
        for article in test_articles:
            summary = summarizer(article, max_length=150, min_length=30, do_sample=False, truncation=True)
            generated_summaries.append(summary[0]['summary_text'])
    else:
        # For BERTSUM and T5 models: generate() needs token ids on the
        # model's device, not the raw article string.
        device = next(trained_model.parameters()).device
        for article in test_articles:
            # Same task prefix that tokenize_data applies for T5 at training time.
            text = f"summarize: {article}" if model == "T5" else article
            encoded = tokenizer(text, max_length=512, truncation=True, return_tensors="pt").to(device)
            with torch.no_grad():
                summary_ids = trained_model.generate(**encoded, max_length=150, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
            generated_summaries.append(tokenizer.decode(summary_ids[0], skip_special_tokens=True))

    # Compute ROUGE metrics on the test set
    rouge_test_scores = compute_rouge_metrics_test(generated_summaries, test_references)

    # Create a table to compare the ROUGE scores: one row per model holding
    # the mean F-measure (the per-article lists cannot share a 1-row index).
    results_table = pd.DataFrame(
        {metric: [np.mean(values)] for metric, values in rouge_test_scores.items()},
        index=[model],
    )
    results_tables.append(results_table)

    # Display the results table for the current model
    print(f"ROUGE Scores for {model} Model:")
    print(results_table)

# Side-by-side comparison of every evaluated model (empty if none was trained).
comparison_table = pd.concat(results_tables) if results_tables else pd.DataFrame(columns=['rouge1', 'rouge2', 'rougeL'])
