In [1]:
import pandas as pd
import numpy as np
from datasets import load_metric
from transformers import pipeline, set_seed
from nltk.tokenize import sent_tokenize  # Make sure to import the necessary library

def baseline_summary_three_sent(text):
    sentences = sent_tokenize(text)
    return "\n".join(sentences[:])


def evaluate_summarization_models(models):
    # Load BLEU metric
    bleu_metric = load_metric("sacrebleu")

    # Load ROUGE metric
    rouge_metric = load_metric("rouge")
    rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]

    # User input for summarization
    user_input = input("Enter the text for summarization:\n")

    # Display user input
    print("\nUser Input:")
    print(user_input)

    # Dictionary to store BLEU and ROUGE scores for each model
    scores_dict = {}

    # Calculate and store BLEU and ROUGE scores for each model
    for model_name, summary_func in models.items():
        # Generate summary using the specified function
        generated_summary = summary_func(user_input)

        # Display generated summary
        print(f"\nGenerated Summary ({model_name}):")
        print(generated_summary)

        # BLEU score (without reference summary)
        bleu_metric.add(prediction=[generated_summary], reference=[""])
        bleu_results = bleu_metric.compute(smooth_method='floor', smooth_value=0)
        bleu_score = np.round(bleu_results['precisions'][0], 2)

        # ROUGE scores (without reference summary)
        rouge_metric.add(prediction=generated_summary, reference="")
        rouge_results = rouge_metric.compute()
        rouge_scores = dict((rn, np.round(rouge_results[rn].mid.fmeasure, 4)) for rn in rouge_names)

        # Store scores in the dictionary
        scores_dict[model_name] = {"BLEU": bleu_score, **rouge_scores}

    # Convert the scores dictionary to a DataFrame for better visualization
    scores_df = pd.DataFrame.from_dict(scores_dict, orient='index')

    # Display the scores DataFrame
    print("\nScores for each model:")
    print(scores_df)

# Example usage:
models = {
    'baseline': baseline_summary_three_sent,
    'gpt2': lambda x: pipeline('text-generation', model='gpt2-medium')(x, clean_up_tokenization_spaces=True)[0]["generated_text"],
    't5': lambda x: pipeline('summarization', model='t5-small')(x)[0]['summary_text'],
    'bart': lambda x: pipeline('summarization', model='facebook/bart-large-cnn')(x)[0]["summary_text"],
    'pegasus': lambda x: pipeline('summarization', model='google/pegasus-cnn_dailymail')(x)[0]["summary_text"].replace(" .<n>", ".\n")
}

evaluate_summarization_models(models)







  bleu_metric = load_metric("sacrebleu")



User Input:
Adani is also among the most controversial of India’s billionaires for his association with the Bharatiya Janata Party (BJP). His close relationship with the party is not coincidental: Adani frequently refers to his business strategy as motivated by “nation building,” which the Adani Group describes on its website as “helping build world-class infrastructure capabilities to help accelerate the growth of India.” Mundra Port and its associated Adani Special Economic Zone, the central components of Adani’s business empire, were attained and developed in cooperation with the Gujarat state government. The BJP led the Gujarat state government during key moments of the Adani Group’s growth, and the relationship resulted in the symbiotic rise of both the BJP and the Adani Group.     

Generated Summary (baseline):
Adani is also among the most controversial of India’s billionaires for his association with the Bharatiya Janata Party (BJP).
His close relationship with the party is no

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.



Generated Summary (gpt2):
Adani is also among the most controversial of India’s billionaires for his association with the Bharatiya Janata Party (BJP). His close relationship with the party is not coincidental: Adani frequently refers to his business strategy as motivated by “nation building,” which the Adani Group describes on its website as “helping build world-class infrastructure capabilities to help accelerate the growth of India.” Mundra Port and its associated Adani Special Economic Zone, the central components of Adani’s business empire, were attained and developed in cooperation with the Gujarat state government. The BJP led the Gujarat state government during key moments of the Adani Group’s growth, and the relationship resulted in the symbiotic rise of both the BJP and the Adani Group.      


Your max_length is set to 200, but your input_length is only 171. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=85)



Generated Summary (t5):
Adani is among the most controversial billionaires for his association with the Bharatiya Janata Party (BJP) he frequently refers to his business strategy as motivated by “nation building” .

Generated Summary (bart):
Adani is among the most controversial of India’s billionaires for his association with the Bharatiya Janata Party. Mundra Port and its associated Adani Special Economic Zone were attained and developed in cooperation with the Gujarat state government. The relationship resulted in the symbiotic rise of both the BJP and the Adani Group.


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-cnn_dailymail and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



Generated Summary (pegasus):
Adani is among the most controversial of India’s billionaires for his association with the Bharatiya Janata Party ( BJP)<n>The BJP led the Gujarat state government during key moments of the Adani Group’s growth, and the relationship resulted in the symbiotic rise of both the BJP and the Adani Group .

Scores for each model:
          BLEU  rouge1  rouge2  rougeL  rougeLsum
baseline   0.0     0.0     0.0     0.0        0.0
gpt2       0.0     0.0     0.0     0.0        0.0
t5         0.0     0.0     0.0     0.0        0.0
bart       0.0     0.0     0.0     0.0        0.0
pegasus    0.0     0.0     0.0     0.0        0.0
