In [None]:
pip install transformers pandas nltk rouge-score tqdm sentencepiece

In [4]:
import pandas as pd
from transformers import BartForConditionalGeneration, PegasusForConditionalGeneration, T5ForConditionalGeneration, BartTokenizer, PegasusTokenizer, T5Tokenizer
from nltk.translate.bleu_score import corpus_bleu
from nltk.translate.bleu_score import SmoothingFunction
from nltk.translate.bleu_score import sentence_bleu
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from tqdm import tqdm

# Load the dataset
data = pd.read_excel("ar_sum_dataset.xlsx", nrows=5)
articles = data['content'][:10].tolist()
human_summaries = data['human_summary'][:10].tolist()

# Initializing models and tokenizers
bart_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

# Function for text summarization
def generate_summary(model, tokenizer, input_text, max_length=150):
    inputs = tokenizer.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(inputs, max_length=max_length, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Generate summaries for each model
bart_summaries = []

for article in tqdm(articles):
    # BART
    bart_summary = generate_summary(bart_model, bart_tokenizer, article)
    bart_summaries.append(bart_summary)

# Evaluating ROUGE scores
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
scores_bart = [scorer.score(human_summaries[i], bart_summaries[i]) for i in range(len(human_summaries))]

# ROUGE scores
rouge_scores = {
    "BART": {
        "ROUGE-1": [score['rouge1'].fmeasure for score in scores_bart],
        "ROUGE-2": [score['rouge2'].fmeasure for score in scores_bart],
        "ROUGE-L": [score['rougeL'].fmeasure for score in scores_bart]
    }
}

# ROUGE scores in a table
rouge_df = pd.DataFrame(rouge_scores)

print("ROUGE Scores:")
print(rouge_df)


100%|██████████| 5/5 [02:11<00:00, 26.33s/it]

ROUGE Scores:
                                                      BART
ROUGE-1  [0.2435897435897436, 0.13114754098360656, 0.24...
ROUGE-2  [0.14193548387096774, 0.03305785123966942, 0.0...
ROUGE-L  [0.17307692307692307, 0.09016393442622951, 0.1...



