In [19]:
# Auto-classes that resolve the concrete architecture/tokenizer from a checkpoint.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from google.colab import drive

# The fine-tuned checkpoint lives on Google Drive, so Drive has to be mounted
# before anything is read from it.
drive.mount('/content/drive')

# Directory holding the fine-tuned model together with its tokenizer files.
fine_tuned_path = "/content/drive/My Drive/Colab Notebooks/article summarization nlp/fine-tuned-model"

# Both objects are restored from the same directory so they stay in sync.
tokenizer = AutoTokenizer.from_pretrained(fine_tuned_path)
fine_tuned_model = AutoModelForSeq2SeqLM.from_pretrained(fine_tuned_path)

# Display label -> Hugging Face Hub checkpoint id for each baseline model.
pretrained_models = {
    "BART": "facebook/bart-large-cnn",
    "PEGASUS": "google/pegasus-xsum",
    "T5": "t5-small",
}

# Both registries are keyed by the display label used in `pretrained_models`.
pretrained_model_objects, pretrained_tokenizers = {}, {}

for model_name in pretrained_models:
    model_path = pretrained_models[model_name]
    # Load the model first, then the tokenizer published with the same checkpoint.
    pretrained_model_objects[model_name] = AutoModelForSeq2SeqLM.from_pretrained(model_path)
    pretrained_tokenizers[model_name] = AutoTokenizer.from_pretrained(model_path)

def generate_summary(model, tokenizer, text, max_length=50, max_input_length=None):
    """Summarize ``text`` with beam search and return the decoded string.

    Args:
        model: Seq2seq model exposing ``generate`` and a ``device`` attribute.
        tokenizer: Tokenizer matching ``model``; it encodes ``text`` and decodes
            the generated ids.
        text: Input document to summarize.
        max_length: Upper bound on the summary length passed to ``generate``.
        max_input_length: Token limit used to truncate the input. When ``None``
            it is resolved from ``tokenizer.model_max_length``, then from
            ``model.config.max_position_embeddings``, and otherwise falls back
            to 512.

    Returns:
        The first generated sequence, decoded with special tokens removed.
    """
    if max_input_length is None:
        # A tokenizer without a configured limit reports a huge sentinel value;
        # with truncation=True and no max_length nothing is truncated at all,
        # so resolve a concrete limit here.
        unset_threshold = 1_000_000
        candidates = (
            getattr(tokenizer, "model_max_length", None),
            getattr(getattr(model, "config", None), "max_position_embeddings", None),
        )
        max_input_length = next(
            (
                int(limit)
                for limit in candidates
                if isinstance(limit, (int, float)) and 0 < limit < unset_threshold
            ),
            512,
        )

    inputs = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=max_input_length,
        padding="longest",
    ).to(model.device)

    # Keep the lower bound feasible when the caller asks for a very short summary.
    min_length = 10 if max_length is None else min(10, max_length)

    summary_ids = model.generate(
        **inputs,
        max_length=max_length,
        min_length=min_length,
        length_penalty=1.5,
        repetition_penalty=2.0,
        num_beams=4,
        early_stopping=True,
        no_repeat_ngram_size=3
    )
    # Only the first returned sequence is decoded.
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Two short news-style passages fed to every model in the comparison.
exoplanet_passage = "Scientists have discovered a new exoplanet that may be habitable. The planet, located 100 light-years away, has conditions suitable for liquid water and an atmosphere similar to Earth's."
exercise_passage = "A new study suggests that regular exercise can significantly reduce the risk of heart disease. Researchers found that even moderate physical activity, like walking 30 minutes a day, has long-term health benefits."

# Order matters: summaries and printed samples are aligned by position.
test_samples = [exoplanet_passage, exercise_passage]

!pip install evaluate rouge-score

import evaluate

# Metric objects from the `evaluate` library, reused for every model scored below.
rouge, bleu = evaluate.load("rouge"), evaluate.load("bleu")

# Summaries from the fine-tuned checkpoint, in the same order as `test_samples`.
fine_tuned_summaries = []
for text in test_samples:
    fine_tuned_summaries.append(generate_summary(fine_tuned_model, tokenizer, text))

# One (initially empty) list of summaries per baseline label.
pretrained_summaries = {model_name: [] for model_name in pretrained_models}

for model_name, model in pretrained_model_objects.items():
    # Each baseline must be paired with the tokenizer loaded from its own checkpoint.
    baseline_tokenizer = pretrained_tokenizers[model_name]
    pretrained_summaries[model_name].extend(
        generate_summary(model, baseline_tokenizer, text) for text in test_samples
    )

# BLEU takes a list of reference lists per prediction, so wrap each passage.
# Note: the references here are the source passages themselves, so both
# metrics measure overlap with the input text rather than with gold summaries.
references = [[sample] for sample in test_samples]


def _score_against_sources(predictions):
    """Return ROUGE and BLEU results for ``predictions`` vs. the test passages."""
    return {
        "ROUGE": rouge.compute(predictions=predictions, references=test_samples),
        "BLEU": bleu.compute(predictions=predictions, references=references),
    }


fine_tuned_scores = _score_against_sources(fine_tuned_summaries)
fine_tuned_rouge = fine_tuned_scores["ROUGE"]
fine_tuned_bleu = fine_tuned_scores["BLEU"]

# Same scoring for every baseline, keyed by its display label.
pretrained_scores = {
    model_name: _score_against_sources(pretrained_summaries[model_name])
    for model_name in pretrained_models
}

def _print_scores(heading, rouge_result, bleu_result):
    """Print one model's metric block under the given heading line."""
    print(heading)
    print(f"ROUGE Scores: {rouge_result}")
    print(f"BLEU Score: {bleu_result}")


# Metric overview: fine-tuned model first, then each baseline in declaration order.
_print_scores("=== Fine-Tuned Model ===", fine_tuned_rouge, fine_tuned_bleu)

for model_name in pretrained_models:
    baseline_scores = pretrained_scores[model_name]
    _print_scores(
        f"\n=== {model_name} Pretrained Model ===",
        baseline_scores["ROUGE"],
        baseline_scores["BLEU"],
    )

# Side-by-side view of every model's summary for each input passage.
for sample_number, (text, tuned_summary) in enumerate(zip(test_samples, fine_tuned_summaries), start=1):
    print(f"\n=== Sample {sample_number} ===")
    print(f"Original Text: {text}\n")
    print(f"Fine-Tuned Model Summary: {tuned_summary}\n")

    for model_name in pretrained_models:
        baseline_summary = pretrained_summaries[model_name][sample_number - 1]
        print(f"{model_name} Pretrained Model Summary: {baseline_summary}\n")

    print("="*50)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




config.json:   0%|          | 0.00/1.39k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/259 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/3.52M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]



Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


=== Fine-Tuned Model ===
ROUGE Scores: {'rouge1': np.float64(0.9345501955671447), 'rouge2': np.float64(0.8638262322472849), 'rougeL': np.float64(0.6159061277705346), 'rougeLsum': np.float64(0.6159061277705346)}
BLEU Score: {'bleu': 0.8386910344747599, 'precisions': [0.9545454545454546, 0.890625, 0.8387096774193549, 0.7833333333333333], 'brevity_penalty': 0.9701515036966302, 'length_ratio': 0.9705882352941176, 'translation_length': 66, 'reference_length': 68}

=== BART Pretrained Model ===
ROUGE Scores: {'rouge1': np.float64(0.7869034406215316), 'rouge2': np.float64(0.7783113245298119), 'rougeL': np.float64(0.7869034406215316), 'rougeLsum': np.float64(0.7869034406215316)}
BLEU Score: {'bleu': 0.6087411826689866, 'precisions': [1.0, 0.9772727272727273, 0.9761904761904762, 0.975], 'brevity_penalty': 0.6198604731563714, 'length_ratio': 0.6764705882352942, 'translation_length': 46, 'reference_length': 68}

=== PEGASUS Pretrained Model ===
ROUGE Scores: {'rouge1': np.float64(0.38068181818181