In [1]:
# STEP 1: Load Preprocessed CNN/DailyMail Multi-Document Dataset
from google.colab import files
import json

print("📁 Please upload your `multidoc_test.jsonl` file...")
uploaded = files.upload()

with open("multidoc_test.jsonl", "r") as f:
    test_data = [json.loads(line) for line in f]

# Use only the first 10 samples for quick testing
test_data = test_data[:10]
print(f"✅ Loaded {len(test_data)} multi-document samples.")


📁 Please upload your `multidoc_test.jsonl` file...


Saving multidoc_test.jsonl to multidoc_test.jsonl
✅ Loaded 10 multi-document samples.


In [2]:
# STEP 2: Load BART Model and Tokenizer
from transformers import BartTokenizer, BartForConditionalGeneration

model_name = "facebook/bart-large-cnn"
tokenizer = BartTokenizer.from_pretrained(model_name)
model = BartForConditionalGeneration.from_pretrained(model_name)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

In [3]:
# Generate summaries using BART
generated_summaries = []

for item in test_data:
    input_text = " ".join(item["documents"])[:1024]  # truncate input for BART
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True, max_length=1024)

    summary_ids = model.generate(
        **inputs,
        num_beams=4,
        max_length=256,
        min_length=32,
        length_penalty=2.0,
        early_stopping=True
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    generated_summaries.append({
        "generated": summary,
        "reference": item["summary"]
    })

print("✅ Inference complete. Summaries generated.")


✅ Inference complete. Summaries generated.


In [5]:
!pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24934 sha256=4ca56ecb6a6b8e3cdfb8244bb03fbd823c211b673f49ef4ffcf8b572ade6fcce
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [6]:
# STEP 3: Evaluate ROUGE Scores
from rouge_score import rouge_scorer
import time

scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
scores = {"rouge1": [], "rouge2": [], "rougeL": []}

start_time = time.time()

for pair in generated_summaries:
    score = scorer.score(pair["reference"], pair["generated"])
    for k in scores:
        scores[k].append(score[k].fmeasure)

end_time = time.time()
avg_scores = {k: sum(v)/len(v) for k, v in scores.items()}

print("🔍 ROUGE Scores:")
for k, v in avg_scores.items():
    print(f"{k}: {v:.4f}")

print(f"\n⏱️ Avg time per sample: {(end_time - start_time)/len(generated_summaries):.2f} sec")


🔍 ROUGE Scores:
rouge1: 0.2428
rouge2: 0.1030
rougeL: 0.1826

⏱️ Avg time per sample: 0.01 sec
