In [None]:
# STEP 1: Imports and Setup
import nltk
import torch
import numpy as np
import time
import json
from datasets import load_dataset
from sklearn.cluster import KMeans
from sentence_transformers import SentenceTransformer
from rouge_score import rouge_scorer

# Fetch the "punkt" tokenizer resource from NLTK's data index.
nltk.download("punkt")

# Prefer the GPU when PyTorch reports one is available; otherwise use the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("✅ Using device:", device)


✅ Using device: cpu


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [None]:
# ✅ FIXING FSSPEC COMPATIBILITY ISSUE
!pip install fsspec==2023.6.0 --quiet


In [None]:
from datasets import load_dataset

# Size of the quick-evaluation subset and of each multi-document group.
# Keeping the subset size in one place prevents the selection and the
# grouping loop from drifting apart when the value is changed.
NUM_ARTICLES = 50
group_size = 10

# Load full test set
raw_dataset = load_dataset("cnn_dailymail", "3.0.0", split="test")

# Select only the first NUM_ARTICLES articles for quick MDS
subset_dataset = raw_dataset.select(range(NUM_ARTICLES))

# Group consecutive articles into multi-document samples of `group_size`
# articles each. Each sample concatenates the articles into one input text
# and their highlights into one reference summary.
multidoc_test = []
num_selected = len(subset_dataset)

for group_start in range(0, num_selected, group_size):
    # Clamp the upper bound so a subset size that is not a multiple of
    # `group_size` yields a shorter final group instead of an index error.
    group_stop = min(group_start + group_size, num_selected)
    docs = subset_dataset.select(range(group_start, group_stop))
    documents = [doc["article"] for doc in docs]
    summaries = [doc["highlights"] for doc in docs]

    multidoc_test.append({
        "documents": " ".join(documents),
        "summary": " ".join(summaries)
    })

print(f"✅ Prepared {len(multidoc_test)} multi-document samples.")


✅ Prepared 5 multi-document samples.


In [None]:
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from rouge_score import rouge_scorer

# Sentence-tokenizer data for nltk.sent_tokenize. Both resources are requested
# once each: "punkt" is the classic resource name, and "punkt_tab" is requested
# as well because newer NLTK releases appear to load that one instead
# (NOTE(review): confirm against the installed NLTK version).
nltk.download("punkt")
nltk.download("punkt_tab")

# Break a block of text into its individual sentences
def split_sentences(text):
    """Return the list of sentences that `nltk.sent_tokenize` finds in `text`."""
    sentences = nltk.sent_tokenize(text)
    return sentences

# Simple TF-IDF-based Absformer-style summarizer
def absformer_summarize(text, num_sentences=5):
    """Build an extractive summary from the highest-scoring sentences of `text`.

    The text is split with `split_sentences`; every sentence is vectorized
    with `TfidfVectorizer` (one row per sentence) and scored by the sum of
    its TF-IDF row. The `num_sentences` best-scoring sentences are kept and
    emitted in their original order, joined by single spaces.

    Args:
        text: Input text to summarize.
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The summary string. If the text has at most `num_sentences`
        sentences, all of them are returned. If `num_sentences` is zero or
        negative, an empty string is returned.
    """
    # Guard: without this, `[-0:]` below would select *every* sentence and
    # negative values would produce arbitrary slices.
    if num_sentences <= 0:
        return ""

    sentences = split_sentences(text)
    if len(sentences) <= num_sentences:
        return " ".join(sentences)

    tfidf = TfidfVectorizer().fit_transform(sentences)
    # Row sums give one aggregate score per sentence.
    scores = np.asarray(tfidf.sum(axis=1)).ravel()
    # argsort is ascending, so the tail holds the top scorers; sorting the
    # chosen indices restores the sentences' original order.
    top_indices = sorted(np.argsort(scores)[-num_sentences:])

    return " ".join(sentences[i] for i in top_indices)

# Generate summaries for the test samples and time the whole pass.
generated_absformer = []
# perf_counter is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() can jump if the system clock is adjusted.
start = time.perf_counter()

for item in multidoc_test:
    generated = absformer_summarize(item["documents"], num_sentences=5)
    # Keep the reference next to the generated text for later scoring.
    generated_absformer.append({
        "reference": item["summary"],
        "generated": generated
    })

elapsed = time.perf_counter() - start
print("✅ Absformer Generated", len(generated_absformer), "summaries.")
# max(..., 1) keeps the average well-defined when there are no samples.
print(f"⏱️ Avg Time/sample (Absformer): {elapsed / max(len(generated_absformer), 1):.4f} sec")



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


✅ Absformer Generated 5 summaries.
⏱️ Avg Time/sample (Absformer): 0.0574 sec


In [None]:
from rouge_score import rouge_scorer

# Reported label -> metric key understood by rouge_score
rouge_keys = {"ROUGE-1": "rouge1", "ROUGE-2": "rouge2", "ROUGE-L": "rougeL"}

# Initialize ROUGE scorer
scorer = rouge_scorer.RougeScorer(list(rouge_keys.values()), use_stemmer=True)

# Score every (reference, generated) pair exactly once
n = len(generated_absformer)
pair_scores = [
    scorer.score(entry["reference"], entry["generated"])
    for entry in generated_absformer
]

# Average F-measure per metric across all pairs
scores_absformer = {
    label: sum(result[key].fmeasure for result in pair_scores) / n
    for label, key in rouge_keys.items()
}

# Print scores
print("📊 ROUGE (Absformer):", scores_absformer)



📊 ROUGE (Absformer): {'ROUGE-1': 0.3510844279701286, 'ROUGE-2': 0.05825467752300626, 'ROUGE-L': 0.14871108579979753}
