In [7]:
# STEP 1: Final package fix — clean uninstall + reinstall compatible versions
!pip uninstall -y gcsfs
!pip install --upgrade fsspec==2023.6.0


Found existing installation: gcsfs 2025.3.2
Uninstalling gcsfs-2025.3.2:
  Successfully uninstalled gcsfs-2025.3.2


In [None]:
# Must restart the runtime AFTER package fixes before loading dataset
import os
os.kill(os.getpid(), 9)


In [1]:
# STEP 3: Now load the dataset — this should succeed
from datasets import load_dataset

raw_dataset = load_dataset("cnn_dailymail", "3.0.0", split="test")
subset = raw_dataset.select(range(50))

# Form 5 multi-document samples, 10 docs per sample
multidoc_test = []
for i in range(0, 50, 10):
    docs = subset[i:i+10]
    documents = docs["article"]
    reference_summary = " ".join(docs["highlights"])
    multidoc_test.append({
        "documents": documents,
        "summary": reference_summary
    })

print("✅ Created", len(multidoc_test), "multi-document samples.")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Downloading data files:   0%|          | 0/3 [00:00<?, ?it/s]

Downloading data:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/259M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

Extracting data files:   0%|          | 0/3 [00:00<?, ?it/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

✅ Created 5 multi-document samples.


In [16]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import torch
import numpy as np

# ✅ Build a global TF-IDF vectorizer using all sentences across all documents
from sklearn.feature_extraction.text import TfidfVectorizer

all_sentences = []
for item in multidoc_test:
    for doc in item["documents"]:
        all_sentences.extend(doc.split(". "))

global_vectorizer = TfidfVectorizer()
global_vectorizer.fit(all_sentences)

print("✅ Global TF-IDF vectorizer fitted on", len(all_sentences), "sentences.")

def build_sentence_graph(docs, vectorizer, threshold=0.2):
    sentences = []
    doc_id_map = []
    for doc_idx, doc in enumerate(docs):
        sents = doc.split(". ")
        for sent in sents:
            sent = sent.strip()
            if sent:
                sentences.append(sent)
                doc_id_map.append(doc_idx)

    # TF-IDF using pre-fitted global vectorizer
    tfidf = vectorizer.transform(sentences)
    similarity_matrix = cosine_similarity(tfidf)

    # Build adjacency matrix with threshold
    adjacency_matrix = (similarity_matrix > threshold).astype(np.float32)
    np.fill_diagonal(adjacency_matrix, 0)  # remove self-loops

    return sentences, torch.tensor(adjacency_matrix), torch.tensor(tfidf.toarray(), dtype=torch.float32), doc_id_map



✅ Global TF-IDF vectorizer fitted on 1494 sentences.


In [17]:
import torch.nn as nn
import torch.nn.functional as F

class GCNLayer(nn.Module):
    def __init__(self, in_features, out_features):
        super(GCNLayer, self).__init__()
        self.linear = nn.Linear(in_features, out_features)

    def forward(self, x, adj):
        out = torch.matmul(adj, x)
        out = self.linear(out)
        return F.relu(out)

class GCNEncoder(nn.Module):
    def __init__(self, in_features, hidden_dim, out_features):
        super(GCNEncoder, self).__init__()
        self.gcn1 = GCNLayer(in_features, hidden_dim)
        self.gcn2 = GCNLayer(hidden_dim, out_features)

    def forward(self, x, adj):
        x = self.gcn1(x, adj)
        x = self.gcn2(x, adj)
        return x  # returns sentence embeddings


In [18]:
import torch.nn as nn

# Simple scorer network to assign relevance scores to each sentence embedding
class SentenceScorer(nn.Module):
    def __init__(self, input_dim):
        super(SentenceScorer, self).__init__()
        self.linear = nn.Linear(input_dim, 1)

    def forward(self, sentence_embeddings):
        scores = self.linear(sentence_embeddings).squeeze(-1)
        return scores


In [19]:
# Function to generate summary from GCN outputs
def summarize_with_gcn(sentences, adj_matrix, tfidf_vectors, gcn_model, scorer_model, top_k=5):
    gcn_model.eval()
    scorer_model.eval()

    with torch.no_grad():
        node_features = tfidf_vectors
        encoded_nodes = gcn_model(node_features, adj_matrix)
        sentence_scores = scorer_model(encoded_nodes).squeeze()

    # Select top-k scored sentences
    top_indices = torch.topk(sentence_scores, top_k).indices.tolist()
    top_indices.sort()  # optional: preserve order in original text
    top_sentences = [sentences[i] for i in top_indices]

    return " ".join(top_sentences)

# Run on test samples
generated_tg_multisum = []

# Initialize model once
initialized = False

for item in multidoc_test:
    sentences, adj_matrix, tfidf_vectors, doc_map = build_sentence_graph(item["documents"], global_vectorizer)

    if not initialized:
        tfidf_dim = tfidf_vectors.shape[1]
        hidden_dim = 128
        output_dim = 64
        gcn_model = GCNEncoder(tfidf_dim, hidden_dim, output_dim)
        scorer_model = SentenceScorer(output_dim)
        initialized = True

    summary = summarize_with_gcn(sentences, adj_matrix, tfidf_vectors, gcn_model, scorer_model, top_k=5)

    generated_tg_multisum.append({
        "reference": item["summary"],
        "generated": summary
    })

print("✅ Generated", len(generated_tg_multisum), "TG-MultiSum summaries.")


✅ Generated 5 TG-MultiSum summaries.


In [20]:
from rouge_score import rouge_scorer
import numpy as np

# Function to compute average ROUGE scores
def compute_rouge_scores(predictions):
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []

    for item in predictions:
        scores = scorer.score(item["reference"], item["generated"])
        rouge1_scores.append(scores["rouge1"].fmeasure)
        rouge2_scores.append(scores["rouge2"].fmeasure)
        rougeL_scores.append(scores["rougeL"].fmeasure)

    return {
        "ROUGE-1": np.mean(rouge1_scores),
        "ROUGE-2": np.mean(rouge2_scores),
        "ROUGE-L": np.mean(rougeL_scores)
    }

# Run evaluation
tg_multisum_scores = compute_rouge_scores(generated_tg_multisum)

# Print results
for k, v in tg_multisum_scores.items():
    print(f"{k}: {v:.6f}")



ROUGE-1: 0.177608
ROUGE-2: 0.022534
ROUGE-L: 0.091695
