In [1]:
# Install required packages: sumy for extractive summarization, nltk for sentence tokenization, evaluate for ROUGE scoring
!pip install sumy nltk evaluate rouge-score -q

import nltk
nltk.download('punkt_tab', quiet=True)

from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer as SumyTokenizer
from sumy.summarizers.text_rank import TextRankSummarizer
from sumy.summarizers.lex_rank import LexRankSummarizer
from sumy.nlp.stemmers import Stemmer
from sumy.utils import get_stop_words

import evaluate
from tqdm import tqdm


  Preparing metadata (setup.py) ... done
  Preparing metadata (setup.py) ... done
  Preparing metadata (setup.py) ... done
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 97.3/97.3 kB 6.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 84.1/84.1 kB 5.5 MB/s eta 0:00:00
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.3/6.3 MB 69.1 MB/s eta 0:00:00
  Building wheel for rouge-score (setup.py) ... done
  Building wheel for breadability (setup.py) ... done
  Building wheel for docopt (setup.py) ... done


In [4]:
from datasets import load_dataset

# Fetch the PubMed summarization corpus; the returned object is indexed by split name.
corpus_id = 'ccdv/pubmed-summarization'
dataset = load_dataset(corpus_id)

# Only the 'test' split is used for evaluation.
test_dataset = dataset['test']

# Keep the first 1000 records of the test split.
first_indices = range(1000)
test_data = test_dataset.select(first_indices)

print(f"Loaded {len(test_data)} test samples from PubMed (ccdv/pubmed-summarization)")


Loaded 1000 test samples from PubMed (ccdv/pubmed-summarization)


In [5]:
# Language name passed to sumy's tokenizer, stemmer and stop-word loader.
_SUMMARY_LANGUAGE = "english"


def _summarize_with(summarizer_cls, text: str, sentences_count: int) -> str:
    """Summarize ``text`` with the given sumy summarizer class.

    Builds a fresh parser, stemmer and stop-word list for every call, runs the
    summarizer on the parsed document and joins the sentences it returns with
    single spaces.
    """
    parser = PlaintextParser.from_string(text, SumyTokenizer(_SUMMARY_LANGUAGE))
    summarizer = summarizer_cls(Stemmer(_SUMMARY_LANGUAGE))
    summarizer.stop_words = get_stop_words(_SUMMARY_LANGUAGE)
    selected = summarizer(parser.document, sentences_count)
    return " ".join(str(sentence) for sentence in selected)


def extractive_summarize_textrank(text: str, sentences_count: int = 5) -> str:
    """Return an extractive summary of ``text`` produced by sumy's TextRank.

    Args:
        text: Plain-text document to summarize.
        sentences_count: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces.
    """
    return _summarize_with(TextRankSummarizer, text, sentences_count)


def extractive_summarize_lexrank(text: str, sentences_count: int = 5) -> str:
    """Return an extractive summary of ``text`` produced by sumy's LexRank.

    Args:
        text: Plain-text document to summarize.
        sentences_count: Number of sentences requested from the summarizer.

    Returns:
        The selected sentences joined by single spaces.
    """
    return _summarize_with(LexRankSummarizer, text, sentences_count)


In [11]:
from transformers import AutoTokenizer

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained('microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract-fulltext')

def preprocess_article(text, max_length=1024):
    """Round-trip ``text`` through the tokenizer to truncate it.

    The text is encoded with truncation to at most ``max_length`` token ids and
    then decoded back to a string with special tokens removed.

    Args:
        text: Raw article text.
        max_length: Maximum number of token ids kept by the tokenizer.

    Returns:
        The decoded, truncated text.
    """
    # No padding and no tensor conversion: pad tokens would be dropped again by
    # skip_special_tokens=True, and decode() accepts the plain list of ids.
    encoded = tokenizer(text, max_length=max_length, truncation=True)
    return tokenizer.decode(encoded['input_ids'], skip_special_tokens=True)

# Run every test article through preprocess_article, preserving dataset order.
test_data_preprocessed = list(
    map(preprocess_article, (sample['article'] for sample in test_data))
)

# Summarize each preprocessed article with both extractive methods; the two
# result lists stay index-aligned with test_data_preprocessed.
textrank_summaries, lexrank_summaries = [], []

for text in tqdm(test_data_preprocessed, desc="Generating Extractive Summaries"):
    textrank_summary_text = extractive_summarize_textrank(text)
    lexrank_summary_text = extractive_summarize_lexrank(text)
    textrank_summaries.append(textrank_summary_text)
    lexrank_summaries.append(lexrank_summary_text)

print("Extractive summaries generated on preprocessed articles.")


tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/385 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Generating Extractive Summaries: 100%|██████████| 1000/1000 [01:28<00:00, 11.24it/s]

Extractive summaries generated on preprocessed articles.





In [12]:
# Load ROUGE metric
rouge = evaluate.load("rouge")

# Extract references (gold summaries), in the same order as the generated summaries
references = [sample['abstract'] for sample in test_data]


def _rouge_percentages(predictions):
    """Score ``predictions`` against ``references`` with ROUGE.

    Returns a ``(results, scores)`` pair: ``results`` is the dict returned by
    ``rouge.compute`` and ``scores`` holds the same keys with every value
    multiplied by 100 and rounded to two decimals.
    """
    results = rouge.compute(predictions=predictions, references=references, use_stemmer=True)
    scores = {name: round(value * 100, 2) for name, value in results.items()}
    return results, scores


def _print_rouge(label, scores):
    """Print the ROUGE-1 / ROUGE-2 / ROUGE-L percentages for one summarizer."""
    print(f"{label}:")
    print(f"  ROUGE-1: {scores['rouge1']:.2f}%")
    print(f"  ROUGE-2: {scores['rouge2']:.2f}%")
    print(f"  ROUGE-L: {scores['rougeL']:.2f}%")


# Compute ROUGE scores for TextRank summaries
textrank_results, textrank_scores = _rouge_percentages(textrank_summaries)

# Compute ROUGE scores for LexRank summaries
lexrank_results, lexrank_scores = _rouge_percentages(lexrank_summaries)

# Display results; the sample count is taken from the data instead of being
# hard-coded, so the header always matches what was actually evaluated.
print(f"\nExtractive Summarization Results ({len(references)} test samples):")
print("-" * 50)
_print_rouge("TextRank", textrank_scores)
print()
_print_rouge("LexRank", lexrank_scores)



Extractive Summarization Results (100 test samples):
--------------------------------------------------
TextRank:
  ROUGE-1: 39.10%
  ROUGE-2: 13.72%
  ROUGE-L: 20.60%

LexRank:
  ROUGE-1: 38.38%
  ROUGE-2: 13.15%
  ROUGE-L: 20.52%


In [13]:
import textwrap

def print_wrapped(text, width=80):
    """Print ``text`` wrapped into lines of at most ``width`` characters."""
    wrapped_lines = textwrap.wrap(text, width=width)
    print("\n".join(wrapped_lines))

# Show four hand-picked test examples side by side with their summaries
example_indices = [0, 99, 599, 999]  # Adapt as needed
banner = "=" * 80

for idx in example_indices:
    # Header: blank line, rule, example number, rule
    print("\n" + banner, f"EXAMPLE {idx}", banner, sep="\n")

    record = test_data[idx]
    article = record['article']
    reference = record['abstract']
    textrank_summary = textrank_summaries[idx]
    lexrank_summary = lexrank_summaries[idx]

    # Article preview: first 300 characters, with an ellipsis when truncated
    preview = article[:300]
    if len(article) > 300:
        preview += "..."
    print("\nArticle preview:")
    print(preview)

    print("\nReference Summary:")
    print(reference)

    # Generated summaries are wrapped to 80 columns for readability
    for heading, summary in (
        ("\nTextRank Summary:", textrank_summary),
        ("\nLexRank Summary:", lexrank_summary),
    ):
        print(heading)
        print_wrapped(summary, width=80)



EXAMPLE 0

Article preview:
anxiety affects quality of life in those living with parkinson 's disease ( pd ) more so than overall cognitive status , motor deficits , apathy , and depression [ 13 ] . 
 although anxiety and depression are often related and coexist in pd patients , recent research suggests that anxiety rather tha...

Reference Summary:
research on the implications of anxiety in parkinson 's disease ( pd ) has been neglected despite its prevalence in nearly 50% of patients and its negative impact on quality of life . 
 previous reports have noted that neuropsychiatric symptoms impair cognitive performance in pd patients ; however , to date , no study has directly compared pd patients with and without anxiety to examine the impact of anxiety on cognitive impairments in pd . 
 this study compared cognitive performance across 50 pd participants with and without anxiety ( 17 pda+ ; 33 pda ) , who underwent neurological and neuropsychological assessment . 
 group performance w