In [None]:
!pip install transformers torch sumy rouge-score nltk
import nltk
nltk.download("punkt")
nltk.download("punkt_tab")




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [None]:
# 📌 Extractive Summarization (TextRank)
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
from sumy.summarizers.text_rank import TextRankSummarizer

def extractive_summary(text, num_sentences=3):
    """Build an extractive summary of ``text`` using sumy's TextRank summarizer.

    Args:
        text: Plain-text input handed to ``PlaintextParser.from_string``.
        num_sentences: Sentence count passed to the summarizer (default 3).

    Returns:
        The sentences returned by the summarizer, converted to ``str`` and
        joined with single spaces.
    """
    tokenizer = Tokenizer("english")
    document = PlaintextParser.from_string(text, tokenizer).document
    rank = TextRankSummarizer()
    picked = rank(document, num_sentences)
    return " ".join(map(str, picked))

# 📌 Abstractive Summarization (T5-small & BART)
from transformers import pipeline
# Build both summarization pipelines once, at cell execution time, so that
# abstractive_summary() can reuse them on every call.
t5_summarizer = pipeline(task="summarization", model="t5-small")
bart_summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

def abstractive_summary(text, model="t5", max_len=60, min_len=10):
    """Generate an abstractive summary of ``text`` with T5-small or BART.

    Args:
        text: Input text passed to the selected summarization pipeline.
        model: ``"t5"`` selects ``t5_summarizer``; any other value selects
            ``bart_summarizer``.
        max_len: Maximum number of newly generated tokens; forwarded to the
            pipeline as ``max_new_tokens``.
        min_len: Forwarded to the pipeline as ``min_length``.

    Returns:
        The ``'summary_text'`` field of the first result returned by the pipeline.
    """
    summarizer = t5_summarizer if model == "t5" else bart_summarizer
    # Pass the cap as `max_new_tokens`, not `max_length`: when both end up set,
    # generation warns that `max_new_tokens` takes precedence, so a `max_length`
    # argument is ignored whenever the pipeline carries a default `max_new_tokens`.
    result = summarizer(
        text,
        max_new_tokens=max_len,
        min_length=min_len,
        do_sample=False,
    )
    return result[0]['summary_text']

# 📌 Evaluation with ROUGE
from rouge_score import rouge_scorer
def evaluate_summary(reference, summary):
    """Score ``summary`` against ``reference`` with ROUGE-1 and ROUGE-L.

    A new ``RougeScorer`` (stemming enabled) is created on each call.

    Returns:
        Whatever ``RougeScorer.score(reference, summary)`` returns.
    """
    metrics = ['rouge1', 'rougeL']
    return rouge_scorer.RougeScorer(metrics, use_stemmer=True).score(reference, summary)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Device set to use cpu


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/1.63G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Device set to use cpu


In [None]:
# =========================
# CONFIGURATION
# =========================
# Sample passage that every summarizer below receives as input.
text = """Text summarization is one of the most interesting problems in NLP.
It’s hard for us as humans to manually extract the summary of a large document of text.
Automatic text summarization solves this by identifying meaningful information
and presenting it in a shorter version while preserving the overall meaning."""

# Sentence count requested from the extractive summarizer
num_sentences = 2

# Length bounds handed to the abstractive summarizers
min_len, max_len = 15, 60

# =========================
# RUN SUMMARIZATION
# =========================
print("🔹 ORIGINAL TEXT", "=" * 60, sep="\n")
print(f"{text} \n")

# --- Extractive ---
print("📍 Extractive Summary (TextRank)", "-" * 60, sep="\n")
extractive = extractive_summary(text, num_sentences)
print(f"{extractive} \n")

# --- Abstractive (T5) ---
print("📍 Abstractive Summary (T5-small)", "-" * 60, sep="\n")
t5_sum = abstractive_summary(text, model="t5", max_len=max_len, min_len=min_len)
print(f"{t5_sum} \n")

# --- Abstractive (BART) ---
print("📍 Abstractive Summary (BART)", "-" * 60, sep="\n")
bart_sum = abstractive_summary(text, model="bart", max_len=max_len, min_len=min_len)
print(f"{bart_sum} \n")

# --- Evaluation Example (ROUGE) ---
# NOTE: the source text itself is passed as the reference here (no separate
# reference summary is defined in this cell), and only the BART summary is scored.
print("📊 Evaluation Metrics (ROUGE)", "-" * 60, sep="\n")
scores = evaluate_summary(text, bart_sum)
for metric, score in scores.items():
    print(
        f"{metric}: Precision={score.precision:.4f}, "
        f"Recall={score.recall:.4f}, F1={score.fmeasure:.4f}"
    )


🔹 ORIGINAL TEXT
Text summarization is one of the most interesting problems in NLP.
It’s hard for us as humans to manually extract the summary of a large document of text.
Automatic text summarization solves this by identifying meaningful information
and presenting it in a shorter version while preserving the overall meaning. 

📍 Extractive Summary (TextRank)
------------------------------------------------------------
It’s hard for us as humans to manually extract the summary of a large document of text. Automatic text summarization solves this by identifying meaningful information and presenting it in a shorter version while preserving the overall meaning. 

📍 Abstractive Summary (T5-small)
------------------------------------------------------------


Both `max_new_tokens` (=256) and `max_length`(=60) seem to have been set. `max_new_tokens` will take precedence. Please refer to the documentation for more information. (https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)


text summarization is one of the most interesting problems in NLP . it’s hard for us as humans to manually extract the summary of a large document . 

📍 Abstractive Summary (BART)
------------------------------------------------------------
Text summarization is one of the most interesting problems in NLP. It’s hard for us as humans to manually extract the summary of a large document of text. Automatic text summarization solves this by identifying meaningful information and presenting it in a shorter version. 

📊 Evaluation Metrics (ROUGE)
------------------------------------------------------------
rouge1: Precision=1.0000, Recall=0.9000, F1=0.9474
rougeL: Precision=1.0000, Recall=0.9000, F1=0.9474
