<a href="https://colab.research.google.com/github/RealSahilp7676/Extractive-Text-Summarization/blob/main/ML_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rouge-score

Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=35ab6cfccfd8f498a9cf2684dab100379a77dd397caa2df1d75523ae220e1de4
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
#Step-1 Import NLP Libraries
import textwrap

import networkx as nx
import nltk
import numpy as np
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize
from rouge_score import rouge_scorer
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Download necessary resources
# nltk.download() reports a failed download by returning False instead of
# raising, so check the result here; otherwise the failure only surfaces later
# as a LookupError when the tokenizer or stop-word corpus is first used.
for resource in ("punkt_tab", "stopwords"):
    if not nltk.download(resource):
        raise RuntimeError(f"Could not download NLTK resource {resource!r}")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [None]:
# Sample passage to summarize: five sentences, one per line, joined with
# newlines and with no trailing newline.
document = "\n".join(
    [
        "The wind whispered through the ancient trees, carrying secrets from a forgotten time.",
        "A lone traveler paused by a crumbling stone wall, tracing the moss-covered carvings with curious fingers.",
        "Overhead, a flock of birds darted across the sky, their shadows flickering like memories on the forest floor.",
        "Somewhere in the distance, a river hummed a gentle tune, its waters weaving through the land like threads of silver.",
        "By nightfall, the stars would emerge, silent witnesses to the mysteries still buried beneath the earth.",
    ]
)


In [None]:
# Step 2 - Preprocessing
# English stop words from the NLTK corpus, kept as a set.
stop_words = {word for word in stopwords.words("english")}
# Split the raw text into a list of sentence strings.
sentences = sent_tokenize(document, language="english")

In [None]:
# Step 3 - TF-IDF vectorization
# Each sentence is treated as its own "document"; features are unigrams and
# bigrams, with the NLTK stop words removed.
vectorizer = TfidfVectorizer(
    ngram_range=(1, 2),
    stop_words=[*stop_words],
)
# Fit the vocabulary on the sentences and vectorize them in one pass.
X = vectorizer.fit_transform(sentences)

In [None]:
# Build Similarity Matrix
# Pairwise sentence similarity as the product of the TF-IDF matrix with its
# transpose. `@` is used instead of `*` because `*` is a matrix product only
# for scipy sparse *matrices*; on sparse *arrays* it is element-wise, whereas
# `@` is a matrix product for both.
# NOTE: with TfidfVectorizer's documented default (norm="l2") these dot
# products are cosine similarities.
similarity_matrix = (X @ X.T).toarray()
# Zero the diagonal so a sentence's similarity to itself adds no self-loop.
np.fill_diagonal(similarity_matrix, 0)

# Build similarity graph
# Nodes are sentence indices; PageRank returns a dict keyed by those indices.
graph = nx.from_numpy_array(similarity_matrix)
scores = nx.pagerank(graph)

In [None]:
# Choose top 2 sentences for summary (or fewer if the document is shorter).
num_sentences = min(2, len(sentences))

# Sentence indices from highest to lowest PageRank score; ties are broken by
# the sentence text, in descending order.
ranked_indices = sorted(
    range(len(sentences)),
    key=lambda i: (scores[i], sentences[i]),
    reverse=True,
)
# (score, sentence) pairs, best first.
ranked_sentences = [(scores[i], sentences[i]) for i in ranked_indices]

# Emit the selected sentences in their original document order, not in rank
# order, so the summary reads in the same sequence as the source text.
summary_indices = sorted(ranked_indices[:num_sentences])
summarized_text = " ".join(sentences[i] for i in summary_indices)

In [None]:
# Output
# sep="\n" puts the text on its own line; passing "...\n" and the text as two
# arguments would insert print's default space separator at the start of the
# text.
print("Original Text:", document, sep="\n")

# Wrap the summary to 120 columns so it stays readable in the output area.
wrapped_text = textwrap.fill(summarized_text, width=120)
print("\nSummarized Text:", wrapped_text, sep="\n")

# ROUGE-1 / ROUGE-2 / ROUGE-L with stemming enabled. score() takes
# (target, prediction): the full document is the target here and the summary
# is the prediction.
# NOTE(review): scoring an extractive summary against its own source text is
# not a comparison with a reference summary - confirm this is intended.
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
# Stored under its own name so the PageRank `scores` dict used by the ranking
# step is not overwritten (which would break re-running that step).
rouge_scores = scorer.score(document, summarized_text)

# Print each metric on a separate line
print("\nROUGE-1:", rouge_scores['rouge1'])
print("ROUGE-2:", rouge_scores['rouge2'])
print("ROUGE-L:", rouge_scores['rougeL'])


Original Text:
 The wind whispered through the ancient trees, carrying secrets from a forgotten time.
A lone traveler paused by a crumbling stone wall, tracing the moss-covered carvings with curious fingers.
Overhead, a flock of birds darted across the sky, their shadows flickering like memories on the forest floor.
Somewhere in the distance, a river hummed a gentle tune, its waters weaving through the land like threads of silver.
By nightfall, the stars would emerge, silent witnesses to the mysteries still buried beneath the earth.

Summarized Text:
 Somewhere in the distance, a river hummed a gentle tune, its waters weaving through the land like threads of silver.
Overhead, a flock of birds darted across the sky, their shadows flickering like memories on the forest floor.

ROUGE-1: Score(precision=1.0, recall=0.4523809523809524, fmeasure=0.6229508196721312)
ROUGE-2: Score(precision=0.972972972972973, recall=0.43373493975903615, fmeasure=0.6)
ROUGE-L: Score(precision=0.578947368421052