In [1]:
!pip install nltk networkx numpy sumy

Collecting sumy
  Using cached sumy-0.12.0-py3-none-any.whl.metadata (8.3 kB)
Collecting breadability>=0.1.20 (from sumy)
  Using cached breadability-0.1.20-py2.py3-none-any.whl
Collecting docopt-ng>=0.6.1 (from sumy)
  Using cached docopt_ng-0.9.0-py3-none-any.whl.metadata (13 kB)
Collecting lxml-html-clean (from sumy)
  Using cached lxml_html_clean-0.4.3-py3-none-any.whl.metadata (2.3 kB)
Collecting pycountry>=18.2.23 (from sumy)
  Using cached pycountry-26.2.16-py3-none-any.whl.metadata (12 kB)
Collecting docopt<0.7,>=0.6.1 (from breadability>=0.1.20->sumy)
  Using cached docopt-0.6.2-py2.py3-none-any.whl
Using cached sumy-0.12.0-py3-none-any.whl (73 kB)
Using cached docopt_ng-0.9.0-py3-none-any.whl (16 kB)
Using cached pycountry-26.2.16-py3-none-any.whl (8.0 MB)
Using cached lxml_html_clean-0.4.3-py3-none-any.whl (14 kB)
Installing collected packages: docopt, pycountry, lxml-html-clean, docopt-ng, breadability, sumy

   ------ --------------------------------- 1/6 [pycountry]
   --

In [2]:
#import libraries
import nltk
import numpy as np
import networkx as nx
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from string import punctuation

In [4]:
# Fetch the NLTK data used below: the Punkt tokenizer models (for
# sent_tokenize / word_tokenize) and the English stop-word list.
# NLTK 3.9+ loads Punkt from the 'punkt_tab' resource instead of 'punkt',
# so both are requested to keep the notebook runnable on old and new releases.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Admin\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [5]:
# Sample input: a two-paragraph passage on urban growth that both summarizers
# below are run on. Note the string starts with a single leading space.
text = """ The rapid growth of urban populations over the past few decades has significantly transformed cities around the world. As more people migrate from rural areas in search of better employment opportunities, education, and healthcare, urban centers have expanded both economically and geographically. While this growth has led to technological innovation, cultural exchange, and increased productivity, it has also created serious challenges. Cities now face problems such as traffic congestion, air pollution, housing shortages, and increased pressure on public services.

Governments and urban planners are attempting to address these issues by investing in sustainable infrastructure, improving public transportation systems, and promoting environmentally friendly policies. Smart city technologies, including data-driven traffic management and energy-efficient buildings, are becoming more common. However, experts argue that long-term solutions require inclusive planning that considers social equity, affordable housing, and climate resilience. The future of urban development will depend largely on how effectively cities balance economic growth with environmental sustainability and quality of life for residents."""

In [8]:
#frequency summarizer

def frequency_summarizer(text, summary_ratio=0.4):
    """Build an extractive summary by scoring sentences on word frequency.

    Every non-stop-word, non-punctuation token in ``text`` gets a weight equal
    to its count divided by the count of the most frequent such token. A
    sentence's score is the sum of the weights of its tokens, and the
    highest-scoring sentences are returned.

    Args:
        text: The document to summarize.
        summary_ratio: Fraction of the sentences to keep. The result of
            ``len(sentences) * summary_ratio`` is truncated to an integer, but
            at least one sentence is kept whenever the ratio is positive.

    Returns:
        The selected sentences joined by single spaces, highest score first
        (ties keep document order). Returns ``""`` when the text contains no
        scorable words or when ``summary_ratio`` is not positive.
    """
    stop_words = set(stopwords.words("english"))
    punctuation_chars = set(punctuation)

    def is_content_word(token):
        # Reject stop words and tokens made up solely of punctuation
        # characters; this also catches multi-character tokens such as
        # "``" or "..." that a substring test against `punctuation` misses.
        return token not in stop_words and not all(
            char in punctuation_chars for char in token
        )

    word_freq = {}
    for token in word_tokenize(text.lower()):
        if is_content_word(token):
            word_freq[token] = word_freq.get(token, 0) + 1

    # Nothing to score (empty text, or only stop words / punctuation).
    if not word_freq:
        return ""

    # Normalize so the most frequent word has weight 1.0.
    max_freq = max(word_freq.values())
    word_freq = {word: count / max_freq for word, count in word_freq.items()}

    sentences = sent_tokenize(text)
    sentence_scores = {}
    for sentence in sentences:
        for token in word_tokenize(sentence.lower()):
            if token in word_freq:
                sentence_scores[sentence] = (
                    sentence_scores.get(sentence, 0) + word_freq[token]
                )

    # Keep at least one sentence for short texts, and never a negative count
    # (which would turn the slice below into "drop the last k").
    minimum = 1 if summary_ratio > 0 else 0
    summary_length = max(int(len(sentences) * summary_ratio), minimum)

    summarized_sentences = sorted(
        sentence_scores, key=sentence_scores.get, reverse=True
    )[:summary_length]

    return " ".join(summarized_sentences)

In [9]:
# Run the frequency-based summarizer on the sample passage and show the result.
freq_summary = frequency_summarizer(text)
print("Frequency Based Summary:\n", freq_summary, sep="\n")


Frequency Based Summary:

The future of urban development will depend largely on how effectively cities balance economic growth with environmental sustainability and quality of life for residents. Governments and urban planners are attempting to address these issues by investing in sustainable infrastructure, improving public transportation systems, and promoting environmentally friendly policies. Cities now face problems such as traffic congestion, air pollution, housing shortages, and increased pressure on public services.


In [10]:
# TextRank summarizer
def textrank_summarizer(text, summary_ratio=0.4):
    """Build an extractive summary by ranking sentences with PageRank.

    Each sentence becomes a node in a graph. The edge weight between two
    sentences is the dot product of their lower-cased token-count vectors,
    i.e. an unnormalized count of shared tokens (every token returned by
    ``word_tokenize`` takes part; nothing is filtered out). PageRank scores
    over that graph decide which sentences are kept.

    Args:
        text: The document to summarize.
        summary_ratio: Fraction of the sentences to keep. The result of
            ``len(sentences) * summary_ratio`` is truncated to an integer, but
            at least one sentence is kept whenever the ratio is positive, and
            never more than the number of sentences available.

    Returns:
        The selected sentences joined by single spaces, highest PageRank
        score first. Returns ``""`` when the text has no sentences or when
        ``summary_ratio`` is not positive.
    """
    from collections import Counter  # local import keeps this cell self-contained

    sentences = sent_tokenize(text)
    if not sentences:
        return ""

    # Tokenize each sentence once up front rather than once per sentence pair.
    token_counts = [
        Counter(token.lower() for token in word_tokenize(sentence))
        for sentence in sentences
    ]

    def sentence_similarity(counts1, counts2):
        """Dot product of two token-count vectors (shared-token overlap)."""
        return sum(
            count * counts2[token]
            for token, count in counts1.items()
            if token in counts2
        )

    sentence_count = len(sentences)
    similarity_matrix = np.zeros((sentence_count, sentence_count))

    # The similarity is symmetric, so compute each pair once and mirror it.
    # The diagonal stays zero: a sentence does not vote for itself.
    for i in range(sentence_count):
        for j in range(i + 1, sentence_count):
            weight = sentence_similarity(token_counts[i], token_counts[j])
            similarity_matrix[i][j] = weight
            similarity_matrix[j][i] = weight

    # page rank
    nx_graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(nx_graph)

    # rank sentences (ties fall back to comparing the sentence text)
    ranked_sentences = sorted(
        ((scores[i], s) for i, s in enumerate(sentences)),
        reverse=True
    )

    # Keep at least one sentence for short texts and never a negative count;
    # slicing (instead of indexing) also tolerates ratios above 1.
    minimum = 1 if summary_ratio > 0 else 0
    summary_length = max(int(sentence_count * summary_ratio), minimum)

    return " ".join(
        sentence for _, sentence in ranked_sentences[:summary_length]
    )


In [11]:
# Run the TextRank summarizer on the sample passage and show the result.
textrank_summary = textrank_summarizer(text)
print("TextRank Summary:\n", textrank_summary, sep="\n")


TextRank Summary:

As more people migrate from rural areas in search of better employment opportunities, education, and healthcare, urban centers have expanded both economically and geographically. Cities now face problems such as traffic congestion, air pollution, housing shortages, and increased pressure on public services. While this growth has led to technological innovation, cultural exchange, and increased productivity, it has also created serious challenges.


In [14]:
# Compare the length of the original text with each summary
def compare_lengths(original, summary):
    """Print the sizes of a text and its summary, plus their ratio.

    Sizes are the number of tokens returned by ``word_tokenize`` for each
    string. The compression ratio is ``summary size / original size`` rounded
    to two decimals.

    Args:
        original: The full source text.
        summary: The summary produced from ``original``.

    Returns:
        None. The three figures are written to standard output.
    """
    # Tokenize each string once and reuse the counts.
    original_count = len(word_tokenize(original))
    summary_count = len(word_tokenize(summary))

    # An empty original has no meaningful ratio; report 0.0 rather than
    # failing with ZeroDivisionError.
    if original_count:
        ratio = round(summary_count / original_count, 2)
    else:
        ratio = 0.0

    print("Original Length (words):", original_count)
    print("Summary Length (words):", summary_count)
    print("Compression Ratio:", ratio)
# Report the length statistics for each summary against the same original.
for heading, summary in (
    ("=== Frequency Based ===", freq_summary),
    ("\n=== TextRank ===", textrank_summary),
):
    print(heading)
    compare_lengths(text, summary)


=== Frequency Based ===
Original Length (words): 183
Summary Length (words): 74
Compression Ratio: 0.4

=== TextRank ===
Original Length (words): 183
Summary Length (words): 73
Compression Ratio: 0.4


In [16]:
# Show the original passage followed by both summaries, each under a heading.
print("----- ORIGINAL TEXT -----\n", text, sep="\n")
print("\n----- FREQUENCY SUMMARY -----\n", freq_summary, sep="\n")
print("\n----- TEXTRANK SUMMARY -----\n", textrank_summary, sep="\n")


----- ORIGINAL TEXT -----

 The rapid growth of urban populations over the past few decades has significantly transformed cities around the world. As more people migrate from rural areas in search of better employment opportunities, education, and healthcare, urban centers have expanded both economically and geographically. While this growth has led to technological innovation, cultural exchange, and increased productivity, it has also created serious challenges. Cities now face problems such as traffic congestion, air pollution, housing shortages, and increased pressure on public services.

Governments and urban planners are attempting to address these issues by investing in sustainable infrastructure, improving public transportation systems, and promoting environmentally friendly policies. Smart city technologies, including data-driven traffic management and energy-efficient buildings, are becoming more common. However, experts argue that long-term solutions require inclusive plannin