In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Recursively list every file under the Kaggle input directory, one full path per line.
for dirname, _, filenames in os.walk('/kaggle/input'):
    full_paths = (os.path.join(dirname, filename) for filename in filenames)
    for full_path in full_paths:
        print(full_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/newdata/delhi.txt
/kaggle/input/delhitxt/delhi.txt


In [2]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from collections import Counter

# Fetch the NLTK data packages this notebook relies on; packages that are
# already installed and up to date are only reported, not downloaded again.
#   punkt / punkt_tab -> tokenizer data used by sent_tokenize / word_tokenize
#   stopwords         -> word lists used by stopwords.words('english')
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK releases
# (3.9 and later) load the tokenizer from 'punkt_tab' and raise LookupError
# without it; older releases keep using 'punkt'.
for resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(resource)

def tokenize_sentences(text):
    """Return the sentences that NLTK's ``sent_tokenize`` finds in ``text``."""
    sentence_list = sent_tokenize(text)
    return sentence_list

def tokenize_words(sentence):
    """Lowercase ``sentence`` and split it into tokens with ``word_tokenize``."""
    lowered = sentence.lower()
    return word_tokenize(lowered)

# Stopword sets already loaded from the NLTK corpus, keyed by language name.
_STOPWORD_SETS = {}


def _stopword_set(language):
    """Return the stopword set for ``language``, loading the corpus only once."""
    cached = _STOPWORD_SETS.get(language)
    if cached is None:
        # Loaded lazily (not at definition time) so the corpus download has
        # had a chance to run before the first lookup.
        cached = frozenset(stopwords.words(language))
        _STOPWORD_SETS[language] = cached
    return cached


def remove_stopwords(word_list):
    """Remove English stopwords and non-alphanumeric tokens from a list of words.

    The stopword set is cached after the first call, so filtering many
    sentences does not reload the corpus each time.

    Args:
        word_list: Iterable of string tokens. Matching against the stopword
            list is exact (case-sensitive), so tokens should already be
            normalized the way the caller wants them compared.

    Returns:
        A new list, in the original order, of the tokens for which
        ``str.isalnum()`` is true and that are not in the stopword set.
        Punctuation and tokens containing characters such as ``-`` are dropped.
    """
    stop_words = _stopword_set('english')
    return [word for word in word_list if word.isalnum() and word not in stop_words]

def compute_word_frequencies(sentences):
    """Count occurrences of every token kept by ``remove_stopwords``.

    Each sentence is tokenized with ``tokenize_words`` and filtered with
    ``remove_stopwords``; the surviving tokens from all sentences are tallied
    into a single ``Counter``.
    """
    kept_tokens = (
        token
        for text in sentences
        for token in remove_stopwords(tokenize_words(text))
    )
    return Counter(kept_tokens)

def rank_sentences(sentences, word_freq):
    """Order sentences by the summed inverse frequency-rank of their words.

    Words are ranked 1, 2, 3, ... in ``word_freq.most_common()`` order. A
    sentence's score is the sum of ``1 / rank`` over each of its tokens that
    has a rank; tokens without one add nothing.

    Returns:
        A list of ``(sentence, score)`` tuples sorted by score, highest first.
    """
    # Rank 1 belongs to the most frequent word.
    rank_of = {}
    for position, entry in enumerate(word_freq.most_common(), start=1):
        rank_of[entry[0]] = position

    scored = []
    for text in sentences:
        total = sum(1 / rank_of[token] for token in tokenize_words(text) if token in rank_of)
        scored.append((text, total))

    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored

def summarize(file_path, k):
    """Summarize a UTF-8 text file by returning its k highest-scoring sentences.

    Besides building the summary, this prints a diagnostic report: the first
    10 sentences, every counted word with its count divided by the number of
    sentences, and all sentences with their scores in ranked order.

    Args:
        file_path: Path of the text file to summarize.
        k: Number of top-ranked sentences to return.

    Returns:
        A list of the k highest-scoring sentences, best first. The list is
        shorter when the text has fewer than k sentences.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # Decode explicitly as UTF-8 so non-ASCII characters (e.g. the curly
    # apostrophe in "city’s") are read the same way on every platform instead
    # of depending on the locale's default encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read()

    sentences = tokenize_sentences(text)
    word_freq = compute_word_frequencies(sentences)

    # Print first 10 sentences
    print("First 10 sentences:")
    for sentence in sentences[:10]:
        print(sentence)

    # The value shown per word is its total count divided by the number of
    # sentences, i.e. average occurrences per sentence.
    print("\nWord Frequencies:")
    for word, freq in word_freq.items():
        print(f"{word}: {freq / len(sentences):.4f}")

    ranked_sentences = rank_sentences(sentences, word_freq)

    print("\nRanked Sentences and Scores:")
    for sentence, score in ranked_sentences:
        print(f"Score: {score:.4f} - Sentence: {sentence}")

    summary = [sentence for sentence, _ in ranked_sentences[:k]]
    return summary

# Summarize the Delhi article and show its three best-scoring sentences.
file_path = "/kaggle/input/newdata/delhi.txt"
k = 3
summary = summarize(file_path=file_path, k=k)
print("\nSummary:", "\n".join(summary), sep="\n")


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
First 10 sentences:

Delhi, the capital city of India, is a place where history, culture, and modernity blend seamlessly, creating a dynamic environment that reflects the essence of the country.
With its roots going back over two millennia, Delhi has served as the seat of power for various dynasties and empires, each leaving its mark on the city’s architecture, traditions, and way of life.
From the Mauryas and the Tughlaqs to the Mughals and the British, Delhi has witnessed the rise and fall of empires, becoming a city that embodies the diverse history of the Indian subcontinent.
One of the most iconic symbols of the city’s historical significance is the Qutub Minar, built in 1193 by Qutb-ud-din Aibak, which is considered the tallest brick minaret in