In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
import joblib

# Fetch the NLTK data packages requested by this script: 'punkt' and
# 'stopwords' (the script tokenizes text and reads the stopword corpus below).
for resource in ('punkt', 'stopwords'):
    nltk.download(resource)

# Sample document to summarize: a short passage about machine learning.
# Adjacent string literals are concatenated by the parser, so the value is a
# newline, the whole passage on a single line, and a closing newline.
text = (
    "\n"
    "Machine learning (ML) is the scientific study of algorithms and "
    "statistical models that computer systems use to perform a specific "
    "task without using explicit instructions, relying on patterns and "
    "inference instead. "
    "It is seen as a subset of artificial intelligence. "
    "Machine learning algorithms build a mathematical model based on "
    'sample data, known as "training data", in order to make predictions '
    "or decisions without being explicitly programmed to perform the "
    "task. "
    "Machine learning algorithms are used in a wide variety of "
    "applications, such as email filtering, and computer vision, where it "
    "is infeasible to develop an algorithm of specific instructions for "
    "performing the task."
    "\n"
)

# Split the document into sentences; these are the candidates for the summary.
sentences = sent_tokenize(text)

# Build the scoring vocabulary: lower-cased alphanumeric tokens with English
# stopwords removed.
stop_words = set(stopwords.words('english'))
word_tokens = word_tokenize(text.lower())
filtered_tokens = [word for word in word_tokens if word.isalnum() and word not in stop_words]

# Count how often each remaining word occurs in the whole document.
word_freq = nltk.FreqDist(filtered_tokens)

# Sentences with this many space-separated words or more are never scored, so
# they can never be selected for the summary.
max_sentence_words = 30

# Score each short-enough sentence as the sum of the document frequencies of
# its words; every occurrence of a word in the sentence counts.
sent_scores = {}
for sentence in sentences:
    # The length test depends only on the sentence, so evaluate it once here
    # rather than once per token.
    if len(sentence.split(' ')) >= max_sentence_words:
        continue
    matched = [word_freq[word] for word in word_tokenize(sentence.lower()) if word in word_freq]
    # A sentence containing no counted word gets no entry at all. Using .get()
    # keeps accumulating if the same sentence string appears more than once.
    if matched:
        sent_scores[sentence] = sent_scores.get(sentence, 0) + sum(matched)

# Select the top N sentences with highest scores for summarization.
# num_sentences is an upper bound: fewer sentences are selected when fewer
# were scored (e.g. when most sentences exceed the length limit above).
num_sentences = 2
summary_sentences = sorted(sent_scores, key=sent_scores.get, reverse=True)[:num_sentences]

# Generate the summary: selected sentences joined in descending score order.
summary = ' '.join(summary_sentences)
print("Summary:")
print(summary)




[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\vijay\AppData\Roaming\nltk_data...


Summary:
It is seen as a subset of artificial intelligence.


[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\vijay\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
