In [1]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.stem import PorterStemmer
# Fetch the tokenizer models used by word_tokenize / sent_tokenize below.
# NOTE(review): newer NLTK releases may look for the 'punkt_tab' resource
# instead of 'punkt' -- confirm against the installed NLTK version.
nltk.download('punkt')
# Fetch the stop-word lists read by stopwords.words('english') in preprocess_text.
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


True

In [2]:
def preprocess_text(text):
  """Tokenize ``text`` and return the Porter stems of its content words.

  A token is kept only when it is purely alphanumeric (which discards
  punctuation and hyphenated tokens) and its lower-cased form is not an
  English stop word. Stems are returned in the order the tokens appear,
  duplicates included.
  """
  tokens = word_tokenize(text)
  english_stops = set(stopwords.words('english'))
  porter = PorterStemmer()

  stems = []
  for token in tokens:
    # Skip punctuation / mixed-symbol tokens and stop words.
    if not token.isalnum() or token.lower() in english_stops:
      continue
    stems.append(porter.stem(token))
  return stems

In [3]:
def generate_summary(text, num_sentences=3):
  """Build an extractive summary from the highest-scoring sentences of ``text``.

  Each sentence is scored by summing the document-wide frequency of every
  distinct stem it contains (stems come from ``preprocess_text``). The
  ``num_sentences`` best sentences are returned, highest score first,
  separated by single spaces.

  Args:
    text: The document to summarize.
    num_sentences: Maximum number of sentences to keep. Values below one
      yield an empty summary.

  Returns:
    The selected sentences joined by a single space. The string is empty
    when no sentence contains a scored word.
  """
  sentences = sent_tokenize(text)
  # Document-wide frequency of every stem produced by preprocess_text.
  word_frequencies = nltk.FreqDist(preprocess_text(text))

  sentence_scores = {}
  for sentence in sentences:
    if sentence in sentence_scores:
      # A repeated sentence is scored once instead of doubling its weight.
      continue
    # Compare stems with stems: a raw substring test against the sentence
    # text would let short stems match inside unrelated words.
    sentence_stems = set(preprocess_text(sentence))
    score = sum(word_frequencies[stem] for stem in sentence_stems)
    if score:
      # Sentences without any scored word never enter the ranking.
      sentence_scores[sentence] = score

  # Rank by score, best first; ties keep their order of appearance.
  ranked = sorted(sentence_scores, key=sentence_scores.get, reverse=True)
  # Clamp so a negative count cannot slice from the end of the ranking.
  top_sentences = ranked[:max(num_sentences, 0)]
  # Join with a space so adjacent sentences do not run together.
  return ' '.join(top_sentences)

In [4]:
if __name__ == "__main__":
  # Demo: summarize a short passage about NLP and show it next to the original.
  input_text = """
Natural language processing (NLP) is the ability of
a computer program to understand human language as it's
spoken and written -- referred to as natural language.
It's a component of artificial intelligence (AI).

NLP has existed for more than 50 years and has roots in
the field of linguistics. It has a variety of real-world
applications in numerous fields, including medical research,
search engines and business intelligence.
  """

  summary = generate_summary(input_text)
  # One print call; the newline separator reproduces the line-by-line layout.
  print("Original Text:", input_text, "\nSummary:", summary, sep="\n")

Original Text:

Natural language processing (NLP) is the ability of 
a computer program to understand human language as it's 
spoken and written -- referred to as natural language. 
It's a component of artificial intelligence (AI).

NLP has existed for more than 50 years and has roots in 
the field of linguistics. It has a variety of real-world 
applications in numerous fields, including medical research, 
search engines and business intelligence.
  

Summary:

Natural language processing (NLP) is the ability of 
a computer program to understand human language as it's 
spoken and written -- referred to as natural language.It has a variety of real-world 
applications in numerous fields, including medical research, 
search engines and business intelligence.NLP has existed for more than 50 years and has roots in 
the field of linguistics.
