In [11]:
# Extractive summarizer: scores sentences by word frequency and keeps the top-scoring 50% of them.
import nltk
# Download the NLTK data packages this file relies on: the stop-word corpus
# (read in rank_sentence) and 'punkt' (presumably the models behind
# sent_tokenize / word_tokenize). These calls run every time the file is loaded.
# NOTE(review): newer NLTK releases may also require the 'punkt_tab' resource
# for tokenization — confirm against the installed NLTK version.
nltk.download('stopwords')
nltk.download('punkt')

import re
import heapq
import numpy as np
import pandas as pd
import sys

def read_text(path):
  """Return the full contents of the text file at ``path`` as one string.

  The file is opened in text mode with the platform's default encoding and
  is closed again before the function returns.
  """
  with open(path, 'r') as source:
    return source.read()

def rank_sentence(text, clean_text):
  """Score every sentence of ``text`` by the frequency of the words it contains.

  Word frequencies are counted over the tokens of ``clean_text`` with English
  stop words left out. Each sentence of ``text`` is then lower-cased,
  tokenized, and assigned the sum of the frequencies of its tokens.

  Args:
    text: Text to split into sentences with ``nltk.sent_tokenize``.
    clean_text: Normalised copy of the text, used only for counting words.

  Returns:
    A dict mapping each sentence string to its integer score. A sentence in
    which no token has a recorded frequency does not appear in the dict. If
    the same sentence string occurs more than once, its scores accumulate
    under a single key.
  """
  # Use a set so that the per-token stop-word test is a constant-time lookup
  # instead of a scan over the whole stop-word list.
  stop_words = set(nltk.corpus.stopwords.words('english'))

  # Count occurrences of every non-stop-word token in the cleaned text.
  word_count = {}
  for word in nltk.word_tokenize(clean_text):
    if word not in stop_words:
      word_count[word] = word_count.get(word, 0) + 1

  # Rank each sentence by the summed frequency of its known words.
  sentence_score = {}
  for sentence in nltk.sent_tokenize(text):
    score = sum(
      word_count[word]
      for word in nltk.word_tokenize(sentence.lower())
      if word in word_count
    )
    # Every recorded frequency is >= 1, so a zero score means no token
    # matched; such sentences are left out of the result.
    if score:
      sentence_score[sentence] = sentence_score.get(sentence, 0) + score
  return sentence_score

def generate_summary_from_file(path):
  """Read the text file at ``path`` and return the summary of its contents.

  The file is loaded with ``read_text`` and the resulting string is handed
  to ``generate_text_summary``, whose return value is passed back unchanged.
  """
  return generate_text_summary(read_text(path))
def enhance_text(text):
  """Tidy ``text`` and build a lower-cased, words-only copy of it.

  Returns:
    A ``(clean_text, text)`` tuple. ``text`` is the input with bracketed
    numeric markers such as ``[12]`` (or an empty ``[]``) replaced by a space
    and every whitespace run collapsed to one space. ``clean_text`` is that
    result lower-cased, with each non-word character and each digit replaced
    by a space and whitespace runs collapsed again.
  """
  # Tidy the text that is later split into sentences.
  without_markers = re.sub(r'\[[0-9]*\]', ' ', text)
  text = re.sub(r'\s+', ' ', without_markers)

  # Derive the copy used for unique-word extraction.
  clean_text = re.sub(r'\W', ' ', text.lower())
  clean_text = re.sub(r'\d', ' ', clean_text)
  clean_text = re.sub(r'\s+', ' ', clean_text)
  return clean_text, text

def generate_text_summary(text, ratio=0.5):
  """Build an extractive summary of ``text``, print it, and return it.

  The text is normalised with ``enhance_text``, its sentences are scored with
  ``rank_sentence``, and the highest-scoring share of the scored sentences is
  kept. The kept sentences are emitted in their original order, one per line.

  Args:
    text: Raw text to summarise.
    ratio: Fraction of the scored sentences to keep, between 0 and 1
      inclusive. The count is rounded down; the default of 0.5 keeps half.

  Returns:
    The summary: the selected sentences joined with newline characters. It
    is an empty string when the rounded-down count is zero.

  Raises:
    ValueError: If ``ratio`` lies outside the range 0..1.
  """
  if not 0 <= ratio <= 1:
    raise ValueError("ratio must be between 0 and 1")

  clean_text, text = enhance_text(text)
  sentence_score = rank_sentence(text, clean_text)

  # Keep the top-ranked share of the scored sentences (rounded down).
  keep = int(len(sentence_score) * ratio)
  # A set makes the membership test below a constant-time lookup.
  best_sentences = set(
    heapq.nlargest(keep, sentence_score, key=sentence_score.get)
  )

  # Walk the sentences in document order so the summary reads naturally.
  summarized_text = "\n".join(
    sentence
    for sentence in nltk.sent_tokenize(text)
    if sentence in best_sentences
  )
  print(summarized_text)
  return summarized_text
def main():
    """Run the summarizer on a built-in sample article.

    The sample text is passed to ``generate_text_summary``; the returned
    summary is not used here.
    """
    # The following disabled code is the file-based implementation, kept for
    # reference:
    #
    # file_path = input('1. Enter text file name: ')
    # output_location = file_path[:-4]+"_sum.txt"
    #
    # summary = generate_summary_from_file(file_path)
    # text_file = open(output_location, "w")
    # text_file.write(summary)
    # text_file.close()
    # print('Summarization task completed. Please check your output file located in '+output_location)
    sample_article = '''
    The possibility of life on Mars has excited the imagination. Among the scientific community, the current thinking is that life may have existed on the earth’s ruddy planetary neighbour a long time ago. Understanding this will enrich our studies of evolution and nurture of life outside the earth. The recent NASA mission, Mars 2020, that was launched from Cape Canaveral, Florida on July 30, 2020, landed on the Jezero Crater in Mars on February 18, to much celebration. Of special magnificence was the entry, descent and landing of the mission’s Perseverance rover, described as the ‘shortest and most intense part’. Entering the Martian atmosphere at about 20,000 km per hour, the mission had to bring the Perseverance rover to a halt on the surface in just seven minutes. Also, since it takes 11 minutes for a radio signal to reach the earth from Mars, the mission control could not really guide the landing, and the rover had to complete this process by itself. During the complicated landing process, using a camera eye, the rover checked the ground below to avoid hazardous terrain, all in a few breathtaking minutes.
NASA’s exploration of Mars has focused on finding traces and trails of water that may have existed, and relate it to finding evidence of ancient life. Its earlier Mars expedition which carried the Curiosity rover, landed on August 5, 2012. It identified regions that could have hosted life. Expected to last at least the duration of one Mars year, or about 687 earth days, the science goals this time are to look for signs of ancient life and collect rock and soil samples. Perseverance will take the inquiry made by Curiosity to the next level and search for signs of past life by studying the Jezero Crater. The crater was chosen for study as based on an earlier aerial survey, it was found to be home to an ancient delta. Clay minerals and carbonates were seen, making the crater a good place to search for life’s existence. Further, the rover will study the geology here and store samples in a place that can be accessed by a future mission which would return them to the earth. The rover will test out technologies that could help sustain the presence of humans there in the future. This includes an instrument to extract oxygen from the atmospheric carbon dioxide. The rover also carries a helicopter named Ingenuity that is specially designed to fly in Mars’s thin atmosphere; its sole purpose would be to demonstrate flight on Mars. Finally, to the question whether little green microbes did inhabit Mars in the distant past — only time and Perseverance can answer that.
'''
    generate_text_summary(sample_article)
# Run the demo only when this file is executed directly, not when it is imported.
if __name__ == '__main__':
    main()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
Among the scientific community, the current thinking is that life may have existed on the earth’s ruddy planetary neighbour a long time ago.
The recent NASA mission, Mars 2020, that was launched from Cape Canaveral, Florida on July 30, 2020, landed on the Jezero Crater in Mars on February 18, to much celebration.
Of special magnificence was the entry, descent and landing of the mission’s Perseverance rover, described as the ‘shortest and most intense part’.
Entering the Martian atmosphere at about 20,000 km per hour, the mission had to bring the Perseverance rover to a halt on the surface in just seven minutes.
Also, since it takes 11 minutes for a radio signal to reach the earth from Mars, the mission control could not really guide the landing, and the rover 