In [6]:
import re
import nltk
import math
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import sent_tokenize,word_tokenize
from nltk.corpus import stopwords

# Fetch the NLTK resources used below. As the download log shows, packages
# that are already up to date are not fetched again, so re-running is cheap.
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK
# releases (3.9+) load the sentence/word tokenizer data from 'punkt_tab';
# with only 'punkt' installed, sent_tokenize/word_tokenize raise LookupError
# on those versions.
for _resource in ('wordnet', 'punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)

def tokenize_sentence(text):
    """Split ``text`` into sentences using NLTK's ``sent_tokenize``.

    Returns whatever ``sent_tokenize`` produces for ``text``.
    """
    sentence_list = sent_tokenize(text)
    return sentence_list

def tokenize_word(text):
    """Split ``text`` into word tokens using NLTK's ``word_tokenize``.

    Returns whatever ``word_tokenize`` produces for ``text``.
    """
    tokens = word_tokenize(text)
    return tokens

def lemmatize(word):
    """Return the WordNet lemma of ``word``.

    A fresh ``WordNetLemmatizer`` is created on each call and its
    ``lemmatize`` method is applied to ``word`` with default arguments.
    """
    return WordNetLemmatizer().lemmatize(word)

# Lazily-built cache of the English stop-word list. It is filled on the first
# is_stop_word() call so the corpus is read once instead of on every call.
_ENGLISH_STOP_WORDS = None

def is_stop_word(word):
    """Return True if ``word`` is an English stop word, ignoring case.

    Args:
        word: The token to check; it is lower-cased before the lookup.

    Returns:
        bool: Whether the lower-cased token is in NLTK's English
        stop-word list.
    """
    global _ENGLISH_STOP_WORDS
    if _ENGLISH_STOP_WORDS is None:
        # Loaded here rather than at import time so that the corpus is only
        # required once this function is actually used.
        _ENGLISH_STOP_WORDS = frozenset(stopwords.words('english'))
    return word.lower() in _ENGLISH_STOP_WORDS

def calculate_tf(word,sentence):
    """Return the term frequency of ``word`` within ``sentence``.

    The sentence is tokenized with ``tokenize_word`` and the result is the
    number of tokens exactly equal to ``word`` (case-sensitive) divided by
    the total number of tokens.

    Args:
        word: The token to count.
        sentence: The sentence text to tokenize and search.

    Returns:
        float: The fraction of tokens equal to ``word``; ``0.0`` when the
        sentence yields no tokens.
    """
    words=tokenize_word(sentence)
    if not words:
        # An empty token list would otherwise divide by zero.
        return 0.0
    return words.count(word)/len(words)

def calculate_idf(word,sentences):
    """Return the inverse document frequency of ``word`` over ``sentences``.

    Each sentence is treated as a document. The value is
    ``log(N / (df + 1))`` where ``N`` is the number of sentences and ``df``
    is the number of sentences whose tokens contain ``word``. Because of the
    ``+ 1`` in the denominator, the result is negative when the word occurs
    in every sentence.

    Args:
        word: The token to look for (exact, case-sensitive match).
        sentences: A sized collection of sentence strings.

    Returns:
        float: The IDF value; ``0.0`` when ``sentences`` is empty.
    """
    if not sentences:
        # log(0 / 1) is undefined; an empty corpus carries no information.
        return 0.0
    containing=sum(1 for sentence in sentences if word in tokenize_word(sentence))
    return math.log(len(sentences)/(containing+1))

def calculate_tf_idf(sentence,sentences):
    """Score ``sentence`` by summing TF-IDF over its distinct tokens.

    For every unique token of ``sentence`` the term frequency (within
    ``sentence``) is multiplied by the inverse document frequency (across
    ``sentences``) and the products are accumulated.

    Returns:
        The accumulated score; ``0`` when the sentence has no tokens.
    """
    total = 0
    unique_tokens = set(tokenize_word(sentence))
    for token in unique_tokens:
        total += calculate_tf(token, sentence) * calculate_idf(token, sentences)
    return total

def find_max_sentence(scores):
    """Return the key of ``scores`` that has the highest value.

    The comparison is strict, so on ties the key seen first during
    iteration wins. ``None`` is returned when ``scores`` is empty or when no
    value is strictly greater than negative infinity.
    """
    # ``best`` is a (sentence, score) pair; start below any finite score.
    best = (None, float('-inf'))
    for candidate in scores.items():
        if candidate[1] > best[1]:
            best = candidate
    return best[0]

def n_largest(scores,n):
    """Return up to ``n`` keys of ``scores`` ordered by descending value.

    Args:
        scores: Mapping from sentence to its numeric score. It is not
            modified.
        n: Maximum number of sentences to return.

    Returns:
        list: The highest-scoring keys, best first. Keys with equal scores
        keep their iteration order in ``scores``. Fewer than ``n`` keys are
        returned when ``scores`` has fewer entries, and an empty list when
        ``n`` is zero or negative.
    """
    if n <= 0:
        return []
    # sorted() is stable even with reverse=True, so ties stay in the
    # mapping's iteration order, the same winner a repeated strict-max scan
    # would pick.
    ranked = sorted(scores, key=scores.get, reverse=True)
    return ranked[:n]

def summarize_text(text,length):
    """Build an extractive summary of ``text`` from its top-scoring sentences.

    The text is split into sentences, each sentence is scored with
    ``calculate_tf_idf`` against all sentences, and the best ``length``
    sentences are joined with single spaces. Sentences appear in the order
    ``n_largest`` returns them, not in their original document order.

    Args:
        text: The text to summarize.
        length: Desired number of sentences in the summary.

    Returns:
        str: The selected sentences joined by spaces. When ``length``
        exceeds the number of distinct sentences, all of them are used; the
        result is an empty string when there is nothing to select.
    """
    sentences=tokenize_sentence(text)
    sentence_scores={sentence:calculate_tf_idf(sentence,sentences) for sentence in sentences}
    # Identical sentences collapse into one dict key, so clamp against the
    # number of scored entries rather than len(sentences); asking for more
    # than exist would otherwise fail inside the selection step.
    wanted=min(length,len(sentence_scores))
    selected_sentences=n_largest(sentence_scores,wanted)
    return ' '.join(selected_sentences)

# Sample passage (four sentences) used to demonstrate the summarizer.
text=(
    "Scientists have discovered a new species of marine life in the deep ocean off the coast of Antarctica. "
    "This species, tentatively named 'Antarctic Deep-sea Octopus,' is a remarkable find due to its unique adaptation to extreme cold and high-pressure conditions. "
    "The octopus has translucent skin, allowing it to camouflage effectively in its frigid environment, and it possesses bioluminescent properties, which it uses for communication and hunting in pitch-black depths. "
    "Researchers believe that the discovery of this species could shed light on the adaptability of life in some of Earth's most extreme environments."
)

# Produce a two-sentence summary and print it beneath a heading line.
summary=summarize_text(text,2)
print("Summary:",summary,sep="\n")

Summary:
The octopus has translucent skin, allowing it to camouflage effectively in its frigid environment, and it possesses bioluminescent properties, which it uses for communication and hunting in pitch-black depths. This species, tentatively named 'Antarctic Deep-sea Octopus,' is a remarkable find due to its unique adaptation to extreme cold and high-pressure conditions.


[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\LENOVO\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
