# Text Summarizer #

## Library Import ##

In [1]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

## Download Text ##

In [3]:
# import corpus
import nltk
nltk.download('gutenberg')
from nltk.corpus import gutenberg as gut

[nltk_data] Downloading package gutenberg to
[nltk_data]     /Users/charlotteportenseigne/nltk_data...
[nltk_data]   Package gutenberg is already up-to-date!


In [4]:
alice_text = nltk.corpus.gutenberg.raw('carroll-alice.txt')
alice_text = alice_text[58:]

## Text Cleaning ##

In [5]:
stopwords = list(STOP_WORDS)
nlp = spacy.load('en_core_web_sm')
doc = nlp(alice_text)

In [6]:
tokens = [token.text for token in doc]

In [7]:
punctuation = punctuation + "\n" + "*" + "\n\n"
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~\n*\n\n'

In [8]:
# make a dictionary of words as key and value is their frequencies
word_frequencies = {}
for word in doc:
    if word.text.lower() not in stopwords:
        if word.text.lower() not in punctuation:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1

In [9]:
word_frequencies

{'CHAPTER': 12,
 'I.': 1,
 'Rabbit': 44,
 'Hole': 1,
 'Alice': 394,
 'beginning': 14,
 'tired': 7,
 'sitting': 10,
 'sister': 9,
 'bank': 3,
 'having': 10,
 'twice': 4,
 'peeped': 3,
 'book': 11,
 'reading': 3,
 'pictures': 4,
 'conversations': 1,
 'use': 18,
 'thought': 74,
 'conversation': 10,
 'considering': 3,
 'mind': 9,
 'hot': 7,
 'day': 28,
 'feel': 8,
 'sleepy': 5,
 'stupid': 5,
 'pleasure': 2,
 'making': 8,
 'daisy': 1,
 'chain': 1,
 'worth': 4,
 'trouble': 6,
 'getting': 22,
 'picking': 2,
 'daisies': 1,
 'suddenly': 12,
 'White': 22,
 'pink': 1,
 'eyes': 28,
 'ran': 16,
 'close': 13,
 'remarkable': 2,
 'think': 47,
 'way': 54,
 'hear': 14,
 'Oh': 34,
 'dear': 28,
 'shall': 23,
 'late': 6,
 'occurred': 2,
 'ought': 14,
 'wondered': 1,
 'time': 68,
 'natural': 4,
 'actually': 1,
 'TOOK': 1,
 'WATCH': 1,
 'WAISTCOAT': 1,
 'POCKET': 1,
 'looked': 45,
 'hurried': 11,
 'started': 2,
 'feet': 19,
 'flashed': 1,
 'seen': 15,
 'rabbit': 5,
 'waistcoat': 1,
 'pocket': 6,
 'watch': 7,

In [10]:
max_frequency = max(word_frequencies.values())

In [11]:
max_frequency

453

## Normalize the frequency of words ##

In [12]:
for word in word_frequencies.keys():
    word_frequencies[word] = word_frequencies[word]/max_frequency

In [13]:
print(word_frequencies)



In [14]:
sentence_tokens = [sent for sent in doc.sents]

In [15]:
sentence_tokens 

[CHAPTER I. Down the Rabbit-Hole
 
 Alice was beginning to get very tired of sitting by her sister on the
 bank, and of having nothing to do: once or twice she had peeped into the
 book her sister was reading, but it had no pictures or conversations in
 it, 'and what is the use of a book,' thought Alice 'without pictures or
 conversation?',
 
 ,
 So she was considering in her own mind (as well as she could, for the
 hot day made her feel very sleepy and stupid), whether the pleasure
 of making a daisy-chain would be worth the trouble of getting up and
 picking the daisies, when suddenly a White Rabbit with pink eyes ran
 close by her.,
 
 
 There was nothing so VERY remarkable in that; nor did Alice think it so
 VERY much out of the way to hear the Rabbit say to itself, 'Oh dear!,
 ,
 Oh dear!,
 I shall be late!',
 (when she thought it over afterwards, it
 occurred to her that she ought to have wondered at this, but at the time
 it all seemed quite natural); but when the Rabbit actuall

In [16]:
# calculate the sentences score
sentence_scores = {}
for sent in sentence_tokens:
    for word in sent:
        if word.text.lower() in word_frequencies.keys():
            if sent not in sentence_scores.keys():
                sentence_scores[sent] = word_frequencies[word.text.lower()]
            else:
                sentence_scores[sent] += word_frequencies[word.text.lower()]

In [17]:
sentence_scores

{CHAPTER I. Down the Rabbit-Hole
 
 Alice was beginning to get very tired of sitting by her sister on the
 bank, and of having nothing to do: once or twice she had peeped into the
 book her sister was reading, but it had no pictures or conversations in
 it, 'and what is the use of a book,' thought Alice 'without pictures or
 conversation?': 0.47240618101545256,
 So she was considering in her own mind (as well as she could, for the
 hot day made her feel very sleepy and stupid), whether the pleasure
 of making a daisy-chain would be worth the trouble of getting up and
 picking the daisies, when suddenly a White Rabbit with pink eyes ran
 close by her.: 0.43046357615894043,
 
 
 There was nothing so VERY remarkable in that; nor did Alice think it so
 VERY much out of the way to hear the Rabbit say to itself, 'Oh dear!: 0.3532008830022075,
 Oh dear!: 0.0838852097130243,
 I shall be late!': 0.06401766004415012,
 (when she thought it over afterwards, it
 occurred to her that she ought to ha

In [18]:
from heapq import nlargest

In [19]:
select_lenght = int(len(sentence_tokens)*0.3)
select_lenght

459

In [20]:
summary = nlargest(select_lenght, sentence_scores, key=sentence_scores.get)

In [21]:
summary

["'
 
 'Thank you, it's a very interesting dance to watch,' said Alice, feeling
 very glad that it was over at last: 'and I do so like that curious song
 about the whiting!'
 
 'Oh, as to the whiting,' said the Mock Turtle, 'they--you've seen them,
 of course?'
 
 'Yes,' said Alice, 'I've often seen them at dinn--' she checked herself
 hastily.,
 
 
 'Well, perhaps you haven't found it so yet,' said Alice; 'but when you
 have to turn into a chrysalis--you will some day, you know--and then
 after that into a butterfly, I should think you'll feel it a little
 queer, won't you?'
 
 'Not a bit,' said the Caterpillar.,
 
 
 'Well, if I must, I must,' the King said, with a melancholy air, and,
 after folding his arms and frowning at the cook till his eyes were
 nearly out of sight, he said in a deep voice, 'What are tarts made of?'
 
 'Pepper, mostly,' said the cook.,
 By the use of this ointment--one shilling the box--
     Allow me to sell you a couple?'
 
    'You are old,' said the youth

In [22]:
# combine sentence together
final_summary = [word.text for word in summary]

In [23]:
summary = ' '.join(final_summary)

In [24]:
print(summary)

"'

'Thank you, it's a very interesting dance to watch,' said Alice, feeling
very glad that it was over at last: 'and I do so like that curious song
about the whiting!'

'Oh, as to the whiting,' said the Mock Turtle, 'they--you've seen them,
of course?'

'Yes,' said Alice, 'I've often seen them at dinn--' she checked herself
hastily. 

'Well, perhaps you haven't found it so yet,' said Alice; 'but when you
have to turn into a chrysalis--you will some day, you know--and then
after that into a butterfly, I should think you'll feel it a little
queer, won't you?'

'Not a bit,' said the Caterpillar. 

'Well, if I must, I must,' the King said, with a melancholy air, and,
after folding his arms and frowning at the cook till his eyes were
nearly out of sight, he said in a deep voice, 'What are tarts made of?'

'Pepper, mostly,' said the cook. By the use of this ointment--one shilling the box--
    Allow me to sell you a couple?'

   'You are old,' said the youth, 'and your jaws are too weak
   

## Compare original text and summary ##

In [25]:
len(alice_text), len(summary)

(144337, 57979)