In [0]:
import spacy

# ***TEXT PRE PROCESSING***

In [0]:
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [0]:
stopwords = list(STOP_WORDS)

# ***TRAINING DOCUMENTS***

In [0]:
document1= """F. angorensis Gmelin, 1788 F. vulgaris Fischer, 1829.The cat (Felis catus) is a small carnivorous mammal.[1][2] It is the only domesticated species in the family Felidae and often referred to as the domestic cat to distinguish it from wild members of the family.[4] The cat is either a house cat or a farm cat, which are pets, or a feral cat, which ranges freely and avoids human contact.[5] A house cat is valued by humans for companionship and for its ability to hunt rodents. About 60 cat breeds are recognized by various cat registries.[6]

Cats are similar in anatomy to the other felid species, with a strong flexible body, quick reflexes, sharp teeth and retractable claws adapted to killing small prey. They are predators who are most active at dawn and dusk (crepuscular). Cats can hear sounds too faint or too high in frequency for human ears, such as those made by mice and other small animals. Compared to humans, they see better in the dark (they see in near total darkness) and have a better sense of smell, but poorer color vision. Cats, despite being solitary hunters, are a social species. Cat communication includes the use of vocalizations including meowing, purring, trilling, hissing, growling and grunting as well as cat-specific body language.[7] Cats also communicate by secreting and perceiving pheromones.[8]

Female domestic cats can have kittens from spring to late autumn, with litter sizes ranging from two to five kittens.[9] Domestic cats can be bred and shown as registered pedigreed cats, a hobby known as cat fancy. Failure to control the breeding of pet cats by spaying and neutering, as well as abandonment of pets, has resulted in large numbers of feral cats worldwide, contributing to the extinction of entire bird species, and evoking population control.[10]

It was long thought that cat domestication was initiated in Egypt, because cats in ancient Egypt were venerated since around 3100 BC.[11][12] However, the earliest indication for the taming of an African wildcat (F. lybica) was found in Cyprus, where a cat skeleton was excavated close by a human Neolithic grave dating to around 7500 BC.[13] African wildcats were probably first domesticated in the Near East.[14] The leopard cat (Prionailurus bengalensis) was tamed independently in China around 5500 BC, though this line of partially domesticated cats leaves no trace in the domestic cat populations of today.[15][16]

As of 2017, the domestic cat was the second-most popular pet in the U.S. by number of pets owned, after freshwater fish,[17] with 95 million cats owned.[18][19] In the United Kingdom, around 7.3 million cats lived in more than 4.8 million households as of 2019.[20] In 1965, the country's cat population was 4.1 million."""

In [0]:
document2 = """Our Father who art in heaven, hallowed be thy name. Thy kingdom come. Thy will be done, on earth as it is in heaven. Give us this day our daily bread; and forgive us our trespasses, as we forgive those who trespass against us; and lead us not into temptation, but deliver us from evil
"""

In [0]:
nlp = spacy.load('en')

In [0]:
# Build an NLP Object
docx = nlp(document1)

In [0]:
# Tokenization of Text
mytokens = [token.text for token in docx]

In [0]:
# Build Word Frequency
# word.text is tokenization in spacy
word_frequencies = {}
for word in docx:
    if word.text not in stopwords:
            if word.text not in word_frequencies.keys():
                word_frequencies[word.text] = 1
            else:
                word_frequencies[word.text] += 1

In [0]:
word_frequencies

{'\n': 2,
 '\n\n': 4,
 '(': 5,
 ')': 5,
 ',': 31,
 '-': 2,
 '.': 8,
 '1788': 1,
 '1829': 1,
 '1965': 1,
 '2017': 1,
 '2019.[20': 1,
 '3100': 1,
 '4.1': 1,
 '4.8': 1,
 '5500': 1,
 '60': 1,
 '7.3': 1,
 '7500': 1,
 '95': 1,
 'A': 1,
 'About': 1,
 'African': 2,
 'As': 1,
 'BC': 1,
 'BC.[11][12': 1,
 'BC.[13': 1,
 'Cat': 1,
 'Cats': 4,
 'China': 1,
 'Compared': 1,
 'Cyprus': 1,
 'Domestic': 1,
 'East.[14': 1,
 'Egypt': 2,
 'F.': 3,
 'Failure': 1,
 'Felidae': 1,
 'Felis': 1,
 'Female': 1,
 'Fischer': 1,
 'Gmelin': 1,
 'However': 1,
 'In': 2,
 'It': 2,
 'Kingdom': 1,
 'Near': 1,
 'Neolithic': 1,
 'Prionailurus': 1,
 'The': 3,
 'They': 1,
 'U.S.': 1,
 'United': 1,
 ']': 15,
 'abandonment': 1,
 'ability': 1,
 'active': 1,
 'adapted': 1,
 'anatomy': 1,
 'ancient': 1,
 'angorensis': 1,
 'animals': 1,
 'autumn': 1,
 'avoids': 1,
 'bengalensis': 1,
 'better': 2,
 'bird': 1,
 'body': 2,
 'bred': 1,
 'breeding': 1,
 'breeds': 1,
 'carnivorous': 1,
 'cat': 17,
 'cats': 9,
 'catus': 1,
 'claws': 1,
 'c

In [0]:
# Maximum Word Frequency
maximum_frequency = max(word_frequencies.values())

In [0]:
for word in word_frequencies.keys():  
        word_frequencies[word] = (word_frequencies[word]/maximum_frequency)

In [0]:
# Frequency Table
word_frequencies

{'\n': 0.06451612903225806,
 '\n\n': 0.12903225806451613,
 '(': 0.16129032258064516,
 ')': 0.16129032258064516,
 ',': 1.0,
 '-': 0.06451612903225806,
 '.': 0.25806451612903225,
 '1788': 0.03225806451612903,
 '1829': 0.03225806451612903,
 '1965': 0.03225806451612903,
 '2017': 0.03225806451612903,
 '2019.[20': 0.03225806451612903,
 '3100': 0.03225806451612903,
 '4.1': 0.03225806451612903,
 '4.8': 0.03225806451612903,
 '5500': 0.03225806451612903,
 '60': 0.03225806451612903,
 '7.3': 0.03225806451612903,
 '7500': 0.03225806451612903,
 '95': 0.03225806451612903,
 'A': 0.03225806451612903,
 'About': 0.03225806451612903,
 'African': 0.06451612903225806,
 'As': 0.03225806451612903,
 'BC': 0.03225806451612903,
 'BC.[11][12': 0.03225806451612903,
 'BC.[13': 0.03225806451612903,
 'Cat': 0.03225806451612903,
 'Cats': 0.12903225806451613,
 'China': 0.03225806451612903,
 'Compared': 0.03225806451612903,
 'Cyprus': 0.03225806451612903,
 'Domestic': 0.03225806451612903,
 'East.[14': 0.0322580645161290

In [0]:
# Sentence Tokens
sentence_list = [ sentence for sentence in docx.sents ]

In [0]:
# Sentence Score via comparing each word with sentence
sentence_scores = {}  
for sent in sentence_list:  
        for word in sent:
            if word.text.lower() in word_frequencies.keys():
                if len(sent.text.split(' ')) < 30:
                    if sent not in sentence_scores.keys():
                        sentence_scores[sent] = word_frequencies[word.text.lower()]
                    else:
                        sentence_scores[sent] += word_frequencies[word.text.lower()]


In [0]:
# Sentence Score Table
sentence_scores

{F. angorensis Gmelin, 1788
 F. vulgaris Fischer, 1829: 2.258064516129032,
 The cat (Felis catus) is a small carnivorous mammal.[1][2]: 1.5483870967741935,
 It is the only domesticated species in the family Felidae and often referred to as the domestic cat to distinguish it from wild members of the family.[4]: 1.5806451612903225,
 The cat is either a house cat or a farm cat, which are pets, or a feral cat, which ranges freely and avoids human contact.[5]: 6.161290322580644,
 A house cat is valued by humans for companionship and for its ability to hunt rodents.: 1.0967741935483868,
 About 60 cat breeds are recognized by various cat registries.[6]
 : 1.8387096774193545,
 Cats are similar in anatomy to the other felid species, with a strong flexible body, quick reflexes, sharp teeth and retractable claws adapted to killing small prey.: 4.290322580645161,
 They are predators who are most active at dawn and dusk (crepuscular).: 0.7419354838709676,
 Cats can hear sounds too faint or too high

In [0]:
# Import Heapq 
from heapq import nlargest

In [0]:
summarized_sentences = nlargest(7, sentence_scores, key=sentence_scores.get)


In [0]:
summarized_sentences

[Cat communication includes the use of vocalizations including meowing, purring, trilling, hissing, growling and grunting as well as cat-specific body language.[7] Cats also communicate by secreting and perceiving pheromones.[8]
 ,
 The cat is either a house cat or a farm cat, which are pets, or a feral cat, which ranges freely and avoids human contact.[5],
 Cats are similar in anatomy to the other felid species, with a strong flexible body, quick reflexes, sharp teeth and retractable claws adapted to killing small prey.,
 Compared to humans, they see better in the dark (they see in near total darkness) and have a better sense of smell, but poorer color vision.,
 Cats, despite being solitary hunters, are a social species.,
 Domestic cats can be bred and shown as registered pedigreed cats, a hobby known as cat fancy.,
 It was long thought that cat domestication was initiated in Egypt, because cats in ancient Egypt were venerated since around 3100 BC.[11][12]]

In [0]:
# Convert Sentences from Spacy Span to Strings for joining entire sentence
for w in summarized_sentences:
    print(w.text)

Cat communication includes the use of vocalizations including meowing, purring, trilling, hissing, growling and grunting as well as cat-specific body language.[7] Cats also communicate by secreting and perceiving pheromones.[8]


The cat is either a house cat or a farm cat, which are pets, or a feral cat, which ranges freely and avoids human contact.[5]
Cats are similar in anatomy to the other felid species, with a strong flexible body, quick reflexes, sharp teeth and retractable claws adapted to killing small prey.
Compared to humans, they see better in the dark (they see in near total darkness) and have a better sense of smell, but poorer color vision.
Cats, despite being solitary hunters, are a social species.
Domestic cats can be bred and shown as registered pedigreed cats, a hobby known as cat fancy.
It was long thought that cat domestication was initiated in Egypt, because cats in ancient Egypt were venerated since around 3100 BC.[11][12]


In [0]:
# List Comprehension of Sentences Converted From Spacy.span to strings
final_sentences = [ w.text for w in summarized_sentences ]


In [0]:
summary = ' '.join(final_sentences)

In [0]:
summary

'Cat communication includes the use of vocalizations including meowing, purring, trilling, hissing, growling and grunting as well as cat-specific body language.[7] Cats also communicate by secreting and perceiving pheromones.[8]\n\n The cat is either a house cat or a farm cat, which are pets, or a feral cat, which ranges freely and avoids human contact.[5] Cats are similar in anatomy to the other felid species, with a strong flexible body, quick reflexes, sharp teeth and retractable claws adapted to killing small prey. Compared to humans, they see better in the dark (they see in near total darkness) and have a better sense of smell, but poorer color vision. Cats, despite being solitary hunters, are a social species. Domestic cats can be bred and shown as registered pedigreed cats, a hobby known as cat fancy. It was long thought that cat domestication was initiated in Egypt, because cats in ancient Egypt were venerated since around 3100 BC.[11][12]'

In [0]:
# Length of Summary
len(summary)

958

In [0]:
# Length of Original Text
len(document1)

2742

In [0]:
from gensim.summarization import summarize

In [0]:
gensum=summarize(document1)

In [0]:
gensum

'Cats are similar in anatomy to the other felid species, with a strong flexible body, quick reflexes, sharp teeth and retractable claws adapted to killing small prey.\nFailure to control the breeding of pet cats by spaying and neutering, as well as abandonment of pets, has resulted in large numbers of feral cats worldwide, contributing to the extinction of entire bird species, and evoking population control.[10]\nlybica) was found in Cyprus, where a cat skeleton was excavated close by a human Neolithic grave dating to around 7500 BC.[13] African wildcats were probably first domesticated in the Near East.[14] The leopard cat (Prionailurus bengalensis) was tamed independently in China around 5500 BC, though this line of partially domesticated cats leaves no trace in the domestic cat populations of today.[15][16]'

In [0]:
len(gensum)

818