In [17]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation

In [18]:
text = '''Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems 
use to progressively improve their performance on a specific task. Machine learning algorithms build a mathematical model 
of sample data, known as “training data”, in order to make predictions or decisions without being explicitly programmed 
to perform the task. Machine learning algorithms are used in the applications of email filtering, detection of network 
intruders, and computer vision, where it is infeasible to develop an algorithm of specific instructions for performing 
the task. Machine learning is closely related to computational statistics, which focuses on making predictions using 
computers. The study of mathematical optimization delivers methods, theory and application domains to the field of machine 
learning. Data mining is a field of study within machine learning and focuses on exploratory data analysis through 
unsupervised learning. In its application across business problems, machine learning is also referred to as predictive
analytics.'''

In [19]:
len(text)

1077

In [27]:
nlp = spacy.load("en_core_web_sm") # python3 -m spacy download en_core_web_sm

In [28]:
doc = nlp(text)

In [29]:
len(list(doc.sents))

7

In [30]:
tokens = [token.text for token in doc]
print(tokens)

['Machine', 'learning', '(', 'ML', ')', 'is', 'the', 'scientific', 'study', 'of', 'algorithms', 'and', 'statistical', 'models', 'that', 'computer', 'systems', '\n', 'use', 'to', 'progressively', 'improve', 'their', 'performance', 'on', 'a', 'specific', 'task', '.', 'Machine', 'learning', 'algorithms', 'build', 'a', 'mathematical', 'model', '\n', 'of', 'sample', 'data', ',', 'known', 'as', '“', 'training', 'data', '”', ',', 'in', 'order', 'to', 'make', 'predictions', 'or', 'decisions', 'without', 'being', 'explicitly', 'programmed', '\n', 'to', 'perform', 'the', 'task', '.', 'Machine', 'learning', 'algorithms', 'are', 'used', 'in', 'the', 'applications', 'of', 'email', 'filtering', ',', 'detection', 'of', 'network', '\n', 'intruders', ',', 'and', 'computer', 'vision', ',', 'where', 'it', 'is', 'infeasible', 'to', 'develop', 'an', 'algorithm', 'of', 'specific', 'instructions', 'for', 'performing', '\n', 'the', 'task', '.', 'Machine', 'learning', 'is', 'closely', 'related', 'to', 'computa

In [31]:
punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [32]:
punctuation = punctuation + '\n'

In [39]:
# Frequency Of Words
word_freq = {}
stop_words = list(STOP_WORDS)

for word in doc:
    if word.text.lower() not in stop_words:
        if word.text.lower() not in punctuation:
            if word.text not in word_freq.keys():
                word_freq[word.text] = 1
            else:
                word_freq[word.text] += 1


In [61]:
print(word_freq)

{'Machine': 0.0625, 'learning': 0.125, 'ML': 0.015625, 'scientific': 0.015625, 'study': 0.046875, 'algorithms': 0.046875, 'statistical': 0.015625, 'models': 0.015625, 'computer': 0.03125, 'systems': 0.015625, 'use': 0.015625, 'progressively': 0.015625, 'improve': 0.015625, 'performance': 0.015625, 'specific': 0.03125, 'task': 0.046875, 'build': 0.015625, 'mathematical': 0.03125, 'model': 0.015625, 'sample': 0.015625, 'data': 0.046875, 'known': 0.015625, '“': 0.015625, 'training': 0.015625, '”': 0.015625, 'order': 0.015625, 'predictions': 0.03125, 'decisions': 0.015625, 'explicitly': 0.015625, 'programmed': 0.015625, 'perform': 0.015625, 'applications': 0.015625, 'email': 0.015625, 'filtering': 0.015625, 'detection': 0.015625, 'network': 0.015625, 'intruders': 0.015625, 'vision': 0.015625, 'infeasible': 0.015625, 'develop': 0.015625, 'algorithm': 0.015625, 'instructions': 0.015625, 'performing': 0.015625, 'closely': 0.015625, 'related': 0.015625, 'computational': 0.015625, 'statistics':

In [62]:
max_freq = max(word_freq.values())

In [63]:
# Normalising
for word in word_freq.keys():
    word_freq[word] = word_freq[word] / max_freq

In [64]:
print(word_freq)

{'Machine': 0.5, 'learning': 1.0, 'ML': 0.125, 'scientific': 0.125, 'study': 0.375, 'algorithms': 0.375, 'statistical': 0.125, 'models': 0.125, 'computer': 0.25, 'systems': 0.125, 'use': 0.125, 'progressively': 0.125, 'improve': 0.125, 'performance': 0.125, 'specific': 0.25, 'task': 0.375, 'build': 0.125, 'mathematical': 0.25, 'model': 0.125, 'sample': 0.125, 'data': 0.375, 'known': 0.125, '“': 0.125, 'training': 0.125, '”': 0.125, 'order': 0.125, 'predictions': 0.25, 'decisions': 0.125, 'explicitly': 0.125, 'programmed': 0.125, 'perform': 0.125, 'applications': 0.125, 'email': 0.125, 'filtering': 0.125, 'detection': 0.125, 'network': 0.125, 'intruders': 0.125, 'vision': 0.125, 'infeasible': 0.125, 'develop': 0.125, 'algorithm': 0.125, 'instructions': 0.125, 'performing': 0.125, 'closely': 0.125, 'related': 0.125, 'computational': 0.125, 'statistics': 0.125, 'focuses': 0.25, 'making': 0.125, 'computers': 0.125, 'optimization': 0.125, 'delivers': 0.125, 'methods': 0.125, 'theory': 0.125

In [65]:
sent_tokens = [sent for sent in doc.sents]
print(sent_tokens)

[Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems 
use to progressively improve their performance on a specific task., Machine learning algorithms build a mathematical model 
of sample data, known as “training data”, in order to make predictions or decisions without being explicitly programmed 
to perform the task., Machine learning algorithms are used in the applications of email filtering, detection of network 
intruders, and computer vision, where it is infeasible to develop an algorithm of specific instructions for performing 
the task., Machine learning is closely related to computational statistics, which focuses on making predictions using 
computers., The study of mathematical optimization delivers methods, theory and application domains to the field of machine 
learning., Data mining is a field of study within machine learning and focuses on exploratory data analysis through 
unsupervised learning., In its application acr

In [66]:
sent_score = {}

In [69]:
for sent in sent_tokens:
    for word in sent:
        if word.text.lower() in word_freq.keys():
            if sent not in sent_score.keys():
                sent_score[sent] = word_freq[word.text.lower()]
            else:
                sent_score[sent] += word_freq[word.text.lower()]
                

In [70]:
print(sent_score)

{Machine learning (ML) is the scientific study of algorithms and statistical models that computer systems 
use to progressively improve their performance on a specific task.: 4.0, Machine learning algorithms build a mathematical model 
of sample data, known as “training data”, in order to make predictions or decisions without being explicitly programmed 
to perform the task.: 4.875, Machine learning algorithms are used in the applications of email filtering, detection of network 
intruders, and computer vision, where it is infeasible to develop an algorithm of specific instructions for performing 
the task.: 4.125, Machine learning is closely related to computational statistics, which focuses on making predictions using 
computers.: 2.625, The study of mathematical optimization delivers methods, theory and application domains to the field of machine 
learning.: 3.125, Data mining is a field of study within machine learning and focuses on exploratory data analysis through 
unsupervised 

In [71]:
from heapq import nlargest

In [72]:
len(sent_score) * 0.3

2.1

In [74]:
2

2

In [78]:
summary = nlargest(n = 2, iterable = sent_score, key = sent_score.get)

In [83]:
final_summary = [word.text for word in summary]
summary = " ".join(final_summary)
print(summary)

Machine learning algorithms build a mathematical model 
of sample data, known as “training data”, in order to make predictions or decisions without being explicitly programmed 
to perform the task. Data mining is a field of study within machine learning and focuses on exploratory data analysis through 
unsupervised learning.
