In [1]:
import spacy 
from spacy import displacy
from spacy.lang.en.stop_words import STOP_WORDS

In [2]:
def simple_summarizer(doc, n_sent):
    '''
    A simple extractive summarizer on the sentence level: just counting the frequencies of the words
    in the documents and choose the most "important sentences" based on the number of most-occuring
    words they contain
    
    Input:
        doc: a spaCy document object of the text
        n_sent: number of sentences to choose from the original text, 
                must be < the number of sentences in the text
    
    Output: a statistically summarized version of the original text
    '''
    import spacy 
    from spacy import displacy
    from spacy.lang.en.stop_words import STOP_WORDS
    from heapq import nlargest
    sw = list(STOP_WORDS)
    
    #Build a frequency table
    tokens = [token.text for token in doc]
    freq = {}

    for token in doc: 
        if token.text not in sw: 
            if token.text not in freq.keys():
                freq[token.text] = 1
            else: 
                freq[token.text] += 1
                
    max_freq = max(freq.values())
    sentence_scores = {}
    sentences = [sent for sent in doc.sents if len(sent.text.split(' ')) <= 20]
    for sent in sentences: 
        for word in sent: 
            if word.text.lower() in freq.keys():
                if sent not in sentence_scores.keys():
                    sentence_scores[sent] = 1
                else: 
                    sentence_scores[sent] += 1

    summarized = nlargest(n_sent, sentence_scores, key=sentence_scores.get)
    summarized = [sent.text for sent in summarized]
    return ' '.join(summarized)

In [5]:
nlp = spacy.load('en_core_web_lg')

In [11]:
doc = '''For a long time, core NLP techniques were dominated by machine-learning approaches that used linear models such as support vector machines or logistic regression, trained over very high dimensional yet very sparse feature vectors.
Recently, the field has seen some success in switching from such linear models over sparse inputs to non-linear neural-network models over dense inputs. While most of the neural network techniques are easy to apply, sometimes as almost drop-in replacements of the old linear classifiers, there is in many cases a strong barrier of entry. In this tutorial I attempt to provide NLP practitioners (as well as newcomers) with the basic background, jargon, tools and methodology that will allow them to understand the principles behind the neural network models and apply them to their own work. This tutorial is expected to be self-contained, while presenting the different approaches under a unified notation and framework. It repeats a lot of material which is available elsewhere. It also points to external sources for more advanced topics when appropriate.
'''
doc = nlp(doc)
print('Full text:', doc)
simple_summarizer(doc, 5)

Full text: For a long time, core NLP techniques were dominated by machine-learning approaches that used linear models such as support vector machines or logistic regression, trained over very high dimensional yet very sparse feature vectors.
Recently, the field has seen some success in switching from such linear models over sparse inputs to non-linear neural-network models over dense inputs. While most of the neural network techniques are easy to apply, sometimes as almost drop-in replacements of the old linear classifiers, there is in many cases a strong barrier of entry. In this tutorial I attempt to provide NLP practitioners (as well as newcomers) with the basic background, jargon, tools and methodology that will allow them to understand the principles behind the neural network models and apply them to their own work. This tutorial is expected to be self-contained, while presenting the different approaches under a unified notation and framework. It repeats a lot of material which is

'This tutorial is expected to be self-contained, while presenting the different approaches under a unified notation and framework. It also points to external sources for more advanced topics when appropriate.\n It repeats a lot of material which is available elsewhere.'