In [1]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS
from string import punctuation
import sys
from heapq import nlargest

In [2]:
nlp = spacy.load('en_core_web_lg')

In [3]:
stopwords = list(STOP_WORDS)

In [4]:
def calc_word_frequencies(doc):
    word_frequencies = {}
    for word in doc:
        if word.text not in stopwords and word.text not in punctuation:
                if word.text not in word_frequencies.keys():
                    word_frequencies[word.text] = 1
                else:
                    word_frequencies[word.text] += 1
    return word_frequencies

In [5]:
def get_max_frequency(word_frequencies):
    return max(word_frequencies.values())

In [6]:
def normalize_word_frequencies(word_frequencies):
    max_frequency = get_max_frequency(word_frequencies)
    for word in word_frequencies.keys():
        word_frequencies[word] = (word_frequencies[word]/max_frequency)
    return word_frequencies

In [7]:
def get_sent_scores(sentence_list,word_frequencies):
    sentence_scores = {}  
    for sent in sentence_list:  
        for word in sent:
            if word.text in word_frequencies.keys():
                if len(sent.text.split(' ')) < 30:
                    if sent not in sentence_scores.keys():
                        sentence_scores[sent] = word_frequencies[word.text]
                    else:
                        sentence_scores[sent] += word_frequencies[word.text]
    return sentence_scores

In [56]:
def generate_summary(doc,sents_in_summary):
    word_frequencies = calc_word_frequencies(doc)
    word_frequencies = normalize_word_frequencies(word_frequencies)
    sentence_scores = get_sent_scores([sent for sent in doc.sents],word_frequencies)
    summarized_sentences = nlargest(sents_in_summary, sentence_scores, key=sentence_scores.get)
    final_sentences = [w.text for w in summarized_sentences]
    summary = " ".join(final_sentences)
    return summary

In [62]:
doc = '''Recitals are often included immediately following the preamble, to set forth background
information regarding the parties and the purpose of the employment agreement and to provide
relevant information to put the terms of the employment agreement in proper context. The
recitals usually conclude with a brief statement affirming the intention of the parties to be bound
by the agreement. Recitals can demonstrate intent of the parties if there is an ambiguity in the
agreement, but they will not be used to determine if an ambiguity exists unless expressly made
part of the agreement. Sometimes, the recitals are incorporated by reference as material terms of
the agreement.
An example of a preamble and recitals is set forth below:
PHYSICIAN EMPLOYMENT AGREEMENT
This Physician Employment Agreement (this "Agreement") is entered into on
(the "Execution Date"), but effective as of
_ (the "Effective Date"), by and between Large
Texas Medical Group, P.A., a Texas professional association ("Employer"), and
Joan Q. Physician, M.D. ("Physician").
Recitals
WHEREAS, through its employed and contracted physicians, Employer conducts
a medical practice at
in
Texas; and
WHEREAS, Physician is duly licensed to practice medicine in the State of Texas;
and
WHEREAS, Employer desires to employ Physician to render medical services as
an employed physician of Employer, and Physician desires to accept such
employment, all as stated in this Agreement.
NOW, THEREFORE, in consideration of the mutual terms and conditions set
forth in this Agreement, the parties, intending to be legally bound, agree as
follows:
B.
Agreement to Employ and Acceptance
The employment agreement should state that the employer employs the physician, and
the physician accepts employment with the employer, in each case on the terms and subject to
the conditions set forth in the employment agreement. A sample provision is set forth below; for
discussion regarding the commencement date of employment, see "Term of Agreement" at
Section J below:
WHEREAS, Employer desires to employ Physician to render medical services as
an employed physician of Employer, and Physician desires to accept such
employment, all as stated in this Agreement.'''

In [63]:
doc = nlp(doc.lower())

In [64]:
tokens = [token for token in doc]

In [65]:
sents_in_summary = 10

In [66]:
summary = generate_summary(doc,sents_in_summary)

In [67]:
print(summary)

(the "execution date"), but effective as of
_ (the "effective date"), by and between large
texas medical group, p.a., a texas professional association ("employer"), and
joan q. physician, m.d. ("physician").
 now, therefore, in consideration of the mutual terms and conditions set
forth in this agreement, the parties, intending to be legally bound, agree as
follows:
 an employed physician of employer, and physician desires to accept such
employment, all as stated in this agreement.
 an example of a preamble and recitals is set forth below:
physician employment agreement
 a sample provision is set forth below; for
discussion regarding the commencement date of employment, see "term of agreement" at
 section j below:
whereas, employer desires to employ physician to render medical services as
 the
recitals usually conclude with a brief statement affirming the intention of the parties to be bound
by the agreement. an employed physician of employer, and physician desires to accept such
employ