### In the terminal, paste the following: 
# python -m pip install -U pydantic spacy==3.4.4

In [1]:
# spaCy is more useful in the development and production environment because 
# it provides a very fast and accurate semantic analysis compared to NLTK.

import spacy 

In [2]:
# "en_core_web_sm" is spaCy's small English pipeline trained on written web text 
# (blogs, news, comments), that includes vocabulary, syntax and entities.

nlp = spacy.load("en_core_web_sm")

In [3]:
text = ("When Sebastian Thrun started working on self-driving cars at "
        "Google in 2007, few people outside of the company took him "
        "seriously. “I can tell you very senior CEOs of major American "
        "car companies would shake my hand and turn away because I wasn’t "
        "worth talking to,” said Thrun, in an interview with Recode earlier "
        "this week.")

########## Process Text ##########
doc = nlp(text)

In [4]:
# Analyze syntax
print("Noun phrases:", [chunk.text for chunk in doc.noun_chunks])
print("Verbs:", [token.lemma_ for token in doc if token.pos_ == "VERB"])

# Find named entities, phrases and concepts
for entity in doc.ents:
    print(entity.text, entity.label_)

Noun phrases: ['Sebastian Thrun', 'self-driving cars', 'Google', 'few people', 'the company', 'him', 'I', 'you', 'very senior CEOs', 'major American car companies', 'my hand', 'I', 'Thrun', 'an interview', 'Recode']
Verbs: ['start', 'work', 'drive', 'take', 'tell', 'shake', 'turn', 'talk', 'say']
Sebastian Thrun PERSON
Google ORG
2007 DATE
American NORP
Thrun PERSON
Recode ORG
earlier this week DATE


In [5]:
########## Counting Number of Words ##########
word_dict = {}

for word in doc:
    word = word.text.lower()

    if word in word_dict:
        word_dict[word] += 1
    else:
        word_dict[word] = 1

print(word_dict)

{'when': 1, 'sebastian': 1, 'thrun': 2, 'started': 1, 'working': 1, 'on': 1, 'self': 1, '-': 1, 'driving': 1, 'cars': 1, 'at': 1, 'google': 1, 'in': 2, '2007': 1, ',': 3, 'few': 1, 'people': 1, 'outside': 1, 'of': 2, 'the': 1, 'company': 1, 'took': 1, 'him': 1, 'seriously': 1, '.': 2, '“': 1, 'i': 2, 'can': 1, 'tell': 1, 'you': 1, 'very': 1, 'senior': 1, 'ceos': 1, 'major': 1, 'american': 1, 'car': 1, 'companies': 1, 'would': 1, 'shake': 1, 'my': 1, 'hand': 1, 'and': 1, 'turn': 1, 'away': 1, 'because': 1, 'was': 1, 'n’t': 1, 'worth': 1, 'talking': 1, 'to': 1, '”': 1, 'said': 1, 'an': 1, 'interview': 1, 'with': 1, 'recode': 1, 'earlier': 1, 'this': 1, 'week': 1}


In [6]:
########## Score Sentences ##########
# (index, sentence, score)

sentences = []

sentence_score = 0

for i, sentence in enumerate(doc.sents):
    for word in sentence:
        word = word.text.lower()
        sentence_score += word_dict[word]

    sentences.append((1, sentence.text.replace("\n", " "), sentence_score/len(sentence)))

In [7]:
print(sentence)

“I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week.


In [8]:
########## Sort Sentences By Importance ##########
sorted_sentence = sorted(sentences, key = lambda x: -x[2])

top_three = sorted(sorted_sentence[:3], key = lambda x: x[0])

summary_text = ""

for sentence in top_three:
    summary_text += sentence[1] + " "
print(summary_text)

“I can tell you very senior CEOs of major American car companies would shake my hand and turn away because I wasn’t worth talking to,” said Thrun, in an interview with Recode earlier this week. When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously. 
