In [1]:
from spacy.lang.en import English
# Sample passage used by the tokenization examples; the two lines are joined by
# an explicit newline so the value matches a two-line triple-quoted string.
text = (
    "When learning data science, you shouldn't get discouraged!\n"
    "Challenges and setbacks aren't failures, they're just part of the journey. You've got this!"
)

# Pipeline object built from the bare English language class.
nlp = English()


In [2]:
# Run the passage through the pipeline and keep the text of each token.
my_doc = nlp(text)

token_list = [tok.text for tok in my_doc]
print(token_list)

['When', 'learning', 'data', 'science', ',', 'you', 'should', "n't", 'get', 'discouraged', '!', '\n', 'Challenges', 'and', 'setbacks', 'are', "n't", 'failures', ',', 'they', "'re", 'just', 'part', 'of', 'the', 'journey', '.', 'You', "'ve", 'got', 'this', '!']


In [3]:
# Rebuild `nlp` as a fresh English pipeline and attach a 'sentencizer'
# component, presumably so `doc.sents` can be iterated in the following cell.
nlp = English()
# NOTE(review): create_pipe(...) followed by add_pipe(component) looks like the
# spaCy v2 calling convention; newer spaCy releases expect
# nlp.add_pipe('sentencizer') instead — confirm against the installed version.
sbd = nlp.create_pipe('sentencizer')
nlp.add_pipe(sbd)

In [4]:
# Same two-line sample passage, spelled out again in this cell.
text = (
    "When learning data science, you shouldn't get discouraged!\n"
    "Challenges and setbacks aren't failures, they're just part of the journey. You've got this!"
)

doc = nlp(text)

# Keep the raw text of every sentence span the pipeline produced.
sents_list = [sentence.text for sentence in doc.sents]
print(sents_list)

["When learning data science, you shouldn't get discouraged!", "\nChallenges and setbacks aren't failures, they're just part of the journey.", "You've got this!"]


In [13]:
import spacy
from spacy.lang.en.stop_words import STOP_WORDS

# Import the stop-word collection explicitly: reaching it through
# `spacy.lang.en.stop_words` as an attribute chain is only guaranteed to work
# when that submodule has already been imported somewhere else.
spacy_stopwords = STOP_WORDS

# Report how many stop words the English language data defines.
print('Number of stop words: %d' % len(spacy_stopwords))

Number of stop wordds: 326


In [14]:
# Show a small sample of the stop words. The label and the slice share one
# constant so the printed count always matches the number of items shown.
# NOTE(review): `spacy_stopwords` appears to be an unordered collection (the
# recorded output is not alphabetical), so "first" is an arbitrary sample that
# may differ between runs — confirm if a stable order is wanted.
n_preview = 20
print('First %d stop words: %s' % (n_preview, list(spacy_stopwords)[:n_preview]))

First ten stop words: ['was', 'anywhere', 'though', 'many', '‘re', 'her', 'whatever', 'everyone', 'ca', 'nothing', 'last', 'else', 'almost', '’m', 'itself', 'quite', 'give', 'move', 'formerly', 'while']


In [15]:
from spacy.lang.en.stop_words import STOP_WORDS

doc = nlp(text)

# Keep every token that is not flagged as a stop word. The list holds the token
# objects themselves (not strings); punctuation and the newline token are not
# stop words, so they remain in the result.
filtered_sent = [word for word in doc if not word.is_stop]
print("Filtered Sentence:", filtered_sent)

Filtered Sentence: [learning, data, science, ,, discouraged, !, 
, Challenges, setbacks, failures, ,, journey, ., got, !]


In [6]:
# Print each token next to its lemma, one pair per line.
lem = nlp("run runs running runner")
for tok in lem:
    print(tok.text, tok.lemma_)

run run
runs run
running run
runner runner


In [8]:
import en_core_web_sm

# Rebind `nlp` to the pipeline loaded from the en_core_web_sm package; every
# later cell that calls `nlp` gets this pipeline.
nlp = en_core_web_sm.load()
doc = nlp(u"All is well that ends well")

# Print each token alongside its part-of-speech tag.
for tok in doc:
    print(tok.text, tok.pos_)

All DET
is VERB
well ADV
that DET
ends VERB
well ADV


In [22]:
from spacy import displacy

# Run a short news excerpt through the pipeline so its named entities can be inspected.
nytimes = nlp(u"""New York City on Tuesday declared a public health emergency and ordered mandatory measles vaccinations amid an outbreak, becoming the latest national flash point over refusals to inoculate against dangerous diseases.

At least 285 people have contracted measles in the city since September, mostly in Brooklyn’s Williamsburg neighborhood. The order covers four Zip codes there, Mayor Bill de Blasio (D) said Tuesday.

The mandate orders all unvaccinated people in the area, including a concentration of Orthodox Jews, to receive inoculations, including for children as young as 6 months old. Anyone who resists could be fined up to $1,000.""")

# One (entity span, label string, label id) triple per detected entity.
entities = [(ent, ent.label_, ent.label) for ent in nytimes.ents]
entities

[(New York City, 'GPE', 384),
 (Tuesday, 'DATE', 391),
 (At least 285, 'CARDINAL', 397),
 (September, 'DATE', 391),
 (Brooklyn, 'GPE', 384),
 (Williamsburg, 'GPE', 384),
 (four, 'CARDINAL', 397),
 (Bill de Blasio, 'PERSON', 380),
 (Tuesday, 'DATE', 391),
 (Orthodox Jews, 'NORP', 381),
 (6 months old, 'DATE', 391),
 (up to $1,000, 'MONEY', 394)]

In [23]:
# Draw the document with its entity spans highlighted, rendered inline in the notebook.
displacy.render(nytimes, style="ent", jupyter=True)