In [1]:
import spacy

In [3]:
# Load the "en_core_web_sm" model once; the cells below call this `nlp`
# object to build their documents.
nlp = spacy.load("en_core_web_sm")

In [4]:
# Run the first sample sentence through the pipeline; the result is
# inspected token by token in the next cell.
doc = nlp("Tesla is looking at buying U.S. startup for $6 million")

In [9]:
# One line per token: its text, part-of-speech tag and dependency label.
for tok in doc:
    print(f"{tok.text} {tok.pos_} {tok.dep_}")

Tesla PROPN nsubj
is AUX aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [10]:
# Show the (name, component) pairs that make up the loaded pipeline.
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x11f89df90>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x11f5c81a0>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x11f5c8670>)]

In [11]:
# Same pipeline, component names only.
nlp.pipe_names

['tagger', 'parser', 'ner']

In [12]:
# Second sample: a sentence containing the contraction "isn't".
doc2 = nlp("Tesla isn't looking into startups anymore.")

In [13]:
# Text, part-of-speech tag and dependency label for every token of doc2.
for tok in doc2:
    fields = (tok.text, tok.pos_, tok.dep_)
    print(" ".join(fields))

Tesla PROPN nsubj
is AUX aux
n't PART neg
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


## Tokenization



In [14]:
# The double quotes are part of the text itself, so they are escaped inside
# the literal; the apostrophe needs no escape with these delimiters.
mystring = "\"We're moving to L.A.!\""

In [15]:
# Print the raw string: the surrounding double quotes are part of the text.
print(mystring)

"We're moving to L.A.!"


In [16]:
# Re-bind `doc` to the quoted sentence (replacing the earlier Tesla example)
# and list its tokens one per line.
doc = nlp(mystring)
for tok in doc:
    print(tok.text)

"
We
're
moving
to
L.A.
!
"


In [18]:
# Re-bind `doc2` to a sentence mixing punctuation, an e-mail address and a URL.
doc2 = nlp("We're here to help! Send snail=mail, email support@site.com or visit us at https://www.page.com")

In [19]:
# One token per line, to show where the sentence was split.
for tok in doc2:
    print(tok)

We
're
here
to
help
!
Send
snail
=
mail
,
email
support@site.com
or
visit
us
at
https://www.page.com


In [20]:
# Sample with a distance ("5km") and a currency amount ("$10.30").
doc3 = nlp("A 5km NYC cab costs $10.30")

In [21]:
# One token per line for doc3.
for tok in doc3:
    print(tok)

A
5
km
NYC
cab
costs
$
10.30


In [25]:
# Sample with abbreviations that end in a period ("St.", "U.S.").
# NOTE(review): "Lous" looks like a typo for "Louis"; kept as-is so the
# saved output below still matches.
doc4 = nlp("Let's visit St. Lous in the U.S. next year.")

In [26]:
# One token per line for doc4.
for tok in doc4:
    print(tok)

Let
's
visit
St.
Lous
in
the
U.S.
next
year
.


In [27]:
# Number of tokens in doc4.
len(doc4)

11

In [28]:
# Number of entries in the vocabulary attached to doc4 (distinct from the
# number of tokens in doc4).
len(doc4.vocab)

520

In [29]:
# Short sentence used below to demonstrate indexing and slicing.
doc5 = nlp("It is better to give than receive.")

In [30]:
# Integer indexing returns the token at that position.
doc5[0]

It

In [31]:
# Slicing works as well: positions 2, 3 and 4 (the stop index 5 is excluded).
doc5[2:5]

better to give

In [32]:
# Tokens of a Doc cannot be replaced by assignment, so this statement is
# expected to fail. Catch the TypeError and print it: the demonstration stays
# visible, and "Restart & Run All" can continue past this cell.
try:
    doc5[0] = "Change"
except TypeError as err:
    print(f"{type(err).__name__}: {err}")

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [33]:
# Sample containing an organisation, a place and a monetary amount, used
# for the named-entity cells below.
doc8 = nlp("Apple to build a Hong Kong factory for $6 million")

In [35]:
# All tokens on a single line, each followed by " | " (no trailing newline).
print("".join(f"{tok.text} | " for tok in doc8), end="")

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 

In [39]:
# For each named entity: its text, its label, and spaCy's description of
# that label, each on its own line.
for ent in doc8.ents:
    label = ent.label_
    # The final '\n' argument plus print's own line ending leaves two blank
    # lines between consecutive entities.
    print(ent, label, spacy.explain(label), '\n', sep='\n')

Apple
ORG
Companies, agencies, institutions, etc.


Hong Kong
GPE
Countries, cities, states


$6 million
MONEY
Monetary values, including unit




In [None]:
# Next sample sentence; not yet inspected in the cells visible here.
doc9 = nlp("Autonomous cars shift insurance liability toward manufacturers.")