In [1]:
import spacy

In [2]:
# Load the 'en_core_web_sm' pipeline once; every later cell reuses this `nlp`
# object to turn raw text into processed documents.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Run the loaded pipeline over one sample sentence; `doc` is the processed
# result that the next cell iterates token by token.
doc = nlp('Tesla is looking at buying U.S. Startup for $6 million')

In [9]:
# One row per token: text, part-of-speech and dependency label. The attributes
# without a trailing underscore print as integer IDs, the underscore variants
# as readable strings (compare the numeric and text columns in the output).
for tok in doc:
    row = (tok.text, tok.pos, tok.pos_, tok.dep, tok.dep_)
    print(*row)
    

Tesla 95 PROPN 426 nsubj
is 99 VERB 402 aux
looking 99 VERB 8206900633647566924 ROOT
at 84 ADP 440 prep
buying 99 VERB 435 pcomp
U.S. 95 PROPN 7037928807040764755 compound
Startup 95 PROPN 413 dobj
for 84 ADP 440 prep
$ 98 SYM 443 quantmod
6 92 NUM 7037928807040764755 compound
million 92 NUM 436 pobj


In [10]:
# Inspect the processing pipeline as a list of (name, component) pairs.
nlp.pipeline

[('tagger', <spacy.pipeline.Tagger at 0x1b06fefec48>),
 ('parser', <spacy.pipeline.DependencyParser at 0x1b0755ddbe8>),
 ('ner', <spacy.pipeline.EntityRecognizer at 0x1b0755fa1c8>)]

In [11]:
# Same pipeline, names only.
nlp.pipe_names

['tagger', 'parser', 'ner']

Tokenization

In [14]:
# The run of extra spaces after "isn't" is deliberate: it comes back from the
# tokenizer as its own SPACE token in the listing below.
doc2 = nlp("Tesla isn't    looking for startups anymore")

In [15]:
# Per-token listing for doc2: text, numeric POS id, POS name, dependency name.
for tok in doc2:
    row = (tok.text, tok.pos, tok.pos_, tok.dep_)
    print(*row)

Tesla 95 PROPN nsubj
is 99 VERB aux
n't 85 ADV neg
    102 SPACE 
looking 99 VERB ROOT
for 84 ADP prep
startups 91 NOUN pobj
anymore 85 ADV advmod


In [16]:
# A Doc supports integer indexing; index 0 is the first token.
doc2[0]

Tesla

In [17]:
# Readable part-of-speech tag of that first token.
doc2[0].pos_

'PROPN'

In [19]:
# Confirm what kind of object calling `nlp(...)` returns.
type(doc2)

spacy.tokens.doc.Doc

In [20]:
# Look up a human-readable description for the 'nsubj' dependency label.
spacy.explain('nsubj')

'nominal subject'

In [22]:
# Slice tokens 4 through 6 out of doc2. The whitespace token sits at index 3,
# so the slice starts at "looking".
br = doc2[4:7]
br

looking for startups

In [23]:
# A slice of a Doc is a different type from the Doc itself.
type(br)

spacy.tokens.span.Span

In [26]:
# Three short sentences, used to show sentence segmentation: iterating
# `doc4.sents` yields one sentence at a time.
doc4 = nlp("This is the first sentence. This is another sentence. This is the last sentence!")
for sent in doc4.sents:
    print(sent)

This is the first sentence.
This is another sentence.
This is the last sentence!


In [27]:
# Token 5 is the period that closes the first sentence.
doc4[5]

.

In [28]:
# The token right after that period begins the second sentence.
doc4[6].is_sent_start

True

In [29]:
# A mid-sentence token. NOTE(review): the recorded run shows no output for this
# cell; Jupyter displays nothing when the value is None, so the attribute
# presumably evaluates to None (not False) here — confirm against the
# installed spaCy version.
doc4[8].is_sent_start

In [30]:
# Sample text that itself contains double quotes and an apostrophe; the
# triple-quoted literal avoids backslash escapes.
mystring = '''"We're moving to L.A.!"'''

In [31]:
# Show the raw string exactly as the tokenizer will receive it.
print(mystring)

"We're moving to L.A.!"


In [32]:
# Tokenize the quoted sentence and print one token per line. Note that this
# rebinds `doc`, replacing the document parsed earlier in the notebook.
doc = nlp(mystring)
for tok in doc:
    print(tok.text)

"
We
're
moving
to
L.A.
!
"


In [35]:
# Tokenization of text containing a hyphenated word, an e-mail address and a
# URL. This rebinds `doc2` to the new document.
doc2 = nlp("We're here to help! send snail-mail, email:support@oursite.com or visit us at http://www.oursite.com!")
for tok in doc2:
    print(tok)

We
're
here
to
help
!
send
snail
-
mail
,
email:support@oursite.com
or
visit
us
at
http://www.oursite.com
!


In [36]:
# Tokenization of a distance with a unit and a currency amount.
doc3 = nlp("A 5 km NYC cab ride costs $10.30")
for tok in doc3:
    print(tok)

A
5
km
NYC
cab
ride
costs
$
10.30


In [38]:
# Tokenization of abbreviations that contain periods. This rebinds `doc4`,
# which the following cells then inspect.
doc4 = nlp("Let's visit St. Louis in the U.S. next year.")
for tok in doc4:
    print(tok)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [39]:
# Number of tokens in the document.
len(doc4)

11

In [40]:
# The vocabulary object attached to the document.
doc4.vocab

<spacy.vocab.Vocab at 0x1b070d73cc8>

In [41]:
# Number of entries in that vocabulary.
len(doc4.vocab)

57852

In [42]:
# Short sentence used in the next cells to show token indexing and slicing.
doc5 = nlp("It is better to give than receive.")

In [43]:
# Single token by position.
doc5[0]

It

In [44]:
# Consecutive tokens 2, 3 and 4 (the end index is exclusive).
doc5[2:5]

better to give

In [46]:
# Sentence used by the named-entity and dependency-visualization cells below.
doc8 = nlp('Apple to build a Indore factory for $6 million')

In [48]:
# Print all tokens on a single line, each followed by a '| ' separator.
for tok in doc8:
    print(tok.text, end='| ')

Apple| to| build| a| Indore| factory| for| $| 6| million| 

In [53]:
# For every named entity found in doc8, print the entity text with its label,
# then the label's description, then a spacer.
for ent in doc8.ents:
    label = ent.label_
    print(ent, label)
    print(str(spacy.explain(label)))
    # print() adds its own newline to '\n', leaving two blank lines per entity.
    print('\n')

Apple ORG
Companies, agencies, institutions, etc.


Indore GPE
Countries, cities, states


$6 million MONEY
Monetary values, including unit




In [54]:
# List the noun chunks of a sentence, one per line.
doc9 = nlp('Autonomous cars shift insurance liability towards manufactures.')
for phrase in doc9.noun_chunks:
    print(phrase)

Autonomous cars
insurance liability
manufactures


In [55]:
# displaCy is used by the visualization cells that follow.
# NOTE(review): this import sits mid-notebook; consider moving it next to
# `import spacy` in the first cell so all dependencies are visible up front.
from spacy import displacy
# Echo the document that is about to be visualized.
doc8

Apple to build a Indore factory for $6 million

In [57]:
# Draw the dependency parse of doc8 inline in the notebook, using a `distance`
# option of 110.
displacy.render(doc8, style='dep', jupyter=True, options={'distance': 110})

In [58]:
# Same dependency rendering with a smaller `distance` option (50) for
# comparison with the previous cell.
displacy.render(doc8, style='dep', jupyter=True, options={'distance': 50})

In [59]:
# Sentence for the entity visualization in the next cell. The input previously
# read "quater"; the misspelling is corrected to "quarter" so the model sees
# the intended time expression. This rebinds `doc`.
doc = nlp("Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.")

In [60]:
# Highlight the named entities of `doc` inline in the notebook.
displacy.render(doc, style='ent', jupyter=True)