In [1]:
import spacy


In [2]:
# Load the `en_core_web_sm` pipeline once; the cells below reuse this `nlp` object.
nlp = spacy.load('en_core_web_sm')

In [15]:
# Run the pipeline over a sample sentence; the misspelling "laxy" is part of the input text.
doc = nlp(u"the quick brown fox jumped over the laxy dog's fox")

In [16]:
# Echo the text of the processed Doc.
print(doc.text)

the quick brown fox jumped over the laxy dog's fox


In [17]:
# One row per token: text and tag (each padded to 10 columns), then
# spaCy's description of the tag.
for token in doc:
    tag = token.tag_
    print(f"{token.text:10}{tag:10}{spacy.explain(tag)}")

the       DT        determiner
quick     JJ        adjective
brown     JJ        adjective
fox       NN        noun, singular or mass
jumped    VBD       verb, past tense
over      IN        conjunction, subordinating or preposition
the       DT        determiner
laxy      NN        noun, singular or mass
dog       NN        noun, singular or mass
's        POS       possessive ending
fox       NN        noun, singular or mass


In [18]:
# Count the tokens of `doc` grouped by their POS attribute; the result maps
# integer attribute IDs to frequencies.
pos = doc.count_by(spacy.attrs.POS)

In [19]:
# Display the raw {POS id: count} mapping.
pos

{83: 2, 99: 1, 84: 1, 89: 2, 91: 4, 93: 1}

In [21]:
# Print the counts in ascending POS-id order, resolving each id to its
# readable name through the vocab.
for pos_id in sorted(pos):
    print(f"{pos_id}. {doc.vocab[pos_id].text:5} {pos[pos_id]}")

83. ADJ   2
84. ADP   1
89. DET   2
91. NOUN  4
93. PART  1
99. VERB  1


In [22]:
from spacy import displacy

In [30]:
# Second sample sentence (the typos are part of the input text).
# NOTE(review): `doc2` is not used by any cell visible here; the render below uses `doc`.
doc2 = nlp(u"the quick brown fox juped over the lazt dog's back")

In [33]:
# Draw the dependency visualisation of `doc` inline in the notebook.
displacy.render(doc, style='dep', jupyter=True)

In [40]:
def show_ents(doc):
    """Print every entity of ``doc`` as ``text - label - explanation``.

    When ``doc.ents`` is empty, a single "No entities found." line is
    printed instead. The explanation is whatever ``spacy.explain`` returns
    for the label, converted with ``str`` so it can always be printed.
    """
    if not doc.ents:
        print("No entities found.")
        return
    for ent in doc.ents:
        fields = [ent.text, ent.label_, str(spacy.explain(ent.label_))]
        print(' - '.join(fields))

In [35]:
# Sentence in which "May" and "Washington" each occur twice in different roles.
doc3 = nlp(u"May i go to Washington, dc next May to see the Washington Monument?")

In [41]:
# List the entities the pipeline assigned to doc3.
show_ents(doc3)

Washington - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Monument - ORG - Companies, agencies, institutions, etc.


In [58]:
# Parse a headline-style sentence and list the entities found in it.
doc4 = nlp(u'Tesla to build a U.K. factory for $6 million')
show_ents(doc4)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [66]:
from spacy.tokens import Span

# Integer ID of the "ORG" label, looked up in doc4's own vocab.
ORG = doc4.vocab.strings[u'ORG']

# Token span [0, 1) of doc4 is the single token "Tesla"; label it with the ID
# resolved above (previously this went through the unrelated `doc` and left
# `ORG` unused).
new_ent = Span(doc4, 0, 1, label=ORG)

# Convert the existing entities to a list so the new span can be appended,
# then assign the combined list back to the Doc.
doc4.ents = list(doc4.ents) + [new_ent]

In [67]:
# List doc4's entities again, now including the manually added span.
show_ents(doc4)

Tesla - ORG - Companies, agencies, institutions, etc.
U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [68]:
# Two-sentence text mentioning "vacuum cleaner" twice; the two adjacent
# literals are concatenated into one string before parsing.
doc5 = nlp(
    u'Our company plans to introduce a new vacuum cleaner. '
    u'If successful, the vacuum cleaner will be our first product.'
)

# List the entities the pipeline found in doc5.
show_ents(doc5)

first - ORDINAL - "first", "second", etc.


In [70]:
from spacy.matcher import PhraseMatcher
# Phrase matcher built on the pipeline's vocab.
# NOTE(review): the name is a misspelling of "matcher"; it is kept because
# later cells refer to `macther`.
macther = PhraseMatcher(nlp.vocab)

In [71]:
# Surface forms that should be recognised as the same product phrase.
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']

# Build the pattern Docs with the tokenizer only: the phrase matcher is used
# here with its default settings and compares token text, so running the full
# pipeline on each phrase is unnecessary work.
phrase_pattern = [nlp.make_doc(text) for text in phrase_list]

In [75]:
# Register all pattern Docs under the key 'new' (second positional argument
# passed as None), then scan doc5. Each hit is a (match_id, start, end) tuple.
macther.add('new', None, *phrase_pattern)
found = macther(doc5)
found

[(4753564829687343602, 7, 9), (4753564829687343602, 14, 16)]

In [79]:
# Integer ID of the "PRODUCT" label, read from the vocab of doc5, the Doc the
# new spans are built on, instead of the unrelated `doc`.
prod = doc5.vocab.strings[u'PRODUCT']


In [80]:
# Turn every match into a PRODUCT-labelled span over doc5; the match id in
# the first tuple slot is not needed.
newents = [Span(doc5, start, end, label=prod) for _, start, end in found]


In [83]:
# Append the new PRODUCT spans to the entities already on doc5.
# NOTE(review): running this cell twice would append the same spans again; confirm how spaCy handles that.
doc5.ents = list(doc5.ents) + newents

In [84]:
# List doc5's entities again, now including the added PRODUCT spans.
show_ents(doc5)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
first - ORDINAL - "first", "second", etc.
