In [2]:
import spacy
# Load the 'en_core_web_sm' pipeline once; the cells below reuse `nlp`.
nlp = spacy.load('en_core_web_sm')

In [6]:
# Run one sentence through the pipeline and echo the text held by the Doc.
sentence = u"The Quick brown fox jumper over the lazy dog's back."
doc = nlp(sentence)
print(doc.text)

The Quick brown fox jumper over the lazy dog's back.


In [7]:
# Inspect the third token: its text, coarse POS, fine-grained tag, and the
# description spacy.explain gives for that tag.
third = doc[2]
print(third.text, third.pos_, third.tag_, spacy.explain(third.tag_))

brown ADJ JJ adjective (English), other noun-modifier (Chinese)


In [5]:
# One row per token: text, coarse POS and fine-grained tag padded to fixed
# widths, followed by the tag's description.
for token in doc:
    row = f"{token.text:{10}}{token.pos_:{8}}{token.tag_:{6}}{spacy.explain(token.tag_)}"
    print(row)

The       DET     DT    determiner
Quick     ADJ     JJ    adjective (English), other noun-modifier (Chinese)
brown     ADJ     JJ    adjective (English), other noun-modifier (Chinese)
fox       NOUN    NN    noun, singular or mass
jumper    NOUN    NN    noun, singular or mass
over      ADP     IN    conjunction, subordinating or preposition
the       DET     DT    determiner
lazy      ADJ     JJ    adjective (English), other noun-modifier (Chinese)
dog       NOUN    NN    noun, singular or mass
's        PART    POS   possessive ending
bacl      NOUN    NN    noun, singular or mass
.         PUNCT   .     punctuation mark, sentence closer


In [8]:
# "read" with a plural object; the recorded output below tags it VBP.
doc = nlp(u'I read books on NLP')
r = doc[1]  # second token, i.e. "read"
line = f'{r.text:{10}} {r.pos_:{8}} {r.tag_:{6}} {spacy.explain(r.tag_)}'
print(line)


read       VERB     VBP    verb, non-3rd person singular present


In [9]:
# Same verb, different object; the recorded output below tags it VBD.
doc = nlp(u'I read a book on NLP')
r = doc[1]  # second token, i.e. "read"
line = f'{r.text:{10}} {r.pos_:{8}} {r.tag_:{6}} {spacy.explain(r.tag_)}'
print(line)


read       VERB     VBD    verb, past tense


In [10]:
# Tally the tokens of `doc` by dependency label; the keys are integer IDs.
Dep_Counts = doc.count_by(spacy.attrs.DEP)
for k in sorted(Dep_Counts):
    v = Dep_Counts[k]
    # doc.vocab[k].text maps the integer ID back to its label string.
    print(f'{k}.{doc.vocab[k].text:{4}}:{v}')

415.det :1
416.dobj:1
429.nsubj:1
439.pobj:1
443.prep:1
8206900633647566924.ROOT:1


In [11]:
from spacy import displacy

In [13]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back")
# One row per token: text, coarse POS, dependency label, and the label's
# description from spacy.explain.
for token in doc:
    dep_row = f'{token.text:{10}} {token.pos_:{7}} {token.dep_:{7}} {spacy.explain(token.dep_)}'
    print(dep_row)

The        DET     det     determiner
quick      ADJ     amod    adjectival modifier
brown      ADJ     amod    adjectival modifier
fox        NOUN    nsubj   nominal subject
jumped     VERB    ROOT    root
over       ADP     prep    prepositional modifier
the        DET     det     determiner
lazy       ADJ     amod    adjectival modifier
dog        NOUN    pobj    object of preposition
's         PART    case    case marking
back       ADV     advmod  adverbial modifier


# Draw the dependency parse of `doc` inline; 'distance' is handed to displaCy
# as a rendering option.
# NOTE(review): this line has no In [..] prompt and is identical to the call in
# the following cell — confirm whether it is a leftover duplicate.
displacy.render(doc,style='dep',jupyter=True,options={'distance':110})

In [15]:
# Render the dependency parse of `doc` inline in the notebook.
displacy.render(
    doc,
    style='dep',
    jupyter=True,
    options={'distance': 110},
)

In [16]:
# Parse a two-sentence text and draw its dependency diagram with custom styling.
doc2 = nlp(u"This is a sentence.This is another possible longer sentence.")
# Sentence spans of doc2; computed here but not passed to the render call below.
spans = list(doc2.sents)
# 'compact' has to be a real boolean: the string 'False' is non-empty and
# therefore truthy, which would switch displaCy into compact mode.
options = {
    'distance': 110,
    'compact': False,
    'color': 'yellow',
    'bg': '#09a3d5',
    'font': 'Times',
}
displacy.render(doc2, style='dep', jupyter=True, options=options)

In [18]:
def show_ents(doc):
    """Print the named entities of ``doc``, one per line.

    Each line has the form ``text-label-description``, where the description
    is whatever ``spacy.explain`` returns for the entity label, converted to a
    string. If ``doc.ents`` is empty, the single line
    "No named entities found" is printed instead.
    """
    if not doc.ents:
        print("No named entities found")
        return
    for ent in doc.ents:
        fields = (ent.text, ent.label_, str(spacy.explain(ent.label_)))
        print('-'.join(fields))
        

In [19]:
# "May" occurs twice in this sentence; list which spans the pipeline reports
# as named entities.
question = u'May I go to Washingtion,DC next May to see the Washington Monuments?'
doc = nlp(question)
show_ents(doc)

Washingtion-GPE-Countries, cities, states
DC-GPE-Countries, cities, states
next May-DATE-Absolute or relative dates or periods
Washington Monuments-GPE-Countries, cities, states


In [20]:
doc = nlp(u'Can I please borrow 500 dollars from you to buy some microsoft stock?')
# Per entity: text, token start/end indices, character start/end offsets, label.
for ent in doc.ents:
    fields = (ent.text, ent.start, ent.end, ent.start_char, ent.end_char, ent.label_)
    print(*fields)

500 dollars 4 6 20 31 MONEY


In [21]:
# The recorded output below lists "U.K." and "$6 million" but not "Tesla".
headline = u"Tesla to build U.K. factory for $6 million"
doc = nlp(headline)
show_ents(doc)

U.K.-GPE-Countries, cities, states
$6 million-MONEY-Monetary values, including unit


In [22]:
doc = nlp(u"Tesla to build U.K. factory for $6 million")

# Look up the ORG label in the vocabulary's string store.
ORG = doc.vocab.strings[u'ORG']

# Span covering token 0 only ("Tesla"), labelled ORG.
new_ent = spacy.tokens.span.Span(doc, 0, 1, label=ORG)

# Keep the entities already on the doc and append the new one.
doc.ents = [*doc.ents, new_ent]
show_ents(doc)

Tesla-ORG-Companies, agencies, institutions, etc.
U.K.-GPE-Countries, cities, states
$6 million-MONEY-Monetary values, including unit


In [23]:
# The two adjacent literals are joined with no separator, so the text passed
# to the pipeline reads "...cleaner.If Successful,...".
doc = nlp(
    u"our company plans to introduce a new vaccum cleaner."
    u"If Successful,the vaccum cleaner will be our first product."
)
show_ents(doc)

Successful-PERSON-People, including fictional
first-ORDINAL-"first", "second", etc.


In [26]:
from spacy.matcher import PhraseMatcher
# Phrase matcher built on the pipeline's vocabulary; patterns are added to it
# in a later cell.
matcher = PhraseMatcher(nlp.vocab)

In [29]:
doc = nlp(u"our company plans to introduce a new vaccum cleaner."u"If Successful,the vaccum cleaner will be our first product.")
phrase_list = ['vaccum cleaner', 'vaccum_cleaner']
# Build the patterns with the tokenizer only. `matcher` was created without an
# `attr` argument, so it compares token text and the tagger/parser/NER output
# that a full nlp(text) call would compute is never used.
phrase_patterns = [nlp.make_doc(text) for text in phrase_list]
# Register every pattern under the single key 'newProduct'.
matcher.add('newProduct', None, *phrase_patterns)
# Each match is a (match_id, start_token, end_token) tuple.
matches = matcher(doc)
matches


[(4452177204818730156, 7, 9), (4452177204818730156, 14, 16)]