In [1]:
# Perform standard imports
import spacy
# Load the 'en_core_web_sm' model once; every later cell calls `nlp(...)` to turn text into a Doc.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Create a simple Doc object by running a sample sentence through `nlp`.
# The POS, tag and dependency cells that follow all inspect this `doc`.
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

In [3]:
# Show the token at index 4 (the fifth word): its text, its pos_ label,
# its tag_ label, and spaCy's plain-English description of that tag.
print(f'{doc[4].text} {doc[4].pos_} {doc[4].tag_} {spacy.explain(doc[4].tag_)}')

jumped VERB VBD verb, past tense


In [4]:
# Tabulate every token in `doc`: text, pos_, tag_, and the description of the tag.
# The nested {10} / {8} / {6} fields are minimum column widths, which is what
# lines the columns up in the output.
for token in doc:
    print(f'{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}')

The        DET      DT     determiner
quick      ADJ      JJ     adjective
brown      ADJ      JJ     adjective
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
over       ADP      IN     conjunction, subordinating or preposition
the        DET      DT     determiner
lazy       ADJ      JJ     adjective
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass
.          PUNCT    .      punctuation mark, sentence closer


In [5]:
# First of two sentences contrasting how "read" is tagged; in this one the
# output of the next cell shows it tagged VBP (present tense).
doc2 = nlp(u"I read books on NLP")

In [6]:
# Index 1 is the word "read"; print it using the same column layout as the token table.
token = doc2[1]
print(f'{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}')

read       VERB     VBP    verb, non-3rd person singular present


In [9]:
# Second sentence of the contrast: same word "read", different context.
# The output of the next cell shows it tagged VBD (past tense) here.
doc3 = nlp(u"I read a book on NLP.")

In [10]:
# Index 1 is again "read"; print it in the same column layout for comparison with doc2.
token = doc3[1]
print(f'{token.text:{10}} {token.pos_:{8}} {token.tag_:{6}} {spacy.explain(token.tag_)}')

read       VERB     VBD    verb, past tense


In [17]:
# Count how many tokens of `doc` carry each POS value. The keys are integer IDs,
# not strings; a later cell decodes them through doc.vocab.
POS_counts = doc.count_by(spacy.attrs.POS)

In [12]:
# Decode one integer ID back to its string: 83 is the key printed next to ADJ
# in the POS count listing.
# NOTE(review): a hard-coded ID like 83 may not be stable across spaCy versions — confirm.
doc.vocab[83].text

'ADJ'

In [15]:
# The token at index 2 ("brown") reports the same 'ADJ' label through its string attribute.
doc[2].pos_

'ADJ'

In [18]:
# Walk the POS IDs in ascending order; for each one show the ID, its name
# decoded through the vocab (padded to 5 characters), and its token count.
for pos_id in sorted(POS_counts):
    print(f"{pos_id}. {doc.vocab[pos_id].text:{5}} {POS_counts[pos_id]}")

83. ADJ   3
84. ADP   1
89. DET   2
91. NOUN  3
93. PART  1
96. PUNCT 1
99. VERB  1


In [19]:
# Frequency of each TAG value in `doc`, keyed by integer ID.
TAG_counts = doc.count_by(spacy.attrs.TAG)

# Ascending ID order; the ID is decoded to its tag string through the vocab.
for tag_id in sorted(TAG_counts):
    print(f"{tag_id}. {doc.vocab[tag_id].text:{5}} {TAG_counts[tag_id]}")

74. POS   1
1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    3
17109001835818727656. VBD   1


In [20]:
# Frequency of each DEP (dependency label) value in `doc`, keyed by integer ID.
DEP_counts = doc.count_by(spacy.attrs.DEP)

# Ascending ID order; the ID is decoded to its label string through the vocab.
for dep_id in sorted(DEP_counts):
    print(f"{dep_id}. {doc.vocab[dep_id].text:{5}} {DEP_counts[dep_id]}")

399. amod  3
412. det   2
426. nsubj 1
436. pobj  1
437. poss  1
440. prep  1
442. punct 1
8110129090154140942. case  1
8206900633647566924. ROOT  1


In [21]:
from spacy import displacy

In [22]:
# Render `doc` with displaCy's 'dep' style, using default options; jupyter=True is passed for notebook display.
displacy.render(doc, style='dep',jupyter=True)

In [23]:
# Customised dependency rendering: arc distance, compact mode, text colour,
# background colour and font.
# `compact` is a flag, so pass a real boolean. The previous string 'True' only
# worked because any non-empty string is truthy, which means 'False' would have
# switched compact mode on as well.
options = {'distance': 110, 'compact': True, 'color': 'yellow', 'bg': '#09a3d5', 'font': 'Times'}
displacy.render(doc, style='dep', options=options, jupyter=True)

In [26]:
# Two-sentence example (rebinds doc2) used to render sentence spans.
doc2 = nlp(u"This is a sentence. This is another, possibly longer sentence.")

# Collect the sentence spans from Doc.sents into a list.
spans = [sentence for sentence in doc2.sents]

# Hand the list of spans to displaCy instead of the whole Doc.
displacy.render(spans, style='dep', jupyter=True, options=dict(distance=110))

In [30]:
def show_ents(doc):
    """Print one line per named entity in `doc`, or a fallback message.

    Each line has the form ``<text> - <label> - <explanation>``, where the
    explanation is ``str(spacy.explain(label))``. When ``doc.ents`` is empty,
    the single line ``No entities found`` is printed instead.
    Nothing is returned.
    """
    # Guard clause: handle the empty case first so the listing loop stays flat.
    if not doc.ents:
        print('No entities found')
        return
    for ent in doc.ents:
        # str() keeps the line printable even if explain() has no description.
        print(' - '.join((ent.text, ent.label_, str(spacy.explain(ent.label_)))))
        


In [31]:
# Rebind `doc` to a short greeting; the next cell's output shows no entities are found in it.
doc = nlp(u'hi How are you?')

In [32]:
# Exercises the fallback branch: prints 'No entities found' for the greeting.
show_ents(doc)

No entities found


In [33]:
# "May" occurs twice (sentence-initial and in "next May") and "Washington" occurs twice;
# the listing in the next cell shows which mentions receive entity labels.
doc = nlp(u"May I go to Washington, DC next May to see the Washington monument?")

In [34]:
# List the entities found in the Washington sentence (text - label - explanation).
show_ents(doc)

Washington, DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
Washington - GPE - Countries, cities, states


In [37]:
# Sentence containing a money amount and a company name.
doc = nlp(u"Can I please have 500 dollars of Microsoft stock?")

In [38]:
# List the entities found in the Microsoft sentence (text - label - explanation).
show_ents(doc)

500 dollars - MONEY - Monetary values, including unit
Microsoft - ORG - Companies, agencies, institutions, etc.


In [39]:
# Headline-style sentence. In the listing that follows, "Tesla" does not appear
# as an entity; only "U.K." and "$6 million" do.
doc = nlp(u"Tesla to build a U.K. factory for $6 million")

In [40]:
# List the entities found in the Tesla sentence; note that "Tesla" itself is missing from the output.
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [41]:
from spacy.tokens import Span

In [42]:
# Look up the value the vocab's string store holds for the label "ORG".
# NOTE(review): presumably the ID used to label a new Span (Span is imported in the
# previous cell); the code that uses it is not visible here — confirm.
ORG = doc.vocab.strings[u"ORG"]