In [26]:
import spacy

In [27]:
# Load the small English pipeline; it is used below to parse each example sentence.
nlp = spacy.load("en_core_web_sm")

In [28]:
# Parse the example sentence (plain literal: the u"" prefix is redundant on Python 3).
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [29]:
# Show the text of the parsed sentence.
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [31]:
# Inspect the token at index 4: its text, its part-of-speech label and its tag.
word = doc[4]
print(" ".join((word.text, word.pos_, word.tag_)))

jumped VERB VBD


In [36]:
# One row per token: text, POS label and tag in 10-character columns,
# followed by spaCy's human-readable explanation of the tag.
for token in doc:
    description = spacy.explain(token.tag_)
    print(f"{token.text:10}{token.pos_:10}{token.tag_:10}{description}")

The       DET       DT        determiner
quick     ADJ       JJ        adjective
brown     ADJ       JJ        adjective
fox       NOUN      NN        noun, singular or mass
jumped    VERB      VBD       verb, past tense
over      ADP       IN        conjunction, subordinating or preposition
the       DET       DT        determiner
lazy      ADJ       JJ        adjective
dog       NOUN      NN        noun, singular or mass
's        PART      POS       possessive ending
back      NOUN      NN        noun, singular or mass
.         PUNCT     .         punctuation mark, sentence closer


In [43]:
# Re-bind `doc` to a new sentence (plain literal: the u"" prefix is redundant on Python 3).
doc = nlp("I read books on NLP.")

In [44]:
# Select the token at index 1 of the current doc.
word = doc[1]

In [45]:
# Print the selected token.
print(word)

read


In [40]:
# NOTE(review): this cell's execution count (40) is lower than that of the cells
# above it (43-45), so the saved output may predate the current `doc`/`word`.
# Re-run the notebook top to bottom to confirm the tag shown.
description = spacy.explain(word.tag_)
print(f"{word.text:10}{word.pos_:10}{word.tag_:10}{description}")

read      VERB      VBD       verb, past tense


In [41]:
# Re-bind `doc` to a variant of the sentence (plain literal: the u"" prefix is redundant on Python 3).
doc = nlp("I read a book on NLP.")

In [42]:
# Token at index 1 of the current doc, shown as text / POS label / tag / tag explanation.
word = doc[1]
description = spacy.explain(word.tag_)
print(f"{word.text:10}{word.pos_:10}{word.tag_:10}{description}")

read      VERB      VBD       verb, past tense


In [47]:
# Parse the fox sentence again so the counting cells below operate on it.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [59]:
# Count how often each POS value occurs in the doc.
# The result maps integer attribute IDs to occurrence counts (see the printed dict below).
POS_count = doc.count_by(spacy.attrs.POS)

In [60]:
# Show the raw ID -> count mapping.
print(POS_count)

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}


In [63]:
# Resolve one of the integer IDs from the count dict back to its text via the vocab.
print(doc.vocab[84].text)

ADJ


In [71]:
# List the POS counts in ascending ID order, resolving each ID to its label via the vocab.
for x, y in sorted(POS_count.items()):
    label = doc.vocab[x].text
    print(f"{x}.   {label:10} {y}")

84.   ADJ        3
85.   ADP        1
90.   DET        2
92.   NOUN       3
94.   PART       1
97.   PUNCT      1
100.   VERB       1


In [75]:
# Same tally as for POS, but keyed on the TAG attribute.
TAG_count = doc.count_by(spacy.attrs.TAG)

# Print each ID, its text from the vocab, and its count, in ascending ID order.
for x, y in sorted(TAG_count.items()):
    label = doc.vocab[x].text
    print(f"{x}.   {label:10} {y}")

74.   POS        1
1292078113972184607.   IN         1
10554686591937588953.   JJ         3
12646065887601541794.   .          1
15267657372422890137.   DT         2
15308085513773655218.   NN         3
17109001835818727656.   VBD        1


In [76]:
# Number of entries in the vocab attached to this doc.
len(doc.vocab)

788

In [78]:
# Tally the DEP attribute across the doc's tokens.
DEP_counts = doc.count_by(spacy.attrs.DEP)

# Print each ID, its text from the vocab, and its count, in ascending ID order.
for a, b in sorted(DEP_counts.items()):
    label = doc.vocab[a].text
    print(f"{a}. {label} {b}")

402. amod 3
415. det 2
429. nsubj 1
439. pobj 1
440. poss 1
443. prep 1
445. punct 1
8110129090154140942. case 1
8206900633647566924. ROOT 1
