In [1]:
import spacy
nlp = spacy.load('en_core_web_sm')

In [2]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

In [3]:
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [7]:
print(doc[4].pos_)

VERB


In [8]:
print(doc[4].tag_)

VBD


In [11]:
for token in doc:
    print(f'{token.text:{10}}{token.pos_:{10}}{token.tag_:{10}}{spacy.explain(token.tag_):{10}}')

The       DET       DT        determiner
quick     ADJ       JJ        adjective 
brown     ADJ       JJ        adjective 
fox       NOUN      NN        noun, singular or mass
jumped    VERB      VBD       verb, past tense
over      ADP       IN        conjunction, subordinating or preposition
the       DET       DT        determiner
lazy      ADJ       JJ        adjective 
dog       NOUN      NN        noun, singular or mass
's        PART      POS       possessive ending
back      NOUN      NN        noun, singular or mass
.         PUNCT     .         punctuation mark, sentence closer


In [21]:
doc = nlp(u"I read books on NLP.")
word = doc[1]

In [22]:
print(word.text, word.pos_, word.tag_, spacy.explain(word.tag_))

read VERB VBD verb, past tense


In [20]:
doc2 = nlp(u"I read a book on NLP.")
word = doc2[1]
print(word.text, word.pos_, word.tag_, spacy.explain(word.tag_))

read VERB VBD verb, past tense


#### Counting POS Tags

In [23]:
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

In [27]:
POS_counts = doc.count_by(spacy.attrs.POS)

In [39]:
print(POS_counts)

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}


In [40]:
print(POS_counts.items())

dict_items([(90, 2), (84, 3), (92, 3), (100, 1), (85, 1), (94, 1), (97, 1)])


In [34]:
doc[2].pos_

'ADJ'

In [35]:
doc[2].pos

84

##### Creating frequency list

In [37]:
for k, v in sorted(POS_counts.items()):
    print(f"{k}. {doc.vocab[k].text:{5}} {v}")

84. ADJ   3
85. ADP   1
90. DET   2
92. NOUN  3
94. PART  1
97. PUNCT 1
100. VERB  1


In [42]:
TAG_counts = doc.count_by(spacy.attrs.TAG)

for k, v in sorted(TAG_counts.items()):
    print(f"{k}. {doc.vocab[k].text:{5}} {v}")

74. POS   1
1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    3
17109001835818727656. VBD   1


In [48]:
DEP_counts = doc.count_by(spacy.attrs.DEP)

for k, v in sorted(DEP_counts.items()):
    print(f"{k}. {doc.vocab[k].text:{}} {spacy.explain(doc.vocab[k].text) {v}")

402. amod  adjectival modifier 3
415. det   determiner 2
429. nsubj nominal subject 1
439. pobj  object of preposition 1
440. poss  possession modifier 1
443. prep  prepositional modifier 1
445. punct punctuation 1
8110129090154140942. case  case marking 1


TypeError: unsupported format string passed to NoneType.__format__