In [1]:
import spacy

In [2]:
# Load the spaCy pipeline package 'en_core_web_sm'. The resulting `nlp` callable
# is applied to raw text in the cells below to produce Doc objects.
nlp = spacy.load('en_core_web_sm')

In [12]:
# Parse the example sentence; `doc` is the token sequence inspected below.
# (Plain string literal: the u-prefix is a no-op on Python 3.)
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [13]:
# doc.text is the text of the parsed sentence as a plain string.
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [14]:
# Printing the Doc object itself shows the same sentence as doc.text.
print(doc)

The quick brown fox jumped over the lazy dog's back.


In [15]:
# Fine-grained tag (tag_) of token 4, "jumped". It carries more detail
# than the coarse pos_ tag printed in the next cell.
print(doc[4].tag_)

VBD


In [16]:
# pos_ is the coarse-grained part of speech of the same token: "jumped" is a VERB.
# The fine-grained tag VBD from the previous cell narrows that down to a
# past-tense verb; the coarse-/fine-grained part-of-speech tag tables list
# what each label means.
print(doc[4].pos_)

VERB


In [19]:
# One row per token: text, coarse POS and fine-grained tag (each padded to
# width 10), followed by spacy.explain()'s description of the fine-grained tag.
for token in doc:
    print(f"{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_)}")

The        DET        DT         determiner
quick      ADJ        JJ         adjective
brown      ADJ        JJ         adjective
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [20]:
# First of two sentences used to compare how "read" gets tagged.
doc2 = nlp("I read books on NLP.")

In [23]:
# Token at index 1 of doc2 (the second token), i.e. "read".
word = doc2[1]

In [24]:
# Confirm which token was selected.
word.text

'read'

In [25]:
# Same row layout as the token table above, for the single token "read" in doc2.
# In "I read books on NLP." it is tagged VBP (non-3rd person singular present).
token = word
print(f"{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_)}")

read       VERB       VBP        verb, non-3rd person singular present


In [26]:
# Second sentence for the "read" comparison; only the object differs from doc2.
doc3 = nlp("I read a book on NLP.")

In [27]:
# Same word at the same index, different sentence: in "I read a book on NLP."
# the tagger labels "read" as VBD (past tense), versus VBP in doc2.
token = word = doc3[1]
print(f"{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_)}")

read       VERB       VBD        verb, past tense


In [28]:
# Counting POS tags: how many tokens of each part of speech does a document have?
# Re-parse the example sentence (this rebinds `doc`).
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [31]:
# count_by returns a dictionary keyed by integer POS ids; each value is the
# number of tokens in `doc` with that part of speech.
pos_count = doc.count_by(spacy.attrs.POS)

In [32]:
# Keys are integer ids rather than readable names; they are decoded further below.
pos_count

{96: 1, 83: 3, 99: 1, 84: 1, 89: 2, 91: 3, 93: 1}

In [34]:
# Map an integer POS id back to its readable name through the vocab.
# The id is resolved from the label instead of hard-coding 83: symbol ids are
# not guaranteed to be stable across spaCy releases, so a literal id can
# silently decode to a different name after an upgrade.
# pos_count maps this id to 3, i.e. the sentence contains 3 adjectives.
doc.vocab[doc.vocab.strings["ADJ"]].text

'ADJ'

In [46]:
# (id, count) pairs; these are what the frequency list below iterates over.
pos_count.items()

dict_items([(96, 1), (83, 3), (99, 1), (84, 1), (89, 2), (91, 3), (93, 1)])

In [50]:
# Frequency list: one "<id>. <POS name> <count>" row per POS id, in ascending id order.
for k in sorted(pos_count):
    v = pos_count[k]
    print(f"{k}. {doc.vocab[k].text:{10}} {v}")
    

83. ADJ        3
84. ADP        1
89. DET        2
91. NOUN       3
93. PART       1
96. PUNCT      1
99. VERB       1


In [54]:
# Same frequency listing for fine-grained tags (TAG).
# Most TAG ids are large integers, hence the wider left-aligned id column (25 chars).
TAG_count = doc.count_by(spacy.attrs.TAG)
for k in sorted(TAG_count):
    v = TAG_count[k]
    print(f"{k:<{25}}. {doc.vocab[k].text:{5}} {v}")

74                       . POS   1
1292078113972184607      . IN    1
10554686591937588953     . JJ    3
12646065887601541794     . .     1
15267657372422890137     . DT    2
15308085513773655218     . NN    3
17109001835818727656     . VBD   1


In [55]:
# Frequency listing for syntactic dependency labels (DEP), in ascending id order.
DEP_count = doc.count_by(spacy.attrs.DEP)
for k in sorted(DEP_count):
    v = DEP_count[k]
    print(f"{k:<{25}}. {doc.vocab[k].text:{5}} {v}")

399                      . amod  3
412                      . det   2
426                      . nsubj 1
436                      . pobj  1
437                      . poss  1
440                      . prep  1
442                      . punct 1
8110129090154140942      . case  1
8206900633647566924      . ROOT  1
