#### POS BASICS

In [2]:
import spacy

In [3]:
# Load the "en_core_web_sm" pipeline package; the resulting `nlp` callable is
# reused by the cells below to turn raw text into an annotated document.
nlp = spacy.load("en_core_web_sm")

In [6]:
# Parse the sample sentence; `doc` holds its tokens together with the
# annotations (tag_, pos_) inspected in the following cells.
# The Python 2 style `u` prefix is dropped: it is a no-op on Python 3.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [7]:
# Echo the text of the parsed document.
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [10]:
# Fine-grained tag of the token at index 4 ("jumped"); recorded output: 'VBD'.
doc[4].tag_

'VBD'

In [11]:
# Coarse part-of-speech label of the same token (index 4, "jumped");
# recorded output: 'VERB'.
doc[4].pos_

'VERB'

In [14]:
# One row per token: text, coarse POS label, fine-grained tag, then the
# human-readable explanation of that tag. The first three columns are each
# padded to a width of 10 characters so the rows line up.
for token in doc:
    print(f"{token.text:10}{token.pos_:10}{token.tag_:10}{spacy.explain(token.tag_)}")

The       DET       DT        determiner
quick     ADJ       JJ        adjective
brown     ADJ       JJ        adjective
fox       NOUN      NN        noun, singular or mass
jumped    VERB      VBD       verb, past tense
over      ADP       IN        conjunction, subordinating or preposition
the       DET       DT        determiner
lazy      ADJ       JJ        adjective
dog       NOUN      NN        noun, singular or mass
's        PART      POS       possessive ending
back      NOUN      NN        noun, singular or mass
.         PUNCT     .         punctuation mark, sentence closer


In [15]:
# First reading of the ambiguous verb "read": in the recorded output for this
# sentence it is tagged VBP (non-3rd person singular present).
# The sentence text is kept exactly as written; only the redundant `u`
# prefix is removed.
doc = nlp("I read book on NLP.")

In [16]:
# Select the second token (index 1) of the sentence, i.e. the verb "read".
word = doc[1]

In [17]:
# Confirm which token was selected; recorded output: 'read'.
word.text

'read'

In [18]:
# Print the selected word in the same column layout as the per-token table:
# text, coarse POS and fine-grained tag (each padded to 10), then the tag's
# explanation. `token` is kept as an alias of `word` so the name stays defined.
token = word
print(f"{token.text:10}{token.pos_:10}{token.tag_:10}{spacy.explain(token.tag_)}")

read      VERB      VBP       verb, non-3rd person singular present


In [20]:
# Second reading of "read": for this variant of the sentence the recorded
# output tags the verb as VBD (past tense).
doc = nlp("I read a book on NLP")

In [21]:
# Re-select the verb at index 1 from the newly parsed sentence and print it
# in the same column layout: text, coarse POS, fine-grained tag (each padded
# to 10 characters), followed by the tag's explanation.
word = doc[1]
token = word
print(f"{token.text:10}{token.pos_:10}{token.tag_:10}{spacy.explain(token.tag_)}")

read      VERB      VBD       verb, past tense


In [22]:
# Re-parse the original sample sentence so the frequency counts below are
# computed on it rather than on the short "read" examples.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [23]:
# count_by returns a dict that maps each POS attribute ID (an integer) to the
# number of tokens in `doc` carrying that part of speech.
POS_counts = doc.count_by(spacy.attrs.POS)

In [24]:
# Show the raw dict: the keys are integer IDs, not readable labels.
POS_counts

{96: 1, 83: 3, 99: 1, 84: 1, 89: 2, 91: 3, 93: 1}

In [29]:
# Iterating the dict yields its keys, which are POS IDs (not counts); look
# each ID up in the document's vocabulary to recover the readable label.
for pos_id in POS_counts:
    print(doc.vocab[pos_id].text)

PUNCT
ADJ
VERB
ADP
DET
NOUN
PART


In [39]:
# Frequency table ordered by POS ID. Each row is
# "<id>.<label padded to 6 characters> <number of tokens>".
for pos_id, freq in sorted(POS_counts.items()):
    print(f"{pos_id}.{doc.vocab[pos_id].text:6} {freq}")

83.ADJ    3
84.ADP    1
89.DET    2
91.NOUN   3
93.PART   1
96.PUNCT  1
99.VERB   1


In [40]:
# Map each fine-grained TAG attribute ID to the number of tokens carrying it.
TAG_Counts = doc.count_by(spacy.attrs.TAG)

# Frequency table ordered by tag ID. The integer ID is right-aligned in a
# 20-character column (some IDs in the recorded output are 20 digits long),
# followed by the label padded to 6 characters and the count.
for tag_id, freq in sorted(TAG_Counts.items()):
    print(f"{tag_id:20}. {doc.vocab[tag_id].text:6} {freq}")

                  74. POS    1
 1292078113972184607. IN     1
10554686591937588953. JJ     3
12646065887601541794. .      1
15267657372422890137. DT     2
15308085513773655218. NN     3
17109001835818727656. VBD    1


In [41]:
# Map each syntactic-dependency (DEP) attribute ID to the number of tokens
# carrying that dependency label.
# NOTE(review): "DEB" looks like a typo for "DEP"; the variable name is left
# unchanged in case other cells reference it.
DEB_Counts = doc.count_by(spacy.attrs.DEP)

# Frequency table ordered by dependency ID: ID right-aligned in a
# 20-character column, label padded to 6 characters, then the count.
for dep_id, freq in sorted(DEB_Counts.items()):
    print(f"{dep_id:20}. {doc.vocab[dep_id].text:6} {freq}")

                 399. amod   3
                 412. det    2
                 426. nsubj  1
                 436. pobj   1
                 437. poss   1
                 440. prep   1
                 442. punct  1
 8110129090154140942. case   1
 8206900633647566924. ROOT   1
