<a href="https://colab.research.google.com/github/SarthakKeshari/LearningNLP/blob/main/PartOfSpeech_Basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import spaCy and load the small English pipeline used by the cells below.
import spacy

nlp = spacy.load("en_core_web_sm")

In [4]:
# Parse a sample sentence and list the main per-token annotations.
doc = nlp("Apple is looking at buying U.K. startup for $1billion")

# Columns: text, lemma, coarse POS, fine-grained tag, dependency label,
# word shape, is-alphabetic flag, is-stop-word flag.
for tok in doc:
    annotations = (
        tok.text, tok.lemma_, tok.pos_, tok.tag_,
        tok.dep_, tok.shape_, tok.is_alpha, tok.is_stop,
    )
    print(*annotations)

Apple Apple PROPN NNP nsubj Xxxxx True False
is be AUX VBZ aux xx True True
looking look VERB VBG ROOT xxxx True False
at at ADP IN prep xx True True
buying buy VERB VBG pcomp xxxx True False
U.K. U.K. PROPN NNP compound X.X. False False
startup startup NOUN NN dobj xxxx True False
for for ADP IN prep xxx True True
$ $ SYM $ nmod $ False False
1billion 1billion NUM CD pobj dxxxx False False


In [5]:
# Import spaCy and (re)load the small English pipeline for the POS examples.
import spacy

nlp = spacy.load("en_core_web_sm")

In [6]:
# Run the pipeline on a short sentence to build the Doc used in the next cells.
sentence = "The quick brown fox jumped over the lazy dog's back."
doc = nlp(sentence)

In [9]:
# Show the original text of the whole Doc.
print(doc.text)

# Show the fifth token (index 4) with its coarse POS, fine-grained tag,
# and the human-readable explanation of that tag.
fifth_token = doc[4]
print(fifth_token.text, fifth_token.pos_, fifth_token.tag_, spacy.explain(fifth_token.tag_))

The quick brown fox jumped over the lazy dog's back.
jumped VERB VBD verb, past tense


In [12]:
# Tabulate every token: text, coarse POS, fine-grained tag, tag description.
for tok in doc:
    tag_description = spacy.explain(tok.tag_)
    print(f'{tok.text:10} {tok.pos_:8} {tok.tag_:6} {tag_description}')

The        DET      DT     determiner
quick      ADJ      JJ     adjective
brown      ADJ      JJ     adjective
fox        NOUN     NN     noun, singular or mass
jumped     VERB     VBD    verb, past tense
over       ADP      IN     conjunction, subordinating or preposition
the        DET      DT     determiner
lazy       ADJ      JJ     adjective
dog        NOUN     NN     noun, singular or mass
's         PART     POS    possessive ending
back       NOUN     NN     noun, singular or mass
.          PUNCT    .      punctuation mark, sentence closer


Coarse-grained Part-of-speech Tags

Fine-grained Part-of-speech Tags

In [13]:
# Inspect the tags assigned to "read" (the second token) in this sentence.
doc = nlp("I read books on NLP")
read_token = doc[1]

tag_description = spacy.explain(read_token.tag_)
print(f'{read_token.text:10} {read_token.pos_:8} {read_token.tag_:6} {tag_description}')

read       VERB     VBD    verb, past tense


In [19]:
# Same check on a slightly different sentence: tags for "read" (second token).
doc = nlp("I read a book on NLP")
read_token = doc[1]

tag_description = spacy.explain(read_token.tag_)
print(f'{read_token.text:10} {read_token.pos_:8} {read_token.tag_:6} {tag_description}')

read       VERB     VBD    verb, past tense


In [21]:
# Count how often each coarse POS tag occurs in the Doc.
# The resulting dict is keyed by integer attribute IDs, not by tag names.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")
pos_attr = spacy.attrs.POS
POS_counts = doc.count_by(pos_attr)
POS_counts

{84: 3, 85: 1, 90: 2, 92: 3, 94: 1, 97: 1, 100: 1}

In [24]:
# Decode one of the integer keys from the POS counts back to its tag name.
pos_id = 84
doc.vocab[pos_id].text

'ADJ'

In [28]:
# Frequency table of coarse POS tags, ordered by tag ID: ID, tag name, count.
for pos_id, count in sorted(POS_counts.items()):
    pos_name = doc.vocab[pos_id].text
    print(f'{pos_id:4}. {pos_name:5} {count}')

  84. ADJ   3
  85. ADP   1
  90. DET   2
  92. NOUN  3
  94. PART  1
  97. PUNCT 1
 100. VERB  1


In [32]:
# Frequency table of fine-grained tags, ordered by tag ID: ID, tag name, count.
TAG_counts = doc.count_by(spacy.attrs.TAG)

# Most of these IDs are very large integers, hence the 20-character ID column.
for tag_id, count in sorted(TAG_counts.items()):
    tag_name = doc.vocab[tag_id].text
    print(f'{tag_id:20}. {tag_name:5} {count}')

                  74. POS   1
 1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    3
17109001835818727656. VBD   1


In [33]:
# Frequency table of dependency labels, ordered by label ID: ID, label, count.
DEP_counts = doc.count_by(spacy.attrs.DEP)

# The ID column is 20 characters wide because some label IDs are very large.
for dep_id, count in sorted(DEP_counts.items()):
    dep_name = doc.vocab[dep_id].text
    print(f'{dep_id:20}. {dep_name:5} {count}')

                 402. amod  3
                 415. det   2
                 429. nsubj 1
                 439. pobj  1
                 440. poss  1
                 443. prep  1
                 445. punct 1
 8110129090154140942. case  1
 8206900633647566924. ROOT  1


Using displacy

In [45]:
# Visualise the dependency parse of the sentence with displaCy.
import spacy
from spacy import displacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

# jupyter=True asks displaCy to render inline in the notebook; the
# 'distance' option controls the spacing between words in the diagram.
render_options = {'distance': 120}
displacy.render(doc, style='dep', jupyter=True, options=render_options)