# NLP Lecture 07: Part-of-Speech Tagging


In [1]:
# Importing library
import numpy as np  
import pandas as pd 
import os

# List every file found under the Kaggle input directory, one path per line.
# The generator is lazy, so paths are printed in the same order os.walk yields them.
input_paths = (
    os.path.join(root, name)
    for root, _, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_paths:
    print(path)

In [2]:
# !pip install spacy
import spacy

In [3]:
# Load spaCy's English pipeline; the model package must already be installed.
MODEL_NAME = 'en_core_web_sm'
nlp = spacy.load(MODEL_NAME)

In [4]:
# Run the pipeline on a sample sentence; the resulting Doc carries the POS tags.
sentence = "I am about to complete full watch list of NLP"
doc = nlp(sentence)
doc.text

'I am about to complete full watch list of NLP'

In [5]:
# NOTE: a Doc is indexable, so individual tokens can be fetched by position.
tuple(doc[index] for index in (4, 7, 9))

(complete, list, NLP)

In [6]:
# Coarse-grained part of speech.
# `.pos` is the integer ID of each token's POS; the string form is `.pos_`.
tuple(token.pos for token in doc)

(95, 87, 84, 94, 100, 84, 92, 92, 85, 96)

In [7]:
# Coarse-grained POS of every token as a readable string label.
tuple(token.pos_ for token in doc)

('PRON', 'AUX', 'ADJ', 'PART', 'VERB', 'ADJ', 'NOUN', 'NOUN', 'ADP', 'PROPN')

In [8]:
# Fine-grained tag of every token as an integer ID; the string form is `.tag_`.
tuple(token.tag for token in doc)

(13656873538139661788,
 9188597074677201817,
 10554686591937588953,
 5595707737748328492,
 14200088355797579614,
 10554686591937588953,
 15308085513773655218,
 15308085513773655218,
 1292078113972184607,
 15794550382381185553)

In [12]:
# Fine-grained tag of every token as a readable string label.
tuple(token.tag_ for token in doc)

('PRP', 'VBP', 'JJ', 'TO', 'VB', 'JJ', 'NN', 'NN', 'IN', 'NNP')

In [13]:
# Fine-grained part of speech:
# ask spaCy for a human-readable description of one token's tag.
tag_label = doc[5].tag_
spacy.explain(tag_label)

'adjective (English), other noun-modifier (Chinese)'

In [14]:
# Loop over the Doc and print, for each token: its text, coarse POS,
# fine-grained tag, and spaCy's explanation of that tag.
for token in doc:
    print(token, token.pos_, token.tag_, spacy.explain(token.tag_), sep=' --> ')

I --> PRON --> PRP --> pronoun, personal
am --> AUX --> VBP --> verb, non-3rd person singular present
about --> ADJ --> JJ --> adjective (English), other noun-modifier (Chinese)
to --> PART --> TO --> infinitival "to"
complete --> VERB --> VB --> verb, base form
full --> ADJ --> JJ --> adjective (English), other noun-modifier (Chinese)
watch --> NOUN --> NN --> noun, singular or mass
list --> NOUN --> NN --> noun, singular or mass
of --> ADP --> IN --> conjunction, subordinating or preposition
NLP --> PROPN --> NNP --> noun, proper singular


In [15]:
# Can spaCy tell apart the POS of the same word depending on its context?
# The word "left" is used differently in each of the two sentences below.
doc1 = nlp('I left the room')
doc2 = nlp('To the left of room')
for position, parsed in enumerate((doc1, doc2)):
    if position:
        print()  # blank line separating the two sentences
    for token in parsed:
        print(token, token.pos_, token.tag_, spacy.explain(token.tag_), sep=' --> ')

I --> PRON --> PRP --> pronoun, personal
left --> VERB --> VBD --> verb, past tense
the --> DET --> DT --> determiner
room --> NOUN --> NN --> noun, singular or mass

To --> ADP --> IN --> conjunction, subordinating or preposition
the --> DET --> DT --> determiner
left --> NOUN --> NN --> noun, singular or mass
of --> ADP --> IN --> conjunction, subordinating or preposition
room --> NOUN --> NN --> noun, singular or mass


In [16]:
# Can spaCy tell apart the tag of the same word depending on its context?
# The word "read" is used in a different tense in each sentence below.
doc3 = nlp('I read books on politics')
doc4 = nlp('I have read a book on politics')
for position, parsed in enumerate((doc3, doc4)):
    if position:
        print()  # blank line separating the two sentences
    for token in parsed:
        print(token, token.pos_, token.tag_, spacy.explain(token.tag_), sep=' --> ')

I --> PRON --> PRP --> pronoun, personal
read --> VERB --> VBP --> verb, non-3rd person singular present
books --> NOUN --> NNS --> noun, plural
on --> ADP --> IN --> conjunction, subordinating or preposition
politics --> NOUN --> NNS --> noun, plural

I --> PRON --> PRP --> pronoun, personal
have --> AUX --> VBP --> verb, non-3rd person singular present
read --> VERB --> VBN --> verb, past participle
a --> DET --> DT --> determiner
book --> NOUN --> NN --> noun, singular or mass
on --> ADP --> IN --> conjunction, subordinating or preposition
politics --> NOUN --> NNS --> noun, plural


In [17]:
# Tag one more sentence; doc5 is reused by the visualisation cell.
pangram = 'The quick brown fox jumped over the lazy dog'
doc5 = nlp(pangram)
for token in doc5:
    print(token, token.pos_, token.tag_, spacy.explain(token.tag_), sep=' --> ')

The --> DET --> DT --> determiner
quick --> ADJ --> JJ --> adjective (English), other noun-modifier (Chinese)
brown --> ADJ --> JJ --> adjective (English), other noun-modifier (Chinese)
fox --> NOUN --> NN --> noun, singular or mass
jumped --> VERB --> VBD --> verb, past tense
over --> ADP --> IN --> conjunction, subordinating or preposition
the --> DET --> DT --> determiner
lazy --> ADJ --> JJ --> adjective (English), other noun-modifier (Chinese)
dog --> NOUN --> NN --> noun, singular or mass


In [18]:
# Visual representation: render doc5 inline with displaCy's 'dep' style.
from spacy import displacy

option = dict(
    distance=100,   # spacing between words
    compact=True,   # compact layout
    color='#fff',   # text colour
    bg='#00a65a',   # background colour
)
displacy.render(doc5, style='dep', options=option, jupyter=True)