# 1) POS Tagging Basics

In [1]:
import spacy

In [2]:
# Load spaCy's small English pipeline; calling `nlp` on text produces a Doc.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Run the pipeline on a sample sentence; the resulting Doc holds the tokens
# and their annotations used in the cells below.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

In [4]:
# Show the text held by the Doc.
print(doc.text)

Apple is looking at buying U.K. startup for $1 billion


In [5]:
# A Doc can be indexed by token position; index 2 is the third token.
print(doc[2])

looking


In [6]:
# Coarse-grained part-of-speech label of the token, as a string.
print(doc[2].pos_)

VERB


In [7]:
# Fine-grained tag of the same token, as a string.
print(doc[2].tag_)

VBG


In [8]:
# One row per token: text, lemma, coarse POS, fine-grained tag, and a
# plain-English description of that tag. The first four columns are
# left-aligned and padded to fixed widths so the rows line up.
for token in doc:
    print(
        f'{token.text:<10} {token.lemma_:<8} '
        f'{token.pos_:<8} {token.tag_:<6} '
        f'{spacy.explain(token.tag_)}'
    )

Apple      Apple    PROPN    NNP    noun, proper singular
is         be       AUX      VBZ    verb, 3rd person singular present
looking    look     VERB     VBG    verb, gerund or present participle
at         at       ADP      IN     conjunction, subordinating or preposition
buying     buy      VERB     VBG    verb, gerund or present participle
U.K.       U.K.     PROPN    NNP    noun, proper singular
startup    startup  NOUN     NN     noun, singular or mass
for        for      ADP      IN     conjunction, subordinating or preposition
$          $        SYM      $      symbol, currency
1          1        NUM      CD     cardinal number
billion    billion  NUM      CD     cardinal number



# 2) Counting POS Tags

In [9]:
# Tally the tokens of `doc` by coarse-grained POS. The result maps an
# integer attribute id (not a label string) to the number of tokens.
POS_counts = doc.count_by(spacy.attrs.POS)

In [10]:
# Keys are integer POS ids; the next cell maps them back to readable labels.
print(POS_counts)


{96: 2, 87: 1, 100: 2, 85: 2, 92: 1, 99: 1, 93: 2}


In [13]:
# Resolve each integer POS id to its label through the vocab and print
# "<id> <label> <count>" on one line per POS.
for key, val in doc.count_by(spacy.attrs.POS).items():
    print(f'{key} {doc.vocab[key].text} {val}')

96 PROPN 2
87 AUX 1
100 VERB 2
85 ADP 2
92 NOUN 1
99 SYM 1
93 NUM 2


# 3) Visualizing the Parts of Speech

In [14]:
# `spacy` is already imported and `nlp` already holds the loaded
# 'en_core_web_sm' pipeline (see the first cells of this notebook), so
# neither is repeated here: re-loading the model only costs time and
# rebinds `nlp` to a fresh object. displaCy is the only new name this
# section needs.
from spacy import displacy

In [15]:
# Parse the example sentence again to get the Doc that will be visualized.
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")

In [16]:
# Draw the dependency parse of `doc` inline in the notebook.
displacy.render(
    doc,
    style='dep',               # dependency-arc visualizer
    jupyter=True,              # render directly into the cell output
    options={'distance': 80},  # spacing between words, per the displaCy options
)