# Parts of Speech

In [1]:
import spacy

In [2]:
# Load the "en_core_web_sm" spaCy pipeline; later cells call `nlp` to parse text.
nlp = spacy.load("en_core_web_sm")

## POS and Tags

In [3]:
# Parse a sample sentence into a spaCy Doc; later cells index and iterate over its tokens.
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

In [4]:
# Display the text held by the Doc (bare last expression, so the notebook echoes it).
doc.text

"The quick brown fox jumped over the lazy dog's back."

In [7]:
# Look at one token ("jumped", index 4): coarse part of speech first,
# then the fine-grained tag.
token = doc[4]
print(token.pos_)
print(token.tag_)

VERB
VBD


In [13]:
# Print a fixed-width table with one row per token: text, coarse POS,
# fine-grained tag, and spaCy's human-readable description of that tag.
row_format = "{:10} {:5} {:5} {:12}"
print(row_format.format("<Word>", "<POS>", "<Tag>", "<Explanation>"))
for token in doc:
    # spacy.explain() returns None for labels missing from its glossary, and
    # None cannot be formatted with a width spec (TypeError), so fall back to "".
    explanation = spacy.explain(token.tag_) or ""
    print(row_format.format(token.text, token.pos_, token.tag_, explanation))

<Word>     <POS> <Tag> <Explanation>
The        DET   DT    determiner  
quick      ADJ   JJ    adjective   
brown      ADJ   JJ    adjective   
fox        NOUN  NN    noun, singular or mass
jumped     VERB  VBD   verb, past tense
over       ADP   IN    conjunction, subordinating or preposition
the        DET   DT    determiner  
lazy       ADJ   JJ    adjective   
dog        NOUN  NN    noun, singular or mass
's         PART  POS   possessive ending
back       NOUN  NN    noun, singular or mass
.          PUNCT .     punctuation mark, sentence closer


In [15]:
# spaCy is expected to use sentence context to tell present-tense "read" from
# past-tense "read". Parse both sentences and print the tags of "read"
# (token index 1) so the difference is actually visible, instead of silently
# overwriting the first Doc. `doc` ends up bound to the second sentence.
for sentence in (u"I read books on NLP", u"I read a book on NLP"):
    doc = nlp(sentence)
    read_token = doc[1]
    print("{:10} {:5} {:5} {}".format(
        read_token.text,
        read_token.pos_,
        read_token.tag_,
        spacy.explain(read_token.tag_),
    ))

## POS Count

In [16]:
# Re-parse the sample sentence: the previous cell rebound `doc` to a different sentence.
doc = nlp(u"The quick brown fox jumped over the lazy dog's back.")

In [18]:
# Count how often each coarse POS occurs in the Doc. The result is a dict keyed
# by the POS attribute's integer ID (resolved to text via doc.vocab when printed).
pos_counts = doc.count_by(spacy.attrs.POS)

In [21]:
# One line per coarse POS, ordered by attribute ID: the ID is looked up in the
# vocab to recover its text, which is left-aligned in a 7-character column.
for pos_id in sorted(pos_counts):
    pos_name = doc.vocab[pos_id].text
    print("{:7} {}".format(pos_name, pos_counts[pos_id]))

ADJ     3
ADP     1
DET     2
NOUN    3
PART    1
PUNCT   1
VERB    1


## TAG Count

In [22]:
# Count how often each fine-grained tag occurs in the Doc. The result is a dict
# keyed by the TAG attribute's integer ID (resolved to text via doc.vocab when printed).
tag_count = doc.count_by(spacy.attrs.TAG)

In [23]:
# One line per fine-grained tag. Rows are ordered by the integer attribute ID,
# not by tag name, so the printed order is not alphabetical.
for tag_id in sorted(tag_count):
    tag_name = doc.vocab[tag_id].text
    print("{} {}".format(tag_name, tag_count[tag_id]))

POS 1
IN 1
JJ 3
. 1
DT 2
NN 3
VBD 1


## Display the Tags

In [24]:
from spacy import displacy

In [25]:
# Visual settings passed to displaCy's dependency renderer via `options=`.
option = dict(
    distance=110,
    compact=True,
    color="yellow",
    bg="#09a3d5",
    font="Times",
)

In [26]:
# Draw the dependency parse of `doc` inline in the notebook (jupyter=True),
# styled with the `option` dict.
displacy.render(doc, style="dep", jupyter=True, options=option)

In [27]:
# Parse a longer, two-sentence text so it can be visualized one sentence at a time.
doc_2 = nlp(u"This is a sentence. This is another sentence, possibly longer than the previous one")

In [28]:
# Materialize the sentence spans of doc_2 into a list (doc_2.sents is consumed by iteration).
spans = [sentence for sentence in doc_2.sents]

In [None]:
# Render each sentence span inline, the same way the single-Doc parse is drawn
# with displacy.render(..., jupyter=True). displacy.serve() would instead start
# a blocking web server, which stalls the kernel (and "Run All") until it is
# interrupted. Outside Jupyter, use
# displacy.serve(spans, style="dep", options=option) to view it in a browser.
displacy.render(spans, style="dep", jupyter=True, options=option)