In [1]:
import spacy

In [4]:
# Load the `en_core_web_sm` pipeline once; the cells below call `nlp(...)` to build each Doc.
nlp = spacy.load("en_core_web_sm")

In [5]:
# Sample sentence used by the tagging, counting and visualization cells below.
# (No `u` prefix: every str literal is already unicode in Python 3.)
doc1 = nlp("The quick brown fox jumped over the lazy dog's back.")

In [6]:
# Doc.text gives back the string the Doc was built from.
print(doc1.text)

The quick brown fox jumped over the lazy dog's back.


In [7]:
# A Doc is indexable by token position; index 4 is the fifth token.
print(doc1[4].text)

jumped


In [9]:
# pos_ is the token's coarse-grained part-of-speech label, as a string.
print(doc1[4].pos_)

VERB


In [10]:
# tag_ is the token's fine-grained tag, as a string; spacy.explain() turns it into a description.
print(doc1[4].tag_)

VBD


In [12]:
# One tab-separated row per token: text, coarse POS, fine-grained tag,
# and spaCy's human-readable description of that tag.
for token in doc1:
    tag_gloss = spacy.explain(token.tag_)
    print(f"{token.text} \t {token.pos_} \t {token.tag_} \t {tag_gloss}")

The 	 DET 	 DT 	 determiner
quick 	 ADJ 	 JJ 	 adjective (English), other noun-modifier (Chinese)
brown 	 ADJ 	 JJ 	 adjective (English), other noun-modifier (Chinese)
fox 	 NOUN 	 NN 	 noun, singular or mass
jumped 	 VERB 	 VBD 	 verb, past tense
over 	 ADP 	 IN 	 conjunction, subordinating or preposition
the 	 DET 	 DT 	 determiner
lazy 	 ADJ 	 JJ 	 adjective (English), other noun-modifier (Chinese)
dog 	 NOUN 	 NN 	 noun, singular or mass
's 	 PART 	 POS 	 possessive ending
back 	 NOUN 	 NN 	 noun, singular or mass
. 	 PUNCT 	 . 	 punctuation mark, sentence closer


In [15]:
# Two sentences that both contain the surface form "read", so the tagger's
# choice for each occurrence can be compared in the next cell.
doc2 = nlp("I read books on NLP. And then, I read a book on Deep Learning")

In [16]:
# One tab-separated row per token of doc2: text, coarse POS, and the
# description of its fine-grained tag (the raw tag itself is not printed).
for token in doc2:
    tag_gloss = spacy.explain(token.tag_)
    print(f"{token.text} \t {token.pos_} \t {tag_gloss}")

I 	 PRON 	 pronoun, personal
read 	 VERB 	 verb, non-3rd person singular present
books 	 NOUN 	 noun, plural
on 	 ADP 	 conjunction, subordinating or preposition
NLP 	 PROPN 	 noun, proper singular
. 	 PUNCT 	 punctuation mark, sentence closer
And 	 CCONJ 	 conjunction, coordinating
then 	 ADV 	 adverb
, 	 PUNCT 	 punctuation mark, comma
I 	 PRON 	 pronoun, personal
read 	 VERB 	 verb, past tense
a 	 DET 	 determiner
book 	 NOUN 	 noun, singular or mass
on 	 ADP 	 conjunction, subordinating or preposition
Deep 	 PROPN 	 noun, proper singular
Learning 	 PROPN 	 noun, proper singular


Here, the word "read" appears in two different tenses (present in the first sentence, past in the second), and spaCy tagged each occurrence correctly.

## Count of POS

In [17]:
# Tally the tokens of doc1 by coarse POS. The result is a dict whose keys are
# integer attribute IDs (not label strings) and whose values are frequencies.
pos_counts = doc1.count_by(spacy.attrs.POS)

In [18]:
# Display the raw {POS ID: count} mapping.
pos_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [19]:
# 84 is one of the keys in pos_counts; looking it up in the vocab recovers its string label.
doc1.vocab[84].text

'ADJ'

In [21]:
# Print "ID. LABEL count" for every POS in doc1, ordered by integer ID.
# The label is resolved through the vocab and left-aligned in a 5-character column.
for pos_id, count in sorted(pos_counts.items()):
    label = doc1.vocab[pos_id].text
    print(f"{pos_id}. {label:<5} {count}")

84. ADJ   3
85. ADP   1
90. DET   2
92. NOUN  3
94. PART  1
97. PUNCT 1
100. VERB  1


In [22]:
# Same tally as for POS, but keyed by fine-grained tag ID.
tag_counts = doc1.count_by(spacy.attrs.TAG)

# Print "ID. TAG count" ordered by ID. Most tag IDs are large integers of
# varying width, so the columns in the output do not line up.
for tag_id, count in sorted(tag_counts.items()):
    label = doc1.vocab[tag_id].text
    print(f"{tag_id}. {label:<5} {count}")

74. POS   1
1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    3
17109001835818727656. VBD   1


# POS Visualization

In [2]:
import spacy

In [1]:
from spacy import displacy

In [7]:
# Draw the dependency parse of doc1 inline in the notebook with displaCy's default styling.
displacy.render(doc1, style="dep", jupyter=True)

In [8]:
# Styling options passed to displaCy's dependency visualizer.
options = {
    "distance": 110,   # spacing between words
    # Must be a real boolean: the original string 'True' only worked because any
    # non-empty string is truthy, so 'False' would have enabled compact mode too.
    "compact": True,
    "color": "yellow",  # text colour
    "bg": "#09a3d5",    # background colour
    "font": "Times",    # font family
}

In [9]:
# Draw the same dependency parse again, this time with the custom styling in `options`.
displacy.render(doc1, style="dep", jupyter=True, options=options)

In [10]:
# A two-sentence text, so the sentence-by-sentence visualization below has more than one span.
doc3 = nlp("This is a sentence. This is another sentence, possibly longer than the first one.")

In [12]:
# Materialise the sentences of doc3 into a list so they can be passed to displaCy together.
spans = list(doc3.sents)

In [13]:
# Serve the per-sentence dependency parses from displaCy's built-in web server
# (the recorded output shows it listening on port 5000).
# NOTE(review): this presumably keeps the cell running until the server is
# interrupted; inside a notebook, displacy.render(..., jupyter=True) as used
# above is likely the better fit. Confirm before re-running.
displacy.serve(spans, style="dep", options=options)




Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...



127.0.0.1 - - [15/Jan/2024 09:10:10] "GET / HTTP/1.1" 200 11707
127.0.0.1 - - [15/Jan/2024 09:10:10] "GET /favicon.ico HTTP/1.1" 200 11707
