In [9]:
import spacy

In [10]:
nlp = spacy.load("en_core_web_sm")

In [11]:
doc = nlp("The fence was confused about whether it was supposed to keep things in or keep things out.")

for i in doc:
    print(i,"  |  ",i.pos_,"  |  ",spacy.explain(i.pos_),"  |  ",i.tag_,"  |  ",spacy.explain(i.tag_))

#i.pos_ is used to get the part of speech of the word
#i.tag_ is used to get the detailed part of speech of the word
#spacy.explain() is used to get the explanation of the part of speech

The   |   DET   |   determiner   |   DT   |   determiner
fence   |   NOUN   |   noun   |   NN   |   noun, singular or mass
was   |   AUX   |   auxiliary   |   VBD   |   verb, past tense
confused   |   ADJ   |   adjective   |   JJ   |   adjective (English), other noun-modifier (Chinese)
about   |   ADP   |   adposition   |   IN   |   conjunction, subordinating or preposition
whether   |   SCONJ   |   subordinating conjunction   |   IN   |   conjunction, subordinating or preposition
it   |   PRON   |   pronoun   |   PRP   |   pronoun, personal
was   |   AUX   |   auxiliary   |   VBD   |   verb, past tense
supposed   |   VERB   |   verb   |   VBN   |   verb, past participle
to   |   PART   |   particle   |   TO   |   infinitival "to"
keep   |   VERB   |   verb   |   VB   |   verb, base form
things   |   NOUN   |   noun   |   NNS   |   noun, plural
in   |   ADP   |   adposition   |   RP   |   adverb, particle
or   |   CCONJ   |   coordinating conjunction   |   CC   |   conjunction, coordin

**Remove PUNCT from the tokens**

In [12]:
for i in doc:
    if i.pos_ != "PUNCT":
        print(i,"  |  ",i.pos_)

The   |   DET
fence   |   NOUN
was   |   AUX
confused   |   ADJ
about   |   ADP
whether   |   SCONJ
it   |   PRON
was   |   AUX
supposed   |   VERB
to   |   PART
keep   |   VERB
things   |   NOUN
in   |   ADP
or   |   CCONJ
keep   |   VERB
things   |   NOUN
out   |   ADP


**Count POS in a Sentence**

In [13]:
print(doc.count_by(spacy.attrs.POS))
print(doc.vocab[90].text,"  |  ",spacy.explain(doc.vocab[90].text) )

{90: 1, 92: 3, 87: 2, 84: 1, 85: 3, 98: 1, 95: 1, 100: 3, 94: 1, 89: 1, 97: 1}
DET   |   determiner


In [14]:
count = doc.count_by(spacy.attrs.POS)
for i,j in count.items():
    print(doc.vocab[i].text,"  |  ",spacy.explain(doc.vocab[i].text),"  |  ",j)

DET   |   determiner   |   1
NOUN   |   noun   |   3
AUX   |   auxiliary   |   2
ADJ   |   adjective   |   1
ADP   |   adposition   |   3
SCONJ   |   subordinating conjunction   |   1
PRON   |   pronoun   |   1
VERB   |   verb   |   3
PART   |   particle   |   1
CCONJ   |   coordinating conjunction   |   1
PUNCT   |   punctuation   |   1


*Exercise*


In [20]:
with open("./news_story.txt","r") as f:
    text = f.read()
text

'Inflation rose again in April, continuing a climb that has pushed consumers to the brink and is threatening the economic expansion, the Bureau of Labor Statistics reported Wednesday.\n\nThe consumer price index, a broad-based measure of prices for goods and services, increased 8.3% from a year ago, higher than the Dow Jones estimate for an 8.1% gain. That represented a slight ease from Marchâ€™s peak but was still close to the highest level since the summer of 1982.\n\nRemoving volatile food and energy prices, so-called core CPI still rose 6.2%, against expectations for a 6% gain, clouding hopes that inflation had peaked in March.\n\nThe month-over-month gains also were higher than expectations â€” 0.3% on headline CPI versus the 0.2% estimate and a 0.6% increase for core, against the outlook for a 0.4% gain.\n\nThe price gains also meant that workers continued to lose ground. Real wages adjusted for inflation decreased 0.1% on the month despite a nominal increase of 0.3% in average h

In [22]:
doc = nlp(text)
for i in doc:
    print(i,"  |  ",i.pos_)

Inflation   |   NOUN
rose   |   VERB
again   |   ADV
in   |   ADP
April   |   PROPN
,   |   PUNCT
continuing   |   VERB
a   |   DET
climb   |   NOUN
that   |   PRON
has   |   AUX
pushed   |   VERB
consumers   |   NOUN
to   |   ADP
the   |   DET
brink   |   NOUN
and   |   CCONJ
is   |   AUX
threatening   |   VERB
the   |   DET
economic   |   ADJ
expansion   |   NOUN
,   |   PUNCT
the   |   DET
Bureau   |   PROPN
of   |   ADP
Labor   |   PROPN
Statistics   |   PROPN
reported   |   VERB
Wednesday   |   PROPN
.   |   PUNCT


   |   SPACE
The   |   DET
consumer   |   NOUN
price   |   NOUN
index   |   NOUN
,   |   PUNCT
a   |   DET
broad   |   ADV
-   |   PUNCT
based   |   VERB
measure   |   NOUN
of   |   ADP
prices   |   NOUN
for   |   ADP
goods   |   NOUN
and   |   CCONJ
services   |   NOUN
,   |   PUNCT
increased   |   VERB
8.3   |   NUM
%   |   NOUN
from   |   ADP
a   |   DET
year   |   NOUN
ago   |   ADV
,   |   PUNCT
higher   |   ADJ
than   |   ADP
the   |   DET
Dow   |   PROPN
Jones  

In [23]:
nouns = []
nums = []
for i in doc:
    if i.pos_ == "NOUN":
        nouns.append(i)
    elif i.pos_ == "NUM":
        nums.append(i)

print(nouns)
print(nums)

[Inflation, climb, consumers, brink, expansion, consumer, price, index, measure, prices, goods, services, %, year, estimate, %, gain, ease, Marchâ€, ™, peak, level, summer, food, energy, prices, core, %, expectations, %, gain, hopes, inflation, month, month, gains, expectations, %, headline, %, estimate, %, increase, core, outlook, %, gain, price, gains, workers, ground, wages, inflation, %, month, increase, %, earnings, year, earnings, %, earnings, %, Inflation, threat, recovery, pandemic, economy, stage, year, growth, level, prices, pump, grocery, stores, problem, inflation, areas, housing, auto, sales, host, areas, officials, problem, interest, rate, hikes, year, pledges, inflation, %, goal, ™, data, job, Credits]
[8.3, 8.1, 1982, 6.2, 6, â€, 0.3, 0.2, 0.6, 0.4, 0.1, 0.3, 2.6, 5.5, 2021, 1984, one, two, two, 2]


In [24]:
count = doc.count_by(spacy.attrs.POS)

{92: 98,
 100: 27,
 86: 15,
 85: 39,
 96: 17,
 97: 32,
 90: 34,
 95: 4,
 87: 13,
 89: 10,
 84: 23,
 103: 7,
 93: 20,
 94: 4,
 98: 8,
 101: 1}

In [28]:
for i,j in count.items():
    print(doc.vocab[i].text,"  |  ",spacy.explain(doc.vocab[i].text),"  |  ",j)

DET   |   determiner   |   1
NOUN   |   noun   |   3
AUX   |   auxiliary   |   2
ADJ   |   adjective   |   1
ADP   |   adposition   |   3
SCONJ   |   subordinating conjunction   |   1
PRON   |   pronoun   |   1
VERB   |   verb   |   3
PART   |   particle   |   1
CCONJ   |   coordinating conjunction   |   1
PUNCT   |   punctuation   |   1
