# POS tagging

1) POS Tagging is the process of labeling each word in a sentence with its part of speech, such as noun, verb, adjective, etc.

In [2]:
import spacy
# Load the 'en_core_web_sm' pipeline once; every later cell calls `nlp(...)`
# on its text to obtain a parsed document.
nlp = spacy.load('en_core_web_sm')

In [8]:
# Parse a sample sentence and print one row per token:
# token text | numeric POS id | POS label | human-readable explanation.
doc = nlp('Elon musk bought Twitter for 100$ Billion')
for token in doc:
    print(
        token,
        token.pos,
        token.pos_,
        spacy.explain(token.pos_),
        sep=' | ',
    )

Elon | 96 | PROPN | proper noun
musk | 92 | NOUN | noun
bought | 100 | VERB | verb
Twitter | 96 | PROPN | proper noun
for | 85 | ADP | adposition
100 | 93 | NUM | numeral
$ | 99 | SYM | symbol
Billion | 93 | NUM | numeral


In [15]:
# The fine-grained tag (token.tag_) adds detail beyond the coarse POS label,
# such as verb tense; print both side by side with their explanations.
doc = nlp('Elon musk bought Twitter for 100$ Billion')
for token in doc:
    print(
        token,
        token.pos_,
        spacy.explain(token.pos_),
        token.tag_,
        spacy.explain(token.tag_),
        sep=' | ',
    )

Elon | PROPN | proper noun | NNP | noun, proper singular
musk | NOUN | noun | NN | noun, singular or mass
bought | VERB | verb | VBD | verb, past tense
Twitter | PROPN | proper noun | NNP | noun, proper singular
for | ADP | adposition | IN | conjunction, subordinating or preposition
100 | NUM | numeral | CD | cardinal number
$ | SYM | symbol | $ | symbol, currency
Billion | NUM | numeral | CD | cardinal number


In [19]:
# spaCy picks up on word form: the next two cells tag 'quits' and 'quit' in the same sentence and get different tense tags.

In [20]:
# Show the fine-grained tag of the verb (second token) in a
# present-tense sentence.
doc = nlp('He quits the job')
print(f"{doc[1]} | {doc[1].tag_} {spacy.explain(doc[1].tag_)}")

quits | VBZ verb, 3rd person singular present


In [21]:
# Same sentence with 'quit' instead of 'quits': show the tag assigned to
# the verb (second token) for comparison.
doc = nlp('He quit the job')
print(f"{doc[1]} | {doc[1].tag_} {spacy.explain(doc[1].tag_)}")

quit | VBD verb, past tense


In [26]:
# Sample earnings-release text (bulleted, with currency amounts and
# percentages) used as input for POS tagging.
earning_text = '''Microsoft Corp. today announced the following results for the quarter ended June 30, 2024, as compared to the corresponding period of last fiscal year:

·        Revenue was $64.7 billion and increased 15% (up 16% in constant currency)

·        Operating income was $27.9 billion and increased 15% (up 16% in constant currency)

·        Net income was $22.0 billion and increased 10% (up 11% in constant currency)

·        Diluted earnings per share was $2.95 and increased 10% (up 11% in constant currency)'''

In [25]:
# Tag the earnings text and print one row per token:
# token text | POS label | explanation. Whitespace runs show up as SPACE tokens.
doc = nlp(earning_text)
for tok in doc:
    print(tok, tok.pos_, spacy.explain(tok.pos_), sep=' | ')

  | SPACE | space
Microsoft | PROPN | proper noun
Corp. | PROPN | proper noun
today | NOUN | noun
announced | VERB | verb
the | DET | determiner
following | VERB | verb
results | NOUN | noun
for | ADP | adposition
the | DET | determiner
quarter | NOUN | noun
ended | VERB | verb
June | PROPN | proper noun
30 | NUM | numeral
, | PUNCT | punctuation
2024 | NUM | numeral
, | PUNCT | punctuation
as | SCONJ | subordinating conjunction
compared | VERB | verb
to | ADP | adposition
the | DET | determiner
corresponding | ADJ | adjective
period | NOUN | noun
of | ADP | adposition
last | ADJ | adjective
fiscal | ADJ | adjective
year | NOUN | noun
: | PUNCT | punctuation


 | SPACE | space
· | PUNCT | punctuation
        | SPACE | space
Revenue | NOUN | noun
was | AUX | auxiliary
$ | SYM | symbol
64.7 | NUM | numeral
billion | NUM | numeral
and | CCONJ | coordinating conjunction
increased | VERB | verb
15 | NUM | numeral
% | NOUN | noun
( | PUNCT | punctuation
up | ADV | adverb
16 | NUM | numeral
%

In [31]:
# Sample news excerpt for POS-based filtering. The possessive in "March’s"
# uses a real U+2019 apostrophe rather than its mis-decoded "â€™" form, and the
# text carries no stray trailing quote. The excerpt itself is cut off
# mid-word at the end.
hey = '''Inflation rose again in April, continuing a climb that has pushed consumers to the brink and is threatening the economic expansion, the Bureau of Labor Statistics reported Wednesday.\n\nThe consumer price index, a broad-based measure of prices for goods and services, increased 8.3% from a year ago, higher than the Dow Jones estimate for an 8.1% gain. That represented a slight ease from March’s peak but was still close to the highest level since the summer of 1982.\n\nRemoving volatile food and ene
'''
doc = nlp(hey)

# Collect tokens by coarse part of speech, keeping document order.
noun_token = [tok for tok in doc if tok.pos_ == 'NOUN']
num_token = [tok for tok in doc if tok.pos_ == 'NUM']

In [32]:
# Display the tokens that were tagged NUM.
num_token

[8.3, 8.1, 1982]

In [34]:
# Display only the first ten NOUN tokens to keep the output short.
noun_token[:10]

[Inflation,
 climb,
 consumers,
 brink,
 expansion,
 consumer,
 price,
 index,
 measure,
 prices]