# Part Of Speech POS Tagging

In [6]:
text = '''Inflation rose again in April, continuing a climb that has pushed consumers to the brink and is threatening the economic expansion, the Bureau of Labor Statistics reported Wednesday.\n\nThe consumer price index, a broad-based measure of prices for goods and services, increased 8.3% from a year ago, higher than the Dow Jones estimate for an 8.1% gain. That represented a slight ease from Marchâ€™s peak but was still close to the highest level since the summer of 1982.\n\nRemoving volatile food and ene'''

## Extract NOUN and NUM tokens

In [7]:
# import spacy library
import spacy
nlp = spacy.load('en_core_web_sm')

In [8]:
# create the object
doc = nlp(text)

noun_tokens = []
num_tokens = []

for token in doc:
  if token.pos_ == 'NOUN':
    noun_tokens.append(token)
  elif token.pos_ == 'NUM':
    num_tokens.append(token)

In [10]:
# noun tokens
noun_tokens

[Inflation,
 climb,
 consumers,
 brink,
 expansion,
 consumer,
 price,
 index,
 measure,
 prices,
 goods,
 services,
 %,
 year,
 estimate,
 %,
 gain,
 ease,
 Marchâ€,
 ™,
 peak,
 level,
 summer,
 food,
 ene]

In [11]:
# numerical tokens
num_tokens

[8.3, 8.1, 1982]

## Print a count of all POS tags

In [12]:
count = doc.count_by(spacy.attrs.POS)
count

{92: 25,
 100: 9,
 86: 4,
 85: 11,
 96: 7,
 97: 9,
 90: 12,
 95: 2,
 87: 3,
 89: 4,
 84: 6,
 103: 2,
 93: 3,
 94: 1,
 98: 1}

In [15]:
for key, value in count.items():
  print(doc.vocab[key].text, ' | ', value)

NOUN  |  25
VERB  |  9
ADV  |  4
ADP  |  11
PROPN  |  7
PUNCT  |  9
DET  |  12
PRON  |  2
AUX  |  3
CCONJ  |  4
ADJ  |  6
SPACE  |  2
NUM  |  3
PART  |  1
SCONJ  |  1
