In [2]:
import spacy

# Load the 'en_core_web_sm' pipeline once and bind it to `nlp`; the cells
# below call `nlp(...)` on raw text to obtain a parsed document.
# NOTE(review): the model package has to be installed separately beforehand
# (presumably via `python -m spacy download en_core_web_sm`) — confirm for
# this environment.
nlp = spacy.load('en_core_web_sm')

In [8]:
# Part-of-speech demo: run a two-sentence sample through the pipeline and
# list every token next to its coarse POS tag and spaCy's description of
# that tag.
text = "Elon flew to mars yesterday. He is an intelligent person"
doc = nlp(text)

for token in doc:
    coarse_pos = token.pos_
    print(token.text, '|', coarse_pos, '|', spacy.explain(coarse_pos))

Elon | PROPN | proper noun
flew | VERB | verb
to | ADP | adposition
mars | NOUN | noun
yesterday | NOUN | noun
. | PUNCT | punctuation
He | PRON | pronoun
is | AUX | auxiliary
an | DET | determiner
intelligent | ADJ | adjective
person | NOUN | noun


In [10]:
# Coarse vs. fine-grained tags: for each token print `pos_` and `tag_`,
# each followed by spaCy's explanation of the label.
doc = nlp("Wow! Dr. Strange made 265 million $ on the very first day")

for token in doc:
    coarse, fine = token.pos_, token.tag_
    print(
        token, '|',
        coarse, '|', spacy.explain(coarse), '|',
        fine, '|', spacy.explain(fine),
    )

Wow | INTJ | interjection | UH | interjection
! | PUNCT | punctuation | . | punctuation mark, sentence closer
Dr. | PROPN | proper noun | NNP | noun, proper singular
Strange | PROPN | proper noun | NNP | noun, proper singular
made | VERB | verb | VBD | verb, past tense
265 | NUM | numeral | CD | cardinal number
million | NUM | numeral | CD | cardinal number
$ | NUM | numeral | CD | cardinal number
on | ADP | adposition | IN | conjunction, subordinating or preposition
the | DET | determiner | DT | determiner
very | ADV | adverb | RB | adverb
first | ADJ | adjective | JJ | adjective (English), other noun-modifier (Chinese)
day | NOUN | noun | NN | noun, singular or mass


In [35]:
# Collect the nouns and numerals that appear in a news article.
#
# The whole file is read into memory and parsed once; `doc` stays bound to
# the parsed article because later cells keep working with it.
# The `with` block closes the file on exit, so no explicit close() is needed.
# NOTE(review): the file is assumed to be UTF-8 encoded; passing the encoding
# explicitly avoids platform-dependent decoding — confirm for this file.
with open('news_story.txt', 'r', encoding='utf-8') as story_file:
    content = story_file.read()

doc = nlp(content)

# Token objects (not plain strings) are kept, selected by coarse POS tag.
nouns = [token for token in doc if token.pos_ == 'NOUN']
numbers = [token for token in doc if token.pos_ == 'NUM']

# Preview the first ten entries of each list. Each print receives a single
# argument, so a `sep=` keyword would have no effect and is omitted.
print(nouns[:10])
print(numbers[:10])

[Inflation, climb, consumers, brink, expansion, consumer, price, index, measure, prices]
[8.3, 8.1, 1982, 6.2, 6, 0.3, 0.2, 0.6, 0.4, 0.1]


In [39]:
# Tally the article's tokens by coarse POS tag. The resulting mapping is
# keyed by integer attribute IDs rather than by tag names.
pos_attr = spacy.attrs.POS
count = doc.count_by(pos_attr)
count

{92: 96,
 100: 27,
 86: 15,
 85: 39,
 96: 20,
 97: 32,
 90: 34,
 95: 4,
 87: 13,
 89: 10,
 84: 23,
 103: 7,
 93: 19,
 94: 4,
 98: 8,
 101: 1}

In [44]:
# Show the tag counts with readable names: each integer key is looked up in
# the document's vocab to recover the tag's text.
for pos_id, n_tokens in count.items():
    pos_name = doc.vocab[pos_id].text
    print(pos_name, '|', n_tokens)

NOUN | 96
VERB | 27
ADV | 15
ADP | 39
PROPN | 20
PUNCT | 32
DET | 34
PRON | 4
AUX | 13
CCONJ | 10
ADJ | 23
SPACE | 7
NUM | 19
PART | 4
SCONJ | 8
X | 1
