In [1]:
import spacy
# Load the "en_core_web_sm" spaCy pipeline once; the later cells reuse this `nlp` object.
nlp = spacy.load("en_core_web_sm")

In [5]:
# Show the names of the components in the loaded pipeline.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [12]:
# Run the pipeline over a sample sentence mixing stray words with two ordinary sentences.
doc = nlp("Wow make eat The cat chased the playful dog around the old oak tree. Elon ate Biryani later.")

# One row per token: text | pos_ label | its explanation | tag_ label | its explanation.
for token in doc:
    print(
        token,
        token.pos_,
        spacy.explain(token.pos_),
        token.tag_,
        spacy.explain(token.tag_),
        sep=" | ",
    )

Wow | INTJ | interjection | UH | interjection
make | VERB | verb | VB | verb, base form
eat | VERB | verb | VB | verb, base form
The | DET | determiner | DT | determiner
cat | NOUN | noun | NN | noun, singular or mass
chased | VERB | verb | VBD | verb, past tense
the | DET | determiner | DT | determiner
playful | ADJ | adjective | JJ | adjective (English), other noun-modifier (Chinese)
dog | NOUN | noun | NN | noun, singular or mass
around | ADP | adposition | IN | conjunction, subordinating or preposition
the | DET | determiner | DT | determiner
old | ADJ | adjective | JJ | adjective (English), other noun-modifier (Chinese)
oak | NOUN | noun | NN | noun, singular or mass
tree | NOUN | noun | NN | noun, singular or mass
. | PUNCT | punctuation | . | punctuation mark, sentence closer
Elon | PROPN | proper noun | NNP | noun, proper singular
ate | VERB | verb | VBD | verb, past tense
Biryani | PROPN | proper noun | NNP | noun, proper singular
later | ADV | adverb | RB | adverb
. | PUNCT | punctuation | . | punctuation mark, sentence closer

In [15]:
# Sample paragraph to tag. The triple-quoted literal starts and ends with a newline,
# which presumably accounts for the SPACE tokens that show up in the POS counts.
text = """
Part of speech or POS tagging is used to tag parts of speech while building an NLP application. In this video, we will cover the basics of POS first and then write code in spacy
"""
doc = nlp(text)

# Keep (and print) every token except those tagged SPACE, X or PUNCT.
filtered_tokens = []
for token in doc:
    if token.pos_ in ("SPACE", "X", "PUNCT"):
        continue
    print(token, token.pos_, spacy.explain(token.pos_), sep=" | ")
    filtered_tokens.append(token)

Part | NOUN | noun
of | ADP | adposition
speech | NOUN | noun
or | CCONJ | coordinating conjunction
POS | PROPN | proper noun
tagging | NOUN | noun
is | AUX | auxiliary
used | VERB | verb
to | PART | particle
tag | VERB | verb
parts | NOUN | noun
of | ADP | adposition
speech | NOUN | noun
while | SCONJ | subordinating conjunction
building | VERB | verb
an | DET | determiner
NLP | PROPN | proper noun
application | NOUN | noun
In | ADP | adposition
this | DET | determiner
video | NOUN | noun
we | PRON | pronoun
will | AUX | auxiliary
cover | VERB | verb
the | DET | determiner
basics | NOUN | noun
of | ADP | adposition
POS | PROPN | proper noun
first | ADV | adverb
and | CCONJ | coordinating conjunction
then | ADV | adverb
write | VERB | verb
code | NOUN | noun
in | ADP | adposition
spacy | NOUN | noun


In [17]:
# Peek at the first ten tokens that passed the POS filter.
filtered_tokens[:10]

[Part, of, speech, or, POS, tagging, is, used, to, tag]

In [18]:
# Tally how many tokens in `doc` carry each POS label.
# The resulting dict is keyed by integer IDs rather than label strings.
count = doc.count_by(spacy.attrs.POS)
count

{103: 2,
 92: 10,
 85: 5,
 89: 2,
 96: 3,
 87: 2,
 100: 5,
 94: 1,
 98: 1,
 90: 3,
 97: 2,
 95: 1,
 86: 2}

In [20]:
# Resolve one integer POS ID (85, one of the keys in `count`) back to its label text via the vocab.
doc.vocab[85].text

'ADP'

In [22]:
# Report each POS frequency with the integer ID translated back to its readable label.
for pos_id, freq in count.items():
    pos_label = doc.vocab[pos_id].text
    print(f"Key : {pos_label}, Value: {freq}")

Key : SPACE, Value: 2
Key : NOUN, Value: 10
Key : ADP, Value: 5
Key : CCONJ, Value: 2
Key : PROPN, Value: 3
Key : AUX, Value: 2
Key : VERB, Value: 5
Key : PART, Value: 1
Key : SCONJ, Value: 1
Key : DET, Value: 3
Key : PUNCT, Value: 2
Key : PRON, Value: 1
Key : ADV, Value: 2
