<a href="https://colab.research.google.com/github/AhmedEssamK/UdemyNLP/blob/POS-and-NER/POS_Tagging.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import spacy

In [2]:
# Load spaCy's small English pipeline; the cells below run their text through `nlp`.
nlp = spacy.load("en_core_web_sm")

In [3]:
# Run the sample sentence through the pipeline to get a Doc of tagged tokens.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [4]:
# Echo the sentence back from the Doc to confirm what was parsed.
print(doc.text)

The quick brown fox jumped over the lazy dog's back.


In [6]:
# Fine-grained tag of the token at index 4 ("jumped").
print(doc[4].tag_)

VBD


In [7]:
# Coarse-grained part of speech of the same token ("jumped").
print(doc[4].pos_)

VERB


In [8]:
# One row per token: text, coarse POS, fine-grained tag, and spaCy's
# plain-English explanation of that tag. The first three columns are
# left-aligned in 10-character fields.
for token in doc:
    print(
        f"{token.text:<10}",
        f"{token.pos_:<10}",
        f"{token.tag_:<10}",
        spacy.explain(token.tag_),
    )

The        DET        DT         determiner
quick      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
brown      ADJ        JJ         adjective (English), other noun-modifier (Chinese)
fox        NOUN       NN         noun, singular or mass
jumped     VERB       VBD        verb, past tense
over       ADP        IN         conjunction, subordinating or preposition
the        DET        DT         determiner
lazy       ADJ        JJ         adjective (English), other noun-modifier (Chinese)
dog        NOUN       NN         noun, singular or mass
's         PART       POS        possessive ending
back       NOUN       NN         noun, singular or mass
.          PUNCT      .          punctuation mark, sentence closer


In [9]:
# Plural object: a sentence where "read" can be taken as present tense.
doc = nlp("I read books on NLP.")

In [10]:
# Keep a handle on the second token of the sentence ("read").
word = doc[1]

In [11]:
# Display the token's text to confirm which word was selected.
word.text

'read'

In [12]:
# Per-token table (text, coarse POS, fine-grained tag, tag explanation) for
# "I read books on NLP."; in the recorded output "read" is tagged VBP.
# `token` is bound by the `for` statement itself, so it needs no prior
# assignment.
for token in doc:
    print(f"{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_)}")

I          PRON       PRP        pronoun, personal
read       VERB       VBP        verb, non-3rd person singular present
books      NOUN       NNS        noun, plural
on         ADP        IN         conjunction, subordinating or preposition
NLP        PROPN      NNP        noun, proper singular
.          PUNCT      .          punctuation mark, sentence closer


In [13]:
# Singular object with an article: a variant of the previous sentence to compare tags on "read".
doc = nlp("I read a book on NLP.")

In [14]:
# Keep a handle on the second token of the sentence ("read").
word = doc[1]

# Per-token table (text, coarse POS, fine-grained tag, tag explanation) for
# "I read a book on NLP."; in the recorded output "read" is tagged VBD.
# `token` is bound by the `for` statement itself, so it needs no prior
# assignment.
for token in doc:
    print(f"{token.text:{10}} {token.pos_:{10}} {token.tag_:{10}} {spacy.explain(token.tag_)}")

I          PRON       PRP        pronoun, personal
read       VERB       VBD        verb, past tense
a          DET        DT         determiner
book       NOUN       NN         noun, singular or mass
on         ADP        IN         conjunction, subordinating or preposition
NLP        PROPN      NNP        noun, proper singular
.          PUNCT      .          punctuation mark, sentence closer


In [15]:
# Re-parse the fox sentence; the frequency counts below are computed on this Doc.
doc = nlp("The quick brown fox jumped over the lazy dog's back.")

In [16]:
# Count tokens per coarse POS. The result is a dict keyed by integer
# attribute ids (not label strings), with occurrence counts as values.
POS_counts = doc.count_by(spacy.attrs.POS)

In [17]:
# Show the raw {attribute id: count} mapping.
POS_counts

{90: 2, 84: 3, 92: 3, 100: 1, 85: 1, 94: 1, 97: 1}

In [19]:
# Resolve one of the integer ids from POS_counts (90) back to its label via the vocab.
doc.vocab[90].text

'DET'

In [20]:
# Indexing the Doc with an int returns a token: here the third one ("brown").
doc[2]

brown

In [21]:
# Print "id. LABEL count" for every coarse POS, sorted by id.
# Ids are right-aligned to the widest one so the label and count columns
# line up; an unpadded id shifts the row for the three-digit id 100.
pos_id_width = max((len(str(k)) for k in POS_counts), default=0)
for k, v in sorted(POS_counts.items()):
    print(f"{k:>{pos_id_width}}. {doc.vocab[k].text:{5}} {v}")

84. ADJ   3
85. ADP   1
90. DET   2
92. NOUN  3
94. PART  1
97. PUNCT 1
100. VERB  1


In [22]:
# Count tokens per fine-grained tag; keys are integer ids, values are counts.
TAG_counts = doc.count_by(spacy.attrs.TAG)

# Print "id. TAG count" sorted by id. Tag ids range from two digits to
# 20-digit hashes, so they are right-aligned to the widest id; unpadded, the
# label and count columns start at a different offset on almost every row.
tag_id_width = max((len(str(k)) for k in TAG_counts), default=0)
for k, v in sorted(TAG_counts.items()):
    print(f"{k:>{tag_id_width}}. {doc.vocab[k].text:{5}} {v}")

74. POS   1
1292078113972184607. IN    1
10554686591937588953. JJ    3
12646065887601541794. .     1
15267657372422890137. DT    2
15308085513773655218. NN    3
17109001835818727656. VBD   1


In [23]:
# Number of entries currently held in the vocab shared by this Doc.
len(doc.vocab)

791

In [24]:
# Count tokens per syntactic dependency label; keys are integer ids.
DEP_counts = doc.count_by(spacy.attrs.DEP)

# Resolve every id to its label once, then size both columns from the data:
# ids range from three digits to 19-digit hashes, and labels such as "advmod"
# are longer than a fixed 5-character field, which pushes their count out of
# line with the rest.
dep_labels = {k: doc.vocab[k].text for k in DEP_counts}
dep_id_width = max((len(str(k)) for k in dep_labels), default=0)
dep_label_width = max((len(name) for name in dep_labels.values()), default=0)
for k, v in sorted(DEP_counts.items()):
    print(f"{k:>{dep_id_width}}. {dep_labels[k]:<{dep_label_width}} {v}")

400. advmod 1
402. amod  3
415. det   2
429. nsubj 1
439. pobj  1
443. prep  1
445. punct 1
8110129090154140942. case  1
8206900633647566924. ROOT  1
