In [3]:
# Probabilistic parser
import nltk
from nltk import PCFG

# Toy probabilistic grammar for coordinated noun phrases. The bracketed
# number after each expansion is that expansion's probability.
grammar = PCFG.fromstring('''
NP -> NNS [0.5] | JJ NNS [0.3] | NP CC NP [0.2]
NNS -> "men" [0.1] | "women" [0.2] | "children" [0.3] | NNS CC NNS [0.4]
JJ -> "old" [0.4] | "young" [0.6]
CC -> "and" [0.9] | "or" [0.1]
''')
print(grammar)

# Parse an ambiguous phrase ("old" may scope over "men" only or over the
# whole coordination) with a Viterbi parser built from the grammar.
viterbi_parser = nltk.ViterbiParser(grammar)
token = ["old", "men", "and", "women"]
obj = viterbi_parser.parse(token)

print("Output: ")
# Each parse is printed together with its probability.
for parse_tree in obj:
    print(parse_tree)


Grammar with 11 productions (start state = NP)
    NP -> NNS [0.5]
    NP -> JJ NNS [0.3]
    NP -> NP CC NP [0.2]
    NNS -> 'men' [0.1]
    NNS -> 'women' [0.2]
    NNS -> 'children' [0.3]
    NNS -> NNS CC NNS [0.4]
    JJ -> 'old' [0.4]
    JJ -> 'young' [0.6]
    CC -> 'and' [0.9]
    CC -> 'or' [0.1]
Output: 
(NP (JJ old) (NNS (NNS men) (CC and) (NNS women))) (p=0.000864)


In [1]:
# Part-of-speech tagging using spaCy
import spacy
  
# Load spaCy's small English pipeline.
nlp = spacy.load("en_core_web_sm")

# Two sentences separated by a space and a line break; the line break is
# part of the string and is reported as its own SPACE token below.
text = (
    "My name is Ramesh Bhutka. \n"
    "I am an aspiring individual looking to work in the industry as a Data Scientist."
)

doc = nlp(text)

# Print every token with its coarse part-of-speech tag, collecting the
# verbs along the way so they can be listed once at the end.
verbs = []
for token in doc:
    print(token, token.pos_)
    if token.pos_ == "VERB":
        verbs.append(token.text)

print("Verbs:", verbs)

My PRON
name NOUN
is AUX
Ramesh PROPN
Bhutka PROPN
. PUNCT

 SPACE
I PRON
am AUX
an DET
aspiring VERB
individual ADJ
looking VERB
to PART
work VERB
in ADP
the DET
industry NOUN
as ADP
a DET
Data PROPN
Scientist PROPN
. PUNCT
Verbs: ['aspiring', 'looking', 'work']


In [3]:
import spacy
# Pipeline used by this cell and by the visualisation cell that follows.
sp = spacy.load('en_core_web_sm')
sen = sp("I like to play football. I hated it in my childhood though")

# One row per token: text, coarse POS tag, fine-grained tag, and spaCy's
# human-readable explanation of the fine-grained tag, in fixed-width columns.
for word in sen:
    tag_meaning = spacy.explain(word.tag_)
    print(f'{word.text:12} {word.pos_:10} {word.tag_:8} {tag_meaning}')


I            PRON       PRP      pronoun, personal
like         VERB       VBP      verb, non-3rd person singular present
to           PART       TO       infinitival "to"
play         VERB       VB       verb, base form
football     NOUN       NN       noun, singular or mass
.            PUNCT      .        punctuation mark, sentence closer
I            PRON       PRP      pronoun, personal
hated        VERB       VBD      verb, past tense
it           PRON       PRP      pronoun, personal
in           ADP        IN       conjunction, subordinating or preposition
my           PRON       PRP$     pronoun, possessive
childhood    NOUN       NN       noun, singular or mass
though       ADV        RB       adverb


In [4]:
# Visualizing part-of-speech tags and the dependency parse with displaCy
from spacy import displacy

# Re-parse the example sentences and draw them with displaCy's 'dep' style.
sen = sp("I like to play football. I hated it in my childhood though")
render_options = {'distance': 85}
displacy.render(
    sen,
    style='dep',
    jupyter=True,
    options=render_options,
)

In [10]:
# Part-of-speech tagging using NLTK.
import nltk
from nltk.tokenize import word_tokenize
# Tokenize the example sentences, then tag each token; the tagged list is
# the cell's last expression so the notebook displays it.
sentence = (
    "My name is Ramesh Bhutka. "
    "I am an aspiring individual looking to work in the industry as a Data Scientist."
)
text = word_tokenize(sentence)

nltk.pos_tag(text)

[('My', 'PRP$'),
 ('name', 'NN'),
 ('is', 'VBZ'),
 ('Ramesh', 'NNP'),
 ('Bhutka', 'NNP'),
 ('.', '.'),
 ('I', 'PRP'),
 ('am', 'VBP'),
 ('an', 'DT'),
 ('aspiring', 'VBG'),
 ('individual', 'JJ'),
 ('looking', 'VBG'),
 ('to', 'TO'),
 ('work', 'VB'),
 ('in', 'IN'),
 ('the', 'DT'),
 ('industry', 'NN'),
 ('as', 'IN'),
 ('a', 'DT'),
 ('Data', 'NNP'),
 ('Scientist', 'NNP'),
 ('.', '.')]

In [11]:
# Natural Language Toolkit: code_give

def give(t):
    """Tell whether subtree ``t`` is a 'give'/'gave' verb phrase with two complements.

    ``t`` matches when all of the following hold, checked in this order:

    * its label is ``'VP'`` and it has more than two children;
    * its second child is labelled ``'NP'``;
    * its third child is labelled ``'PP-DTV'`` or ``'NP'``;
    * the leaves under its first child contain ``'give'`` or ``'gave'``.

    Returns:
        bool: True for a match, False otherwise.
    """
    if t.label() != 'VP' or len(t) <= 2:
        return False
    if t[1].label() != 'NP':
        return False
    if t[2].label() not in ('PP-DTV', 'NP'):
        return False
    head_leaves = t[0].leaves()
    return 'give' in head_leaves or 'gave' in head_leaves
def sent(t):
    """Return the leaves of ``t`` joined by single spaces.

    Leaves whose first character is ``'*'``, ``'-'`` or ``'0'`` are left out
    (presumably trace / empty-element markers in the treebank; confirm
    against the corpus annotation scheme).
    """
    kept = []
    for leaf in t.leaves():
        if leaf[0] in '*-0':
            continue
        kept.append(leaf)
    return ' '.join(kept)
def print_node(t, width):
    """Print a one-line summary of the first three children of ``t``.

    The line has the form ``<head text> <label 1>: <text 1> / <label 2>: <text 2>``,
    where each text comes from ``sent``. If the line is longer than ``width``
    characters it is cut to ``width`` characters and ``"..."`` is appended.

    Args:
        t: a tree with at least three children; the second and third must
            provide ``label()``.
        width: maximum number of characters kept before the ellipsis.
    """
    head, arg1, arg2 = t[0], t[1], t[2]
    fields = (
        sent(head),
        arg1.label(), sent(arg1),
        arg2.label(), sent(arg2),
    )
    line = "%s %s: %s / %s: %s" % fields
    print(line if len(line) <= width else line[:width] + "...")



In [12]:
# Scan every parsed treebank sentence for verb phrases accepted by give()
# and print each match, truncated to 72 characters.
parsed_sentences = nltk.corpus.treebank.parsed_sents()
for tree in parsed_sentences:
    for match in tree.subtrees(give):
        print_node(match, 72)

gave NP: the chefs / NP: a standing ovation
give NP: advertisers / NP: discounts for maintaining or increasing ad sp...
give NP: it / PP-DTV: to the politicians
gave NP: them / NP: similar help
give NP: them / NP: 
give NP: only French history questions / PP-DTV: to students in a Europe...
give NP: federal judges / NP: a raise
give NP: consumers / NP: the straight scoop on the U.S. waste crisis
gave NP: Mitsui / NP: access to a high-tech medical product
give NP: Mitsubishi / NP: a window on the U.S. glass industry
give NP: much thought / PP-DTV: to the rates she was receiving , nor to ...
give NP: your Foster Savings Institution / NP: the gift of hope and free...
give NP: market operators / NP: the authority to suspend trading in futu...
gave NP: quick approval / PP-DTV: to $ 3.18 billion in supplemental appr...
give NP: the Transportation Department / NP: up to 50 days to review any...
give NP: the president / NP: such power
give NP: me / NP: the heebie-jeebies
give NP: holders / NP: 