In [1]:
import nltk

from nltk import word_tokenize
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /home/jovyan/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/jovyan/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

### CFG

In [2]:
from nltk import CFG

grammer = CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | N PP
VP -> V NP |V VP PP
Det -> 'a' | 'the'
N -> 'dog' | 'cat'
V -> 'chased' | 'sat'
P -> 'on' | 'in'
""")


In [3]:
grammer

<Grammar with 14 productions>

In [4]:
sentence = 'the dog chased a cat'

sentence = nltk.pos_tag(word_tokenize(sentence))
sentence

[('the', 'DT'), ('dog', 'NN'), ('chased', 'VBD'), ('a', 'DT'), ('cat', 'NN')]

### Regex Parser

In [5]:
grammar1 = "NP:{<DT>?<JJ>*<NN>}" 
rp = nltk.RegexpParser(grammar1)
rp.parse(sentence)
print (list(rp.parse(sentence)))
print("\n-----------------------------------------------\n")

for tree in rp.parse(sentence):
     print(tree)


#Uncomment this line if executing locally
#rp.parse(sentence).draw()

[Tree('NP', [('the', 'DT'), ('dog', 'NN')]), ('chased', 'VBD'), Tree('NP', [('a', 'DT'), ('cat', 'NN')])]

-----------------------------------------------

(NP the/DT dog/NN)
('chased', 'VBD')
(NP a/DT cat/NN)


### Recursive Decent Parser

In [6]:
from nltk.parse import RecursiveDescentParser

rdp = RecursiveDescentParser(grammer)

sentence = 'the dog chased a cat'.split()

for t in rdp.parse(sentence):
  print(t)


(S (NP (Det the) (N dog)) (VP (V chased) (NP (Det a) (N cat))))


### Chart parser


In [7]:
chart_parser = nltk.ChartParser(grammer)

for t in chart_parser.parse(sentence):
  print(t)

(S (NP (Det the) (N dog)) (VP (V chased) (NP (Det a) (N cat))))


### Probabilistic CFG using inside chart parser

In [8]:
from nltk.grammar import toy_pcfg1
from nltk.parse import pchart
d1 = [(('John saw the man with the telescope'),toy_pcfg1)]
sent, grammar = d1[0]
sent=sent.split()
print(sent)

parser = pchart.InsideChartParser(grammar)
for tree in parser.parse(sent):
    print(tree)

['John', 'saw', 'the', 'man', 'with', 'the', 'telescope']
(S
  (NP John)
  (VP
    (V saw)
    (NP
      (NP (Det the) (N man))
      (PP (P with) (NP (Det the) (N telescope)))))) (p=0.00027755)
(S
  (NP John)
  (VP
    (VP (V saw) (NP (Det the) (N man)))
    (PP (P with) (NP (Det the) (N telescope))))) (p=0.00011102)


### Probabilistic CFG using viterbi algorithm

In [9]:
from nltk.grammar import toy_pcfg1
from nltk.parse import ViterbiParser
d1 = [(('John saw the man with the telescope'),toy_pcfg1)]
sent, grammar = d1[0]
sent=sent.split()
print(sent)

viterbi_parser = ViterbiParser(grammar)

for tree in parser.parse(sent):
    print(tree)

['John', 'saw', 'the', 'man', 'with', 'the', 'telescope']
(S
  (NP John)
  (VP
    (V saw)
    (NP
      (NP (Det the) (N man))
      (PP (P with) (NP (Det the) (N telescope)))))) (p=0.00027755)
(S
  (NP John)
  (VP
    (VP (V saw) (NP (Det the) (N man)))
    (PP (P with) (NP (Det the) (N telescope))))) (p=0.00011102)


### Identifying if the sentence is ambiguous


In [10]:
cnt = 0;

for tree in rdp.parse(sentence):
  print(str(tree) + "\n")
  print("\n")
  cnt+=1
pass


if cnt > 1:
  print("Sentence Ambiguous")
else:
  print("Sentence not Ambiguous")

(S (NP (Det the) (N dog)) (VP (V chased) (NP (Det a) (N cat))))



Sentence not Ambiguous
