## Analyzing Sentence Structure

## general parsing

In [1]:
import nltk

In [2]:
# nltk.parse_cfg
groucho_grammer  = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I'
VP -> V NP | VP PP
Det -> 'an' | 'my'
N -> 'elephant' | 'pajamas'
V -> 'shot'
P -> 'in'
                                  """)

In [3]:
sent = ['I','shot','an','elephant','in','my','pajamas']
parser = nltk.ChartParser(groucho_grammer)
trees = parser.parse(sent)
for tree in trees :
    print (tree)

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))


In [4]:
nltk.app.rdparser()

In [5]:
## Fighting animals could be dangerous.
## Visiting relatives can be tiresome.

In [6]:
# nltk.parse_cfg
grammer1  = nltk.CFG.fromstring("""
S -> NP VP
PP -> P NP
NP -> Det N | Det N PP | 'I' | V N
VP -> V NP | VP PP | V V JJ
N -> JJ N
S -> Aux NP VP
Det -> 'an' | 'my'
N ->  'animals' | 'relatives'
V -> 'could' | 'be' | 'fighting' | 'visiting' | 'can'
P -> 'in'
JJ -> 'dangerous' | 'tiresome'

                                  """)

In [7]:
sent1 = ['fighting','animals','could','be','dangerous']
sent2 = ['visiting','relatives','can','be','tiresome']

In [8]:
parser = nltk.ChartParser(grammer1)

In [9]:
trees = parser.parse(sent2)
for tree in trees :
    print (tree)

(S (NP (V visiting) (N relatives)) (VP (V can) (V be) (JJ tiresome)))


## Context-Free Grammar

In [10]:
grammar1 = nltk.CFG.fromstring("""
S -> NP VP
VP -> V NP | V NP PP
PP -> P NP
V -> "saw" | "ate" | "walked"
NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
Det -> "a" | "an" | "the" | "my"
N -> "man" | "dog" | "cat" | "telescope" | "park"
P -> "in" | "on" | "by" | "with"
""")
sent = "Mary saw Bob".split()
rd_parser = nltk.RecursiveDescentParser(grammar1)


In [11]:
for tree in rd_parser.parse(sent):
    print (tree)

(S (NP Mary) (VP (V saw) (NP Bob)))


## Recursion in Syntactic Structure

In [12]:
grammar2 = nltk.CFG.fromstring("""
S -> NP VP
NP -> Det Nom | PropN
Nom -> Adj Nom | N
VP -> V Adj | V NP | V S | V NP PP
PP -> P NP
PropN -> 'Buster' | 'Chatterer' | 'Joe'
Det -> 'the' | 'a'
N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
Adj -> 'angry' | 'frightened' | 'little' | 'tall'
V -> 'chased'| 'saw' | 'said' | 'thought' | 'was' | 'put'
P -> 'on'
""")

In [13]:
rd_parser = nltk.RecursiveDescentParser(grammar1)
sent = 'Mary saw a dog'.split()

In [14]:
for tree in rd_parser.parse(sent):
    print (tree)

(S (NP Mary) (VP (V saw) (NP (Det a) (N dog))))


## Shift-Reduce Parsing

In [15]:
nltk.app.srparser()



In [16]:
sr_parse = nltk.ShiftReduceParser(grammar1,trace=2)
sent = 'Mary saw a dog'.split()
s=(sr_parse.parse(sent))
print(*s)

Parsing 'Mary saw a dog'
    [ * Mary saw a dog]
  S [ 'Mary' * saw a dog]
  R [ NP * saw a dog]
  S [ NP 'saw' * a dog]
  R [ NP V * a dog]
  S [ NP V 'a' * dog]
  R [ NP V Det * dog]
  S [ NP V Det 'dog' * ]
  R [ NP V Det N * ]
  R [ NP V NP * ]
  R [ NP VP * ]
  R [ S * ]
(S (NP Mary) (VP (V saw) (NP (Det a) (N dog))))


In [17]:
# interactive chart parser
nltk.app.chartparser()

grammar= (
('    ', 'S -> NP VP,')
('    ', 'VP -> VP PP,')
('    ', 'VP -> V NP,')
('    ', 'VP -> V,')
('    ', 'NP -> Det N,')
('    ', 'NP -> NP PP,')
('    ', 'PP -> P NP,')
('    ', "NP -> 'John',")
('    ', "NP -> 'I',")
('    ', "Det -> 'the',")
('    ', "Det -> 'my',")
('    ', "Det -> 'a',")
('    ', "N -> 'dog',")
('    ', "N -> 'cookie',")
('    ', "N -> 'table',")
('    ', "N -> 'cake',")
('    ', "N -> 'fork',")
('    ', "V -> 'ate',")
('    ', "V -> 'saw',")
('    ', "P -> 'on',")
('    ', "P -> 'under',")
('    ', "P -> 'with',")
)
tokens = ['John', 'ate', 'the', 'cake', 'on', 'the', 'table']
Calling "ChartParserApp(grammar, tokens)"...


## Treebanks and Grammars

In [18]:
from nltk.corpus import treebank

In [19]:
t=treebank.parsed_sents('wsj_0001.mrg')[0]
print(t)

(S
  (NP-SBJ
    (NP (NNP Pierre) (NNP Vinken))
    (, ,)
    (ADJP (NP (CD 61) (NNS years)) (JJ old))
    (, ,))
  (VP
    (MD will)
    (VP
      (VB join)
      (NP (DT the) (NN board))
      (PP-CLR (IN as) (NP (DT a) (JJ nonexecutive) (NN director)))
      (NP-TMP (NNP Nov.) (CD 29))))
  (. .))


## probability CFG

In [20]:
grammar = nltk.PCFG.fromstring("""
S -> NP VP [1.0]
VP -> TV NP [0.4]
VP -> IV [0.3]
VP -> DatV NP NP [0.3]
TV -> 'saw' [1.0] 
IV -> 'ate' [1.0]
DatV -> 'gave' [1.0]
NP -> 'telescopes' [0.8]
NP -> 'Jack' [0.2]
""")
print (grammar)

Grammar with 9 productions (start state = S)
    S -> NP VP [1.0]
    VP -> TV NP [0.4]
    VP -> IV [0.3]
    VP -> DatV NP NP [0.3]
    TV -> 'saw' [1.0]
    IV -> 'ate' [1.0]
    DatV -> 'gave' [1.0]
    NP -> 'telescopes' [0.8]
    NP -> 'Jack' [0.2]


In [21]:
viterbi_parser = nltk.ViterbiParser(grammar)

In [22]:
print(*viterbi_parser.parse(['Jack', 'saw','telescopes']))

(S (NP Jack) (VP (TV saw) (NP telescopes))) (p=0.064)


In [23]:
grammar22 = nltk.CFG.fromstring("""
S -> NP VP | Aux NP VP | VP
NP -> Det Nom | PropN | PRON
Nom -> Adj Nom | N | Nom PP | Nom PP
VP -> V Adj | V NP | V S | V NP PP | V | VP PP
PP -> P NP
PropN -> 'Buster' | 'Chatterer' | 'Joe'
PRON -> 'he' | 'she'
Det -> 'the' | 'a' | 'this'
N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log' | 'book' | 'boys' | 'girl'
PRP -> 'with' | 'in'
Adj -> 'angry' | 'frightened' | 'little' | 'tall'
V -> 'chased'| 'saw' | 'said' | 'thought' | 'was' | 'put' | 'takes' | 'take'
P -> 'on'
""")

In [27]:
sent = "take this book".split()
rd_parser = nltk.RecursiveDescentParser(grammar22)

In [32]:
for tree in rd_parser.parse(sent):
    print(tree)

(S (VP (V take) (NP (Det this) (Nom (N book)))))


RecursionError: maximum recursion depth exceeded