In [1]:
import nltk
from nltk import load_parser, CFG, Tree
from nltk.sem import chat80
from nltk.draw.tree import draw_trees

In [2]:
# Toy CFG for the Groucho Marx sentence. A PP may attach either to a VP
# (VP -> VP PP) or inside an NP (NP -> Det N PP).
groucho_grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
""")

In [3]:
# Tokenise the sentence, chart-parse it with groucho_grammar, and show every
# resulting tree both in bracketed form and as an ASCII drawing.
sent2 = 'I shot an elephant in my pajamas'.split()
parser = nltk.ChartParser(groucho_grammar)
for tree in parser.parse(sent2):
    print(tree)
    tree.pretty_print()

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
     S                                       
  ___|______________                          
 |                  VP                       
 |         _________|__________               
 |        VP                   PP            
 |    ____|___              ___|___           
 |   |        NP           |       NP        
 |   |     ___|_____       |    ___|_____     
 NP  V   Det        N      P  Det        N   
 |   |    |         |      |   |         |    
 I  shot  an     elephant  in  my     pajamas

(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))
     S                                   
  ___|__________                          
 |              VP                       
 |    __________|______                   
 |   |                 NP                
 |   |     ____________|___               
 |   |    |     |       

In [4]:
# Ask the grammar for its productions matching the second token of sent2 on
# the right-hand side; the bare expression is displayed as the cell output.
groucho_grammar.productions(rhs=sent2[1])

[V -> 'shot']

In [5]:
# Second toy CFG: noun phrases may carry one or two adjectives (via Nom),
# and the only VP rule takes a verb, an object NP and a PP.
groucho_grammar2 = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det Nom | Det N 
    VP -> V NP PP
    Nom -> Adj N | Adj Adj N 

    Det -> 'The' | 'the'
    N -> 'bear' | 'trout' | 'brook'
    V -> 'saw'
    P -> 'in'
    Adj -> 'little' | 'fine' | 'fat'
    """)

In [6]:
# Chart-parse the bear/trout sentence with groucho_grammar2 and show each
# tree as a bracketed string followed by its ASCII drawing.
sent = 'The little bear saw the fine fat trout in the brook'.split()
parser = nltk.ChartParser(groucho_grammar2)
for tree in parser.parse(sent):
    print(tree)
    tree.pretty_print()

(S
  (NP (Det The) (Nom (Adj little) (N bear)))
  (VP
    (V saw)
    (NP (Det the) (Nom (Adj fine) (Adj fat) (N trout)))
    (PP (P in) (NP (Det the) (N brook)))))
                             S                                  
       ______________________|________                           
      |                               VP                        
      |               ________________|_____________             
      NP             |            NP                PP          
  ____|_____         |    ________|___           ___|___         
 |         Nom       |   |           Nom        |       NP      
 |     _____|___     |   |    ________|____     |    ___|____    
Det  Adj        N    V  Det Adj      Adj   N    P  Det       N  
 |    |         |    |   |   |        |    |    |   |        |   
The little     bear saw the fine     fat trout  in the     brook



In [7]:
# Third toy CFG: proper-noun and Det-N noun phrases, transitive verbs, and an
# optional PP either after the object (VP -> V NP PP) or inside an NP.
# `nltk` is already imported in the notebook's first cell, so it is not
# re-imported here.
groucho_grammar3 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

In [8]:
sent = "Mary saw Bob".split()
sent2 = "the dog saw a man".split()
parser = nltk.ChartParser(groucho_grammar3)
# Same print-and-draw routine for both sentences, first `sent`, then `sent2`.
for tokens in (sent, sent2):
    for tree in parser.parse(tokens):
        print(tree)
        tree.pretty_print()

(S (NP Mary) (VP (V saw) (NP Bob)))
      S         
  ____|___       
 |        VP    
 |     ___|___   
 NP   V       NP
 |    |       |  
Mary saw     Bob

(S (NP (Det the) (N dog)) (VP (V saw) (NP (Det a) (N man))))
             S             
      _______|___           
     |           VP        
     |        ___|___       
     NP      |       NP    
  ___|___    |    ___|___   
Det      N   V  Det      N 
 |       |   |   |       |  
the     dog saw  a      man



In [9]:
# NOTE(review): presumably launches NLTK's interactive recursive-descent
# parser demo; no output is recorded for this cell. Confirm that it does not
# block a "Restart & Run All" before keeping it in the notebook.
nltk.app.rdparser()

In [10]:
# Load a grammar from a local file and trace a recursive-descent parse.
# NOTE(review): mygrammar.cfg is not created anywhere in the visible notebook;
# presumably it has to exist in the working directory - confirm.
grammar = nltk.data.load('file:mygrammar.cfg')
rd_parser = nltk.RecursiveDescentParser(grammar, trace=2)
# Spell the input out here instead of relying on whatever `sent` an earlier
# cell left behind: several cells rebind `sent` to different sentences, so
# the result would otherwise depend on execution order.
rd_tokens = "Mary saw Bob".split()
for tree in rd_parser.parse(rd_tokens):
    print(tree)

Parsing 'Mary saw Bob'
    [ * S ]
  E [ * NP VP ]
  E [ * 'John' VP ]
  E [ * 'Mary' VP ]
  M [ 'Mary' * VP ]
  E [ 'Mary' * V NP ]
  E [ 'Mary' * 'saw' NP ]
  M [ 'Mary' 'saw' * NP ]
  E [ 'Mary' 'saw' * 'John' ]
  E [ 'Mary' 'saw' * 'Mary' ]
  E [ 'Mary' 'saw' * 'Bob' ]
  M [ 'Mary' 'saw' 'Bob' ]
  + [ 'Mary' 'saw' 'Bob' ]
(S (NP Mary) (VP (V saw) (NP Bob)))
  E [ 'Mary' 'saw' * Det N ]
  E [ 'Mary' 'saw' * 'a' N ]
  E [ 'Mary' 'saw' * 'an' N ]
  E [ 'Mary' 'saw' * 'the' N ]
  E [ 'Mary' 'saw' * 'my' N ]
  E [ 'Mary' 'saw' * Det N PP ]
  E [ 'Mary' 'saw' * 'a' N PP ]
  E [ 'Mary' 'saw' * 'an' N PP ]
  E [ 'Mary' 'saw' * 'the' N PP ]
  E [ 'Mary' 'saw' * 'my' N PP ]
  E [ 'Mary' * 'ate' NP ]
  E [ 'Mary' * 'walked' NP ]
  E [ 'Mary' * V NP PP ]
  E [ 'Mary' * 'saw' NP PP ]
  M [ 'Mary' 'saw' * NP PP ]
  E [ 'Mary' 'saw' * 'John' PP ]
  E [ 'Mary' 'saw' * 'Mary' PP ]
  E [ 'Mary' 'saw' * 'Bob' PP ]
  M [ 'Mary' 'saw' 'Bob' * PP ]
  E [ 'Mary' 'saw' 'Bob' * P NP ]
  E [ 'Mary' 'saw' 'B

In [11]:
# Head -> dependent rules for the Groucho sentence. 'in' is listed as a
# dependent of both 'shot' and 'elephant'.
groucho_dep_rules = """
    'shot' -> 'I' | 'elephant' | 'in'
    'elephant' -> 'an' | 'in'
    'in' -> 'pajamas'
    'pajamas' -> 'my'
"""
groucho_dep_grammar = nltk.DependencyGrammar.fromstring(groucho_dep_rules)
print(groucho_dep_grammar)

Dependency grammar with 7 productions
  'shot' -> 'I'
  'shot' -> 'elephant'
  'shot' -> 'in'
  'elephant' -> 'an'
  'elephant' -> 'in'
  'in' -> 'pajamas'
  'pajamas' -> 'my'


In [12]:
# Parse the tokenised sentence with a projective dependency parser built
# from groucho_dep_grammar, then print and draw every analysis it yields.
sent = 'I shot an elephant in my pajamas'.split()
pdp = nltk.ProjectiveDependencyParser(groucho_dep_grammar)
trees = pdp.parse(sent)
for tree in trees:
    print(tree)
    tree.pretty_print()

(shot I (elephant an (in (pajamas my))))
    shot                 
  ___|______              
 |       elephant        
 |    ______|________     
 |   |               in  
 |   |               |    
 |   |            pajamas
 |   |               |    
 I   an              my  

(shot I (elephant an) (in (pajamas my)))
      shot          
  _____|________     
 |     |        in  
 |     |        |    
 |  elephant pajamas
 |     |        |    
 I     an       my  



In [13]:
# Print the built-in documentation for the Tree class imported from nltk.
help(Tree)

Help on class Tree in module nltk.tree.tree:

class Tree(builtins.list)
 |  Tree(node, children=None)
 |  
 |  A Tree represents a hierarchical grouping of leaves and subtrees.
 |  For example, each constituent in a syntax tree is represented by a single Tree.
 |  
 |  A tree's children are encoded as a list of leaves and subtrees,
 |  where a leaf is a basic (non-tree) value; and a subtree is a
 |  nested Tree.
 |  
 |      >>> from nltk.tree import Tree
 |      >>> print(Tree(1, [2, Tree(3, [4]), 5]))
 |      (1 2 (3 4) 5)
 |      >>> vp = Tree('VP', [Tree('V', ['saw']),
 |      ...                  Tree('NP', ['him'])])
 |      >>> s = Tree('S', [Tree('NP', ['I']), vp])
 |      >>> print(s)
 |      (S (NP I) (VP (V saw) (NP him)))
 |      >>> print(s[1])
 |      (VP (V saw) (NP him))
 |      >>> print(s[1,1])
 |      (NP him)
 |      >>> t = Tree.fromstring("(S (NP I) (VP (V saw) (NP him)))")
 |      >>> s == t
 |      True
 |      >>> t[1][1].set_label('X')
 |      >>> t[1][1].labe

In [14]:
# Build the coordinated inner NP first, then wrap it together with the
# adjective under an outer NP.
men_and_women = Tree('NP', [Tree('N', ['men']), Tree('Conj', ['and']), Tree('N', ['women'])])
tree1 = Tree('NP', [Tree('JJ', ['old']), men_and_women])
print(tree1)

(NP (JJ old) (NP (N men) (Conj and) (N women)))


In [15]:
# Define a context-free grammar whose lexicon contains only 'a', 'dog' and
# 'chased'.
grammar = CFG.fromstring("""
S -> NP VP
NP -> Det N
VP -> V NP
Det -> 'a'
N -> 'dog'
V -> 'chased'
""")

# Generate parse trees using the grammar
parser = nltk.parse.chart.ChartParser(grammar)
sentences = ["a dog chased a dog", "a dog chased the dog", "the dog chased a dog"]

# Actually parse each sentence. Two of them contain 'the', which has no rule
# in the grammar above; the chart parser reports uncovered words with a
# ValueError, so report that and move on instead of aborting the cell.
for sentence in sentences:
    try:
        parses = list(parser.parse(sentence.split()))
    except ValueError as err:
        print("%r: %s" % (sentence, err))
        continue
    for tree in parses:
        print(tree)


### Queries from databases

In [22]:
# Parse an English question with the sql0 feature grammar and read the query
# off the root node: its SEM feature holds the fragments, which are joined
# with single spaces to form the final string.
cp = load_parser('grammars/book_grammars/sql0.fcfg')
query = 'What cities are located in Greece'
trees = list(cp.parse(query.split()))
first_parse = trees[0]
answer = first_parse.label()['SEM']
q = ' '.join(answer)
print(q)

SELECT City FROM city_table WHERE   Country="greece"


In [17]:
# Run the query string `q` against the city database and print the first
# column of every row on a single line.
# NOTE(review): the output recorded under this cell lists Chinese cities,
# although the query built by the parse cell above targets Greece, and the
# execution counts are out of order - the output looks stale; re-run to confirm.
rows = chat80.sql_query('corpora/city_database/city.db', q)
for r in rows:
    print(r[0], end=' ')

canton chungking dairen harbin kowloon mukden peking shanghai sian tientsin 

In [18]:
# Hand-written query: Chinese cities with a Population value above 1000.
q = "SELECT City, Population FROM city_table WHERE Country = 'china' and Population > 1000"
# Unpack each two-column row into its own names rather than looping with
# `answer`: that name already holds the SEM value produced by the parse cell,
# and rebinding it to a result row would silently change what it means.
for city, population in chat80.sql_query('corpora/city_database/city.db', q):
    print("%-10s %4s" % (city, population))

canton     1496
chungking  1100
mukden     1551
peking     2031
shanghai   5407
tientsin   1795


In [19]:
# Print NLTK's table of boolean operators and the symbols used to write them.
nltk.boolean_ops()

negation       	-
conjunction    	&
disjunction    	|
implication    	->
equivalence    	<->


In [20]:
# Shorthand for parsing a logic formula from its string form.
read_expr = nltk.sem.Expression.fromstring
# Operator cheat-sheet: - negation, & conjunction, | disjunction,
# -> implication (false only when the antecedent is true and the consequent is false),
# <-> equivalence (true when both sides have the same truth value).
read_expr('-(P & Q)')

<NegatedExpression -(P & Q)>