In [None]:
import sys,os
import nltk, re, pprint
import numpy as np

#nltk.download() #download NLTK repositories
#sudo apt-get install python3-pil.imagetk #install for tree visualization

In [None]:
# Noun Phrase Chunking

sentence = [("the", "DT"), ("little", "JJ"), ("yellow", "JJ"),
            ("dog", "NN"), ("barked", "VBD"), ("at", "IN"),
            ("the", "DT"), ("cat", "NN")]

grammar = "NP: {<DT>?<JJ>*<NN>}"

#This rule says that an NP chunk should be formed whenever the chunker
#finds an optional determiner (DT) followed by any number of 
#adjectives (JJ) and then a noun (NN).

cp = nltk.RegexpParser(grammar)
result = cp.parse(sentence)
print(result)
result.draw()

In [None]:
#Chunk structure for a sentence

grammar = r"""
  NP: {<DT|PP\$>?<JJ>*<NN>}   # chunk determiner/possessive, adjectives and noun
      {<NNP>+}                # chunk sequences of proper nouns
"""
cp = nltk.RegexpParser(grammar)

#The first rule matches an optional determiner or possessive pronoun, 
#zero or more adjectives, then a noun. The second rule matches 
#one or more proper nouns.


sentence = [("Rapunzel", "NNP"), ("let", "VBD"), ("down", "RP"),
            ("her", "PP$"), ("long", "JJ"), ("golden", "JJ"), 
            ("hair", "NN")]

print(cp.parse(sentence))
cp.parse(sentence).draw()

In [None]:
from nltk.corpus import treebank
t = treebank.parsed_sents('wsj_0001.mrg')[0]
print (t)
t.draw()

In [None]:
# Extract phrases matching a particular sequence of part-of-speech tags

cp = nltk.RegexpParser('CHUNK: {<V.*> <TO> <V.*>}')
brown = nltk.corpus.brown
for sent in brown.tagged_sents():
# for sent in treebank.tagged_sents():
    tree = cp.parse(sent)
    for subtree in tree.subtrees():
        if subtree.label() == 'CHUNK':
            print(subtree)

            
#Trying finding other phrases that uses to-infinitive:

#Nouns with to-infinitives {happy to come; surprised to see}
#Adjectives with to-infinitives {an opportunity to escape, need to shout}

In [9]:
tree1 = nltk.Tree('NP', ['Alice'])
print(tree1)
tree1.draw()

tree2 = nltk.Tree('NP', ['the', 'rabbit'])
print(tree2)
tree2.draw()

tree3 = nltk.Tree('VP', ['chased', tree2])
tree3.draw()

tree4 = nltk.Tree('S', [tree1, tree3])
print(tree4)
tree4.draw()

(NP Alice)
(NP the rabbit)
(S (NP Alice) (VP chased (NP the rabbit)))


In [11]:
from nltk import CFG,ChartParser
grammar=CFG.fromstring('''
        NP -> NNS|JJ NNS|NP CC NP
        NNS -> "cats"|"dogs"|"mice"|NNS CC NNS 
        JJ -> "big"|"small"
        CC -> "and"|"or"
        ''')

parser=ChartParser(grammar)

sent = 'small cats and mice'
tokens = sent.split()

parse=parser.parse(tokens)

# print parse

for tree in parse:
        print(tree)
        #tree.draw()

(NP (JJ small) (NNS (NNS cats) (CC and) (NNS mice)))
(NP (NP (JJ small) (NNS cats)) (CC and) (NP (NNS mice)))


In [12]:
groucho_grammar = nltk.CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    Det -> 'an' | 'my'
    N -> 'elephant' | 'pajamas'
    V -> 'shot'
    P -> 'in'
    """)

sent = "I shot an elephant in my pajamas"
tokens = sent.split()
parser = nltk.ChartParser(groucho_grammar)
for tree in parser.parse(tokens):
    print(tree)
    #tree.draw()

(S
  (NP I)
  (VP
    (VP (V shot) (NP (Det an) (N elephant)))
    (PP (P in) (NP (Det my) (N pajamas)))))
(S
  (NP I)
  (VP
    (V shot)
    (NP (Det an) (N elephant) (PP (P in) (NP (Det my) (N pajamas))))))


In [14]:
grammar1 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

# sent = "Mary saw Bob"
# sent = "John saw Bob with a dog"
sent = "John saw the cat with a dog"
# sent = "the dog saw a man in the park"
tokens = sent.split()

#Parsing algorithm

parser = nltk.ChartParser(grammar1)

trees = []
for tree in parser.parse(tokens):
    print(tree)
    trees.append(tree)
tree.draw()

(S
  (NP John)
  (VP
    (V saw)
    (NP (Det the) (N cat))
    (PP (P with) (NP (Det a) (N dog)))))
(S
  (NP John)
  (VP
    (V saw)
    (NP (Det the) (N cat) (PP (P with) (NP (Det a) (N dog))))))


In [15]:
#To compare multiple trees in a single window, we can use the 
#draw_trees() method

from nltk.draw.tree import draw_trees

draw_trees(trees[0], trees[1])     

In [16]:
#Recursion

grammar2 = nltk.CFG.fromstring("""
  S  -> NP VP
  NP -> Det Nom | PropN
  Nom -> Adj Nom | N
  VP -> V Adj | V NP | V S | V NP PP
  PP -> P NP
  PropN -> 'Buster' | 'Chatterer' | 'Joe'
  Det -> 'the' | 'a'
  N -> 'bear' | 'squirrel' | 'tree' | 'fish' | 'log'
  Adj  -> 'angry' | 'frightened' |  'little' | 'tall'
  V ->  'chased'  | 'saw' | 'said' | 'thought' | 'was' | 'put'
  P -> 'on'
  """)

# sent = "the angry bear chased the frightened little squirrel"
sent = "Chatterer said Buster thought the tree was tall"
tokens = sent.split()

#Parsing algorithm

# parser = nltk.RecursiveDescentParser(grammar2)
# parser = nltk.ChartParser(grammar2)
# parser = nltk.ChartParser(grammar2, trace=2)
# parser = nltk.ShiftReduceParser(grammar2)
parser = nltk.parse.BottomUpLeftCornerChartParser(grammar2)

for tree in parser.parse(tokens):
    print(tree)
    tree.draw()


#If the command print(tree) produces no output, this is probably 
#because your sentence sent is not admitted by your grammar.


#To investigate call the parser with tracing set to be on...
#You can also check what productions are currently in the grammar 
#with the command


# for p in grammar2.productions():
#     print(p)

(S
  (NP (PropN Chatterer))
  (VP
    (V said)
    (S
      (NP (PropN Buster))
      (VP
        (V thought)
        (S (NP (Det the) (Nom (N tree))) (VP (V was) (Adj tall)))))))


In [33]:
#Download <nltk.download('large_grammars')>

#Reading the ATIS grammar

grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
# print grammar

#Reading the test sentences.

sentences = nltk.data.load('grammars/large_grammars/atis_sentences.txt')
sentences = nltk.parse.util.extract_test_sentences(sentences)
# print sentences
# print(len(sentences))

testsentence = sentences[22]
# print testsentence

# print testsentence[0] #sentence
# print testsentence[1] #number of parse trees according to grammar

sentence = testsentence[0]
print (sentence)


['show', 'me', 'northwest', 'flights', 'to', 'detroit', '.']


In [34]:
parser = nltk.parse.EarleyChartParser(grammar)

chart = parser.chart_parse(sentence)

#Return the final parse ``Chart`` from which all possible 
#parse trees can be extracted.

print((chart.num_edges())) #edges number
print((len(list(chart.parses(grammar.start()))))) #parse trees

trees = list(chart.parses(grammar.start()))
draw_trees(trees[0][0], trees[1][0])

28352
17


In [82]:
testsentence = "A villager went to city to sell his property along with his wife"
testsentence = testsentence.lower()
print(testsentence)
tokens = testsentence.split()
print(tokens)
grammar2 = nltk.CFG.fromstring("""
  NP -> DT NN 
  S ->  NP VP | VP
  VP -> VBD PP S | TO VP | VB NP PP
  PP -> IN NP |IN PP
  NP -> NN | PRP NN |DT NN
  DT -> 'a'
  NN -> 'villager'
  VBD -> 'went' 
  IN -> 'to' | 'along' | 'with'
  NN -> 'city' | 'property' |'wife' 
  TO -> 'to'
  VB -> 'sell'
  PRP -> 'his'
  """)
parser = nltk.parse.BottomUpLeftCornerChartParser(grammar2)

for tree in parser.parse(tokens):
    print('entered')
    print(tree)
    tree.draw()

a villager went to city to sell his property along with his wife
['a', 'villager', 'went', 'to', 'city', 'to', 'sell', 'his', 'property', 'along', 'with', 'his', 'wife']


In [80]:
print(chart.num_edges())

115


In [75]:
print((len(list(chart.parses(grammar2.start()))))) #parse trees

0


In [64]:
trees = list(chart.parses(grammar2.start()))

In [60]:
draw_trees(trees)