# NLTK Examples

## Grammar

In [1]:
import nltk

# Toy context-free grammar: a handful of names, nouns, verbs and prepositions.
# grammar1 is reused by the top-down, bottom-up and chart parsing cells below.
grammar1 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

# Whitespace tokenisation is enough here: the sentence contains no punctuation.
sent = "Mary saw Bob".split()
rd_parser = nltk.RecursiveDescentParser(grammar1)
# Print each parse tree the recursive-descent parser yields for the sentence.
for tree in rd_parser.parse(sent):
    print(tree)
    # NOTE(review): draw() presumably opens a separate viewer window for the tree;
    # confirm it does not stall a headless Restart & Run All.
    tree.draw()
    

(S (NP Mary) (VP (V saw) (NP Bob)))


### Writing your own grammar

[mygrammar.cfg](mygrammar.cfg)

In [2]:
# Read the grammar from the local file mygrammar.cfg and print its productions.
grammar2 = nltk.data.load("file:mygrammar.cfg")
print(grammar2)
# Rebinds `sent` and `rd_parser` from the previous cell to exercise the new grammar.
sent = "a porcupine ate a taco".split()
rd_parser = nltk.RecursiveDescentParser(grammar2)
# Print every parse tree the parser yields for the sentence.
for tree in rd_parser.parse(sent):
    print(tree)

Grammar with 27 productions (start state = S)
    S -> NP VP
    VP -> V NP
    VP -> V NP PP
    PP -> P NP
    V -> 'saw'
    V -> 'ate'
    V -> 'walked'
    NP -> 'John'
    NP -> 'Mary'
    NP -> 'Bob'
    NP -> Det N
    NP -> Det N PP
    Det -> 'a'
    Det -> 'an'
    Det -> 'the'
    Det -> 'my'
    N -> 'man'
    N -> 'porcupine'
    N -> 'elephant'
    N -> 'telescope'
    N -> 'park'
    N -> 'salad'
    N -> 'taco'
    P -> 'in'
    P -> 'on'
    P -> 'by'
    P -> 'with'
(S (NP (Det a) (N porcupine)) (VP (V ate) (NP (Det a) (N taco))))


## Parsing


### Top Down

In [3]:
# Top-down parsing: recursive-descent parser over grammar1.
rd_parser = nltk.RecursiveDescentParser(grammar1)
sent = 'Mary saw a dog with a telescope'.split()
# The sentence is ambiguous under grammar1 (the PP can attach to the NP or to
# the VP), so the loop prints one tree per reading.
for tree in rd_parser.parse(sent):
    print(tree)

(S
  (NP Mary)
  (VP
    (V saw)
    (NP (Det a) (N dog) (PP (P with) (NP (Det a) (N telescope))))))
(S
  (NP Mary)
  (VP
    (V saw)
    (NP (Det a) (N dog))
    (PP (P with) (NP (Det a) (N telescope)))))


### Bottom Up

In [4]:
# Bottom-up parsing: shift-reduce parser over grammar1; trace=1 makes it log its steps.
sr_parser = nltk.ShiftReduceParser(grammar1, trace=1)
sent = 'Mary saw a dog with a telescope'.split()
# NOTE(review): the recorded run shows only the trace and no tree, i.e. this
# parser yielded no parse for the sentence here.
for tree in sr_parser.parse(sent):
    print(tree)

Parsing 'Mary saw a dog with a telescope'
    [ * Mary saw a dog with a telescope]
    [ 'Mary' * saw a dog with a telescope]
    [ NP 'saw' * a dog with a telescope]
    [ NP V 'a' * dog with a telescope]
    [ NP V Det 'dog' * with a telescope]
    [ S 'with' * a telescope]
    [ S P 'a' * telescope]
    [ S P Det 'telescope' * ]


### Chart Parser

In [5]:
# Chart parsing over the same grammar and sentence as the two cells above.
chart_parser = nltk.ChartParser(grammar1)
sent = 'Mary saw a dog with a telescope'.split()
# Print each tree the chart parser yields (two readings in the recorded run).
for tree in chart_parser.parse(sent):
    print(tree)

(S
  (NP Mary)
  (VP
    (V saw)
    (NP (Det a) (N dog))
    (PP (P with) (NP (Det a) (N telescope)))))
(S
  (NP Mary)
  (VP
    (V saw)
    (NP (Det a) (N dog) (PP (P with) (NP (Det a) (N telescope))))))


## Sentence Generation

In [6]:
import nltk
from nltk.parse.generate import generate

# Grammar used for generation, read from the local file gengrammar.cfg.
grammar3 = nltk.data.load("file:gengrammar.cfg")

# The same print loop runs three times, varying only the limits handed to
# generate(): a sentence cap, then a depth cap, then both together.
for limits in ({"n": 10}, {"depth": 4}, {"depth": 4, "n": 10}):
    for sentence in generate(grammar3, **limits):
        print(' '.join(sentence))

John saw John
John saw a man
John saw a porcupine telescope salad
John saw a taco
John saw the man
John saw the porcupine telescope salad
John saw the taco
John saw a man with John
John saw a man with a man
John saw a man with a porcupine telescope salad
John saw John
John ate John
John walked John
a man saw John
a man ate John
a man walked John
a porcupine telescope salad saw John
a porcupine telescope salad ate John
a porcupine telescope salad walked John
a taco saw John
a taco ate John
a taco walked John
the man saw John
the man ate John
the man walked John
the porcupine telescope salad saw John
the porcupine telescope salad ate John
the porcupine telescope salad walked John
the taco saw John
the taco ate John
the taco walked John
John saw John
John ate John
John walked John
a man saw John
a man ate John
a man walked John
a porcupine telescope salad saw John
a porcupine telescope salad ate John
a porcupine telescope salad walked John
a taco saw John


## Treebanks

In [7]:
from nltk.corpus import treebank
# Take the second parsed sentence (index 1) of the treebank corpus file wsj_0005.mrg
# and print its bracketed tree.
t = treebank.parsed_sents('wsj_0005.mrg')[1]
print(t)

(S
  (NP-SBJ (PRP He))
  (VP
    (VBZ succeeds)
    (NP
      (NP (NNP Terrence) (NNP D.) (NNP Daniels))
      (, ,)
      (NP
        (ADVP (RB formerly))
        (DT a)
        (NNP W.R.)
        (NNP Grace)
        (NN vice)
        (NN chairman))
      (, ,)
      (SBAR
        (WHNP-11 (WP who))
        (S (NP-SBJ (-NONE- *T*-11)) (VP (VBD resigned))))))
  (. .))


In [8]:
def filter(tree):
    """Return True when ``tree`` is an ``NP`` node with at least one ``CD`` child.

    Only direct children that are ``nltk.Tree`` instances are inspected; any
    other child (e.g. a leaf string) is ignored. The function is used below as
    the predicate passed to ``Tree.subtrees``.

    Note: this name shadows Python's built-in ``filter`` once the cell has run.
    """
    if tree.label() != 'NP':
        return False
    return any(
        isinstance(child, nltk.Tree) and child.label() == 'CD'
        for child in tree
    )


# Restrict the scan to the first 100 parsed sentences of the corpus.
parsed_sents = treebank.parsed_sents()[:100]

# Print every subtree accepted by `filter` (an NP node with a CD child).
for tree in parsed_sents:
    for subtree in tree.subtrees(filter):
        print(subtree)

(NP (CD 61) (NNS years))
(NP (CD 55) (NNS years))
(NP (CD 1956))
(NP (DT the) (JJ early) (CD 1950s))
(NP (CD 1956))
(NP (CD 1953))
(NP (CD 1955))
(NP (CD 33) (NNS men))
(NP (CD 28))
(NP (CD Four))
(NP (DT the) (CD five) (VBG surviving) (NNS workers))
(NP (CD three))
(NP (CD 18) (NNS deaths))
(NP (CD one))
(NP (CD 1997))
(NP (DT the) (CD 1950s))
(NP (CD 35) (NNS years))
(NP (DT the) (CD 400) (JJ taxable) (NNS funds))
(NP (CD 8.45) (NN %))
(NP (CD 8.47) (NN %))
(NP (CD 41) (NNS days))
(NP (CD 33) (NNS days))
(NP (CD 8.04) (NN %))
(NP (CD 7.90) (NN %))
(NP (DT the) (CD 400) (JJ taxable) (NNS funds))
(NP (CD 9.37) (NN %))
(NP (CD 9.45) (NN %))
(NP (DT the) (CD 400) (NNS funds))
(NP (CD 8.14) (NN %))
(NP (DT an) (JJ average) (CD 8.19) (NN %))
(NP (CD 8.22) (NN %))
(NP (DT an) (JJ average) (CD 8.53) (NN %))
(NP (CD 8.56) (NN %))
(NP (CD three))
(NP
  (NP (NNP Grace) (NNP Energy) (POS 's))
  (CD seven)
  (NN board)
  (NNS seats))
(NP ($ $) (CD 27) (-NONE- *U*))
(NP (CD 2,700) (NNS people))
(N

### Production Rules

In [9]:
# NOTE(review): production_set is initialised but never filled or read in this
# cell — confirm whether it is used elsewhere or can be dropped.
production_set = []
# Print every production of the first five parsed sentences.
# `sent` is rebound here to a parsed sentence; earlier cells used it for token lists.
for sent in parsed_sents[:5]:
    for production in sent.productions():
        print(production)

S -> NP-SBJ VP .
NP-SBJ -> NP , ADJP ,
NP -> NNP NNP
NNP -> 'Pierre'
NNP -> 'Vinken'
, -> ','
ADJP -> NP JJ
NP -> CD NNS
CD -> '61'
NNS -> 'years'
JJ -> 'old'
, -> ','
VP -> MD VP
MD -> 'will'
VP -> VB NP PP-CLR NP-TMP
VB -> 'join'
NP -> DT NN
DT -> 'the'
NN -> 'board'
PP-CLR -> IN NP
IN -> 'as'
NP -> DT JJ NN
DT -> 'a'
JJ -> 'nonexecutive'
NN -> 'director'
NP-TMP -> NNP CD
NNP -> 'Nov.'
CD -> '29'
. -> '.'
S -> NP-SBJ VP .
NP-SBJ -> NNP NNP
NNP -> 'Mr.'
NNP -> 'Vinken'
VP -> VBZ NP-PRD
VBZ -> 'is'
NP-PRD -> NP PP
NP -> NN
NN -> 'chairman'
PP -> IN NP
IN -> 'of'
NP -> NP , NP
NP -> NNP NNP
NNP -> 'Elsevier'
NNP -> 'N.V.'
, -> ','
NP -> DT NNP VBG NN
DT -> 'the'
NNP -> 'Dutch'
VBG -> 'publishing'
NN -> 'group'
. -> '.'
S -> NP-SBJ-1 VP .
NP-SBJ-1 -> NP , UCP ,
NP -> NNP NNP
NNP -> 'Rudolph'
NNP -> 'Agnew'
, -> ','
UCP -> ADJP CC NP
ADJP -> NP JJ
NP -> CD NNS
CD -> '55'
NNS -> 'years'
JJ -> 'old'
CC -> 'and'
NP -> NP PP
NP -> JJ NN
JJ -> 'former'
NN -> 'chairman'
PP -> IN NP
IN -> 'of'
N

## Probabilistic Parsing

In [10]:
# Probabilistic version of the toy grammar: every production carries a
# probability in square brackets, and the probabilities of all productions
# sharing a left-hand side add up to 1.0.
grammar = nltk.PCFG.fromstring("""
    S    -> NP VP              [1.0]
    VP   -> V NP               [0.6]
    VP   -> V NP PP            [0.4]
    PP   -> P NP               [1.0]
    V    -> 'saw'              [0.4]
    V    -> 'ate'              [0.3]
    V    -> 'walked'           [0.3]
    NP   -> Det N              [0.4]
    NP   -> Det N PP           [0.3]
    NP   -> 'John'             [0.1]
    NP   -> 'Mary'             [0.1]
    NP   -> 'Bob'              [0.1]
    Det  -> 'a'                [0.3]
    Det  -> 'an'               [0.2]
    Det  -> 'the'              [0.4]
    Det  -> 'my'               [0.1]
    N    -> 'telescope'        [0.1]
    N    -> 'man'              [0.3]
    N    -> 'dog'              [0.2]
    N    -> 'cat'              [0.2]
    N    -> 'park'             [0.2]
    P    -> 'in'               [0.3]  
    P    -> 'on'               [0.3]  
    P    -> 'by'               [0.2]  
    P    -> 'with'             [0.2]  
    """)

# Echo the parsed grammar so the productions and their probabilities can be checked.
print(grammar)

Grammar with 25 productions (start state = S)
    S -> NP VP [1.0]
    VP -> V NP [0.6]
    VP -> V NP PP [0.4]
    PP -> P NP [1.0]
    V -> 'saw' [0.4]
    V -> 'ate' [0.3]
    V -> 'walked' [0.3]
    NP -> Det N [0.4]
    NP -> Det N PP [0.3]
    NP -> 'John' [0.1]
    NP -> 'Mary' [0.1]
    NP -> 'Bob' [0.1]
    Det -> 'a' [0.3]
    Det -> 'an' [0.2]
    Det -> 'the' [0.4]
    Det -> 'my' [0.1]
    N -> 'telescope' [0.1]
    N -> 'man' [0.3]
    N -> 'dog' [0.2]
    N -> 'cat' [0.2]
    N -> 'park' [0.2]
    P -> 'in' [0.3]
    P -> 'on' [0.3]
    P -> 'by' [0.2]
    P -> 'with' [0.2]


### Viterbi Parser

In [11]:
# Parse with the PCFG bound to `grammar` in the previous cell.
viterbi_parser = nltk.ViterbiParser(grammar)
sentence = "Mary saw a dog with a telescope"
# NOTE(review): unlike the earlier cells, which call str.split(), this one
# tokenises with nltk.word_tokenize.
words = nltk.word_tokenize(sentence)
# The recorded run prints a single tree annotated with its probability.
for tree in viterbi_parser.parse(words):
    print(tree)

(S
  (NP Mary)
  (VP
    (V saw)
    (NP
      (Det a)
      (N dog)
      (PP (P with) (NP (Det a) (N telescope)))))) (p=1.0368e-06)
