In [None]:
import nltk
import nltk.grammar

In [None]:
# Toy grammar written as text: phrase-structure rules first, then the lexicon.
# Det -> PosPro | Art and NP -> PropN are unary rules (one nonterminal on the right).
cfg_rules = """
S -> NP VP
NP -> Det N | PropN
Det -> PosPro | Art
VP -> Vt NP

Art -> 'the' | 'a'
PropN -> 'Alice'
N -> 'duck' | 'telescope' | 'park'
Vt -> 'saw'
PosPro -> 'my' | 'her'
"""
# Turn the rule text into an nltk CFG object.
cfg = nltk.CFG.fromstring(cfg_rules)

In [None]:
# Printing lists one production per line; each `|` alternative becomes its own production.
print(cfg)

Grammar with 15 productions (start state = S)
    S -> NP VP
    NP -> Det N
    NP -> PropN
    Det -> PosPro
    Det -> Art
    VP -> Vt NP
    Art -> 'the'
    Art -> 'a'
    PropN -> 'Alice'
    N -> 'duck'
    N -> 'telescope'
    N -> 'park'
    Vt -> 'saw'
    PosPro -> 'my'
    PosPro -> 'her'


In [None]:
# Flexible CNF (per the NLTK docs) allows A -> B C, A -> B and A -> 'word',
# so the unary rules in this grammar do not prevent a True result.
print(cfg.is_flexible_chomsky_normal_form())

True


In [None]:
# Bare expression: the notebook shows the repr of the converted grammar.
# The result is not stored and `cfg` is not rebound.
cfg.chomsky_normal_form()

<Grammar with 17 productions>

In [None]:
# In the printed result the unary rules (Det -> PosPro, Det -> Art, NP -> PropN) are gone;
# Det and NP expand directly to the words instead.
print(cfg.chomsky_normal_form())

Grammar with 17 productions (start state = S)
    S -> NP VP
    PropN -> 'Alice'
    VP -> Vt NP
    PosPro -> 'her'
    Det -> 'my'
    NP -> Det N
    PosPro -> 'my'
    Art -> 'a'
    N -> 'telescope'
    Vt -> 'saw'
    N -> 'park'
    NP -> 'Alice'
    Det -> 'the'
    Det -> 'a'
    Det -> 'her'
    N -> 'duck'
    Art -> 'the'


In [None]:
from nltk import CFG

In [None]:
# Small dog/cat grammar. NP -> NP PP and VP -> VP PP are recursive,
# so prepositional phrases can be attached repeatedly.
grammar = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | NP PP
    VP -> V NP | VP PP
    Det -> 'a' | 'the'
    N -> 'dog' | 'cat'
    V -> 'chased' | 'sat'
    P -> 'on' | 'in'
""")

In [None]:
# Show the grammar with every alternative expanded into its own production.
print(grammar)

Grammar with 14 productions (start state = S)
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    N -> 'dog'
    N -> 'cat'
    V -> 'chased'
    V -> 'sat'
    P -> 'on'
    P -> 'in'


In [None]:
# Start symbol of the grammar (the left-hand side of the first rule, S).
grammar.start()

S

In [None]:
# The productions as a list, in the order they were written in the rule text.
grammar.productions()

[S -> NP VP,
 PP -> P NP,
 NP -> Det N,
 NP -> NP PP,
 VP -> V NP,
 VP -> VP PP,
 Det -> 'a',
 Det -> 'the',
 N -> 'dog',
 N -> 'cat',
 V -> 'chased',
 V -> 'sat',
 P -> 'on',
 P -> 'in']

In [None]:
# Probabilistic CFGs: each production carries a probability in square brackets.
from nltk import PCFG

In [None]:
# Probabilistic grammar: the bracketed number after each alternative is its probability.
# As written, the probabilities for each left-hand side add up to 1.0
# (e.g. NP: 0.5 + 0.25 + 0.1 + 0.15).
toy_pcfg1 = PCFG.fromstring("""
    S -> NP VP [1.0]
    NP -> Det N [0.5] | NP PP [0.25] | 'John' [0.1] | 'I' [0.15]
    Det -> 'the' [0.8] | 'my' [0.2]
    N -> 'man' [0.5] | 'telescope' [0.5]
    VP -> VP PP [0.1] | V NP [0.7] | V [0.2]
    V -> 'ate' [0.35] | 'saw' [0.65]
    PP -> P NP [1.0]
    P -> 'with' [0.61] | 'under' [0.39]
    """)

In [None]:
# Start symbol of the probabilistic grammar.
toy_pcfg1.start()

S

In [None]:
# Each production is listed together with its probability.
toy_pcfg1.productions()

[S -> NP VP [1.0],
 NP -> Det N [0.5],
 NP -> NP PP [0.25],
 NP -> 'John' [0.1],
 NP -> 'I' [0.15],
 Det -> 'the' [0.8],
 Det -> 'my' [0.2],
 N -> 'man' [0.5],
 N -> 'telescope' [0.5],
 VP -> VP PP [0.1],
 VP -> V NP [0.7],
 VP -> V [0.2],
 V -> 'ate' [0.35],
 V -> 'saw' [0.65],
 PP -> P NP [1.0],
 P -> 'with' [0.61],
 P -> 'under' [0.39]]

In [None]:
# Nonterminal names may include annotation characters such as ^, <, > and -;
# `VP^<TOP>` and `NP^<VP-TOP>` are each read as a single symbol.
g = CFG.fromstring("VP^<TOP> -> VBP NP^<VP-TOP>")
# Bare expression: displays the grammar's repr.
g

<Grammar with 1 productions>

In [None]:
# Left-hand side of the only production, showing the annotated name is kept intact.
g.productions()[0].lhs()

VP^<TOP>

In [None]:
# Grammars can contain both empty strings ('') and empty productions (nothing on the right-hand side):
from nltk.grammar import CFG
from nltk.parse.generate import generate

In [None]:
# Rebinds `grammar`. B expands either to the terminal 'b' or to the empty-string terminal ''.
grammar = CFG.fromstring("""
    S -> A B
    A -> 'a'
    # An empty string:
    B -> 'b' | ''
    """)

In [None]:
# The empty-string alternative is printed as its own production, B -> ''.
print(grammar)

Grammar with 4 productions (start state = S)
    S -> A B
    A -> 'a'
    B -> 'b'
    B -> ''


In [None]:
# Enumerate the sentences the grammar derives; the empty-string terminal
# still appears as a '' token in the second sentence.
list(generate(grammar))

[['a', 'b'], ['a', '']]

In [None]:
# Rebinds `grammar` again. Here the second alternative for B has nothing after the `|`,
# i.e. an empty production rather than an empty-string terminal.
grammar = CFG.fromstring("""
    S -> A B
    A -> 'a' | 'c' | 'd'
    # An empty production:
    B -> 'b' |
    """)

In [None]:
# The empty production is printed as `B -> ` with an empty right-hand side.
print(grammar)

Grammar with 6 productions (start state = S)
    S -> A B
    A -> 'a'
    A -> 'c'
    A -> 'd'
    B -> 'b'
    B -> 


In [None]:
# With an empty production B contributes no token at all,
# so the sentences without 'b' are a single token long.
list(generate(grammar))

[['a', 'b'], ['a'], ['c', 'b'], ['c'], ['d', 'b'], ['d']]

## Exercise 2

In [None]:
# Two pre-tokenised example sentences: each raw string is split on whitespace,
# so punctuation that is already space-separated becomes its own token.
sentences = [
    text.split()
    for text in (
        "the purchase price includes two ancillary companies .",
        "the guild began a strike against the TV and movie industry in March 1988 .",
    )
]

In [None]:
# Disabled coverage check, kept for reference.
# NOTE(review): at this point `grammar` is the a/b/c/d toy grammar defined above, which
# contains none of the words in `sentences` - presumably why the loop is commented out;
# confirm before re-enabling.
# for s in sentences:
#     grammar.check_coverage(s)

In [None]:
# Rebinds `sentences` to a single tokenised sentence (a one-element list of token lists).
sentences = ["cat sat on the mat".split()]

In [None]:
# Rebinds `grammar` to the same empty-string grammar text used earlier in the notebook,
# replacing the empty-production grammar that was bound to the name in between.
grammar = CFG.fromstring("""
    S -> A B
    A -> 'a'
    # An empty string:
    B -> 'b' | ''
    """)

## Main Exercise 1

In [None]:
# Grammar for Main Exercise 1. Two right-hand sides have three symbols
# (VP -> V NP PP and NP -> Det N PP), so this grammar is not binary as written.
grammar1 = nltk.CFG.fromstring("""
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  V -> "saw" | "ate" | "walked"
  NP -> "John" | "Mary" | "Bob" | Det N | Det N PP
  Det -> "a" | "an" | "the" | "my"
  N -> "man" | "dog" | "cat" | "telescope" | "park"
  P -> "in" | "on" | "by" | "with"
  """)

In [None]:
# Whitespace-tokenised test sentence for grammar1.
# (The variable name's spelling is kept because the parsing cell below refers to it.)
sentance1 = "John ate my telescope".split()

In [None]:
# Convert grammar1 to Chomsky normal form and keep the result for the parser below.
cnf_grammar1 = grammar1.chomsky_normal_form()

In [None]:
# Bare expression: the repr reports how many productions the converted grammar has.
cnf_grammar1

<Grammar with 27 productions>

In [None]:
from nltk.parse.chart import BottomUpChartParser

# Build a bottom-up chart parser over the CNF grammar and collect every
# parse tree it yields for the tokenised sentence.
parser = BottomUpChartParser(cnf_grammar1)
parses = [tree for tree in parser.parse(sentance1)]

In [None]:
# Bare expression: displays the list of parse trees (one tree for this sentence).
parses

[Tree('S', [Tree('NP', ['John']), Tree('VP', [Tree('V', ['ate']), Tree('NP', [Tree('Det', ['my']), Tree('N', ['telescope'])])])])]

## Main Exercise 2

In [None]:
# Grammar for Main Exercise 2 (same rules as the earlier dog/cat `grammar`).
# Every rule is already of the form A -> B C or A -> 'word'.
# Note that a noun phrase can only be built as `Det N` or `NP PP`.
grammar2 = CFG.fromstring("""
    S -> NP VP
    PP -> P NP
    NP -> Det N | NP PP
    VP -> V NP | VP PP
    Det -> 'a' | 'the'
    N -> 'dog' | 'cat'
    V -> 'chased' | 'sat'
    P -> 'on' | 'in'
    """)

In [None]:
# Whitespace-tokenised test sentence for grammar2.
# NOTE(review): 'cat' has no determiner here, while grammar2 only builds noun phrases
# via `Det N` or `NP PP`; the recorded parse result at the end of this section is an
# empty list. Presumably a deliberate negative example - confirm, or use
# "the dog chased the cat" if a parse was expected.
sentance2 = "the dog chased cat".split()

In [None]:
# Convert grammar2 to Chomsky normal form and keep the result for the parser below.
cnf_grammar2 = grammar2.chomsky_normal_form()

In [None]:
# Bare expression: the repr reports how many productions the converted grammar has.
cnf_grammar2

<Grammar with 14 productions>

In [None]:
from nltk.parse.chart import BottomUpChartParser

# Rebinds `parser` and `parses` from the previous exercise: a bottom-up chart
# parser over the CNF form of grammar2, and every tree it yields for sentance2.
parser = BottomUpChartParser(cnf_grammar2)
parses = [tree for tree in parser.parse(sentance2)]

In [None]:
# Bare expression: displays the list of parse trees; an empty list means no parse was found.
parses

[]