In [1]:
# 1.
from nltk import CFG
from nltk.parse import RecursiveDescentParser, ChartParser

In [2]:
# 2. a.

# Define the context-free grammar
grammar_string = """
    S -> NP VP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | VP PP
    PP -> P NP
    Det -> 'the' | 'a'
    N -> 'dog' | 'cat' | 'man' | 'woman' | 'telescope'
    V -> 'chased' | 'saw' | 'bit'
    P -> 'in' | 'on' | 'with'
"""

# Create a CFG from the grammar string
cfg = CFG.fromstring(grammar_string)

In [3]:
# 3. 
sentence = "the cat chased the dog"
# Create parser
parser = ChartParser(cfg)

# Split sentence
split_sentence = sentence.split()
for tree in parser.parse(split_sentence):
    tree.pretty_print()

              S               
      ________|_____           
     |              VP        
     |         _____|___       
     NP       |         NP    
  ___|___     |      ___|___   
Det      N    V    Det      N 
 |       |    |     |       |  
the     cat chased the     dog



In [4]:
# 3. another sentence with ambiguity
sentence = "I saw the man with the telescope"
# Create parser
parser = ChartParser(cfg)

# Split sentence
split_sentence = sentence.split()
for tree in parser.parse(split_sentence):
    tree.pretty_print()

     S                                    
  ___|___________                          
 |               VP                       
 |        _______|________                 
 |       VP               PP              
 |    ___|___         ____|___             
 |   |       NP      |        NP          
 |   |    ___|___    |     ___|______      
 NP  V  Det      N   P   Det         N    
 |   |   |       |   |    |          |     
 I  saw the     man with the     telescope

     S                                
  ___|_______                          
 |           VP                       
 |    _______|___                      
 |   |           NP                   
 |   |    _______|____                 
 |   |   |   |        PP              
 |   |   |   |    ____|___             
 |   |   |   |   |        NP          
 |   |   |   |   |     ___|______      
 NP  V  Det  N   P   Det         N    
 |   |   |   |   |    |          |     
 I  saw the man with the     telescope



In [5]:
# 4.
# 4. a. = Due the recursive definition of the VP.
# 4. b. = If it had more than one parse tree, it means that the sentenece is-
# -ambigous. since more than one tree is there for the sentence the parser-
# -cannot verify the grammar of the sentence.

In [6]:
# 5. a. b.
# Define the context-free grammar
# Added VP -> V PP, PP -> P NP production to attach PP to VP
# which removes recursive definitioon of VP.
grammar_string = """
    S -> NP VP
    NP -> Det N | Det N PP | 'I'
    VP -> V NP | V PP
    PP -> P NP
    Det -> 'the' | 'a'
    N -> 'dog' | 'cat' | 'man' | 'woman' | 'telescope'
    V -> 'chased' | 'saw' | 'bit'
    P -> 'in' | 'on' | 'with'
"""

# Create a CFG from the grammar string
cfg = CFG.fromstring(grammar_string)

sentence = "I saw the man with the telescope"
# Create parser
parser = ChartParser(cfg)

# Split sentence
split_sentence = sentence.split()
for tree in parser.parse(split_sentence):
    tree.pretty_print()

     S                                
  ___|_______                          
 |           VP                       
 |    _______|___                      
 |   |           NP                   
 |   |    _______|____                 
 |   |   |   |        PP              
 |   |   |   |    ____|___             
 |   |   |   |   |        NP          
 |   |   |   |   |     ___|______      
 NP  V  Det  N   P   Det         N    
 |   |   |   |   |    |          |     
 I  saw the man with the     telescope



In [7]:
# 5. c = By modifying the grammer we remove ambiguity, which produces only-
# -a single parse tree.

In [8]:
grammar_string = """
S -> NP VP
NP -> N | D N | A N | A N P | D N P | D A N P | N P | N N
NP -> Pn | Pr
VP -> V | NP V | Advp V | Advp Advp V | Advp Adv Advp V
Advp -> Adv | Deg Adv | N N N | N N | N | Pg N N N
N -> "බල්ලා"|"මිනිසා"|"මේසය"|"වන්න"|"බත්"|"ඊයේ"|"පෙරේදා"|"සඳුදා"|"බදාදා"|"ඉරිදා"|"ජනවාරි"|"පෙබරවාරි"|"සනියේ"|"මාසේ" | "අවුරුද්දේ"
D -> "ඒ"|"මේ"|"අර"|"ඔය"|"සමහර"
A -> "උස"|"හොඳ"|"නරක"|"කොට" "ලස්සන"
P -> "උඩ"|"යට"|"මන"
Pn -> "අමල්"|"කමල්"|"සමන්"|"සචිනි"|"මාලා"|"නාමල්"
Pr -> "ඇය" "ඔවුන්"|"අපි"/"මම"|"ඌ"|"උන්"|"ඔබ"|"ඔබලා"
V -> "කෑවා"|"බිව්වා"|"දැක්කා"|"ගියා"|"දිව්වා"|"වා"|"නැටුවා"|"කමු"
Adv -> "ලස්සනට" | "කැනට" | "ඉක්මනට" | "වේගයෙන්" | "හෙමින්" | "පහසුවෙන්" |"පන්නියේදි"|"ඉස්කෝලෙදි"|"ගමේ දී"|"බස් එකේ දී"|"නුවරදි" 
Deg -> "බොහොම" | "හරිම" | "නොම" | "මාර"
PA -> "කන" | "බොන" | "යන" | "එන" | "බලන" | "කියන" | "ලියන" | "නටන" | "කාපු" | "බීපු" | "නාපු" |"ආපු" | "ලියපු" | "කියවපු" | "කිව්ව" | "ලිව්ව" | "ආව" | "ගිය" | "නැටුව"
"""

# Create a CFG from the grammar string
cfg = CFG.fromstring(grammar_string)

sentence1 = "අපි බත් කමු"
sentence2 = "කමල් ගිය අවුරුද්දේ ජනවාරි මාසේ ඉස්කෝලෙදි බොහොම ලස්සනට නැටුවා "
sentence3 = "සමන් හරිම වේගයෙන් දිව්වා"

# Create parser
parser = ChartParser(cfg)

# Split sentence
def print_parse_tree(sentence):
    print(sentence)
    split_sentence = sentence.split()
    for tree in parser.parse(split_sentence):
        tree.pretty_print()

# print_parse_tree(sentence1)
print_parse_tree(sentence2)

කමල් ගිය අවුරුද්දේ ජනවාරි මාසේ ඉස්කෝලෙදි බොහොම ලස්සනට නැටුවා 


In [9]:
print_parse_tree(sentence1)

අපි බත් කමු


In [10]:
print_parse_tree(sentence2)

කමල් ගිය අවුරුද්දේ ජනවාරි මාසේ ඉස්කෝලෙදි බොහොම ලස්සනට නැටුවා 


In [15]:
print_parse_tree(sentence3)

සමන් හරිම වේගයෙන් දිව්වා
           S                 
  _________|______            
 |                VP         
 |          ______|______     
 NP       Advp           |   
 |     ____|______       |    
 Pn  Deg         Adv     V   
 |    |           |      |    
සමන් හරිම      වේගයෙන් දිව්වා

