# Série TP 4 - TALN - Analyse Syntaxique – Parse Trees with NLTK


In [1]:
import nltk 

### CFG Grammar

In [2]:
# Grammar string declaration
# Toy context-free grammar written as `LHS -> RHS | RHS` rules; quoted symbols
# are the terminal words, unquoted symbols are non-terminals.
# Both VP and NP have a variant with a trailing PP, so a PP that follows an
# object noun phrase can be attached in two ways (ambiguous sentences).

mygrammar_str= """
  S -> NP VP
  VP -> V NP | V NP PP
  PP -> P NP
  NP -> Det N | Det N PP
  V -> 'saw' | 'ate' | 'walked' | 'shot'
  NP -> 'John' | 'Mary' | 'Bob' 
  Det -> 'a' | 'an' | 'the' | 'my'
  N -> 'girl' | 'dog' | 'pajamas' | 'telescope' | 'elephant' | 'bone'
  P -> 'in' | 'on' | 'by' | 'with'
  """

In [3]:
# Build the NLTK CFG grammar object from the grammar string.
# `cfg` is the grammar handed to both parsers used in this notebook.

cfg = nltk.CFG.fromstring(mygrammar_str)

### Text Data - Example 1 

In [4]:
# Example sentence 1: every word is a terminal of the grammar defined above.
text = 'the dog ate the bone'

**NB** : Il n'est pas possible de parser directement le texte brut avec NLTK. Il faut d'abord le tokeniser, puis vérifier si les terminaux/tokens de la grammaire fournie sont POS-taggés ou non. S'ils ne le sont pas, il faut procéder à cette étape avant le parsing.  

In [5]:
# Tokenization: split the raw sentence into the word tokens the parsers consume.
# NOTE(review): word_tokenize presumably needs NLTK's tokenizer data to be
# installed (e.g. through nltk.download) - confirm on a fresh environment.
tokens = nltk.word_tokenize(text)

In [6]:
# Display the token list as the cell output.
tokens

['the', 'dog', 'ate', 'the', 'bone']

### Parsing - ChartParser and RecursiveDescentParser

In [7]:
# Parsing with the ChartParser algorithm.
# `chart_parser` is the correctly spelled name; `chat_parser` is kept as an
# alias so any cell still using the original (misspelled) name keeps working.

chart_parser = nltk.ChartParser(cfg)
chat_parser = chart_parser

# parse() produces its trees lazily. Store them in a list so the result can be
# iterated more than once (e.g. when the printing cell is re-run) instead of
# silently yielding nothing on the second pass.
trees = list(chart_parser.parse(tokens))

In [8]:
# Print and draw trees
# For each parse: print its bracketed form, then draw it graphically.
# NOTE(review): tree.draw() presumably opens a separate GUI window and may block
# until that window is closed - confirm before relying on "Run All" or a
# headless kernel.
for tree in trees:
    print(tree)
    tree.draw()

(S (NP (Det the) (N dog)) (VP (V ate) (NP (Det the) (N bone))))


In [9]:
# Parsing with the RecursiveDescentParser algorithm (same grammar, same tokens)

rd_parser = nltk.RecursiveDescentParser(cfg)
# Materialise the lazily produced parses in a list so `trees` survives more than
# one pass; re-running the printing cell would otherwise loop over an already
# exhausted iterator and print nothing.
trees = list(rd_parser.parse(tokens))

In [10]:
# Print and draw trees
# Same display loop as for the chart parser: bracketed text, then a drawing.
# NOTE(review): tree.draw() presumably opens a separate GUI window and may block
# until that window is closed - confirm before relying on "Run All" or a
# headless kernel.
for tree in trees:
    print(tree)
    tree.draw()

(S (NP (Det the) (N dog)) (VP (V ate) (NP (Det the) (N bone))))


### Tree objects from string

In [11]:
# Tree declaration in NLTK : Tree objects
# The tree is built directly from its bracketed string form; no grammar or
# parser is involved. In this string `my cookie` is two leaves placed directly
# under NP (there are no Det/N nodes).
mytree = nltk.Tree.fromstring('(S (NP I) (VP (V enjoyed) (NP my cookie)))')
print(mytree)

(S (NP I) (VP (V enjoyed) (NP my cookie)))


In [12]:
# Render the tree as an ASCII diagram in the cell output.
mytree.pretty_print()

             S                
  ___________|___              
 |               VP           
 |      _________|___          
 NP    V             NP       
 |     |          ___|____     
 I  enjoyed      my     cookie



### Text Data - Example 2 

In [13]:
# Example sentence 2: structurally ambiguous under the grammar above, because
# the PP "with a telescope" can attach either to the object NP or to the VP.
text2 = "John saw a girl with a telescope"

In [14]:
# Tokenization (same step as for Example 1).
tokens2 = nltk.word_tokenize(text2)

In [15]:
# Parse the second sentence with a recursive-descent parser built from the same
# grammar (re-created here so this section does not rely on the Example 1 parser).
rd_parser = nltk.RecursiveDescentParser(cfg)
# Keep every parse in a list: the sentence is ambiguous, and a list can be both
# looped over and indexed repeatedly, which a one-shot iterator cannot.
trees2 = list(rd_parser.parse(tokens2))

In [16]:
# Two parse trees
# The sentence has two analyses; each is printed in bracketed form and drawn.
# NOTE(review): tree.draw() presumably opens a separate GUI window and may block
# until that window is closed - confirm before relying on "Run All" or a
# headless kernel.
for tree in trees2:
    print(tree)
    tree.draw()

(S
  (NP John)
  (VP
    (V saw)
    (NP (Det a) (N girl) (PP (P with) (NP (Det a) (N telescope))))))
(S
  (NP John)
  (VP
    (V saw)
    (NP (Det a) (N girl))
    (PP (P with) (NP (Det a) (N telescope)))))


In [17]:
# Print second tree only
# The parses are collected into a list so that a single tree can be selected
# by index; index 1 is the second parse.
trees2 = list(rd_parser.parse(tokens2))
print(trees2[1])

(S
  (NP John)
  (VP
    (V saw)
    (NP (Det a) (N girl))
    (PP (P with) (NP (Det a) (N telescope)))))


In [18]:
# First parse as an ASCII diagram: the PP is attached inside the object NP.
trees2[0].pretty_print()

      S                                 
  ____|_______                           
 |            VP                        
 |     _______|____                      
 |    |            NP                   
 |    |    ________|____                 
 |    |   |   |         PP              
 |    |   |   |     ____|___             
 |    |   |   |    |        NP          
 |    |   |   |    |     ___|______      
 NP   V  Det  N    P   Det         N    
 |    |   |   |    |    |          |     
John saw  a  girl with  a      telescope



In [19]:
# Second parse as an ASCII diagram: the PP is attached directly to the VP.
trees2[1].pretty_print()

      S                                     
  ____|___________                           
 |                VP                        
 |     ___________|_________                 
 |    |       |             PP              
 |    |       |         ____|___             
 |    |       NP       |        NP          
 |    |    ___|___     |     ___|______      
 NP   V  Det      N    P   Det         N    
 |    |   |       |    |    |          |     
John saw  a      girl with  a      telescope



## Basic CYK Parser in Python

- Source : https://github.com/ikergarcia1996/Basic-CYK-Parser
- Author : Iker García Ferrero
- File : CYK_Parser.py

In [20]:
from CYK_Parser import Grammar

In [21]:
# Initialize the grammar and read the rules from a file
# The rules that were read are echoed in the cell output.
# NOTE(review): the file name is a relative path, presumably resolved against
# the notebook's working directory - confirm the file is located there.
g = Grammar('example_grammar1.txt')


Grammar file readed succesfully. Rules readed:
S --> NP VP
NP --> Det N
VP --> V NP
N --> flight
V --> includes
N --> meal
Det --> a
Det --> the



In [22]:
# Text Data
# NOTE: this rebinds `text`, which held the Example 1 sentence in the NLTK
# section; cells above that use `text` will see this sentence if re-run later.
text = 'the flight includes a meal'

In [23]:
# Parse a sentence
# The raw sentence string is passed directly (no separate tokenization call).
# The applied rules and the accept/reject verdict are printed by parse().
g.parse(text)

Applied Rule: NP[2,1] --> Det[1,1] N[1,2]
Applied Rule: NP[2,4] --> Det[1,4] N[1,5]
Applied Rule: VP[3,3] --> V[1,3] NP[2,4]
Applied Rule: S[5,1] --> NP[2,1] VP[3,3]
----------------------------------------
The sentence IS accepted in the language
Number of possible trees: 1
----------------------------------------


In [24]:
# Print the table used for parsing the sentence
# In the printed table the words are on the bottom row and the full-sentence
# cell (holding 'S' when the sentence is accepted) is on the top row.
g.print_parse_table()


-------  ------  --------  -------  -----
['S']
[]       []
[]       []      ['VP']
['NP']   []      []        ['NP']
['Det']  ['N']   ['V']     ['Det']  ['N']
the      flight  includes  a        meal
-------  ------  --------  -------  -----



In [25]:
# Get the list of trees generated for the sentence
# NOTE: this rebinds `trees`, previously used for the NLTK parses.
# print() only shows the default object repr of the returned items (see the
# output), not a readable tree.
trees = g.get_trees()
print(trees)

[<CYK_Parser.production_rule object at 0x000001F060DA77F0>]


### Example 2

In [27]:
# Second CYK example: load a grammar that also has PP rules (VP -> VP PP and
# NP -> NP PP, as echoed in the output).
g2 = Grammar('example_grammar2.txt')

# Parse a sentence containing a prepositional phrase, then show the parse table.
g2.parse('they borrowed that book from the library')
g2.print_parse_table()

# Retrieve the generated trees (the output reports 2 possible trees) and print
# the returned list.
trees = g2.get_trees()
print(trees)


Grammar file readed succesfully. Rules readed:
S --> NP VP
PP --> P NP
VP --> V NP
VP --> VP PP
NP --> Det N
NP --> NP PP
NP --> they
N --> book
N --> library
P --> from
V --> borrowed
Det --> the
Det --> that

Applied Rule: NP[2,3] --> Det[1,3] N[1,4]
Applied Rule: NP[2,6] --> Det[1,6] N[1,7]
Applied Rule: VP[3,2] --> V[1,2] NP[2,3]
Applied Rule: PP[3,5] --> P[1,5] NP[2,6]
Applied Rule: S[4,1] --> NP[1,1] VP[3,2]
Applied Rule: NP[5,3] --> NP[2,3] PP[3,5]
Applied Rule: VP[6,2] --> V[1,2] NP[5,3]
Applied Rule: VP[6,2] --> VP[3,2] PP[3,5]
Applied Rule: S[7,1] --> NP[1,1] VP[6,2]
Applied Rule: S[7,1] --> NP[1,1] VP[6,2]
----------------------------------------
The sentence IS accepted in the language
Number of possible trees: 2
----------------------------------------

----------  ------------  -------  -----  ------  -------  -------
['S', 'S']
[]          ['VP', 'VP']
[]          []            ['NP']
['S']       []            []       []
[]          ['VP']        []       []     ['PP']