# Imports

In [10]:
import nltk
import spacy
from nltk.tree import TreePrettyPrinter

# Tree generator function

This function parses the input sentence and adds a production to the grammar for each token.

In [11]:
def _quote_terminal(word):
    """Quote ``word`` as an NLTK grammar terminal.

    NLTK grammar strings accept single- or double-quoted terminals and have
    no escape syntax, so pick the quote character that does not occur in the
    word (e.g. French ``l'`` or English ``'s`` need double quotes).
    """
    if "'" not in word:
        return f"'{word}'"
    if '"' not in word:
        return f'"{word}"'
    raise ValueError(
        f"token {word!r} contains both quote characters and cannot be "
        "written as an NLTK grammar terminal"
    )


def tree(text, grammar, nlp):
    """
    Generate the syntax tree for every sentence in ``text``.

    For each sentence the base ``grammar`` is extended with one lexical
    production per POS tag found by spaCy (e.g. ``NOUN -> 'gatto' | 'pesce'``),
    the sentence is parsed with an NLTK chart parser and the first parse is
    rendered as ASCII art. The sentence and its extended grammar are printed
    as a side effect.

    Arguments:
        text: a list of sentences (strings); each one is parsed on its own
        grammar: a string with the phrase-level CFG rules in NLTK format
        nlp: a loaded spaCy pipeline (callable on a string) used for
            tokenization and POS tagging
    Returns:
        The syntax trees in string format, one per sentence, joined by
        newlines (an empty string if ``text`` is empty)
    Raises:
        ValueError: if a sentence has no parse under the grammar, or a token
            cannot be written as a grammar terminal
    """
    # Lexical rules are appended line by line, so the base grammar must end
    # with a newline or the first rule would be glued to its last line.
    base_grammar = grammar
    if base_grammar and not base_grammar.endswith("\n"):
        base_grammar += "\n"

    rendered = []

    for sent in text:
        print(sent)

        # Whitespace-only tokens (e.g. from double spaces) are tagged by spaCy
        # but are not words of the sentence, so they are left out entirely.
        tokens = [token for token in nlp(sent) if not token.is_space]

        # Parse exactly the tokens the lexical rules are built from; a second
        # tokenizer could split the sentence differently and produce words
        # that the grammar does not cover.
        words = [token.text for token in tokens]

        # POS tag -> quoted terminals, in first-seen order and without
        # duplicates (a repeated word would otherwise add a redundant rule).
        prod = {}
        for token in tokens:
            terminal = _quote_terminal(token.text)
            terminals = prod.setdefault(token.pos_, [])
            if terminal not in terminals:
                terminals.append(terminal)

        # Start from the base grammar for every sentence so that lexical
        # rules of one sentence never leak into the next.
        sent_grammar = base_grammar
        for pos, terminals in prod.items():
            sent_grammar += f"{pos} -> {' | '.join(terminals)}\n"

        print(sent_grammar)

        nltk_grammar = nltk.CFG.fromstring(sent_grammar)
        parser = nltk.ChartParser(nltk_grammar)

        # Only the first parse is rendered; report a missing parse explicitly
        # instead of failing with an opaque IndexError.
        first_tree = next(iter(parser.parse(words)), None)
        if first_tree is None:
            raise ValueError(f"no parse found for sentence: {sent!r}")

        rendered.append(TreePrettyPrinter(first_tree).text())

    return "\n".join(rendered)

# Grammars
I defined two different grammars, one for Italian and French and one for German and English.

The rules are the same as those specified in the assignment.

In [12]:
# Phrase-level CFG rules in NLTK grammar syntax. Lexical rules
# (POS -> 'word') are not listed here: tree() appends them per sentence.
#
# Both grammars share the S, VP and OBJ rules and differ only in where ADJ
# sits inside NP. The empty alternative after the final "|" in the S rule
# lets S derive nothing, which is what ends the S -> ... S recursion.
# The literals are spelled out line by line so that the trailing spaces,
# which are part of the string values, stay visible.

# Italian / French: NP allows DET NOUN ADJ.
grammarItFr = (
    "\n"
    "S ->  NP VP OBJ S | \n"
    "NP -> DET NOUN ADJ | DET NOUN | PROPN | PRON\n"
    "VP -> VERB | VERB ADV | AUX ADV | AUX\n"
    "OBJ -> PUNCT | NP PUNCT \n"
)

# German / English: NP allows DET ADJ NOUN.
grammarDeEn = (
    "\n"
    "S ->  NP VP OBJ S | \n"
    "NP -> DET ADJ NOUN | DET NOUN | PROPN | PRON\n"
    "VP -> VERB | VERB ADV | AUX ADV | AUX\n"
    "OBJ -> PUNCT | NP PUNCT \n"
)



# Italian


In [13]:
# Load the Italian spaCy pipeline and parse one sample sentence with the
# Italian/French grammar.
nlp = spacy.load("it_core_news_sm")
text = ["Il gatto mangia il pesce."]

print(tree(text=text, grammar=grammarItFr, nlp=nlp))

Il gatto mangia il pesce.

S ->  NP VP OBJ S | 
NP -> DET NOUN ADJ | DET NOUN | PROPN | PRON
VP -> VERB | VERB ADV | AUX ADV | AUX
OBJ -> PUNCT | NP PUNCT 
DET -> 'Il' | 'il'
NOUN -> 'gatto' | 'pesce'
VERB -> 'mangia'
PUNCT -> '.'

                      S                     
      ________________|___________________   
     |          |             OBJ         | 
     |          |          ____|_____     |  
     NP         VP        NP         |    | 
  ___|____      |      ___|____      |    |  
DET      NOUN  VERB  DET      NOUN PUNCT  S 
 |        |     |     |        |     |    |  
 Il     gatto mangia  il     pesce   .   ...



# English

In [14]:
# Load the English spaCy pipeline and parse one sample sentence with the
# German/English grammar.
nlp = spacy.load("en_core_web_sm")
text = ["I attend the university."]

print(tree(text=text, grammar=grammarDeEn, nlp=nlp))

I attend the university.

S ->  NP VP OBJ S | 
NP -> DET ADJ NOUN | DET NOUN | PROPN | PRON
VP -> VERB | VERB ADV | AUX ADV | AUX
OBJ -> PUNCT | NP PUNCT 
PRON -> 'I'
VERB -> 'attend'
DET -> 'the'
NOUN -> 'university'
PUNCT -> '.'

             S                          
  ___________|________________________   
 |     |               OBJ            | 
 |     |          ______|________     |  
 NP    VP        NP              |    | 
 |     |      ___|______         |    |  
PRON  VERB  DET        NOUN    PUNCT  S 
 |     |     |          |        |    |  
 I   attend the     university   .   ...



# French

In [15]:
# Load the French spaCy pipeline and parse one sample sentence with the
# Italian/French grammar.
nlp = spacy.load("fr_core_news_sm")
text = ["Le cuisinier prépare le dîner."]

print(tree(text=text, grammar=grammarItFr, nlp=nlp))

Le cuisinier prépare le dîner.

S ->  NP VP OBJ S | 
NP -> DET NOUN ADJ | DET NOUN | PROPN | PRON
VP -> VERB | VERB ADV | AUX ADV | AUX
OBJ -> PUNCT | NP PUNCT 
DET -> 'Le' | 'le'
NOUN -> 'cuisinier' | 'dîner'
VERB -> 'prépare'
PUNCT -> '.'

                           S                     
      _____________________|___________________   
     |               |             OBJ         | 
     |               |          ____|_____     |  
     NP              VP        NP         |    | 
  ___|______         |      ___|____      |    |  
DET        NOUN     VERB  DET      NOUN PUNCT  S 
 |          |        |     |        |     |    |  
 Le     cuisinier prépare  le     dîner   .   ...



# German

In [9]:
# Load the German spaCy pipeline and parse one sample sentence with the
# German/English grammar.
# NOTE(review): the saved output of this cell shows only the tree, without
# the sentence and grammar that tree() prints — it looks stale; re-run the
# notebook top to bottom to refresh it.
nlp = spacy.load("de_core_news_sm")
text = ["Ich liebe dich."]

print(tree(text=text, grammar=grammarDeEn, nlp=nlp))

            S                
  __________|______________   
 |     |        OBJ        | 
 |     |     ____|____     |  
 NP    VP   NP        |    | 
 |     |    |         |    |  
PRON  VERB PRON     PUNCT  S 
 |     |    |         |    |  
Ich  liebe dich       .   ...

