# Dependency Grammar (first assignment NLU)
* Gaia Trebucchi
* Gaia.trebucchi@gmail.com



In [1]:
import spacy
# Shared spaCy pipeline used by every function below; loaded once at module level.
spacy_nlp = spacy.load('en_core_web_sm')

# Example sentences used in the demonstration cells.
sentence='I saw the man with the telescope.'
sentence1='Gaia brought her cat Costina some delicious food'

### Function 1:
#### Extract a path of dependency relations from the ROOT to a token. 
This function takes a sentence as input and returns a dictionary whose keys are the tokens of the sentence. The value for each key is a list representing the tokens encountered on the way from the root of the sentence down to the key token. Each element of the list is a tuple composed of a token and its dependency relation with its head.

In [2]:
def path_dependency(sentence):
    """Map every token of ``sentence`` to its dependency path from the root.

    Args:
        sentence: Raw text, parsed with the module-level ``spacy_nlp`` pipeline.

    Returns:
        A dict keyed by spaCy ``Token``. Each value is a list of
        ``(text, dep_)`` tuples ordered from the sentence root down to the
        key token itself, which is always the last element.
    """
    doc = spacy_nlp(sentence)
    paths = {}
    for token in doc:
        # ``Token.ancestors`` follows head links from the token's head up to
        # the root and stops on the tree structure rather than on the label
        # string, so the walk cannot loop forever when a self-headed token is
        # not labelled 'ROOT'.
        chain = [token, *token.ancestors]
        paths[token] = [(node.text, node.dep_) for node in reversed(chain)]
    return paths

Example with the sentence "I saw the man with the telescope":

In [4]:
# Print the root-to-token dependency path for every token of the example sentence.
print(path_dependency(sentence))

{I: [('saw', 'ROOT'), ('I', 'nsubj')], saw: [('saw', 'ROOT')], the: [('saw', 'ROOT'), ('man', 'dobj'), ('the', 'det')], man: [('saw', 'ROOT'), ('man', 'dobj')], with: [('saw', 'ROOT'), ('man', 'dobj'), ('with', 'prep')], the: [('saw', 'ROOT'), ('man', 'dobj'), ('with', 'prep'), ('telescope', 'pobj'), ('the', 'det')], telescope: [('saw', 'ROOT'), ('man', 'dobj'), ('with', 'prep'), ('telescope', 'pobj')], .: [('saw', 'ROOT'), ('.', 'punct')]}


### Function 2:
#### Extract the subtree of dependents of a given token.
The input of this function is a sentence and the output is a dictionary whose keys are the tokens of the sentence and whose value for each key is the list of tokens (as strings) belonging to the subtree of the token stored as a key, in the order they appear in the sentence.

In [6]:
def subtree_token(sentence):
    """Collect the dependency subtree of every token in ``sentence``.

    Args:
        sentence: Raw text, parsed with the module-level ``spacy_nlp`` pipeline.

    Returns:
        A dict keyed by spaCy ``Token``. Each value is the list of token
        texts yielded by that token's ``subtree`` (the token included).
    """
    parsed = spacy_nlp(sentence)
    return {
        tok: [member.text for member in tok.subtree]
        for tok in parsed
    }

Example with the sentence "I saw the man with the telescope":

In [7]:
# Print the subtree (as token texts) of every token of the example sentence.
print(subtree_token(sentence))

{I: ['I'], saw: ['I', 'saw', 'the', 'man', 'with', 'the', 'telescope', '.'], the: ['the'], man: ['the', 'man', 'with', 'the', 'telescope'], with: ['with', 'the', 'telescope'], the: ['the'], telescope: ['the', 'telescope'], .: ['.']}


### Function 3:
#### check if a given list of tokens (segment of a sentence) forms a subtree.
This function takes as input a sentence and a segment of the sentence and returns as output True if the segment forms a subtree of dependencies in the sentence and False if it doesn't. 

In [10]:
def check_subtree(sentence, segment):
    """Tell whether ``segment`` is exactly the subtree of some token.

    Args:
        sentence: Raw text whose dependency subtrees are computed with
            ``subtree_token``.
        segment: List of token texts, compared for equality (order
            included) against each subtree list.

    Returns:
        True if ``segment`` equals one of the subtree lists of ``sentence``,
        False otherwise.
    """
    candidates = subtree_token(sentence).values()
    return any(segment == candidate for candidate in candidates)

Example with two different segments (one that forms a subtree of dependencies in the input sentence parsing and one that doesn't) and the sentence "I saw the man with the telescope":

In [11]:
# First segment has the tokens out of sentence order; second is compared as written.
print(check_subtree(sentence,[ 'the', 'telescope', 'with']))
print(check_subtree(sentence,[ 'the', 'man','with','the', 'telescope']))

False
True


### Function 4:
#### identify head of a span, given its tokens.
The input of this function is a list of tokens (not necessarily a sentence) and the output is the head of the span of the tokens.

In [13]:
def head_of_span(segment):
    """Return the syntactic head of a sequence of tokens.

    The tokens are joined with single spaces, parsed as a stand-alone text
    with the module-level ``spacy_nlp`` pipeline, and the root of the span
    covering the whole parse is returned.

    Args:
        segment: Non-empty sequence of token strings (not necessarily a
            complete sentence).

    Returns:
        The spaCy ``Token`` that is the root of the parsed span.

    Raises:
        IndexError: If ``segment`` is empty.
    """
    if not segment:
        # Keep the exception type of the former unguarded ``segment[0]``
        # lookup, but say what is actually wrong with the input.
        raise IndexError("segment must contain at least one token")
    doc = spacy_nlp(" ".join(segment))
    return doc[:].root
            

Example with different lists of tokens:

In [15]:
# Print the head found for three different token lists.
print(head_of_span(['the', 'man','with','the','telescope']))
print(head_of_span(['I','saw','you','last','week']))
print(head_of_span(['last','chance','for','you']))


man
saw
chance


### Function 5:
#### extract sentence subject, direct object and indirect object spans.
This function takes a sentence as input and returns a dictionary whose keys are tuples consisting of a token and its dependency relation (nsubj for the subject, dobj for the direct object and dative for the indirect object). The value for each key is the span covering that token's whole subtree.

In [16]:
def get_spans(sentence):
    """Extract the subject, direct-object and indirect-object spans.

    Args:
        sentence: Raw text, parsed with the module-level ``spacy_nlp`` pipeline.

    Returns:
        A dict keyed by ``(token, dep_)`` for every token whose dependency
        label is ``nsubj``, ``dobj`` or ``dative``. Each value is the ``Span``
        running from the token's left edge to its right edge, inclusive.
    """
    wanted_labels = ("nsubj", "dobj", "dative")
    doc = spacy_nlp(sentence)
    return {
        (tok, tok.dep_): doc[tok.left_edge.i : tok.right_edge.i + 1]
        for tok in doc
        if tok.dep_ in wanted_labels
    }

Example with the two sentences: "I saw the man with the telescope", "Gaia brought her cat Costina some delicious food"

In [19]:
# Print the subject / object spans found in each of the two example sentences.
print(get_spans(sentence))
print(get_spans(sentence1))

{(I, 'nsubj'): I, (man, 'dobj'): the man with the telescope}
{(Gaia, 'nsubj'): Gaia, (Costina, 'dative'): her cat Costina, (food, 'dobj'): some delicious food}
