## Assignment: Working with Dependency Graphs (Parses)

__Requirements__

- [spaCy](https://spacy.io/)
    - run `pip install spacy`
    - run `python -m spacy download en_core_web_sm` to install the small English model used below

In [1]:
import spacy
from spacy import displacy

# Load the 'en_core_web_sm' English pipeline once; the cells below pass this
# same object to every helper instead of loading it again.
nlp = spacy.load('en_core_web_sm')

### Extract a path of dependency relations from the ROOT to a token

In [2]:
def dep_paths(sentence, nlp=None):
    '''
    Map every token of a text to its chain of dependency labels, ROOT first.

    @param sentence: a string to be processed
    @param nlp: a spaCy pipeline used to parse the sentence; when omitted
                (None), 'en_core_web_sm' is loaded on each call, so pass a
                shared pipeline when calling this function repeatedly
    @return: a dictionary mapping each token to its path of dependency
             relations, starting from ROOT and ending with the token's own
             relation
    '''
    # Load lazily rather than in the signature: a default of spacy.load(...)
    # is evaluated once when the function is defined, even if every caller
    # supplies its own pipeline.
    if nlp is None:
        nlp = spacy.load('en_core_web_sm')
    doc = nlp(sentence)
    paths = {}
    for sent in doc.sents:
        for tok in sent:
            # Ancestors come nearest-head first; reverse them so the path
            # reads from the sentence root down to the token itself.
            labels = [ancestor.dep_ for ancestor in tok.ancestors]
            labels.reverse()
            labels.append(tok.dep_)
            paths[tok] = labels
    return paths

string = 'I saw a man with a telescope.'
# Store the result under its own name: binding it to `dep_paths` would replace
# the function with a dict and make this cell fail when it is run again.
token_paths = dep_paths(string, nlp)


displacy.render(nlp(string), options={'compact': True})

# One line per token: its text, a tab, then its ROOT-first dependency path.
for tok, path in token_paths.items():
    print('{}\t{}'.format(tok.text, path))

I	['ROOT', 'nsubj']
saw	['ROOT']
a	['ROOT', 'dobj', 'det']
man	['ROOT', 'dobj']
with	['ROOT', 'dobj', 'prep']
a	['ROOT', 'dobj', 'prep', 'pobj', 'det']
telescope	['ROOT', 'dobj', 'prep', 'pobj']
.	['ROOT', 'punct']


### Extract subtree of dependents given a token

In [3]:
def subtrees(sentence, nlp=None):
    '''
    Map every token of a text to the tokens that make up its subtree.

    @param sentence: a string to be processed
    @param nlp: a spaCy pipeline used to parse the sentence; when omitted
                (None), 'en_core_web_sm' is loaded on each call, so pass a
                shared pipeline when calling this function repeatedly
    @return: a dictionary mapping each token to the list of the members of
             its subtree
    '''
    # Load lazily rather than in the signature: a default of spacy.load(...)
    # is evaluated once when the function is defined, even if every caller
    # supplies its own pipeline.
    if nlp is None:
        nlp = spacy.load('en_core_web_sm')
    doc = nlp(sentence)
    members = {}
    for sent in doc.sents:
        for tok in sent:
            # Materialise the subtree so the stored value is a plain list.
            members[tok] = list(tok.subtree)
    return members

string = 'I saw a man with a telescope.'
# Store the result under its own name: binding it to `subtrees` would replace
# the function with a dict and make this cell fail when it is run again.
token_subtrees = subtrees(string, nlp)

displacy.render(nlp(string), options={'compact': True})

# One line per token: its text, a tab, then the members of its subtree.
for token, members in token_subtrees.items():
    print('{}\t{}'.format(token.text, members))

I	[I]
saw	[I, saw, a, man, with, a, telescope, .]
a	[a]
man	[a, man, with, a, telescope]
with	[with, a, telescope]
a	[a]
telescope	[a, telescope]
.	[.]


### Check if a given list of tokens (segment of a sentence) forms a subtree

In [4]:
def check_subtree(sentence, subsentence, nlp=None):
    '''
    Tell whether a list of words is exactly the subtree of some token.

    @param sentence: a string to use as reference
    @param subsentence: an ordered list of words of sentence
    @param nlp: a spaCy pipeline used to parse the sentence; when omitted
                (None), 'en_core_web_sm' is loaded on each call, so pass a
                shared pipeline when calling this function repeatedly
    @return: True if subsentence forms a subtree given sentence, False
             otherwise
    '''
    # Load lazily rather than in the signature: a default of spacy.load(...)
    # is evaluated once when the function is defined, even if every caller
    # supplies its own pipeline.
    if nlp is None:
        nlp = spacy.load('en_core_web_sm')
    doc = nlp(sentence)
    # The comparison is an exact, order-sensitive list match against the
    # texts of each token's subtree; the first match ends the search.
    return any(
        subsentence == [member.text for member in token.subtree]
        for token in doc
    )

string = 'I saw a man with a telescope.'
# Three probes: a real subtree, the same words in the wrong order, and two
# words that do not form a subtree together.
candidates = (['a', 'telescope'], ['telescope', 'a'], ['I', 'a'])
for words in candidates:
    print(check_subtree(string, words, nlp))

True
False
False


### Identify head of a span, given its tokens

In [5]:
def span_root(sentence, span_start, span_end, nlp=None):
    '''
    Function to identify the head of a span in a sentence
    @param sentence: a string to use as reference
    @param span_start: index of the first token of the span
    @param span_end: index one past the last token of the span; the bounds
                     are used as a slice of the parsed document, so the end
                     is exclusive and both count tokens, not
                     whitespace-separated words
    @param nlp: a spaCy pipeline used to parse the sentence; when omitted
                (None), 'en_core_web_sm' is loaded on each call, so pass a
                shared pipeline when calling this function repeatedly
    @return: the Token representing the head of the span
    '''
    # Load lazily rather than in the signature: a default of spacy.load(...)
    # is evaluated once when the function is defined, even if every caller
    # supplies its own pipeline.
    if nlp is None:
        nlp = spacy.load('en_core_web_sm')
    doc = nlp(sentence)
    return doc[span_start:span_end].root

def span_root_nosent(span, nlp=None):
    '''
    Function to identify the head of a span given only the span's text
    @param span: a string representing a span; it is parsed on its own,
                 without the sentence it was taken from
    @param nlp: a spaCy pipeline used to parse the span; when omitted
                (None), 'en_core_web_sm' is loaded on each call, so pass a
                shared pipeline when calling this function repeatedly
    @return: the Token representing the head of the span
    '''
    # Load lazily rather than in the signature: a default of spacy.load(...)
    # is evaluated once when the function is defined, even if every caller
    # supplies its own pipeline.
    if nlp is None:
        nlp = spacy.load('en_core_web_sm')
    doc = nlp(span)
    # Slice the whole document to get a span object, which exposes `.root`.
    whole = doc[:]
    return whole.root

string = 'I saw a man with a telescope.'

# Take words 3 and 4 of the whitespace-split sentence and re-join them so the
# same span can be shown in the output and parsed on its own.
span = ' '.join(string.split()[3:5])

# Same report line for both strategies: head found inside the full sentence,
# then head found when only the span text is parsed.
report = "Head of '{}':\t '{}'"
print(report.format(span, span_root(string, 3, 5, nlp)))
print(report.format(span, span_root_nosent(span, nlp)))

Head of 'man with':	 'man'
Head of 'man with':	 'man'


### Extract sentence subject, direct object and indirect object spans (use subtree)

In [6]:
def extract(sentence, nlp=None):
    '''
    Function to extract sentence subject, direct object and indirect object spans

    Only the first sentence of the parsed text is inspected. When several
    tokens carry the same relation, the subtree of the last one is kept.

    @param sentence: a string to use as reference
    @param nlp: a spaCy pipeline used to parse the sentence; when omitted
                (None), 'en_core_web_sm' is loaded on each call, so pass a
                shared pipeline when calling this function repeatedly
    @return: the dict mapping 'nsubj', 'dobj' and 'iobj' to the list of word
             texts of their spans (an empty list when the relation is absent)
    '''
    # Load lazily rather than in the signature: a default of spacy.load(...)
    # is evaluated once when the function is defined, even if every caller
    # supplies its own pipeline.
    if nlp is None:
        nlp = spacy.load('en_core_web_sm')
    doc = nlp(sentence)
    spans = {'nsubj': [], 'dobj': [], 'iobj': []}
    # Take just the first sentence; an empty document yields no sentence and
    # leaves every span empty.
    first_sentence = next(iter(doc.sents), None)
    if first_sentence is None:
        return spans
    for tok in first_sentence:
        if tok.dep_ in spans:
            spans[tok.dep_] = [member.text for member in tok.subtree]
    return spans

# Render the parse and print the extracted spans for two sentences: one with
# a direct object and one without.
for string in ('I saw a man with a telescope.', 'We like to eat together'):
    displacy.render(nlp(string), options={'compact': True})
    mapping = extract(string, nlp)
    print(mapping)

{'nsubj': ['I'], 'dobj': ['a', 'man', 'with', 'a', 'telescope'], 'iobj': []}


{'nsubj': ['We'], 'dobj': [], 'iobj': []}
