# Dependency visualizations
Explore dependency-parsed sentences with spaCy.

In [1]:
import spacy
from spacy import displacy

# Load the "en_core_web_sm" pipeline; the resulting `nlp` callable is what
# parses every example sentence in this notebook.
nlp = spacy.load("en_core_web_sm")

In [116]:
# Parse a sample sentence, print each of its noun chunks, then draw the parse.
sent = nlp("the phone has good battery life")
for chunk in sent.noun_chunks:
    print(chunk)
# style='dep' requests the dependency visualization; jupyter=True asks displaCy
# to render for a Jupyter notebook.
displacy.render(sent, style='dep', jupyter = True)

the phone
good battery life


In [123]:
# Example: a linking verb followed by a comma-separated list of adjectives.
sent = nlp("The paper is cheap, weak, and flimsy")
displacy.render(sent, style='dep', jupyter = True)

In [97]:
# Example: one subject with two coordinated predicates ("has ..." and "is cool").
sent = nlp("Black matte finish has good construction design and is cool.")
displacy.render(sent, style='dep', jupyter = True)

In [114]:
# Example: a long legal-style sentence with several coordinated clauses.
sent = nlp("Marketer brought diversity action alleging that manufacturer committed material breach of contract, deliberately and intentionally breached its duty of good faith and fair dealing, and breached its promise to provide appropriate advertising and promotion for sale of its products in IsraelCLAIM.")
displacy.render(sent, style='dep', jupyter = True)

In [197]:
# Example: minimal pronoun subject + linking verb + adjective.
sent = nlp("I was happy")
displacy.render(sent, style='dep', jupyter = True)

### Testing extraction code

In [180]:
# Linking verbs that may join a subject noun chunk to a describing word.
# Entries are kept in stemmed form (e.g. 'wa' for "was", 'tast' for "taste",
# 'ha' for "has") because the lookup stems the verb's text before testing
# membership in this list.
link_verbs = [
    # forms of "to be"
    'is', 'am', 'are', 'wa', 'were',
    # sensory verbs
    'look', 'sound', 'smell', 'tast', 'feel',
    # state verbs
    'appear', 'remain', 'seem',
    # forms of "to have"
    'ha', 'had', 'have',
]



from nltk.stem import PorterStemmer
# Shared Porter stemmer instance; the extraction loop stems a verb's text with
# it before looking the result up in `link_verbs`.
stem = PorterStemmer()


### I need to check the linking verbs in their root form, i.e., look vs. looks vs. looked.

In [214]:
import string
import re
# Input for the extraction code: one clause with a pronoun subject ("I") and
# one with a noun-phrase subject ("the phone").
sent = nlp("I was happy and the phone is sturdy.")

# extracts the children for a verb (removes punctuation and verifies it's not part of the original chunk)
def extractChildren(verb, chunk):
    """Return the children of ``verb`` that are neither punctuation nor chunk words.

    Args:
        verb: Object exposing a ``children`` iterable whose items have ``.text``.
        chunk: Iterable of items with ``.text``; a child whose text equals any
            of these is dropped.

    Returns:
        list: The remaining children, in the order ``verb.children`` yields them.
    """
    # Build the chunk vocabulary once rather than once per child.
    chunk_texts = {word.text for word in chunk}
    punctuation = set(string.punctuation)

    # A child counts as punctuation when *every* character is punctuation, so
    # multi-character marks such as "--" or "..." are dropped as well (a plain
    # substring test against string.punctuation lets those through).
    return [child for child in verb.children
            if not punctuation.issuperset(child.text)
            and child.text not in chunk_texts]

# noun chunk links to adjectives
def link2adj(child, aspects):
    """Append a "<chunk> <verb> <adjective>" aspect when ``child`` is an adjective.

    Reads the module-level ``chunk`` and ``verb`` that the calling loop sets,
    so it must be called while those refer to the noun chunk and linking verb
    being processed.

    Args:
        child: Token-like object with ``pos_``, ``text`` and ``children``.
        aspects: List of aspect strings; extended in place.

    Returns:
        list: The same ``aspects`` list, for call-site reassignment.
    """
    # Only adjectives describe a feature; anything else leaves aspects untouched.
    if child.pos_ != "ADJ":
        return aspects

    # Default to the bare adjective; upgrade to "<adj> to <verb>" when the
    # adjective governs a verb whose first child is "to" (e.g. "easy to use").
    child_text = child.text
    for c in child.children:
        if c.pos_ != "VERB":
            continue
        # A verb may have no children at all, so peek at the first one safely
        # instead of indexing into an empty list.
        first = next(iter(c.children), None)
        if first is not None and first.text == "to":
            child_text = ' '.join([child.text, "to", c.text])

    aspects.append(' '.join([chunk.text, verb.text, child_text]))
    return aspects

# noun chunk links to noun then adjective
def link2noun(child, aspects, skips, sent):
    """Append a "<chunk> <verb> <noun chunk>" aspect when ``child`` is a noun.

    Reads the module-level ``chunk`` and ``verb`` that the calling loop sets.

    Args:
        child: Token-like object with ``pos_`` and ``text``.
        aspects: List of aspect strings; extended in place.
        skips: List of noun-chunk texts already consumed; extended in place.
        sent: Parsed sentence exposing ``noun_chunks``.

    Returns:
        tuple: ``(aspects, skips)``, the same two lists that were passed in.
    """
    if child.pos_ != "NOUN":
        return aspects, skips

    # Find the first noun chunk whose text contains the noun. A literal
    # substring test is used so that token text containing regex
    # metacharacters (e.g. "c++") cannot raise or mis-match.
    # NOTE(review): matching by text can select an earlier chunk that merely
    # contains the same characters; matching by token position would be
    # stricter.
    feat_chunk = next(
        (candidate.text for candidate in sent.noun_chunks
         if child.text in candidate.text),
        None,
    )

    # No chunk contains this noun: nothing to record.
    if feat_chunk is None:
        return aspects, skips

    # add to aspects and skips
    aspects.append(' '.join([chunk.text, verb.text, feat_chunk]))
    skips.append(feat_chunk)

    return aspects, skips

# recursive algorithm to extract conjuncts
def extractCONJ(child, aspects):
    """Collect aspects from ``child`` and everything below it in the parse tree.

    Descendants are visited first, so aspects found deeper in the tree are
    appended before the aspect (if any) for ``child`` itself. Reads the
    module-level ``chunk``, ``skips`` and ``sent``.

    Args:
        child: Token-like object with ``dep_`` and ``children``.
        aspects: List of aspect strings accumulated so far.

    Returns:
        list: ``aspects`` after this subtree has been processed.
    """
    # Walk down first: chained conjuncts hang off one another, so keep
    # descending until a node has no qualifying children left.
    for descendant in extractChildren(child, chunk):
        aspects = extractCONJ(descendant, aspects)

    # On the way back up, only complement/conjunct relations yield aspects.
    if child.dep_ not in ('acomp', 'conj', "xcomp"):
        return aspects

    aspects = link2adj(child, aspects)
    # The returned skips list is discarded here; link2noun still appends to
    # the shared `skips` list it was handed.
    aspects, _ = link2noun(child, aspects, skips, sent)
    return aspects

        
# Driver: for every noun chunk that is the subject of a linking verb, collect
# "<chunk> <verb> <description>" strings into `aspects`.
aspects = []
skips = []  # texts of noun chunks already consumed as a feature of another chunk
for chunk in sent.noun_chunks:

    # skip if noun chunk was already reviewed or if the noun chunk contains a pronoun
    # (test the predicate itself rather than relying on token truthiness)
    if chunk.text in skips or any(word.pos_ == "PRON" for word in chunk):
        continue

    # only subjects governed by a linking verb (compared in stemmed form) qualify
    if chunk.root.dep_ != "nsubj" or stem.stem(chunk.root.head.text) not in link_verbs:
        continue

    # `verb` and `chunk` are read as globals by link2adj / link2noun / extractCONJ
    verb = chunk.root.head

    # get children for each verb, removing core noun chunk and any punctuation
    children = extractChildren(verb, chunk)

    for child in children:
        if child.dep_ not in ('acomp', 'dobj'):  # unsure about including dobj, but can leave for now
            continue

        # create aspect if links to adjective
        aspects = link2adj(child, aspects)

        # create aspect if links to other noun chunk
        aspects, skips = link2noun(child, aspects, skips, sent)

        # then follow any conjuncts hanging off this child, recursively;
        # the returned skips are not needed there
        child_children = extractChildren(child, chunk)
        for c in child_children:
            aspects = extractCONJ(c, aspects)

print(aspects)

['the phone is sturdy']


False

In [211]:
# Display the current value of the notebook-global `chunk`.
chunk

the phone