NOUN-VERB pairs

In [1]:
import spacy



In [2]:
nlp = spacy.load('en_core_web_sm')

sentences = ['The company made a bad decision',
             'The team performed well in the game',
             'The restaurant served delicious food',
             'The movie was poorly directed']

extracted_aspects = []

In [3]:
for sentence in sentences:
    doc = nlp(sentence)
    aspect_pairs = {}
    for token in doc:
        if token.pos_ in ('NOUN', 'VERB'):
            children = [child for child in token.children if child.pos_ in ('ADJ', 'ADV', 'NOUN')]
            if children:
                aspect_pairs[token.text] = children
    if aspect_pairs:
        extracted_aspects.append(aspect_pairs)

In [4]:
print(extracted_aspects)


[{'made': [company, decision], 'decision': [bad]}, {'performed': [team, well]}, {'served': [restaurant, food], 'food': [delicious]}, {'directed': [movie, poorly]}]


Handling Negatition and Compound words

In [5]:
for sentence in sentences:
    doc = nlp(sentence)
    aspect_pairs = {}
    for token in doc:
        if token.pos_ == 'NOUN':
            modifiers = []
            for child in token.children:
                if child.pos_ in ('ADJ', 'ADV', 'VERB'):
                    modifiers.append(child)
                elif child.dep_ == 'neg':
                    for grandchild in child.children:
                        if grandchild.pos_ in ('ADJ', 'ADV', 'VERB'):
                            modifiers.append(grandchild)
            if modifiers:
                # Check for compound nouns
                compound_nouns = []
                for possible_compound in doc[token.i:]:
                    if possible_compound.pos_ == 'NOUN':
                        if possible_compound.dep_ == 'compound' or possible_compound.n_lefts > 0:
                            compound_nouns.append(possible_compound)
                        else:
                            break
                    else:
                        break
                if compound_nouns:
                    compound_noun_text = ' '.join([cn.text for cn in compound_nouns] + [token.text])
                    aspect_pairs[compound_noun_text] = modifiers
                else:
                    aspect_pairs[token.text] = modifiers
        elif token.pos_ == 'VERB':
            noun_objects = []
            for child in token.children:
                if child.pos_ == 'NOUN':
                    noun_objects.append(child)
            if noun_objects:
                named_entities = []
                for no in noun_objects:
                    if no.ent_type_ != 0:
                        named_entities.append(no)
                if named_entities:
                    named_entity_text = ' '.join([ne.text for ne in named_entities])
                    aspect_pairs[named_entity_text] = [token]
                else:
                    aspect_pairs[token.text] = noun_objects
    if aspect_pairs:
        extracted_aspects.append(aspect_pairs)

In [6]:
print(extracted_aspects)

[{'made': [company, decision], 'decision': [bad]}, {'performed': [team, well]}, {'served': [restaurant, food], 'food': [delicious]}, {'directed': [movie, poorly]}, {'company decision': [made], 'decision decision': [bad]}, {'team': [performed]}, {'restaurant food': [served], 'food food': [delicious]}, {'movie': [directed]}]


In [7]:
sentences = ['The company made a bad decision',
             'The team performed well in the game',
             'The restaurant served delicious food',
             'The food was not delicious',
             'The car engine is broken',
             'John is a good employee']

extracted_aspects = []

In [8]:
for sentence in sentences:
    doc = nlp(sentence)
    aspect_pairs = {}
    for token in doc:
        if token.pos_ == 'NOUN':
            # Check for adjective, adverb, and verb modifiers of the noun
            modifiers = []
            for child in token.children:
                if child.pos_ in ('ADJ', 'ADV', 'VERB'):
                    modifiers.append(child)
                elif child.dep_ == 'neg':
                    # Check for negation of modifiers
                    for grandchild in child.children:
                        if grandchild.pos_ in ('ADJ', 'ADV', 'VERB'):
                            modifiers.append(grandchild)
            if modifiers:
                # Check for compound nouns
                compound_nouns = []
                for possible_compound in doc[token.i:]:
                    if possible_compound.pos_ == 'NOUN':
                        if possible_compound.dep_ == 'compound' or possible_compound.n_lefts > 0:
                            compound_nouns.append(possible_compound)
                        else:
                            break
                    else:
                        break
                if compound_nouns:
                    compound_noun_text = ' '.join([cn.text for cn in compound_nouns] + [token.text])
                    aspect_pairs[compound_noun_text] = modifiers
                else:
                    aspect_pairs[token.text] = modifiers
        elif token.pos_ == 'VERB':
            # Check for noun objects of the verb
            noun_objects = []
            for child in token.children:
                if child.pos_ == 'NOUN':
                    noun_objects.append(child)
            if noun_objects:
                # Check for named entities
                named_entities = []
                for no in noun_objects:
                    if no.ent_type_ != 0:
                        named_entities.append(no)
                if named_entities:
                    named_entity_text = ' '.join([ne.text for ne in named_entities])
                    aspect_pairs[named_entity_text] = [token]
                else:
                    aspect_pairs[token.text] = noun_objects
    if aspect_pairs:
        extracted_aspects.append(aspect_pairs)

In [9]:
print(extracted_aspects)

[{'company decision': [made], 'decision decision': [bad]}, {'team': [performed]}, {'restaurant food': [served], 'food food': [delicious]}, {'engine': [broken]}, {'employee employee': [good]}]


Failed trial 1

In [10]:
sentences = ['The root causes were identified as follows: Root Cause for Sub-Problem 1: Inadequate procedural guidance and unclear coordination between applicable proceduers',
            'This ultimately created an environment that promulgated a human error-likely environment.” More specifically, the RCE team determined that the environment consisted of poor communication, lack of engineering leadership, too much reliance on vendor designs, time pressure, and distractions. ',
            'Also, equipment problems due to aging have led to an increasingly negative trend in the station’s Deficient Critical Component Backlog Orders. ',
             'The movie was not good.',
             'Mr. Baldwin stated the deficient performance was caused by maintenance procedural inadequacy which allowed work to proceed with the relay energized.'
            ]

extracted_aspects = []

for sentence in sentences:
    doc = nlp(sentence)
    noun_adj_pairs = {}
    for token in doc:
        adj = []
        noun = ""
        if token.pos_ == 'NOUN':
            if token.dep_ == 'compound':
                compound_noun = [t.text for t in token.subtree]
                noun = " ".join(compound_noun)
            else:
                noun = token.text
            for child in token.children:
                if child.pos_ == 'ADJ':
                    if 'neg' in [t.dep_ for t in child.head.children]:
                        adj.append('not ' + child.text)
                    else:
                        adj.append(child.text)
        if noun and adj:
            if 'neg' in [t.dep_ for t in token.head.children]:
                noun_adj_pairs.update({noun:['not ' + a for a in adj]})
            else:
                noun_adj_pairs.update({noun:adj})
    named_entities = [ent.text for ent in doc.ents if ent.label_ in ['PERSON', 'ORG', 'GPE']]
    if len(noun_adj_pairs) != 0:
        extracted_aspects.append({'aspects': noun_adj_pairs, 'entities': named_entities})

print(extracted_aspects)

[{'aspects': {'guidance': ['Inadequate', 'procedural'], 'coordination': ['unclear'], 'proceduers': ['applicable']}, 'entities': ['Root Cause']}, {'aspects': {'environment': ['human', 'likely'], 'communication': ['poor'], 'reliance': ['much']}, 'entities': []}, {'aspects': {'problems': ['due'], 'trend': ['negative']}, 'entities': ['Deficient Critical Component Backlog Orders']}, {'aspects': {'performance': ['deficient'], 'inadequacy': ['procedural']}, 'entities': ['Baldwin']}]


Failed trial 2

In [11]:
sentence_n = "The movie was not good."

doc = nlp(sentence_n)
noun_adj_pairs = {}
for token in doc:
    adj = []
    noun = ""
    if token.pos_ == 'NOUN':
        for child in token.children:
            if child.pos_ == 'ADJ':
                noun = token.text
                adj.append(child)
            elif child.dep_ == 'neg':
                adj.append(token)
    if noun and adj != []:
        noun_adj_pairs.update({noun:adj})

print(noun_adj_pairs)


{}


Failed trial 3


In [12]:
sentence = "The movie was not good."

doc = nlp(sentence)
noun_adj_pairs = {}
for token in doc:
    adj = []
    noun = ""
    if token.pos_ == 'NOUN':
        for child in token.children:
            if child.pos_ == 'ADJ':
                noun = token.text
                adj.append(child)
            elif child.dep_ == 'neg':
                adj.append(token)
        if noun or adj:
            noun_adj_pairs.update({noun: adj})

print(noun_adj_pairs)


{}
