# Implementing Rule-based Model

In [6]:
import numpy as np
import pandas as pd
import textacy
import json
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from spacy import displacy

- universal dependencies: https://universaldependencies.org/en/dep/index.html
- aspect-based opinion mining: https://devopedia.org/aspect-based-opinion-mining
- Aspect extraction GT: https://achyutjoshi.github.io/aspect_extraction/aspectextraction
    - https://github.com/ishikaarora/Aspect-Sentiment-Analysis-on-Amazon-Reviews/blob/master/src/models/aspect_extraction.py
- https://www.researchgate.net/publication/327707096_Aspect_Extraction_Performance_with_POS_Tag_Pattern_of_Dependency_Relation_in_Aspect-based_Sentiment_Analysis


In [4]:
# Name of the spaCy language pipeline handed to textacy when loading the corpus.
lang_module = 'en_core_web_sm'
# Load the previously saved corpus of bank reviews from its gzipped binary file.
corpus = textacy.Corpus.load(lang=lang_module, filepath='./data/bank_reviews.bin.gz')

Notes:
- This approach may not be context aware. When two different products are being discussed, the Aspect/Modifier may not point to the correct product.
- Implicit vs. explicit: our rule-based approach can only deal with explicit aspect-opinion pairs; implicit ones are missed.
- Attention on aspects can be missed if not directly connected by dependency
- Punctuation changes meanings of A/M -> line 14 "Good bank, great service" vs. without comma. Co-operation split into two "co" and "operation"
- Very restrictive; cannot keep up with the many ways people write text

-> Biggest concern: how to evaluate unlabeled data

# Writing dependency rules to extract Aspects & Modifiers

In [11]:
# VADER sentiment analyzer from NLTK; its polarity_scores() is used further down
# to score each extracted "modifier aspect" phrase.
analyzer = SentimentIntensityAnalyzer()

In [175]:
# Index of a single review in the corpus picked out for manual inspection.
ITEM = 3
doc = corpus[ITEM]

# Pronouns that may stand in for the product being reviewed.
# NOTE(review): not referenced anywhere in the visible code - confirm it is still needed.
product_references = ['it','this','they','these']


def add_compound(token, aspect):
    """Prefix ``aspect`` with the token's compound and relative-clause children.

    Args:
        token: Token whose ``children`` are scanned; every child must expose
            ``dep_`` and ``norm_``.
        aspect: Aspect text built so far, or the placeholder ``'99999'`` when
            no aspect has been found.

    Returns:
        ``aspect`` with the ``norm_`` of each ``compound`` / ``relcl`` child
        prepended (space-separated; later children end up leftmost), or the
        placeholder unchanged.
    """
    # Never decorate the placeholder: "<word> 99999" would no longer compare
    # equal to '99999', so callers would mistake it for a real aspect.
    if aspect == '99999':
        return aspect
    for child in token.children:
        if child.dep_ in ('compound', 'relcl'):
            aspect = child.norm_ + " " + aspect
    return aspect

# Placeholder meaning "nothing extracted yet". It is the same literal that
# add_compound() checks for, so it must stay the string "99999".
UNSET = "99999"

# Run five dependency-parse rules over the first 40 reviews and print, per review,
# the (aspect, modifier, VADER compound score, rule number) tuples that carry sentiment.
# Throughout: A is the aspect, M is the sentiment modifier.
for i, doc in enumerate(corpus[:40]):
    print(i,doc)
    rule_pairs = []

    ## RULE 1 - adjectival modifier
    ## M is a child of A with a relationship of amod
    for token in doc:
        A = UNSET
        M = UNSET
        if token.dep_ == "amod" and not token.is_stop:
            M = token.norm_
            A = token.head.norm_
            # When the aspect is a direct object, pull the governing verb(s)
            # into the modifier (e.g. "add new" for "add a new credit card").
            if token.head.dep_ == 'dobj':
                if token.head.head.dep_ == 'advcl':
                    M = token.head.head.head.norm_ + " " + token.head.head.norm_ + " " + M
                else:
                    M = token.head.head.norm_ + " " + M

            # Adverbial modifier of the adjective (most refreshing lotion):
            # only the first one is used.
            for child_m in token.children:
                if child_m.dep_ == "advmod":
                    M = child_m.norm_ + " " + M
                    break

            A = add_compound(token.head, A)

        if A != UNSET and M != UNSET:
            rule_pairs.append((A, M, analyzer.polarity_scores(M + " " + A)['compound'],1))

    ## RULE 2 - adjectival complement
    ## A is a child of something with relationship of nsubj, while
    ## M is a child of the same something with relationship of acomp
    ## Assumption - A verb will have only one NSUBJ and DOBJ
    ## "The sound of the speakers would be better. The sound of the speakers could be better" - handled using AUX dependency
    for token in doc:
        A = UNSET
        M = UNSET
        neg_prefix = None
        for child in token.children:
            if child.dep_ == "nsubj" and not child.is_stop:
                A = child.norm_
                for child_two in child.children:
                    if child_two.dep_ == "compound":
                        A = child_two.norm_ + " " + A

            if child.dep_ == "acomp" and not child.is_stop:
                # Always take the complement itself as the modifier, even when it
                # has no children ("service is great"); then prefix its first
                # adverbial modifier if there is one ("really great").
                M = child.norm_
                for child_two in child.children:
                    if child_two.dep_ == 'advmod':
                        M = child_two.norm_ + " " + child.norm_
                        break

            # example - 'this could have been better' -> (this, not better)
            if child.dep_ == "aux" and child.tag_ == "MD":
                neg_prefix = "not"

            if child.dep_ == "neg":
                neg_prefix = child.norm_

        if neg_prefix is not None and M != UNSET:
            M = neg_prefix + " " + M

        if A != UNSET and M != UNSET:
            rule_pairs.append((A, M, analyzer.polarity_scores(M + " " + A)['compound'],2))

    ## RULE 3 - attribute
    ## ATTR - link between a verb like 'be/seem/appear' and its complement
    ## Example: 'this is garbage' -> (this, garbage)
    for token in doc:
        A = UNSET
        M = UNSET
        neg_prefix = None
        for child in token.children:
            if child.dep_ == "nsubj" and not child.is_stop:
                A = child.lemma_

            if child.dep_ == "attr" and not child.is_stop:
                M = child.lemma_

            if child.dep_ == "neg":
                neg_prefix = child.norm_

        if neg_prefix is not None and M != UNSET:
            M = neg_prefix + " " + M

        if A != UNSET and M != UNSET:
            rule_pairs.append((A, M,analyzer.polarity_scores(M + " " + A)['compound'],3))

    ## RULE 4 - adverbial modifier to a (passive) verb
    ## A is a child of something with relationship of nsubjpass or nsubj, while
    ## M is a child of the same something with relationship of advmod
    ## Assumption - A verb will have only one NSUBJ and DOBJ
    for token in doc:
        A = UNSET
        M = UNSET
        neg_prefix = None
        for child in token.children:
            if child.dep_ in ("nsubjpass", "nsubj") and not child.is_stop:
                A = child.norm_

            if child.dep_ == "advmod" and not child.is_stop:
                M = child.norm_
                # Prefix the adverb's own first adverbial modifier, if any.
                for child_m in child.children:
                    if child_m.dep_ == "advmod":
                        M = child_m.norm_ + " " + child.norm_
                        break

            if child.dep_ == "neg":
                neg_prefix = child.norm_

        if neg_prefix is not None and M != UNSET:
            M = neg_prefix + " " + M

        if A != UNSET and M != UNSET:
            rule_pairs.append((A, M,analyzer.polarity_scores(M + " " + A)['compound'],4))

    ## RULE 5 - direct object
    ## A: direct object with NOUN type (plus its compound / relcl children)
    ## M: adverb attached to that object with relationship of advmod
    for token in doc:
        if not (token.dep_ == 'dobj' and token.pos_ == 'NOUN'):
            # No aspect here, so no pair can be produced. Skipping also keeps
            # the placeholder from ever being passed to add_compound().
            continue
        A = add_compound(token=token, aspect=token.norm_)
        M = UNSET
        for child in token.children:
            # Exact comparisons: `x in ('advmod')` is a substring test on a
            # plain string, not tuple membership.
            if child.dep_ == 'advmod' and child.pos_ == 'ADV':
                M = child.norm_

        if A != UNSET and M != UNSET:
            rule_pairs.append((A, M, analyzer.polarity_scores(M + " " + A)['compound'],5))

    # Removing pairs that do not have sentiment (compound score of exactly 0)
    rule_pairs = [pair for pair in rule_pairs if pair[2] != 0]
    print(f'\t {rule_pairs}\n')

# Example Row 0
# DONE -> Try adding case for CC "helped" (mod) build-credit (aspect) 

# Example Row 2
# DONE -> Neither should be captured [('credit card', 'new', 0.0, 1), ('future', 'near', 0.0, 1)] -> dismiss if 0 sentiment?

# Row 9
# DONE -> Misses cash back is a bonus 


# TODO:
# Capture negatives correctly - line 30

# Row 10
# M:help, A:credit back-up

# Example Row 3
# Missed 0 percent interest, good for balance -> A: select obj type (add compound if present) M: head -> get adj 


0 The PNC cash rewards card is a great first credit card. I have used it for about a month now and it has helped me build my credit.
	 [('credit card', 'great', 0.7717, 1)]

1 Stay as far away from this lender as possible. My mortgage got sold to pnc last month and it has been nothing but a nightmare trying to get my payment through. I paid my $1000 on the Friday of last month. The following Monday it was taken out of my checking. The next day another $1000 was taken out. I had to transfer money so my account wouldn’t overdraft. After days of trying to talk to customer service and getting the payment stopped I called today to have Brandy the customer service rep laugh and say ‘you’ve never had a payment for something get taken out twice?’ This place is a nightmare.
	 []

2 I like everything about it don't want to change anything about it like to add a new credit card in my near future and that's how the way it is
	 [('credit card', 'add new', 0.3818, 1)]

3 Good card....0 percent inter

In [71]:
# Sanity check: VADER compound score for the bare word 'helped'
# (the captured cell output shows 0.0).
analyzer.polarity_scores('helped')['compound']

0.0

In [172]:
# Visualize the dependency parse of one review inline in the notebook
# (style='dep', jupyter=True); 'distance' is passed through as a displaCy
# rendering option.
ITEM = 31
displacy.render(corpus[ITEM], style='dep', jupyter=True, options={'distance':120})

In [170]:
# Dump one line per token of the selected review: text, coarse POS, normalized
# form, fine-grained tag, dependency label, then the texts of its children and
# of its ancestors in the parse tree.
doc = corpus[ITEM]
for token in doc:
    child_texts = [child.text for child in token.children]
    ancestor_texts = [parent.text for parent in token.ancestors]
    print(
        token.text,
        token.pos_,
        token.norm_,
        token.tag_,
        token.dep_,
        child_texts,
        ancestor_texts,
    )

Good ADJ good JJ amod [] ['card']
card NOUN card NN ROOT ['Good', '....', 'interest', '....'] []
.... PUNCT .... NFP punct [] ['card']
0 NUM 0 CD nummod [] ['percent', 'interest', 'card']
percent NOUN percent NN compound ['0'] ['interest', 'card']
interest NOUN interest NN appos ['percent', 'for'] ['card']
for ADP for IN prep ['year'] ['interest', 'card']
first ADJ first JJ amod [] ['year', 'for', 'interest', 'card']
year NOUN year NN pobj ['first', 'so'] ['for', 'interest', 'card']
or CCONJ or CC cc [] ['so', 'year', 'for', 'interest', 'card']
so ADV so RB nummod ['or'] ['year', 'for', 'interest', 'card']
.... PUNCT .... NFP punct [] ['card']
good ADJ good JJ ROOT ['for', '.'] []
for ADP for IN prep ['transfer'] ['good']
balance NOUN balance NN compound [] ['transfer', 'for', 'good']
transfer NOUN transfer NN pobj ['balance'] ['for', 'good']
. PUNCT . . punct [] ['good']


In [None]:
#     # ## SECOND RULE OF DEPENDANCY PARSE -
#     # ## M - Sentiment modifier || A - Aspect
#     # #Direct Object - A is a child of something with relationship of nsubj, while
#     # # M is a child of the same something with relationship of dobj
#     # #Assumption - A verb will have only one NSUBJ and DOBJ
#     # for token in doc:
#     #     children = token.children
#     #     A = "99999"
#     #     M = "99999"
#     #     add_neg_pfx = False
#     #     for child in children :
#     #         if(child.dep_ == "nsubj" and not child.is_stop):
#     #             A = child.text
#     #             # check_spelling(child.text)

#     #         if((child.dep_ == "dobj" and child.pos_ == "ADJ") and not child.is_stop):
#     #             M = child.text
#     #             #check_spelling(child.text)

#     #         if(child.dep_ == "neg"):
#     #             neg_prefix = child.text
#     #             add_neg_pfx = True

#     # if (add_neg_pfx and M != "99999"):
#     #     M = neg_prefix + " " + M

#     #     if(A != "99999" and M != "99999"):
#     #         rule_pairs.append((A, M,analyzer.polarity_scores(M)['compound'],2))

# ## FOURTH RULE OF DEPENDANCY PARSE -
# ## M - Sentiment modifier || A - Aspect

# #Adverbial modifier to a passive verb - A is a child of something with relationship of nsubjpass, while
# # M is a child of the same something with relationship of advmod

# #Assumption - A verb will have only one NSUBJ and DOBJ

# for token in doc:


#     children = token.children
#     A = "99999"
#     M = "99999"
#     add_neg_pfx = False
#     for child in children :
#         if((child.dep_ == "nsubjpass" or child.dep_ == "nsubj") and not child.is_stop):
#             A = child.text
#             # check_spelling(child.text)

#         if(child.dep_ == "advmod" and not child.is_stop):
#             M = child.text
#             M_children = child.children
#             for child_m in M_children:
#                 if(child_m.dep_ == "advmod"):
#                     M_hash = child_m.text
#                     M = M_hash + " " + child.text
#                     break
#             #check_spelling(child.text)

#         if(child.dep_ == "neg"):
#             neg_prefix = child.text
#             add_neg_pfx = True

#     if (add_neg_pfx and M != "99999"):
#         M = neg_prefix + " " + M

#     if(A != "99999" and M != "99999"):
#         rule_pairs.append((A, M,analyzer.polarity_scores(M)['compound'],8)) # )

# ## FIFTH RULE OF DEPENDANCY PARSE -
# ## M - Sentiment modifier || A - Aspect

# #Complement of a copular verb - A is a child of M with relationship of nsubj, while
# # M has a child with relationship of cop

# #Assumption - A verb will have only one NSUBJ and DOBJ

# for token in doc:
#     children = token.children
#     A = "99999"
#     buf_var = "99999"
#     for child in children :
#         if(child.dep_ == "nsubj" and not child.is_stop):
#             A = child.text
#             # check_spelling(child.text)

#         if(child.dep_ == "cop" and not child.is_stop):
#             buf_var = child.text
#             #check_spelling(child.text)

#     if(A != "99999" and buf_var != "99999"):
#         rule_pairs.append((A, token.text,analyzer.polarity_scores(token.text)['compound'],5))


# ## SIXTH RULE OF DEPENDANCY PARSE -
# ## M - Sentiment modifier || A - Aspect
# ## Example - "It ok", "ok" is INTJ (interjections like bravo, great etc)


# for token in doc:
#     children = token.children
#     A = "99999"
#     M = "99999"
#     if(token.pos_ == "INTJ" and not token.is_stop):
#         for child in children :
#             if(child.dep_ == "nsubj" and not child.is_stop):
#                 A = child.text
#                 M = token.text
#                 # check_spelling(child.text)

#     if(A != "99999" and M != "99999"):
#         rule_pairs.append((A, M,analyzer.polarity_scores(M)['compound'],6))


