In [65]:
#imports
import pandas as pd
import gzip
import json
import nltk
from nrclex import NRCLex
from nltk.tokenize import sent_tokenize, word_tokenize
import csv
from nltk.stem import WordNetLemmatizer
import copy
import math
import sys
import matplotlib.pyplot as plt
import numpy as np
import cv2 
%matplotlib notebook



In [31]:
# Emotion categories in the column order used throughout the notebook.
# "anticip" is the key NRCLex reports alongside "anticipation" (get_emotion reconciles the two).
EMOTIONS=["fear", "anger", "anticip", "trust", "surprise", "positive", "negative", "sadness", "disgust", "joy"]
# Consolidated aspects: one aspect name stands for a whole group of words.
PRODUCT_RECIPIENT_WORDS=['kid', 'son', 'grandson', 'daughter', 'granddaughter', 'nephew', 'niece', 'child']
PRODUCT_RECIPIENT="product_recipient"
PRODUCT_WORDS=['product','car','toy','item']
PRODUCT="product"
# Column order of the aspect/emotion dataframes; derived from EMOTIONS so the two cannot drift apart.
EMOTE_DF_INDEX=['asin', 'aspect']+EMOTIONS
# Suffixes of the aspect files / output files the notebook iterates over.
ASPECT_METHODS=['rule_based','rule_heuristic','consolidated','manual']

In [32]:
#import dataset: one row per review, loaded from a pickled dataframe
# NOTE(review): unpickling can execute arbitrary code -- only load this file from a trusted source
top_car_reviews= pd.read_pickle('dataset/top_car_reviews.pkl')
top_car_reviews

Unnamed: 0,overall,verified,asin,reviewerID,reviewText,summary,title,price
0,5.0,True,B00005LBZO,A2VY5QVWBX2YB3,I remember playing with Kettcars when I was a ...,A Great Toy for any Child!,"Kettler Kettcar Kabrio Cart, Yellow",
1,5.0,True,B00005LBZO,A2VY5QVWBX2YB3,I remember playing with Kettcars when I was a ...,A Great Toy for any Child!,"Kettler Kettcar Kabrio Cart, Yellow",
2,5.0,False,B00005LBZO,A136AQL8RG5WJJ,I am so very pleased with this product! I boug...,Kettler Karibo Cart,"Kettler Kettcar Kabrio Cart, Yellow",
3,5.0,False,B00005LBZO,A136AQL8RG5WJJ,I am so very pleased with this product! I boug...,Kettler Karibo Cart,"Kettler Kettcar Kabrio Cart, Yellow",
4,5.0,False,B00005LBZO,A2X3VRF0KE4C1Q,"One of the best ride on toys for ""little peopl...",A better pedal car....,"Kettler Kettcar Kabrio Cart, Yellow",
...,...,...,...,...,...,...,...,...
2045,1.0,True,B01CWSRW62,A2U221MZ3INE7U,"Stopped working after about 30-40 days, total ...","Good for 2 hours of play, then dies.",ZHMY Remote control Stunt Car Double-face work...,$5.24
2046,5.0,True,B01CWSRW62,APFZRYC2NR7XO,It's impossible to get this thing stuck!,Five Stars,ZHMY Remote control Stunt Car Double-face work...,$5.24
2047,5.0,True,B01CWSRW62,A16AHSSEEFOXU5,My husband and our dog love it. It is very stu...,My husband and our dog love it. It is very stu...,ZHMY Remote control Stunt Car Double-face work...,$5.24
2048,1.0,True,B01CWSRW62,A291Y0FXKMLYFE,Don't buy this. Was fun for only a brief time...,Was fun for only a brief time before it stoppe...,ZHMY Remote control Stunt Car Double-face work...,$5.24


In [33]:
def get_reviews(df, asin):
    '''Collects the review texts of one product.
    args:
    df -- dataframe with an "asin" and a "reviewText" column
    asin -- value to match against the "asin" column

    returns -- list with the "reviewText" value of every matching row, in row order
    '''
    belongs_to_product = df["asin"] == asin
    return df.loc[belongs_to_product, "reviewText"].tolist()

# Emotion detection

## Common functions

In [34]:
def get_emotion(text):
    '''
    Scores a text with NRCLex and returns one value per entry of EMOTIONS, in that order.

    args:
    text -- string to analyse (an empty string gives all zeros)

    returns -- list of the affect frequencies for fear, anger, anticipation, trust,
               surprise, positive, negative, sadness, disgust, joy
    '''
    frequencies = NRCLex(text).affect_frequencies

    # a category missing from the result is treated as "not present" rather than an error
    emotion_val_list = [frequencies.get(name, 0.0) for name in EMOTIONS]

    # library quirk: NRCLex reports a placeholder "anticip" key (0.0) and, when the
    # text contains anticipation words, the real value under "anticipation".
    # Test for that key directly instead of inferring it from the dictionary size.
    if "anticipation" in frequencies:
        emotion_val_list[EMOTIONS.index("anticip")] = frequencies["anticipation"]

    return emotion_val_list


# smoke test: score an empty text and show the values next to the EMOTIONS labels;
# the final expression displays how many values came back
e=get_emotion("")
print(EMOTIONS)
print(e)
len(e)

['fear', 'anger', 'anticip', 'trust', 'surprise', 'positive', 'negative', 'sadness', 'disgust', 'joy']
[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


10

In [35]:
def isNaN(num):
    '''Tells whether a value is NaN once converted to float.

    args:
    num -- any value; used here to spot missing review texts

    returns -- True when float(num) is NaN, False otherwise, including for values
               that cannot be converted to float (e.g. ordinary text or None)
    '''
    try:
        num=float(num)
    except (TypeError, ValueError, OverflowError):
        # not convertible to a float, so it cannot be NaN
        return False
    return math.isnan(num)
def lem_list(words):
    '''
    Lemmatizes every word of a list with a WordNetLemmatizer.

    args:
    words -- iterable of word strings

    returns -- new list with the lemma of each word, in the same order
    '''
    lemmatizer = WordNetLemmatizer()
    return [lemmatizer.lemmatize(token) for token in words]

def norm(vals):
    '''
    Normalizes the emotion values so that they sum to 1.

    args:
    vals -- list of numbers

    returns -- list of floats of the same length as vals; when the values sum to 0
               there is nothing to scale, so a list of zeros is returned (always a
               list, so callers can keep treating the result as a row of values)
    '''
    total = sum(vals)  # computed once instead of once per element
    if total==0:
        return [0.0]*len(vals)
    return [float(i)/total for i in vals]

    
def get_product_aspects(asin,f_name):
    ''' gets the aspects associated with a product from a csv file
    args:
    asin -- the product identifier, matched against the first column of each row
    f_name -- path of the csv file; each row is an asin followed by its aspects

    returns -- list of aspect names (str) from the matching row, or an empty list
               when no row matches; if several rows match, the last one wins
    '''
    aspects=[]

    # newline='' is the way the csv module asks files to be opened
    with open(f_name, newline='') as csvfile:
        for row in csv.reader(csvfile): # each row is a list
            # blank lines are read as empty lists, which have no first column
            if row and row[0]==asin:
                aspects=row[1:]
    return aspects

def get_dic_df_rows(asin,values,index=EMOTE_DF_INDEX):
    '''
    Builds the dictionary for one dataframe row.

    args:
    asin -- product identifier, placed in front of the other values
    values -- list with the remaining row values (the callers in this notebook pass
              the aspect name followed by the emotion values); it is not modified
    index -- column names, paired position by position with [asin] + values

    returns -- dict mapping each column name to its value
    '''
    #copy first so the caller's list stays untouched
    remaining=copy.deepcopy(values)
    return dict(zip(index, [asin, *remaining]))
 
def drawProgressBar(percent, barLen = 20):
    '''Redraws a one-line text progress bar on stdout.

    args:
    percent -- progress as a fraction (0.5 is shown as 50.00%)
    barLen -- number of character cells in the bar

    The line starts with a carriage return so successive calls overwrite each other.
    '''
    sys.stdout.write("\r")
    cells = ("=" if cell < int(barLen * percent) else " " for cell in range(barLen))
    bar = "".join(cells)
    sys.stdout.write("[ %s ] %.2f%%" % (bar, percent * 100))
    sys.stdout.flush()


## Method 1: Sentence based emotional analysis followed by aggregation across reviews   

In [36]:
def get_aspect_sentence_list(aspect,reviews):
    ''' gets every review sentence that mentions an aspect
    args:
    aspect -- a string for the aspect word; PRODUCT and PRODUCT_RECIPIENT also match
              any word of their consolidated word lists
    reviews -- a list of review texts; entries for which isNaN is true are skipped

    returns -- sentence_list, the original (untouched) sentences that mention the
               aspect, in review order; a sentence appears at most once per occurrence
    '''
    #get list of consolidated words
    aspect_words=[]
    if aspect == PRODUCT:
        aspect_words=PRODUCT_WORDS
    elif aspect==PRODUCT_RECIPIENT:
        aspect_words=PRODUCT_RECIPIENT_WORDS

    is_text=isinstance(aspect, str)
    #multi word aspects can never equal a single token, so they are searched for
    #in the lowercased sentence text (same case handling as the token match)
    multi_word_aspect=aspect.lower() if is_text and " " in aspect else None

    sentence_list=[]
    for review in reviews:
        if isNaN(review):
            continue #missing review text

        for sent in sent_tokenize(review):
            lowered=sent.lower()
            #tokenize into lowercase words and lemmatize them
            word_list=lem_list(word_tokenize(lowered))

            mentions_aspect=(
                (is_text and aspect in word_list)
                or any(word in word_list for word in aspect_words)
                or (multi_word_aspect is not None and multi_word_aspect in lowered)
            )
            if mentions_aspect:
                sentence_list.append(sent)

    return sentence_list

#example product/aspect pairs: B01CWSRW62 with "battery" reads mostly negative

#B017AYXV9U with "fun" reads positive
reviews=get_reviews(top_car_reviews, "B01CWSRW62")
#print(reviews)

# show the sentences of that product that mention a product recipient
get_aspect_sentence_list(PRODUCT_RECIPIENT,reviews)

["My son doesn't mind, but he has to stop and spin to line up his direction.",
 'Missing batteries for remote and for the car, running n Christmas for the kids',
 'My son opened this car for Christmas...its a dud.',
 'Gave to my grandson for his birthday, wheel assembly broke on day two.',
 'and easy for kids to operate !',
 "Big let down for my son's 8th Birthday!",
 'Worked for my grandson for a day and then after recharging it would not move.',
 '4 year grandson loves.',
 'My son received this as a birthday gift last week.',
 'Not sure who loves it more my husband or my grandson who we bought it for they both are having a blast.',
 'My son has had lots of fun.',
 'my grand son loves it!',
 "Didn't work the instructions in how to operate were unclear and I had a very disappointed kid on Christmas",
 'Grandsons race these all over the yard',
 'Would love to have a toy like this, one my son has his heart set on (that Santa gave him) work for more than a week.']

In [37]:
# notebook-wide default for the averaging mode of aggregate_ent_emotions
non_zero_mean=False
def aggregate_ent_emotions(sentence_list, use_non_zero_mean=None):
    ''' gets the emotion values of each sentence, then averages them per emotion
    args:
    sentence_list -- list of sentences containing the aspect word
    use_non_zero_mean -- True: each emotion is averaged only over the sentences in
                         which it is non-zero; False: plain mean over all sentences;
                         None (default): use the global non_zero_mean flag

    returns -- average_emote, a list with one value per entry of EMOTIONS (fear,
               anger, anticipation, trust, surprise, positive, negative, sadness,
               disgust, joy) averaged across all sentences; all zeros when there
               are no sentences
    '''
    if len(sentence_list)==0:
        return [0]*len(EMOTIONS) # if there are no sentences return 0 for every emotion

    if use_non_zero_mean is None:
        use_non_zero_mean=non_zero_mean

    #one row per sentence, one column per emotion
    emotions_mat=np.array([get_emotion(sent) for sent in sentence_list])

    if use_non_zero_mean:
        count=(emotions_mat!=0).sum(0)
        count=np.where(count==0,1,count) #avoid dividing by zero for emotions that never occur
        average_emote=np.true_divide(emotions_mat.sum(0),count)#non-zero average of columns
    else:
        average_emote = emotions_mat.mean(axis=0)#average of columns
    return average_emote.tolist()

# example: average emotions of the product-recipient sentences of one product
# note: `reviews` is the list loaded for this same asin in the previous code cell
asin="B01CWSRW62"
sent_list=get_aspect_sentence_list(PRODUCT_RECIPIENT,reviews) 

aggregate_ent_emotions(sent_list)


[0.024074074074074074,
 0.027777777777777776,
 0.05555555555555555,
 0.01851851851851852,
 0.03888888888888888,
 0.09259259259259259,
 0.03518518518518519,
 0.01851851851851852,
 0.01111111111111111,
 0.07777777777777777]

In [38]:
def get_product_aspect_emotions_m1(asin, df ,aspect_list):
    '''emotion values of every aspect of one product, using method 1
    (score each sentence that mentions the aspect, then average the scores)
    args:
    asin -- the product identifier
    df -- pandas dataframe containing the reviews
    aspect_list -- list of the product aspects

    returns -- list with one row per aspect: the aspect name followed by one value
               per entry of EMOTIONS
    '''
    product_reviews=get_reviews(df, asin)

    def emotion_row(aspect):
        #sentences mentioning the aspect, collapsed into a single emotion vector
        mentions=get_aspect_sentence_list(aspect,product_reviews)
        return [aspect]+aggregate_ent_emotions(mentions)

    return [emotion_row(aspect) for aspect in aspect_list]

# example: method-1 emotions for the consolidated aspects of one product
asin="B01CWSRW62"
aspect_list=get_product_aspects(asin,"dataset/top_10_review_nouns_consolidated_words.csv")
aspect_emote_mat=get_product_aspect_emotions_m1(asin,top_car_reviews,aspect_list)

In [39]:
# turn the example rows into a dataframe: build all row dictionaries first and create
# the frame once (DataFrame.append is gone from pandas 2.0 and copied the frame per row)
aspect_emot_df = pd.DataFrame(
    [get_dic_df_rows(asin,row) for row in aspect_emote_mat],
    columns = EMOTE_DF_INDEX,
)

aspect_emot_df

Unnamed: 0,asin,aspect,fear,anger,anticip,trust,surprise,positive,negative,sadness,disgust,joy
0,B01CWSRW62,product_recipient,0.024074,0.027778,0.055556,0.018519,0.038889,0.092593,0.035185,0.018519,0.011111,0.077778
1,B01CWSRW62,battery,0.074405,0.297421,0.016667,0.04246,0.0,0.072222,0.366865,0.032738,0.0,0.013889
2,B01CWSRW62,product,0.012346,0.103086,0.054938,0.085802,0.01358,0.167284,0.13642,0.021605,0.02284,0.085802
3,B01CWSRW62,fun,0.0,0.0,0.272917,0.0,0.0,0.304167,0.025,0.0,0.025,0.247917
4,B01CWSRW62,christmas,0.0,0.033333,0.061905,0.061905,0.028571,0.028571,0.061905,0.033333,0.061905,0.028571
5,B01CWSRW62,minute,0.0,0.1,0.0,0.0,0.0,0.0,0.3,0.0,0.0,0.0
6,B01CWSRW62,day,0.027778,0.0,0.027778,0.027778,0.027778,0.430556,0.152778,0.027778,0.0,0.027778
7,B01CWSRW62,plug,0.0,0.0,0.066667,0.0,0.0,0.066667,0.288889,0.111111,0.066667,0.066667
8,B01CWSRW62,great,0.0,0.1,0.033333,0.033333,0.033333,0.1,0.066667,0.0,0.0,0.033333
9,B01CWSRW62,wheel,0.092593,0.055556,0.037037,0.037037,0.037037,0.185185,0.259259,0.203704,0.055556,0.037037


In [40]:
def gen_product_aspect_emotes(df,file_name='basic'):
    '''
    generates aspect emotion data for each product based on the product aspects, using method 1
       args:
       df - a dataframe with the products and reviews (needs the "asin" column)
       file_name - the part of the aspect file name between
                   "dataset/top_10_review_nouns_" and "_words.csv"
                   (this notebook calls it with the entries of ASPECT_METHODS)

       return:
       aspect_emote_df - a data frame with the EMOTE_DF_INDEX columns, one row per
                         product aspect

       note: a later cell defines a method-2 function with this same name, so the
       definition that runs is the one from the most recently executed cell
    '''
    #make file name
    aspect_file_name="dataset/top_10_review_nouns_"+file_name+"_words.csv"

    #get list of asins
    asins=df["asin"].unique()
    total=len(asins)

    #rows are collected in a list and turned into a dataframe once at the end
    records=[]
    for position, asin in enumerate(asins):
        #progress relative to the actual number of products
        drawProgressBar(position/total,30)

        #get product aspects
        aspect_list=get_product_aspects(asin, aspect_file_name)

        #get emotions for each product aspect
        aspect_emote_mat=get_product_aspect_emotions_m1(asin,df,aspect_list)

        records.extend(get_dic_df_rows(asin,row) for row in aspect_emote_mat)

    return pd.DataFrame(records, columns = EMOTE_DF_INDEX)

# method-1 emotions for every product, using the consolidated aspect file
gen_product_aspect_emotes(top_car_reviews,'consolidated')



Unnamed: 0,asin,aspect,fear,anger,anticip,trust,surprise,positive,negative,sadness,disgust,joy
0,B00005LBZO,product,0.044872,0.015152,0.088131,0.087937,0.026515,0.277758,0.057692,0.004662,0.000000,0.063947
1,B00005LBZO,product_recipient,0.018687,0.051515,0.108838,0.103788,0.033081,0.225000,0.122222,0.036364,0.012626,0.075758
2,B00005LBZO,year,0.013158,0.017544,0.080702,0.099123,0.048246,0.142982,0.109649,0.013158,0.026316,0.080702
3,B00005LBZO,time,0.000000,0.027778,0.553704,0.027778,0.050000,0.161111,0.064815,0.037037,0.000000,0.077778
4,B00005LBZO,bike,0.000000,0.050000,0.050000,0.000000,0.000000,0.250000,0.150000,0.100000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
295,B01CWSRW62,minute,0.000000,0.100000,0.000000,0.000000,0.000000,0.000000,0.300000,0.000000,0.000000,0.000000
296,B01CWSRW62,day,0.027778,0.000000,0.027778,0.027778,0.027778,0.430556,0.152778,0.027778,0.000000,0.027778
297,B01CWSRW62,plug,0.000000,0.000000,0.066667,0.000000,0.000000,0.066667,0.288889,0.111111,0.066667,0.066667
298,B01CWSRW62,great,0.000000,0.100000,0.033333,0.033333,0.033333,0.100000,0.066667,0.000000,0.000000,0.033333


In [41]:
#run method-1 emotion detection for every aspect extraction method
#and write one csv per method (the plain-mean variant while non_zero_mean is False)
for aspect_method in ASPECT_METHODS:
    print("\n"+aspect_method)
    emote_csv_name="dataset/aspects_emotions_m1_"+aspect_method+".csv"
    emote_df=gen_product_aspect_emotes(top_car_reviews,aspect_method)
    emote_df.to_csv(emote_csv_name,index=False)




rule_based
rule_heuristic
consolidated
manual

In [42]:
# same export as above, but with the non-zero mean; the global flag read by
# aggregate_ent_emotions is switched on only for the duration of this loop
non_zero_mean=True
print("\nNon-zero mean")
try:
    for aspect_method in ASPECT_METHODS:
        print("\n"+aspect_method)
        emote_csv_name="dataset/aspects_emotions_m1_non_zero_mean_"+aspect_method+".csv"
        emote_df=gen_product_aspect_emotes(top_car_reviews,aspect_method)
        emote_df.to_csv(emote_csv_name,index=False)
finally:
    # always restore the default, even if one of the runs fails, so later cells
    # do not silently keep using the non-zero mean
    non_zero_mean=False


Non-zero mean

rule_based
rule_heuristic
consolidated
manual

## Method 2: Single emotional analysis on aggregated aspect text blob

In [43]:
#sanity check of NRCLex on two hand-written reviews, one positive and one negative
text = ['What a great phone! The screen is amazing and works well even in bright light. The battery lasts long into the day just as I need. This phone also has a fantastic camera that takes beautiful videos and amazing stills. I can’t believe how much the software has improved from my old phone. I am veary happy with my purchase!', 
        'Terrible phone! The screen is too dull, I cannot see it in bright light. The battery lasts only half the day and there is no charging brick in the box which is very disappointing. This phone also has a awful camera that takes mediocre videos and stills. I can’t believe how bad the software is old phone was better. I am unhappy with my purchase!']

# print each review with its raw affect frequencies and the number of keys returned
for sample in text:
    emotion = NRCLex(sample)
    scores = emotion.affect_frequencies
    print('\n\n', sample, ': ', scores)
    print(len(scores))



 What a great phone! The screen is amazing and works well even in bright light. The battery lasts long into the day just as I need. This phone also has a fantastic camera that takes beautiful videos and amazing stills. I can’t believe how much the software has improved from my old phone. I am veary happy with my purchase! :  {'fear': 0.0, 'anger': 0.1, 'anticip': 0.0, 'trust': 0.1, 'surprise': 0.0, 'positive': 0.3, 'negative': 0.1, 'sadness': 0.0, 'disgust': 0.0, 'joy': 0.2, 'anticipation': 0.2}
11


 Terrible phone! The screen is too dull, I cannot see it in bright light. The battery lasts only half the day and there is no charging brick in the box which is very disappointing. This phone also has a awful camera that takes mediocre videos and stills. I can’t believe how bad the software is old phone was better. I am unhappy with my purchase! :  {'fear': 0.09523809523809523, 'anger': 0.19047619047619047, 'anticip': 0.0, 'trust': 0.0, 'surprise': 0.0, 'positive': 0.0, 'negative': 0.333

In [44]:


def get_aspect_textblob(aspect,reviews):
    ''' gets the aspect's textblob (concatenated sentences containing the aspect word)
    args:
    aspect -- a string for the aspect word
    reviews -- a list of review texts

    returns -- textblob: every sentence selected by get_aspect_sentence_list for this
               aspect, each followed by a single space, joined into one string
               (an empty string when no sentence matches)
    '''
    #sentence selection lives in get_aspect_sentence_list so that both emotion
    #methods always work on exactly the same sentences
    sentences=get_aspect_sentence_list(aspect,reviews)
    return "".join(sent+" " for sent in sentences)

#example product/aspect pairs: B01CWSRW62 with "battery" reads mostly negative

#B017AYXV9U with "fun" reads positive
reviews=get_reviews(top_car_reviews, "B01CWSRW62")
#print(reviews)

# show the concatenated product-recipient sentences of that product
get_aspect_textblob(PRODUCT_RECIPIENT,reviews)

"My son doesn't mind, but he has to stop and spin to line up his direction. Missing batteries for remote and for the car, running n Christmas for the kids My son opened this car for Christmas...its a dud. Gave to my grandson for his birthday, wheel assembly broke on day two. and easy for kids to operate ! Big let down for my son's 8th Birthday! Worked for my grandson for a day and then after recharging it would not move. 4 year grandson loves. My son received this as a birthday gift last week. Not sure who loves it more my husband or my grandson who we bought it for they both are having a blast. My son has had lots of fun. my grand son loves it! Didn't work the instructions in how to operate were unclear and I had a very disappointed kid on Christmas Grandsons race these all over the yard Would love to have a toy like this, one my son has his heart set on (that Santa gave him) work for more than a week. "

In [45]:


def get_product_aspect_emotions_m2(asin, df ,aspect_list):
    '''emotion values of every aspect of one product, using method 2
    (score the concatenation of all sentences that mention the aspect in one go)
    args:
    asin -- the product identifier
    df -- pandas dataframe containing the reviews
    aspect_list -- list of the product aspects

    returns -- list with one row per aspect: the aspect name followed by one value
               per entry of EMOTIONS
    '''
    product_reviews=get_reviews(df, asin)

    def emotion_row(aspect):
        #single emotion analysis over the aspect's whole text blob
        blob=get_aspect_textblob(aspect,product_reviews)
        return [aspect]+get_emotion(blob)

    return [emotion_row(aspect) for aspect in aspect_list]

# example: method-2 emotions for the consolidated aspects of one product
asin="B01CWSRW62"
aspect_list=get_product_aspects(asin,"dataset/top_10_review_nouns_consolidated_words.csv")
aspect_emote_mat=get_product_aspect_emotions_m2(asin,top_car_reviews,aspect_list)

In [46]:
   
# turn the example rows into a dataframe: build all row dictionaries first and create
# the frame once (DataFrame.append is gone from pandas 2.0 and copied the frame per row)
aspect_emot_df = pd.DataFrame(
    [get_dic_df_rows(asin,row) for row in aspect_emote_mat],
    columns = EMOTE_DF_INDEX,
)

aspect_emot_df

Unnamed: 0,asin,aspect,fear,anger,anticip,trust,surprise,positive,negative,sadness,disgust,joy
0,B01CWSRW62,product_recipient,0.060606,0.060606,0.151515,0.060606,0.121212,0.212121,0.090909,0.060606,0.030303,0.151515
1,B01CWSRW62,battery,0.078947,0.289474,0.026316,0.078947,0.0,0.105263,0.342105,0.052632,0.0,0.026316
2,B01CWSRW62,product,0.030769,0.138462,0.092308,0.107692,0.030769,0.2,0.184615,0.046154,0.046154,0.123077
3,B01CWSRW62,fun,0.0,0.0,0.307692,0.0,0.0,0.346154,0.038462,0.0,0.038462,0.269231
4,B01CWSRW62,christmas,0.0,0.076923,0.153846,0.153846,0.076923,0.076923,0.153846,0.076923,0.153846,0.076923
5,B01CWSRW62,minute,0.0,0.333333,0.0,0.0,0.0,0.0,0.666667,0.0,0.0,0.0
6,B01CWSRW62,day,0.083333,0.0,0.083333,0.083333,0.083333,0.333333,0.166667,0.083333,0.0,0.083333
7,B01CWSRW62,plug,0.0,0.0,0.125,0.0,0.0,0.125,0.375,0.125,0.125,0.125
8,B01CWSRW62,great,0.0,0.222222,0.111111,0.111111,0.111111,0.222222,0.111111,0.0,0.0,0.111111
9,B01CWSRW62,wheel,0.111111,0.055556,0.055556,0.055556,0.055556,0.166667,0.222222,0.166667,0.055556,0.055556


In [47]:

def gen_product_aspect_emotes(df,file_name='basic'):
    '''
    generates aspect emotion data for each product based on the product aspects, using method 2
       args:
       df - a dataframe with the products and reviews (needs the "asin" column)
       file_name - the part of the aspect file name between
                   "dataset/top_10_review_nouns_" and "_words.csv"
                   (this notebook calls it with the entries of ASPECT_METHODS)

       return:
       aspect_emote_df - a data frame with the EMOTE_DF_INDEX columns, one row per
                         product aspect

       note: this replaces the method-1 function of the same name defined in an
       earlier cell; from here on the name refers to method 2
    '''
    #make file name
    aspect_file_name="dataset/top_10_review_nouns_"+file_name+"_words.csv"

    #get list of asins
    asins=df["asin"].unique()
    total=len(asins)

    #rows are collected in a list and turned into a dataframe once at the end
    records=[]
    for position, asin in enumerate(asins):
        #progress relative to the actual number of products
        drawProgressBar(position/total,30)

        #get product aspects
        aspect_list=get_product_aspects(asin, aspect_file_name)

        #get emotions for each product aspect
        aspect_emote_mat=get_product_aspect_emotions_m2(asin,df,aspect_list)

        records.extend(get_dic_df_rows(asin,row) for row in aspect_emote_mat)

    return pd.DataFrame(records, columns = EMOTE_DF_INDEX)

# method-2 emotions for every product, using the consolidated aspect file
emote_df=gen_product_aspect_emotes(top_car_reviews,'consolidated')



In [48]:
# save the method-2 / consolidated result without the dataframe index
emote_csv_name="dataset/aspects_emotions_m2_consolidated.csv"
emote_df.to_csv(emote_csv_name,index=False)

In [49]:
# read the file back to check what was written
pd.read_csv(emote_csv_name)

Unnamed: 0,asin,aspect,fear,anger,anticip,trust,surprise,positive,negative,sadness,disgust,joy
0,B00005LBZO,product,0.084746,0.016949,0.101695,0.118644,0.067797,0.355932,0.084746,0.033898,0.000000,0.135593
1,B00005LBZO,product_recipient,0.029703,0.049505,0.128713,0.128713,0.069307,0.267327,0.108911,0.049505,0.029703,0.138614
2,B00005LBZO,year,0.019231,0.019231,0.153846,0.134615,0.096154,0.250000,0.115385,0.019231,0.019231,0.173077
3,B00005LBZO,time,0.000000,0.038462,0.423077,0.038462,0.076923,0.192308,0.076923,0.038462,0.000000,0.115385
4,B00005LBZO,bike,0.000000,0.142857,0.142857,0.000000,0.000000,0.285714,0.285714,0.142857,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
295,B01CWSRW62,minute,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000,0.666667,0.000000,0.000000,0.000000
296,B01CWSRW62,day,0.083333,0.000000,0.083333,0.083333,0.083333,0.333333,0.166667,0.083333,0.000000,0.083333
297,B01CWSRW62,plug,0.000000,0.000000,0.125000,0.000000,0.000000,0.125000,0.375000,0.125000,0.125000,0.125000
298,B01CWSRW62,great,0.000000,0.222222,0.111111,0.111111,0.111111,0.222222,0.111111,0.000000,0.000000,0.111111


In [50]:
#run method-2 emotion detection for every aspect extraction method
#and write one csv per method
for aspect_method in ASPECT_METHODS:
    print("\n"+aspect_method)
    emote_csv_name="dataset/aspects_emotions_m2_"+aspect_method+".csv"
    emote_df=gen_product_aspect_emotes(top_car_reviews,aspect_method)
    emote_df.to_csv(emote_csv_name,index=False)


rule_based
rule_heuristic
consolidated
manual

# Data visualization
Create bar charts showing the aspect emotions for each product.

In [51]:
# pick the aspect/emotion file to visualise (swap the commented line to use the manual aspects)
emote_csv_name="dataset/aspects_emotions_m2_consolidated.csv"
#emote_csv_name="dataset/aspects_emotions_m2_manual.csv"

emote_df=pd.read_csv(emote_csv_name)
emote_df

Unnamed: 0,asin,aspect,fear,anger,anticip,trust,surprise,positive,negative,sadness,disgust,joy
0,B00005LBZO,product,0.084746,0.016949,0.101695,0.118644,0.067797,0.355932,0.084746,0.033898,0.000000,0.135593
1,B00005LBZO,product_recipient,0.029703,0.049505,0.128713,0.128713,0.069307,0.267327,0.108911,0.049505,0.029703,0.138614
2,B00005LBZO,year,0.019231,0.019231,0.153846,0.134615,0.096154,0.250000,0.115385,0.019231,0.019231,0.173077
3,B00005LBZO,time,0.000000,0.038462,0.423077,0.038462,0.076923,0.192308,0.076923,0.038462,0.000000,0.115385
4,B00005LBZO,bike,0.000000,0.142857,0.142857,0.000000,0.000000,0.285714,0.285714,0.142857,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
295,B01CWSRW62,minute,0.000000,0.333333,0.000000,0.000000,0.000000,0.000000,0.666667,0.000000,0.000000,0.000000
296,B01CWSRW62,day,0.083333,0.000000,0.083333,0.083333,0.083333,0.333333,0.166667,0.083333,0.000000,0.083333
297,B01CWSRW62,plug,0.000000,0.000000,0.125000,0.000000,0.000000,0.125000,0.375000,0.125000,0.125000,0.125000
298,B01CWSRW62,great,0.000000,0.222222,0.111111,0.111111,0.111111,0.222222,0.111111,0.000000,0.000000,0.111111


In [52]:
# keep only the aspect rows of a single example product
product_df=emote_df[emote_df['asin']=='B00005LBZO']
product_df

Unnamed: 0,asin,aspect,fear,anger,anticip,trust,surprise,positive,negative,sadness,disgust,joy
0,B00005LBZO,product,0.084746,0.016949,0.101695,0.118644,0.067797,0.355932,0.084746,0.033898,0.0,0.135593
1,B00005LBZO,product_recipient,0.029703,0.049505,0.128713,0.128713,0.069307,0.267327,0.108911,0.049505,0.029703,0.138614
2,B00005LBZO,year,0.019231,0.019231,0.153846,0.134615,0.096154,0.25,0.115385,0.019231,0.019231,0.173077
3,B00005LBZO,time,0.0,0.038462,0.423077,0.038462,0.076923,0.192308,0.076923,0.038462,0.0,0.115385
4,B00005LBZO,bike,0.0,0.142857,0.142857,0.0,0.0,0.285714,0.285714,0.142857,0.0,0.0
5,B00005LBZO,pedal,0.0,0.333333,0.166667,0.0,0.0,0.333333,0.166667,0.0,0.0,0.0
6,B00005LBZO,seat,0.0625,0.125,0.0625,0.1875,0.0,0.125,0.3125,0.125,0.0,0.0
7,B00005LBZO,wheel,0.111111,0.0,0.0,0.222222,0.0,0.444444,0.222222,0.0,0.0,0.0
8,B00005LBZO,condition,0.111111,0.0,0.222222,0.111111,0.111111,0.222222,0.111111,0.0,0.0,0.111111
9,B00005LBZO,today,0.0,0.0,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5


In [97]:

# bar colour of each emotion column
COLOR_DIC={"fear":"black", "anger":"darkred", "anticip":"cyan", "trust":"royalblue", "surprise":"yellow", "positive":"green", "negative":"red", "sadness":"blue", "disgust":"limegreen", "joy":"hotpink"}


def product_emotion_barchart(df,title,display=True, path=""):
    '''
    Takes the emote-aspect dataframe of a single product and draws it as a horizontal bar chart
        args:
        df -- emote-aspect dataframe for a single product (an "aspect" column plus the emotion columns)
        title -- title of the chart; only the first 75 characters are used
        display -- if true the plot is displayed; if false it is only drawn (and saved) and then closed
        path -- path where the figure will be saved, won't save if empty string;
                the folder of the path is created when it does not exist yet
    '''
    import os  # only needed here, for creating the output folder

    if display:
        plt.ion()
    else:
        plt.ioff()#stop displaying plots

    fig, ax = plt.subplots()
    fig.set_size_inches(9,15)

    #one group of bars per aspect, one bar per emotion column
    df.plot(ax=ax,x="aspect", y=EMOTE_DF_INDEX[2:], xlim=(0.0,1.0), kind="barh",width=0.7,color=COLOR_DIC,edgecolor = 'black', legend='reverse')
    ax.grid()
    ax.set_title(title[:75])

    if path != "":
        folder=os.path.dirname(path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        fig.savefig(path,bbox_inches='tight')

    if not display:
        #release the figure: this function is called in loops over every product,
        #and figures that are never shown would otherwise stay open in memory
        plt.close(fig)
    
product_emotion_barchart(product_df,"oooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooo")


<IPython.core.display.Javascript object>

In [98]:
def make_aspect_emotes_charts(df,method="m2",file_name='basic'):
    '''
    saves one aspect emotion bar chart per product
       args:
       df - a dataframe with the products and reviews (uses the "asin" and "title" columns)
       method - emotion method part of the file names, e.g. "m1" or "m2"
       file_name - aspect extraction part of the file names

       reads  dataset/aspects_emotions_<method>_<file_name>.csv
       writes data_charts/emotion_barcharts_<method>_<file_name>/<asin>_barchart.png
    '''
    variant=method+"_"+file_name
    emote_df=pd.read_csv("dataset/aspects_emotions_"+variant+".csv")
    folder_path="data_charts/emotion_barcharts_"+variant+"/"

    for asin in df["asin"].unique():
        #the first title listed for the product becomes the chart title
        product_titles=df.loc[df['asin']==asin, "title"].unique()

        #draw the product's aspect/emotion rows and save the figure without displaying it
        product_emotion_barchart(
            emote_df[emote_df['asin']==asin],
            product_titles[0],
            display=False,
            path=folder_path+asin+"_barchart.png",
        )
        
# trial run with the defaults (method "m2", file_name 'basic'), which reads
# dataset/aspects_emotions_m2_basic.csv
# NOTE(review): no cell shown in this notebook writes that file -- confirm it exists
make_aspect_emotes_charts(top_car_reviews)

In [99]:
# method-2 charts for every aspect extraction method
for aspect_method in ASPECT_METHODS:
    print("Making charts: "+ aspect_method + "."*10, end="")
    make_aspect_emotes_charts(top_car_reviews,"m2",aspect_method)
    print("Complete!")

Making charts: rule_based..........Complete!
Making charts: rule_heuristic..........Complete!
Making charts: consolidated..........Complete!
Making charts: manual..........Complete!


In [100]:
# method-1 (plain mean) charts for every aspect extraction method
for aspect_method in ASPECT_METHODS:
    print("Making charts: "+ aspect_method + "."*10, end="")
    make_aspect_emotes_charts(top_car_reviews,"m1",aspect_method)
    print("Complete!")

Making charts: rule_based..........Complete!
Making charts: rule_heuristic..........Complete!
Making charts: consolidated..........Complete!
Making charts: manual..........Complete!


In [101]:
# method-1 (non-zero mean) charts for every aspect extraction method
for aspect_method in ASPECT_METHODS:
    print("Making charts: "+ aspect_method + "."*10, end="")
    make_aspect_emotes_charts(top_car_reviews,"m1_non_zero_mean",aspect_method)
    print("Complete!")

Making charts: rule_based..........Complete!
Making charts: rule_heuristic..........Complete!
Making charts: consolidated..........Complete!
Making charts: manual..........Complete!


# Consolidate charts
Show all the charts for a single product in a single image.

In [107]:
def create_product_multi_chart(asin):
    '''Puts all the saved bar charts of one product into a single image and saves it.

    args:
    asin -- product whose charts are combined

    Reads data_charts/emotion_barcharts_<emotion method>_<aspect method>/<asin>_barchart.png
    for every emotion method (one grid row each) and every entry of ASPECT_METHODS
    (one grid column each) and writes data_charts/combined/<asin>.png.

    raises -- FileNotFoundError when one of the chart images cannot be read
    '''
    emote_methods = ['m1','m1_non_zero_mean','m2']
    rows = len(emote_methods)
    columns = len(ASPECT_METHODS)

    plt.ioff()#stop displaying plots
    fig = plt.figure(figsize=(30, 40))
    fig.subplots_adjust(wspace=0, hspace=0, left=0, right=1, bottom=0, top=1)
    try:
        position = 1
        for emote_method in emote_methods:
            for aspect_method in ASPECT_METHODS:
                #get chart image
                folder_name="data_charts/emotion_barcharts_{}_{}".format(emote_method,aspect_method)
                file_name=folder_name+"/"+asin+"_barchart.png"
                im = cv2.imread(file_name)
                if im is None:
                    #imread signals an unreadable or missing file by returning None
                    raise FileNotFoundError("could not read chart image: "+file_name)

                ax = fig.add_subplot(rows, columns, position)

                # OpenCV loads images as BGR while imshow expects RGB; without the
                # conversion red and blue are swapped in the combined image
                ax.imshow(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
                ax.axis('off')
                ax.set_title("{} {}".format(emote_method,aspect_method))

                position+=1

        fig.savefig("data_charts/combined/"+asin+".png")
    finally:
        #this runs once per product; close the large figure so they do not pile up in memory
        plt.close(fig)
# trial run on a single product
create_product_multi_chart("B0162HF12Q")

In [108]:
# combined chart for every product; the total in the progress message comes from
# the data instead of being fixed at 30
asins=top_car_reviews["asin"].unique() 
for i, asin in enumerate(asins, start=1):
    print("Making charts: {}....{}/{}".format(asin,i,len(asins)))
    create_product_multi_chart(asin)

Making charts: B00005LBZO....1/30
Making charts: B000GPLT68....2/30
Making charts: B000Q9KPB4....3/30
Making charts: B002AKKO6S....4/30
Making charts: B004KU83C8....5/30
Making charts: B004LH01ZW....6/30
Making charts: B006RKH1I6....7/30
Making charts: B006RKBK0Q....8/30
Making charts: B006RKFHBY....9/30
Making charts: B00AM9SZWO....10/30
Making charts: B00APTT9SU....11/30
Making charts: B00E849LS6....12/30
Making charts: B00J97X2YG....13/30
Making charts: B00JPTG45C....14/30
Making charts: B00KFBS5QA....15/30
Making charts: B00KS6ADQW....16/30
Making charts: B00S4RM1XK....17/30
Making charts: B00TSV67XG....18/30
Making charts: B00TWG57CY....19/30
Making charts: B00V8LIPFW....20/30
Making charts: B0155MEP1C....21/30
Making charts: B0155MVMFO....22/30
Making charts: B015SKSQUW....23/30
Making charts: B0162HF12Q....24/30
Making charts: B016YXI7YC....25/30
Making charts: B017AYXV9U....26/30
Making charts: B018VW1X4U....27/30
Making charts: B01B55BPDY....28/30
Making charts: B01CA4R218....