In [1]:
#import libraries
import pandas as pd
import numpy as np
import string

In [2]:
# Load the watch reviews downloaded from Amazon, keeping only the review
# title and the review body.
#
# `error_bad_lines=False` was deprecated in pandas 1.3 and removed in 2.0, so
# it raises TypeError on current pandas. `on_bad_lines="warn"` (pandas >= 1.3)
# is the replacement with the same effect: malformed rows are skipped and a
# warning is emitted for each one.
watch_review = pd.read_csv("fastrac_watch.csv", on_bad_lines="warn", usecols=["Title", "Review"])
watch_review.head()

Unnamed: 0,Title,Review
0,Good,Super fast rack as usual quality & amazon di...
1,Very good product,The product is very good. Thanks to Amazon for...
2,Loved it.,Sooo cute...... Loved it.... Thanks AmazonPerf...
3,Elegqnt and classy.,Very elegant and beautiful watch. Looks quite ...
4,Good watch,I got the exact one what was in the image I ha...


In [3]:
# Load the emotion lexicon: the English word column plus one score column
# per emotion.
lexicon_columns = [
    "English (en)",
    "Anger",
    "Anticipation",
    "Disgust",
    "Fear",
    "Joy",
    "Sadness",
    "Surprise",
    "Trust",
]
NRC_lexicon = pd.read_excel("NRC-Emotion-Lexicon.xlsx", usecols=lexicon_columns)
NRC_lexicon.head()

Unnamed: 0,English (en),Anger,Anticipation,Disgust,Fear,Joy,Sadness,Surprise,Trust
0,aback,0,0,0,0,0,0,0,0
1,abacus,0,0,0,0,0,0,0,1
2,abandon,0,0,0,1,0,1,0,0
3,abandoned,1,0,0,1,0,1,0,0
4,abandonment,1,0,0,1,0,1,1,0


### Text processing

In [4]:
# Text processing: normalise every review to stripped, punctuation-free,
# lower-case text. `book` stays a plain list of strings, one per review row.

# Missing reviews are read by pandas as NaN (a float), which has no .strip();
# replace them with "" and coerce everything to str so cleaning never crashes.
raw_reviews = watch_review.Review.fillna("").astype(str)

# Build the punctuation-deleting table once instead of once per review.
punctuation_table = str.maketrans("", "", string.punctuation)

book = [
    review.strip().translate(punctuation_table).lower()  # trim -> drop punctuation -> lower-case
    for review in raw_reviews
]
book[0:5]

['super   fast rack as usual quality  amazon discount attracts buying online ',
 'the product is very good thanks to amazon for delivering it within 1day nicely packed only d belt was too large for my wrist had to cut it to adjust otherwise the watch is good am too satisfied with amazon  s servicerecommended for all',
 'sooo cute loved it thanks amazonperfecttttt',
 'very elegant and beautiful watch looks quite classy in hand beautifully designedworth for rs 1500',
 'i got the exact one what was in the image i had showed it s not light weight at all  a bit of heavy it s a good product']

In [5]:
# Wrap the cleaned review texts in a single-column dataframe ("reviews").
reviews_df = pd.DataFrame({"reviews": book}, columns=["reviews"])
reviews_df.head()

Unnamed: 0,reviews
0,super fast rack as usual quality amazon dis...
1,the product is very good thanks to amazon for ...
2,sooo cute loved it thanks amazonperfecttttt
3,very elegant and beautiful watch looks quite c...
4,i got the exact one what was in the image i ha...


In [6]:
# Read the stop-word file into a one-column frame named "words", then expose
# the entries as a plain Python list.
stop_word_frame = pd.read_csv("stop.txt", names=["words"])
stop_words = list(stop_word_frame["words"])

In [7]:
# NLTK tooling used for tokenization and lemmatization
import nltk
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

# Download the resources for tokenization ("punkt") and lemmatization
# ("wordnet"), in that order.
for nltk_resource in ("punkt", "wordnet"):
    nltk.download(nltk_resource)

# Shared lemmatizer instance used by the emotion scoring below
wordlem = WordNetLemmatizer()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\praing57504\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\praing57504\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [8]:
# Function to find the dominant emotion(s) of a review

def calculate_emotion(text):
    """Return the dominant emotion label(s) for one piece of review text.

    The text is tokenized, each token is lemmatized as a verb, stop words are
    dropped, and every remaining word found in ``NRC_lexicon`` adds that
    word's per-emotion values to running totals.

    Parameters
    ----------
    text : str
        Review text. Empty, ``None`` or non-string values (e.g. NaN from
        pandas) are treated as carrying no emotion.

    Returns
    -------
    list of str
        All emotions tied for the highest total, in the fixed order
        Anger, Anticipation, Disgust, Fear, Sadness, Joy, Surprise, Trust.
        ``["No_emotion"]`` when no emotion scored above zero.
    """
    emotions = ("Anger", "Anticipation", "Disgust", "Fear",
                "Sadness", "Joy", "Surprise", "Trust")
    scores = dict.fromkeys(emotions, 0)  # every emotion starts at zero

    # Only score real, non-empty strings; anything else keeps all-zero totals.
    if isinstance(text, str) and text:
        tokens = word_tokenize(text)  # tokenization
        lemmas = [wordlem.lemmatize(token, pos="v") for token in tokens]  # lemmatization
        content_words = [lemma for lemma in lemmas if lemma not in stop_words]  # stop word removal

        lexicon_words = NRC_lexicon["English (en)"]  # hoisted: same column for every word
        for word in content_words:
            # One scan per word; an empty result means the word is not in the lexicon.
            matches = NRC_lexicon.loc[lexicon_words == word, list(emotions)]
            if matches.empty:
                continue
            first_match = matches.iloc[0]  # if the lexicon repeats a word, use its first row
            for emotion in emotions:
                scores[emotion] += first_match[emotion]

    best_score = max(scores.values())
    if best_score <= 0:
        # Nothing matched: report only "No_emotion" rather than every label
        # that happens to tie at zero.
        return ["No_emotion"]

    return [emotion for emotion, score in scores.items() if score == best_score]

In [9]:
# Score every cleaned review, then attach the resulting labels to the
# original reviews frame as the "Emotion" column.
emotion_labels = reviews_df["reviews"].apply(calculate_emotion)
watch_review["Emotion"] = emotion_labels

In [10]:
# Preview the first five rows of the reviews frame.
watch_review.head(n=5)

Unnamed: 0,Title,Review,Emotion
0,Good,Super fast rack as usual quality & amazon di...,"[Sadness, Trust]"
1,Very good product,The product is very good. Thanks to Amazon for...,[Anticipation]
2,Loved it.,Sooo cute...... Loved it.... Thanks AmazonPerf...,[Joy]
3,Elegqnt and classy.,Very elegant and beautiful watch. Looks quite ...,[Joy]
4,Good watch,I got the exact one what was in the image I ha...,[Trust]
