# Emotions Analyzer

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame
import pymongo
from textblob import TextBlob
from textblob import Word
#import time

In [2]:
# Connect to the local MongoDB server and open the "books" database.
client = pymongo.MongoClient("mongodb://localhost:27017/")
mi_db = client["books"]

# Collection handles used by the update functions in this notebook:
#   revsColec       - source documents that are read and analysed
#   revsMetaColec   - destination of the per-review metadata documents
#   booksMetaColec  - destination of the per-book description metadata documents
revsColec = mi_db["reviews"]
revsMetaColec = mi_db["revsmetadata"]
booksMetaColec = mi_db["booksmetadata"]

## Get emotions from a text

In [3]:
# Polarity and subjectivity of a text
def getSentSubj(texto):
    """Return the TextBlob sentiment of ``texto`` as ``(polarity, subjectivity)``.

    texto -- the string to analyse.
    """
    sentiment = TextBlob(texto).sentiment
    return sentiment.polarity, sentiment.subjectivity

In [4]:
# Parsed lexicon / stop-word resources, keyed by (lexiconPath, stopWordsPath).
# Re-run this cell to force the files to be read again after editing them.
_EMO_RESOURCES = {}


def _loadEmoResources(lexiconPath, stopWordsPath):
    """Return ``(groupedDF, stopWords)`` for the given files, reading them only once."""
    key = (lexiconPath, stopWordsPath)
    if key not in _EMO_RESOURCES:
        # Get the Emotion-Lexicon and make a dummy emotion table for each word in it
        nrcDF = pd.read_table(lexiconPath)
        mask = nrcDF['Score'] == 1
        dummiesDF = pd.get_dummies(nrcDF[mask].Emotion)
        nrcDFDummiesDF = nrcDF.drop(['Emotion', 'Score'], axis = 1).join(dummiesDF)
        groupedDF = nrcDFDummiesDF.groupby('Word').sum().astype(int)

        # Stop-words are lower-cased and stored in a set so that the membership
        # test is case-insensitive and constant-time.
        stopSeries = pd.read_table(stopWordsPath)['WORD'].dropna()
        stopWords = {str(word).lower() for word in stopSeries}

        _EMO_RESOURCES[key] = (groupedDF, stopWords)
    return _EMO_RESOURCES[key]


def getEmomatrix(texto, lexiconPath='NRC-Emotion-Lexicon-Wordlevel-v0.92.txt',
                 stopWordsPath='eng_stop-wods.txt'):
    """Build the emotion matrix ("emo-matrix") of a text.

    Parameters
    ----------
    texto : str
        Text to analyse.
    lexiconPath : str
        Tab-separated lexicon file with 'Word', 'Emotion' and 'Score' columns.
    stopWordsPath : str
        Tab-separated stop-word file with a 'WORD' column.

    Returns
    -------
    pandas.DataFrame
        One row per emotion of the lexicon and one column per lemma of the text
        that appears in the lexicon (in order of first appearance); each cell is
        the lexicon score of the word multiplied by its number of occurrences.
        Two extra columns are appended: 'Sum' (row total) and 'SumRel' (row total
        divided by the grand total, or 0.0 for every emotion when the text
        scores nothing at all, instead of the NaN produced by 0/0).

    Both files are parsed once per pair of paths and cached, because this
    function is called once per document when a whole collection is processed.
    """
    groupedDF, stopWords = _loadEmoResources(lexiconPath, stopWordsPath)

    # Remove the stop-words from the text. Tokens are lower-cased *before* the
    # comparison so that capitalised stop-words ("The", "It", ...) are removed too.
    lowered = [word.lower() for word in texto.split()]
    cleanText = (' ').join([word for word in lowered if word not in stopWords])
    print('Sin stop-word:', cleanText)

    # Lemmatize the result
    sentence = TextBlob(cleanText)
    lemms = [Word(word).lemmatize() for word in sentence.words]
    print('Lematizado', lemms)

    # Count, in a single pass, the occurrences of every lemma known to the lexicon
    wordCounts = {}
    for lemma in lemms:
        if lemma in groupedDF.index:
            wordCounts[lemma] = wordCounts.get(lemma, 0) + 1

    # Emo-matrix of the text: emotion scores of each word times its occurrences
    emoDF = pd.DataFrame(
        {word: list(groupedDF.loc[word] * count) for word, count in wordCounts.items()},
        index = list(groupedDF.columns))

    # Add the sum and relative sum for each emotion in the whole text (emo-vector)
    emoDF['Sum'] = emoDF.sum(axis=1)
    total = emoDF['Sum'].sum()
    emoDF['SumRel'] = emoDF['Sum'] / total if total else 0.0
    return emoDF

In [5]:
# Put everything together
def getMetadata(texto):
    """Return ``(polarity, subjectivity, emotion_vector)`` for ``texto``.

    ``texto`` is converted with ``str()`` first. ``emotion_vector`` is the
    'SumRel' column of the emo-matrix returned by ``getEmomatrix`` as a plain
    list, one value per emotion in the row order of that matrix.
    """
    tx = str(texto)

    # Analyse the sentiment once and unpack both values, instead of running
    # the analysis a second time just to read the other element of the tuple.
    sent, subj = getSentSubj(tx)

    # Select the relative-sum column by name rather than by position
    return sent, subj, list(getEmomatrix(tx)['SumRel'])

In [6]:
# Translate text into English
def toEng(texto):
    """Return ``texto`` as an English string, translating it when needed.

    If the detected language is already 'en' the text is returned unchanged
    (as ``str``); otherwise the TextBlob translation to English is returned.
    Translation is best effort: if detection or translation raises, the
    original text is returned as ``str``.

    NOTE(review): TextBlob's ``detect_language`` / ``translate`` depend on an
    online translation service and are deprecated in recent TextBlob releases;
    confirm they exist in the installed version.
    """
    blob = TextBlob(texto)
    try:
        # detect_language is a method: it has to be *called*. Comparing the
        # bound method itself to 'en' is always False, which sent every text
        # (English included) to the translator.
        if blob.detect_language() == 'en':
            return str(texto)
        return str(blob.translate(to = 'en'))
    except Exception:
        # Deliberate fallback: keep the untranslated text rather than abort the
        # caller. `Exception` (not a bare except) lets Ctrl-C through.
        return str(texto)

## Update collections with metadata from reviews and synopsis

In [11]:
def updateRevsmetadata(skip=10327):
    """Insert one metadata document into ``revsMetaColec`` for every review.

    Parameters
    ----------
    skip : int
        Number of leading documents of the cursor to pass over without
        processing, used to resume an interrupted run. The default keeps the
        offset that was previously hard-coded in the loop.

    For each remaining review the body is translated to English if needed,
    analysed with ``getMetadata`` and stored as
    ``{_id, userID, bookID, revSent, revSubj, revEmo}``, where ``revEmo`` maps
    emotion names to the values of the emotion vector. One progress line is
    printed per review; a failed insertion is reported and the loop continues.
    """
    # Names are matched to the emotion vector by position.
    # NOTE(review): this assumes getMetadata returns the emotions in exactly
    # this (alphabetical) order - confirm against the lexicon columns.
    emotions = ('anger', 'anticipation', 'disgust', 'fear', 'joy',
                'negative', 'positive', 'sadness', 'surprise', 'trust')

    cursor = revsColec.find( {}, {'userID': 1, 'bookID': 1, 'revBody': 1} )
    for n, review in enumerate(cursor, start=1):
        if n <= skip:
            continue

        # Insert document for each review, to be able to get average metadata for each user and book
        # Translate into English if needed
        enRev = toEng(review['revBody'])

        revSentValue, revSubjValue, revEmoValue = getMetadata(enRev)
        revEmoDict = {name: revEmoValue[pos] for pos, name in enumerate(emotions)}

        v =  { '_id': review['_id'], 'userID': review['userID'], 'bookID': review['bookID'],
            'revSent':revSentValue, 'revSubj': revSubjValue, 'revEmo': revEmoDict }

        try:
            revsMetaColec.insert_one(v)
            print('Insertion in revsmetadata:', n)
        except Exception as err:
            # Report the reason instead of hiding it; not a bare except, so the
            # run can still be interrupted from the keyboard.
            print('Insertion failed:', n, err)

    print('Done')

In [9]:
# Run the review pass: inserts into revsmetadata and prints one progress line per processed review
updateRevsmetadata()

Insertion in revsmetadata: 8987
Insertion in revsmetadata: 8988
Insertion in revsmetadata: 8989
Insertion in revsmetadata: 8990
Insertion in revsmetadata: 8991
Insertion in revsmetadata: 8992
Insertion in revsmetadata: 8993
Insertion in revsmetadata: 8994
Insertion in revsmetadata: 8995
Insertion in revsmetadata: 8996
Insertion in revsmetadata: 8997
Insertion in revsmetadata: 8998
Insertion in revsmetadata: 8999
Insertion in revsmetadata: 9000
Insertion in revsmetadata: 9001
Insertion in revsmetadata: 9002
Insertion in revsmetadata: 9003
Insertion in revsmetadata: 9004
Insertion in revsmetadata: 9005
Insertion in revsmetadata: 9006
Insertion in revsmetadata: 9007
Insertion in revsmetadata: 9008
Insertion in revsmetadata: 9009
Insertion in revsmetadata: 9010
Insertion in revsmetadata: 9011
Insertion in revsmetadata: 9012
Insertion in revsmetadata: 9013
Insertion in revsmetadata: 9014
Insertion in revsmetadata: 9015
Insertion in revsmetadata: 9016
Insertion in revsmetadata: 9017
Insertio

Insertion in revsmetadata: 9244
Insertion in revsmetadata: 9245
Insertion in revsmetadata: 9246
Insertion in revsmetadata: 9247
Insertion in revsmetadata: 9248
Insertion in revsmetadata: 9249
Insertion in revsmetadata: 9250
Insertion in revsmetadata: 9251
Insertion in revsmetadata: 9252
Insertion in revsmetadata: 9253
Insertion in revsmetadata: 9254
Insertion in revsmetadata: 9255
Insertion in revsmetadata: 9256
Insertion in revsmetadata: 9257
Insertion in revsmetadata: 9258
Insertion in revsmetadata: 9259
Insertion in revsmetadata: 9260
Insertion in revsmetadata: 9261
Insertion in revsmetadata: 9262
Insertion in revsmetadata: 9263
Insertion in revsmetadata: 9264
Insertion in revsmetadata: 9265
Insertion in revsmetadata: 9266
Insertion in revsmetadata: 9267
Insertion in revsmetadata: 9268
Insertion in revsmetadata: 9269
Insertion in revsmetadata: 9270
Insertion in revsmetadata: 9271
Insertion in revsmetadata: 9272
Insertion in revsmetadata: 9273
Insertion in revsmetadata: 9274
Insertio

Insertion in revsmetadata: 9501
Insertion in revsmetadata: 9502
Insertion in revsmetadata: 9503
Insertion in revsmetadata: 9504
Insertion in revsmetadata: 9505
Insertion in revsmetadata: 9506
Insertion in revsmetadata: 9507
Insertion in revsmetadata: 9508
Insertion in revsmetadata: 9509
Insertion in revsmetadata: 9510
Insertion in revsmetadata: 9511
Insertion in revsmetadata: 9512
Insertion in revsmetadata: 9513
Insertion in revsmetadata: 9514
Insertion in revsmetadata: 9515
Insertion in revsmetadata: 9516
Insertion in revsmetadata: 9517
Insertion in revsmetadata: 9518
Insertion in revsmetadata: 9519
Insertion in revsmetadata: 9520
Insertion in revsmetadata: 9521
Insertion in revsmetadata: 9522
Insertion in revsmetadata: 9523
Insertion in revsmetadata: 9524
Insertion in revsmetadata: 9525
Insertion in revsmetadata: 9526
Insertion in revsmetadata: 9527
Insertion in revsmetadata: 9528
Insertion in revsmetadata: 9529
Insertion in revsmetadata: 9530
Insertion in revsmetadata: 9531
Insertio

Insertion in revsmetadata: 9758
Insertion in revsmetadata: 9759
Insertion in revsmetadata: 9760
Insertion in revsmetadata: 9761
Insertion in revsmetadata: 9762
Insertion in revsmetadata: 9763
Insertion in revsmetadata: 9764
Insertion in revsmetadata: 9765
Insertion in revsmetadata: 9766
Insertion in revsmetadata: 9767
Insertion in revsmetadata: 9768
Insertion in revsmetadata: 9769
Insertion in revsmetadata: 9770
Insertion in revsmetadata: 9771
Insertion in revsmetadata: 9772
Insertion in revsmetadata: 9773
Insertion in revsmetadata: 9774
Insertion in revsmetadata: 9775
Insertion in revsmetadata: 9776
Insertion in revsmetadata: 9777
Insertion in revsmetadata: 9778
Insertion in revsmetadata: 9779
Insertion in revsmetadata: 9780
Insertion in revsmetadata: 9781
Insertion in revsmetadata: 9782
Insertion in revsmetadata: 9783
Insertion in revsmetadata: 9784
Insertion in revsmetadata: 9785
Insertion in revsmetadata: 9786
Insertion in revsmetadata: 9787
Insertion in revsmetadata: 9788
Insertio

Insertion in revsmetadata: 10014
Insertion in revsmetadata: 10015
Insertion in revsmetadata: 10016
Insertion in revsmetadata: 10017
Insertion in revsmetadata: 10018
Insertion in revsmetadata: 10019
Insertion in revsmetadata: 10020
Insertion in revsmetadata: 10021
Insertion in revsmetadata: 10022
Insertion in revsmetadata: 10023
Insertion in revsmetadata: 10024
Insertion in revsmetadata: 10025
Insertion in revsmetadata: 10026
Insertion in revsmetadata: 10027
Insertion in revsmetadata: 10028
Insertion in revsmetadata: 10029
Insertion in revsmetadata: 10030
Insertion in revsmetadata: 10031
Insertion in revsmetadata: 10032
Insertion in revsmetadata: 10033
Insertion in revsmetadata: 10034
Insertion in revsmetadata: 10035
Insertion in revsmetadata: 10036
Insertion in revsmetadata: 10037
Insertion in revsmetadata: 10038
Insertion in revsmetadata: 10039
Insertion in revsmetadata: 10040
Insertion in revsmetadata: 10041
Insertion in revsmetadata: 10042
Insertion in revsmetadata: 10043
Insertion 

Insertion in revsmetadata: 10263
Insertion in revsmetadata: 10264
Insertion in revsmetadata: 10265
Insertion in revsmetadata: 10266
Insertion in revsmetadata: 10267
Insertion in revsmetadata: 10268
Insertion in revsmetadata: 10269
Insertion in revsmetadata: 10270
Insertion in revsmetadata: 10271
Insertion in revsmetadata: 10272
Insertion in revsmetadata: 10273
Insertion in revsmetadata: 10274
Insertion in revsmetadata: 10275
Insertion in revsmetadata: 10276
Insertion in revsmetadata: 10277
Insertion in revsmetadata: 10278
Insertion in revsmetadata: 10279
Insertion in revsmetadata: 10280
Insertion in revsmetadata: 10281
Insertion in revsmetadata: 10282
Insertion in revsmetadata: 10283
Insertion in revsmetadata: 10284
Insertion in revsmetadata: 10285
Insertion in revsmetadata: 10286
Insertion in revsmetadata: 10287
Insertion in revsmetadata: 10288
Insertion in revsmetadata: 10289
Insertion in revsmetadata: 10290
Insertion in revsmetadata: 10291
Insertion in revsmetadata: 10292
Insertion 

Insertion in revsmetadata: 10512
Insertion in revsmetadata: 10513
Insertion in revsmetadata: 10514
Insertion in revsmetadata: 10515
Insertion in revsmetadata: 10516
Insertion in revsmetadata: 10517
Insertion in revsmetadata: 10518
Insertion in revsmetadata: 10519
Insertion in revsmetadata: 10520
Insertion in revsmetadata: 10521
Insertion in revsmetadata: 10522
Insertion in revsmetadata: 10523
Insertion in revsmetadata: 10524
Insertion in revsmetadata: 10525
Insertion in revsmetadata: 10526
Insertion in revsmetadata: 10527
Insertion in revsmetadata: 10528
Insertion in revsmetadata: 10529
Insertion in revsmetadata: 10530
Insertion in revsmetadata: 10531
Insertion in revsmetadata: 10532
Insertion in revsmetadata: 10533
Insertion in revsmetadata: 10534
Insertion in revsmetadata: 10535
Insertion in revsmetadata: 10536
Insertion in revsmetadata: 10537
Insertion in revsmetadata: 10538
Insertion in revsmetadata: 10539
Insertion in revsmetadata: 10540
Insertion in revsmetadata: 10541
Insertion 

Insertion in revsmetadata: 10761
Insertion in revsmetadata: 10762
Insertion in revsmetadata: 10763
Insertion in revsmetadata: 10764
Insertion in revsmetadata: 10765
Insertion in revsmetadata: 10766
Insertion in revsmetadata: 10767
Insertion in revsmetadata: 10768
Insertion in revsmetadata: 10769
Insertion in revsmetadata: 10770
Insertion in revsmetadata: 10771
Insertion in revsmetadata: 10772
Insertion in revsmetadata: 10773
Insertion in revsmetadata: 10774
Insertion in revsmetadata: 10775
Insertion in revsmetadata: 10776
Insertion in revsmetadata: 10777
Insertion in revsmetadata: 10778
Insertion in revsmetadata: 10779
Insertion in revsmetadata: 10780
Insertion in revsmetadata: 10781
Insertion in revsmetadata: 10782
Insertion in revsmetadata: 10783
Insertion in revsmetadata: 10784
Insertion in revsmetadata: 10785
Insertion in revsmetadata: 10786
Insertion in revsmetadata: 10787
Insertion in revsmetadata: 10788
Insertion in revsmetadata: 10789
Insertion in revsmetadata: 10790
Insertion 

Insertion in revsmetadata: 11010
Insertion in revsmetadata: 11011
Insertion in revsmetadata: 11012
Insertion in revsmetadata: 11013
Insertion in revsmetadata: 11014
Insertion in revsmetadata: 11015
Insertion in revsmetadata: 11016
Insertion in revsmetadata: 11017
Insertion in revsmetadata: 11018
Insertion in revsmetadata: 11019
Insertion in revsmetadata: 11020
Insertion in revsmetadata: 11021
Insertion in revsmetadata: 11022
Insertion in revsmetadata: 11023
Insertion in revsmetadata: 11024
Insertion in revsmetadata: 11025
Insertion in revsmetadata: 11026
Insertion in revsmetadata: 11027
Insertion in revsmetadata: 11028
Insertion in revsmetadata: 11029
Insertion in revsmetadata: 11030
Insertion in revsmetadata: 11031
Insertion in revsmetadata: 11032
Insertion in revsmetadata: 11033
Insertion in revsmetadata: 11034
Insertion in revsmetadata: 11035
Insertion in revsmetadata: 11036
Insertion in revsmetadata: 11037
Insertion in revsmetadata: 11038
Insertion in revsmetadata: 11039
Insertion 

In [12]:
def updateBooksmetadata(skip=8837):
    """Insert one description-metadata document per book into ``booksMetaColec``.

    Parameters
    ----------
    skip : int
        Number of leading documents of the cursor to pass over without
        processing, used to resume an interrupted run. The default keeps the
        offset that was previously hard-coded in the loop.

    The reviews collection is scanned for ``bookID`` / ``bookDescription``.
    The description is translated to English if needed, analysed with
    ``getMetadata`` and stored as ``{_id: bookID, descrSent, descrSubj,
    descrEmo}``. Books whose ``_id`` is already present are reported as
    'Book already emodescribed' and left untouched.
    """
    # Names are matched to the emotion vector by position.
    # NOTE(review): this assumes getMetadata returns the emotions in exactly
    # this (alphabetical) order - confirm against the lexicon columns.
    emotions = ('anger', 'anticipation', 'disgust', 'fear', 'joy',
                'negative', 'positive', 'sadness', 'surprise', 'trust')

    cursor = revsColec.find( {}, {'bookID': 1, 'bookDescription': 1} )
    for n, review in enumerate(cursor, start=1):
        if n <= skip:
            continue

        bookID = review['bookID']

        # The same book shows up in many documents: look it up first so the
        # translation and emotion analysis are not computed just to be discarded.
        if booksMetaColec.find_one({'_id': bookID}, {'_id': 1}) is not None:
            print('Book already emodescribed', n)
            continue

        # Insert document for each book, to have description metadata for each book
        # Translate into English if needed
        enDescr = toEng(review['bookDescription'])
        descrSentValue, descrSubjValue, descrEmoValue = getMetadata(enDescr)
        descrEmoDict = {name: descrEmoValue[pos] for pos, name in enumerate(emotions)}

        v =  {'_id': bookID, 'descrSent': descrSentValue,  'descrSubj': descrSubjValue, 'descrEmo': descrEmoDict}

        try:
            booksMetaColec.insert_one(v)
            print('Insertion in booksmetadata', n)
        except pymongo.errors.DuplicateKeyError:
            # Inserted by someone else between the lookup and the insert
            print('Book already emodescribed', n)
        except Exception as err:
            # Any other failure is a real error, not a duplicate: say so
            print('Insertion in booksmetadata failed', n, err)

    print('Done')

In [None]:
# Run the book pass: inserts into booksmetadata and prints one progress line per processed document
updateBooksmetadata()

Book already emodescribed 8838
Insertion in booksmetadata 8839
Insertion in booksmetadata 8840
Insertion in booksmetadata 8841
Insertion in booksmetadata 8842
Insertion in booksmetadata 8843
Insertion in booksmetadata 8844
Insertion in booksmetadata 8845
Insertion in booksmetadata 8846
Insertion in booksmetadata 8847
Insertion in booksmetadata 8848
Insertion in booksmetadata 8849
Insertion in booksmetadata 8850
Insertion in booksmetadata 8851
Insertion in booksmetadata 8852
Insertion in booksmetadata 8853
Book already emodescribed 8854
Insertion in booksmetadata 8855
Insertion in booksmetadata 8856
Insertion in booksmetadata 8857
Insertion in booksmetadata 8858
Insertion in booksmetadata 8859
Insertion in booksmetadata 8860
Insertion in booksmetadata 8861
Insertion in booksmetadata 8862
Book already emodescribed 8863
Insertion in booksmetadata 8864
Book already emodescribed 8865
Book already emodescribed 8866
Insertion in booksmetadata 8867
Insertion in booksmetadata 8868
Book already 

Insertion in booksmetadata 9096
Insertion in booksmetadata 9097
Insertion in booksmetadata 9098
Insertion in booksmetadata 9099
Insertion in booksmetadata 9100
Insertion in booksmetadata 9101
Insertion in booksmetadata 9102
Insertion in booksmetadata 9103
Insertion in booksmetadata 9104
Insertion in booksmetadata 9105
Book already emodescribed 9106
Insertion in booksmetadata 9107
Insertion in booksmetadata 9108
Insertion in booksmetadata 9109
Insertion in booksmetadata 9110
Insertion in booksmetadata 9111
Insertion in booksmetadata 9112
Insertion in booksmetadata 9113
Insertion in booksmetadata 9114
Book already emodescribed 9115
Insertion in booksmetadata 9116
Insertion in booksmetadata 9117
Book already emodescribed 9118
Insertion in booksmetadata 9119
Insertion in booksmetadata 9120
Insertion in booksmetadata 9121
Insertion in booksmetadata 9122
Insertion in booksmetadata 9123
Insertion in booksmetadata 9124
Insertion in booksmetadata 9125
Insertion in booksmetadata 9126
Insertion i

Book already emodescribed 9354
Book already emodescribed 9355
Book already emodescribed 9356
Insertion in booksmetadata 9357
Book already emodescribed 9358
Insertion in booksmetadata 9359
Book already emodescribed 9360
Insertion in booksmetadata 9361
Insertion in booksmetadata 9362
Insertion in booksmetadata 9363
Insertion in booksmetadata 9364
Book already emodescribed 9365
Book already emodescribed 9366
Insertion in booksmetadata 9367
Insertion in booksmetadata 9368
Book already emodescribed 9369
Insertion in booksmetadata 9370
Insertion in booksmetadata 9371
Insertion in booksmetadata 9372
Book already emodescribed 9373
Insertion in booksmetadata 9374
Insertion in booksmetadata 9375
Insertion in booksmetadata 9376
Insertion in booksmetadata 9377
Insertion in booksmetadata 9378
Insertion in booksmetadata 9379
Insertion in booksmetadata 9380
Insertion in booksmetadata 9381
Insertion in booksmetadata 9382
Insertion in booksmetadata 9383
Insertion in booksmetadata 9384
Insertion in book

Insertion in booksmetadata 9613
Insertion in booksmetadata 9614
Insertion in booksmetadata 9615
Insertion in booksmetadata 9616
Book already emodescribed 9617
Book already emodescribed 9618
Insertion in booksmetadata 9619
Insertion in booksmetadata 9620
Book already emodescribed 9621
Book already emodescribed 9622
Book already emodescribed 9623
Insertion in booksmetadata 9624
Insertion in booksmetadata 9625
Insertion in booksmetadata 9626
Insertion in booksmetadata 9627
Insertion in booksmetadata 9628
Insertion in booksmetadata 9629
Insertion in booksmetadata 9630
Insertion in booksmetadata 9631
Insertion in booksmetadata 9632
Insertion in booksmetadata 9633
Insertion in booksmetadata 9634
Insertion in booksmetadata 9635
Insertion in booksmetadata 9636
Insertion in booksmetadata 9637
Insertion in booksmetadata 9638
Insertion in booksmetadata 9639
Insertion in booksmetadata 9640
Book already emodescribed 9641
Book already emodescribed 9642
Book already emodescribed 9643
Insertion in boo

Insertion in booksmetadata 9872
Insertion in booksmetadata 9873
Insertion in booksmetadata 9874
Insertion in booksmetadata 9875
Insertion in booksmetadata 9876
Insertion in booksmetadata 9877
Insertion in booksmetadata 9878
Book already emodescribed 9879
Book already emodescribed 9880
Insertion in booksmetadata 9881
Insertion in booksmetadata 9882
Insertion in booksmetadata 9883
Book already emodescribed 9884
Insertion in booksmetadata 9885
Insertion in booksmetadata 9886
Insertion in booksmetadata 9887
Book already emodescribed 9888
Insertion in booksmetadata 9889
Insertion in booksmetadata 9890
Insertion in booksmetadata 9891
Book already emodescribed 9892
Insertion in booksmetadata 9893
Insertion in booksmetadata 9894
Insertion in booksmetadata 9895
Insertion in booksmetadata 9896
Insertion in booksmetadata 9897
Insertion in booksmetadata 9898
Book already emodescribed 9899
Insertion in booksmetadata 9900
Insertion in booksmetadata 9901
Insertion in booksmetadata 9902
Insertion in b

Insertion in booksmetadata 10126
Insertion in booksmetadata 10127
Insertion in booksmetadata 10128
Book already emodescribed 10129
Insertion in booksmetadata 10130
Insertion in booksmetadata 10131
Book already emodescribed 10132
Book already emodescribed 10133
Insertion in booksmetadata 10134
Insertion in booksmetadata 10135
Insertion in booksmetadata 10136
Insertion in booksmetadata 10137
Book already emodescribed 10138
Insertion in booksmetadata 10139
Insertion in booksmetadata 10140
Insertion in booksmetadata 10141
Insertion in booksmetadata 10142
Insertion in booksmetadata 10143
Insertion in booksmetadata 10144
Book already emodescribed 10145
Book already emodescribed 10146
Insertion in booksmetadata 10147
Insertion in booksmetadata 10148
Insertion in booksmetadata 10149
Insertion in booksmetadata 10150
Book already emodescribed 10151
Insertion in booksmetadata 10152
Insertion in booksmetadata 10153
Book already emodescribed 10154
Book already emodescribed 10155
Insertion in booksm

## Example

In [7]:
# Sample sentence used by the example cells below
tx ='I found that it was a story about fate, morality, guilt, and punishment as much as it was about love'

In [11]:
# Emo-matrix of the sample sentence: emotions as rows, lexicon words found as columns, plus 'Sum' and 'SumRel'
getEmomatrix(tx)

Sin stop-word: find story fate, morality, guilt, punishment much love
Lematizado ['find', 'story', 'fate', 'morality', 'guilt', 'punishment', 'much', 'love']


Unnamed: 0,love,find,morality,punishment,story,fate,guilt,Sum,SumRel
anger,0,0,0,1,0,0,0,1,0.076923
anticipation,0,0,0,0,0,1,0,1,0.076923
disgust,0,0,0,1,0,0,1,2,0.153846
fear,0,0,0,1,0,0,0,1,0.076923
joy,1,0,0,0,0,0,0,1,0.076923
negative,0,0,0,1,0,1,1,3,0.230769
positive,1,0,1,0,0,0,0,2,0.153846
sadness,0,0,0,0,0,0,1,1,0.076923
surprise,0,0,0,0,0,0,0,0,0.0
trust,0,0,1,0,0,0,0,1,0.076923


In [8]:
# (polarity, subjectivity, relative emotion vector) of the sample sentence
getMetadata(tx)

Sin stop-word: find story fate, morality, guilt, punishment much love
Lematizado ['find', 'story', 'fate', 'morality', 'guilt', 'punishment', 'much', 'love']


(0.35,
 0.4,
 [0.07692307692307693,
  0.07692307692307693,
  0.15384615384615385,
  0.07692307692307693,
  0.07692307692307693,
  0.23076923076923078,
  0.15384615384615385,
  0.07692307692307693,
  0.0,
  0.07692307692307693])