In [1]:
import numpy as np 
import pandas as pd
import pickle

# Read the Data

#### The sentiment keyword data

In [2]:
f = pd.read_csv('word_sentiment.csv')
f[0:5]

Unnamed: 0,word,category,score
0,aback,anticipation,0
1,aback,disgust,0
2,aback,anger,0
3,aback,fear,0
4,aback,joy,0


In [3]:
print('Total number of records: ' + str(len(f)))

Total number of records: 141820


#### The Sentiment-emotion data

In [4]:
s2e = pd.read_csv('sentiment2emotion.txt')
s2e[0:3]

Unnamed: 0,sentiment,emotion
0,anger,anger
1,positive,happiness
2,positive,neutral


# Create Dictionary

## Record all the Words

In [5]:
words = []
for i in range(len(f)):
    if f['word'][i] not in words:
        words.append(f['word'][i])

In [6]:
print('Example of Words: ' + str(words[0:2]))
print('Total number of Words: ' + str(len(words)))

Example of Words: ['aback', 'abacus']
Total number of Words: 14182


## Record all the Sentiments

In [7]:
sentiments = []
for i in range(len(f)):
    if f['category'][i] not in sentiments:
        sentiments.append(f['category'][i])

In [8]:
print(sentiments)

['anticipation', 'disgust', 'anger', 'fear', 'joy', 'negative', 'positive', 'sadness', 'surprise', 'trust']


## Record all the Emotions

In [9]:
emotions = []
for i in range(len(s2e)):
    if s2e['emotion'][i] not in emotions:
        emotions.append(s2e['emotion'][i])

In [10]:
print(emotions)

['anger', 'happiness', 'neutral', 'neutral ', 'fear', 'contempt', 'disgust ', 'sadness ', 'surprise']


## Record the Sentiment of Words
With `word2sentiment`, we can find all the specified sentiments of a given word

In [11]:
word2sentiment = {f['word'][i]:f['category'][i] for i in range(len(f)) if f['score'][i] == 1}

In [12]:
print(word2sentiment['abacus'])

trust


## Record the Words under each Sentiment

In [13]:
sentiment2words = {sentiments[i]:[] for i in range(len(sentiments))}
print(len(sentiment2words))

10


In [14]:
word2sentiments = {words[i]:[] for i in range(len(words))}
print(len(word2sentiments))

14182


In [15]:
for i in range(len(f)):
    if f['score'][i]:
        word2sentiments[f['word'][i]].append(f['category'][i])
        sentiment2words[f['category'][i]].append(f['word'][i])

In [16]:
print(word2sentiments['humiliate'])

['anger', 'negative', 'sadness']


In [17]:
print(sentiment2words['anger'][0:3])

['abandoned', 'abandonment', 'abhor']


## Record the Sentiment under each Emotion

In [18]:
emotion2sentiments = {emotions[i]:[] for i in range(len(emotions))}
for i in range(len(s2e)):
    emotion2sentiments[s2e['emotion'][i]].append(s2e['sentiment'][i])

In [19]:
print(emotion2sentiments['happiness'])

['positive', 'joy']


## Record the Emotion under each Sentiment

In [20]:
sentiment2emotions = {sentiments[i]:[] for i in range(len(sentiments))}
for i in range(len(s2e)):
    sentiment2emotions[s2e['sentiment'][i]].append(s2e['emotion'][i])

In [21]:
print(sentiment2emotions[word2sentiments['abacus'][0]])

['neutral ']


## Record the Words under each Emotion

In [22]:
words2emotions = {words[i]:[] for i in range(len(words))}
for i in range(len(words)):
    sentiments_in_word = word2sentiments[words[i]] # a list of sentiments
    for j in range(len(sentiments_in_word)):
        emotions_in_words = sentiment2emotions[sentiments_in_word[j]] # a list of emotions
        for k in range(len(emotions_in_words)):
            words2emotions[words[i]].append(emotions_in_words[k])

In [23]:
print(words2emotions['abacus'])

['neutral ']


## Save the mapping dictionaries

In [24]:
with open('mapping_dictionaries.p', 'wb') as outfile:
   pickle.dump({"word2sentiments":word2sentiments, "sentiment2words":sentiment2words,"emotion2sentiments":emotion2sentiments,"sentiment2emotions":sentiment2emotions,"words2emotions":words2emotions,"words":words,"emotions":emotions}, outfile)

In [25]:
with open('mapping_dictionaries.p', 'rb') as infile:
    mappings = pickle.load(infile)

In [26]:
print(len(mappings))
print(type(mappings))
print(len(mappings['word2sentiments']))
print(type(mappings['word2sentiments']))
print(len(mappings['sentiment2words']))
print(type(mappings['sentiment2words']))
print(len(mappings['words2emotions']))
print(type(mappings['words2emotions']))
print(len(mappings['words']))
print(type(mappings['words']))
print(len(mappings['emotions']))
print(type(mappings['emotions']))

7
<class 'dict'>
14182
<class 'dict'>
10
<class 'dict'>
14182
<class 'dict'>
14182
<class 'list'>
9
<class 'list'>
