In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import json
import nltk
from nltk.corpus import wordnet

In [67]:
# Load the three tab-separated splits. Column names are supplied explicitly,
# so the first line of every file is read as data (header=None), not as a header.
train = pd.read_csv("./file/data/train.tsv", header=None, names=["Sentence", "Class", "Delete"], sep="\t")
test = pd.read_csv("./file/data/test.tsv", header=None, names=["Sentence", "Class", "Delete"], sep="\t")
val_data = pd.read_csv("./file/data/dev.tsv", header=None, names=["Sentence", "Class", "Delete"], sep="\t")

In [68]:
# Discard the third TSV column ("Delete") from every split.
train = train.drop(columns=["Delete"])
test = test.drop(columns=["Delete"])
val_data = val_data.drop(columns=["Delete"])

In [69]:
# Preview the first rows of the training split.
train.head()

Unnamed: 0,Sentence,Class
0,My favourite food is anything I didn't have to...,27
1,"Now if he does off himself, everyone will thin...",27
2,WHY THE FUCK IS BAYLESS ISOING,2
3,To make her feel threatened,14
4,Dirty Southern Wankers,3


In [70]:
# Preview the first rows of the test split.
test.head()

Unnamed: 0,Sentence,Class
0,I’m really sorry about your situation :( Altho...,25
1,It's wonderful because it's awful. At not with.,0
2,"Kings fan here, good luck to you guys! Will be...",13
3,"I didn't know that, thank you for teaching me ...",15
4,They got bored from haunting earth for thousan...,27


In [71]:
# Preview the first rows of the validation split.
val_data.head()

Unnamed: 0,Sentence,Class
0,Is this in New Orleans?? I really feel like th...,27
1,"You know the answer man, you are programmed to...",427
2,I've never been this sad in my life!,25
3,The economy is heavily controlled and subsidiz...,427
4,He could have easily taken a real camera from ...,20


In [72]:
# Report (rows, columns) for the train, test and validation splits, one per line.
print(train.shape, test.shape, val_data.shape, sep="\n")


(43410, 2)
(5427, 2)
(5426, 2)


In [73]:
# Column dtypes and non-null counts for the training split.
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43410 entries, 0 to 43409
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   Sentence  43410 non-null  object
 1   Class     43410 non-null  object
dtypes: object(2)
memory usage: 678.4+ KB


# Data Preprocessing

In [74]:
# "Class" stores the label ids of a sentence as one comma-separated string.
# Split it into a list of id strings and record how many ids each row has.
train["Class list"] = train["Class"].map(lambda ids: ids.split(","))
train["Class len"] = train["Class list"].map(len)

test["Class list"] = test["Class"].map(lambda ids: ids.split(","))
test["Class len"] = test["Class list"].map(len)

val_data["Class list"] = val_data["Class"].map(lambda ids: ids.split(","))
val_data["Class len"] = val_data["Class list"].map(len)

In [75]:
# Check the new "Class list" / "Class len" columns on the training split.
train.head()

Unnamed: 0,Sentence,Class,Class list,Class len
0,My favourite food is anything I didn't have to...,27,[27],1
1,"Now if he does off himself, everyone will thin...",27,[27],1
2,WHY THE FUCK IS BAYLESS ISOING,2,[2],1
3,To make her feel threatened,14,[14],1
4,Dirty Southern Wankers,3,[3],1


In [76]:
# Check the new "Class list" / "Class len" columns on the validation split.
val_data.head()

Unnamed: 0,Sentence,Class,Class list,Class len
0,Is this in New Orleans?? I really feel like th...,27,[27],1
1,"You know the answer man, you are programmed to...",427,"[4, 27]",2
2,I've never been this sad in my life!,25,[25],1
3,The economy is heavily controlled and subsidiz...,427,"[4, 27]",2
4,He could have easily taken a real camera from ...,20,[20],1


In [77]:
# Load the Ekman grouping from JSON; the handle is closed when the block exits.
with open("./file/data/ekman_mapping.json") as mapping_file:
    ekman_mapping = json.load(mapping_file)

In [78]:
# Display the loaded mapping (coarse emotion -> list of fine-grained emotion names).
ekman_mapping

{'anger': ['anger', 'annoyance', 'disapproval'],
 'disgust': ['disgust'],
 'fear': ['fear', 'nervousness'],
 'joy': ['joy',
  'amusement',
  'approval',
  'excitement',
  'gratitude',
  'love',
  'optimism',
  'relief',
  'pride',
  'admiration',
  'desire',
  'caring'],
 'sadness': ['sadness', 'disappointment', 'embarrassment', 'grief', 'remorse'],
 'surprise': ['surprise', 'realization', 'confusion', 'curiosity']}

In [79]:
# Read the emotion names, one per line. The position of a name in this list is
# what idx2class uses to turn a label id into a name.
with open("./file/data/emotions.txt", "r") as emotion_file:
    # The context manager closes the handle (the original left it open).
    # splitlines() also copes with a trailing newline or CRLF line endings,
    # which split("\n") would turn into an extra "" entry or names ending in "\r".
    emotion_list = emotion_file.read().splitlines()
print(emotion_list)
print(len(emotion_list))

['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring', 'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval', 'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief', 'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization', 'relief', 'remorse', 'sadness', 'surprise', 'neutral']
28


In [80]:
def idx2class(idxlist):
    """Translate label ids into emotion names.

    Each item of ``idxlist`` is converted with ``int()`` and used as a
    position in the module-level ``emotion_list``.

    Returns a new list of names in the same order as ``idxlist``.
    """
    return [emotion_list[int(label_id)] for label_id in idxlist]

In [81]:
# Convert every row's list of label ids into the matching list of emotion names.
train["Emotion"] = train["Class list"].map(idx2class)
test["Emotion"] = test["Class list"].map(idx2class)
val_data["Emotion"] = val_data["Class list"].map(idx2class)

In [82]:
# Check the new "Emotion" column on the training split.
train.head()

Unnamed: 0,Sentence,Class,Class list,Class len,Emotion
0,My favourite food is anything I didn't have to...,27,[27],1,[neutral]
1,"Now if he does off himself, everyone will thin...",27,[27],1,[neutral]
2,WHY THE FUCK IS BAYLESS ISOING,2,[2],1,[anger]
3,To make her feel threatened,14,[14],1,[fear]
4,Dirty Southern Wankers,3,[3],1,[annoyance]


In [83]:
# Check the new "Emotion" column on the validation split.
val_data.head()

Unnamed: 0,Sentence,Class,Class list,Class len,Emotion
0,Is this in New Orleans?? I really feel like th...,27,[27],1,[neutral]
1,"You know the answer man, you are programmed to...",427,"[4, 27]",2,"[approval, neutral]"
2,I've never been this sad in my life!,25,[25],1,[sadness]
3,The economy is heavily controlled and subsidiz...,427,"[4, 27]",2,"[approval, neutral]"
4,He could have easily taken a real camera from ...,20,[20],1,[optimism]


In [84]:
def EmotionMapping(emotion_list):
    """Map fine-grained emotion names onto Ekman groups.

    For every name in ``emotion_list`` (in order), each Ekman group of the
    module-level ``ekman_mapping`` whose member list contains the name is
    appended, checking the groups in the order anger, disgust, fear, joy,
    sadness, surprise. The name "neutral" is passed through as "neutral".
    Names that match nothing contribute nothing, and duplicates are kept.

    Returns the resulting list of group names.
    """
    ekman_order = ("anger", "disgust", "fear", "joy", "sadness", "surprise")
    map_list = []

    for fine_label in emotion_list:
        map_list.extend(
            group for group in ekman_order if fine_label in ekman_mapping[group]
        )
        if fine_label == "neutral":
            map_list.append("neutral")

    return map_list

In [85]:
# Collapse every row's fine-grained emotion names into Ekman groups (plus "neutral").
train["Mapped_emotion"] = train["Emotion"].map(EmotionMapping)
test["Mapped_emotion"] = test["Emotion"].map(EmotionMapping)
val_data["Mapped_emotion"] = val_data["Emotion"].map(EmotionMapping)

In [86]:
# Check the new "Mapped_emotion" column on the training split.
train.head()

Unnamed: 0,Sentence,Class,Class list,Class len,Emotion,Mapped_emotion
0,My favourite food is anything I didn't have to...,27,[27],1,[neutral],[neutral]
1,"Now if he does off himself, everyone will thin...",27,[27],1,[neutral],[neutral]
2,WHY THE FUCK IS BAYLESS ISOING,2,[2],1,[anger],[anger]
3,To make her feel threatened,14,[14],1,[fear],[fear]
4,Dirty Southern Wankers,3,[3],1,[annoyance],[anger]


In [87]:
# Check the new "Mapped_emotion" column on the validation split.
val_data.head()

Unnamed: 0,Sentence,Class,Class list,Class len,Emotion,Mapped_emotion
0,Is this in New Orleans?? I really feel like th...,27,[27],1,[neutral],[neutral]
1,"You know the answer man, you are programmed to...",427,"[4, 27]",2,"[approval, neutral]","[joy, neutral]"
2,I've never been this sad in my life!,25,[25],1,[sadness],[sadness]
3,The economy is heavily controlled and subsidiz...,427,"[4, 27]",2,"[approval, neutral]","[joy, neutral]"
4,He could have easily taken a real camera from ...,20,[20],1,[optimism],[joy]


In [92]:
# "Mapped_emotion" holds Python lists (the return value of EmotionMapping),
# not comma-joined strings, so the string accessor `.str.split` cannot split
# it. Expand each list into one column per label instead; shorter rows are
# padded with missing values.
train_data_1 = pd.DataFrame(train["Mapped_emotion"].tolist())
test_data_1 = pd.DataFrame(test["Mapped_emotion"].tolist())
val_data_1 = pd.DataFrame(val_data["Mapped_emotion"].tolist())

In [95]:
# Re-display the validation split (this cell does not modify it).
val_data.head()

Unnamed: 0,Sentence,Class,Class list,Class len,Emotion,Mapped_emotion
0,Is this in New Orleans?? I really feel like th...,27,[27],1,[neutral],[neutral]
1,"You know the answer man, you are programmed to...",427,"[4, 27]",2,"[approval, neutral]","[joy, neutral]"
2,I've never been this sad in my life!,25,[25],1,[sadness],[sadness]
3,The economy is heavily controlled and subsidiz...,427,"[4, 27]",2,"[approval, neutral]","[joy, neutral]"
4,He could have easily taken a real camera from ...,20,[20],1,[optimism],[joy]


In [98]:
# Expand each row's list of mapped emotions into one column per label
# (columns 0, 1, 2, ...), then give the result a fresh 0..n-1 index.
train_data_1 = pd.DataFrame(train["Mapped_emotion"].tolist()).reset_index(drop=True)
test_data_1 = pd.DataFrame(test["Mapped_emotion"].tolist()).reset_index(drop=True)
val_data_1 = pd.DataFrame(val_data["Mapped_emotion"].tolist()).reset_index(drop=True)

In [99]:
# Inspect the expanded per-label columns for the validation split.
val_data_1.head()

Unnamed: 0,0,1,2,3
0,neutral,,,
1,joy,neutral,,
2,sadness,,,
3,joy,neutral,,
4,joy,,,


In [100]:
# Keep only the first mapped label of every row as the single target column.
# Selecting column position 0 (instead of dropping the fixed slice 1:5) stays
# correct however many labels the longest row has, and re-running the cell is
# harmless because nothing is mutated in place.
train_data_1 = train_data_1.iloc[:, :1]
test_data_1 = test_data_1.iloc[:, :1]
val_data_1 = val_data_1.iloc[:, :1]

In [101]:
# Confirm that only the first label column (named 0) remains.
val_data_1.head()

Unnamed: 0,0
0,neutral
1,joy
2,sadness
3,joy
4,joy


In [102]:
# Attach the first-label column (named 0) to each split, side by side.
# NOTE: unlike train/test, the validation result is written back to `val_data`
# itself, so re-running this cell appends another label column to it.
train_data = pd.concat((train, train_data_1), axis="columns")
test_data = pd.concat((test, test_data_1), axis="columns")
val_data = pd.concat((val_data, val_data_1), axis="columns")

In [103]:
# Check the validation split with the appended label column (named 0).
val_data.head()

Unnamed: 0,Sentence,Class,Class list,Class len,Emotion,Mapped_emotion,0
0,Is this in New Orleans?? I really feel like th...,27,[27],1,[neutral],[neutral],neutral
1,"You know the answer man, you are programmed to...",427,"[4, 27]",2,"[approval, neutral]","[joy, neutral]",joy
2,I've never been this sad in my life!,25,[25],1,[sadness],[sadness],sadness
3,The economy is heavily controlled and subsidiz...,427,"[4, 27]",2,"[approval, neutral]","[joy, neutral]",joy
4,He could have easily taken a real camera from ...,20,[20],1,[optimism],[joy],joy


In [104]:
# Remove the intermediate label columns from each combined frame.
train_data = train_data.drop(columns=["Class", "Class list", "Class len", "Emotion", "Mapped_emotion"])
test_data = test_data.drop(columns=["Class", "Class list", "Class len", "Emotion", "Mapped_emotion"])
val_data = val_data.drop(columns=["Class", "Class list", "Class len", "Emotion", "Mapped_emotion"])

In [105]:
# Inspect the validation frame after the helper columns were dropped.
# NOTE(review): the saved output still lists "Mapped_emotion", which the
# preceding cell drops — the output looks stale; re-run from a fresh kernel to confirm.
val_data.head()

Unnamed: 0,Sentence,Mapped_emotion,0
0,Is this in New Orleans?? I really feel like th...,[neutral],neutral
1,"You know the answer man, you are programmed to...","[joy, neutral]",joy
2,I've never been this sad in my life!,[sadness],sadness
3,The economy is heavily controlled and subsidiz...,"[joy, neutral]",joy
4,He could have easily taken a real camera from ...,[joy],joy
