In [6]:
import numpy as np
import pandas as pd
import pickle

In [7]:
# Load the first labelled split and preview its first three rows.
dataset1 = pd.read_csv("emotion-labels-train.csv")
dataset1.head(n=3)

Unnamed: 0,text,label
0,Just got back from seeing @GaryDelaney in Burs...,joy
1,Oh dear an evening of absolute hilarity I don'...,joy
2,Been waiting all week for this game ❤️❤️❤️ #ch...,joy


In [8]:
# Load the second labelled split and preview its first three rows.
dataset2 = pd.read_csv("emotion-labels-test.csv")
dataset2.head(n=3)

Unnamed: 0,text,label
0,You must be knowing #blithe means (adj.) Happ...,joy
1,Old saying 'A #smile shared is one gained for ...,joy
2,Bridget Jones' Baby was bloody hilarious 😅 #Br...,joy


In [9]:
dataset1.isna().sum()

text     0
label    0
dtype: int64

In [10]:
dataset2.isna().sum()

text     0
label    0
dtype: int64

In [11]:
# Stack both labelled splits row-wise into one frame with a fresh 0..n-1 index.
train = pd.concat([dataset1, dataset2], axis=0, ignore_index=True)

In [12]:
train

Unnamed: 0,text,label
0,Just got back from seeing @GaryDelaney in Burs...,joy
1,Oh dear an evening of absolute hilarity I don'...,joy
2,Been waiting all week for this game ❤️❤️❤️ #ch...,joy
3,"@gardiner_love : Thank you so much, Gloria! Yo...",joy
4,I feel so blessed to work with the family that...,joy
...,...,...
6750,Why does Candice constantly pout #GBBO 💄😒,sadness
6751,"@redBus_in #unhappy with #redbus CC, when I ta...",sadness
6752,"@AceOperative789 no pull him afew weeks ago, s...",sadness
6753,I'm buying art supplies and I'm debating how s...,sadness


In [13]:
# Persist the combined frame (labels are still strings at this point); the index is not written.
train.to_csv(
    "emotion_dataset.csv",
    index=False,
)

In [14]:
train["label"].unique()

array(['joy', 'fear', 'anger', 'sadness'], dtype=object)

In [15]:
from sklearn.preprocessing import LabelEncoder
# Encode the string emotion labels as integer class ids.
# The encoder is fitted on the untouched label columns of the two source frames
# (same rows, same order as `train`) instead of on train["label"] itself, so
# re-running this cell cannot refit it on already-encoded integers, which would
# silently drop the string class names needed by inverse_transform later on.
encoder = LabelEncoder()
train["label"] = encoder.fit_transform(
    pd.concat([dataset1["label"], dataset2["label"]], ignore_index=True)
)

In [16]:
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [17]:
# Build the cleaned training corpus: one lower-cased, stop-word-filtered,
# stemmed string per row of train["text"].
# The stemmer and the stop-word set do not depend on the row, so they are built
# once here instead of once per row (the set was previously rebuilt per word).
ps = PorterStemmer()
# "not" is removed from the stop words so that it survives as a token.
all_stopwords = set(stopwords.words("english")) - {"not"}

corpus = []
for raw_text in train["text"]:
    # Replace every non-letter with a space, then lower-case and tokenise on whitespace.
    words = re.sub("[^a-zA-Z]", " ", raw_text).lower().split()
    corpus.append(" ".join(ps.stem(word) for word in words if word not in all_stopwords))

In [18]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words features over the cleaned corpus, capped at 5000 vocabulary entries.
cv = CountVectorizer(max_features=5000)
x = cv.fit_transform(corpus).toarray()  # dense count matrix, one row per document
y = train["label"].to_numpy()           # integer class ids aligned with the rows of x

In [19]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows for validation; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.1,
    random_state=42,
)

In [20]:
x_train.shape

(6079, 5000)

In [21]:
x_test.shape

(676, 5000)

In [22]:
from tensorflow.keras.utils import to_categorical

# One-hot encode both target vectors, using one column per class known to the encoder.
# NOTE: this rebinds y_train / y_test, so the cell must not be run twice in a row.
y_train, y_test = [
    to_categorical(labels, len(encoder.classes_))
    for labels in (y_train, y_test)
]

In [23]:
from keras.layers import Dense
from keras.models import Sequential

In [24]:
# Fully connected classifier over the bag-of-words features.
# The input width follows the feature matrix and the output width follows the
# label encoder, so the network stays consistent with the vectoriser's
# vocabulary size and with the one-hot targets instead of hard-coding 5000 / 4.
model = Sequential([
    Dense(units=300, activation="relu", input_shape=(x_train.shape[1],)),
    Dense(units=300, activation="relu"),
    Dense(units=100, activation="relu"),
    # Softmax yields one probability per emotion class.
    Dense(units=len(encoder.classes_), activation="softmax"),
])

In [25]:
# Configure training for one-hot multi-class targets, then fit while reporting
# loss/accuracy on the held-out split after every epoch.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=20,
    validation_data=(x_test, y_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x2d2003a0430>

In [39]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 300)               1500300   
_________________________________________________________________
dense_1 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 404       
Total params: 1,621,104
Trainable params: 1,621,104
Non-trainable params: 0
_________________________________________________________________


## Evaluating on the held-out file (`emotion-labels-val.csv`)

In [26]:
# Load the held-out split used for the final accuracy check and preview four rows.
test = pd.read_csv("emotion-labels-val.csv")
test.head(n=4)

Unnamed: 0,text,label
0,"@theclobra lol I thought maybe, couldn't decid...",joy
1,Nawaz Sharif is getting more funnier than @kap...,joy
2,Nawaz Sharif is getting more funnier than @kap...,joy
3,@tomderivan73 😁...I'll just people watch and e...,joy


In [27]:
test.isna().sum()

text     0
label    0
dtype: int64

In [28]:
corpus=[]

for i in range(len(test)):
    sentiment=re.sub("[^a-zA-Z]"," ",test["text"][i])
    sentiment=sentiment.lower()
    sentiment=sentiment.split()
    ps=PorterStemmer()
    all_stopwords=stopwords.words("english")
    all_stopwords.remove("not")
    sentiment=[ps.stem(word) for word in sentiment if not word in set(all_stopwords)]
    sentiment=' '.join(sentiment)
    corpus.append(sentiment)

In [29]:
# Vectorise the held-out corpus with the already-fitted vectoriser (transform only,
# no refit) and map its string labels to class ids with the fitted encoder.
# NOTE: this rebinds x_test / y_test, replacing the split made by train_test_split.
x_test = cv.transform(corpus).toarray()
y_test = encoder.transform(test["label"].to_numpy())

In [30]:
# Per-class scores from the softmax output layer, one row per held-out document.
predict = model.predict(x_test)

In [31]:
# Index of the highest-scoring class in each row of the score matrix.
# A single vectorised argmax along the class axis replaces the per-row Python loop.
pred = np.argmax(predict, axis=1)

In [32]:
# Hold the predicted class ids in a NumPy array for the metric computation below.
pred = np.array(pred)

In [33]:
from sklearn.metrics import accuracy_score
accuracy_score(y_test,pred)

0.8040345821325648

In [38]:
# Reload a pickled label encoder and check which label class id 3 maps to.
# NOTE(review): no visible cell writes "encoder.pkl"; it has to exist on disk
# already — confirm which step produces it.
# pickle.load can execute arbitrary code: only load files you created yourself.
with open("encoder.pkl", "rb") as encoder_file:  # context manager closes the handle
    en = pickle.load(encoder_file)
en.inverse_transform([3])

array(['sadness'], dtype=object)

## Testing the model on individual example sentences

In [40]:
def text_extract(data):
    """Clean one raw text string the same way the training corpus was cleaned.

    Non-letter characters are replaced by spaces, the text is lower-cased and
    split on whitespace, English stop words (except "not") are dropped, and
    the remaining words are Porter-stemmed and re-joined with single spaces.

    Parameters
    ----------
    data : str
        Raw input text.

    Returns
    -------
    list of str
        A one-element list holding the cleaned text, i.e. the iterable-of-
        documents shape that ``CountVectorizer.transform`` accepts.
    """
    ps = PorterStemmer()
    # Build the stop-word set once per call (it used to be rebuilt for every
    # word); "not" is excluded so that it survives as a token.
    all_stopwords = set(stopwords.words("english")) - {"not"}
    words = re.sub("[^a-zA-Z]", " ", data).lower().split()
    cleaned = " ".join(ps.stem(word) for word in words if word not in all_stopwords)
    return [cleaned]

## Test1

In [43]:
data="The race advances only by the extra achievements of the individual. You are the individual. ~Charles Towne\n #optimism #inspire"

# Clean and vectorise the single sentence, then decode the most probable class.
# The feature matrix gets its own name so that `pred` (the class-id array used
# by the accuracy cell) is not overwritten; argmax runs along the class axis.
features = cv.transform(text_extract(data)).toarray()
encoder.inverse_transform(np.argmax(model.predict(features), axis=1))

array(['joy'], dtype=object)

## Test2

In [44]:

data="@Bwana86 I can fear that someone is always following me everywhere I go. Does that make it true??"

# Clean and vectorise the single sentence, then decode the most probable class.
# The feature matrix gets its own name so that `pred` (the class-id array used
# by the accuracy cell) is not overwritten; argmax runs along the class axis.
features = cv.transform(text_extract(data)).toarray()
encoder.inverse_transform(np.argmax(model.predict(features), axis=1))

array(['fear'], dtype=object)

## Test3

In [45]:
data="Got a $20 tip from a drunk Uber passenger. Today I get a $25 parking ticket. I'd blame karma but my dumb ass forgot to pay the meter. #rage"

# Clean and vectorise the single sentence, then decode the most probable class.
# The feature matrix gets its own name so that `pred` (the class-id array used
# by the accuracy cell) is not overwritten; argmax runs along the class axis.
features = cv.transform(text_extract(data)).toarray()
encoder.inverse_transform(np.argmax(model.predict(features), axis=1))

array(['anger'], dtype=object)

## Test4

In [46]:
data="Very depressing seeing my whole fam packing to go on holiday tomorrow and I'm just staying here ðŸ™ƒ"

# Clean and vectorise the single sentence, then decode the most probable class.
# The feature matrix gets its own name so that `pred` (the class-id array used
# by the accuracy cell) is not overwritten; argmax runs along the class axis.
features = cv.transform(text_extract(data)).toarray()
encoder.inverse_transform(np.argmax(model.predict(features), axis=1))

array(['sadness'], dtype=object)