In [1]:
import numpy as np
import pandas as pd

In [24]:
# Each split file is parsed as two ';'-separated columns with no header row.
train_data=pd.read_csv("train.txt",sep=";",header=None,names=["text","labels"])
test_data=pd.read_csv("test.txt",sep=";",header=None,names=["text","labels"])
valid_data=pd.read_csv("val.txt",sep=";",header=None,names=["text","labels"])

# Stack the three splits into a single frame with a fresh 0..n-1 index.
data=pd.concat([train_data,test_data,valid_data],ignore_index=True)

# Keep a combined copy on disk.
data.to_csv("emotion_data.csv",index=False)

In [25]:
# Show the first three rows of the combined frame (text plus string label).
data.head(3)

Unnamed: 0,text,labels
0,i didnt feel humiliated,sadness
1,i can go from feeling so hopeless to so damned...,sadness
2,im grabbing a minute to post i feel greedy wrong,anger


In [26]:
# List the distinct values in the label column.
data["labels"].unique()

array(['sadness', 'anger', 'love', 'surprise', 'fear', 'joy'],
      dtype=object)

In [27]:
from sklearn.preprocessing import LabelEncoder
# Map the string labels to integer class ids, overwriting the column in place.
# The guard keeps this cell safe to re-run: once the column is already numeric,
# refitting would teach the encoder integer "classes", and
# encoder.inverse_transform would then return numbers instead of emotion names.
if not pd.api.types.is_numeric_dtype(data["labels"]):
    encoder=LabelEncoder()
    data["labels"]=encoder.fit_transform(data["labels"].values)

In [28]:
# Show the first three rows again, now with the label column integer-encoded.
data.head(3)

Unnamed: 0,text,labels
0,i didnt feel humiliated,4
1,i can go from feeling so hopeless to so damned...,4
2,im grabbing a minute to post i feel greedy wrong,0


In [29]:
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [30]:
# Build the cleaned corpus: one stemmed, stopword-free string per input row.
corpus=[]
all_stopwords=stopwords.words("english")
all_stopwords.remove("not")  # "not" is taken off the list so it is never filtered out
# Build the lookup set once; it does not change while the rows are processed.
stopword_set=set(all_stopwords)
ps=PorterStemmer()

# Iterate over the column values directly so the loop does not depend on the
# frame having a 0..n-1 index.
for raw_text in data["text"]:
    # Replace every non-letter with a space, lower-case, split on whitespace.
    tokens=re.sub("[^a-zA-Z]"," ",raw_text).lower().split()
    # Drop stopwords, stem what is left, and re-join into one string.
    corpus.append(' '.join(ps.stem(word) for word in tokens if word not in stopword_set))

In [32]:
from sklearn.feature_extraction.text import CountVectorizer
# Bag-of-words counts, capped at 5000 features.
cv=CountVectorizer(max_features=5000)
x=cv.fit_transform(corpus)
x=x.toarray()  # densify the matrix returned by fit_transform

# Integer class ids from the label column, in the same row order as `corpus`.
y=data["labels"].to_numpy()

In [33]:
# Dimensions of the dense feature matrix produced by the vectorizer.
x.shape

(20000, 5000)

In [34]:
# Dimensions of the label vector taken from data["labels"].
y.shape

(20000,)

In [47]:
from sklearn.model_selection import train_test_split
# Hold out 10% of the rows; the fixed seed makes the split repeatable.
x_train,x_test,y_train,y_test=train_test_split(
    x,
    y,
    random_state=42,
    test_size=0.1,
)

In [87]:
# Report the feature-matrix shapes of the two splits.
print(f"Train Shape: {x_train.shape}\nTest Shape: {x_test.shape}")

Train Shape: (18000, 5000)
Test Shape: (2000, 5000)


In [37]:
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras.utils import to_categorical

In [48]:
# One-hot encode the integer targets for the softmax / categorical_crossentropy model.
# The width is fixed to the number of classes the encoder knows. Without it,
# to_categorical sizes each matrix from the largest label present in that split,
# so a split missing the highest class would not match the model's output layer.
num_classes=len(encoder.classes_)

# Only encode 1-D integer vectors, so re-running this cell does not one-hot
# encode targets that are already one-hot.
if y_train.ndim==1:
    y_train=to_categorical(y_train,num_classes=num_classes)
if y_test.ndim==1:
    y_test=to_categorical(y_test,num_classes=num_classes)

In [52]:
# Fully connected classifier over the bag-of-words features:
# 300 -> 300 -> 100 ReLU units, then one softmax unit per encoded class.
# The input width is read from the training matrix rather than hard-coded, so
# it stays correct if the vectorizer yields a different number of features.
model=Sequential([
    Dense(units=300,activation="relu",input_shape=(x_train.shape[1],)),
    Dense(units=300,activation="relu"),
    Dense(units=100,activation="relu"),
    Dense(units=len(encoder.classes_),activation="softmax"),
])

In [53]:
# Configure training with the Adam optimizer and categorical cross-entropy loss, tracking accuracy.
model.compile(
    loss="categorical_crossentropy",
    metrics=["accuracy"],
    optimizer="adam",
)

# NOTE(review): the held-out split is passed as validation data here and is
# also the split scored later in the notebook — confirm that is intended.
model.fit(
    x_train,
    y_train,
    batch_size=64,
    epochs=20,
    validation_data=(x_test,y_test),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1f88ead4280>

In [98]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 300)               1500300   
_________________________________________________________________
dense_1 (Dense)              (None, 300)               90300     
_________________________________________________________________
dense_2 (Dense)              (None, 100)               30100     
_________________________________________________________________
dense_3 (Dense)              (None, 6)                 606       
Total params: 1,621,306
Trainable params: 1,621,306
Non-trainable params: 0
_________________________________________________________________


## Testing Data

In [63]:
# Class probabilities for every held-out row, one row of scores per sample.
predict=model.predict(x_test)

# Collapse each row to the index of its largest entry in one vectorized call:
# `test` holds the predicted class ids, `valid` the true ids recovered from the
# one-hot targets.
test=np.argmax(predict,axis=1)
valid=np.argmax(y_test,axis=1)

In [65]:
from sklearn.metrics import accuracy_score
# Percentage of held-out rows whose predicted class id (`test`) equals the true id (`valid`).
accuracy_score(valid,test)*100

84.15

In [67]:
def text_extract(data):
    """Clean one raw sentence into a stemmed, stopword-free string.

    Non-letter characters are replaced with spaces, the text is lower-cased and
    split on whitespace, English stopwords (except "not") are dropped, and each
    remaining token is Porter-stemmed.

    Parameters
    ----------
    data : str
        Raw input sentence.

    Returns
    -------
    list of str
        A one-element list holding the cleaned, space-joined sentence.
    """
    # Build the stopword set once per call instead of once per token.
    # "not" is discarded from the set so it is never filtered out; discard()
    # also tolerates a stopword list that does not contain it.
    stopword_set=set(stopwords.words("english"))
    stopword_set.discard("not")
    ps=PorterStemmer()

    tokens=re.sub("[^a-zA-Z]"," ",data).lower().split()
    sentiment=' '.join(ps.stem(word) for word in tokens if word not in stopword_set)
    return [sentiment]

In [73]:
def testing_data(data):
    """Predict the label name for a single raw sentence.

    The sentence is cleaned with ``text_extract``, turned into a dense count
    vector by the fitted ``cv``, scored by ``model``, and the index of the
    highest score is mapped back to its label through ``encoder``.

    Parameters
    ----------
    data : str
        Raw input sentence.

    Returns
    -------
    The first (and only) element returned by ``encoder.inverse_transform``.
    """
    cleaned=text_extract(data)
    features=cv.transform(cleaned).toarray()
    scores=model.predict(features)
    # A single sentence is scored, so the argmax over the whole output is the class index.
    best_index=np.argmax(scores)
    labels=encoder.inverse_transform([best_index])
    return labels[0]

## Test1

In [74]:
# Use a dedicated name: `test` already holds the predicted class ids that
# accuracy_score(valid,test) is computed from, so it must not be overwritten.
sample_text="i can go from feeling so hopeless to so damned hopeful just from being around someone who cares and is awake"
testing_data(sample_text)

'sadness'

## Test2

In [75]:
# Use a dedicated name: `test` already holds the predicted class ids that
# accuracy_score(valid,test) is computed from, so it must not be overwritten.
sample_text="im grabbing a minute to post i feel greedy wrong"
testing_data(sample_text)

'anger'

## Test3

In [78]:
# Use a dedicated name: `test` already holds the predicted class ids that
# accuracy_score(valid,test) is computed from, so it must not be overwritten.
sample_text="i do not feel reassured anxiety is on each side"
testing_data(sample_text)

'joy'

## Test4

In [77]:
# Use a dedicated name: `test` already holds the predicted class ids that
# accuracy_score(valid,test) is computed from, so it must not be overwritten.
sample_text="i feel romantic too"
testing_data(sample_text)

'love'

## Test5

In [79]:
# Use a dedicated name: `test` already holds the predicted class ids that
# accuracy_score(valid,test) is computed from, so it must not be overwritten.
sample_text="i am now nearly finished the week detox and i feel amazing"
testing_data(sample_text)

'surprise'

## Test6

In [81]:
# Use a dedicated name: `test` already holds the predicted class ids that
# accuracy_score(valid,test) is computed from, so it must not be overwritten.
sample_text="i had stated to her the reason i feel so fearful is because i feel unsafe"
testing_data(sample_text)

'fear'