In [46]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.stem import PorterStemmer,WordNetLemmatizer
from nltk.tokenize import word_tokenize

from nltk.corpus import stopwords
import tensorflow as tf
import keras
from tensorflow.keras.layers import Dense,Input,LSTM,Embedding,GlobalMaxPooling1D,Dropout,SimpleRNN,BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.text import text_to_word_sequence,Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix

In [47]:
# Load the two labelled corpora: sentences tagged with an emotion and
# sentences tagged with a sentiment.
emo = pd.read_csv('data/emotion.csv')
sent = pd.read_csv('data/sentiment.csv')

In [48]:
# Preview the first rows of the emotion dataset.
emo.head()

Unnamed: 0,sentence,emotion
0,i just feel really helpless and heavy hearted,fear
1,ive enjoyed being able to slouch about relax a...,sad
2,i gave up my internship with the dmrg and am f...,fear
3,i dont know i feel so lost,sad
4,i am a kindergarten teacher and i am thoroughl...,fear


In [49]:
# Preview the first rows of the sentiment dataset.
sent.head()

Unnamed: 0,sentence,sentiment
0,So there is no way for me to plug it in here i...,negative
1,"Good case, Excellent value.",positive
2,Great for the jawbone.,positive
3,Tied to charger for conversations lasting more...,negative
4,The mic is great.,positive


In [50]:
# Column dtypes, non-null counts and row count of the emotion dataset.
emo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 422746 entries, 0 to 422745
Data columns (total 2 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   sentence  422746 non-null  object
 1   emotion   422746 non-null  object
dtypes: object(2)
memory usage: 6.5+ MB


In [51]:
# Column dtypes, non-null counts and row count of the sentiment dataset.
sent.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3309 entries, 0 to 3308
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   sentence   3309 non-null   object
 1   sentiment  3309 non-null   object
dtypes: object(2)
memory usage: 51.8+ KB


In [52]:
# Discard fully duplicated rows from both datasets; the surviving rows keep
# their original index labels (the index is not reset here).
emo = emo.drop_duplicates()
sent = sent.drop_duplicates()

In [53]:
# Distinct emotion labels in order of first appearance; the length of this
# array is later used as the size of the classifier's output layer.
emotional_states = pd.unique(emo['emotion'])

In [54]:
# Display the distinct emotion labels collected above.
emotional_states

array(['fear', 'sad', 'love', 'joy', 'suprise', 'anger'], dtype=object)

In [55]:
# Distinct sentiment labels in order of first appearance.
sentiment_states = pd.unique(sent['sentiment'])

In [56]:
# Display the distinct sentiment labels collected above.
sentiment_states

array(['negative', 'positive'], dtype=object)

In [57]:
# Replace the textual emotion labels with integer codes.
# OrdinalEncoder expects a 2-D input, hence the single-column reshape; the
# encoded result is flattened back before being written into the frame.
oe_emo = OrdinalEncoder()
emotion_column = emo['emotion'].to_numpy().reshape(-1, 1)
emotions = oe_emo.fit_transform(emotion_column)
emo['emotion'] = emotions.ravel().astype(int)

In [58]:
# Replace the textual sentiment labels with integer codes.
# OrdinalEncoder expects a 2-D input, hence the single-column reshape; the
# encoded result is flattened back before being written into the frame.
oe_sent = OrdinalEncoder()
sentiment_column = sent['sentiment'].to_numpy().reshape(-1, 1)
sentiments = oe_sent.fit_transform(sentiment_column)
sent['sentiment'] = sentiments.ravel().astype(int)

In [59]:
# English stop-word collection consulted by remove_stopwords.
# The corpus is read through `nltk.corpus` rather than through the name
# `stopwords`, because this assignment rebinds `stopwords` itself: reading it
# from the rebound name would make a second run of this cell fail with
# AttributeError. A set is used so each membership test is a hash lookup
# instead of a scan over the whole list.
stopwords = set(nltk.corpus.stopwords.words('english'))

In [60]:
def remove_stopwords(sentence, stop_words=None):
    """Return `sentence` with stop words removed.

    The sentence is split on whitespace, every token whose lowercase form is
    in the stop-word collection is dropped, and the remaining tokens are
    re-joined with single spaces. Surviving tokens keep their original casing.

    Parameters
    ----------
    sentence : str
        Text to filter.
    stop_words : collection of str, optional
        Lowercase words to drop. When omitted, the notebook-level `stopwords`
        collection is used.

    Returns
    -------
    str
        The filtered sentence; an empty string if nothing survives.
    """
    if stop_words is None:
        # Fall back to the collection built in the notebook's stop-word cell.
        stop_words = stopwords
    # Compare on the lowercase form: the stop-word list is lowercase, so a
    # capitalised token such as "The" would otherwise never match.
    return ' '.join(word for word in sentence.split() if word.lower() not in stop_words)

In [61]:
# Strip stop words from every sentence of both datasets, overwriting the
# 'sentence' column in place.
emo['sentence'] = emo['sentence'].map(remove_stopwords)
sent['sentence'] = sent['sentence'].map(remove_stopwords)

In [62]:
# Single shared Porter stemmer instance, used by stemming_words.
ps = PorterStemmer()

In [63]:
def stemming_words(sentence):
    """Return `sentence` with each whitespace-separated token stemmed by `ps`.

    The stemmed tokens are re-joined with single spaces.
    """
    stemmed = []
    for token in sentence.split():
        stemmed.append(ps.stem(token))
    return ' '.join(stemmed)
    

In [64]:
# Stem every sentence of both datasets, overwriting the 'sentence' column.
for frame in (emo, sent):
    frame['sentence'] = frame['sentence'].apply(stemming_words)

In [65]:
# Build a word -> integer index from the emotion sentences and convert each
# sentence into its list of word indices. `num_words=20000` limits which
# indices are emitted by texts_to_sequences.
# NOTE(review): the vocabulary is fitted on the full dataset, before the
# train/test split performed further down, so test sentences influence it.
tokenizer = Tokenizer(num_words=20000)
tokenizer.fit_on_texts(emo['sentence'])
emotions_sequence = tokenizer.texts_to_sequences(emo['sentence'])

In [66]:
# Size of the full dictionary learned by the tokenizer.
vocab_size = len(tokenizer.word_index)
print(f'{vocab_size} unique words in the dictionary')

# V is the largest word index the tokenizer can actually emit. When
# `num_words` is set, texts_to_sequences only keeps indices strictly below it,
# so sizing the embedding from the full dictionary would allocate rows that
# are never looked up. Embedding(V+1, ...) still covers index 0 (padding)
# through V.
if tokenizer.num_words is None:
    V = vocab_size
else:
    V = min(vocab_size, tokenizer.num_words - 1)

51876 unique words in the dictionary


In [67]:
# Hold out 20% of the tokenised sentences (and their labels) for evaluation;
# the fixed random_state makes the split repeatable.
emo_sent_train, emo_sent_test, emo_train, emo_test = train_test_split(
    emotions_sequence,
    emo['emotion'],
    test_size=0.2,
    random_state=101,
)

In [68]:
# Pad the training sequences into a rectangular matrix; with no maxlen given,
# the width is set by the longest training sequence.
emo_sent_train_padded = pad_sequences(emo_sent_train)

In [69]:
# T: padded sequence length (number of columns of the training matrix).
T = emo_sent_train_padded.shape[1]

In [70]:
# Pad (or truncate) the test sequences to the same length T as the training
# matrix so both can be fed to the same model input.
emo_sent_test_padded = pad_sequences(emo_sent_test, maxlen=T)

In [71]:
# Shape check of the padded test matrix: N test samples by T columns, where T
# is the padded length taken from the training matrix.
emo_sent_test_padded.shape # N X T

(83225, 79)

In [72]:
# Hyperparameters.
D = 20  # embedding dimension
M = 15  # not referenced by the layers below; kept in case a later cell reads it

# Functional-API graph for the emotion classifier:
# token indices -> embedding -> SimpleRNN -> small dense head -> softmax.
# Each stage is followed by batch normalisation and 20% dropout.
i = Input(shape=(T,))

# V + 1 rows: index 0 is the padding value, word indices start at 1.
x = Embedding(input_dim=V + 1, output_dim=D)(i)
x = BatchNormalization()(x)
x = Dropout(rate=0.2)(x)

# The recurrent layer is used with its defaults, so only one vector per
# sequence is passed on and no pooling over time is applied afterwards.
x = SimpleRNN(units=8)(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.2)(x)

x = Dense(units=8, activation='relu')(x)
x = BatchNormalization()(x)
x = Dropout(rate=0.2)(x)

# One output probability per distinct emotion label.
x = Dense(units=len(emotional_states), activation='softmax')(x)

In [73]:
# Wrap the input tensor `i` and output tensor `x` into a trainable model and
# print its layer-by-layer summary.
model_emo = Model(inputs=i, outputs=x)
model_emo.summary()

In [76]:
# The sparse categorical loss takes the integer class codes directly, so the
# labels do not need to be one-hot encoded.
model_emo.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

In [77]:
# Train for 20 epochs and keep the returned History object in `r`.
# NOTE(review): the held-out test split doubles as the validation set here,
# so the validation metrics are not an independent estimate.
r = model_emo.fit(
    x=emo_sent_train_padded,
    y=emo_train,
    validation_data=(emo_sent_test_padded, emo_test),
    epochs=20,
)

Epoch 1/20
[1m  742/10404[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:54[0m 18ms/step - accuracy: 0.2516 - loss: 1.8740

KeyboardInterrupt: 