In [5]:
import pandas as pd

In [6]:
emotions_df = pd.read_csv('emotions.csv')

In [7]:
emotions_df.head()

Unnamed: 0,text,label
0,i just feel really helpless and heavy hearted,4
1,ive enjoyed being able to slouch about relax a...,0
2,i gave up my internship with the dmrg and am f...,4
3,i dont know i feel so lost,0
4,i am a kindergarten teacher and i am thoroughl...,4


In [8]:
emotions_df.shape

(416809, 2)

In [9]:
emotions_df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
1,141067
0,121187
3,57317
4,47712
2,34554
5,14972


In [10]:
emotions_df.isna().sum()

Unnamed: 0,0
text,0
label,0


In [11]:
# Build a class-balanced subset: 2,000 rows drawn (without replacement) from
# each of the six labels 0-5, with a fixed seed so the draw is reproducible.
# The per-label samples are collected first and concatenated once: calling
# pd.concat inside the loop re-copies the growing frame on every pass, and
# seeding it with an empty DataFrame can disturb the index/column dtypes.
e_df = pd.concat(
    [
        emotions_df[emotions_df['label'] == label].sample(n=2000, random_state=42)
        for label in range(6)
    ]
)

In [12]:
e_df.shape

(12000, 2)

In [13]:
e_df.head()

Unnamed: 0,text,label
133243,ive learned to surround myself with women who ...,0
88501,i already feel crappy because of this and you ...,0
131379,i feel like i have lost mourned and moved past...,0
148369,i could write a whole lot more about why im fe...,0
134438,i always seem to feel inadequate,0


In [14]:
emotion_df = e_df.copy()

In [15]:
emotion_df['label'].value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
0,2000
1,2000
2,2000
3,2000
4,2000
5,2000


In [16]:
# Replace the sampled rows' original row numbers with a fresh 0..n-1 index;
# drop=True discards the old index instead of keeping it as a column.
emotion_df = emotion_df.reset_index(drop=True)

In [17]:
emotion_df.head()

Unnamed: 0,text,label
0,ive learned to surround myself with women who ...,0
1,i already feel crappy because of this and you ...,0
2,i feel like i have lost mourned and moved past...,0
3,i could write a whole lot more about why im fe...,0
4,i always seem to feel inadequate,0


In [18]:
import nltk
from nltk.corpus import stopwords

In [19]:
# Fetch the NLTK data used below: the stop-word corpus read by
# stopwords.words('english') and the 'punkt' tokenizer package.
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [20]:
# Materialise the English stop-word list as a set; remove_stopwords tests each
# lowercased token for membership in it.
stop_words = set(stopwords.words('english'))

In [21]:
stop_words

{'a',
 'about',
 'above',
 'after',
 'again',
 'against',
 'ain',
 'all',
 'am',
 'an',
 'and',
 'any',
 'are',
 'aren',
 "aren't",
 'as',
 'at',
 'be',
 'because',
 'been',
 'before',
 'being',
 'below',
 'between',
 'both',
 'but',
 'by',
 'can',
 'couldn',
 "couldn't",
 'd',
 'did',
 'didn',
 "didn't",
 'do',
 'does',
 'doesn',
 "doesn't",
 'doing',
 'don',
 "don't",
 'down',
 'during',
 'each',
 'few',
 'for',
 'from',
 'further',
 'had',
 'hadn',
 "hadn't",
 'has',
 'hasn',
 "hasn't",
 'have',
 'haven',
 "haven't",
 'having',
 'he',
 "he'd",
 "he'll",
 "he's",
 'her',
 'here',
 'hers',
 'herself',
 'him',
 'himself',
 'his',
 'how',
 'i',
 "i'd",
 "i'll",
 "i'm",
 "i've",
 'if',
 'in',
 'into',
 'is',
 'isn',
 "isn't",
 'it',
 "it'd",
 "it'll",
 "it's",
 'its',
 'itself',
 'just',
 'll',
 'm',
 'ma',
 'me',
 'mightn',
 "mightn't",
 'more',
 'most',
 'mustn',
 "mustn't",
 'my',
 'myself',
 'needn',
 "needn't",
 'no',
 'nor',
 'not',
 'now',
 'o',
 'of',
 'off',
 'on',
 'once',
 'on

In [22]:
len(stop_words)

198

In [23]:
def remove_stopwords(text):
    """Return ``text`` with every English stop word removed.

    The text is split with ``nltk.word_tokenize``. A token is dropped when its
    lowercased form is found in the module-level ``stop_words`` set; the
    surviving tokens keep their original casing and order and are joined back
    together with single spaces.
    """
    kept_tokens = []
    for token in nltk.word_tokenize(text):
        if token.lower() in stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)

In [26]:
# Extra tokenizer resource fetched before remove_stopwords is applied.
# NOTE(review): presumably nltk.word_tokenize needs 'punkt_tab' on this NLTK
# version in addition to 'punkt' - confirm, and consider moving this next to
# the other nltk.download calls so a fresh top-to-bottom run works.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


True

In [27]:
# Overwrites the 'text' column with its stop-word-free version, so the
# original sentences are no longer available on emotion_df afterwards.
emotion_df['text'] = emotion_df['text'].apply(remove_stopwords)

In [28]:
emotion_df.head()

Unnamed: 0,text,label
0,ive learned surround women lift leave feeling ...,0
1,already feel crappy upset situation doesnt help,0
2,feel like lost mourned moved past tears relati...,0
3,could write whole lot im feeling crappy dont t...,0
4,always seem feel inadequate,0


In [29]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [30]:
# Build the word -> integer-id vocabulary from the cleaned texts.
# NOTE(review): no num_words / oov_token is set, so (per the Keras docs) words
# not seen here are silently dropped by texts_to_sequences - confirm that is
# acceptable when the model is later used on new text.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(emotion_df['text'])

In [31]:
emotion_sequences = tokenizer.texts_to_sequences(emotion_df['text'])

In [32]:
emotion_df['text'].iloc[2]

'feel like lost mourned moved past tears relationship'

In [33]:
emotion_sequences[2:3]

[[1, 3, 239, 6075, 1174, 219, 617, 386]]

In [34]:
# Force every id sequence to exactly max_length entries; shorter ones get zeros
# appended at the end (padding='post').
# NOTE(review): truncating is left at its default, which per the Keras docs is
# 'pre' - sequences longer than 50 ids would lose their first tokens; confirm
# that is intended.
max_length = 50
emotion_padded = pad_sequences(emotion_sequences, maxlen=max_length, padding='post')

In [35]:
import numpy as np

In [36]:
emotion_df.shape

(12000, 2)

In [37]:
emotion_labels = np.array(emotion_df['label'])

In [38]:
emotion_labels

array([0, 0, 0, ..., 5, 5, 5])

In [39]:
emotion_input = emotion_padded

In [40]:
from tensorflow import keras

In [41]:
emotion_input_layer = keras.layers.Input(shape=(max_length,), name='emotion_input')

In [42]:
# Lookup table mapping each token id to a 128-dimensional vector.
# input_dim is the vocabulary size + 1 so that id 0, the value pad_sequences
# writes as padding, also has a row.
# NOTE(review): mask_zero is not enabled, so padded positions are presumably
# processed by the LSTM and averaged by the pooling layer like real tokens -
# confirm whether masking was intended.
embedding_layer = keras.layers.Embedding(input_dim=len(tokenizer.word_index) + 1, output_dim=128)

In [43]:
emotion_embedding = embedding_layer(emotion_input_layer)

In [44]:
# 64-unit LSTM; return_sequences=True keeps one output vector per timestep so
# the pooling layer applied afterwards has a sequence axis to average over.
shared_lstm = keras.layers.LSTM(64, return_sequences=True)

In [45]:
emotion_lstm = shared_lstm(emotion_embedding)

In [46]:
# Collapse the per-timestep LSTM outputs into one vector per sample by
# averaging over the sequence axis, then apply dropout with rate 0.5 as
# regularisation before the classifier head.
shared_pooling = keras.layers.GlobalAveragePooling1D()
shared_dropout = keras.layers.Dropout(0.5)

In [47]:
emotion_features = shared_dropout(shared_pooling(emotion_lstm))

In [48]:
emotion_output = keras.layers.Dense(6, activation='softmax', name='emotion_output')(emotion_features)

In [49]:
model = keras.models.Model(inputs=[emotion_input_layer], outputs=[emotion_output])

In [50]:
# sparse_categorical_crossentropy consumes the integer class ids held in
# emotion_labels directly (no one-hot encoding), matching the 6-way softmax
# output layer.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [51]:
model.summary()

In [52]:
# Trains on all 12,000 samples; no validation_split / validation_data is
# passed, so the accuracy logged below is training accuracy only.
# NOTE(review): emotion_labels is ordered by class, and Keras documents
# validation_split as taking the tail of the data before shuffling, so a plain
# validation_split would hold out a single class - shuffle or stratify first.
history = model.fit(emotion_input, emotion_labels, epochs=10, batch_size=4)

Epoch 1/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 6ms/step - accuracy: 0.2262 - loss: 1.6816
Epoch 2/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - accuracy: 0.7620 - loss: 0.6684
Epoch 3/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.9473 - loss: 0.1862
Epoch 4/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - accuracy: 0.9668 - loss: 0.1109
Epoch 5/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 6ms/step - accuracy: 0.9772 - loss: 0.0718
Epoch 6/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 8ms/step - accuracy: 0.9803 - loss: 0.0635
Epoch 7/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 7ms/step - accuracy: 0.9841 - loss: 0.0482
Epoch 8/10
[1m3000/3000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 6ms/step - accuracy: 0.9873 - loss: 0.0391
Epoch 9/10
[1m3

In [53]:
# Inputs are passed as a dict keyed by the Input layer's name ('emotion_input').
# These are the same padded sequences the model was trained on, so the
# predictions say nothing about performance on unseen text.
prediction = model.predict({'emotion_input':emotion_input})

[1m375/375[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step
