In [None]:
import pandas as pd
import re

import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.utils import pad_sequences
from keras.models import Sequential
from keras.layers import LSTM,Dense,Embedding,SpatialDropout1D
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

In [None]:
# Load the Bangla emotions corpus; the path is relative to the kernel's working directory.
DATASET_CSV = 'bangla_emotions_dataset_11927.csv'
df = pd.read_csv(DATASET_CSV, encoding='utf-8')
# df = df.drop('Unnamed: 0', axis=1)  # disabled in the original cell; kept for reference
df

In [None]:
# Number of rows for each distinct value of the `labels` column.
df['labels'].value_counts()

In [None]:
# One-hot encode the labels.
# `labels` is handed straight to `to_categorical` with num_classes=6, so it must
# already contain integer class ids in the range 0-5.
# NOTE(review): a disabled earlier version of this cell mapped a string column
# `label` to ids as happy=0, angry=1, sad=2, disgust=3, surprise=4, fear=5 --
# presumably `labels` uses the same coding; confirm against the dataset.
y = to_categorical(df['labels'], num_classes=6)
print(y[:10])

# Remove the string column `label` when the CSV has one. `DataFrame.drop` returns
# a new frame instead of modifying `df`, so the result must be bound back;
# errors='ignore' makes this a no-op when the column is absent.
df = df.drop(columns=['label'], errors='ignore')

In [None]:
# Display the DataFrame again, following the label-encoding cell.
df

In [None]:
# The model input is the `ctexts` column; `y` is the one-hot target built earlier.
X = df['ctexts']

# Sanity check: print both shapes, one per line.
print(X.shape, y.shape, sep='\n')

In [None]:
# Peek at the first five texts.
X.head(5)

In [None]:
# Build the word index and turn every text into a list of integer word ids.
num_words = 20000  # the tokenizer only emits ids below num_words (most frequent words)
tokenizer = Tokenizer(num_words=num_words)

# Read the texts from the DataFrame rather than from `X`: this cell rebinds `X`
# to the id sequences, so fitting on `X` would build a different tokenizer if
# the cell were ever executed a second time.
texts = df['ctexts']
tokenizer.fit_on_texts(texts)
X = tokenizer.texts_to_sequences(texts)

# `word_index` contains every distinct token seen during fitting, not only the
# `num_words` that are kept, so this figure can exceed `num_words`.
vocab_size = len(tokenizer.word_index) + 1
print("vocab size:", vocab_size)

In [None]:
# Bring every id sequence to exactly `maxlen` entries; short ones get zeros appended.
maxlen = 128
X = pad_sequences(X, maxlen=maxlen, padding='post')

In [None]:
# Shape of the padded input matrix.
print(np.shape(X))

In [None]:
# Hold out 25% of the samples for testing.
# - `random_state` pins the shuffle, so the split (and every score computed from
#   it) is the same on each Restart & Run All.
# - `stratify` keeps the class proportions equal in both parts; the class id is
#   recovered from the one-hot rows of `y` with argmax.
X_train, X_test, label_train, label_test = train_test_split(
    X, y,
    test_size=0.25,
    shuffle=True,
    random_state=42,
    stratify=np.argmax(y, axis=1),
)

In [None]:
# Report how many feature rows and label rows landed in each split.
for split_name, features, targets in (("Training:", X_train, label_train),
                                      ("Testing: ", X_test, label_test)):
    print(split_name, len(features), len(targets))

In [None]:
# Same check as above, but with full array shapes instead of row counts.
for part_label, inputs, one_hot in (("Training:", X_train, label_train),
                                    ("Testing: ", X_test, label_test)):
    print(part_label, inputs.shape, one_hot.shape)

In [None]:
from keras.models import Sequential
from keras import layers
#from keras.layers import GaussianNoise, BatchNormalization
from keras.layers import Dense,Conv1D,MaxPooling1D, BatchNormalization
from keras.utils import to_categorical
from keras.callbacks import EarlyStopping

LSTM

In [None]:
# LSTM classifier: embedding -> LSTM -> max-over-time pooling -> dense head -> softmax.
embedding_dim = 100
dropout = 0.2
opt = 'adam'

model = Sequential([
    layers.Embedding(input_dim=num_words,
                     output_dim=embedding_dim,
                     input_length=maxlen),
    # return_sequences=True keeps the output of every time step so the pooling
    # layer below can take the per-feature maximum over the whole sequence.
    layers.LSTM(100, dropout=dropout, recurrent_dropout=dropout, return_sequences=True),
    layers.GlobalMaxPool1D(),
    BatchNormalization(),  # normalization
    layers.Dense(128, activation='relu'),
    # L2-regularised hidden layer. It used to have no activation, which made it
    # a purely linear map feeding the next Dense layer (extra parameters, no
    # extra expressiveness); it now uses ReLU like the other hidden layers.
    layers.Dense(128, activation='relu', kernel_regularizer='l2'),
    layers.Dropout(dropout),
    layers.Dense(64, activation='relu'),
    layers.Dropout(dropout),
    layers.Dense(32, activation='relu'),
    layers.Dropout(dropout),
    # One output unit per class, matching the 6-column one-hot targets.
    layers.Dense(6, activation='softmax'),
])

# The metric is registered under the name 'acc', which is why the training
# history and the early-stopping monitor use 'acc' / 'val_acc'.
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['acc'])
model.summary()

In [None]:
# Train with 10% of the training split held out for validation.
# restore_best_weights=True makes the callback put back the weights of the epoch
# with the best val_acc when it stops training; without it the evaluations
# below would score weights from up to `patience` epochs after the best one.
# mode='max' states explicitly that a higher val_acc is better.
early_stopping = EarlyStopping(monitor='val_acc',
                               mode='max',
                               patience=7,
                               min_delta=0.0001,
                               restore_best_weights=True,
                               verbose=1)

history = model.fit(X_train, label_train,
                    epochs=30,
                    verbose=True,
                    validation_split=0.1,
                    batch_size=128,
                    callbacks=[early_stopping])

# Score the trained model on the full training split, then on the held-out test split.
loss, accuracy = model.evaluate(X_train, label_train, verbose=True)
print("Training Accuracy: {:.4f}".format(accuracy))

loss_val, accuracy_val = model.evaluate(X_test, label_test, verbose=True)
print("Testing Accuracy:  {:.4f}".format(accuracy_val))

In [None]:
import matplotlib.pyplot as plt

# Training curves for the model fitted above: one figure per metric, each with
# the training series and the validation series.
curves = (
    ('acc', 'model accuracy', 'accuracy'),
    ('loss', 'model loss', 'loss'),
)
for metric_key, figure_title, y_label in curves:
    plt.plot(history.history[metric_key])
    plt.plot(history.history['val_' + metric_key])

    plt.title(figure_title)
    plt.ylabel(y_label)
    plt.xlabel('epoch')
    plt.legend(['train','val'], loc='upper left')
    plt.show()

In [None]:
from sklearn.metrics import multilabel_confusion_matrix,classification_report

# Collapse the predicted probabilities and the one-hot targets to class ids,
# then print the per-class report for the test split.
probabilities = model.predict(X_test)
y_pred = probabilities.argmax(axis=1)
y_test = label_test.argmax(axis=1)

print(classification_report(y_test, y_pred))

BiLSTM

In [None]:
# BiLSTM classifier: same head as the LSTM model, but the recurrent layer reads
# the sequence in both directions.
embedding_dim = 100
dropout = 0.2
opt = 'adam'

model = Sequential([
    layers.Embedding(input_dim=num_words,
                     output_dim=embedding_dim,
                     input_length=maxlen),
    # return_sequences=True keeps the output of every time step so the pooling
    # layer below can take the per-feature maximum over the whole sequence.
    layers.Bidirectional(layers.LSTM(100,
                                     dropout=dropout,
                                     recurrent_dropout=dropout,
                                     return_sequences=True)),
    layers.GlobalMaxPool1D(),
    BatchNormalization(),  # normalization
    layers.Dense(128, activation='relu'),
    # L2-regularised hidden layer. It used to have no activation, which made it
    # a purely linear map feeding the next Dense layer (extra parameters, no
    # extra expressiveness); it now uses ReLU like the other hidden layers.
    layers.Dense(128, activation='relu', kernel_regularizer='l2'),
    layers.Dropout(dropout),
    layers.Dense(64, activation='relu'),
    layers.Dropout(dropout),
    layers.Dense(32, activation='relu'),
    layers.Dropout(dropout),
    # One output unit per class, matching the 6-column one-hot targets.
    layers.Dense(6, activation='softmax'),
])

# The metric is registered under the name 'acc', which is why the training
# history and the early-stopping monitor use 'acc' / 'val_acc'.
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['acc'])
model.summary()

In [None]:
# Train with 10% of the training split held out for validation.
# restore_best_weights=True makes the callback put back the weights of the epoch
# with the best val_acc when it stops training; without it the evaluations
# below would score weights from up to `patience` epochs after the best one.
# mode='max' states explicitly that a higher val_acc is better.
early_stopping = EarlyStopping(monitor='val_acc',
                               mode='max',
                               patience=7,
                               min_delta=0.0001,
                               restore_best_weights=True,
                               verbose=1)

history = model.fit(X_train, label_train,
                    epochs=30,
                    verbose=True,
                    validation_split=0.1,
                    batch_size=128,
                    callbacks=[early_stopping])

# Score the trained model on the full training split, then on the held-out test split.
loss, accuracy = model.evaluate(X_train, label_train, verbose=True)
print("Training Accuracy: {:.4f}".format(accuracy))

loss_val, accuracy_val = model.evaluate(X_test, label_test, verbose=True)
print("Testing Accuracy:  {:.4f}".format(accuracy_val))

In [None]:
import matplotlib.pyplot as plt

# Accuracy and loss curves of the most recent `model.fit` run, drawn as two
# separate figures with train and validation series overlaid.
figure_specs = [
    {'key': 'acc', 'title': 'model accuracy', 'ylabel': 'accuracy'},
    {'key': 'loss', 'title': 'model loss', 'ylabel': 'loss'},
]
for spec in figure_specs:
    train_series = history.history[spec['key']]
    val_series = history.history['val_' + spec['key']]
    plt.plot(train_series)
    plt.plot(val_series)

    plt.title(spec['title'])
    plt.ylabel(spec['ylabel'])
    plt.xlabel('epoch')
    plt.legend(['train','val'], loc='upper left')
    plt.show()

In [None]:
from sklearn.metrics import multilabel_confusion_matrix,classification_report

# Class ids: highest-probability column for predictions, the hot column for targets.
y_pred = model.predict(X_test).argmax(axis=1)
y_test = label_test.argmax(axis=1)

# Per-class precision / recall / F1 on the test split.
report_text = classification_report(y_test, y_pred)
print(report_text)

CNN-BiLSTM

In [None]:
import keras

# CNN-BiLSTM classifier: a 1-D convolution over the embeddings feeds the
# bidirectional LSTM; the dense head is the same as in the other two models.
embedding_dim = 100
dropout = 0.2
opt = 'adam'

model = Sequential([
    layers.Embedding(input_dim=num_words, output_dim=embedding_dim, input_length=maxlen),
    # padding='same' keeps the sequence length unchanged through the convolution.
    Conv1D(filters=32, kernel_size=3, padding='same', activation='relu'),
    # return_sequences=True keeps the output of every time step so the pooling
    # layer below can take the per-feature maximum over the whole sequence.
    layers.Bidirectional(layers.LSTM(100,
                                     dropout=dropout,
                                     recurrent_dropout=dropout,
                                     return_sequences=True)),
    layers.GlobalMaxPool1D(),
    BatchNormalization(),  # normalization
    layers.Dense(128, activation='relu'),
    # L2-regularised hidden layer. It used to have no activation, which made it
    # a purely linear map feeding the next Dense layer (extra parameters, no
    # extra expressiveness); it now uses ReLU like the other hidden layers.
    layers.Dense(128, activation='relu', kernel_regularizer='l2'),
    layers.Dropout(dropout),
    layers.Dense(64, activation='relu'),
    layers.Dropout(dropout),
    layers.Dense(32, activation='relu'),
    layers.Dropout(dropout),
    # One output unit per class, matching the 6-column one-hot targets.
    layers.Dense(6, activation='softmax'),
])

# The metric is registered under the name 'acc', which is why the training
# history and the early-stopping monitor use 'acc' / 'val_acc'.
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['acc'])
model.summary()

In [None]:
# Train with 10% of the training split held out for validation.
# restore_best_weights=True makes the callback put back the weights of the epoch
# with the best val_acc when it stops training; without it the evaluations
# below would score weights from up to `patience` epochs after the best one.
# mode='max' states explicitly that a higher val_acc is better, and verbose=1
# reports the stopping epoch, as the LSTM and BiLSTM training cells do.
early_stopping = EarlyStopping(monitor='val_acc',
                               mode='max',
                               patience=7,
                               min_delta=0.0001,
                               restore_best_weights=True,
                               verbose=1)

history = model.fit(X_train, label_train,
                    epochs=30,
                    verbose=True,
                    validation_split=0.1,
                    batch_size=128,
                    callbacks=[early_stopping])

# Score the trained model on the full training split, then on the held-out test split.
loss, accuracy = model.evaluate(X_train, label_train, verbose=True)
print("Training Accuracy: {:.4f}".format(accuracy))

loss_val, accuracy_val = model.evaluate(X_test, label_test, verbose=True)
print("Testing Accuracy:  {:.4f}".format(accuracy_val))

In [None]:
import matplotlib.pyplot as plt

# Plot the recorded history of the last fit: first accuracy, then loss, each as
# its own figure comparing the training and validation series.
run_log = history.history
for metric_name, plot_title, axis_label in [('acc', 'model accuracy', 'accuracy'),
                                            ('loss', 'model loss', 'loss')]:
    for series_key in (metric_name, 'val_' + metric_name):
        plt.plot(run_log[series_key])

    plt.title(plot_title)
    plt.ylabel(axis_label)
    plt.xlabel('epoch')
    plt.legend(['train','val'], loc='upper left')
    plt.show()

In [None]:
from sklearn.metrics import multilabel_confusion_matrix,classification_report

# Turn the softmax output and the one-hot test targets into class ids along the
# class axis, then print the per-class report.
class_axis = 1
test_probabilities = model.predict(X_test)
y_pred = np.argmax(test_probabilities, axis=class_axis)
y_test = np.argmax(label_test, axis=class_axis)

print(classification_report(y_test, y_pred))