## Import Necessary Libraries

In [None]:
import numpy as np
import tensorflow.keras
from  tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import os
import pandas as pd
from tensorflow.keras.utils import to_categorical
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import scikitplot
from scikitplot.metrics import plot_confusion_matrix

# Create DataFrames for reading and analysing the text files easily

In [None]:
# All three splits are headerless, semicolon-separated files read into the
# same two columns (Input, Sentiment), so they share one set of parsing options.
csv_options = dict(header=None, sep=';', names=['Input', 'Sentiment'], encoding='utf-8')
df_train = pd.read_csv('../input/emotions-dataset-for-nlp/train.txt', **csv_options)
df_test = pd.read_csv('../input/emotions-dataset-for-nlp/test.txt', **csv_options)
df_val = pd.read_csv('../input/emotions-dataset-for-nlp/val.txt', **csv_options)

In [None]:
# Number of training rows per Sentiment label
df_train['Sentiment'].value_counts()

## Reading the Train and Validation Data

In [None]:
# Training sentences (the Input column of the training frame)
X = df_train.Input


In [None]:
# len() of every training entry (character count, assuming entries are strings)
lst = [len(text) for text in X]


In [None]:
# Summary statistics of the per-sentence lengths collected in lst
len1 = pd.DataFrame(data=lst)
len1.describe()

In [None]:
# Tally how many sentences have each length from 7 to 300 (inclusive).
# One Counter pass over lst replaces rescanning the whole list once per
# candidate length; lengths that never occur still yield 0.
from collections import Counter

length_tally = Counter(lst)
cts = [length_tally[length] for length in range(7, 301)]

# Trying to fix a length for the embedding layers' input

In [None]:
# Bar chart: x = sentence length (7..300), y = number of sentences of that length
length_axis = range(7, 301)
plt.bar(length_axis, cts)
plt.show()

# Using the Tokenizer class to convert the sentences into sequences of integer word indices

In [None]:
# Tokenizer configured with num_words=15212 (the same size the Embedding layer
# uses), lower-casing enabled, and 'UNK' as the out-of-vocabulary token.
tokenizer = Tokenizer(num_words=15212, lower=True, oov_token='UNK')
# Build the vocabulary from the training sentences only
tokenizer.fit_on_texts(X)

In [None]:
# Number of entries in the word index the tokenizer built from the training text
len(tokenizer.word_index)

In [None]:
# Turn every training sentence into a sequence of token ids ...
X_train=tokenizer.texts_to_sequences(X)
# ... and bring all sequences to a fixed length of 80 (maxlen=80), padding at the end
X_train_pad=pad_sequences(X_train,maxlen=80,padding='post')

In [None]:
# Replace each emotion name in the training frame with its integer class id
label_ids = {'joy': 0, 'anger': 1, 'love': 2, 'sadness': 3, 'fear': 4, 'surprise': 5}
df_train['Sentiment'] = df_train['Sentiment'].replace(label_ids)

In [None]:
# Pull the Sentiment column (numerically encoded by the replace() above) out as an array
Y_train=df_train['Sentiment'].values


# One-hot Encoding the Emotion Values

In [None]:
# One-hot encode the training labels. num_classes is pinned to 6 so the width
# always matches the model's 6-unit softmax output instead of being inferred
# from the largest label that happens to be present.
Y_train_f = to_categorical(Y_train, num_classes=6)

In [None]:
# Peek at the first six one-hot encoded training labels
Y_train_f[:6]

In [None]:
# Validation sentences and their labels, encoded with the same name -> id table
# that is applied to the training labels
X_val = df_val['Input']
emotion_ids = {'joy': 0, 'anger': 1, 'love': 2, 'sadness': 3, 'fear': 4, 'surprise': 5}
Y_val = df_val['Sentiment'].replace(emotion_ids)

In [None]:
# Reuse the tokenizer fitted on the training text for the validation sentences
X_val_f=tokenizer.texts_to_sequences(X_val)
# Same fixed length (80) and end-padding as the training sequences
X_val_pad=pad_sequences(X_val_f,maxlen=80,padding='post')

In [None]:
# One-hot encode the validation labels. num_classes is pinned to 6 so the
# matrix keeps six columns (matching the model's 6-unit softmax) even if the
# highest-numbered label does not occur in this split.
Y_val_f = to_categorical(Y_val, num_classes=6)

In [None]:
# Peek at the first six one-hot encoded validation labels
Y_val_f[:6]

In [None]:
from keras.models import Sequential
from keras.layers import LSTM,Bidirectional,Dense,Embedding,Dropout


# Creating a Model

In [None]:
# Embedding -> dropout -> two stacked bidirectional LSTMs -> 6-way softmax.
# The first LSTM returns the full sequence so the second one can consume it.
model = Sequential([
    Embedding(15212, 64, input_length=80),
    Dropout(0.6),
    Bidirectional(LSTM(80, return_sequences=True)),
    Bidirectional(LSTM(160)),
    Dense(6, activation='softmax'),
])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

# Compiling and running the model

In [None]:
# Adam optimizer with categorical cross-entropy (the labels are one-hot encoded);
# accuracy is tracked as the reported metric
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

In [None]:
# Train for 12 epochs with the validation split passed as validation_data;
# the returned object is kept so its .history can be plotted afterwards
hist=model.fit(X_train_pad,Y_train_f,epochs=12,validation_data=(X_val_pad,Y_val_f))

# Plotting the Loss and Accuracy Curves

In [None]:
# Training vs. validation accuracy as recorded in the fit history
accuracy_curves = (('accuracy', 'b', 'train'), ('val_accuracy', 'r', 'validation'))
for history_key, colour, curve_label in accuracy_curves:
    plt.plot(hist.history[history_key], c=colour, label=curve_label)
plt.legend(loc='lower right')
plt.show()

In [None]:
# Training vs. validation loss as recorded in the fit history
loss_curves = (('loss', 'orange', 'train'), ('val_loss', 'g', 'validation'))
for history_key, colour, curve_label in loss_curves:
    plt.plot(hist.history[history_key], c=colour, label=curve_label)
plt.legend(loc='upper right')
plt.show()

# Checking for Test Data

In [None]:
# Test sentences and their labels, encoded with the same name -> id table
# that is applied to the training labels
X_test = df_test['Input']
class_ids = {'joy': 0, 'anger': 1, 'love': 2, 'sadness': 3, 'fear': 4, 'surprise': 5}
Y_test = df_test['Sentiment'].replace(class_ids)

In [None]:
# Reuse the tokenizer fitted on the training text for the test sentences
X_test_f=tokenizer.texts_to_sequences(X_test)
# Same fixed length (80) and end-padding as the training sequences
X_test_pad=pad_sequences(X_test_f,maxlen=80,padding='post')

In [None]:
# One-hot encode the test labels. num_classes is pinned to 6 so the matrix
# keeps six columns (matching the model's 6-unit softmax) even if the
# highest-numbered label does not occur in this split.
Y_test_f = to_categorical(Y_test, num_classes=6)

In [None]:
# Shape of the padded test matrix
X_test_pad.shape

In [None]:
# Peek at the first seven one-hot encoded test labels
Y_test_f[:7]

# Accuracy for Test Data

In [None]:
# Score the trained model on the padded test sentences against the one-hot test labels
model.evaluate(X_test_pad,Y_test_f)

# Plotting the Confusion matrix 

In [None]:
# Sequential.predict_classes was removed in TensorFlow 2.6; the predicted class
# is the argmax over the per-class scores returned by predict().
Y_pred = np.argmax(model.predict(X_test_pad), axis=-1)

In [None]:
# Confusion matrix of the integer test labels against the predicted classes
plot_confusion_matrix(Y_test,Y_pred)

In [None]:
# Text report comparing the integer test labels with the predicted classes
print(classification_report(Y_test,Y_pred))

# Creating a Function to check for Your own Sentence

In [None]:
def get_key(value):
    """Return the emotion name whose numeric label equals ``value``.

    The fixed name -> id table below is searched in order and the first
    matching name is returned; ``None`` is returned when nothing matches.
    """
    emotion_codes = {'joy': 0, 'anger': 1, 'love': 2, 'sadness': 3, 'fear': 4, 'surprise': 5}
    matches = (name for name, code in emotion_codes.items() if code == value)
    return next(matches, None)



In [None]:
def predict(sentence):
    """Print the emotion the trained model predicts for one sentence.

    The sentence is converted with the module-level ``tokenizer``, padded at
    the end to length 80, scored by the module-level ``model``, and the
    winning class id is translated back to its name with ``get_key``.

    Args:
        sentence: Text to classify.

    Returns:
        None. The result is written to standard output.
    """
    # The tokenizer expects a list of texts, so wrap the single sentence.
    sentence_seq = tokenizer.texts_to_sequences([sentence])
    sentence_padded = pad_sequences(sentence_seq, maxlen=80, padding='post')
    # Sequential.predict_classes was removed in TensorFlow 2.6; take the
    # argmax over the per-class scores returned by predict() instead.
    scores = model.predict(sentence_padded)
    # One input row gives one score row; convert its argmax to a plain int so
    # get_key compares scalars rather than a one-element array.
    class_id = int(np.argmax(scores, axis=-1)[0])
    ans = get_key(class_id)
    print("The emotion predicted is", ans)

# Check for Your Own Sentence

In [None]:
# input() already returns a str, so it is passed to predict() directly
predict(input('Enter a sentence : '))


In [None]:
# input() already returns a str, so it is passed to predict() directly
predict(input('Enter a sentence : '))


In [None]:
# input() already returns a str, so it is passed to predict() directly
predict(input('Enter a sentence : '))
