# Emoji Prediction using an LSTM (RNN)
___

In [14]:
import emoji
import pandas as pd
import numpy as np

from keras.utils import to_categorical

from keras.layers import *
from keras.models import Sequential

print('Imports ready !')

Imports ready !


## Load Train, Test Data
___

In [2]:
# Neither CSV has a header row, so pandas labels the columns 0, 1, ...
# Column 0 is the sentence and column 1 the integer class id.
train = pd.read_csv('dataset/train_emoji.csv', header = None)
# Extension corrected from '.cdv' to '.csv' so it matches the training file's naming.
test = pd.read_csv('dataset/test_emoji.csv', header = None)

print(train.shape, " ", test.shape)

(132, 4)   (56, 2)


In [3]:
# Preview the first five training rows; columns 2 and 3 are mostly empty in this preview.
train.head(n=5)

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [4]:
# Preview the first five test rows; each sentence carries a trailing tab character.
test.head(n=5)

Unnamed: 0,0,1
0,I want to eat\t,4
1,he did not answer\t,3
2,he got a raise\t,2
3,she got me a present\t,0
4,ha ha ha it was so funny\t,2


In [5]:
# Maps each class id (as a string key) to the emoji shown for that class.
# Values are either literal code points or shortcodes expanded later by emoji.emojize().
emoji_dictionary = {
    str(class_id): symbol
    for class_id, symbol in enumerate((
        "\u2764\uFE0F",                       # 0: heart as raw code points; the :heart: shortcode
                                              #    can print black instead of red depending on the font
        ":baseball:",                         # 1
        ":beaming_face_with_smiling_eyes:",   # 2
        ":downcast_face_with_sweat:",         # 3
        ":fork_and_knife:",                   # 4
    ))
}

In [6]:
# Column 0 holds the raw sentence and column 1 the integer class id.
# Pin the one-hot width explicitly: without num_classes, to_categorical infers it from the
# largest label present, so a split missing the highest class id would produce targets
# that are narrower than the model's output layer.
num_classes = len(emoji_dictionary)

x_train = train.values[:, 0]
y_train = to_categorical(train.values[:, 1], num_classes=num_classes)

x_test = test.values[:, 0]
y_test = to_categorical(test.values[:, 1], num_classes=num_classes)

In [7]:
# First five one-hot encoded training targets.
y_train[:5]

array([[0., 0., 0., 1., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1.]], dtype=float32)

## Building word-embedding inputs from pre-trained GloVe vectors
___

In [8]:
# Build the {word: vector} lookup table from the GloVe text file.
# Every line is a word followed by its whitespace-separated float coefficients.
with open('glove.6B.50d.txt', encoding='utf-8') as f:
    embeddings = {
        fields[0]: np.asarray(fields[1:], dtype='float32')  # first field is the word, the rest its vector
        for fields in map(str.split, f)
    }

In [9]:
# First five raw training sentences.
x_train[:5]

array(['never talk to me again', 'I am proud of your achievements',
       'It is the worst day in my life', 'Miss you so much',
       'food is life'], dtype=object)

In [10]:
def getOutputEmbeddings(X, max_len=10, emb_dim=50, embedding_index=None):
    """Convert an array of sentences into a zero-padded tensor of word vectors.

    Parameters
    ----------
    X : 1-D object array
        Each element is a raw sentence string or an already-tokenised list of
        words. NOTE: string elements are replaced *in place* by their token
        lists. The results-printing cell joins those token lists, so the side
        effect is kept for backward compatibility.
    max_len : int, default 10
        Number of time steps per sentence. Longer sentences are truncated and
        shorter ones stay zero-padded at the end.
    emb_dim : int, default 50
        Length of each word vector.
    embedding_index : mapping of str -> vector, optional
        Word-vector lookup table. Defaults to the module-level ``embeddings``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(X.shape[0], max_len, emb_dim)``.
    """
    if embedding_index is None:
        embedding_index = embeddings

    embedding_matrix_output = np.zeros((X.shape[0], max_len, emb_dim))
    for ix in range(X.shape[0]):
        # Only split raw strings, so re-running on already-tokenised data
        # no longer fails with "'list' object has no attribute 'split'".
        if isinstance(X[ix], str):
            X[ix] = X[ix].split()
        # Truncate to max_len instead of indexing past the time axis.
        for jx, word in enumerate(X[ix][:max_len]):
            vector = embedding_index.get(word.lower())
            # Out-of-vocabulary words keep their all-zero row rather than raising KeyError.
            if vector is not None:
                embedding_matrix_output[ix, jx] = vector

    return embedding_matrix_output

In [11]:
# Embed both splits (train first, then test); each call also tokenises its input array in place.
emb_x_train, emb_x_test = map(getOutputEmbeddings, (x_train, x_test))

In [12]:
# Sanity check: expect (number of training sentences, 10 time steps, 50 embedding coefficients).
emb_x_train.shape

(132, 10, 50)

In [13]:
# Sanity check: expect (number of test sentences, 10 time steps, 50 embedding coefficients).
emb_x_test.shape

(56, 10, 50)

In [15]:
# Two stacked LSTM layers followed by a 5-way softmax classifier.
model = Sequential()
# Input is 10 time steps of 50-d word vectors. return_sequences=True emits the full
# sequence so the next LSTM receives one vector per time step (stacked LSTM).
model.add(LSTM(64, input_shape=(10, 50), return_sequences=True))
model.add(Dropout(0.4))
# No input_shape here: only the first layer of a Sequential model declares the input.
# This layer consumes the 64-d outputs of the LSTM above, not (10, 50) word vectors.
model.add(LSTM(64))
model.add(Dropout(0.3))
model.add(Dense(5))  # one output unit per emoji class
model.add(Activation('softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_1 (Activation)    (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
__________________________________________________

In [16]:
# Categorical cross-entropy pairs with the one-hot targets and the softmax output layer.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)
# Hold out 10% of the training rows for validation while fitting for 40 epochs.
model.fit(
    x=emb_x_train,
    y=y_train,
    epochs=40,
    batch_size=32,
    validation_split=0.1,
    shuffle=True,
)

Train on 118 samples, validate on 14 samples
Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.callbacks.History at 0x7f40230afb90>

In [17]:
# Score the held-out test split; the result is [loss, accuracy] given metrics=['acc'].
model.evaluate(x=emb_x_test, y=y_test)



[1.6048065083367484, 0.6071428656578064]

In [18]:
# Sequential.predict_classes() is no longer available in current Keras/TensorFlow releases.
# Taking the argmax over the softmax probabilities yields the same class ids and also
# works on the older versions.
pred = np.argmax(model.predict(emb_x_test), axis=-1)

In [25]:
# Show the first few test sentences next to their actual and predicted emoji.
print("Sentence      : Actual       Prediction")
for i in range(min(10, len(x_test))):  # never index past the end of a short test set
    # x_test entries are token lists once getOutputEmbeddings() has processed them, but raw
    # strings otherwise; normalise so a string is not joined character by character.
    words = x_test[i].split() if isinstance(x_test[i], str) else x_test[i]
    print(' '.join(words), end = " : ")
    print(emoji.emojize(emoji_dictionary[str(np.argmax(y_test[i]))]), end = " ")
    print(emoji.emojize(emoji_dictionary[str(pred[i])]))

Sentence      : Actual       Prediction
I want to eat : 🍴 🍴
he did not answer : 😓 😓
he got a raise : 😁 😁
she got me a present : ❤️ 😁
ha ha ha it was so funny : 😁 😁
he is a good friend : ❤️ 😁
I am upset : ❤️ 😓
We had such a lovely dinner tonight : ❤️ 😁
where is the food : 🍴 🍴
Stop making this joke ha ha ha : 😁 😁
