## Emoji Predictor

In [37]:
import emoji 

In [38]:
# Quick check that the emoji package turns a ":name:" code into its Unicode glyph.
emoji.emojize(":fire:")

'🔥'

In [39]:
import pandas as pd
import numpy as np

In [40]:
# Read the train/test CSVs; header=None means the first row is data and the
# columns are addressed by integer position (0, 1, ...).
train, test = (
    pd.read_csv(csv_path, header=None)
    for csv_path in ('dataset/train_emoji.csv', 'dataset/test_emoji.csv')
)

In [41]:
# Lookup from a class label (as a string key) to the emoji shown for that class.
# Values are either a literal Unicode sequence or a ":name:" code for emoji.emojize.

emoji_dictionary = dict(
    zip(
        ("0", "1", "2", "3", "4"),
        (
            "\u2764\uFE0F",    # heart written as code points; ":heart:" can print black instead of red depending on the font
            ":baseball:",
            ":beaming_face_with_smiling_eyes:",
            ":downcast_face_with_sweat:",
            ":fork_and_knife:",
        ),
    )
)

In [42]:
# Column 0 is used downstream as the sentence text and column 1 as the class label.
x_train, y_train = train[0].values, train[1].values
x_test, y_test = test[0].values, test[1].values

In [43]:
# Load the pre-trained GloVe vectors from glove.6B.50d.txt.
# Each line is parsed as "<word> <number> <number> ..." separated by whitespace;
# the result maps every word to a float numpy array.

embeddings_index = {}

# The context manager guarantees the file is closed even if a line fails to parse.
with open('glove.6B.50d.txt', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing with IndexError on values[0].
            continue
        word = values[0]
        coefs = np.asarray(values[1:], dtype='float')
        embeddings_index[word] = coefs

In [44]:
# Spot-check the loaded embeddings by looking up the vector stored for one word.
embeddings_index["oh"]

array([-0.070292,  1.6078  ,  0.64854 , -0.4591  , -0.16151 , -1.093   ,
        0.61743 ,  0.048792, -0.47594 ,  1.2585  , -0.52256 ,  0.96757 ,
       -0.70143 ,  0.31107 ,  0.13962 ,  0.72396 ,  0.21441 , -0.019466,
        0.40694 ,  0.94655 , -0.89237 ,  0.30974 ,  1.8434  ,  0.54281 ,
        0.60901 , -1.867   , -1.9405  ,  0.71482 , -0.090765, -1.5403  ,
        1.287   ,  0.79188 , -0.069779,  1.3083  ,  0.54165 , -0.94769 ,
        0.90328 ,  0.18304 ,  0.87004 ,  0.46736 , -0.32235 ,  0.69321 ,
       -0.25275 , -0.17076 ,  0.52085 ,  0.30456 , -0.47081 , -0.64507 ,
        0.49646 ,  0.71087 ])

In [45]:
# Converting Sentences into vectors

def embedding_output(x, maxlen=10, emd_dim=50, embeddings=None):
    """Convert an array of sentences into a zero-padded tensor of word vectors.

    Parameters
    ----------
    x : array with ``.shape`` and item assignment (e.g. a numpy object array)
        One sentence per element, either a raw string or an already tokenized
        list of words. Side effect: every string element is replaced in place
        by its whitespace-split token list (a later display cell joins these
        tokens back together).
    maxlen : int, default 10
        Number of word slots per sentence. Shorter sentences are zero-padded
        at the end; words beyond ``maxlen`` are dropped.
    emd_dim : int, default 50
        Length of each word vector.
    embeddings : mapping of str -> vector, optional
        Lookup table for word vectors. Defaults to the module-level
        ``embeddings_index``.

    Returns
    -------
    numpy.ndarray of shape ``(x.shape[0], maxlen, emd_dim)``
        Row ``[i, j]`` is the vector of the lower-cased j-th word of sentence
        ``i``, or all zeros when the word is not in ``embeddings``.
    """
    if embeddings is None:
        embeddings = embeddings_index

    embedding_out = np.zeros((x.shape[0], maxlen, emd_dim))

    for sent in range(x.shape[0]):

        # Tokenize only raw strings so that calling this function again on the
        # same (already tokenized) array does not fail.
        if isinstance(x[sent], str):
            x[sent] = x[sent].split()

        # Truncate to maxlen: writing past the last slot would raise IndexError.
        for pos, token in enumerate(x[sent][:maxlen]):
            vector = embeddings.get(token.lower())
            # Unknown words keep the zero vector the array was initialised with.
            if vector is not None:
                embedding_out[sent, pos] = vector

    return embedding_out

In [46]:
# Vectorize both splits with the same routine (train first, then test).
embeddings_matrix_train, embeddings_matrix_test = map(
    embedding_output, (x_train, x_test)
)

In [47]:
# Sanity-check the training tensor shape: (sentences, maxlen, emd_dim) as built by embedding_output.
print(embeddings_matrix_train.shape)

(132, 10, 50)


In [48]:
from keras.utils import to_categorical

# One-hot encode the class labels for the categorical_crossentropy loss.
# num_classes is pinned to the number of emoji classes: without it each call
# infers the width from the largest label present, so a split that lacks the
# highest class would produce a matrix narrower than the 5-way softmax output.
n_classes = len(emoji_dictionary)
y_train = to_categorical(train[1], num_classes=n_classes)
y_test = to_categorical(test[1], num_classes=n_classes)

In [49]:
## Training our model

# Here we use a stacked LSTM: the output (y hat) of each cell in the first LSTM layer is fed as the
# input to the corresponding cell of the second LSTM layer.

from keras.layers import *
from keras.models import Sequential

# Two stacked 64-unit LSTM layers, each followed by 50% dropout, then a
# 5-unit dense layer with softmax. The first LSTM returns its full output
# sequence so the second LSTM receives one input per timestep.
model = Sequential([
    LSTM(64, input_shape=(10, 50), return_sequences=True),
    Dropout(0.5),
    LSTM(64, return_sequences=False),
    Dropout(0.5),
    Dense(5),
    Activation('softmax'),
])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_2 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_2 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_3 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_3 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_1 (Activation)    (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
__________________________________________________

In [50]:
# Configure training: Adam optimizer, cross-entropy over the one-hot labels,
# accuracy reported under the name 'acc'.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['acc'],
)

# Train for 40 epochs in shuffled batches of 32 with validation_split=0.1;
# the value returned by fit is kept in `hist`.
hist = model.fit(
    x=embeddings_matrix_train,
    y=y_train,
    batch_size=32,
    epochs=40,
    shuffle=True,
    validation_split=0.1,
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [51]:
# Evaluate on the test split; with metrics=['acc'] the result is [loss, accuracy].
model.evaluate(embeddings_matrix_test,y_test)



[1.2688804864883423, 0.6785714030265808]

In [52]:
# Sequential.predict_classes has been removed from current Keras/TensorFlow
# releases. Taking the argmax over the softmax output gives the same class
# indices and works on both old and new versions.
pred = np.argmax(model.predict(embeddings_matrix_test), axis=-1)

In [58]:
# For up to the first 30 test sentences print three lines: the sentence,
# the emoji of its true label, and the emoji of the predicted label.
for i in range(min(30, len(x_test))):
    sentence = x_test[i]
    # embedding_output() replaces each sentence with its token list in place;
    # join tokens back together, but also accept a sentence that is still a raw string.
    if not isinstance(sentence, str):
        sentence = ' '.join(sentence)
    print(sentence)
    print(emoji.emojize(emoji_dictionary[str(np.argmax(y_test[i]))]))
    print(emoji.emojize(emoji_dictionary[str(pred[i])]))

I want to eat
🍴
🍴
he did not answer
😓
😓
he got a raise
😁
😓
she got me a present
❤️
❤️
ha ha ha it was so funny
😁
😁
he is a good friend
❤️
😁
I am upset
❤️
😓
We had such a lovely dinner tonight
❤️
❤️
where is the food
🍴
🍴
Stop making this joke ha ha ha
😁
😁
where is the ball
⚾
⚾
work is hard
😓
😁
This girl is messing with me
😓
😓
are you serious ha ha
😁
😓
Let us go play baseball
⚾
⚾
This stupid grader is not working
😓
😓
work is horrible
😓
😓
Congratulation for having a baby
😁
😁
stop messing around
😓
😓
any suggestions for dinner
🍴
😁
I love taking breaks
❤️
❤️
you brighten my day
😁
❤️
I boiled rice
🍴
🍴
she is a bully
😓
😓
Why are you feeling bad
😓
😓
I am upset
😓
😓
I worked during my birthday
😓
😁
My grandmother is the love of my life
❤️
❤️
enjoy your break
😁
⚾
valentine day is near
❤️
😁
