In [23]:
import numpy as np
import pandas as pd
import emoji

from keras.models import Sequential
from keras.layers import Dense, Input, Dropout, SimpleRNN,LSTM, Activation
from keras.utils import to_categorical
import matplotlib.pyplot as plt


In [3]:
# Load the labelled sentences. header=None gives integer column labels, so
# later cells read the sentence from column 0 and the class label from column 1.
# Raw strings keep the Windows backslashes literal; '\D' and '\e' are invalid
# escape sequences in a normal string literal.
train=pd.read_csv(r'D:\Data Sets\emoji_train.csv',header=None)
test=pd.read_csv(r'D:\Data Sets\emoji_test.csv',header=None)

In [4]:
# Preview the first rows of the training frame to check the column layout.
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [6]:
# Integer class label -> emoji alias understood by emoji.emojize.
emoji_dict = {
    0: ":heart:",
    1: ":baseball:",
    2: ":smile:",
    3: ":disappointed:",
    4: ":fork_and_knife:",
}

In [7]:
# Print every class label next to its rendered emoji.
# NOTE(review): `use_aliases` appears to have been dropped by newer `emoji`
# releases - confirm against the installed version.
for label, alias in emoji_dict.items():
    print(label, emoji.emojize(alias, use_aliases=True))

0 ❤
1 ⚾
2 😄
3 😞
4 🍴


In [8]:
# Column 0 carries the sentence, column 1 the integer emoji label.
X_train, Y_train = train[0], train[1]
X_test, Y_test = test[0], test[1]

# Sanity-check that sentences and labels line up in both splits.
print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

(132,) (132,) (56,) (56,)


In [10]:
# Show the first five training sentences with the emoji for their label.
for i in range(5):
    sentence, label = X_train[i], Y_train[i]
    print(sentence, emoji.emojize(emoji_dict[label], use_aliases=True))

never talk to me again 😞
I am proud of your achievements 😄
It is the worst day in my life 😞
Miss you so much ❤
food is life 🍴


In [11]:
# Open the GloVe vectors file; the handle is consumed and closed by the
# loading loop in the next cell. The raw string avoids the invalid '\D' and
# '\g' escape sequences of a normal string literal.
f=open(r'D:\Data Sets\glove.6B.50d.txt',encoding='utf-8')

In [12]:
# Build the word -> vector lookup from the open GloVe file handle `f`.
# Each line is "<word> <v1> <v2> ..."; the first field is the key and the
# remaining fields are parsed as floats.
embeddings_index={}

try:
    for line in f:
        values=line.split()
        if not values:
            # A blank line has no word field; skip it instead of raising IndexError.
            continue
        word=values[0]
        embeddings_index[word]=np.asarray(values[1:],dtype='float')
finally:
    # Release the handle even if a malformed line aborts the parse.
    f.close()

In [13]:
# Spot-check one entry of the lookup table built in the previous cell.
embeddings_index["eat"]

array([ 6.4295e-01, -4.2946e-01, -5.4277e-01, -1.0307e+00,  1.2056e+00,
       -2.7174e-01, -6.3561e-01, -1.5065e-02,  3.7856e-01,  4.6474e-02,
       -1.3102e-01,  6.0500e-01,  1.6391e+00,  2.3940e-01,  1.2128e+00,
        8.3178e-01,  7.3893e-01,  1.5200e-01, -1.4175e-01, -8.8384e-01,
        2.0829e-02, -3.2545e-01,  1.8035e+00,  1.0045e+00,  5.8484e-01,
       -6.2031e-01, -4.3296e-01,  2.3562e-01,  1.3027e+00, -8.1264e-01,
        2.3158e+00,  1.1030e+00, -6.0608e-01,  1.0101e+00, -2.2426e-01,
        1.8908e-02, -1.0931e-01,  3.8350e-01,  7.7362e-01, -8.1927e-02,
       -3.4040e-01, -1.5143e-03, -5.6640e-02,  8.7359e-01,  1.4805e+00,
        6.9421e-01, -3.0966e-01, -9.0826e-01,  3.7277e-03,  8.4550e-01])

In [15]:
def embedding_output(x, maxlen=10, emb_dim=50, embeddings=None):
    """Convert sentences into a zero-padded array of word embeddings.

    Parameters
    ----------
    x : indexable container exposing ``shape[0]`` (e.g. a pandas Series)
        Each entry is either a whitespace-separated sentence string or a
        sequence of tokens left behind by an earlier call.
    maxlen : int, default 10
        Token slots per sentence. Shorter sentences are padded with zero
        vectors; longer ones are truncated instead of raising IndexError.
    emb_dim : int, default 50
        Length of each word vector.
    embeddings : mapping of str -> vector, optional
        Lookup table to use. Defaults to the module-level ``embeddings_index``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(x.shape[0], maxlen, emb_dim)``. Words missing from
        the lookup table are left as zero vectors.

    Notes
    -----
    String entries of ``x`` are replaced in place by their token lists. This
    side effect is kept on purpose: the display cells further down call
    ``' '.join(X_test[i])`` and therefore expect token lists. Entries that are
    already tokenised are left untouched, so the cell can be re-run safely.
    """
    if embeddings is None:
        embeddings = embeddings_index

    embedding_out = np.zeros((x.shape[0], maxlen, emb_dim))

    for ix in range(x.shape[0]):
        sentence = x[ix]
        if isinstance(sentence, str):
            tokens = sentence.split()
            x[ix] = tokens
        else:
            tokens = list(sentence)

        # Only the first `maxlen` tokens fit into the output row.
        for ij, token in enumerate(tokens[:maxlen]):
            vector = embeddings.get(token.lower())
            if vector is not None:
                embedding_out[ix][ij] = vector

    return embedding_out

In [16]:
# Embed both splits. embedding_output also overwrites each sentence in
# X_train / X_test with its token list, which is what triggers the pandas
# "set on a copy of a slice" warning shown below.
embeddings_matrix_train=embedding_output(X_train)
embeddings_matrix_test=embedding_output(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


In [19]:
# Expect (number of test sentences, maxlen, emb_dim).
embeddings_matrix_test.shape

(56, 10, 50)

In [20]:
# Expect (number of training sentences, maxlen, emb_dim).
embeddings_matrix_train.shape

(132, 10, 50)

In [24]:
# One-hot encode the integer labels for categorical cross-entropy.
# The labels are read from the source frames rather than from Y_train / Y_test,
# so re-running this cell does not one-hot encode an already encoded matrix.
Y_train=to_categorical(train[1],num_classes=5)
Y_test=to_categorical(test[1],num_classes=5)

# Training Model
## Using a stacked LSTM (two 64-unit layers)

In [26]:
# Two stacked 64-unit LSTM layers with dropout, followed by a 5-way softmax.
# The input shape (10, 50) matches the (maxlen, emb_dim) rows produced by
# embedding_output.
model = Sequential([
    LSTM(64, input_shape=(10, 50), return_sequences=True),  # pass the full sequence to the next LSTM
    Dropout(0.5),
    LSTM(64, return_sequences=False),  # keep only the final state
    Dropout(0.5),
    Dense(5),
    Activation('softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 10, 64)            29440     
_________________________________________________________________
dropout (Dropout)            (None, 10, 64)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 5)                 325       
_________________________________________________________________
activation (Activation)      (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
____________________________________________________

In [30]:
from keras.callbacks import EarlyStopping, ModelCheckpoint

# Save the whole model each time the validation loss reaches a new minimum.
checkpoint = ModelCheckpoint(
    "best_emoji_model.h5",
    monitor="val_loss",
    save_best_only=True,
    save_weights_only=False,
    verbose=True,
)

# Stop once validation accuracy has not improved for 100 epochs. This watches
# a different metric than the checkpoint above.
earlystop = EarlyStopping(patience=100, monitor='val_accuracy')

# Hold out 20% of the training data for validation while fitting.
hist = model.fit(
    embeddings_matrix_train,
    Y_train,
    batch_size=64,
    epochs=150,
    validation_split=0.2,
    shuffle=True,
    callbacks=[checkpoint, earlystop],
)


Epoch 1/150
Epoch 00001: val_loss improved from inf to 0.13366, saving model to best_emoji_model.h5
Epoch 2/150
Epoch 00002: val_loss improved from 0.13366 to 0.09981, saving model to best_emoji_model.h5
Epoch 3/150
Epoch 00003: val_loss improved from 0.09981 to 0.07280, saving model to best_emoji_model.h5
Epoch 4/150
Epoch 00004: val_loss improved from 0.07280 to 0.06208, saving model to best_emoji_model.h5
Epoch 5/150
Epoch 00005: val_loss did not improve from 0.06208
Epoch 6/150
Epoch 00006: val_loss did not improve from 0.06208
Epoch 7/150
Epoch 00007: val_loss improved from 0.06208 to 0.06121, saving model to best_emoji_model.h5
Epoch 8/150
Epoch 00008: val_loss improved from 0.06121 to 0.04277, saving model to best_emoji_model.h5
Epoch 9/150
Epoch 00009: val_loss improved from 0.04277 to 0.03847, saving model to best_emoji_model.h5
Epoch 10/150
Epoch 00010: val_loss improved from 0.03847 to 0.03798, saving model to best_emoji_model.h5
Epoch 11/150
Epoch 00011: val_loss improved f

In [32]:
# Restore the weights written by the ModelCheckpoint callback (lowest val_loss),
# then score on the test split.
model.load_weights("best_emoji_model.h5")
# Returns the loss followed by the metrics given to compile() (accuracy).
model.evaluate(embeddings_matrix_test,Y_test)




[1.8365737199783325, 0.6071428656578064]

In [33]:
# Predicted class per test sentence: the index of the largest softmax output.
# This replaces the deprecated Sequential.predict_classes, as recommended by
# the deprecation message printed below.
pred=np.argmax(model.predict(embeddings_matrix_test),axis=-1)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [35]:
# For the first 30 test sentences print: sentence, true emoji, predicted emoji.
# X_test entries are token lists at this point (embedding_output split them),
# so they are joined back with spaces.
for i in range(30):
    sentence = ' '.join(X_test[i])
    actual = emoji.emojize(emoji_dict[np.argmax(Y_test[i])], use_aliases=True)
    guessed = emoji.emojize(emoji_dict[pred[i]], use_aliases=True)
    print(sentence, actual, guessed)

I want to eat 🍴 🍴
he did not answer 😞 😞
he got a raise 😄 😞
she got me a present ❤ 😞
ha ha ha it was so funny 😄 😄
he is a good friend ❤ 😄
I am upset ❤ 😞
We had such a lovely dinner tonight ❤ 😄
where is the food 🍴 🍴
Stop making this joke ha ha ha 😄 😄
where is the ball ⚾ ⚾
work is hard 😞 😞
This girl is messing with me 😞 😞
are you serious ha ha 😄 😞
Let us go play baseball ⚾ ⚾
This stupid grader is not working 😞 😞
work is horrible 😞 😞
Congratulation for having a baby 😄 😄
stop messing around 😞 😞
any suggestions for dinner 🍴 😄
I love taking breaks ❤ ❤
you brighten my day 😄 ❤
I boiled rice 🍴 🍴
she is a bully 😞 ❤
Why are you feeling bad 😞 😞
I am upset 😞 😞
I worked during my birthday 😞 😄
My grandmother is the love of my life ❤ ❤
enjoy your break 😄 ⚾
valentine day is near ❤ 😄


In [55]:
# Predicting for our own sentences
def _embed_tokens(tokens, maxlen=10, emb_dim=50):
    """Return a (1, maxlen, emb_dim) zero-padded embedding for one token list.

    Tokens are lower-cased before lookup in ``embeddings_index``. Words that
    are missing from the table stay as zero vectors, matching what
    ``embedding_output`` does for the train/test data. Tokens beyond
    ``maxlen`` are dropped.
    """
    embedded = np.zeros((1, maxlen, emb_dim))
    for ix, token in enumerate(tokens[:maxlen]):
        vector = embeddings_index.get(token.lower())
        if vector is not None:
            embedded[0][ix] = vector
    return embedded


# Each sentence is embedded over its own length. The original loops iterated
# over len(x), but `x` is not defined anywhere in this notebook.
x1 = ['I', 'love', 'dogs']
x1_ = _embed_tokens(x1)

x2 = ['I', 'do','not','like' ,'cats']
x2_ = _embed_tokens(x2)


In [56]:
# Predict one emoji per custom sentence and print it after the sentence.
# argmax over the softmax output replaces the deprecated predict_classes.
pred=np.argmax(model.predict(x1_),axis=-1)
print(' '.join(x1),end=" ")
print(emoji.emojize(emoji_dict[pred[0]], use_aliases=True),end="\n")
pred=np.argmax(model.predict(x2_),axis=-1)
print(' '.join(x2),end=" ")
print(emoji.emojize(emoji_dict[pred[0]], use_aliases=True))

I love dogs ❤
I do not like cats 😞


# Training Model
## Using a larger stacked LSTM (two 128-unit layers)

In [95]:
# Same layout as the first model, but with 128 units in each LSTM layer.
model = Sequential([
    LSTM(128, input_shape=(10, 50), return_sequences=True),  # pass the full sequence to the next LSTM
    Dropout(0.5),
    LSTM(128, return_sequences=False),  # keep only the final state
    Dropout(0.5),
    Dense(5),
    Activation('softmax'),
])
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_6 (LSTM)                (None, 10, 128)           91648     
_________________________________________________________________
dropout_6 (Dropout)          (None, 10, 128)           0         
_________________________________________________________________
lstm_7 (LSTM)                (None, 128)               131584    
_________________________________________________________________
dropout_7 (Dropout)          (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 5)                 645       
_________________________________________________________________
activation_3 (Activation)    (None, 5)                 0         
Total params: 223,877
Trainable params: 223,877
Non-trainable params: 0
________________________________________________

In [97]:
# Train on the full training set for all 150 epochs. Unlike the first run,
# no validation split or callbacks are passed here.
hist=model.fit(embeddings_matrix_train,Y_train,epochs=150,batch_size=32)


Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78

In [99]:
# Score the model as it stands after the last training epoch (no checkpoint is
# restored for this run). Returns the loss followed by accuracy.
model.evaluate(embeddings_matrix_test,Y_test)



[2.2850537300109863, 0.625]

In [101]:
# Predicted class per test sentence: the index of the largest softmax output.
# This replaces the deprecated Sequential.predict_classes.
pred = np.argmax(model.predict(embeddings_matrix_test), axis=-1)

In [103]:
# For the first 30 test sentences print: sentence, true emoji, predicted emoji.
# X_test entries are token lists here, so they are re-joined with spaces.
for row in range(30):
    text = ' '.join(X_test[row])
    true_emoji = emoji.emojize(emoji_dict[np.argmax(Y_test[row])], use_aliases=True)
    predicted_emoji = emoji.emojize(emoji_dict[pred[row]], use_aliases=True)
    print(text, true_emoji, predicted_emoji)

I want to eat 🍴 🍴
he did not answer 😞 😞
he got a raise 😄 😞
she got me a present ❤ ❤
ha ha ha it was so funny 😄 😄
he is a good friend ❤ 😄
I am upset ❤ 😞
We had such a lovely dinner tonight ❤ 😄
where is the food 🍴 🍴
Stop making this joke ha ha ha 😄 😄
where is the ball ⚾ ⚾
work is hard 😞 😄
This girl is messing with me 😞 ❤
are you serious ha ha 😄 😞
Let us go play baseball ⚾ ⚾
This stupid grader is not working 😞 😞
work is horrible 😞 😄
Congratulation for having a baby 😄 😄
stop messing around 😞 😞
any suggestions for dinner 🍴 😄
I love taking breaks ❤ ❤
you brighten my day 😄 ❤
I boiled rice 🍴 🍴
she is a bully 😞 ❤
Why are you feeling bad 😞 😞
I am upset 😞 😞
I worked during my birthday 😞 😄
My grandmother is the love of my life ❤ ❤
enjoy your break 😄 ⚾
valentine day is near ❤ 😄
