In [1]:
%config IPCompleter.use_jedi = False

## Emoji Predictor

### Get the Emoji Package

In [2]:
!pip install emoji



In [3]:
import emoji

In [4]:
# emoji.EMOJI_UNICODE

In [5]:
emoji_dictionary = {
    "0": "\u2764\uFE0F",
    "1": ":baseball:",
    "2": ":grinning_face_with_big_eyes:",
    "3": ":disappointed_face:",
    "4": ":fork_and_knife:",
    "5": ":hundred_points:",
    "6": ":fire:",
    "7": ":face_blowing_a_kiss:",
    "8": ":chestnut:",
    "9": ":flexed_biceps:"
}

In [6]:
for e in emoji_dictionary.values():
    print(emoji.emojize(e))

❤️
⚾
😃
😞
🍴
💯
🔥
😘
🌰
💪


### Processing a custom Dataset

In [7]:
import pandas as pd
import numpy as np

In [8]:
train = pd.read_csv("Dataset/train_emoji.csv",header=None)
test = pd.read_csv("Dataset/test_emoji.csv",header=None)

In [9]:
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [10]:
# Lets print the sentences with emojis
data = train.values
print(data.shape)

(132, 4)


In [11]:
X_train = train[0]
Y_train = train[1]

X_test = test[0]
Y_test = test[1]

In [12]:
for i in range(5):
    print(X_train[i],emoji.emojize(emoji_dictionary[str(Y_train[i])]))

never talk to me again 😞
I am proud of your achievements 😃
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴


### Convert Sentences to Embeddings
Using **Glove 6B.5d.txt**

In [13]:
f = open("Dataset/glove.6B.50d.txt",encoding="utf-8")

In [14]:
embeddings_index = {}
cnt = 0
for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:],dtype='float')
    #print(word,coefs)
    embeddings_index[word]=coefs
f.close()

In [15]:
emb_dim = embeddings_index["eat"].shape[0]
print(emb_dim)

50


### Converting sentences into vectors (Embedding Layer Output)

In [16]:
def embedding_output(X):
    maxLen = 10
    embedding_out = np.zeros((X.shape[0],maxLen,emb_dim))
    
    for ix in range(X.shape[0]):
        X[ix] = X[ix].split()
        for ij in range(len(X[ix])):
            # go to every word in the current (ix) sentence
            try:
                embedding_out[ix][ij] = embeddings_index[X[ix][ij].lower()]
            except:
                embedding_out[ix][ij] = np.zeros((50,))
    return embedding_out

embeddings_matrix_train = embedding_output(X_train)
embeddings_matrix_test = embedding_output(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[ix] = X[ix].split()


In [17]:
print(X_train[0])
print(len(X_train[0]))

['never', 'talk', 'to', 'me', 'again']
5


In [18]:
print(embeddings_matrix_train.shape)
print(embeddings_matrix_test.shape)

(132, 10, 50)
(56, 10, 50)


In [19]:
Y_train.shape

(132,)

In [20]:
from keras.utils import to_categorical

In [21]:
Y_train = to_categorical(Y_train,num_classes=5)
Y_test = to_categorical(Y_test,num_classes=5)

In [22]:
print(Y_train.shape)
print(Y_test.shape)

(132, 5)
(56, 5)


### Define the RNN/LSTM model

In [23]:
from keras.models import Sequential
from keras.layers import *

In [24]:
model = Sequential()
model.add(LSTM(64,input_shape=(10,50)))
model.add(Dropout(0.5))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 64)                29440     
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 5)                 325       
_________________________________________________________________
activation (Activation)      (None, 5)                 0         
Total params: 29,765
Trainable params: 29,765
Non-trainable params: 0
_________________________________________________________________


In [25]:
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

# earlystop = EarlyStopping(monitor='val_accuracy',patience=10)
checkpoint = ModelCheckpoint("best_model.h5",monitor='val_loss',verbose=True,save_best_only=True)

hist = model.fit(embeddings_matrix_train, Y_train,
                 epochs=100,
                 callbacks=[checkpoint],
                 batch_size=64,
                 shuffle=True,
                 validation_split=0.2
                )

Epoch 1/100
Epoch 00001: val_loss improved from inf to 1.60909, saving model to best_model.h5
Epoch 2/100
Epoch 00002: val_loss did not improve from 1.60909
Epoch 3/100
Epoch 00003: val_loss did not improve from 1.60909
Epoch 4/100
Epoch 00004: val_loss did not improve from 1.60909
Epoch 5/100
Epoch 00005: val_loss did not improve from 1.60909
Epoch 6/100
Epoch 00006: val_loss did not improve from 1.60909
Epoch 7/100
Epoch 00007: val_loss did not improve from 1.60909
Epoch 8/100
Epoch 00008: val_loss did not improve from 1.60909
Epoch 9/100
Epoch 00009: val_loss did not improve from 1.60909
Epoch 10/100
Epoch 00010: val_loss did not improve from 1.60909
Epoch 11/100
Epoch 00011: val_loss did not improve from 1.60909
Epoch 12/100
Epoch 00012: val_loss did not improve from 1.60909
Epoch 13/100
Epoch 00013: val_loss did not improve from 1.60909
Epoch 14/100
Epoch 00014: val_loss improved from 1.60909 to 1.60871, saving model to best_model.h5
Epoch 15/100
Epoch 00015: val_loss improved fro

Epoch 00029: val_loss improved from 1.13849 to 1.07589, saving model to best_model.h5
Epoch 30/100
Epoch 00030: val_loss improved from 1.07589 to 1.03792, saving model to best_model.h5
Epoch 31/100
Epoch 00031: val_loss improved from 1.03792 to 0.93686, saving model to best_model.h5
Epoch 32/100
Epoch 00032: val_loss improved from 0.93686 to 0.92147, saving model to best_model.h5
Epoch 33/100
Epoch 00033: val_loss improved from 0.92147 to 0.90819, saving model to best_model.h5
Epoch 34/100
Epoch 00034: val_loss did not improve from 0.90819
Epoch 35/100
Epoch 00035: val_loss did not improve from 0.90819
Epoch 36/100
Epoch 00036: val_loss did not improve from 0.90819
Epoch 37/100
Epoch 00037: val_loss did not improve from 0.90819
Epoch 38/100
Epoch 00038: val_loss did not improve from 0.90819
Epoch 39/100
Epoch 00039: val_loss improved from 0.90819 to 0.87034, saving model to best_model.h5
Epoch 40/100
Epoch 00040: val_loss did not improve from 0.87034
Epoch 41/100
Epoch 00041: val_loss 

Epoch 58/100
Epoch 00058: val_loss did not improve from 0.81268
Epoch 59/100
Epoch 00059: val_loss did not improve from 0.81268
Epoch 60/100
Epoch 00060: val_loss did not improve from 0.81268
Epoch 61/100
Epoch 00061: val_loss did not improve from 0.81268
Epoch 62/100
Epoch 00062: val_loss did not improve from 0.81268
Epoch 63/100
Epoch 00063: val_loss did not improve from 0.81268
Epoch 64/100
Epoch 00064: val_loss did not improve from 0.81268
Epoch 65/100
Epoch 00065: val_loss did not improve from 0.81268
Epoch 66/100
Epoch 00066: val_loss did not improve from 0.81268
Epoch 67/100
Epoch 00067: val_loss did not improve from 0.81268
Epoch 68/100
Epoch 00068: val_loss did not improve from 0.81268
Epoch 69/100
Epoch 00069: val_loss did not improve from 0.81268
Epoch 70/100
Epoch 00070: val_loss did not improve from 0.81268
Epoch 71/100
Epoch 00071: val_loss did not improve from 0.81268
Epoch 72/100
Epoch 00072: val_loss did not improve from 0.81268
Epoch 73/100
Epoch 00073: val_loss did n

Epoch 00088: val_loss did not improve from 0.81268
Epoch 89/100
Epoch 00089: val_loss did not improve from 0.81268
Epoch 90/100
Epoch 00090: val_loss did not improve from 0.81268
Epoch 91/100
Epoch 00091: val_loss did not improve from 0.81268
Epoch 92/100
Epoch 00092: val_loss did not improve from 0.81268
Epoch 93/100
Epoch 00093: val_loss did not improve from 0.81268
Epoch 94/100
Epoch 00094: val_loss did not improve from 0.81268
Epoch 95/100
Epoch 00095: val_loss did not improve from 0.81268
Epoch 96/100
Epoch 00096: val_loss did not improve from 0.81268
Epoch 97/100
Epoch 00097: val_loss did not improve from 0.81268
Epoch 98/100
Epoch 00098: val_loss did not improve from 0.81268
Epoch 99/100
Epoch 00099: val_loss did not improve from 0.81268
Epoch 100/100
Epoch 00100: val_loss did not improve from 0.81268


In [26]:
pred = model.predict_classes(embeddings_matrix_test)

Instructions for updating:
Please use instead:* `np.argmax(model.predict(x), axis=-1)`,   if your model does multi-class classification   (e.g. if it uses a `softmax` last-layer activation).* `(model.predict(x) > 0.5).astype("int32")`,   if your model does binary classification   (e.g. if it uses a `sigmoid` last-layer activation).


In [27]:
model.load_weights("best_model.h5")

In [28]:
model.evaluate(embeddings_matrix_test, Y_test)



[1.2062474489212036, 0.5714285969734192]

### Implementing Stacked LSTM

In [29]:
from keras.models import Sequential
from keras.layers import *

In [30]:
# We have not added a Embedding layer because it adds a lot of parameters to the model
# And for such a small dataset like this, it is better to use Transfer Learning Approach
model = Sequential()
model.add(LSTM(64,input_shape=(10,50), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64,return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss="categorical_crossentropy", optimizer="adam",metrics=["accuracy"])
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_1 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_1 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_2 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_1 (Activation)    (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
__________________________________________________

In [31]:
from keras.callbacks import ModelCheckpoint

checkpoint = ModelCheckpoint("best_model2.h5",monitor='val_loss',verbose=True,save_best_only=True)

hist = model.fit(embeddings_matrix_train, Y_train,
                 epochs=150,
                 callbacks=[checkpoint],
                 batch_size=64,
                 shuffle=True,
                 validation_split=0.2
                )

Epoch 1/150
Epoch 00001: val_loss improved from inf to 1.59725, saving model to best_model2.h5
Epoch 2/150
Epoch 00002: val_loss did not improve from 1.59725
Epoch 3/150
Epoch 00003: val_loss did not improve from 1.59725
Epoch 4/150
Epoch 00004: val_loss did not improve from 1.59725
Epoch 5/150
Epoch 00005: val_loss did not improve from 1.59725
Epoch 6/150
Epoch 00006: val_loss did not improve from 1.59725
Epoch 7/150
Epoch 00007: val_loss did not improve from 1.59725
Epoch 8/150
Epoch 00008: val_loss did not improve from 1.59725
Epoch 9/150
Epoch 00009: val_loss did not improve from 1.59725
Epoch 10/150
Epoch 00010: val_loss improved from 1.59725 to 1.58739, saving model to best_model2.h5
Epoch 11/150
Epoch 00011: val_loss improved from 1.58739 to 1.55365, saving model to best_model2.h5
Epoch 12/150
Epoch 00012: val_loss improved from 1.55365 to 1.50955, saving model to best_model2.h5
Epoch 13/150
Epoch 00013: val_loss improved from 1.50955 to 1.46740, saving model to best_model2.h5
E

Epoch 29/150
Epoch 00029: val_loss did not improve from 0.99186
Epoch 30/150
Epoch 00030: val_loss did not improve from 0.99186
Epoch 31/150
Epoch 00031: val_loss did not improve from 0.99186
Epoch 32/150
Epoch 00032: val_loss improved from 0.99186 to 0.94155, saving model to best_model2.h5
Epoch 33/150
Epoch 00033: val_loss improved from 0.94155 to 0.89997, saving model to best_model2.h5
Epoch 34/150
Epoch 00034: val_loss did not improve from 0.89997
Epoch 35/150
Epoch 00035: val_loss did not improve from 0.89997
Epoch 36/150
Epoch 00036: val_loss did not improve from 0.89997
Epoch 37/150
Epoch 00037: val_loss did not improve from 0.89997
Epoch 38/150
Epoch 00038: val_loss did not improve from 0.89997
Epoch 39/150
Epoch 00039: val_loss did not improve from 0.89997
Epoch 40/150
Epoch 00040: val_loss did not improve from 0.89997
Epoch 41/150
Epoch 00041: val_loss did not improve from 0.89997
Epoch 42/150
Epoch 00042: val_loss did not improve from 0.89997
Epoch 43/150
Epoch 00043: val_lo

Epoch 59/150
Epoch 00059: val_loss did not improve from 0.89997
Epoch 60/150
Epoch 00060: val_loss did not improve from 0.89997
Epoch 61/150
Epoch 00061: val_loss did not improve from 0.89997
Epoch 62/150
Epoch 00062: val_loss did not improve from 0.89997
Epoch 63/150
Epoch 00063: val_loss did not improve from 0.89997
Epoch 64/150
Epoch 00064: val_loss did not improve from 0.89997
Epoch 65/150
Epoch 00065: val_loss did not improve from 0.89997
Epoch 66/150
Epoch 00066: val_loss did not improve from 0.89997
Epoch 67/150
Epoch 00067: val_loss did not improve from 0.89997
Epoch 68/150
Epoch 00068: val_loss did not improve from 0.89997
Epoch 69/150
Epoch 00069: val_loss did not improve from 0.89997
Epoch 70/150
Epoch 00070: val_loss did not improve from 0.89997
Epoch 71/150
Epoch 00071: val_loss did not improve from 0.89997
Epoch 72/150
Epoch 00072: val_loss did not improve from 0.89997
Epoch 73/150
Epoch 00073: val_loss did not improve from 0.89997
Epoch 74/150
Epoch 00074: val_loss did n

Epoch 00089: val_loss did not improve from 0.89997
Epoch 90/150
Epoch 00090: val_loss did not improve from 0.89997
Epoch 91/150
Epoch 00091: val_loss did not improve from 0.89997
Epoch 92/150
Epoch 00092: val_loss did not improve from 0.89997
Epoch 93/150
Epoch 00093: val_loss did not improve from 0.89997
Epoch 94/150
Epoch 00094: val_loss did not improve from 0.89997
Epoch 95/150
Epoch 00095: val_loss did not improve from 0.89997
Epoch 96/150
Epoch 00096: val_loss did not improve from 0.89997
Epoch 97/150
Epoch 00097: val_loss did not improve from 0.89997
Epoch 98/150
Epoch 00098: val_loss did not improve from 0.89997
Epoch 99/150
Epoch 00099: val_loss did not improve from 0.89997
Epoch 100/150
Epoch 00100: val_loss did not improve from 0.89997
Epoch 101/150
Epoch 00101: val_loss did not improve from 0.89997
Epoch 102/150
Epoch 00102: val_loss did not improve from 0.89997
Epoch 103/150
Epoch 00103: val_loss did not improve from 0.89997
Epoch 104/150
Epoch 00104: val_loss did not impro

Epoch 119/150
Epoch 00119: val_loss did not improve from 0.89997
Epoch 120/150
Epoch 00120: val_loss did not improve from 0.89997
Epoch 121/150
Epoch 00121: val_loss did not improve from 0.89997
Epoch 122/150
Epoch 00122: val_loss did not improve from 0.89997
Epoch 123/150
Epoch 00123: val_loss did not improve from 0.89997
Epoch 124/150
Epoch 00124: val_loss did not improve from 0.89997
Epoch 125/150
Epoch 00125: val_loss did not improve from 0.89997
Epoch 126/150
Epoch 00126: val_loss did not improve from 0.89997
Epoch 127/150
Epoch 00127: val_loss did not improve from 0.89997
Epoch 128/150
Epoch 00128: val_loss did not improve from 0.89997
Epoch 129/150
Epoch 00129: val_loss did not improve from 0.89997
Epoch 130/150
Epoch 00130: val_loss did not improve from 0.89997
Epoch 131/150
Epoch 00131: val_loss did not improve from 0.89997
Epoch 132/150
Epoch 00132: val_loss did not improve from 0.89997
Epoch 133/150
Epoch 00133: val_loss did not improve from 0.89997
Epoch 134/150
Epoch 00134

Epoch 149/150
Epoch 00149: val_loss did not improve from 0.89997
Epoch 150/150
Epoch 00150: val_loss did not improve from 0.89997


In [32]:
# model.load_weights("best_model2.h5")

In [33]:
model.evaluate(embeddings_matrix_test, Y_test)



[1.4345649480819702, 0.625]

### Visualising the results

In [34]:
pred = model.predict_classes(embeddings_matrix_test)

In [35]:
pred

array([4, 3, 2, 0, 2, 2, 1, 2, 4, 2, 1, 2, 0, 0, 1, 3, 2, 2, 3, 4, 0, 0,
       4, 0, 3, 1, 2, 0, 4, 2, 0, 1, 0, 2, 0, 1, 2, 4, 4, 2, 1, 0, 0, 1,
       2, 2, 2, 2, 3, 3, 1, 2, 3, 2, 3, 3], dtype=int64)

In [37]:
for i in range(30):
    print(' '.join(X_test[i]))
    print(emoji.emojize(emoji_dictionary[str(np.argmax(Y_test[i]))]))
    print(emoji.emojize(emoji_dictionary[str(pred[i])]))

I want to eat
🍴
🍴
he did not answer
😞
😞
he got a raise
😃
😃
she got me a present
❤️
❤️
ha ha ha it was so funny
😃
😃
he is a good friend
❤️
😃
I am upset
❤️
⚾
We had such a lovely dinner tonight
❤️
😃
where is the food
🍴
🍴
Stop making this joke ha ha ha
😃
😃
where is the ball
⚾
⚾
work is hard
😞
😃
This girl is messing with me
😞
❤️
are you serious ha ha
😃
❤️
Let us go play baseball
⚾
⚾
This stupid grader is not working
😞
😞
work is horrible
😞
😃
Congratulation for having a baby
😃
😃
stop messing around
😞
😞
any suggestions for dinner
🍴
🍴
I love taking breaks
❤️
❤️
you brighten my day
😃
❤️
I boiled rice
🍴
🍴
she is a bully
😞
❤️
Why are you feeling bad
😞
😞
I am upset
😞
⚾
I worked during my birthday
😞
😃
My grandmother is the love of my life
❤️
❤️
enjoy your break
😃
🍴
valentine day is near
❤️
😃
