# Emoji Predictor

Step-1 get the emoji package

In [1]:
!pip install emoji




In [2]:
import emoji


In [3]:
# Emoji dictionary
# emoji.EMOJI_UNICODE 

In [4]:
emoji_dictionary = {
    "0":'\u2764\uFE0F',
    "1":':baseball:',
    "2":':grinning_face_with_big_eyes:',
    "3":':disappointed_face:',
    "4":":fork_and_knife:",
    "5":":hundred_points:",
    "6":":fire:",
    "7":":face_blowing_a_kiss:",
    "8":":chestnut:",
    "9":":flexed_biceps:"
}

In [5]:
for e in emoji_dictionary.values():
    print(emoji.emojize(e))

❤️
⚾
😃
😞
🍴
💯
🔥
😘
🌰
💪


In [6]:
# Step-2 Processing a custom dataset

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [8]:
train = pd.read_csv('train_emoji.csv',header=None)
test = pd.read_csv('test_emoji.csv',header=None)

In [9]:
data = train.values

In [10]:
X_train = train[0]
X_test = test[0]

In [11]:
Y_train = train[1]
Y_test = test[1]

In [12]:
Y_train.shape

(132,)

In [13]:
i = 0
for y in Y_train[:7]:
    print(X_train[i],emoji.emojize(emoji_dictionary[str(y)]))
    i += 1

never talk to me again 😞
I am proud of your achievements 😃
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴
I love you mum ❤️
Stop saying bullshit 😞


#### step-3 Getting the glove vectors



In [14]:
f = open('glove.6B.50d.txt',encoding='utf8')

In [15]:
embeddings_index = {}

for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:],dtype='float')
    embeddings_index[word] = coefs

f.close()
    

In [16]:
embeddings_index['eat']

array([ 6.4295e-01, -4.2946e-01, -5.4277e-01, -1.0307e+00,  1.2056e+00,
       -2.7174e-01, -6.3561e-01, -1.5065e-02,  3.7856e-01,  4.6474e-02,
       -1.3102e-01,  6.0500e-01,  1.6391e+00,  2.3940e-01,  1.2128e+00,
        8.3178e-01,  7.3893e-01,  1.5200e-01, -1.4175e-01, -8.8384e-01,
        2.0829e-02, -3.2545e-01,  1.8035e+00,  1.0045e+00,  5.8484e-01,
       -6.2031e-01, -4.3296e-01,  2.3562e-01,  1.3027e+00, -8.1264e-01,
        2.3158e+00,  1.1030e+00, -6.0608e-01,  1.0101e+00, -2.2426e-01,
        1.8908e-02, -1.0931e-01,  3.8350e-01,  7.7362e-01, -8.1927e-02,
       -3.4040e-01, -1.5143e-03, -5.6640e-02,  8.7359e-01,  1.4805e+00,
        6.9421e-01, -3.0966e-01, -9.0826e-01,  3.7277e-03,  8.4550e-01])

#### Step-4 Converting Sentences into Embeddings

In [17]:
# Embedding layer output

In [18]:
def embedding_output(X):
    maxLen = 10
    emb_dim = 50
    embedding_out = np.zeros((X.shape[0],maxLen,emb_dim))
    for ix in range(X.shape[0]):
         X[ix] = X[ix].split()
         for ij in range(len(X[ix])):
            # Go to every word in the current (ix) sentence
            embedding_out[ix][ij] = embeddings_index[X[ix][ij].lower()]
            
    return embedding_out

In [19]:
embedding_matrix_train = embedding_output(X_train)
embedding_matrix_test = embedding_output(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  X[ix] = X[ix].split()


In [20]:
print(X_train[0])
print(len(X_train[0]))

['never', 'talk', 'to', 'me', 'again']
5


In [21]:
print(embedding_matrix_train.shape)
print(embedding_matrix_test.shape)

(132, 10, 50)
(56, 10, 50)


In [22]:
#### Step-5 Define the RNN/LSTM Model

In [23]:
from keras.models import Sequential 
from keras.layers import LSTM,Dense,Softmax,Dropout,Activation

In [24]:
model = Sequential()
model.add(LSTM(64,input_shape=(10,50),return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64,return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy',optimizer='adam',metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 10, 64)            29440     
_________________________________________________________________
dropout (Dropout)            (None, 10, 64)            0         
_________________________________________________________________
lstm_1 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense (Dense)                (None, 5)                 325       
_________________________________________________________________
activation (Activation)      (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
____________________________________________________

In [25]:
# Train a Model

In [26]:
from keras.utils import to_categorical


In [27]:
Y_train = to_categorical(Y_train,num_classes=5)
Y_test = to_categorical(test[1],num_classes=5)

print(Y_train.shape)
print(Y_train[0])

(132, 5)
[0. 0. 0. 1. 0.]


In [41]:
from keras.callbacks import EarlyStopping,ModelCheckpoint

checkpoint = ModelCheckpoint('best_model.h5',monitor='val_loss',verbose=True,save_best_only=True)
earlystop = EarlyStopping(monitor='val_accuracy',patience=3)
hist = model.fit(embedding_matrix_train,Y_train,epochs=100,callbacks=[checkpoint,earlystop],batch_size=64,shuffle=True,validation_split=0.2)

Epoch 1/100
Epoch 00001: val_loss improved from inf to 1.59217, saving model to best_model.h5
Epoch 2/100
Epoch 00002: val_loss did not improve from 1.59217
Epoch 3/100
Epoch 00003: val_loss did not improve from 1.59217
Epoch 4/100
Epoch 00004: val_loss did not improve from 1.59217


In [42]:
pred = model.predict_classes(embedding_matrix_test)

In [43]:
print(pred)

[4 3 1 0 2 2 1 2 1 2 1 2 0 0 1 3 2 2 3 2 0 0 4 2 0 1 2 0 0 2 0 1 0 2 0 2 2
 4 4 2 1 0 0 2 2 0 2 2 0 1 3 0 3 2 2 0]


In [44]:
model.load_weights('best_model.h5')

In [45]:
model.evaluate(embedding_matrix_test,Y_test)



[2.0324301719665527, 0.5178571343421936]

In [47]:
for i in range(30):
    print(''.join(X_test[i]))
    print(emoji.emojize(emoji_dictionary[str(np.argmax(Y_test[i]))]))
    print(emoji.emojize(emoji_dictionary[str(pred[i])]))

Iwanttoeat
🍴
🍴
hedidnotanswer
😞
😞
hegotaraise
😃
⚾
shegotmeapresent
❤️
❤️
hahahaitwassofunny
😃
😃
heisagoodfriend
❤️
😃
Iamupset
❤️
⚾
Wehadsuchalovelydinnertonight
❤️
😃
whereisthefood
🍴
⚾
Stopmakingthisjokehahaha
😃
😃
whereistheball
⚾
⚾
workishard
😞
😃
Thisgirlismessingwithme
😞
❤️
areyouserioushaha
😃
❤️
Letusgoplaybaseball
⚾
⚾
Thisstupidgraderisnotworking
😞
😞
workishorrible
😞
😃
Congratulationforhavingababy
😃
😃
stopmessingaround
😞
😞
anysuggestionsfordinner
🍴
😃
Ilovetakingbreaks
❤️
❤️
youbrightenmyday
😃
❤️
Iboiledrice
🍴
🍴
sheisabully
😞
😃
Whyareyoufeelingbad
😞
❤️
Iamupset
😞
⚾
Iworkedduringmybirthday
😞
😃
Mygrandmotheristheloveofmylife
❤️
❤️
enjoyyourbreak
😃
❤️
valentinedayisnear
❤️
😃
