## Get the emoji package

In [1]:
import emoji

In [2]:
emoji_dictionary = {'0': '\u2764\uFE0F',
                    '1': ':baseball:',
                    '2': ':grinning_face_with_big_eyes:',
                    '3': ':disappointed_face:',
                    '4': ':fork_and_knife:',
                    '5': ':hundred_points:',
                    '6': ':fire:',
                    '7': ':face_blowing_a_kiss:',
                    '8': ':chestnut:',
                    '9': ':flexed_biceps:'}

In [3]:
for e in emoji_dictionary.values(): 
    print(emoji.emojize(e))

❤️
⚾
😃
😞
🍴
💯
🔥
😘
🌰
💪


## Processing a Custom Dataset

In [4]:
import pandas as pd
import numpy as np

In [5]:
train = pd.read_csv('train_emoji.csv', header=None)
test = pd.read_csv('test_emoji.csv', header=None)

In [6]:
train.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [7]:
#Printing sentences with emojis
data = train.values
print(data.shape)

(132, 4)


In [8]:
X_train = train[0]
Y_train = train[1]

X_test = test[0]
Y_test = test[1]

In [11]:
for i in range(5):
    print(X_train[i], emoji.emojize(emoji_dictionary[str(Y_train[i])]))

never talk to me again 😞
I am proud of your achievements 😃
It is the worst day in my life 😞
Miss you so much ❤️
food is life 🍴


## Getting the Glove Vectors

In [15]:
f = open('glove.6B.50d.txt', encoding='utf-8')

In [16]:
embeddings_index = {}

for line in f:
    values = line.split()
    word = values[0]
    coefs = np.asarray(values[1:], dtype='float')
    embeddings_index[word] = coefs
    
f.close()

In [18]:
emb_dim = embeddings_index['eat'].shape[0]
print(emb_dim)

50


## Converting sentences into vectors (Embedding Layer Output)

In [19]:
def embedding_output(X):
    maxLen = 10
    embedding_out = np.zeros((X.shape[0], maxLen, emb_dim))
    
    for ix in range(X.shape[0]):
        X[ix] = X[ix].split()
        
        for ij in range(len(X[ix])):
            try:
                embedding_out[ix][ij] = embeddings_index[X[ix][ij].lower()]
            except:
                embedding_out[ix][ij] = np.zeros((50,))
    
    return embedding_out

In [20]:
embeddings_matrix_train = embedding_output(X_train)
embeddings_matrix_test = embedding_output(X_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


In [21]:
print(embeddings_matrix_train.shape)
print(embeddings_matrix_test.shape)

(132, 10, 50)
(56, 10, 50)


In [23]:
from keras.utils import to_categorical

In [24]:
Y_train = to_categorical(Y_train, num_classes=5)
Y_test = to_categorical(Y_test, num_classes=5)

## Define the RNN/LSTM Model

In [34]:
from keras.models import Sequential
from keras.layers import *
from keras.callbacks import EarlyStopping, ModelCheckpoint

In [56]:
model = Sequential()
model.add(LSTM(64,input_shape=(10,50),return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(64,return_sequences=False))
model.add(Dropout(0.5))
model.add(Dense(5))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_8 (LSTM)                (None, 10, 64)            29440     
_________________________________________________________________
dropout_8 (Dropout)          (None, 10, 64)            0         
_________________________________________________________________
lstm_9 (LSTM)                (None, 64)                33024     
_________________________________________________________________
dropout_9 (Dropout)          (None, 64)                0         
_________________________________________________________________
dense_8 (Dense)              (None, 5)                 325       
_________________________________________________________________
activation_8 (Activation)    (None, 5)                 0         
Total params: 62,789
Trainable params: 62,789
Non-trainable params: 0
_________________________________________________________________


In [57]:
earlystop = EarlyStopping(monitor='val_loss', patience=10)
hist = model.fit(embeddings_matrix_train, Y_train, epochs=100, batch_size=64, shuffle=True, validation_split=0.2, callbacks=[earlystop])

Train on 105 samples, validate on 27 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100


In [58]:
pred = model.predict_classes(embeddings_matrix_test)

In [59]:
print(pred)

[4 3 2 0 2 2 1 2 4 2 1 2 0 2 1 3 2 2 3 2 0 0 4 2 3 1 2 0 4 2 0 1 0 2 0 2 2
 4 4 2 1 0 0 2 2 0 2 2 0 1 3 0 3 2 2 0]


In [60]:
model.evaluate(embeddings_matrix_test, Y_test)



[1.5770921536854334, 0.6250000085149493]