In [2]:
#! pip install emoji

In [1]:
import emoji
import pandas as pd
import numpy as np

In [2]:
# emoji.EMOJI_UNICODE

In [3]:
# Lookup table: class label (kept as a string key) -> text handed to
# emoji.emojize() when a label has to be shown as an emoji.
emoji_dict = {
    "0": "\u2764\uFE0F",  # written as raw code points instead of a ":name:" alias
    "1": ":baseball:",
    "2": ":beaming_face_with_smiling_eyes:",
    "3": ":downcast_face_with_sweat:",
    "4": ":fork_and_knife:",
}

In [4]:
# Render every entry of the label -> emoji table, one per line, as a quick
# visual check that each value resolves to an emoji.
for alias in emoji_dict.values():
    rendered = emoji.emojize(alias)
    print(rendered)

❤️
⚾
😁
😓
🍴


## Read Data

In [157]:
# Both CSV files are read with header=None, so pandas assigns integer
# column labels instead of taking names from the first row.
train_data, test_data = (
    pd.read_csv(csv_name, header=None)
    for csv_name in ("train_emoji.csv", "test_emoji.csv")
)

In [158]:
train_data.head()

Unnamed: 0,0,1,2,3
0,never talk to me again,3,,
1,I am proud of your achievements,2,,
2,It is the worst day in my life,3,,
3,Miss you so much,0,,[0]
4,food is life,4,,


In [159]:
# Columns 2 and 3 are (almost) entirely empty in the preview, so only the
# sentence and its label are kept.
# Rebinding instead of inplace=True, together with errors="ignore", makes the
# cell safe to re-run: a second execution no longer raises KeyError because
# the columns are already gone.
train_data = train_data.drop(columns=[2, 3], errors="ignore")

In [160]:
# Replace the positional column labels with descriptive names.
train_data.columns = ["Text", 'Label']

In [161]:
# Give the test frame the same column names as the training frame.
# NOTE(review): no columns are dropped from test_data first, so this assumes
# test_emoji.csv yields exactly two columns — confirm against the file.
test_data.columns = ['Text', 'Label']

In [162]:
train_data.head()

Unnamed: 0,Text,Label
0,never talk to me again,3
1,I am proud of your achievements,2
2,It is the worst day in my life,3
3,Miss you so much,0
4,food is life,4


In [163]:
# Pull sentences and integer labels out of the frames as NumPy arrays.
# .copy() matters for the text arrays: get_embedding_output() overwrites their
# entries in place with token lists, and `.values` may hand back the frame's
# own storage (or a read-only view under pandas copy-on-write). Working on
# copies keeps train_data / test_data intact and the arrays writable.
x_train = train_data['Text'].values.copy()
y_train = train_data['Label'].values.copy()

x_test = test_data['Text'].values.copy()
y_test = test_data['Label'].values.copy()

In [164]:
y_train

array([3, 2, 3, 0, 4, 0, 3, 2, 3, 1, 3, 3, 1, 3, 2, 3, 2, 3, 1, 2, 3, 0,
       2, 2, 2, 1, 4, 2, 2, 4, 0, 3, 4, 2, 0, 3, 2, 2, 3, 4, 2, 2, 0, 2,
       3, 0, 3, 2, 4, 3, 0, 3, 3, 3, 4, 2, 1, 1, 1, 2, 3, 1, 0, 0, 0, 3,
       4, 4, 2, 2, 1, 2, 0, 3, 2, 2, 0, 0, 3, 1, 2, 1, 2, 2, 4, 3, 3, 2,
       4, 0, 0, 0, 3, 3, 3, 2, 0, 1, 2, 3, 0, 2, 2, 2, 3, 2, 2, 2, 4, 1,
       1, 3, 3, 4, 1, 2, 1, 1, 3, 1, 0, 4, 0, 3, 3, 4, 4, 1, 4, 3, 0, 2])

In [165]:
x_train.shape

(132,)

In [166]:
x_test.shape

(56,)

In [167]:
# Spot-check ten training sentences (indices 19 through 28), each printed
# next to the emoji that its label maps to.
for sentence, label in zip(x_train[19:29], y_train[19:29]):
    print(sentence, emoji.emojize(emoji_dict[str(label)]))

I will celebrate soon 😁
So sad you are not coming 😓
She is my dearest love ❤️
Good job 😁
It was funny lol 😁
candy is life  😁
The chicago cubs won again ⚾
I am hungry 🍴
I am so excited to see you after so long 😁
you did well on you exam 😁


# Get Embedding

In [17]:
# Open the GloVe vector file as UTF-8 text; the parsing loop in the next
# cell iterates over this handle line by line.
f = open("glove.6B.50d.txt", encoding='utf8')

In [18]:
# word -> embedding vector (NumPy float array) parsed from the GloVe text file,
# where each line is "<word> <v1> <v2> ...".
embedding_index = {}

# `f` is the handle opened in the previous cell. If this cell is re-run after
# the handle has been consumed and closed, reopen the same path instead of
# silently rebuilding an empty dictionary from an exhausted file.
glove_file = open(f.name, encoding='utf8') if f.closed else f

# The context manager guarantees the handle is closed once parsing finishes.
with glove_file:
    for line in glove_file:
        values = line.split()
        if not values:
            # Skip blank lines instead of failing on values[0].
            continue
        word = values[0]
        emb = np.array(values[1:], dtype='float')

        embedding_index[word] = emb

In [19]:
embedding_index['america']
embedding_index['apple'].shape

(50,)

In [20]:
def get_embedding_output(X, maxLen=10, emb_dim=50, embeddings=None):
    """Convert sentences into a zero-padded tensor of word embeddings.

    Parameters
    ----------
    X : numpy object array (or list) of str / list of str
        Sentences to embed. Side effect preserved from the original version:
        every string entry is replaced *in place* by its list of
        whitespace-separated tokens (a later cell joins those lists back
        together for display). Entries that are already token lists are used
        as they are, so calling the function twice on the same array works.
    maxLen : int, default 10
        Number of time steps per sentence. Shorter sentences are padded with
        zero vectors; tokens beyond ``maxLen`` are ignored.
    emb_dim : int, default 50
        Dimensionality of each word vector.
    embeddings : mapping of str -> array-like, optional
        Lower-cased word to vector lookup. Defaults to the notebook-level
        ``embedding_index``.

    Returns
    -------
    numpy.ndarray of shape ``(len(X), maxLen, emb_dim)``
        Row ``i``, step ``j`` holds the vector of the ``j``-th token of
        sentence ``i``; words missing from ``embeddings`` and padding
        positions stay all-zero.
    """
    if embeddings is None:
        embeddings = embedding_index

    embedding_output = np.zeros((len(X), maxLen, emb_dim))

    for ix in range(len(X)):
        # Tokenise in place only when the entry is still a raw string.
        if isinstance(X[ix], str):
            X[ix] = X[ix].split()

        # Slicing caps the sentence at maxLen so long inputs cannot index
        # past the end of the output tensor.
        for ij, token in enumerate(X[ix][:maxLen]):
            vector = embeddings.get(token.lower())
            if vector is not None:
                embedding_output[ix][ij] = vector
            # Out-of-vocabulary words keep their zero vector instead of
            # aborting the whole batch with a KeyError.

    return embedding_output

In [21]:
train_embeddings = get_embedding_output(x_train)

In [22]:
test_embeddings = get_embedding_output(x_test)

In [23]:
train_embeddings.shape

(132, 10, 50)

In [24]:
test_embeddings.shape

(56, 10, 50)

In [26]:
from keras.utils import to_categorical

In [27]:
# One-hot encode the integer labels.
# num_classes is pinned to the size of emoji_dict so both splits always get
# the same width; without it to_categorical infers the width from the largest
# label present, and a split that happens to lack the highest class would
# produce a narrower matrix than the model's output layer.
y_train = to_categorical(y_train, num_classes=len(emoji_dict))
y_test = to_categorical(y_test, num_classes=len(emoji_dict))

In [28]:
y_train.shape

(132, 5)

In [30]:
y_test.shape

(56, 5)

## Define LSTM/RNN Model

In [36]:
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout, SimpleRNN

In [47]:
# Two stacked LSTM layers followed by a small dense head.
# The first LSTM returns the full sequence so the second one receives one
# vector per time step; the final softmax has one unit per emoji class.
model = Sequential([
    LSTM(units=64, input_shape=(10, 50), return_sequences=True),
    Dropout(0.3),
    LSTM(units=32),
    Dropout(0.3),
    Dense(units=10, activation='tanh'),
    Dense(units=5, activation='softmax'),
])
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm_11 (LSTM)               (None, 10, 64)            29440     
_________________________________________________________________
dropout_13 (Dropout)         (None, 10, 64)            0         
_________________________________________________________________
lstm_12 (LSTM)               (None, 32)                12416     
_________________________________________________________________
dropout_14 (Dropout)         (None, 32)                0         
_________________________________________________________________
dense_11 (Dense)             (None, 10)                330       
_________________________________________________________________
dense_12 (Dense)             (None, 5)                 55        
Total params: 42,241
Trainable params: 42,241
Non-trainable params: 0
_________________________________________________________________


In [48]:
# Categorical cross-entropy pairs with the softmax output layer and the
# one-hot encoded labels; accuracy is tracked as the reported metric.
model.compile(optimizer='adam', loss="categorical_crossentropy", metrics=['accuracy'])

In [49]:
# Train for 50 epochs in mini-batches of 64, holding out 10% of the training
# rows for validation. The returned History object is kept in `hist`.
# NOTE(review): no callbacks (early stopping, checkpointing, ...) are passed.

hist = model.fit(train_embeddings, y_train, validation_split=0.1 , shuffle=True, batch_size=64, epochs=50 )

Train on 118 samples, validate on 14 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [50]:
model.evaluate(test_embeddings, y_test)



[1.2338575635637556, 0.6249999914850507]

In [52]:
# Sequential.predict_classes() no longer exists in current Keras / TensorFlow
# releases. Taking the argmax over the softmax probabilities yields the same
# class indices and works on both old and new versions.
pred = np.argmax(model.predict(test_embeddings), axis=-1)

In [53]:
pred

array([4, 3, 3, 0, 2, 2, 1, 2, 4, 2, 1, 3, 0, 3, 1, 3, 3, 2, 4, 2, 3, 2,
       4, 2, 3, 1, 2, 0, 4, 2, 0, 1, 2, 2, 0, 1, 2, 4, 4, 2, 1, 0, 0, 1,
       2, 0, 2, 2, 3, 1, 3, 1, 3, 2, 3, 3])

In [57]:
x_test

array([list(['I', 'want', 'to', 'eat']),
       list(['he', 'did', 'not', 'answer']),
       list(['he', 'got', 'a', 'raise']),
       list(['she', 'got', 'me', 'a', 'present']),
       list(['ha', 'ha', 'ha', 'it', 'was', 'so', 'funny']),
       list(['he', 'is', 'a', 'good', 'friend']),
       list(['I', 'am', 'upset']),
       list(['We', 'had', 'such', 'a', 'lovely', 'dinner', 'tonight']),
       list(['where', 'is', 'the', 'food']),
       list(['Stop', 'making', 'this', 'joke', 'ha', 'ha', 'ha']),
       list(['where', 'is', 'the', 'ball']), list(['work', 'is', 'hard']),
       list(['This', 'girl', 'is', 'messing', 'with', 'me']),
       list(['are', 'you', 'serious', 'ha', 'ha']),
       list(['Let', 'us', 'go', 'play', 'baseball']),
       list(['This', 'stupid', 'grader', 'is', 'not', 'working']),
       list(['work', 'is', 'horrible']),
       list(['Congratulation', 'for', 'having', 'a', 'baby']),
       list(['stop', 'messing', 'around']),
       list(['any', 'suggestions'

In [59]:
# Print every test sentence followed by the emoji predicted for it.
# The x_test entries are token lists at this point (get_embedding_output
# tokenised them in place), hence the join back into a sentence.
for tokens, label in zip(x_test, pred):
    print(" ".join(tokens))
    print(emoji.emojize(emoji_dict[str(label)]))

I want to eat
🍴
he did not answer
😓
he got a raise
😓
she got me a present
❤️
ha ha ha it was so funny
😁
he is a good friend
😁
I am upset
⚾
We had such a lovely dinner tonight
😁
where is the food
🍴
Stop making this joke ha ha ha
😁
where is the ball
⚾
work is hard
😓
This girl is messing with me
❤️
are you serious ha ha
😓
Let us go play baseball
⚾
This stupid grader is not working
😓
work is horrible
😓
Congratulation for having a baby
😁
stop messing around
🍴
any suggestions for dinner
😁
I love taking breaks
😓
you brighten my day
😁
I boiled rice
🍴
she is a bully
😁
Why are you feeling bad
😓
I am upset
⚾
I worked during my birthday
😁
My grandmother is the love of my life
❤️
enjoy your break
🍴
valentine day is near
😁
I miss you so much
❤️
throw the ball
⚾
My life is so boring
😁
she said yes
😁
will you be my valentine
❤️
he can pitch really well
⚾
dance with me
😁
I am starving
🍴
See you at the restaurant
🍴
I like to laugh
😁
I will go dance
⚾
I like your jacket
❤️
i miss her
❤️
what is your favo

# Custom Example Prediction

In [144]:
x_query = "i ate dinner happily and i love soccer".lower()

In [145]:
embedding_x_query = []

In [146]:
# Start from a fresh list so re-running this cell neither appends the same
# vectors a second time nor fails because the name already holds an ndarray.
embedding_x_query = []

for w in x_query.split():
    print(w)
    # A word missing from the embedding vocabulary would raise KeyError;
    # fall back to an all-zero 50-d vector (same as a padding step) instead.
    emb = embedding_index.get(w, np.zeros(50))
    embedding_x_query.append(emb)

i
ate
dinner
happily
and
i
love
soccer


In [147]:
embedding_x_query = np.array(embedding_x_query)

In [148]:
embedding_x_query.shape

(8, 50)

In [149]:
# Bring the query to exactly 10 time steps, the sequence length the model
# was built for.
# The original condition compared the whole shape tuple with an int, which
# raises TypeError on Python 3; the word count is the first axis.
n_words = embedding_x_query.shape[0]
if n_words < 10:
    # Pad the tail with zero vectors.
    padding = np.zeros((10 - n_words, 50))
    embedding_x_query = np.vstack((embedding_x_query, padding))
else:
    # Longer queries are cut to the first 10 words so the reshape to
    # (1, 10, 50) in the next cell cannot fail.
    embedding_x_query = embedding_x_query[:10]

In [150]:
embedding_x_query = embedding_x_query.reshape(1, 10, 50)

In [151]:
embedding_x_query.shape

(1, 10, 50)

In [152]:
# Sequential.predict_classes() no longer exists in current Keras / TensorFlow
# releases. The argmax over the softmax probabilities returns the same class
# index array (one entry per input row).
p = np.argmax(model.predict(embedding_x_query), axis=-1)

In [153]:
print(emoji.emojize(emoji_dict[str(p[0])]))

🍴
