In [36]:
import numpy as np
from emo_utils import *
np.random.seed(0)
from keras.models import Model
from keras.layers import Dense, Input, Dropout, LSTM, Activation, Bidirectional, RepeatVector, Lambda, Concatenate, Dot
from keras.layers.embeddings import Embedding
from keras.utils import to_categorical
import keras.backend as K
from keras.optimizers import Adam

from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
from nmt_utils import *
from IPython.display import Image
np.random.seed(1)

In [44]:

X_train, Y_train = read_csv('data/train_emoji.csv')
X_test, Y_test = read_csv('data/tesss.csv')
# Longest training sentence measured in WORDS. The previous form,
# len(max(X_train, key=len).split()), selected the sentence with the most
# characters and then counted its words, which can under-estimate the true
# maximum word count and make sentences_to_indices overflow its columns.
maxLen = max(len(sentence.split()) for sentence in X_train)


In [4]:
# One-hot encode the train and test labels over the 5 classes (or use to_categorical()).
Y_oh_train, Y_oh_test = (convert_to_one_hot(labels, C=5) for labels in (Y_train, Y_test))


In [5]:

# Load the GloVe lookup tables returned by read_glove_vecs:
# word -> index, index -> word, and word -> vector.
(word_to_index,
 index_to_words,
 word_to_vec_map) = read_glove_vecs('data/glove.6B.50d.txt')

In [6]:
def sentences_to_indices(X, word_to_index, max_len):
    """
    Converts an array of sentences (strings) into an array of indices corresponding to words in the sentences.
    The output shape is such that it can be given to `Embedding()`.

    Each sentence is lower-cased and split on whitespace. Words that are not in
    `word_to_index` are skipped (they do not consume a column), and words beyond
    `max_len` are dropped, so the function never raises on unseen vocabulary or
    on over-long sentences. Unused trailing columns stay 0.

    Arguments:
    X -- array of sentences (strings), of shape (m,)
    word_to_index -- a dictionary mapping each known word to its index
    max_len -- number of columns in the output, i.e. the maximum number of words kept per sentence

    Returns:
    X_indices -- array of indices corresponding to words in the sentences from X, of shape (m, max_len)
    """

    m = X.shape[0]                                   # number of examples

    # Zero-initialised so that short sentences are right-padded with 0.
    X_indices = np.zeros((m, max_len))

    for i in range(m):
        sentence_words = X[i].lower().split()

        # j is the next free column in row i.
        j = 0
        for w in sentence_words:
            if j >= max_len:
                # Row is full: truncate instead of indexing past the last column.
                break
            if w in word_to_index:
                X_indices[i, j] = word_to_index[w]
                j += 1

    return X_indices

In [7]:
def pretrained_embedding_layer(word_to_vec_map, word_to_index):
    """
    Creates a frozen Keras Embedding() layer initialised with pre-trained word vectors.

    Arguments:
    word_to_vec_map -- dictionary mapping words to their vector representation (all vectors share one length)
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    embedding_layer -- built Keras Embedding instance whose weights are the pre-trained vectors

    Raises:
    ValueError -- if word_to_vec_map is empty, since the embedding size cannot be inferred
    """
    if not word_to_vec_map:
        raise ValueError("word_to_vec_map is empty; cannot infer the embedding dimension")

    # One extra row beyond the vocabulary size; presumably indices start at 1 and row 0
    # is left as all-zeros for padding -- confirm against read_glove_vecs.
    vocab_len = len(word_to_index) + 1

    # Infer the vector length from any entry instead of relying on a specific word
    # (previously the hard-coded key "cucumber") being present in the map.
    emb_dim = next(iter(word_to_vec_map.values())).shape[0]

    # Row `index` of the matrix holds the vector of the word with that index.
    emb_matrix = np.zeros((vocab_len, emb_dim))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_vec_map[word]

    # trainable=False keeps the pre-trained vectors fixed during training.
    embedding_layer = Embedding(vocab_len, emb_dim, trainable=False)

    # The layer must be built before its weights can be set.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer


In [8]:
# Shared layers defined as global variables: one_step_attention calls these same
# instances at every step, so all attention steps share one set of weights.
repeator = RepeatVector(maxLen)  # repeats s_prev maxLen times so it can be paired with every Bi-LSTM state
concatenator = Concatenate(axis=-1)  # joins its inputs along the last (feature) axis
densor1 = Dense(10, activation = "tanh") # 10 hidden units applied to the feature axis of each time step: (m, maxLen, features) -> (m, maxLen, 10)
densor2 = Dense(1, activation = "relu") # one energy value per time step: (m, maxLen, 10) -> (m, maxLen, 1)
activator = Activation(softmax, name='attention_weights') # softmax here is the custom function from nmt_utils.py (per the original note it normalises over axis 1, the sequence axis)
dotor = Dot(axes = 1)  # contracts axis 1 of the attention weights with axis 1 of the Bi-LSTM states

In [9]:
def one_step_attention(a,s_prev):
    """
    Runs a single attention step and returns the resulting context vector.

    The previous post-attention state is repeated along the time axis, joined with the
    Bi-LSTM states, scored by the two shared dense layers, turned into attention weights
    by the shared activation, and finally combined with the Bi-LSTM states.

    Arguments:
    a -- hidden states of the Bi-LSTM, tensor of shape (m, Tx, 2*n_a)
    s_prev -- previous hidden state of the (post-attention) LSTM, tensor of shape (m, n_s)

    Returns:
    context -- context vector, input of the next (post-attention) LSTM cell
    """
    # Repeat s_prev across the time axis so it lines up with every Bi-LSTM state.
    tiled_state = repeator(s_prev)

    # Join on the last axis: (m, maxLen, 2*n_a) with (m, maxLen, n_s) -> (m, maxLen, 2*n_a + n_s)
    joined = concatenator([a, tiled_state])

    # densor1 -> densor2 produce the energies; activator turns them into attention weights.
    attention_weights = activator(densor2(densor1(joined)))

    # Combine the Bi-LSTM states using the attention weights.
    return dotor([attention_weights, a])

In [10]:
# Hidden state sizes: n_a for the Bi-LSTM, n_s for the post-attention LSTM
n_a = 32
n_s = 64
# return_state=True makes the layer also return its hidden state and cell state;
# this LSTM is applied after each attention step, on the context vector.
post_activation_LSTM_cell = LSTM(n_s, return_state = True)
# Use Keras' built-in softmax (last axis = the 5 classes) rather than the `softmax`
# name that reaches this notebook through wildcard imports; on the 2-D (m, 5) output
# of this layer, normalising over axis 1 and over the last axis are the same thing.
output_layer = Dense(5, activation='softmax')


In [11]:
def model(input_shape, n_a, n_s, word_to_vec_map, word_to_index):
    """
    Builds the attention-based sentence classifier.

    Arguments:
    input_shape -- shape of one input sentence, e.g. (max_len,); its first entry is the number of attention steps
    n_a -- hidden state size of the Bi-LSTM
    n_s -- hidden state size of the post-attention LSTM
    word_to_vec_map -- dictionary mapping words to their GloVe vector representation.
    word_to_index -- dictionary mapping from words to their indices in the vocabulary

    Returns:
    model -- Keras model instance with inputs [sentence indices, s0, c0] and the output of output_layer
    """
    # Model inputs: the index-encoded sentence plus the initial hidden/cell state
    # of the post-attention LSTM.
    sentence_indices = Input(shape=input_shape, dtype='int32')
    s0 = Input(shape=(n_s,), name='s0')
    c0 = Input(shape=(n_s,), name='c0')

    # Frozen pre-trained embeddings followed by a Bi-LSTM that returns every time step.
    embeddings = pretrained_embedding_layer(word_to_vec_map, word_to_index)(sentence_indices)
    a = Bidirectional(LSTM(n_a, return_sequences=True))(embeddings)

    # Run input_shape[0] attention steps, threading the LSTM state through each one.
    hidden, cell = s0, c0
    for _step in range(input_shape[0]):
        context = one_step_attention(a, hidden)
        # The cell returns three values; the first and third carry over to the next step.
        hidden, _unused, cell = post_activation_LSTM_cell(context, initial_state=[hidden, cell])

    # Classify from the final hidden state.
    prediction = output_layer(hidden)

    return Model(inputs=[sentence_indices, s0, c0], outputs=prediction)

In [12]:
# Build the network. NOTE: this rebinds the name `model` from the builder function to
# the built Keras model, so the defining cell must be re-run before this one can run again.
model = model(
    input_shape=(maxLen,),
    n_a=n_a,
    n_s=n_s,
    word_to_vec_map=word_to_vec_map,
    word_to_index=word_to_index,
)
model.summary()

__________________________________________________________________________________________________




Layer (type)                    Output Shape         Param #     Connected to                     









input_1 (InputLayer)            (None, 10)           0                                            




__________________________________________________________________________________________________




embedding_1 (Embedding)         (None, 10, 50)       20000050    input_1[0][0]                    




__________________________________________________________________________________________________




s0 (InputLayer)                 (None, 64)           0                                            




__________________________________________________________________________________________________




bidirectional_1 (Bidirectional) (None, 10, 64)       21248       embedding_1[0][0]                




__________________________________________________________________________________________________




repeat_vector_1 (RepeatVector)  (None, 10, 64)       0           s0[0][0]                         




                                                                 lstm_1[0][0]                     




                                                                 lstm_1[1][0]                     




                                                                 lstm_1[2][0]                     




                                                                 lstm_1[3][0]                     




                                                                 lstm_1[4][0]                     




                                                                 lstm_1[5][0]                     




                                                                 lstm_1[6][0]                     




                                                                 lstm_1[7][0]                     




                                                                 lstm_1[8][0]                     




__________________________________________________________________________________________________




concatenate_1 (Concatenate)     (None, 10, 128)      0           bidirectional_1[0][0]            




                                                                 repeat_vector_1[0][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[1][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[2][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[3][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[4][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[5][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[6][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[7][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[8][0]            




                                                                 bidirectional_1[0][0]            




                                                                 repeat_vector_1[9][0]            




__________________________________________________________________________________________________




dense_1 (Dense)                 (None, 10, 10)       1290        concatenate_1[0][0]              




                                                                 concatenate_1[1][0]              




                                                                 concatenate_1[2][0]              




                                                                 concatenate_1[3][0]              




                                                                 concatenate_1[4][0]              




                                                                 concatenate_1[5][0]              




                                                                 concatenate_1[6][0]              




                                                                 concatenate_1[7][0]              




                                                                 concatenate_1[8][0]              




In [13]:
# Multi-class classification: categorical cross-entropy loss, Adam optimizer, accuracy metric.
model.compile(
    loss='categorical_crossentropy',
    optimizer='Adam',
    metrics=['accuracy'],
)

In [14]:
# Training targets (one-hot over 5 classes) and index-encoded training sentences.
Y_train_oh = convert_to_one_hot(Y_train, C=5)
X_train_indices = sentences_to_indices(X=X_train, word_to_index=word_to_index, max_len=maxLen)

In [15]:
# Show the encoded training sentences followed by the array's shape, one per line.
print(X_train_indices, X_train_indices.shape, sep="\n")

[[259914. 352214. 360915. ...      0.      0.      0.]
 [185457.  52943. 293982. ...      0.      0.      0.]
 [193716. 192973. 357266. ... 222138.      0.      0.]
 ...
 [386307. 192973. 390470. ...      0.      0.      0.]
 [185457. 226278. 394475. ...      0.      0.      0.]
 [166369. 198213.      0. ...      0.      0.      0.]]




(132, 10)

In [16]:
# Zero initial hidden and cell states for the post-attention LSTM, one row per training example.
m = len(X_train_indices)
s0 = np.zeros((m, n_s))
c0 = np.zeros_like(s0)

In [17]:
# Train for 50 epochs in shuffled mini-batches of 32.
model.fit(
    [X_train_indices, s0, c0],
    Y_train_oh,
    batch_size=32,
    epochs=50,
    shuffle=True,
)

Epoch 1/50








 - ETA: 20s - loss: 1.6185 - acc: 0.2500







 - ETA: 0s - loss: 1.6100 - acc: 0.2109 







 - 7s 50ms/step - loss: 1.6094 - acc: 0.2197


Epoch 2/50








 - ETA: 0s - loss: 1.5738 - acc: 0.4062







 - ETA: 0s - loss: 1.5491 - acc: 0.3750







 - 0s 697us/step - loss: 1.5454 - acc: 0.3939


Epoch 3/50








 - ETA: 0s - loss: 1.4812 - acc: 0.5312







 - ETA: 0s - loss: 1.5112 - acc: 0.4219







 - 0s 667us/step - loss: 1.5140 - acc: 0.4167


Epoch 4/50








 - ETA: 0s - loss: 1.3484 - acc: 0.5938







 - ETA: 0s - loss: 1.4953 - acc: 0.3906







 - 0s 674us/step - loss: 1.4865 - acc: 0.3939


Epoch 5/50








 - ETA: 0s - loss: 1.5881 - acc: 0.2500







 - ETA: 0s - loss: 1.4520 - acc: 0.3906







 - 0s 689us/step - loss: 1.4442 - acc: 0.4015


Epoch 6/50








 - ETA: 0s - loss: 1.3983 - acc: 0.4375







 - ETA: 0s - loss: 1.3751 - acc: 0.4531







 - 0s 735us/step - loss: 1.3831 - acc: 0.4394


Epoch 7/50








 - ETA: 0s - loss: 1.3885 - acc: 0.3750







 - ETA: 0s - loss: 1.3114 - acc: 0.4922







 - 0s 902us/step - loss: 1.2995 - acc: 0.5076


Epoch 8/50








 - ETA: 0s - loss: 1.1491 - acc: 0.6562







 - ETA: 0s - loss: 1.2240 - acc: 0.5469







 - 0s 811us/step - loss: 1.2192 - acc: 0.5455


Epoch 9/50








 - ETA: 0s - loss: 1.2718 - acc: 0.4375







 - ETA: 0s - loss: 1.1201 - acc: 0.6094







 - 0s 720us/step - loss: 1.1093 - acc: 0.6212


Epoch 10/50








 - ETA: 0s - loss: 0.9207 - acc: 0.7812







 - ETA: 0s - loss: 0.9942 - acc: 0.7031







 - 0s 705us/step - loss: 0.9900 - acc: 0.6970


Epoch 11/50








 - ETA: 0s - loss: 0.8519 - acc: 0.6875







 - ETA: 0s - loss: 0.8873 - acc: 0.7188







 - 0s 705us/step - loss: 0.8735 - acc: 0.7273


Epoch 12/50








 - ETA: 0s - loss: 0.7498 - acc: 0.7500







 - ETA: 0s - loss: 0.7732 - acc: 0.7422







 - 0s 735us/step - loss: 0.7560 - acc: 0.7500


Epoch 13/50








 - ETA: 0s - loss: 0.5159 - acc: 0.8750







 - ETA: 0s - loss: 0.6575 - acc: 0.7734







 - 0s 705us/step - loss: 0.6518 - acc: 0.7803


Epoch 14/50








 - ETA: 0s - loss: 0.5140 - acc: 0.8125







 - ETA: 0s - loss: 0.5898 - acc: 0.8125







 - 0s 727us/step - loss: 0.5802 - acc: 0.8182


Epoch 15/50








 - ETA: 0s - loss: 0.5076 - acc: 0.8438







 - ETA: 0s - loss: 0.4985 - acc: 0.8203







 - 0s 727us/step - loss: 0.4862 - acc: 0.8258


Epoch 16/50








 - ETA: 0s - loss: 0.5419 - acc: 0.7812







 - ETA: 0s - loss: 0.4298 - acc: 0.8359







 - 0s 750us/step - loss: 0.4299 - acc: 0.8333


Epoch 17/50








 - ETA: 0s - loss: 0.4715 - acc: 0.8438







 - ETA: 0s - loss: 0.3944 - acc: 0.8672







 - 0s 742us/step - loss: 0.3840 - acc: 0.8712


Epoch 18/50








 - ETA: 0s - loss: 0.2013 - acc: 0.9062







 - ETA: 0s - loss: 0.2945 - acc: 0.8906







 - 0s 735us/step - loss: 0.3038 - acc: 0.8864


Epoch 19/50








 - ETA: 0s - loss: 0.2035 - acc: 0.9375







 - ETA: 0s - loss: 0.3040 - acc: 0.8828







 - 0s 727us/step - loss: 0.3384 - acc: 0.8712


Epoch 20/50








 - ETA: 0s - loss: 0.1865 - acc: 0.9375







 - ETA: 0s - loss: 0.3573 - acc: 0.8750







 - 0s 720us/step - loss: 0.3476 - acc: 0.8788


Epoch 21/50








 - ETA: 0s - loss: 0.3205 - acc: 0.8750







 - ETA: 0s - loss: 0.3228 - acc: 0.8750







 - 0s 697us/step - loss: 0.3142 - acc: 0.8788


Epoch 22/50








 - ETA: 0s - loss: 0.2020 - acc: 0.9062







 - ETA: 0s - loss: 0.2913 - acc: 0.8672







 - 0s 735us/step - loss: 0.2853 - acc: 0.8712


Epoch 23/50








 - ETA: 0s - loss: 0.2578 - acc: 0.9062







 - ETA: 0s - loss: 0.1909 - acc: 0.9583







 - 0s 811us/step - loss: 0.1992 - acc: 0.9394


Epoch 24/50








 - ETA: 0s - loss: 0.2783 - acc: 0.9062







 - ETA: 0s - loss: 0.2290 - acc: 0.9297







 - 0s 727us/step - loss: 0.2239 - acc: 0.9318


Epoch 25/50








 - ETA: 0s - loss: 0.1348 - acc: 0.9688







 - ETA: 0s - loss: 0.1421 - acc: 0.9688







 - 0s 720us/step - loss: 0.1581 - acc: 0.9621


Epoch 26/50








 - ETA: 0s - loss: 0.0924 - acc: 0.9688







 - ETA: 0s - loss: 0.1497 - acc: 0.9609







 - 0s 727us/step - loss: 0.1559 - acc: 0.9545


Epoch 27/50








 - ETA: 0s - loss: 0.0920 - acc: 1.0000







 - ETA: 0s - loss: 0.1279 - acc: 0.9688







 - 0s 735us/step - loss: 0.1247 - acc: 0.9697


Epoch 28/50








 - ETA: 0s - loss: 0.0770 - acc: 0.9688







 - ETA: 0s - loss: 0.1322 - acc: 0.9609







 - 0s 727us/step - loss: 0.1301 - acc: 0.9621


Epoch 29/50








 - ETA: 0s - loss: 0.1297 - acc: 0.9375







 - ETA: 0s - loss: 0.1222 - acc: 0.9531







 - 0s 742us/step - loss: 0.1195 - acc: 0.9545


Epoch 30/50








 - ETA: 0s - loss: 0.0597 - acc: 1.0000







 - ETA: 0s - loss: 0.0974 - acc: 0.9766







 - 0s 727us/step - loss: 0.0983 - acc: 0.9773


Epoch 31/50








 - ETA: 0s - loss: 0.1810 - acc: 0.9375







 - ETA: 0s - loss: 0.0906 - acc: 0.9766







 - 0s 712us/step - loss: 0.0896 - acc: 0.9773


Epoch 32/50








 - ETA: 0s - loss: 0.0561 - acc: 0.9688







 - ETA: 0s - loss: 0.1021 - acc: 0.9609







 - 0s 750us/step - loss: 0.0992 - acc: 0.9621


Epoch 33/50








 - ETA: 0s - loss: 0.1007 - acc: 0.9375







 - ETA: 0s - loss: 0.0974 - acc: 0.9688







 - 0s 720us/step - loss: 0.0950 - acc: 0.9697


Epoch 34/50








 - ETA: 0s - loss: 0.0364 - acc: 1.0000







 - ETA: 0s - loss: 0.0900 - acc: 0.9766







 - 0s 720us/step - loss: 0.0908 - acc: 0.9773


Epoch 35/50








 - ETA: 0s - loss: 0.0324 - acc: 1.0000







 - ETA: 0s - loss: 0.2709 - acc: 0.9141







 - 0s 705us/step - loss: 0.2629 - acc: 0.9167


Epoch 36/50








 - ETA: 0s - loss: 0.2057 - acc: 0.9375







 - ETA: 0s - loss: 0.1249 - acc: 0.9688







 - 0s 712us/step - loss: 0.1232 - acc: 0.9697


Epoch 37/50








 - ETA: 0s - loss: 0.2555 - acc: 0.9375







 - ETA: 0s - loss: 0.1393 - acc: 0.9609







 - 0s 758us/step - loss: 0.1354 - acc: 0.9621


Epoch 38/50








 - ETA: 0s - loss: 0.0382 - acc: 1.0000







 - ETA: 0s - loss: 0.0785 - acc: 0.9844







 - 0s 705us/step - loss: 0.0925 - acc: 0.9773


Epoch 39/50








 - ETA: 0s - loss: 0.0248 - acc: 1.0000







 - ETA: 0s - loss: 0.1059 - acc: 0.9609







 - 0s 705us/step - loss: 0.1033 - acc: 0.9621


Epoch 40/50








 - ETA: 0s - loss: 0.0332 - acc: 1.0000







 - ETA: 0s - loss: 0.0929 - acc: 0.9688







 - 0s 705us/step - loss: 0.0908 - acc: 0.9697


Epoch 41/50








 - ETA: 0s - loss: 0.0638 - acc: 1.0000







 - ETA: 0s - loss: 0.0536 - acc: 0.9922







 - 0s 697us/step - loss: 0.0524 - acc: 0.9924


Epoch 42/50








 - ETA: 0s - loss: 0.1799 - acc: 0.9688







 - ETA: 0s - loss: 0.0962 - acc: 0.9844







 - 0s 712us/step - loss: 0.0937 - acc: 0.9848


Epoch 43/50








 - ETA: 0s - loss: 0.0414 - acc: 1.0000







 - ETA: 0s - loss: 0.0338 - acc: 1.0000







 - 0s 727us/step - loss: 0.0330 - acc: 1.0000


Epoch 44/50








 - ETA: 0s - loss: 0.0541 - acc: 1.0000







 - ETA: 0s - loss: 0.0289 - acc: 1.0000







 - 0s 727us/step - loss: 0.0286 - acc: 1.0000


Epoch 45/50








 - ETA: 0s - loss: 0.0259 - acc: 1.0000







 - ETA: 0s - loss: 0.0247 - acc: 1.0000







 - 0s 697us/step - loss: 0.0243 - acc: 1.0000


Epoch 46/50








 - ETA: 0s - loss: 0.0245 - acc: 1.0000







 - ETA: 0s - loss: 0.0211 - acc: 1.0000







 - 0s 705us/step - loss: 0.0208 - acc: 1.0000


Epoch 47/50








 - ETA: 0s - loss: 0.0177 - acc: 1.0000







 - ETA: 0s - loss: 0.0183 - acc: 1.0000







 - 0s 705us/step - loss: 0.0179 - acc: 1.0000


Epoch 48/50








 - ETA: 0s - loss: 0.0170 - acc: 1.0000







 - ETA: 0s - loss: 0.0161 - acc: 1.0000







 - 0s 712us/step - loss: 0.0159 - acc: 1.0000


Epoch 49/50








 - ETA: 0s - loss: 0.0238 - acc: 1.0000







 - ETA: 0s - loss: 0.0143 - acc: 1.0000







 - 0s 735us/step - loss: 0.0147 - acc: 1.0000


Epoch 50/50








 - ETA: 0s - loss: 0.0140 - acc: 1.0000







 - ETA: 0s - loss: 0.0129 - acc: 1.0000







 - 0s 689us/step - loss: 0.0131 - acc: 1.0000


<keras.callbacks.History at 0x1dbc9c50>

In [30]:

# Encode the test set the same way as the training set.
Y_test_oh = convert_to_one_hot(Y_test, C=5)
X_test_indices = sentences_to_indices(X_test, word_to_index, max_len=maxLen)

# Zero initial states, one row per test example.
m = len(X_test_indices)
s0 = np.zeros((m, n_s))
c0 = np.zeros_like(s0)

loss, acc = model.evaluate([X_test_indices, s0, c0], Y_test_oh)
print("\nTest accuracy = ", acc)





 - ETA: 0s







 - 1s 16ms/step





Test accuracy = 

 

0.8035714370863778




In [32]:
# List every test sentence whose predicted label differs from the expected one.
C = 5
y_test_oh = np.eye(C)[Y_test.reshape(-1)]
X_test_indices = sentences_to_indices(X_test, word_to_index, maxLen)
# Build the zero initial states here, sized to this batch, so the cell does not
# depend on whichever earlier cell last assigned s0/c0.
s0 = np.zeros((X_test_indices.shape[0], n_s))
c0 = np.zeros((X_test_indices.shape[0], n_s))
pred = model.predict([X_test_indices,s0,c0])
for i in range(len(X_test)):
    # Predicted class = index of the highest score in row i.
    num = np.argmax(pred[i])
    if(num != Y_test[i]):
        # Printed as: expected emoji, then the sentence followed by the predicted emoji.
        print('Expected emoji:'+ label_to_emoji(Y_test[i]) + ' prediction: '+ X_test[i] + label_to_emoji(num).strip())

Expected emoji:😄 prediction: he got a very nice raise	❤️




Expected emoji:😄 prediction: she got me a nice present	❤️




Expected emoji:😞 prediction: This girl is messing with me	❤️




Expected emoji:😄 prediction: Congratulation for having a baby	❤️




In [34]:
# Predict the emoji for a single hand-written sentence.
x_test = np.array(['not feeling good'])
X_test_indices = sentences_to_indices(x_test, word_to_index, maxLen)
# The model takes one initial hidden/cell state per input sentence. The s0/c0 left
# over from the evaluation cell are sized for the whole test set, so build states
# matching this batch instead of passing inputs with different sample counts.
s0_single = np.zeros((X_test_indices.shape[0], n_s))
c0_single = np.zeros((X_test_indices.shape[0], n_s))
print(x_test[0] +' '+  label_to_emoji(np.argmax(model.predict([X_test_indices,s0_single,c0_single]))))


not feeling good 😞


