In [47]:
import numpy as np
from emo_utils import *
from keras.layers import Dense, Dropout, Input, LSTM, Activation
from keras.models import Model
from keras.layers.embeddings import Embedding
from keras.preprocessing import sequence
from keras.initializers import glorot_uniform

# Loading the required GloVe dictionaries

In [3]:
# Load the pretrained GloVe file; the helper returns three dictionaries
# mapping corresponding elements (word -> index, index -> word and
# word -> embedding vector, going by the names they are unpacked into).
(
    word_to_index,
    index_to_word,
    word_to_emb_vec,
) = read_glove_vecs('glove.6B.50d.txt')

# Loading Data

In [119]:
# Read the training set: X holds the sentences (each one is split into words
# further down) and Y the labels (presumably integer emotion classes, since
# Y is one-hot encoded with C=3 before training -- confirm against emo_utils).
X, Y = read_csv("train_sentences.csv")

# Function to get the maximum sentence length (in words) in the dataset

In [25]:
def get_max_length(X):
    """Return the word count of the longest sentence in ``X``.

    A "word" is any whitespace-separated token produced by ``split()``.

    Arguments:
    X -- iterable of sentence strings

    Returns:
    The largest number of words found in a single sentence, or 0 when
    ``X`` is empty.
    """
    # Letter case has no effect on the token count, so only the split matters.
    return max((len(sentence.split()) for sentence in X), default=0)

# Converting words to indices and padding to max length

In [68]:
def sentence_to_indices(X, word_to_index, max_len):
    """Convert sentences into a zero-padded matrix of word indices.

    Each sentence is split on whitespace, every word is lower-cased and then
    replaced by its entry in ``word_to_index``. Rows are right-padded with
    zeros up to ``max_len`` columns. A sentence with more than ``max_len``
    words is truncated to its first ``max_len`` words (previously this raised
    an IndexError), so sentences longer than the training maximum can still
    be encoded.

    Arguments:
    X -- list or 1-D array of sentence strings (m sentences)
    word_to_index -- dictionary looked up with the lower-cased words
    max_len -- number of columns of the returned matrix

    Returns:
    sent_indices -- float array of shape (m, max_len); unused trailing
                    positions stay 0

    Raises:
    KeyError -- if one of the kept words is missing from ``word_to_index``
    """
    m = np.array(X).shape[0]
    sent_indices = np.zeros((m, max_len))

    for i in range(m):
        # Keep at most max_len words so the column index can never overflow.
        words = [word.lower() for word in X[i].split()[:max_len]]
        for j, word in enumerate(words):
            sent_indices[i, j] = word_to_index[word]

    return sent_indices

# Creating a Keras Embedding layer loaded with the pretrained word vectors

In [40]:
def pretrained_embedding_layer(word_to_emb_vec, word_to_index):
    """Build a non-trainable Keras Embedding layer holding pretrained vectors.

    Row ``word_to_index[word]`` of the layer's weight matrix is set to
    ``word_to_emb_vec[word]``; rows that no word maps to stay all-zero.

    Arguments:
    word_to_emb_vec -- dictionary mapping a word to its embedding vector
                       (1-D array; all vectors are assumed to share a length)
    word_to_index -- dictionary mapping a word to its row index

    Returns:
    embedding_layer -- Embedding layer with ``trainable=False`` and its
                       weights set to the pretrained matrix

    Raises:
    ValueError -- if ``word_to_emb_vec`` is empty
    KeyError -- if a word of ``word_to_index`` has no vector
    """
    if not word_to_emb_vec:
        raise ValueError("word_to_emb_vec is empty; cannot infer the embedding size")

    # One more row than there are words, as Keras requires input_dim to exceed
    # the largest index (presumably indices start at 1, leaving row 0 for the
    # zero padding -- confirm against read_glove_vecs).
    vocab_len = len(word_to_index) + 1
    # Read the dimensionality from whichever vector comes first instead of
    # relying on one specific word being present in the vocabulary.
    emb_len = next(iter(word_to_emb_vec.values())).shape[0]

    emb_matrix = np.zeros((vocab_len, emb_len))
    for word, index in word_to_index.items():
        emb_matrix[index, :] = word_to_emb_vec[word]

    embedding_layer = Embedding(vocab_len, emb_len, trainable = False)

    # The layer must be built before weights can be assigned to it.
    embedding_layer.build((None,))
    embedding_layer.set_weights([emb_matrix])

    return embedding_layer

# Model

In [58]:
def model(input_shape, word_to_emb_vec, word_to_index):
    """Assemble the stacked-LSTM sentence classifier.

    Pipeline: int32 index input -> pretrained embedding -> LSTM(128) returning
    the full sequence -> Dropout(0.5) -> LSTM(128) returning only its last
    output -> Dropout(0.5) -> Dense(3) -> softmax.

    Arguments:
    input_shape -- shape of one input sample, e.g. ``(max_len,)``
    word_to_emb_vec -- dictionary mapping a word to its embedding vector
    word_to_index -- dictionary mapping a word to its index

    Returns:
    An uncompiled Keras ``Model`` mapping index sequences to 3 class
    probabilities.
    """
    token_ids = Input(shape=input_shape, dtype='int32')

    # Look the indices up in the frozen pretrained embedding matrix.
    glove_lookup = pretrained_embedding_layer(word_to_emb_vec, word_to_index)
    hidden = glove_lookup(token_ids)

    # First recurrent layer keeps every time step so the second can consume it.
    hidden = LSTM(128, return_sequences=True)(hidden)
    hidden = Dropout(0.5)(hidden)

    # Second recurrent layer collapses the sequence to a single vector.
    hidden = LSTM(128, return_sequences=False)(hidden)
    hidden = Dropout(0.5)(hidden)

    scores = Dense(3)(hidden)
    class_probs = Activation('softmax')(scores)

    return Model(inputs=token_ids, outputs=class_probs)

In [63]:
# Size the network input to the longest sentence in X, build the model and
# print its layer-by-layer summary.
max_len = get_max_length(X)
emoji_model = model(
    input_shape=(max_len,),
    word_to_emb_vec=word_to_emb_vec,
    word_to_index=word_to_index,
)
emoji_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         (None, 10)                0         
_________________________________________________________________
embedding_7 (Embedding)      (None, 10, 50)            20000050  
_________________________________________________________________
lstm_9 (LSTM)                (None, 10, 128)           91648     
_________________________________________________________________
dropout_9 (Dropout)          (None, 10, 128)           0         
_________________________________________________________________
lstm_10 (LSTM)               (None, 128)               131584    
_________________________________________________________________
dropout_10 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 3)                 387       
__________

# Compiling and fitting

In [120]:
# Encode the labels with the one-hot helper (C=3 classes) and turn the
# sentences into zero-padded rows of word indices, max_len columns wide.
Y_oh = convert_to_one_hot(Y, C=3)
max_len = get_max_length(X)
X_indices = sentence_to_indices(
    X,
    word_to_index=word_to_index,
    max_len=max_len,
)

In [65]:
# Adam optimiser with categorical cross-entropy (the targets are one-hot);
# accuracy is the metric reported during training.
emoji_model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [66]:
# Train for 20 epochs in mini-batches of 32 sentences.
emoji_model.fit(
    X_indices,
    Y_oh,
    epochs=20,
    batch_size=32,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x1b781109748>

# Let's play and predict some sentiments

In [118]:
# Try the trained model on a few hand-written sentences.
my_sentiments = np.array(['i want to hug you', 'i gifted a sweet', 'i have super power', 'i am great'])
my_senti = sentence_to_indices(my_sentiments, word_to_index, max_len)
for sentence, indices in zip(my_sentiments, my_senti):
    # Feed one sentence at a time as a batch of size 1. reshape(1, -1) keeps
    # the row width at whatever max_len is, instead of a hard-coded 10 that
    # only matches this particular training set.
    pred = emoji_model.predict(indices.reshape(1, -1))
    # The class with the highest predicted probability picks the emoji.
    print(sentence, label_to_emoji(np.argmax(pred)))

i want to hug you ❤️
i gifted a sweet ❤️
i have super power 😃
i am great 😟


# Conclusion :
It works reasonably well, but it needs additional training data that is not biased. It still gives poor results in some cases — for example, "i am great" is predicted as 😟 above.