In [1]:
import numpy as np
import time
from emo_utils import *

# Getting required dictionaries

In [2]:
word_to_index, index_to_word, word_to_emb_vec = read_glove_vecs('glove.6B.50d.txt')# dictionaries mapping corresponding elements

# Loading, visualising and modifying Dataset

In [17]:
X, Y = read_csv('train_sentences.csv')
for i in range(4):
    print(X[i], label_to_emoji(Y[i]))
    print()

French macaroon is so tasty 😃

work is horrible 😟

I am upset 😟

Good joke 😃



In [29]:
Y_oh = convert_to_one_hot(Y, C = 3)
index = 43
print(Y[index], "is converted to ", Y_oh[index])

0 is converted to  [1. 0. 0.]


# Average the embedding values

In [6]:
def sentence_avg(xi, word_to_emb_vec):
    words = [i for i in xi.lower().split()]
    
    avg_vec = np.zeros((50,))
    for w in words:
        avg_vec += word_to_emb_vec[w]
    avg_vec /= len(words)
    
    return avg_vec

# Model

In [9]:
def model(X, Y_oh, word_to_emb_vec, learning_rate = 0.001, num_iteration = 50, n_h = 50):
    m = Y_oh.shape[0]
    n_y = Y_oh.shape[1]
    
    W = np.random.randn(n_y, n_h) / np.sqrt(n_h)
    b = np.zeros((n_y,))
    
    for iteration in range(num_iteration):
        for i in range(m):
            avg = sentence_avg(X[i], word_to_emb_vec)
            z = np.dot(W, avg) + b
            a = softmax(z)
            
            cost = -np.sum(np.multiply(Y_oh[i], np.log(a)))
            
            dz = a - Y_oh[i]
            dW = np.dot(dz.reshape(n_y,1), avg.reshape(1, n_h))
            db = dz

            W = W - learning_rate * dW
            b = b - learning_rate * db
            
        if iteration%(num_iteration/10) == 0:
            print("Epoch: " + str(iteration) + " --- cost = " + str(cost))
            pred = predict(X, Y, W, b, word_to_emb_vec)
            
    return pred, W, b

In [10]:
pred, w, b = model(X, Y_oh, word_to_emb_vec, learning_rate= 0.001, num_iteration = 500, n_h = 50)

Epoch: 0 --- cost = 1.4218597045967338
Accuracy: 0.355
Epoch: 50 --- cost = 0.737517715510417
Accuracy: 0.69
Epoch: 100 --- cost = 0.4810493788199171
Accuracy: 0.73
Epoch: 150 --- cost = 0.3427133141302946
Accuracy: 0.765
Epoch: 200 --- cost = 0.2597489065916679
Accuracy: 0.79
Epoch: 250 --- cost = 0.205825312833278
Accuracy: 0.795
Epoch: 300 --- cost = 0.16851126837134975
Accuracy: 0.8
Epoch: 350 --- cost = 0.14139353468330557
Accuracy: 0.815
Epoch: 400 --- cost = 0.1209106945172286
Accuracy: 0.835
Epoch: 450 --- cost = 0.10495769908597631
Accuracy: 0.835


# Lets play and Predict some sentiments

In [15]:
X_my_sentences = np.array(["i know you i am genius", 
                           "you are my favourite uncle",
                           "i have did great jobs", 
                           "i am super boss", 
                           "i love you", 
                           "i hate you"])

Y_my_labels = np.array([[0], [0], [2], [1], [4],[3]])

pred = predict(X_my_sentences, Y_my_labels , w, b, word_to_emb_vec)
print_predictions(X_my_sentences, pred)

Accuracy: 0.3333333333333333

i know you i am genius 😃
you are my favourite uncle ❤️
i have did great jobs 😃
i am super boss 😃
i love you ❤️
i hate you 😟
