# RNN Model

In this notebook we define the RNN model.

In [1]:
import numpy as np
import pandas as pd
from emoji_list import all_emoji
from collections import Counter
from string import punctuation
import tflearn
from tflearn.data_utils import to_categorical, pad_sequences
from math import floor
from random import shuffle
import helper

# Unpack the seven objects returned by helper.load_preprocess(). Their exact
# structure is defined in the project-local `helper` module, not in this notebook.
content, targets, emoji_to_int, int_to_emoji, vocab_to_int, int_to_vocab, token_lookup = helper.load_preprocess()
# NOTE(review): presumably holds out a validation subset (VAL_X, VAL_Y) and
# returns the remainder as X, Y -- confirm against helper.peel_validation.
X, Y, VAL_X, VAL_Y = helper.peel_validation(content, targets)

In the next cell we define a few helper functions to transform the data. We need to make fixed-length sequences, unroll our data, and one-hot encode the labels.

In [2]:
def remove_empty(x, y):
    """Drop every sample whose comment is empty, keeping x and y aligned.

    :param x: sequence of comments; each comment must support ``len()``
    :param y: sequence of labels, indexable in parallel with ``x``
    :return: ``(kept_x, kept_y)`` where ``kept_x`` is a list of the non-empty
             comments and ``kept_y`` is a NumPy array of the matching labels
    """
    keep_idx = [idx for idx, comment in enumerate(x) if len(comment) != 0]
    kept_x = [x[idx] for idx in keep_idx]
    kept_y = [y[idx] for idx in keep_idx]
    try:
        y_arr = np.array(kept_y)
    except ValueError:
        # Label lists of different lengths (multi-label samples) are "ragged";
        # recent NumPy releases refuse to build an array from them implicitly.
        # Fall back to a 1-D object array holding one label list per sample.
        y_arr = np.empty(len(kept_y), dtype=object)
        for pos, labels in enumerate(kept_y):
            y_arr[pos] = labels
    return kept_x, y_arr

def normalize_len(x, seq_len):
    """Left-pad with zeros / truncate every row of ``x`` to exactly ``seq_len`` items.

    Rows shorter than ``seq_len`` are right-aligned (zeros in front); rows
    longer than ``seq_len`` keep only their first ``seq_len`` items. Empty rows
    become all-zero rows.

    :param x: sequence of integer sequences
    :param seq_len: target length of every output row
    :return: integer NumPy array of shape ``(len(x), seq_len)``
    """
    z = np.zeros((len(x), seq_len), dtype=int)
    for i, row in enumerate(x):
        kept = np.asarray(row)[:seq_len]
        # Skip empty rows: a slice starting at -0 would select the whole row
        # and the assignment of an empty array would fail to broadcast.
        if kept.size:
            z[i, -kept.size:] = kept
    return z

def sanitize_data(x, seq_len):
    """Return a single sequence as a zero-left-padded integer vector of length ``seq_len``.

    An empty ``x`` yields an all-zero vector; a sequence longer than
    ``seq_len`` keeps only its first ``seq_len`` items.

    :param x: sequence of integers (may be empty)
    :param seq_len: length of the returned vector
    :return: 1-D integer NumPy array with ``seq_len`` entries
    """
    padded = np.zeros(seq_len, dtype=int)
    if len(x) != 0:
        tokens = np.array(x)
        # Right-align the (possibly truncated) tokens inside the zero vector.
        padded[-len(x):] = tokens[:seq_len]
    return padded

# Number of output classes: one per entry in all_emoji (imported from emoji_list).
# Read by encode() as the length of each one-hot vector.
num_classes = len(all_emoji)
# Number of entries in the int_to_vocab lookup.
num_words = len(int_to_vocab)

def encode(int_to_encode):
    """One-hot encode a class index.

    :param int_to_encode: index of the class to mark
    :return: float NumPy vector of length ``num_classes`` that is 1 at
             ``int_to_encode`` and 0 elsewhere
    """
    one_hot = np.zeros(num_classes)
    one_hot[int_to_encode] = 1
    return one_hot

def encode_labs(labs):
    """One-hot encode every label in ``labs``.

    :param labs: iterable of class indices
    :return: list with one ``encode(label)`` vector per input label, in order
    """
    return [encode(label) for label in labs]

def decode(enc):
    """Return the positions at which an encoded vector equals 1.

    :param enc: indexable sequence of numbers (e.g. a one-hot or multi-hot vector)
    :return: list of indices ``i`` for which ``enc[i] == 1``, in ascending order
    """
    return [position for position, flag in enumerate(enc) if flag == 1]

def multi_to_single(xx, yy):
    """Expand one multi-label sample into one single-label sample per label.

    :param xx: the sample's input (reused, not copied, for every label)
    :param yy: iterable of labels attached to ``xx``
    :return: ``(inputs, labels)`` -- two lists of equal length where
             ``inputs[i]`` is ``xx`` and ``labels[i]`` is the i-th label of ``yy``
    """
    labels = list(yy)
    inputs = [xx for _ in labels]
    return inputs, labels

def unroll_data(data_x, data_y):
    """Flatten a multi-label dataset into single-label (input, label) pairs.

    Each sample is expanded with ``multi_to_single`` and the results are
    concatenated in the original sample order.

    :param data_x: iterable of inputs
    :param data_y: iterable of label collections, parallel to ``data_x``
    :return: ``(inputs, labels)`` -- two flat lists of equal length
    """
    flat_inputs = []
    flat_labels = []
    for sample, sample_labels in zip(data_x, data_y):
        repeated, singles = multi_to_single(sample, sample_labels)
        flat_inputs += repeated
        flat_labels += singles
    return flat_inputs, flat_labels

In this next cell, we actually use our functions to transform the data.

In [3]:
# Fixed number of token ids every comment is padded / truncated to.
seq_len = 20

# Drop samples whose comment is empty from both splits (labels stay aligned).
X, Y = remove_empty(X, Y)
VAL_X, VAL_Y = remove_empty(VAL_X, VAL_Y)
# Zero-pad on the left / truncate every comment to seq_len entries.
X, VAL_X = normalize_len(X, seq_len), normalize_len(VAL_X, seq_len)
# Emit one (comment, label) pair per label of each sample
# (assumes each element of Y / VAL_Y is an iterable of labels -- TODO confirm).
X, Y = unroll_data(X, Y)
VAL_X, VAL_Y = unroll_data(VAL_X, VAL_Y)
# One-hot encode every label into a vector of length num_classes.
Y, VAL_Y = encode_labs(Y), encode_labs(VAL_Y)

# NOTE(review): presumably splits X, Y into a training part and a held-out part;
# the ratio and any shuffling are decided inside helper.train_test_split -- confirm.
train_x, test_x, train_y, test_y = helper.train_test_split(X, Y)

# Building the Network
In the next couple of cells we define the architecture of the network using TFLearn.

In [4]:
# Width of the network input: one token id per position of a padded comment.
size_of_each_vector = seq_len
# Number of entries in vocab_to_int; passed to the embedding layer as input_dim.
# NOTE(review): if token ids are 1-based with 0 reserved for padding, the largest
# id would equal vocab_size and fall outside the embedding table -- confirm the
# id range produced by the helper module.
vocab_size = len(vocab_to_int)
# Size of the softmax output layer (same value as num_classes).
no_of_unique_y_labels = len(all_emoji)

In [5]:
# Input layer: batches (first dimension left as None) of sequences with
# size_of_each_vector entries each.
net = tflearn.input_data([None, size_of_each_vector])

# Embedding layer with vocab_size rows and 256-dimensional output vectors.
net = tflearn.embedding(net, input_dim=vocab_size, output_dim=256)

# Three stacked LSTM layers with 256, 512 and 1024 units. The first two pass
# return_seq=True; the last one leaves return_seq at its default.
# NOTE(review): in TFLearn the `dropout` argument is presumably a keep
# probability, so 0.8 would drop 20% of units -- confirm against the TFLearn docs.
net = tflearn.lstm(net, 256, dropout=0.8, return_seq=True)

net = tflearn.lstm(net, 512, dropout=0.8, return_seq=True)

net = tflearn.lstm(net, 1024, dropout=0.8)

# Output layer: one softmax unit per emoji class.
net = tflearn.fully_connected(net, no_of_unique_y_labels, activation='softmax')

# Top-5 metric object, handed to the regression layer as its `metric`.
top5 = tflearn.Top_k(k=5)
# Training op: Adam optimizer, learning rate 1e-4, categorical cross-entropy loss.
net = tflearn.regression(net, 
                         optimizer='adam',  # adam or ada or adagrad # sgd
                         learning_rate=.0001,
                         metric=top5,
                         loss='categorical_crossentropy')

# Training
To train a model, run the next cell to start the training session.

In [None]:
# Wrap the network defined above in a TFLearn DNN model object.
model = tflearn.DNN(net, tensorboard_verbose=0)
# Fit on (train_x, train_y) for 20 epochs with batches of 128 samples, passing
# (test_x, test_y) as the validation set and showing the metric during training.
model.fit(train_x, train_y, 
          validation_set=(test_x, test_y), 
          n_epoch=20,
          show_metric=True, 
          batch_size=128)

# Saving
The next cell saves the model. Do not run this if you have loaded or want to use the model!

In [None]:
# Save the current model to SavedModels/model.tfl (a relative path).
# NOTE(review): presumably the SavedModels directory must already exist -- confirm.
model.save('SavedModels/model.tfl')
print('Model Saved!')

# Loading
The next cell loads the model, if you have previously trained and saved one. Do not run this if you don't need to load a model.

In [6]:
# Build a fresh DNN wrapper around `net`, then load the weights stored at
# SavedModels/model.tfl (the same relative path the saving cell writes to).
model = tflearn.DNN(net, tensorboard_verbose=0)
model.load('SavedModels/model.tfl')
print('Model Loaded!')

INFO:tensorflow:Restoring parameters from /output/SavedModels/model.tfl
Model Loaded!


# Predict
Here we define a few functions for actually making predictions with the model.

In [9]:
def top_k_predictions(text, k, preprocess=False, secondary_prep=True):
    """Return the ``k`` highest-scoring class indices for one comment, best first.

    :param text: the comment; raw input for ``helper.tokenize_and_embed`` when
                 ``preprocess`` is true, otherwise a sequence of token ids
    :param k: number of class indices to return; ``k <= 0`` yields ``[]``, and
              a ``k`` larger than the number of classes yields all of them
    :param preprocess: when true, first run ``helper.tokenize_and_embed`` on
                       ``text`` with ``token_lookup`` and ``vocab_to_int``
    :param secondary_prep: when true, pad / truncate ``text`` to ``seq_len``
                           with ``sanitize_data`` before predicting
    :return: list of class indices ordered from highest to lowest score
    """
    if preprocess:
        text = helper.tokenize_and_embed(text, token_lookup, vocab_to_int)
    if secondary_prep:
        text = sanitize_data(text, seq_len)
    if k <= 0:
        # A slice such as [-0:] would select every class instead of none.
        return []
    # model.predict receives a one-element batch; take the scores of that
    # single sample (assumes the result is indexable as [sample][class]).
    scores = np.asarray(model.predict([text]))[0]
    best_first = np.argsort(scores)[::-1]
    return list(best_first[:k])

def print_nth_prediction(text_n):
    """Print one sample's comment, its labelled emoji and the model's top 5 guesses.

    :param text_n: index of the sample inside ``content`` / ``targets``
    """
    comment_text = helper.get_nth_text(content, int_to_vocab, text_n)
    print('Comment     :', comment_text)
    labelled_emoji = helper.get_nth_label(targets, int_to_emoji, text_n)
    print('Emoticons   :', labelled_emoji)
    predicted_ids = top_k_predictions(content[text_n], 5)
    predicted_emoji = ' '.join(int_to_emoji[class_id] for class_id in predicted_ids)
    print('Top 5 Preds :', predicted_emoji)

In [10]:
# Show the comparison for every 5th sample among the first 300
# (indices 0, 5, ..., 295), each followed by a 50-dash separator line.
for i in range(0, 300, 5):
    print_nth_prediction(i)
    print('-' * 50)

Comment     : logan paul it's yo big day
Emoticons   : ‼️
Top 5 Preds : 😂 👌 🔥 👍 ️
--------------------------------------------------
Comment     : evan is being a douch logans getting pissed <comma_sign>  logan should have went to smash his game system then evan would have cried  & evans not gonna be able to take care of a pitbull tell his ass no <comma_sign>  its gonna kill kong  <exclamation_mark>  <exclamation_mark>  <exclamation_mark>
Emoticons   : 😂
Top 5 Preds : 😂 😭 💀 🤣 ❤
--------------------------------------------------
Comment     : me and my friends subscribe aaaaaaaaaaaa you got the diamond play button
Emoticons   : 💎😆😁
Top 5 Preds : 😍 ❤ ️ 😂 😊
--------------------------------------------------
Comment     : saying nigga isn't even bad it's only bad if you say it in a hateful way to black people all the cry babies and immature people stop going at him i guarantee youse you wouldn't say it to idubbz or others but because it's pewdiepie you think you have the right to go full f

Top 5 Preds : 😂 🤣 💀 😭 😅
--------------------------------------------------
Comment     : i love you guys <exclamation_mark>  <exclamation_mark>  <exclamation_mark>  
Emoticons   : 😘
Top 5 Preds : 😍 ❤ ️ 😘 ♥
--------------------------------------------------
Comment     : ugh <period>  <period>  i love
Emoticons   : 💕
Top 5 Preds : 😍 ❤ ️ 😘 ♥
--------------------------------------------------
Comment     : yawn <return> wake me up when it gets
Emoticons   : 😎
Top 5 Preds : 😎 👊 😜 🤡 💯
--------------------------------------------------
Comment     : karma for the chargers leaving san diego
Emoticons   : 🙃
Top 5 Preds : 😕 😥 😆 😔 😑
--------------------------------------------------
Comment     : look at 1:42 and someone falls
Emoticons   : 😅
Top 5 Preds : 😂 😆 🤣 😅 💀
--------------------------------------------------
Comment     : they must of done thousands of retakes and fake laughs
Emoticons   : 😂
Top 5 Preds : 😂 👌 💀 😭 🤣
--------------------------------------------------
Comment     : omg how 

In [None]:
# NOTE(review): X and Y are the full unrolled set that train_x / test_x were
# split from, so this score presumably also covers the held-out test portion --
# confirm before reporting it as training accuracy.
print('RNN Training Accuracy :', helper.top_k_categorical_accuracy(X, Y, top_k_predictions))

RNN Training Accuracy : 0.7019878573368494


In [None]:
# Same metric computed on the VAL_X / VAL_Y split, with top_k_predictions
# passed in as the prediction callback.
print('RNN Validation Accuracy :', helper.top_k_categorical_accuracy(VAL_X, VAL_Y, top_k_predictions))

RNN Validation Accuracy : 0.8124643264840182
