# Recurrent Neural Networks
## Covers
    1/ Character level Vanilla RNN implementation
    2/ Generating a sequence from a random input
    

# Vanilla implementation

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
# Toy next-character dataset: y is x shifted one character to the left
text = 'Hello how are you doing? I am great '
x = [ch for ch in text[:-1]]
y = [ch for ch in text[1:]]

In [3]:
# Encode the characters: string -> integer label -> one-hot row
from sklearn.preprocessing import LabelEncoder, LabelBinarizer

# Integer labels: fit the encoder on the distinct characters of the text
vocab = list(set(text))
le = LabelEncoder()
text_int = le.fit(vocab).transform(vocab)
print('**Input :x \n', x)
# NOTE(review): x and y are rebound in place (chars -> ints -> one-hot), so this
# cell expects the character lists from the previous cell; re-running it on its
# own will presumably fail -- confirm before relying on out-of-order execution.
x, y = le.transform(x), le.transform(y)
print('\n**LabelEncoder :x \n', x)

# One-hot rows: fit the binarizer on the integer labels
lb = LabelBinarizer().fit(text_int)
x, y = lb.transform(x), lb.transform(y)
print('\n**LabelBinarizer :x \n', x)
print('\n**Transform back :x \n', le.inverse_transform(lb.inverse_transform(x)))

# Lookup tables between integer label, character and one-hot row
int_labels = text_int.tolist()
vocab_onehot = lb.transform(text_int)
vocab_dict = dict(zip(int_labels, vocab))
vocab_reverse_dict = dict(zip(vocab, int_labels))
vocab_onehot_dict = dict(zip(int_labels, vocab_onehot))

**Input :x 
 ['H', 'e', 'l', 'l', 'o', ' ', 'h', 'o', 'w', ' ', 'a', 'r', 'e', ' ', 'y', 'o', 'u', ' ', 'd', 'o', 'i', 'n', 'g', '?', ' ', 'I', ' ', 'a', 'm', ' ', 'g', 'r', 'e', 'a', 't']

**LabelEncoder :x 
 [ 2  6 10 10 13  0  8 13 17  0  4 14  6  0 18 13 16  0  5 13  9 12  7  1  0
  3  0  4 11  0  7 14  6  4 15]

**LabelBinarizer :x 
 [[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]
 [0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1]
 [0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]
 [0

In [4]:
tf.reset_default_graph()

# Sizes: hidden units, characters per unrolled window, distinct characters
hidden_size = 10
state_size = 4
truncated_backprop_length = state_size
vocab_size = len(set(text))


# Placeholders for one window, laid out as [vocab_size, time]:
# each column is the one-hot vector of one character.
x_tensor = tf.placeholder(tf.float32, [vocab_size, truncated_backprop_length])
y_tensor = tf.placeholder(tf.int32, [vocab_size, truncated_backprop_length])

# Split the window into one vector per time step
inputs_series = tf.unstack(x_tensor, axis=1)
labels_series = tf.unstack(y_tensor, axis=1)

# Hidden state at the start of every window (a zero-initialised Variable)
init_state = tf.Variable(tf.zeros([1, hidden_size]), dtype=tf.float32)

# Input -> hidden
Wxh = tf.Variable(tf.truncated_normal((vocab_size, hidden_size), stddev=0.01), dtype=tf.float32, name='Wxh')

# Hidden -> hidden
Whh = tf.Variable(tf.truncated_normal((hidden_size, hidden_size), stddev=0.01), dtype=tf.float32, name='Whh')
bhh = tf.Variable(tf.zeros(hidden_size), dtype=tf.float32, name='bhh')

# Hidden -> output
Why = tf.Variable(tf.truncated_normal((hidden_size, vocab_size), stddev=0.01), dtype=tf.float32, name='Why')
bhy = tf.Variable(tf.zeros(vocab_size), dtype=tf.float32, name='bhy')

# Forward pass: unroll the recurrence over the window, keeping every hidden state
states_series = []
current_state = init_state
for step_input in inputs_series:
    # [vocab_size] -> [1, vocab_size] so it can be matrix-multiplied with Wxh
    step_input = tf.reshape(step_input, [-1, vocab_size])
    input_term = tf.matmul(step_input, Wxh)
    recurrent_term = tf.matmul(current_state, Whh)
    current_state = tf.tanh(input_term + recurrent_term + bhh)
    states_series.append(current_state)

# Per-step logits and class probabilities
logits_series = [tf.matmul(hidden, Why) + bhy for hidden in states_series]
pred_series = [tf.nn.softmax(step_logits) for step_logits in logits_series]

# Loss: mean cross-entropy over the time steps of the window
step_losses = [
    tf.nn.softmax_cross_entropy_with_logits(logits=step_logits, labels=step_labels)
    for step_logits, step_labels in zip(logits_series, labels_series)
]
loss = tf.reduce_mean(step_losses)
optimizer = tf.train.AdagradOptimizer(0.5).minimize(loss)



The network is trained, and during testing the output is fed back into the input to generate a sequence.

In [5]:
# Running the train session
from random import randint

def generate_sequence(batchX, num_samples=20):
    """Greedily generate characters, feeding each prediction back as input.

    Uses notebook globals: the open session `sess`, the graph tensors
    `pred_series`, `current_state` and `x_tensor`, the encoders `le` / `lb`,
    the lookup tables `vocab_onehot_dict` and `dictionary`, and
    `truncated_backprop_length`.

    Args:
        batchX: one-hot seed window with one row per character; its transpose
            is what gets fed to `x_tensor`. The caller's array is not modified.
        num_samples: number of characters to generate and print.

    Returns:
        `(_pred_series, _current_state)` as returned by the last `sess.run`,
        or `(None, None)` when `num_samples <= 0` and the model is never run.
    """
    print('Input Feed:', le.inverse_transform(lb.inverse_transform(batchX)), end=' ')
    print('Output Obtained: ', end='')
    # Bound up front so the final return is valid even if the loop never runs.
    _pred_series, _current_state = None, None
    last_step = truncated_backprop_length - 1
    for _ in range(num_samples):
        _pred_series, _current_state = sess.run([pred_series, current_state], feed_dict={x_tensor: batchX.T})
        # Most likely class index for every time step of the window.
        idx_o = np.argmax(_pred_series, axis=2).flatten()
        next_idx = idx_o[last_step]
        # Slide the window one character to the left (np.roll returns a new
        # array) and put the newest prediction in the last row.
        batchX = np.roll(batchX, -1, axis=0)
        batchX[last_step] = vocab_onehot_dict[next_idx]
        print(dictionary[next_idx], end='')
    print('\n')
    return _pred_series, _current_state
    
# Training configuration
truncated_backprop_length = state_size
num_epoch = 50
num_batches = len(x) // truncated_backprop_length
# integer label -> character, used to decode predictions
dictionary = dict(zip(text_int.tolist(), vocab))


def one_hot_window(chars):
    """Stack the one-hot rows of `chars` into an array with one row per character."""
    return np.array([vocab_onehot_dict[vocab_reverse_dict[ch]].tolist() for ch in chars])


with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    # Last position at which a full window still fits into x
    last_start = len(x) - truncated_backprop_length
    for epoch in range(num_epoch):
        # Slide a window of truncated_backprop_length characters over the
        # text one position at a time; each window is one optimizer step.
        for start_idx in range(last_start + 1):
            end_idx = start_idx + truncated_backprop_length
            batchX = x[start_idx:end_idx].T
            batchY = y[start_idx:end_idx].T

            _loss, _ = sess.run([loss, optimizer], feed_dict={x_tensor: batchX, y_tensor: batchY})
        if epoch % 10 == 0:
            # _loss is the loss of the last window processed in this epoch
            print('Loss epoch({:})={:3.4f}'.format(epoch, _loss))
            input_char = 'Hell'
            batchX = one_hot_window(input_char)
            print('batchX shape:', batchX.shape)
            generate_sequence(batchX)

    print('Giving a random input and generating a sequence by passing back the output to input:')
    input_char = 'how '
    batchX = one_hot_window(input_char)
    # _state (the final hidden state) is reused by the model-loading cell
    _pred, _state = generate_sequence(batchX, 50)

    # Persist the trained graph and variables
    save_model = './char_rnn'
    saver = tf.train.Saver()
    save_path = saver.save(sess, save_model)
    print("Model saved in file: %s" % save_path)

Loss epoch(0)=2.4300
batchX shape: (4, 19)
Input Feed: ['H' 'e' 'l' 'l'] Output Obtained:  aear aear aear aear

Loss epoch(10)=0.1549
batchX shape: (4, 19)
Input Feed: ['H' 'e' 'l' 'l'] Output Obtained: o how areat great gr

Loss epoch(20)=0.1937
batchX shape: (4, 19)
Input Feed: ['H' 'e' 'l' 'l'] Output Obtained: o how are you doing?

Loss epoch(30)=0.2017
batchX shape: (4, 19)
Input Feed: ['H' 'e' 'l' 'l'] Output Obtained: o how are you doing?

Loss epoch(40)=0.2113
batchX shape: (4, 19)
Input Feed: ['H' 'e' 'l' 'l'] Output Obtained: o how are you doing?

Giving a random input and generating a sequence by passing back the output to input:
Input Feed: ['h' 'o' 'w' ' '] Output Obtained: are you doing? I am great great great great great 

Model saved in file: ./char_rnn


In [None]:
# NOTE(review): `run` is not defined in any cell visible in this notebook, so on a
# fresh kernel this call raises NameError unless it is defined elsewhere --
# confirm where it comes from, or remove this cell.
run('H', 'h', 1)

# Experiments with state variable

In [None]:
# Prepare to reload the saved model: create a dedicated graph for the restored
# network and clear whatever the default graph currently holds.
loaded_graph = tf.Graph()

tf.reset_default_graph()

def pred_next_char(input_char, sess, _state):
    """Run one RNN step with the restored weights and return evaluated results.

    Uses notebook globals: `vocab_onehot_dict`, `vocab_reverse_dict`,
    `vocab_size` and the restored tensors `loaded_Wxh`, `loaded_Whh`,
    `loaded_bhh`, `loaded_Why`, `loaded_bhy`.

    Args:
        input_char: a single character that is a key of `vocab_reverse_dict`.
        sess: session used to evaluate the step.
        _state: previous hidden state as a concrete (array-like) value; it is
            wrapped in `tf.constant`, so it must not be a symbolic tensor.

    Returns:
        `(out, h)`: the output logits and the new hidden state, both as
        evaluated values, so `h` can be passed straight back in as `_state`.
    """
    X = np.array(vocab_onehot_dict[vocab_reverse_dict[input_char]].tolist())
    X = tf.constant(X, dtype=tf.float32)
    X = tf.reshape(X, [-1, vocab_size])
    state = tf.constant(_state, dtype=tf.float32)

    # NOTE(review): these ops are rebuilt on every call; fine for a short demo
    # loop, but presumably grows the graph -- consider placeholders if reused.
    h = tf.tanh(tf.matmul(X, loaded_Wxh) + tf.matmul(state, loaded_Whh) + loaded_bhh)
    p = tf.matmul(h, loaded_Why) + loaded_bhy
    # Evaluate the hidden state together with the logits: returning the
    # symbolic `h` would make the next call's tf.constant(_state) fail.
    out, h = sess.run([p, h])
    return out, h

with tf.Session(graph=loaded_graph) as sess:
    # Rebuild the saved graph structure, then restore the trained values into it
    loader = tf.train.import_meta_graph(save_model + '.meta')
    loader.restore(sess, save_model)

    # Fetch the trained parameters by the names they were created with
    loaded_Wxh, loaded_Whh, loaded_bhh, loaded_Why, loaded_bhy = (
        loaded_graph.get_tensor_by_name(tensor_name + ':0')
        for tensor_name in ('Wxh', 'Whh', 'bhh', 'Why', 'bhy')
    )

    input_char = 'H'
    print('Input Feed:', [input_char], end=' ')
    print('Output Obtained: ', end='')
    # Start from the hidden state left over from the training cell's last generation
    h = _state
    for step in range(50):
        # One step at a time: the predicted character becomes the next input
        out, h = pred_next_char(input_char, sess, h)
        idx_o = np.argmax(out, axis=1)
        input_char = dictionary[idx_o[0]]
        print(input_char, end='')

In [None]:
# Display the hidden state returned by the last generate_sequence call in the training cell
_state

In [None]:
# Display the character that integer label 5 decodes to
dictionary[5]