In [None]:
import numpy as np
import tensorflow as tf

In [None]:
filename = 'input.txt'
#filename = 'wodehouse_right_ho_jeeves.txt'

# Read the whole corpus (should be a simple plain text file) and lower-case it.
# The context manager closes the file handle as soon as the text is in memory.
with open(filename, 'r') as corpus_file:
    data_raw = corpus_file.read().lower()

# Sorted list of the distinct characters; sorting makes the index assignment
# below deterministic from run to run.
chars = sorted(set(data_raw))

vocab_size = len(chars)
print('Data has length {} and consist of {} unique characters.'.format(len(data_raw), vocab_size))

# Two-way lookup between characters and their integer indices.
ch_to_idx = {ch: i for i, ch in enumerate(chars)}
idx_to_char = {i: ch for i, ch in enumerate(chars)}

# The corpus as a 1-D integer array, one index per character.
data = np.array([ch_to_idx[ch] for ch in data_raw])

In [None]:
# --- Hyperparameters ---------------------------------------------------------
hidden_size = 50     # size of the hidden state vector
seq_length = 25      # number of characters fed per parameter update
num_epochs = 1
learning_rate = 0.1

# Number of complete seq_length-sized windows that fit in the encoded corpus.
m = len(data) // seq_length
print(m, seq_length, vocab_size)

In [None]:
# One-hot encode the index sequence (one row per character), then build the
# inputs and the next-character targets: data_Y is data_X shifted by one step.
data_one_hot = tf.keras.utils.to_categorical(data)
data_X = data_one_hot[:m * seq_length]
data_Y = data_one_hot[1:m * seq_length + 1]

data_X.shape

In [None]:
# Graph inputs: one window of one-hot inputs, the matching one-hot targets,
# and the hidden state carried over from the previous window (column vector).
input_shape = (seq_length, vocab_size)
X_ph = tf.placeholder(dtype=tf.float32, shape=input_shape)
Y_ph = tf.placeholder(dtype=tf.float32, shape=input_shape)
h_prev_ph = tf.placeholder(dtype=tf.float32, shape=(hidden_size, 1))

In [None]:
from simple_RNN import initialize_parameters

# Initial parameter values come from the project's simple_RNN helper; each one
# is wrapped in a float32 tf.Variable so the optimizer can update it.
params = initialize_parameters(hidden_size, vocab_size)
param_tensors = dict(
    (key, tf.Variable(value, dtype=tf.float32)) for key, value in params.items()
)

# Short aliases used by the forward pass.
U, V, W = (param_tensors[key] for key in ('U', 'V', 'W'))
by, bh = param_tensors['by'], param_tensors['bh']

In [None]:
# Forward pass: unroll the RNN over the seq_length time steps of one window.
# `h` starts as the fed-in previous hidden state and, after the loop, refers to
# the hidden state of the last step (the training loop fetches it to carry the
# state into the next window).
h = h_prev_ph

h_series = []
for t in range(seq_length):
    xt = tf.reshape(X_ph[t, :], [vocab_size, 1])  # vocab_size x 1

    p = tf.matmul(U, xt) + tf.matmul(W, h) + bh  # hidden_size x 1
    h = tf.tanh(p)  # hidden_size x 1
    h_series.append(h)

# One label row per time step, paired with the logits computed from the
# hidden state of the same step.
labels_series = tf.unstack(Y_ph, axis=0)
outputs = [tf.matmul(V, hh) + by for hh in h_series]

# Logits and labels are column vectors, so the softmax runs over axis 0.
losses = [
    tf.nn.softmax_cross_entropy_with_logits_v2(
        logits=out,
        labels=tf.reshape(yt, (vocab_size, 1)),
        axis=0,
    )
    for out, yt in zip(outputs, labels_series)
]
# Total loss of the window: sum of the per-step cross-entropies.
loss_tensor = tf.reduce_sum(losses)

# keep the gradients as well (don't need this, but it's nice to compare to other implementations)
# The key order is snapshotted once so names and gradients stay paired.
param_names = list(params.keys())
tensors = [param_tensors[pn] for pn in param_names]
grads_list = tf.gradients(loss_tensor, tensors)
grads_dict = dict(zip(param_names, grads_list))


In [None]:
# Plain SGD alternative:
#optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss_tensor)
optimizer = tf.train.AdamOptimizer(learning_rate/10).minimize(loss_tensor)

# Count only windows for which BOTH the inputs and the shifted targets have a
# full seq_length rows. data_Y is offset by one character, so when the corpus
# length is an exact multiple of seq_length its last window is one row short
# and could not be fed into the fixed-shape placeholder.
seqs_per_epoch = min(len(data_X), len(data_Y)) // seq_length
if seqs_per_epoch == 0:
    raise ValueError(
        'Not enough data for a single training window of length {}.'.format(seq_length)
    )

# NOTE: this overrides any num_epochs value set earlier in the notebook.
num_epochs = 100
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())

    for epoch in range(num_epochs):
        # The hidden state is reset at the start of every pass over the data
        # and then carried from one window to the next within the epoch.
        h_prev = np.zeros((hidden_size,1), dtype=float)
        for i in range(seqs_per_epoch):
            start = i*seq_length
            end = start + seq_length

            X = data_X[start:end]
            Y = data_Y[start:end]

            # One optimizer step; also fetch the loss, the gradients and the
            # final hidden state of this window (fed back in as h_prev).
            _, loss, grads, h_prev = sess.run(
                [optimizer, loss_tensor, grads_dict, h],
                feed_dict={X_ph: X, Y_ph: Y, h_prev_ph: h_prev},
            )
        # Reports the loss of the last window of the epoch, not an average.
        print('epoch: {}, iter: {}, loss: {}'.format(epoch, i, loss))

    # Pull the learned values out of the graph before the session closes.
    trained_params = sess.run(param_tensors)
        

In [None]:
from simple_RNN import generate_sample

# Start sampling from an all-zero hidden state.
h_prev = np.zeros((hidden_size,1), dtype=float)

# NOTE(review): the trailing arguments are presumably the seed character index
# (0) and the number of characters to generate (100) -- confirm against
# simple_RNN.generate_sample.
sample_indices = generate_sample(trained_params, h_prev, 0, 100)

# Map the sampled indices back to characters and show the resulting text.
sample = ''.join(idx_to_char[idx] for idx in sample_indices)
sample