
# Example from: Recurrent Neural Networks in Tensorflow II

http://r2rt.com/recurrent-neural-networks-in-tensorflow-ii.html


* ## <font color='purple'>We will use LSTM and GRU </font>
* ## <font color='purple'>Simple NLP Task: character-level language model to generate character sequences </font>
### a la Andrej Karpathy’s char-rnn 

* <font size='3'> We’ll use the tiny-shakespeare corpus as our data, though we could use any plain text file.
* We’ll choose to use all of the characters in the text file as our vocabulary, treating lowercase and capital letters are separate characters. </font>

Additionally, it is likely a good idea to restrict the vocabulary (i.e., the set of characters) used, by replacing uncommon characters with an UNK token (like a square: □).

In [39]:
"""
Imports
"""
import numpy as np
import tensorflow as tf
%matplotlib inline
import matplotlib.pyplot as plt
import time
import os

from six.moves import urllib

# No in TF 1.
#from tensorflow.models.rnn.ptb import reader


In [40]:
tf.__version__

'1.1.0'

In [41]:
! pip show tensorflow

Name: tensorflow
Version: 1.1.0
Summary: TensorFlow helps the tensors flow
Home-page: http://tensorflow.org/
Author: Google Inc.
Author-email: opensource@google.com
License: Apache 2.0
Location: c:\anaconda2\envs\tensorflow\lib\site-packages
Requires: protobuf, wheel, six, werkzeug, numpy


## <font color='brown'>Update tensorflow to 0.10.0 for managing state_is_tuple=True in LSTM (see below) </font>

* ### cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
* ### cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

In [42]:
# ! pip install --upgrade https://storage.googleapis.com/tensorflow/linux/cpu/tensorflow-0.10.0-cp27-none-linux_x86_64.whl

## <font color='magenta'>The task:  generate character sequences</font>

In [43]:
"""
Load and data
"""

file_url = 'https://raw.githubusercontent.com/jcjohnson/torch-rnn/master/data/tiny-shakespeare.txt'
file_name = 'tinyshakespeare.txt'

#file_url = 'http://latel.upf.edu/traductica/scp/quijote/quijote.txt'
#file_name = 'cervantes.txt'

if not os.path.exists(file_name):
    urllib.request.urlretrieve(file_url, file_name)

In [44]:
with open(file_name,'r') as f:
    raw_data = f.read()
    print("Data length:", len(raw_data))

Data length: 1115394


In [45]:
type(raw_data)

str

In [46]:
print(raw_data[0:200])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you


* ## understand : vocab : unique elements in raw text

In [47]:
vocab = set(raw_data)
vocab_size = len(vocab)
idx_to_vocab = dict(enumerate(vocab))

In [48]:
type(vocab)

set

In [49]:
print(vocab)

{'Q', '.', 'K', '-', 'O', '3', 'm', 'U', 'H', 'e', ':', ' ', 'B', 'R', 'X', 'Y', 's', 'S', 'c', 'i', '$', 'Z', 'T', 'b', 'g', "'", 'f', 'l', 'M', 'L', 'v', 'E', 'w', ',', 'P', 'z', 'W', 'N', 't', '\n', ';', 'F', 'C', 'p', 'k', 'd', 'I', 'q', 'y', 'r', '!', 'a', 'J', 'x', '?', 'h', 'j', 'G', 'u', 'n', 'V', 'o', 'D', 'A', '&'}


In [50]:
vocab_size

65

In [51]:
idx_to_vocab

{0: 'Q',
 1: '.',
 2: 'K',
 3: '-',
 4: 'O',
 5: '3',
 6: 'm',
 7: 'U',
 8: 'H',
 9: 'e',
 10: ':',
 11: ' ',
 12: 'B',
 13: 'R',
 14: 'X',
 15: 'Y',
 16: 's',
 17: 'S',
 18: 'c',
 19: 'i',
 20: '$',
 21: 'Z',
 22: 'T',
 23: 'b',
 24: 'g',
 25: "'",
 26: 'f',
 27: 'l',
 28: 'M',
 29: 'L',
 30: 'v',
 31: 'E',
 32: 'w',
 33: ',',
 34: 'P',
 35: 'z',
 36: 'W',
 37: 'N',
 38: 't',
 39: '\n',
 40: ';',
 41: 'F',
 42: 'C',
 43: 'p',
 44: 'k',
 45: 'd',
 46: 'I',
 47: 'q',
 48: 'y',
 49: 'r',
 50: '!',
 51: 'a',
 52: 'J',
 53: 'x',
 54: '?',
 55: 'h',
 56: 'j',
 57: 'G',
 58: 'u',
 59: 'n',
 60: 'V',
 61: 'o',
 62: 'D',
 63: 'A',
 64: '&'}

In [52]:
vocab_to_idx = dict(zip(idx_to_vocab.values(), idx_to_vocab.keys()))

In [53]:
vocab_to_idx['A']

63

In [54]:
idx_to_vocab[13]

'R'

In [55]:
vocab_to_idx 

{'\n': 39,
 ' ': 11,
 '!': 50,
 '$': 20,
 '&': 64,
 "'": 25,
 ',': 33,
 '-': 3,
 '.': 1,
 '3': 5,
 ':': 10,
 ';': 40,
 '?': 54,
 'A': 63,
 'B': 12,
 'C': 42,
 'D': 62,
 'E': 31,
 'F': 41,
 'G': 57,
 'H': 8,
 'I': 46,
 'J': 52,
 'K': 2,
 'L': 29,
 'M': 28,
 'N': 37,
 'O': 4,
 'P': 34,
 'Q': 0,
 'R': 13,
 'S': 17,
 'T': 22,
 'U': 7,
 'V': 60,
 'W': 36,
 'X': 14,
 'Y': 15,
 'Z': 21,
 'a': 51,
 'b': 23,
 'c': 18,
 'd': 45,
 'e': 9,
 'f': 26,
 'g': 24,
 'h': 55,
 'i': 19,
 'j': 56,
 'k': 44,
 'l': 27,
 'm': 6,
 'n': 59,
 'o': 61,
 'p': 43,
 'q': 47,
 'r': 49,
 's': 16,
 't': 38,
 'u': 58,
 'v': 30,
 'w': 32,
 'x': 53,
 'y': 48,
 'z': 35}

* ### understand: converting text data into numbers

In [56]:
data = [vocab_to_idx[c] for c in raw_data]

In [57]:
type(data)

list

In [58]:
len(data)

1115394

In [59]:
raw_data[0:10]

'First Citi'

In [60]:
del raw_data

In [61]:
data[0:10]

[41, 19, 49, 16, 38, 11, 42, 19, 38, 19]

In [62]:
recover_data = [idx_to_vocab[c] for c in data]

In [63]:
recover_data[0:10]

['F', 'i', 'r', 's', 't', ' ', 'C', 'i', 't', 'i']

In [64]:
del recover_data

## <font color='brown'> Some utility functions for feeding batches</font>

In [65]:
def ptb_iterator(raw_data, batch_size, num_steps):
  """Iterate on the raw PTB data.
  This generates batch_size pointers into the raw PTB data, and allows
  minibatch iteration along these pointers.
  Args:
    raw_data: one of the raw data outputs from ptb_raw_data.
    batch_size: int, the batch size.
    num_steps: int, the number of unrolls.
  Yields:
    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
    The second element of the tuple is the same data time-shifted to the
    right by one.
  Raises:
    ValueError: if batch_size or num_steps are too high.
  """
  raw_data = np.array(raw_data, dtype=np.int32)

  data_len = len(raw_data)
  batch_len = data_len // batch_size
  data = np.zeros([batch_size, batch_len], dtype=np.int32)
  for i in range(batch_size):
    data[i] = raw_data[batch_len * i:batch_len * (i + 1)]

  epoch_size = (batch_len - 1) // num_steps

  if epoch_size == 0:
    raise ValueError("epoch_size == 0, decrease batch_size or num_steps")

  for i in range(epoch_size):
    x = data[:, i*num_steps:(i+1)*num_steps]
    y = data[:, i*num_steps+1:(i+1)*num_steps+1]
    yield (x, y)

In [66]:
def gen_epochs(n, num_steps, batch_size):
    for i in range(n):
        yield ptb_iterator(data, batch_size, num_steps)

###  PTB from Penn Tree Bank (PTB) dataset

<font color='green'>reader.ptb_iterator(data, batch_size, num_steps)<7font>

https://github.com/petewarden/tensorflow_makefile/blob/master/tensorflow/models/rnn/ptb/reader.py


def ptb_iterator(raw_data, batch_size, num_steps):
  
  Iterate on the raw PTB data.
  This generates batch_size pointers into the raw PTB data, and allows
  minibatch iteration along these pointers.
  
  Args:
    raw_data: one of the raw data outputs from ptb_raw_data.
    batch_size: int, the batch size.
    num_steps: int, the number of unrolls.
    
  Yields:
    Pairs of the batched data, each a matrix of shape [batch_size, num_steps].
    
    The second element of the tuple is the same data time-shifted to the
    right by one.
    
  Raises:
    ValueError: if batch_size or num_steps are too high.
 




In [67]:
num_epochs=100
num_steps=200
batch_size=32

Xepoch=gen_epochs(num_epochs, num_steps, batch_size)

for idx, epoch in enumerate(Xepoch):
    print('epoch no. =',idx)
    for step, (X, Y) in enumerate(epoch):
            cc=0
    
    print("Total no. steps=",step)
    print("X information....")
    print(X.shape)
    print("Y information....")
    print(Y.shape)
            

epoch no. = 0
Total no. steps= 173
X information....
(32, 200)
Y information....
(32, 200)


In [68]:
173*200*32

1107200

In [69]:
1115394/(32*200)

174.2803125

In [70]:
num_epochs=1
num_steps=200
batch_size=32

Xepoch=gen_epochs(num_epochs, num_steps, batch_size)

for idx, epoch in enumerate(Xepoch):
    print('epoch no. =',idx)
    for step, (X, Y) in enumerate(epoch):
        if step % 500 == 0:
            print(step)
            print ("X information....")
            print(X.shape)
            print(type(X))
            print(X[0:10])
            print ("Y information....")
            print(Y.shape)
            print(type(Y))
            print(Y[0:10])
            


epoch no. = 0
0
X information....
(32, 200)
<class 'numpy.ndarray'>
[[41 19 49 ..., 48 61 58]
 [26 61 49 ...,  6 51 48]
 [11 42 61 ..., 41 19 49]
 ..., 
 [61 58 11 ..., 58 16 43]
 [11 55 61 ..., 26 11 32]
 [11 26 61 ..., 61 16  9]]
Y information....
(32, 200)
<class 'numpy.ndarray'>
[[19 49 16 ..., 61 58 11]
 [61 49 11 ..., 51 48 11]
 [42 61 49 ..., 19 49 16]
 ..., 
 [58 11 32 ..., 16 43 19]
 [55 61 32 ..., 11 32 61]
 [26 61 61 ..., 16  9 11]]


### Function for graph reset

In [71]:
def reset_graph():
    if 'sess' in globals() and sess:
        sess.close()
    tf.reset_default_graph()

## <font color='magenta'>Second: RNN graph definition</font>

In [72]:
state_size = 100
num_classes = vocab_size
batch_size = 32
num_steps = 200
num_layers = 3
learning_rate = 1e-4


reset_graph()

x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

* ## <font color='red'>Create EMBEDDINGS</font>
http://suriyadeepan.github.io/2017-02-13-unfolding-rnn-2/

In [73]:
embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

# Note that our inputs are no longer a list, but a tensor of dims batch_size x num_steps x state_size
rnn_inputs = tf.nn.embedding_lookup(embeddings, x)
    
    

In [74]:

#LSTM
# for TF 1....
#cell = tf.contrib.rnn.LSTMCell(state_size, state_is_tuple=True)
#cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

#cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
#cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

# GRU
# TF 1....
cell = tf.contrib.rnn.GRUCell(state_size)
#cell = tf.nn.rnn_cell.GRUCell(state_size)



init_state = cell.zero_state(batch_size, tf.float32)
rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

#reshape rnn_outputs and y so we can get the logits in a single matmul
rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
y_reshaped = tf.reshape(y, [-1])

logits = tf.matmul(rnn_outputs, W) + b

predictions = tf.nn.softmax(logits)

total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

## <font color='magenta'>Third: train RNN (LSTM or GRU)</font>

In [75]:
num_epochs=1
verbose=True

t = time.time()

tf.set_random_seed(2345)
with tf.Session() as sess:
        #TF 1.
        sess.run(tf.global_variables_initializer())
        #sess.run(tf.initialize_all_variables())
        training_losses = []
        for idx, epoch in enumerate(gen_epochs(num_epochs, num_steps, batch_size)):
            training_loss = 0
            steps = 0
            training_state = None
            for X, Y in epoch:
                steps += 1

#                if training_state is not None:
#                    feed_dict[g['init_state']] = training_state
 
                training_loss_, training_state, _ = sess.run([total_loss,
                                                      final_state,
                                                      train_step],
                                                          feed_dict={x: X, y: Y})
                training_loss += training_loss_
                if step % 100 == 0 and step > 0:
                    if verbose:
                        print("Average loss at step", step,"for last 100 steps:", training_loss/100)
                    training_losses.append(training_loss/steps)
                    training_loss = 0
            
            if verbose:
                print("Average training loss for Epoch", idx, ":", training_loss/steps)
            
        saver = tf.train.Saver()
        #saver.save(sess, 'RNN_GRU_model_cervantes')
        saver.save(sess, '.\RNN_GRU_model_shakespeare')
            
print("It took", time.time() - t, "seconds this training.")

Average training loss for Epoch 0 : 3.70520788226
It took 45.62564277648926 seconds this training.


## Some results:

### LSTM
* cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
* cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)
+ ('Average training loss for Epoch', 0, ':', 3.5619977603013488)
+ ('It took', 38.50640511512756, 'seconds this training.')

### GRU
* ('Average training loss for Epoch', 0, ':', 3.6105946466840546)
* ('It took', 35.258342027664185, 'seconds this training.')


## working directory to save our model

In [76]:
print(os.getcwd())

C:\Users\usuario\MSTC_TFCourse


In [77]:
print(os.listdir('.'))

['.ipynb_checkpoints', 'checkpoint', 'conv1.npz', 'data_with_labels.npz', 'MSTC_FontReco_CNN_TF1Py3.ipynb', 'MSTC_FontReco_FeedForward_TF1Py3.ipynb', 'MSTC_FontReco_LogisticReg_TF1Py3.ipynb', 'MSTC_IntroTF_1.ipynb', 'MSTC_IntroTF_2.ipynb', 'MSTC_RNN_1.ipynb', 'MSTC_RNN_2_dynamic_TF1Py3.ipynb', 'MSTC_RNN_3.ipynb', 'RNN_GRU_model_shakespeare.data-00000-of-00001', 'RNN_GRU_model_shakespeare.index', 'RNN_GRU_model_shakespeare.meta', 'temp.npz', 'TF_Upgrade11.ipynb', 'tinyshakespeare.txt']


In [78]:
#Remove dir not empty + sub dirs
#import shutil

#shutil.rmtree('./Ubi_Voice.csv')

## <font color='magenta'>Finally: models (LSTM or GRU) can be used to generate TEXT</font>

* ## First: <font size='3'>we need to rebuild the graph so as to accept a single character at a time</font>

In [80]:
state_size = 100
num_classes = vocab_size
batch_size = 1
num_steps = 1
num_layers = 3
learning_rate = 1e-4
num_epochs=1


reset_graph()

x = tf.placeholder(tf.int32, [batch_size, num_steps], name='input_placeholder')
y = tf.placeholder(tf.int32, [batch_size, num_steps], name='labels_placeholder')

embeddings = tf.get_variable('embedding_matrix', [num_classes, state_size])

# Note that our inputs are no longer a list, but a tensor of dims batch_size x num_steps x state_size
rnn_inputs = tf.nn.embedding_lookup(embeddings, x)
    
    
#LSTM
# for TF 1....
#cell = tf.contrib.rnn.LSTMCell(state_size, state_is_tuple=True)
#cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

#cell = tf.nn.rnn_cell.LSTMCell(state_size, state_is_tuple=True)
#cell = tf.nn.rnn_cell.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

# GRU
# TF 1....
cell = tf.contrib.rnn.GRUCell(state_size)
#cell = tf.nn.rnn_cell.GRUCell(state_size)


init_state = cell.zero_state(batch_size, tf.float32)
rnn_outputs, final_state = tf.nn.dynamic_rnn(cell, rnn_inputs, initial_state=init_state)

with tf.variable_scope('softmax'):
    W = tf.get_variable('W', [state_size, num_classes])
    b = tf.get_variable('b', [num_classes], initializer=tf.constant_initializer(0.0))

#reshape rnn_outputs and y so we can get the logits in a single matmul
rnn_outputs = tf.reshape(rnn_outputs, [-1, state_size])
y_reshaped = tf.reshape(y, [-1])

logits = tf.matmul(rnn_outputs, W) + b

predictions = tf.nn.softmax(logits)

total_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=y_reshaped))
train_step = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

## Then:
* <font size='3'>Restore our saved model.
* We’ll give the network a single character prompt,i.e. prompt='A'
* Grab its predicted probability distribution for the next character
* Use that distribution to pick the next character, and repeat. </font>
###   
<font size='3'>When picking the next character, using pick_top_chars != None to use the whole probability distribution (default), or be forced to pick one of the top n most likely characters in the distribution. The latter option should obtain more English-like results.</font>

In [81]:
prompt='A'
pick_top_chars=5
num_chars=750

saver = tf.train.Saver()

with tf.Session() as sess:
    #TF 1.
    sess.run(tf.global_variables_initializer())
    #sess.run(tf.initialize_all_variables())
    saver.restore(sess, "RNN_GRU_model_shakespeare")
    
    state = None
    current_char = vocab_to_idx[prompt]
    chars = [current_char]
    
    for i in range(num_chars):
            if state is not None:
                preds, state = sess.run([predictions,final_state], feed_dict={x: [[current_char]], init_state: state})
            else:
                preds, state = sess.run([predictions,final_state], feed_dict={x: [[current_char]]})

            if pick_top_chars is not None:
                p = np.squeeze(preds)
                p[np.argsort(p)[:-pick_top_chars]] = 0
                p = p / np.sum(p)
                current_char = np.random.choice(vocab_size, 1, p=p)[0]
            else:
                current_char = np.random.choice(vocab_size, 1, p=np.squeeze(preds))[0]

            chars.append(current_char)
            
chars = map(lambda x: idx_to_vocab[x], chars)
print("".join(chars))


INFO:tensorflow:Restoring parameters from RNN_GRU_model_shakespeare
A h heh  h heet e  eo toe  h a  ataeo  htat  aeae eaet e  h a h t hoa o t eoha  aaoe t eeoto e eeateae   h h  aa   ee to t toee o e o eoe    at haat t e h   ht ha h  a ettoeo tt eo  ha tte  t ett te  a ett  he eaa   e h a h eth to he e e h th ht  a a  ho  hoeoao eet h  etooe th  t a t     te  eeo tt o eoo  aa   e t   eo te ot  heoeeoe eaea  o toe   tet heeoe  oee eohaet   eaao e  ht o  aataotaeo   aoe t  e heee   e ohtte ea ht    hoaeoate ot teoo ho  ha  eooh te   e eet  he      h e  t  teet h tteat eo o  e e  eeo te t o ht aaonoeoe  e e o t ete   thoaootta e   eaeettto e  t      aa hteteohteoe te hethaae   h  h  t  tto   atee  eat  eot o a ae  heoe  tette  e t oe  t e  he   tohe t eeae too o  h ae ha eohaao tot  at    a    hte  t eoeet tohe
