# Recurrent Neural Networks

Recurrent neural networks, or RNNs, are a family of neural networks for processing sequential data. Much as a convolutional networkis a neural network that is specialized for processing a grid of values Xsuch as an image, a recurrent neural network is a neural network that is specialized for processing a sequence of values $ x^{(1)} ,...x^{(T)}$ 

![alt text](https://cdn-images-1.medium.com/max/1600/1*4KwIUHWL3sTyguTahIxmJw.png)


* $x_t$ is the input at time step t. For example, $x_1$ could be a one-hot vector corresponding to the second word of a sentence.
* $h_t$ is the hidden state at time step t. It’s the “memory” of the network. $h_t$ is calculated based on the previous hidden state and the input at the current step: $h_t=f(Ux_t + Wh_{t-1})$. The function f usually is a nonlinearity such as tanh or ReLU.  $h_{0}$, which is required to calculate the first hidden state, is typically initialized to all zeros.
* $y_t$ is the output at step t. For example, if we wanted to predict the next word in a sentence it would be a vector of probabilities across our vocabulary. $y_t = \mathrm{softmax}(Vs_t)$.

In [2]:
import tensorflow as tf
import os
import numpy as np

In [61]:
class Dataloader():
  """ Load Text """
  def __init__(self):
    
    # Path to the file
    path=tf.keras.utils.get_file('nietzsche.txt',
            origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
    
    # Open file
    with open(path, encoding='utf-8') as f:
      self.raw_text=f.read().lower()
    
    # Sorted list of the vocabulary  that contains all the unique characters in the file
    self.chars=sorted(list(set(self.raw_text)))
    
    # Char to index
    self.chars_idx={c:i for i,c in enumerate(self.chars)}
    
    # Idx to Char
    self.idx_chars={i:c for i,c in enumerate(self.chars)}
    
    # Text 
    self.text=[self.chars_idx[c] for c in self.raw_text]
    
  def get_batch(self,seq_length, batch_size):
    seq=[]
    next_char=[]
    
    for i in range(batch_size):
      index=np.random.randint(0, len(self.text) - seq_length)
      seq.append(self.text[index:index + seq_length])
      next_char.append(self.text[index + seq_length])
    
    X,state= np.array(seq),  np.array(next_char)
    #setattr(X.dtype,'base_dtype',X.dtype)
    #X.dtype.__setattr__('base_dtype', X.dtype)
    return X, state

In [62]:
data=Dataloader()

In [5]:
# Download the dataset 
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [6]:
with open(path_to_file, encoding='utf-8') as f:
      raw_text=f.read().lower()

## Parametros 

In [14]:
# maximum length
seq_length = 100
vocab_size= len(data.text)
embedding_dim= 256
units= 1024
batch_size=64
buffer_size=10000

## Model 

In [89]:
class LSTM(tf.keras.Model):
    """Long Short Term Memory"""
    
    def __init__(self,vocab_size, embedding_dim, units, batch_size):
        super(LSTM,self).__init__()
        
        # Parametros
        self.units=units
        self.batch_sz=batch_size
        
        # Word Embeddings
        self.embedding= tf.keras.layers.Embedding(vocab_size, embedding_dim)
        
        # Select device
        params={'return_sequences':True, 'return_state':True, 'recurrent_initializer':'glorot_uniform'}
        if tf.test.is_gpu_available():
            self.lstm= tf.keras.layers.CuDNNLSTM(self.units,**params)
        else: 
            self.lstm= tf.keras.layers.LSTM(self.units,**params)
            
        self.fc= tf.keras.layers.Dense(batch_size) # Y= WX + b
        
    def call(self, x, hidden_state):
        # Word embedding
        x= self.embedding(tf.convert_to_tensor(x))
        
        # output shape == (batch_size, seq_length, hidden_state)
        # state shpe == (batch_size, hidden_state)
        output, states, _= self.lstm(x, initial_state=hidden_state)
        
        # Reshape del output : (batch_size*seq_length, hidden_state)
        output= tf.reshape(output, (-1, output.shape[2]))
        x= self.fc(output)
        return x, states
    
        
        
            

In [90]:
modelo= LSTM(vocab_size,embedding_dim, units,batch_size)

## Optimizer

In [91]:
optimizer= tf.train.AdamOptimizer()

# loss function
def loss_function(real,preds):
    return tf.losses.sparse_softmax_cross_entropy(labels=real,logits=preds)


In [106]:
tf.transpose(predictions)

<tf.Tensor 'transpose_5:0' shape=(64, 6400) dtype=float32>

In [110]:
for epoch in range(30):
    
    # Inicializar los estados a cero
    hidden= modelo.reset_states() 
    
    # Datos
    X, output=data.get_batch(seq_length,batch_size)
    
    with tf.GradientTape() as tape:
        predictions, hidden= modelo(X, hidden)
        
        output= tf.reshape(output,(-1,) )
        loss= tf.losses.sparse_softmax_cross_entropy(labels=output, logits=tf.transpose(predictions))
    
    # back prop 
    grads= tape.gradient(loss, modelo.variables)
    optimizer.apply_gradients(zip(grads, modelo.variables))
    print(f'epoch : {epoch}, loss: {loss}')
        
        
    

epoch : 0, loss: Tensor("sparse_softmax_cross_entropy_loss_38/value:0", shape=(), dtype=float32)
epoch : 1, loss: Tensor("sparse_softmax_cross_entropy_loss_39/value:0", shape=(), dtype=float32)
epoch : 2, loss: Tensor("sparse_softmax_cross_entropy_loss_40/value:0", shape=(), dtype=float32)
epoch : 3, loss: Tensor("sparse_softmax_cross_entropy_loss_41/value:0", shape=(), dtype=float32)
epoch : 4, loss: Tensor("sparse_softmax_cross_entropy_loss_42/value:0", shape=(), dtype=float32)
epoch : 5, loss: Tensor("sparse_softmax_cross_entropy_loss_43/value:0", shape=(), dtype=float32)
epoch : 6, loss: Tensor("sparse_softmax_cross_entropy_loss_44/value:0", shape=(), dtype=float32)
epoch : 7, loss: Tensor("sparse_softmax_cross_entropy_loss_45/value:0", shape=(), dtype=float32)
epoch : 8, loss: Tensor("sparse_softmax_cross_entropy_loss_46/value:0", shape=(), dtype=float32)
epoch : 9, loss: Tensor("sparse_softmax_cross_entropy_loss_47/value:0", shape=(), dtype=float32)
epoch : 10, loss: Tensor("spar

In [68]:
foo= tf.ones([1])

In [71]:
foo.dtype.base_dtype.name

'float32'

<tf.Tensor 'ones:0' shape=(1,) dtype=float32>