# Install unidecode library
A helper library that transliterates Unicode text to plain ASCII.

In [1]:
!pip install unidecode

Collecting unidecode
[?25l  Downloading https://files.pythonhosted.org/packages/59/ef/67085e30e8bbcdd76e2f0a4ad8151c13a2c5bce77c85f8cad6e1f16fb141/Unidecode-1.0.22-py2.py3-none-any.whl (235kB)
[K    100% |████████████████████████████████| 235kB 5.7MB/s 
[?25hInstalling collected packages: unidecode
Successfully installed unidecode-1.0.22


# Import TensorFlow and enable eager execution


In [0]:
# Import TensorFlow >= 1.10 and enable eager execution
import tensorflow as tf

# Note: Once you enable eager execution, it cannot be disabled.
# It is switched on here, directly after the TensorFlow import and before
# the remaining imports.
tf.enable_eager_execution()

import numpy as np
import os
import re
import random
import unidecode
import time

# Download the dataset

In [3]:
# get_file returns the local path of the downloaded copy of the corpus
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


# Read the dataset

In [4]:
# Read the whole corpus inside a context manager so the file handle is closed
# as soon as reading finishes, then run it through unidecode.
with open(path_to_file) as corpus_file:
    text = unidecode.unidecode(corpus_file.read())
# length of text is the number of characters in it
print (len(text))


1115394


In [0]:
# sorted list of every distinct character that occurs in the corpus
unique = sorted(set(text))
# lookup tables between a character and its integer id (its position in `unique`)
idx2char = dict(enumerate(unique))
char2idx = {ch: idx for idx, ch in idx2char.items()}

In [0]:
# --- data ---
# number of characters in a single input sequence
max_length = 100
# number of distinct characters in the vocabulary
vocab_size = len(unique)

# --- model ---
# embedding dimension, and number of RNN (here GRU) units
embedding_dim, units = 256, 1024

# --- input pipeline ---
# examples per batch, and size of the shuffle buffer
BATCH_SIZE, BUFFER_SIZE = 64, 10000

# Creating the input and output tensors

In [9]:
# Encode the corpus once, then cut it into non-overlapping windows of
# max_length characters. Each target window is its input window shifted
# one character to the right.
text_as_int = [char2idx[ch] for ch in text]
window_starts = range(0, len(text) - max_length, max_length)

input_text = [text_as_int[s:s + max_length] for s in window_starts]
target_text = [text_as_int[s + 1:s + 1 + max_length] for s in window_starts]

print (np.array(input_text).shape)
print (np.array(target_text).shape)


(11153, 100)
(11153, 100)


# Creating batches and shuffling them using tf.data

In [0]:
# one element per (input, target) pair; shuffle, then group into full batches
# (the incomplete final batch is dropped)
dataset = tf.data.Dataset.from_tensor_slices((input_text, target_text))
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Creating the model

In [0]:
class Model(tf.keras.Model):
  """Character-level model: Embedding -> GRU -> Dense over the vocabulary."""

  def __init__(self, vocab_size, embedding_dim, units, batch_size):
    """Create the embedding, recurrent and output layers.

    Args:
      vocab_size: size of the embedding table and of the Dense output.
      embedding_dim: dimension of the embedding vectors.
      units: number of GRU units.
      batch_size: stored on the instance as `batch_sz`.
    """
    super(Model, self).__init__()
    self.units = units
    self.batch_sz = batch_size

    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)

    # arguments shared by both GRU implementations
    shared_gru_args = dict(return_sequences=True,
                           return_state=True,
                           recurrent_initializer='glorot_uniform')
    if tf.test.is_gpu_available():
      # CuDNN-backed GRU when a GPU is available
      self.gru = tf.keras.layers.CuDNNGRU(self.units, **shared_gru_args)
    else:
      self.gru = tf.keras.layers.GRU(self.units,
                                     recurrent_activation='sigmoid',
                                     **shared_gru_args)

    self.fc = tf.keras.layers.Dense(vocab_size)

  def call(self, x, hidden):
    """Run one forward pass.

    Args:
      x: batch of character ids.
      hidden: initial state handed to the GRU.

    Returns:
      A pair (predictions, states): predictions has shape
      (batch_size * max_length, vocab_size); states is the GRU state of
      shape (batch_size, hidden_size), to be fed back in on the next call.
    """
    embedded = self.embedding(x)

    # output shape == (batch_size, max_length, hidden_size)
    # states shape == (batch_size, hidden_size)
    output, states = self.gru(embedded, initial_state=hidden)

    # flatten batch and time into one axis so the Dense layer scores every
    # time step: (batch_size * max_length, hidden_size)
    flat_output = tf.reshape(output, (-1, output.shape[2]))

    return self.fc(flat_output), states


# Call the model and set the optimizer and the loss function

In [0]:
model = Model(vocab_size=vocab_size,
              embedding_dim=embedding_dim,
              units=units,
              batch_size=BATCH_SIZE)

In [0]:
optimizer = tf.train.AdamOptimizer()

def loss_function(real, preds):
    """Return the sparse softmax cross-entropy between labels and logits.

    The sparse variant takes integer class ids as labels, so the targets do
    not have to be converted to one-hot vectors.

    Args:
        real: the labels.
        preds: the logits produced by the model.
    """
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=real,
                                                           logits=preds)
    return cross_entropy


# Checkpoints (Object-based saving)

In [0]:
# directory that holds the checkpoints, and the file-name prefix used for each save
checkpoint_dir = './training_checkpoints'
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt")

# track both the optimizer and the model in the checkpoint
checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)

# Train the model

In [16]:
# Training step

EPOCHS = 20

for epoch in range(EPOCHS):
    start = time.time()

    # Start every epoch from an explicit all-zero GRU state, one row per
    # example in a batch. model.reset_states() does not hand back a state to
    # feed into the model, so the state is built here instead. Every batch has
    # exactly BATCH_SIZE rows because the dataset is batched with
    # drop_remainder=True.
    hidden = [tf.zeros((BATCH_SIZE, units))]

    for (batch, (inp, target)) in enumerate(dataset):
        with tf.GradientTape() as tape:
            # feeding the hidden state back into the model
            # NOTE(review): the dataset is shuffled, so the state carried over
            # comes from an unrelated batch -- confirm this is intended.
            predictions, hidden = model(inp, hidden)

            # flatten the targets to one id per row of predictions, because
            # that's how the loss function expects them
            target = tf.reshape(target, (-1,))
            loss = loss_function(target, predictions)

        grads = tape.gradient(loss, model.variables)
        optimizer.apply_gradients(zip(grads, model.variables))

        if batch % 100 == 0:
            print ('Epoch {} Batch {} Loss {:.4f}'.format(epoch+1,
                                                          batch,
                                                          loss))

    # saving (checkpoint) the model every 5 epochs
    if (epoch + 1) % 5 == 0:
        checkpoint.save(file_prefix = checkpoint_prefix)

    # the loss reported here is the loss of the last batch of the epoch
    print ('Epoch {} Loss {:.4f}'.format(epoch+1, loss))
    print('Time taken for 1 epoch {} sec\n'.format(time.time() - start))

Epoch 1 Batch 0 Loss 4.1755
Epoch 1 Batch 100 Loss 2.3879
Epoch 1 Loss 2.1674
Time taken for 1 epoch 25.53898024559021 sec

Epoch 2 Batch 0 Loss 2.1254
Epoch 2 Batch 100 Loss 1.8889
Epoch 2 Loss 1.7644
Time taken for 1 epoch 24.338879346847534 sec

Epoch 3 Batch 0 Loss 1.7419
Epoch 3 Batch 100 Loss 1.6441
Epoch 3 Loss 1.5556
Time taken for 1 epoch 24.51798939704895 sec

Epoch 4 Batch 0 Loss 1.5959
Epoch 4 Batch 100 Loss 1.4926
Epoch 4 Loss 1.4534
Time taken for 1 epoch 24.543392419815063 sec

Epoch 5 Batch 0 Loss 1.4584
Epoch 5 Batch 100 Loss 1.4318
Epoch 5 Loss 1.3883
Time taken for 1 epoch 24.696858167648315 sec

Epoch 6 Batch 0 Loss 1.3502
Epoch 6 Batch 100 Loss 1.4288
Epoch 6 Loss 1.3787
Time taken for 1 epoch 24.573727130889893 sec

Epoch 7 Batch 0 Loss 1.3336
Epoch 7 Batch 100 Loss 1.2970
Epoch 7 Loss 1.3222
Time taken for 1 epoch 24.53632664680481 sec

Epoch 8 Batch 0 Loss 1.2960
Epoch 8 Batch 100 Loss 1.3224
Epoch 8 Loss 1.3231
Time taken for 1 epoch 24.69237971305847 sec

Epoc

# Restore the latest checkpoint

In [17]:
# look up the newest checkpoint saved under checkpoint_dir and load it
latest_ckpt_path = tf.train.latest_checkpoint(checkpoint_dir)
checkpoint.restore(latest_ckpt_path)

<tensorflow.python.training.checkpointable.util.CheckpointLoadStatus at 0x7f6755dcec88>

# Predicting using our trained model

In [18]:
# Evaluation step (generating text using the trained model)

# number of characters to generate
num_generate = 1000

# You can change the start string to experiment
start_string = 'Q'
# converting our start string to numbers (vectorizing!)
input_eval = [char2idx[s] for s in start_string]
input_eval = tf.expand_dims(input_eval, 0)

# generated characters are collected here and joined once at the end,
# instead of growing a string one character at a time
generated_chars = []

# low temperatures result in more predictable text,
# higher temperatures result in more surprising text;
# experiment to find the best setting
temperature = 1.0

# hidden state shape == (batch_size, number of rnn units); here batch size == 1
hidden = [tf.zeros((1, units))]
for i in range(num_generate):
    predictions, hidden = model(input_eval, hidden)

    # using a multinomial distribution to sample the next character
    predictions = predictions / temperature
    # predictions holds one row per input time step. Sample from the LAST row
    # so that a start string longer than one character is continued from its
    # final character rather than from its first one.
    predicted_id = tf.multinomial(predictions, num_samples=1)[-1, 0].numpy()

    # We pass the predicted character as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)

    generated_chars.append(idx2char[predicted_id])

# empty string if nothing was generated
text_generated = ''.join(generated_chars)

print (start_string + text_generated)


QUKET:
That hath pass'd it me ediction: he is
the county alone;
Then once again, all men are firmulture, allame, betwixt them!

RUTLAND:
Did not being thy heart willing in Somersation!
Officiours! Mine honest wealth signs?
Urth Caius Marcius.

BRUTUS:
Come away; or on not all as those that regree went to silence, and the boldness is rin Claudio?
Will you not arm me, Kate, the kingly look to fly,
To fight on England's quarrellows' countrymen or sour record
To omplain unto the root of spring of them.

TRANIO:
Mistress it pleasem, why leaving so far off
And Julietbe myself to hear
To eye our flesh is outwilling heaven for them
Shall do appear in doubled call you at the gates of York,
And art thou mad with trie oclain and nawling jow
And lies thy hands the wisds of all he is come to have some:
And in me, and what say you to love?

VAll it please you
Liest thou not quickly for her son: you know hee
Of what you see, or slemple,
Her moon, the means than the law whom you shall be Goubt,
When w