In [29]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.layers import LSTM, Dense, GRU
from tensorflow.keras.layers import BatchNormalization, Dropout, Embedding
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
import string
import os
import datetime
import time

In [2]:
# Load the TensorBoard notebook extension (registers the %tensorboard magic)
%load_ext tensorboard

Get and read dataset

In [3]:
# Download the Shakespeare corpus (or reuse the local copy) and get its local path.
path_to_file = tf.keras.utils.get_file(
    'shakespeare.txt',
    'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')
# Read the raw bytes and decode them as UTF-8. The context manager closes the
# file handle as soon as the read finishes instead of leaving it to the GC.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [4]:
# Total number of characters in the corpus
print(len(text))

1115394


In [5]:
# Preview the data: print the first 1000 characters of the corpus
print(text[:1000])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.

All:
We know't, we know't.

First Citizen:
Let us kill him, and we'll have corn at our own price.
Is't a verdict?

All:
No more talking on't; let it be done: away, away!

Second Citizen:
One word, good citizens.

First Citizen:
We are accounted poor citizens, the patricians good.
What authority surfeits on would relieve us: if they
would yield us but the superfluity, while it were
wholesome, we might guess they relieved us humanely;
but they think we are too dear: the leanness that
afflicts us, the object of our misery, is as an
inventory to particularise their abundance; our
sufferance is a gain to them Let us revenge this with
our pikes, ere we become rakes: for the gods know I
speak this in hunger for bread, not in thirst for revenge.



In [6]:
# Sorted list of the unique characters in the corpus; its length is the raw vocabulary size
vocab = sorted(set(text))
print(len(vocab))

65


Vectorization

Strings to indices and indices to strings

In [7]:
# Lookup layers that translate between characters and integer ids.

# Forward lookup: character (string) -> integer id, built from the corpus vocabulary
ids_from_chars = preprocessing.StringLookup(
    vocabulary=list(vocab),
    mask_token=None,
)
# Inverse lookup: integer id -> character, reusing the forward layer's vocabulary
# so that both directions agree on every index
chars_from_ids = preprocessing.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(),
    invert=True,
    mask_token=None,
)

In [8]:
# Converts character ids back into joined strings
def text_from_ids(ids):
  """Turn a tensor of character ids into text.

  Each id is looked up with `chars_from_ids`, then the resulting characters
  are concatenated along the last axis with `tf.strings.reduce_join`.
  """
  chars = chars_from_ids(ids)
  return tf.strings.reduce_join(chars, axis=-1)

In [9]:
# all_ids is a 1-D int64 tf.Tensor holding the id of every character in the corpus:
# the text is split into UTF-8 characters, then each character is mapped to its id
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))
print(all_ids)

tf.Tensor([19 48 57 ... 46  9  1], shape=(1115394,), dtype=int64)


Making dataset into batches

In [10]:
# from_tensor_slices turns the id tensor into a dataset that yields one character id per element.
# ids_dataset therefore contains all the character ids of the corpus;
# it is grouped into sequences and split into input/target pairs in the following cells.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [11]:
# The text is divided into chunks of seq_length + 1 characters.
# Each chunk later yields an input and a target of seq_length characters each,
# where the target is the input advanced by one character.
seq_length = 100
# Number of complete (seq_length + 1)-character chunks in the corpus
examples_per_epoch = len(text)//(seq_length+1)
examples_per_epoch

11043

In [12]:
# Group the stream of character ids into sequences of seq_length + 1 ids;
# drop_remainder=True discards a final chunk that would be shorter than that
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

# Show the first sequence as a tensor of individual characters
print("All letters in first sequence\n")
for seq in sequences.take(1):
  print(chars_from_ids(seq))

# Show the same sequence joined back into a single byte string
print("\nConverting chars into string and printing first sequence\n")
for seq in sequences.take(1):
  print(text_from_ids(seq).numpy())

All letters in first sequence

tf.Tensor(
[b'F' b'i' b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':'
 b'\n' b'B' b'e' b'f' b'o' b'r' b'e' b' ' b'w' b'e' b' ' b'p' b'r' b'o'
 b'c' b'e' b'e' b'd' b' ' b'a' b'n' b'y' b' ' b'f' b'u' b'r' b't' b'h'
 b'e' b'r' b',' b' ' b'h' b'e' b'a' b'r' b' ' b'm' b'e' b' ' b's' b'p'
 b'e' b'a' b'k' b'.' b'\n' b'\n' b'A' b'l' b'l' b':' b'\n' b'S' b'p' b'e'
 b'a' b'k' b',' b' ' b's' b'p' b'e' b'a' b'k' b'.' b'\n' b'\n' b'F' b'i'
 b'r' b's' b't' b' ' b'C' b'i' b't' b'i' b'z' b'e' b'n' b':' b'\n' b'Y'
 b'o' b'u' b' '], shape=(101,), dtype=string)

Converting chars into string and printing first sequence

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [13]:
# Input and target are the same sequence offset by one position, so that at every
# timestep the label is the element that follows the corresponding input element
def split_input_target(sequence):
    """Split a sequence into an (input, target) pair.

    The input is every element except the last; the target is every element
    except the first. Both have length len(sequence) - 1 (0 for sequences of
    length 0 or 1).
    """
    return sequence[:-1], sequence[1:]

# Sanity-check the split on a plain Python list of characters
print(split_input_target(list("Sample text")))

# Map every sequence to an (input, target) pair
dataset = sequences.map(split_input_target)

(['S', 'a', 'm', 'p', 'l', 'e', ' ', 't', 'e', 'x'], ['a', 'm', 'p', 'l', 'e', ' ', 't', 'e', 'x', 't'])


In [14]:
# Print the input and the target of the first sequence as text;
# the target is the input advanced by one character
for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target: b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


# Gated recurrent units 

In [15]:
# Number of (input, target) sequence pairs per training batch
BATCH_SIZE = 64

# Number of elements held in the shuffle buffer
BUFFER_SIZE = 10000

# Build the training pipeline one stage at a time:
# shuffle -> fixed-size batches (incomplete last batch dropped) -> prefetch
dataset_1 = dataset.shuffle(BUFFER_SIZE)
dataset_1 = dataset_1.batch(BATCH_SIZE, drop_remainder=True)
dataset_1 = dataset_1.prefetch(tf.data.experimental.AUTOTUNE)

dataset_1

<PrefetchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [16]:
# Length of the vocabulary in chars, plus one
# NOTE(review): the +1 presumably accounts for the extra "[UNK]" entry of the
# StringLookup layer; the model cell below recomputes the size from
# ids_from_chars.get_vocabulary() instead of using this variable — confirm which is intended
vocab_size = len(vocab) +1

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [17]:
#Creating layers using Keras subclass
class Model_1(tf.keras.Model):
  """Character-level next-character model.

  Pipeline: Embedding -> Dense(1024, relu) -> GRU -> BatchNormalization -> Dense(vocab_size).
  The final Dense layer has no activation, so the model emits raw logits. This
  matches a loss built with `from_logits=True` and sampling with
  `tf.random.categorical`, both of which expect unnormalized log-probabilities.

  Args:
    vocab_size: number of distinct token ids; size of the embedding table and
      of the output layer.
    embedding_dim: dimensionality of the embedding vectors.
    rnn_units: number of units in the GRU layer.
  """

  #All the sublayers are created inside init() method
  def __init__(self, vocab_size, embedding_dim, rnn_units):
    # No positional arguments: passing `self` again would hand a stray
    # positional argument to the Keras base-class constructor.
    super().__init__()

    #Embedding is the input layer. A lookup table that will map each character id to vector with embedding-dimensions
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    self.dense_1 = tf.keras.layers.Dense(1024, activation = 'relu')

    #GRU layer; returns the full output sequence and the final state
    self.gru = tf.keras.layers.GRU(rnn_units, return_sequences = True, return_state = True)

    #Batch Normalisation
    self.batch = tf.keras.layers.BatchNormalization()

    #The output layer with vocab_size outputs. Linear (no activation): these are
    #logits, and a relu here would clamp every logit to be non-negative.
    self.dense_2 = tf.keras.layers.Dense(vocab_size)

  #call method will automatically run build the first time it is called
  def call(self, inputs, states = None, return_state = False, training = False):
    """Run the model on a batch of token ids.

    Args:
      inputs: batch of token-id sequences fed to the embedding layer.
      states: optional initial GRU state; when None the GRU's own initial
        state is used.
      return_state: when True, also return the final GRU state.
      training: forwarded to the sublayers so that layers such as
        BatchNormalization behave correctly in training vs. inference.

    Returns:
      The logits, or a `(logits, states)` tuple when `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training = training)
    x = self.dense_1(x, training = training)

    #initial_state is the list of initial state tensors to be passed to the first call of the cell
    if states is None:
      states = self.gru.get_initial_state(x)

    x, states = self.gru(x, initial_state = states, training = training)
    # Pass `training` explicitly, consistent with the other sublayers
    x = self.batch(x, training = training)
    x = self.dense_2(x, training = training)

    #whether to return the last state in addition to the output
    if return_state:
      return x, states
    else:
      return x


In [18]:
# Instantiate the GRU model; the vocabulary size is taken from the lookup layer
# so that the output layer has one unit per id the layer can produce
model_GRU = Model_1(
    vocab_size = len(ids_from_chars.get_vocabulary()),
    embedding_dim = embedding_dim,
    rnn_units = rnn_units
)

In [19]:
# Run one batch through the model and print the shape of its predictions
for input_example_batch, target_example_batch in dataset_1.take(1):
    example_batch_predictions = model_GRU(input_example_batch)
    print(example_batch_predictions.shape, " -> (batch_size, sequence_length, vocab_size)")

(64, 100, 66)  -> (batch_size, sequence_length, vocab_size)


In [20]:
# Print the layers and parameter counts of the model
model_GRU.summary()

Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        multiple                  16896     
_________________________________________________________________
dense (Dense)                multiple                  263168    
_________________________________________________________________
gru (GRU)                    multiple                  6297600   
_________________________________________________________________
batch_normalization (BatchNo multiple                  4096      
_________________________________________________________________
dense_1 (Dense)              multiple                  67650     
Total params: 6,649,410
Trainable params: 6,647,362
Non-trainable params: 2,048
_________________________________________________________________


Compile

In [21]:
# Sparse categorical cross-entropy over integer targets; from_logits=True means
# the model outputs are treated as unnormalized logits rather than probabilities
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)
# Train with the Adam optimizer and report accuracy alongside the loss
model_GRU.compile(optimizer = 'adam', loss = loss, metrics= ['accuracy'])

In [22]:
# Evaluate the loss on the example batch, using the predictions computed above
example_batch_loss = loss(target_example_batch, example_batch_predictions)
mean_loss = example_batch_loss.numpy().mean()

print("Prediction shape: ", example_batch_predictions.shape, " -> (batch_size, sequence_length, vocab_size)")
print("Mean loss:", mean_loss)
# Sanity check: exp(mean loss) is compared against the vocabulary size (66 here)
print("Mean loss exp ",tf.exp(mean_loss).numpy())

Prediction shape:  (64, 100, 66)  -> (batch_size, sequence_length, vocab_size)
Mean loss: 4.18916
Mean loss exp  65.96735


In [23]:
# Callbacks for saving checkpoints and logging to TensorBoard
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is a placeholder filled in by ModelCheckpoint
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")
# Timestamped log directory so that each run gets its own TensorBoard logs
log_dir = "logs/fit/GRU/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)
# Save only the model weights (not the full model) at each checkpoint
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True)

In [24]:
# Train for 50 epochs, checkpointing the weights and logging to TensorBoard via the callbacks
history_GRU = model_GRU.fit(dataset_1, epochs=50, callbacks=[checkpoint_callback, tensorboard_callback])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


# Text generator

In [26]:
# To generate text, a loop is run: each time the model is called, some text is passed in together with the
# internal state. The model returns a prediction for the next character and its new state, which is passed
# back in to continue generating text.
class OneStep(tf.keras.Model):
  """Wraps a trained model to sample one next character per call."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.5):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Build an additive mask over the vocabulary that is -inf at the id of
    # "[UNK]" and 0 elsewhere, so "[UNK]" can never be sampled.
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    vocab_len = len(ids_from_chars.get_vocabulary())
    neg_inf_values = [-float('inf')] * len(unk_ids)
    # A SparseTensor stores only the non-default entries: `indices` says where
    # they are, `values` what they are, and `dense_shape` gives the full shape.
    mask = tf.SparseTensor(
        indices=unk_ids,
        values=neg_inf_values,
        dense_shape=[vocab_len])
    self.prediction_mask = tf.sparse.to_dense(mask)

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Returns the sampled characters and the model state to feed into the next call.
    """
    # Strings -> characters -> token ids (dense tensor).
    chars = tf.strings.unicode_split(inputs, 'UTF-8')
    ids = self.ids_from_chars(chars).to_tensor()

    # Forward pass; logits have shape [batch, char, next_char_logits].
    logits, states = self.model(inputs=ids, states=states,
                                return_state=True)

    # Keep only the final timestep and scale it by the temperature.
    last_logits = logits[:, -1, :] / self.temperature
    # Add the mask so that "[UNK]" gets a logit of -inf.
    last_logits = last_logits + self.prediction_mask

    # Draw one token id per batch entry from the resulting distribution.
    sampled_ids = tf.random.categorical(last_logits, num_samples=1)
    sampled_ids = tf.squeeze(sampled_ids, axis=-1)

    # Token ids -> characters, returned together with the model state.
    return self.chars_from_ids(sampled_ids), states

In [41]:
# Wrap the trained GRU model for character-by-character sampling (default temperature of OneStep is used)
one_step_model = OneStep(model_GRU, chars_from_ids, ids_from_chars)

In [42]:
# Generate 500 characters from a seed string and time the run
start = time.time()
states = None
next_char = tf.constant(['very happy for you!'])
result = [next_char]

# Each step feeds the previously sampled character and the returned state back into the model
for n in range(500):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all generated characters element-wise into one string tensor
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

very happy for you! May I neel you go
what?

BIONDELLO:
I canNay.

HORTENSIO:
Well, 'tis a litland cha: which Warwick xanchos' loves.

MOPSA:
I was promised them aga, poH: but, by your leave,
I shall not notess, call not us gone.

LUCENTIO:
Provost! most whatxare shall, I will prove so, being plead
XetortaXENT:
You sjudx'd at my master, who never keeps
The Apto through by when he made fa: Angelo
Doth mine men put to death a dine Kate leave
To hear Conspira calls your penitence, I
find his troHPaIr of his Time king 

________________________________________________________________________________

Run time: 3.7162413597106934


In [43]:
# Export the one-step generator as a SavedModel in the 'one_step' directory, then load it back
tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')





INFO:tensorflow:Assets written to: one_step/assets


INFO:tensorflow:Assets written to: one_step/assets


In [44]:
# Seed text for generation with the reloaded model
input_word = 'My name is Virinchi'

In [47]:
# Generate text from the seed with the reloaded one-step model
states = None
next_char = tf.constant([input_word])
result = [next_char]
# Number of characters to generate: the length of the seed plus 10
r = len(input_word) + 10
for n in range(r):
  next_char, states = one_step_reloaded.generate_one_step(next_char, states=states)
  result.append(next_char)

# Join the seed and the generated characters and print the result as text
print(tf.strings.join(result)[0].numpy().decode("utf-8"))

My name is Virinching beasts in ,
Which issured 
