<a href="https://colab.research.google.com/github/GlassesNoGlasses/TFProjects/blob/main/TFTut_RNN_Text_Generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Goal**:
Given a text, have the model predict and output the next sequences of sentences.

**Model Goal**:
Given a sequence of characters, predict the character that comes next.
Call the model repeatedly, feeding each prediction back in as input, to produce sentences and then a whole text stream.

In [None]:
# Import required libraries

import tensorflow as tf

import numpy as np
import os
import time

In [None]:
# Fetch the Shakespeare corpus through Keras' download helper and keep the local file path it returns.
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [None]:
# Read the raw bytes, then decode them as UTF-8. The `with` block closes the
# file handle as soon as the read finishes instead of leaving it open.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')
# length of text is the number of characters in it
print(f'Length of text: {len(text)} characters')

Length of text: 1115394 characters


In [None]:
# The unique characters in the file, in sorted order.
# This list is the character vocabulary used to build the lookup layers.
vocab = sorted(set(text))
print(f'{len(vocab)} unique characters')

65 unique characters


In [None]:
# Build a lookup layer over the characters in `vocab`: calling it on characters
# returns one integer id per character. No mask token is configured.

ids_from_chars = tf.keras.layers.StringLookup(
    vocabulary=list(vocab), mask_token=None)

In [None]:
# Inverse lookup (invert=True): map integer ids back to characters. It reuses
# the vocabulary reported by `ids_from_chars` so both directions agree.
chars_from_ids = tf.keras.layers.StringLookup(
    vocabulary=ids_from_chars.get_vocabulary(), invert=True, mask_token=None)

In [None]:
# Join ids back into the original strings
def text_from_ids(ids):
  """Decode character ids to characters and concatenate them along the last axis."""
  decoded_chars = chars_from_ids(ids)
  return tf.strings.reduce_join(decoded_chars, axis=-1)

In [None]:
# Example of the id <-> char round trip on two strings of different lengths.

example_texts = ['abcdefg', 'xyz']

# Split each string into its characters (one row of characters per string).
chars = tf.strings.unicode_split(example_texts, input_encoding='UTF-8')
print(chars)

# Convert each character into its integer id.
ids = ids_from_chars(chars)
print(ids)

# Convert the ids back into characters; this should reproduce `chars`.
reconvertedChars = chars_from_ids(ids)
reconvertedChars

<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>
<tf.RaggedTensor [[40, 41, 42, 43, 44, 45, 46], [63, 64, 65]]>


<tf.RaggedTensor [[b'a', b'b', b'c', b'd', b'e', b'f', b'g'], [b'x', b'y', b'z']]>

In [None]:
# Tokenize the whole corpus: split it into characters and map each one to its id.
all_ids = ids_from_chars(tf.strings.unicode_split(text, 'UTF-8'))

# Wrap the ids in a dataset that yields one character id per element,
# following the order of the original text.
ids_dataset = tf.data.Dataset.from_tensor_slices(all_ids)

In [None]:
# Number of characters in each input sequence the model is trained on.
seq_length = 100

In [None]:
# Group the id stream into consecutive chunks of seq_length + 1 ids. The extra
# id lets each chunk be split into an input and a target shifted by one position.
# drop_remainder=True: a final chunk shorter than seq_length + 1 is dropped.
sequences = ids_dataset.batch(seq_length+1, drop_remainder=True)

In [None]:
# Example: decode the first five chunks back to text to check the chunking.
for seq in sequences.take(5):
  print(text_from_ids(seq).numpy())

b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
b'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'
b"now Caius Marcius is chief enemy to the people.\n\nAll:\nWe know't, we know't.\n\nFirst Citizen:\nLet us ki"
b"ll him, and we'll have corn at our own price.\nIs't a verdict?\n\nAll:\nNo more talking on't; let it be d"
b'one: away, away!\n\nSecond Citizen:\nOne word, good citizens.\n\nFirst Citizen:\nWe are accounted poor citi'


In [None]:
# Turn one sequence into an (input, label) pair offset by one position.
# I.e. "tensorflow" -> ("tensorflo", "ensorflow")

def split_input_target(sequence):
    """Return (sequence without its last element, sequence without its first element)."""
    return sequence[:-1], sequence[1:]

In [None]:
# Create the training data set: every chunk in `sequences` becomes an
# (input, target) pair via split_input_target.
dataset = sequences.map(split_input_target)

In [None]:
# Example training data: decode the first (input, target) pair. The target is
# the input shifted one character to the left.

for input_example, target_example in dataset.take(1):
    print("Input :", text_from_ids(input_example).numpy())
    print("Target:", text_from_ids(target_example).numpy())

Input : b'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target: b'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [None]:
# Creating training batches

BATCH_SIZE = 64

# Number of elements held in the shuffle buffer.
BUFFER_SIZE = 10000

# Build the pipeline from `sequences` instead of re-wrapping `dataset`, so that
# re-running this cell does not batch an already-batched dataset.
dataset = (
    sequences
    .map(split_input_target)
    .shuffle(BUFFER_SIZE)
    .batch(BATCH_SIZE, drop_remainder=True)
    # tf.data.AUTOTUNE replaces the deprecated tf.data.experimental.AUTOTUNE alias.
    .prefetch(tf.data.AUTOTUNE))

dataset


<_PrefetchDataset element_spec=(TensorSpec(shape=(64, 100), dtype=tf.int64, name=None), TensorSpec(shape=(64, 100), dtype=tf.int64, name=None))>

In [None]:
# Length of the vocabulary in the StringLookup layer. This is taken from the
# layer rather than from `vocab`, so it matches the ids the layer can produce.
vocab_size = len(ids_from_chars.get_vocabulary())

# The embedding dimension (size of the vector each character id is mapped to)
embedding_dim = 256

# Number of RNN units (passed to the GRU layer)
rnn_units = 1024

In [None]:
class MyModel(tf.keras.Model):
  """Character-level language model: Embedding -> GRU -> Dense.

  Args:
    vocab_size: number of distinct token ids (unique inputs + 1); used as both
      the embedding input size and the number of output logits.
    embedding_dim: output vector dimensions of the embedding layer.
    rnn_units: number of units in the GRU layer.
  """

  def __init__(self, vocab_size, embedding_dim, rnn_units):
    # Do not pass `self` here: super() already binds the instance, so an
    # explicit `self` would reach Model.__init__ as a stray positional argument.
    super().__init__()
    self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    # return_sequences gives one output per input position; return_state also
    # hands back the final state so generation can be continued step by step.
    self.gru = tf.keras.layers.GRU(rnn_units,
                                   return_sequences=True,
                                   return_state=True)
    # One logit per vocabulary entry (no activation is applied here).
    self.dense = tf.keras.layers.Dense(vocab_size)

  def call(self, inputs, states=None, return_state=False, training=False):
    """Run the model on a batch of token ids.

    Args:
      inputs: batch of token-id sequences.
      states: GRU state to start from; when None the GRU's initial state is used.
      return_state: when True, also return the GRU state after the last step.
      training: forwarded to every layer.

    Returns:
      The per-position logits, or a `(logits, states)` pair when
      `return_state` is True.
    """
    x = inputs
    x = self.embedding(x, training=training)
    if states is None:
      states = self.gru.get_initial_state(x)
    x, states = self.gru(x, initial_state=states, training=training)
    x = self.dense(x, training=training)

    if return_state:
      return x, states
    else:
      return x

In [None]:
# Instantiate the character model with the vocabulary size, embedding
# dimension and GRU width chosen for this notebook.
model = MyModel(
    vocab_size=vocab_size,
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)

# How it works
# (The string below is the last expression of the cell, so it is echoed as the cell output.)
'''
For each batch of inputs, we take each character,
map the character to an embedding layer, update the
GRU model with the embedding layer of input character,
take GRU output and updates the dense (output) value
of the character appearing.
'''

'\nFor each batch of inputs, we take each character,\nmap the character to an embedding layer, update the\nGRU model with the embedding layer of input character,\ntake GRU output and updates the dense (output) value\nof the character appearing.\n'

In [None]:
# Run one batch through the untrained model to check the output shape.
# The loop variables stay bound after the loop and are reused by later cells.
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 66) # (batch_size, sequence_length, vocab_size)


In [None]:
# Show the model's layers and their parameter counts.
model.summary()

Model: "my_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       multiple                  16896     
                                                                 
 gru (GRU)                   multiple                  3938304   
                                                                 
 dense (Dense)               multiple                  67650     
                                                                 
Total params: 4022850 (15.35 MB)
Trainable params: 4022850 (15.35 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# Sampling distributions: draw one id per position from the logits of the
# first sequence in the batch (sampling, not taking the highest logit).

sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis so there is one id per position.
sampled_indices = tf.squeeze(sampled_indices, axis=-1).numpy()

print("Input:\n", text_from_ids(input_example_batch[0]).numpy())
print()
print("Next Char Predictions:\n", text_from_ids(sampled_indices).numpy())

Input:
 b', but, in foul mouth\nAnd in the witness of his proper ear,\nTo call him villain? and then to glance f'

Next Char Predictions:
 b"Rhi'n,ws'hyiRDIMjGSUdvU!Ek$cLPnBu?&VJ[UNK].AzkocfQkvTuBL.gUsDMmrmdQZxT[UNK],-X!P,Baqu!GJXvEjTY?LNMF\nchUv.e,Y"


In [None]:
# Adding a loss function

# from_logits=True because model is returning logits
loss = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

# Evaluate the loss on the example batch from the untrained model; the result
# is a single scalar for the whole batch.
example_batch_mean_loss = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
print("Mean loss:        ", example_batch_mean_loss)

Prediction shape:  (64, 100, 66)  # (batch_size, sequence_length, vocab_size)
Mean loss:         tf.Tensor(4.1891747, shape=(), dtype=float32)


In [None]:
# Testing loss function
#
# A freshly initialized model has not learned anything yet, so its predictions
# should be close to uniform over the vocabulary. In that case the exponential
# of the mean loss should be approximately equal to the vocabulary size.
#
# A much higher value => the model is sure of its wrong answers and is badly
# initialized.

tf.exp(example_batch_mean_loss).numpy()

65.96832

In [None]:
# Configure the model for training: Adam optimizer plus the
# sparse categorical cross-entropy loss object defined earlier.

model.compile(optimizer='adam', loss=loss)

In [None]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; "{epoch}" is a placeholder filled in per epoch
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback that writes checkpoints during fit(); only the weights are saved.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [None]:
# Actual training process: fit on the batched dataset for EPOCHS epochs,
# with the checkpoint callback attached.

EPOCHS = 20

history = model.fit(dataset, epochs=EPOCHS, callbacks=[checkpoint_callback])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [None]:
# Generating Text Class

class OneStep(tf.keras.Model):
  """Wraps a character model so that each call samples one next character."""

  def __init__(self, model, chars_from_ids, ids_from_chars, temperature=1.0):
    super().__init__()
    self.temperature = temperature
    self.model = model
    self.chars_from_ids = chars_from_ids
    self.ids_from_chars = ids_from_chars

    # Additive logit mask that keeps "[UNK]" from ever being sampled:
    # -inf at the id of "[UNK]", with the mask as long as the vocabulary.
    vocab_len = len(ids_from_chars.get_vocabulary())
    unk_ids = self.ids_from_chars(['[UNK]'])[:, None]
    neg_inf = [float('-inf')] * len(unk_ids)
    self.prediction_mask = tf.sparse.to_dense(
        tf.SparseTensor(indices=unk_ids,
                        values=neg_inf,
                        dense_shape=[vocab_len]))

  @tf.function
  def generate_one_step(self, inputs, states=None):
    """Sample the next character for each input string.

    Returns the sampled characters and the model state to pass to the next call.
    """
    # Strings -> characters -> dense tensor of token ids.
    token_ids = self.ids_from_chars(
        tf.strings.unicode_split(inputs, 'UTF-8')).to_tensor()

    # logits.shape is [batch, char, next_char_logits]
    logits, states = self.model(inputs=token_ids, states=states,
                                return_state=True)

    # Keep only the last position, scale by the temperature, then add the
    # mask so "[UNK]" cannot be drawn.
    last_logits = logits[:, -1, :] / self.temperature + self.prediction_mask

    # Sample one token id per batch row and drop the num_samples axis.
    sampled = tf.random.categorical(last_logits, num_samples=1)
    next_ids = tf.squeeze(sampled, axis=-1)

    # Token ids -> characters, together with the state for the next step.
    return self.chars_from_ids(next_ids), states

**Idea**: The training has been done. We want to track the model's prediction of the next character, as well as the model's internal state. In each character iteration, we pass in the new internal state of the model and its prediction. Run in a loop.

In [None]:
# Wrap the trained model and both lookup layers in a single-step generator (default temperature).
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

In [None]:
# Generate 1000 characters from the seed "ROMEO:" and time the run.
start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

# Each step feeds back the character just sampled together with the model
# state returned by the previous step.
for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all sampled characters into one string per batch row.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:
Go to; sir, your mistress were the sons?

MIRANDA:
Mistake me not.

DUKE VINCENTIO:
There was a matter; yet I know my heart
And wished a charm of arm me left with them;
For such a clouds are strength must entertain it.

HERMIONE:
Sound, sir, come.

LUCIO:
Sir, your resoluting wretch.

Second Servant:
He doth ever less high a life.

ANGELO:
Believe me, my good lord, I pray your speech, and learn
A man that found our former plucks of liberty.
Gow plays we most the end were all offenders' speak,
That raised by the noble king my husband's fresh.
And till she stood up unsaved:
The tongues of the presence goes
Uppressting on thy trifes,--
Gentlemen, cocelon, Clifford, could not speak?
Your son--his deeds since, we will continue
then and weak such come back, but that to dream
As to a feast of poher from his histed speed,
Which thou hast pass to scrape, and live as stay.
And, if would I were some penntrot
Are sen a little under run,
But faults from hence, milty unshort
And leads the wed

Improvements:


*   Train the model for longer (a higher `EPOCHS` value).
*   Add another RNN layer to improve accuracy.
*   Adjust the temperature to make predictions more or less random.
*   Generate several results **in parallel** by passing a batch of seed strings, as shown below.



In [None]:
# Same generation loop as for a single seed, but with a batch of five
# identical seeds, so five texts are generated at once.
start = time.time()
states = None
next_char = tf.constant(['ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:', 'ROMEO:'])
result = [next_char]

for n in range(1000):
  next_char, states = one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# One joined string per seed; the whole batch tensor is printed undecoded.
result = tf.strings.join(result)
end = time.time()
print(result, '\n\n' + '_'*80)
print('\nRun time:', end - start)

tf.Tensor(
[b"ROMEO:\nSome out of sore lines:\nGood dances can tell them and retaguing; and I chnocks my nation\nVeruse degrees I will leave his welk at him,\nNor with thy swelling heart\nPardon Remaints: 'He help to say\nI do retermine or to Bolingbriot's heart.\n\nADRAND:\nFaith, my noble mother!' there is no more sorrow ensper is.\n\nLUCIO:\nI take her. So use his passage.\n\nESCALUS:\nMy lord.\n\nDUKE OF YORK:\nWhat will you undertake the trumpets bold.\n\nHASTINGS:\nI am the greatest deposide, that the slave\nDeliver us will find a thousand grace.\n\nGLOUCESTER:\nThey be in love?\n\nHASTINGS:\nBreak thee to fly, Tapth, and wild marriage,\n'Martared, nobless with our royal elders.\n\nALONSO:\nI primose than my daughter's home, the drud I now\nTo make an emperite things.\n\nKING RICHARD III:\nThat shows you to make aing the steel?\n\nEXETER:\nHere are the untimely face that figure is sicks:\nI hear those gracious just princes where he abship\nTo the deputy's deeds o'er one 'WESTER:\

In [None]:
# Export the one-step generator as a SavedModel under 'one_step',
# then load it back from the same path.

tf.saved_model.save(one_step_model, 'one_step')
one_step_reloaded = tf.saved_model.load('one_step')



In [None]:
# Advanced/Custom training
#
# The main idea is to take control of the update step ourselves: run the
# forward pass under a GradientTape, compute the loss, differentiate the loss
# with respect to the model's weights, and let the optimizer apply those
# gradients to update the model.

class CustomTraining(MyModel):
  """MyModel with a hand-written training step."""

  @tf.function
  def train_step(self, inputs):
      """Run one optimization step on an (inputs, labels) pair.

      Returns a dict with the loss computed for this batch under key 'loss'.
      """
      inputs, labels = inputs
      with tf.GradientTape() as tape:
          predictions = self(inputs, training=True)
          loss = self.loss(labels, predictions)
      # Use this instance's own variables. The notebook-global `model` only
      # happens to be the same object when this method is called on it.
      grads = tape.gradient(loss, self.trainable_variables)
      self.optimizer.apply_gradients(zip(grads, self.trainable_variables))

      return {'loss': loss}

In [None]:
# Rebind `model` to a fresh CustomTraining instance; from here on `model` no
# longer refers to the model trained with fit() earlier in the notebook.
model = CustomTraining(
    vocab_size=len(ids_from_chars.get_vocabulary()),
    embedding_dim=embedding_dim,
    rnn_units=rnn_units)


# Compile with an optimizer and loss; train_step reads both from the model.
model.compile(optimizer = tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True))

# Train for a single epoch through the regular fit() entry point.
model.fit(dataset, epochs=1)



<keras.src.callbacks.History at 0x7d9c8a01cdc0>

In [None]:
# Hand-written training loop: calls train_step directly for every batch and
# tracks the running mean of the batch losses per epoch.
EPOCHS = 10

mean = tf.metrics.Mean()

for epoch in range(EPOCHS):
    start = time.time()

    # Start every epoch's running average from scratch.
    # reset_state() is the current name of the deprecated reset_states().
    mean.reset_state()
    for (batch_n, (inp, target)) in enumerate(dataset):
        logs = model.train_step([inp, target])
        mean.update_state(logs['loss'])

        if batch_n % 50 == 0:
            template = f"Epoch {epoch+1} Batch {batch_n} Loss {logs['loss']:.4f}"
            print(template)

    # saving (checkpoint) the model every 5 epochs; the file name uses the
    # same 1-based epoch number that is printed in this loop
    if (epoch + 1) % 5 == 0:
        model.save_weights(checkpoint_prefix.format(epoch=epoch + 1))

    print()
    print(f'Epoch {epoch+1} Loss: {mean.result().numpy():.4f}')
    print(f'Time taken for 1 epoch {time.time() - start:.2f} sec')
    print("_"*80)

# Always save the weights of the final epoch, whether or not it fell on a
# multiple of 5.
model.save_weights(checkpoint_prefix.format(epoch=epoch + 1))

Epoch 1 Batch 0 Loss 2.1733
Epoch 1 Batch 50 Loss 2.0425
Epoch 1 Batch 100 Loss 1.9666
Epoch 1 Batch 150 Loss 1.8401

Epoch 1 Loss: 2.0005
Time taken for 1 epoch 20.47 sec
________________________________________________________________________________
Epoch 2 Batch 0 Loss 1.8547
Epoch 2 Batch 50 Loss 1.7791
Epoch 2 Batch 100 Loss 1.7215
Epoch 2 Batch 150 Loss 1.6276

Epoch 2 Loss: 1.7205
Time taken for 1 epoch 11.24 sec
________________________________________________________________________________
Epoch 3 Batch 0 Loss 1.6110
Epoch 3 Batch 50 Loss 1.5703
Epoch 3 Batch 100 Loss 1.5058
Epoch 3 Batch 150 Loss 1.5066

Epoch 3 Loss: 1.5574
Time taken for 1 epoch 11.22 sec
________________________________________________________________________________
Epoch 4 Batch 0 Loss 1.4907
Epoch 4 Batch 50 Loss 1.4788
Epoch 4 Batch 100 Loss 1.4681
Epoch 4 Batch 150 Loss 1.4504

Epoch 4 Loss: 1.4579
Time taken for 1 epoch 11.11 sec
_____________________________________________________________________

In [None]:
# Rebuild the one-step generator around the custom-trained `model`, then
# generate 1000 characters from the seed "ROMEO:" and time the run.
one_step_model = OneStep(model, chars_from_ids, ids_from_chars)

start = time.time()
states = None
next_char = tf.constant(['ROMEO:'])
result = [next_char]

# Each step feeds back the character just sampled together with the model
# state returned by the previous step.
for n in range(1000):
  next_char, states =  one_step_model.generate_one_step(next_char, states=states)
  result.append(next_char)

# Concatenate the seed and all sampled characters into one string per batch row.
result = tf.strings.join(result)
end = time.time()
print(result[0].numpy().decode('utf-8'), '\n\n' + '_'*80)
print('\nRun time:', end - start)

ROMEO:
I will to sweet some tread in Poin'd Volsces
Of that he had rude the prison with you;
In odns and the father's shoulders.

First Musician:
Soother, they if well
sweet will not have no gracious bush, and fain and hear.

QUEEN MARGARET:
Thanks, heavy too for the son, a noble pardon,
I would are succed each pity.

PROSPERO:
Late me the uncle Vould and his present a traitor,
Bless me of his heir. Come, I have done
For Clifford and The heavion of thine honest:
I'll go asking your watch these people.

First Murderer:
Turn giddy, sir, as you cannot learn the tcoung,
You shall go and bry weok to Romeo's are,
That were my fancy to entreat of death.
Tell them for what is dishand.'

FLORIZEL:
I speak not liberty and,
That never made me stain'd the kinsman's wisdem,
And we and to the boy of this sweeters
Show me for, let them to see you, therefore I
I know the cause, which have break me, wid,
What entrance his contract that know
I have in ware? Proshead, bedied the wold.

JULIET:
All formal