**RNN PLAY GENERATOR**

We are going to use an RNN to generate a play. We will show the *RNN* an example of something we want it to recreate, and it will learn how to write a version of it on its own. Based on: https://www.tensorflow.org/tutorials/text/text_generation

In [2]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

In [3]:
# DOWNLOADING THE DATASET

# Fetch the Shakespeare text corpus (shakespeare.txt) used by the TensorFlow
# text-generation tutorial; get_file returns the local path of the downloaded file.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)

# OR IF I WANTED TO LOAD MY OWN DATA I CAN JUST (TXT FILE ONLY)

# from google.colab import files
# path_to_file = list(files.upload().keys())[0] 

In [4]:
# READ CONTENTS OF FILE

# Read the raw bytes and decode them as UTF-8. The context manager closes the
# file handle as soon as the read finishes instead of leaving it open.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

print('Text length: {} characters\n'.format(len(text)))
print(text[:250])

Text length: 1115394 characters

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [5]:
# ENCODING
# Every distinct character in the corpus is mapped to its own integer id.

vocab = sorted(set(text))  # sorted, so the ids are assigned in a fixed order

char2idx = dict(zip(vocab, range(len(vocab))))  # character -> id
idx2char = np.array(vocab)                      # id -> character (by array index)

def text_to_int(text):
    """Encode a string as a NumPy array of character ids looked up in ``char2idx``.

    Raises KeyError if a character is not present in ``char2idx``.
    """
    ids = []
    for ch in text:
        ids.append(char2idx[ch])
    return np.array(ids)

# Encode the entire corpus once; this array is what the tf.data pipeline is built from.
text_as_int = text_to_int(text)

# DECODING
# Inverse of text_to_int: turn integer ids back into text.
def int_to_text(ints):
    """Decode character ids into a string via the ``idx2char`` lookup array.

    Args:
        ints: anything NumPy accepts as an index into ``idx2char`` (for example
            an integer array or a list of ints), or an object exposing a
            ``.numpy()`` method, which is called first to obtain the ids.

    Returns:
        The decoded characters joined into a single ``str``.

    Errors raised by ``.numpy()`` propagate to the caller rather than being
    silently ignored.
    """
    to_numpy = getattr(ints, "numpy", None)
    if callable(to_numpy):
        ints = to_numpy()
    return ''.join(idx2char[ints])

# Round-trip sanity check on the first 13 characters of the corpus
print("Text:", text[:13])
print("Encoded:", text_as_int[:13])
print("Decoded:", int_to_text(text_to_int(text[:13])))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]
Decoded: First Citizen


In [6]:
# CREATING TRAINING EXAMPLES
# The encoded corpus is cut into many short chunks that serve as training examples.
# Each chunk holds seq_length + 1 characters so that it can be split into a
# seq_length input and a seq_length target shifted one character to the right, e.g.
#   INPUT: Hell || OUTPUT: ello

seq_length = 100
examples_per_epoch = len(text) // (seq_length + 1)

# Stream of individual character ids
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Group the stream into chunks of seq_length + 1 ids; a trailing partial chunk is dropped
sequences = char_dataset.batch(batch_size=seq_length + 1, drop_remainder=True)

In [7]:
# Splitting those sequences into input and output

def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is the chunk without its last element and the target is the
    chunk without its first element, e.g. "Hello" -> ("Hell", "ello").
    """
    return chunk[:-1], chunk[1:]

# Turn every chunk into an (input, target) pair
dataset = sequences.map(map_func=split_input_target)

# Decode and show the first two pairs
for x, y in dataset.take(count=2):
    print("\n\nEXAMPLE\nINPUT:", int_to_text(x))
    print("\nOUTPUT:", int_to_text(y))



EXAMPLE
INPUT: First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT: irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE
INPUT: are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT: re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


In [8]:
# MAKING TRAINING BATCHES

BATCH_SIZE = 64           # sequences per training batch
VOCAB_SIZE = len(vocab)   # number of unique characters
EMBEDDING_DIM = 256       # embedding size handed to build_model
RNN_UNITS = 1024          # LSTM units handed to build_model
BUFFER_SIZE = 10000       # size of the buffer used when shuffling the dataset

# Shuffle the (input, target) pairs, then group them into full batches only
data = (
    dataset
    .shuffle(buffer_size=BUFFER_SIZE)
    .batch(batch_size=BATCH_SIZE, drop_remainder=True)
)

In [20]:
# BUILDING THE MODEL
# We will be using a embedding layer, a LSTM and one dense layer that contains a node for each unique character in train data.

def build_model(vocab_size, embedding_dim, rnn_units):
    """Build the character-level model: Embedding -> LSTM -> Dense.

    Args:
        vocab_size: number of distinct characters; used as the embedding input
            size and as the number of units of the final Dense layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the LSTM layer.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model. The Dense layer is created
        without an activation, so the model outputs raw scores (logits).
    """
    embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
    lstm = tf.keras.layers.LSTM(
        rnn_units,
        return_sequences=True,  # emit an output at every timestep, not only the last
        return_state=False,
        recurrent_initializer='glorot_uniform',
    )
    to_logits = tf.keras.layers.Dense(vocab_size)
    return tf.keras.Sequential([embedding, lstm, to_logits])

model = build_model(
    vocab_size=VOCAB_SIZE,
    embedding_dim=EMBEDDING_DIM,
    rnn_units=RNN_UNITS,
)
model.summary()

**CREATING A LOSS FUNCTION**

We need to create our own loss function, because our model outputs a (64, sequence_length, 65)-shaped tensor of logits:
unnormalized scores that define the probability distribution over the next character at each timestep for every sequence in the batch.

In [21]:
# Run one batch through the untrained model to see what it gives us back

for input_example_batch, target_example_batch in data.take(count=1):
    # forward pass on the first batch of training data
    example_batch_predictions = model(input_example_batch)
    print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

(64, 100, 65) # (batch_size, sequence_length, vocab_size)


In [22]:
# the prediction holds 64 entries along its first axis, one per sequence in the batch

print(example_batch_predictions.shape[0])
print(example_batch_predictions)

64
tf.Tensor(
[[[-6.6112884e-04  2.8625457e-03  6.4170090e-03 ... -2.2444059e-03
   -2.5195212e-03  4.5626592e-03]
  [-3.6763856e-03  1.0502073e-03  2.9337266e-03 ...  4.6975561e-05
   -1.6088025e-03 -3.1453106e-03]
  [-6.3614417e-03  6.0947612e-05 -1.0884242e-03 ...  8.9663500e-04
   -2.8458089e-03 -2.6361519e-03]
  ...
  [ 5.3965417e-03 -2.5660419e-03 -3.5551698e-03 ... -4.1346420e-03
   -5.2702059e-03 -4.0890859e-04]
  [-2.0118332e-03 -4.3129763e-03  2.7267430e-03 ... -7.7362554e-03
   -9.0820212e-03 -8.2099112e-05]
  [-4.5427764e-03 -7.4327732e-03  2.6739719e-03 ... -8.8502401e-03
   -3.8034942e-03  1.4788890e-03]]

 [[-5.6982180e-04 -2.3396094e-03  5.6416821e-03 ... -5.5364594e-03
   -2.2858954e-03 -4.6919426e-03]
  [-5.9112138e-03 -5.5633863e-03  7.8322776e-03 ... -6.8339542e-04
   -1.2772367e-03 -1.1800688e-03]
  [-4.1598128e-03  1.2390992e-03  6.4639160e-03 ...  2.1832306e-03
   -4.6610790e-03 -3.7566780e-03]
  ...
  [ 2.1750587e-03  2.1861764e-03  1.4865007e-03 ... -1.9352031e

In [None]:
# Look at a single prediction: a 2D array with 100 rows (one per timestep),
# where each row holds the model's scores for the next character

pred = example_batch_predictions[0]
print(pred.shape[0])
print(pred)

100
tf.Tensor(
[[-6.6112884e-04  2.8625457e-03  6.4170090e-03 ... -2.2444059e-03
  -2.5195212e-03  4.5626592e-03]
 [-3.6763856e-03  1.0502073e-03  2.9337266e-03 ...  4.6975561e-05
  -1.6088025e-03 -3.1453106e-03]
 [-6.3614417e-03  6.0947612e-05 -1.0884242e-03 ...  8.9663500e-04
  -2.8458089e-03 -2.6361519e-03]
 ...
 [ 5.3965417e-03 -2.5660419e-03 -3.5551698e-03 ... -4.1346420e-03
  -5.2702059e-03 -4.0890859e-04]
 [-2.0118332e-03 -4.3129763e-03  2.7267430e-03 ... -7.7362554e-03
  -9.0820212e-03 -8.2099112e-05]
 [-4.5427764e-03 -7.4327732e-03  2.6739719e-03 ... -8.8502401e-03
  -3.8034942e-03  1.4788890e-03]], shape=(100, 65), dtype=float32)


In [None]:
# Prediction at the first timestep: 65 raw scores (logits, not yet probabilities),
# one for each character that could come next

time_pred = pred[0]
print(time_pred.shape[0])
print(time_pred)

65
tf.Tensor(
[-6.6112884e-04  2.8625457e-03  6.4170090e-03  7.0083071e-05
  3.9898930e-03  2.9915327e-03 -7.8985456e-04  3.5819630e-03
 -7.2608011e-05  4.4781156e-03  4.4442462e-03 -7.7996361e-03
 -4.4288747e-03  1.0007964e-03  1.1413853e-05 -1.2319350e-03
 -4.4470991e-04  2.4883628e-03  9.1389869e-04  5.9806765e-03
 -3.8215320e-03  5.3309640e-03 -1.3831645e-03 -2.0746137e-03
 -7.3690759e-04  2.8823616e-03 -8.8051679e-03  3.1050474e-03
  3.9248087e-05  1.6978378e-03  5.4748049e-03 -4.3121213e-03
  4.4827387e-03 -2.0147527e-03  1.2989648e-03  1.5617122e-03
 -5.6122895e-03  6.8683538e-04 -2.2159372e-03 -1.6503979e-03
  1.4744047e-03  2.1191863e-03 -2.6400876e-03  5.8518597e-03
 -2.3681554e-04  5.2660047e-03  1.1233122e-03  2.9698908e-03
  5.7203998e-03 -2.7337037e-03 -6.0618338e-03 -1.8564209e-03
 -1.2428407e-04 -5.4105837e-04 -2.2321329e-03 -1.4074005e-03
 -1.1876461e-03 -2.3141138e-03  1.1900854e-03  5.6826435e-03
  3.9289086e-03 -2.3115955e-03 -2.2444059e-03 -2.5195212e-03
  4.562659

In [25]:
# Sample one character id per timestep from the distribution defined by the logits

sampled_indices = tf.random.categorical(logits=pred, num_samples=1)

# Flatten the sampled ids to a 1-D array and decode them into the actual characters
sampled_indices = np.reshape(sampled_indices, -1)
predicted_chars = int_to_text(sampled_indices)
predicted_chars # this is what the untrained model predicts for sequence 1

"xLbHFmrFwnoV,eECwvhx'xv!Kv$uXiNOW,a-tXARg?yVimJXGLn!AXsLxYDb$VTG,Rc'S.kFeRjg;Q:BWT;Q,;mntQBBaQL$nOe!"

In [26]:
# CREATION OF THE LOSS FUNCTION
# The loss function needs to compare the output to the expected output and give a numeric value of how close the two were

def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    ``from_logits=True`` tells Keras that ``logits`` are unnormalized scores
    rather than probabilities.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels,
        y_pred=logits,
        from_logits=True,
    )

In [27]:
# COMPILING THE MODEL
# Adam optimizer together with the custom `loss` function

model.compile(loss=loss, optimizer='adam')

In [29]:
# CREATING CHECKPOINTS
# Saved weights let us reload the model from a checkpoint and continue training it

checkpoint_dir = './RNN_PG_training_checkpoints'  # folder that receives the checkpoint files
# file name pattern; the "{epoch}" placeholder is filled in by the callback
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}.weights.h5")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    save_weights_only=True,
    filepath=checkpoint_prefix,
)

In [2]:
# ENVIRONMENT CHECK
# NOTE(review): tensorflow is already imported as `tf` in the notebook's first code cell,
# so this re-import is only needed when this cell is run on its own in a fresh kernel.
import tensorflow as tf

# Report the GPUs TensorFlow can see and the installed TensorFlow version
print("GPUs disponíveis:", tf.config.list_physical_devices('GPU'))
print("Versão do TensorFlow:", tf.__version__)


GPUs disponíveis: []
Versão do TensorFlow: 2.16.1


In [30]:
# TRAINING THE MODEL
# Fit on the batched dataset for 40 epochs, with the checkpoint callback attached

history = model.fit(x=data, callbacks=[checkpoint_callback], epochs=40)

Epoch 1/40
[1m140/172[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m32s[0m 1s/step - loss: 2.9869

KeyboardInterrupt: 