<a href="https://colab.research.google.com/github/AYSTONER/RNN-play-generator/blob/main/RNN_play_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [78]:
%tensorflow_version 2.x
import keras
from keras.datasets import imdb
from keras.preprocessing import sequence
import tensorflow as tf
import os
import numpy as np


Colab only includes TensorFlow 2.x; %tensorflow_version has no effect.


In [79]:
# saving the path to it
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [80]:
# to import your own file, use this code
# from google.colab import files
# path_to_file = list(files.upload().keys())[0]

In [81]:
# read the file then decode
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# len of text is the amount of characters in it
print(f'Length of text: {len(text)} characters')


Length of text: 1115394 characters


In [82]:
# first 250 characyers
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [83]:
# sort the unique characters in the data
vocab = sorted(set(text))
# Creating a mapping from unique characters to indices

char2idx = {u:i for i, u in enumerate(vocab)}

idx2char = np.array(vocab)

def text_to_int(text):
  return np.array([char2idx[c] for c in text])
text_as_int=text_to_int(text)

# lets look at how part of our text is encoded
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [84]:
def int_to_text(ints):

  try:

    ints = ints.numpy()

  except:

    pass

  return "".join(idx2char[ints])

print(int_to_text(text_as_int[:13]))

First Citizen


In [85]:
# creating a training data
seq_length = 100 #length of sequence for a training exmple
num_per_epoch = len(text) // (seq_length + 1)
# this code converts the entire string dataset into characters and will contain a stream of characters(training examples)
character_dst = tf.data.Dataset.from_tensor_slices(text_as_int)

In [86]:
# use the batch method to batch the characters
sequences = character_dst.batch(seq_length+1, drop_remainder=True)


In [87]:
# use the sequence of length 101 and split into input and output
def split_input_target(chunk): # for the example: hello
  input_text = chunk[:-1] #   hell
  target_text = chunk [1:] # ello
  return input_text, target_text # hell, ello

dataset = sequences.map(split_input_target) #we use map to apply the above function to every entry



In [88]:
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(x))





EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 


In [89]:
# create our training batches
BATCH_SIZE =64
VOCAB_SIZE = len(vocab)#number of unique characters
EMBDDING_DIM = 256
RNN_UNIT = 1024

BUFFER_SIZE = 1000
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder= True)


In [90]:

# building the model
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),

    tf.keras.layers.LSTM(rnn_units,
                        return_sequences=True,
                        stateful=True,
                         recurrent_initializer='glorot_uniform'),
  tf.keras.layers.Dense(vocab_size)
  ])
  return model






model = build_model(VOCAB_SIZE, EMBDDING_DIM, RNN_UNIT, BATCH_SIZE)

model.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_4 (Embedding)     (64, None, 256)           16640     
                                                                 
 lstm_4 (LSTM)               (64, None, 1024)          5246976   
                                                                 
 dense_4 (Dense)             (64, None, 65)            66625     
                                                                 
Total params: 5330241 (20.33 MB)
Trainable params: 5330241 (20.33 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [91]:
# creating a loss function
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch) #ask our model for a prediction on our first batch of training data
  print(example_batch_predictions.shape," (batch_size, sequence_length, vocab_size)") #print out the output shape
# we can see that the predicition is an array of 64 arrays, one for each entry in the batch
print(len(example_batch_predictions))
print(example_batch_predictions)


(64, 100, 65)  (batch_size, sequence_length, vocab_size)
64
tf.Tensor(
[[[ 2.90187472e-03 -8.64573941e-03 -4.52754262e-04 ...  1.23450719e-03
    5.52992336e-03  3.95592581e-03]
  [-2.96954531e-03 -6.40117005e-03 -6.47847261e-03 ... -1.39941135e-03
    2.17099092e-03  1.46700740e-02]
  [-3.82969249e-03 -1.04468572e-03 -6.23628264e-03 ... -6.10565767e-03
    1.37738278e-03  1.35648474e-02]
  ...
  [ 3.93566396e-03 -1.74706075e-02  5.05244639e-03 ...  1.56311877e-03
    3.16285924e-03  4.98690689e-03]
  [-2.32771016e-03 -1.60812885e-02 -2.76323408e-03 ... -1.40638882e-03
    3.10819782e-03  7.80413486e-03]
  [-2.61532236e-03 -1.53960213e-02  1.21810054e-03 ... -3.95171950e-03
    3.23932222e-03  5.11337537e-03]]

 [[-1.62781368e-03  3.10006132e-03 -1.73694245e-03 ... -4.29182593e-03
   -1.22540817e-03  2.41854251e-03]
  [-6.12360984e-03  3.01651482e-04 -7.20979786e-03 ... -4.66218498e-03
   -1.55302393e-03  5.21751028e-03]
  [-2.40962091e-03  2.14562751e-03 -4.79089655e-03 ... -4.1808798

In [92]:
# lets examine one prediction
pred = example_batch_predictions[0]
print(len(pred))
print(pred)
# 2d array of length 100 where each interior array is a prediction for the next character in the next timestep

100
tf.Tensor(
[[ 0.00290187 -0.00864574 -0.00045275 ...  0.00123451  0.00552992
   0.00395593]
 [-0.00296955 -0.00640117 -0.00647847 ... -0.00139941  0.00217099
   0.01467007]
 [-0.00382969 -0.00104469 -0.00623628 ... -0.00610566  0.00137738
   0.01356485]
 ...
 [ 0.00393566 -0.01747061  0.00505245 ...  0.00156312  0.00316286
   0.00498691]
 [-0.00232771 -0.01608129 -0.00276323 ... -0.00140639  0.0031082
   0.00780413]
 [-0.00261532 -0.01539602  0.0012181  ... -0.00395172  0.00323932
   0.00511338]], shape=(100, 65), dtype=float32)


In [93]:
# prediction for each time step
time_pred = pred[0]
print(len(time_pred))
print(time_pred)
# 65 values representing the prediction of the next character

65
tf.Tensor(
[ 2.9018747e-03 -8.6457394e-03 -4.5275426e-04 -5.9458390e-03
 -9.1681385e-04 -2.9490276e-03  2.2071507e-03  6.6788853e-03
 -3.9436994e-03  2.4298930e-03 -7.2926021e-05  3.8239933e-03
 -6.7915758e-03 -2.0783828e-03  2.3809441e-03  2.7900550e-04
 -3.2943422e-03 -1.1456059e-03 -5.4725595e-03 -7.1931951e-04
 -1.7413092e-03 -1.9141519e-03  1.9332453e-03  5.4377574e-04
  4.8998250e-03 -4.1354490e-03  2.8137811e-03  2.8143581e-03
 -2.0140843e-03  6.1865719e-03  2.0247025e-03  7.0415414e-04
  2.0874680e-03 -2.7035321e-03  6.7494088e-04 -2.0917563e-05
 -4.7043208e-03  2.6688618e-03 -4.4843783e-03  8.9718419e-04
  7.7267044e-04  2.6209177e-03 -1.7208465e-03  1.5541710e-03
  2.3786123e-03 -1.8487117e-04 -2.0960106e-03 -1.4579052e-03
 -3.9627813e-03 -8.6243656e-03 -6.0958527e-03 -8.8243658e-04
 -7.7046105e-03  1.3788685e-03 -1.2990828e-03 -1.1715367e-03
 -2.9979274e-04 -4.0294076e-03 -4.9001290e-03  9.4262656e-04
  3.0622252e-03  9.1061322e-03  1.2345072e-03  5.5299234e-03
  3.955925

In [94]:

# to determine the predicted character, we need to sample the output distribution(pick a character based on probability)
sampled_indices =tf.random.categorical(pred, num_samples =1)
# reshape the array and convert all integers to numbers
sampled_indices = np.reshape(sampled_indices,(1, -1))[0]
predicted_charactrs = int_to_text(sampled_indices)
predicted_charactrs


"CwEmResbPOOABb-KLw-:BIl&Buz\n\n .JSgoH!KW-3A-M3!fJygXIb:H-rnxz\nh KixjtD-lCb.:$&;n!vIL'AFDqQwp&C3Gb!Q?y"

In [95]:

# now we can create a loss funtion that compares that output to the expected output and gives us some numerical value telling how close the two were
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits = True) #logits is probability distribution(nodes(dense stuff)).....
  # the goal of our algorithm in the network is to reduce the loss

In [96]:
# compile the model
model.compile(optimizer= "adam", loss=loss)
# its like a classification problem where the model predicts the probability of each unique letter coming next

In [97]:
# configure our model to save checkpoints as it trains
# allow us to load our model from a checkpoint to continue training
# Directory where the checkpoints will be saved
checkpoint_dir ='./training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)


In [98]:

# training the model
history = model.fit(data, epochs=20, callbacks=[checkpoint_callback] )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [99]:
# rebuild the model using a batch size of 1 since its a prediction for 1 character not 64 characters(batches)
model = build_model(VOCAB_SIZE, EMBDDING_DIM, RNN_UNIT, batch_size = 1)

In [100]:
# find the latest checkpoint the stores the models weight
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

In [101]:
def generate_text(model, start_string):
  num_generate = 400



  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)
  # empty string to store result
  text_generated = []

  temperature = 1.2   #low temp - more predictable text, high temp - more suprising text

# batch_size == 1
  model.reset_states()

  for i in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # use a categorical distribution to predict the character from the model
    predictions = predictions / temperature
    predicted_id =tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # we pass the output(predicted charactr) from the model as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [102]:

inp = input('Type a starting string: ')
print(generate_text(model,inp))

Type a starting string: play
play?

Say was a foul word:
Lie foot, mich opinior stil get no cause.

HENRY BOWCKAMIO:
KAil Silience!
Tell her her wits, bride, take thee all Meremonius,
Her ever hunning lead, who meautiling a months-sharps!
Go above him, or mocking Puriedla!-with me to give incert!

PETRUCHIO:
Nay, take carry wed, being heldet
To set inchised, hearing, to stain at
any mirrow? there is sister,
And my heart thee-buzz
