<a href="https://colab.research.google.com/github/AYSTONER/RNN-play-generator/blob/main/RNN_play_generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [13]:
import keras
from keras.datasets import imdb
from keras.preprocessing import sequence
import tensorflow as tf
import os
import numpy as np


In [14]:
# saving the path to it
path_to_file = tf.keras.utils.get_file('shakespeare.txt', 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

In [15]:
# to import your own file, use this code
# from google.colab import files
# path_to_file = list(files.upload().keys())[0]

In [16]:
# read the file then decode
text = open(path_to_file, 'rb').read().decode(encoding='utf-8')
# len of text is the amount of characters in it
print(f'Length of text: {len(text)} characters')


Length of text: 1115394 characters


In [17]:
# first 250 characyers
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [18]:
# sort the unique characters in the data
vocab = sorted(set(text))
# Creating a mapping from unique characters to indices

char2idx = {u:i for i, u in enumerate(vocab)}

idx2char = np.array(vocab)

def text_to_int(text):
  return np.array([char2idx[c] for c in text])
text_as_int=text_to_int(text)

# lets look at how part of our text is encoded
print("Text:", text[:13])
print("Encoded:", text_to_int(text[:13]))

Text: First Citizen
Encoded: [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [19]:
def int_to_text(ints):

  try:

    ints = ints.numpy()

  except:

    pass

  return "".join(idx2char[ints])

print(int_to_text(text_as_int[:13]))

First Citizen


In [20]:
# creating a training data
seq_length = 100 #length of sequence for a training exmple
num_per_epoch = len(text) // (seq_length + 1)
# this code converts the entire string dataset into characters and will contain a stream of characters(training examples)
character_dst = tf.data.Dataset.from_tensor_slices(text_as_int)

In [21]:
# use the batch method to batch the characters
sequences = character_dst.batch(seq_length+1, drop_remainder=True)


In [22]:
# use the sequence of length 101 and split into input and output
def split_input_target(chunk): # for the example: hello
  input_text = chunk[:-1] #   hell
  target_text = chunk [1:] # ello
  return input_text, target_text # hell, ello

dataset = sequences.map(split_input_target) #we use map to apply the above function to every entry



In [23]:
for x, y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(x))





EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 


In [24]:
# create our training batches
BATCH_SIZE =64
VOCAB_SIZE = len(vocab)#number of unique characters
EMBDDING_DIM = 256
RNN_UNIT = 1024

BUFFER_SIZE = 1000
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder= True)


In [25]:

# building the model
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  model = tf.keras.Sequential([
    tf.keras.layers.Embedding(vocab_size, embedding_dim, batch_input_shape=[batch_size, None]),

    tf.keras.layers.LSTM(rnn_units,
                        return_sequences=True,
                        stateful=True,
                         recurrent_initializer='glorot_uniform'),
  tf.keras.layers.Dense(vocab_size)
  ])
  return model






model = build_model(VOCAB_SIZE, EMBDDING_DIM, RNN_UNIT, BATCH_SIZE)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (64, None, 256)           16640     
                                                                 
 lstm (LSTM)                 (64, None, 1024)          5246976   
                                                                 
 dense (Dense)               (64, None, 65)            66625     
                                                                 
Total params: 5330241 (20.33 MB)
Trainable params: 5330241 (20.33 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [26]:
# creating a loss function
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch) #ask our model for a prediction on our first batch of training data
  print(example_batch_predictions.shape," (batch_size, sequence_length, vocab_size)") #print out the output shape
# we can see that the predicition is an array of 64 arrays, one for each entry in the batch
print(len(example_batch_predictions))
print(example_batch_predictions)


(64, 100, 65)  (batch_size, sequence_length, vocab_size)
64
tf.Tensor(
[[[-0.00339666  0.00372586 -0.00232849 ... -0.00115589 -0.00019825
    0.00094499]
  [-0.00316992  0.00132619 -0.00286747 ... -0.00390963 -0.00041807
   -0.0013145 ]
  [-0.00281235  0.00308569 -0.00754597 ... -0.00070454  0.00506456
   -0.00220535]
  ...
  [-0.00888389 -0.00183344 -0.00569069 ... -0.00555229  0.00339345
   -0.00232633]
  [-0.00691008 -0.0031834  -0.00688785 ... -0.00659937  0.00247563
   -0.00440373]
  [-0.00314979 -0.00036527 -0.00271043 ...  0.00134703  0.00321952
   -0.00168075]]

 [[ 0.00354769 -0.00012214 -0.00555308 ...  0.00184843  0.00082338
   -0.00334246]
  [ 0.00151818 -0.00017169 -0.0076186  ... -0.00167721  0.00033441
    0.00298148]
  [-0.00112527  0.0031728  -0.01152877 ...  0.00115662 -0.00094682
    0.00086874]
  ...
  [ 0.00126767 -0.00155362 -0.00449295 ... -0.0085513   0.00485331
    0.001348  ]
  [ 0.00188477 -0.003213   -0.00571553 ... -0.00752354  0.0045382
   -0.00038649]
  [

In [27]:
# lets examine one prediction
pred = example_batch_predictions[0]
print(len(pred))
print(pred)
# 2d array of length 100 where each interior array is a prediction for the next character in the next timestep

100
tf.Tensor(
[[-0.00339666  0.00372586 -0.00232849 ... -0.00115589 -0.00019825
   0.00094499]
 [-0.00316992  0.00132619 -0.00286747 ... -0.00390963 -0.00041807
  -0.0013145 ]
 [-0.00281235  0.00308569 -0.00754597 ... -0.00070454  0.00506456
  -0.00220535]
 ...
 [-0.00888389 -0.00183344 -0.00569069 ... -0.00555229  0.00339345
  -0.00232633]
 [-0.00691008 -0.0031834  -0.00688785 ... -0.00659937  0.00247563
  -0.00440373]
 [-0.00314979 -0.00036527 -0.00271043 ...  0.00134703  0.00321952
  -0.00168075]], shape=(100, 65), dtype=float32)


In [28]:
# prediction for each time step
time_pred = pred[0]
print(len(time_pred))
print(time_pred)
# 65 values representing the prediction of the next character

65
tf.Tensor(
[-3.3966561e-03  3.7258593e-03 -2.3284925e-03  4.6405208e-04
 -9.3733985e-04  1.5143754e-03  4.3055508e-04 -1.0022020e-03
 -6.2810159e-03 -6.5836776e-04  9.9644705e-04 -1.0627857e-03
 -4.2817257e-03 -2.3619509e-03  7.5291982e-03 -3.2232834e-03
  1.3981643e-04 -3.7296412e-03 -1.8671958e-03  8.5892942e-04
  5.2631581e-03  3.9782980e-04 -1.5964090e-03 -9.3981880e-04
  6.7808293e-03 -6.6705421e-04 -3.3922987e-03  4.3701311e-04
 -4.7203787e-03  1.8988936e-03 -2.7058111e-03 -3.8031046e-03
  8.2357507e-03  3.4296010e-03 -3.6744424e-04 -2.9932510e-03
  5.3522801e-03 -1.5641218e-03  2.1864031e-04  8.5007574e-04
  1.5418578e-03 -1.3240112e-03 -4.3200371e-03  7.6427604e-03
 -1.9367656e-03 -4.5194011e-03  4.6163648e-03  7.9570571e-05
 -2.2193128e-03  5.0714822e-03 -2.3162579e-03  3.0907956e-03
 -2.9080622e-03  2.5916239e-03 -8.8151929e-04 -4.9675425e-04
  6.0751080e-04  8.5453130e-04  4.4859760e-03 -9.0702920e-04
 -2.1113958e-03  2.7016725e-04 -1.1558896e-03 -1.9824726e-04
  9.449875

In [29]:

# to determine the predicted character, we need to sample the output distribution(pick a character based on probability)
sampled_indices =tf.random.categorical(pred, num_samples =1)
# reshape the array and convert all integers to numbers
sampled_indices = np.reshape(sampled_indices,(1, -1))[0]
predicted_charactrs = int_to_text(sampled_indices)
predicted_charactrs


"NNe$gM\nx\nTo?FspJwTgc&Mj;RiST,ZD:RK x'gnxX.RLfOAgEvQMSeCX Q;UgstHpMpZ? bYQ?ulZkpZqrtA:YgPtbb&OMNHqana"

In [30]:

# now we can create a loss funtion that compares that output to the expected output and gives us some numerical value telling how close the two were
def loss(labels, logits):
  return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits = True) #logits is probability distribution(nodes(dense stuff)).....
  # the goal of our algorithm in the network is to reduce the loss

In [31]:
# compile the model
model.compile(optimizer= "adam", loss=loss)
# its like a classification problem where the model predicts the probability of each unique letter coming next

In [32]:
# configure our model to save checkpoints as it trains
# allow us to load our model from a checkpoint to continue training
# Directory where the checkpoints will be saved
checkpoint_dir ='./training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)


In [34]:

# training the model
history = model.fit(data, epochs=2, callbacks=[checkpoint_callback] )

Epoch 1/2
Epoch 2/2


In [35]:
# rebuild the model using a batch size of 1 since its a prediction for 1 character not 64 characters(batches)
model = build_model(VOCAB_SIZE, EMBDDING_DIM, RNN_UNIT, batch_size = 1)

In [36]:
# find the latest checkpoint the stores the models weight
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1, None]))

In [37]:
def generate_text(model, start_string):
  num_generate = 400



  input_eval = [char2idx[s] for s in start_string]
  input_eval = tf.expand_dims(input_eval, 0)
  # empty string to store result
  text_generated = []

  temperature = 1.2   #low temp - more predictable text, high temp - more suprising text

# batch_size == 1
  model.reset_states()

  for i in range(num_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # use a categorical distribution to predict the character from the model
    predictions = predictions / temperature
    predicted_id =tf.randon.categorical(predictions, num_samples=1)[-1,0].numpy()

    # we pass the output(predicted charactr) from the model as the next input to the model
    # along with the previous hidden state
    input_eval = tf.expand_dims([predicted_id], 0)
    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [38]:

inp = input('Type a starting string: ')
print(generate_text(model,inp))

Type a starting string: rest


AttributeError: ignored