In [31]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import numpy as np
import os

In [None]:
# This model will predict the next (most likely) character in the sequence

In [32]:
# Dataset: fetch the Shakespeare corpus with tf.keras.utils.get_file.
# The first argument is the file name to store it under, the second is the
# download URL; the value returned by get_file is kept in path_to_file.
path_to_file = tf.keras.utils.get_file('shakespear.txt','https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# NOTE: the triple-quoted block below is a bare string expression, not a
# comment. Being the last expression of the cell, it is echoed as the cell's
# output. It sketches how to upload your own text file in Colab instead.
'''Uploading your own text file:

from google.colab import files
path_to_file = list(files.upload().keys())[0]'''

'Uploading your own text file:\n\nfrom google.colab import files\npath_to_file = list(files.upload().keys())[0]'

In [33]:
# Reading the contents

# Read the raw bytes and decode them as UTF-8. The context manager guarantees
# the file handle is closed as soon as the read finishes (the previous
# open(...).read() form left closing to the garbage collector).
with open(path_to_file, 'rb') as corpus_file:
  text = corpus_file.read().decode(encoding='utf-8')

# The length of the text is the number of characters in it
print(f"Length of text: {(len(text))} charachters")

Length of text: 1115394 charachters


In [4]:
# Preview the first 250 characters of the corpus
print(text[:250])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you know Caius Marcius is chief enemy to the people.



In [None]:
# ENCODING

In [34]:
# Vocabulary: every distinct character of the corpus, in sorted order
vocab = sorted(set(text))

# Forward lookup (character -> index): pair each character with its position in vocab
char2idx = dict(zip(vocab, range(len(vocab))))
# Reverse lookup: an array, so idx2char[i] yields the character for index i
idx2char = np.array(vocab)

def text_to_int(text):
  """Encode text as a NumPy array holding the char2idx index of each character.

  Every character is looked up in the module-level char2idx mapping, so a
  character missing from that mapping raises KeyError.
  """
  indices = list(map(char2idx.__getitem__, text))
  return np.array(indices)

# Encode the whole corpus as an array of character indices
text_as_int = text_to_int(text)

In [29]:
# Taking a look at the encoded text
# Show the first 13 characters next to their integer encoding
print("Text: ", text[:13])
print("Encoded: ", text_as_int[:13])

Text:  First Citizen
Encoded:  [18 47 56 57 58  1 15 47 58 47 64 43 52]


In [35]:
# optional reversed function
def int_to_text(ints):
  """Decode a sequence of character indices back into a string.

  Args:
    ints: indices into the module-level idx2char array. Either something
      NumPy can index with directly, or an object exposing .numpy() (for
      example an eager tensor), which is converted first.

  Returns:
    The characters at those indices joined into a single str.
  """
  try:
    ints = ints.numpy()
  except AttributeError:
    # No .numpy() method, so this is not a tensor-like object; use it as given.
    # Only this case is ignored, so unrelated errors are no longer swallowed.
    pass
  return ''.join(idx2char[ints])

In [6]:
# Round-trip check: decode the first 13 encoded indices back into text
int_to_text(text_as_int[:13])

'First Citizen'

In [None]:
# CREATING TRAINING EXAMPLES

In [36]:
# Length, in characters, of the input sequence of one training example
seq_length = 100
# For every training example we need an input sequence that is 100 characters
# long and an output sequence that is 100 characters long, which means we need
# seq_length + 1 = 101 characters per training example.
examples_per_epoch = len(text)//(seq_length+1)

# Create the training examples: a dataset built from the encoded corpus
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

In [39]:
# Next we use the batch method to turn the stream of characters into chunks of
# the desired length (seq_length + 1); drop_remainder=True is passed so that an
# incomplete final chunk is dropped.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

In [40]:
# Now we need to take these sequences of length 101 and split them into input and output

def split_input_target(chunk):
  """Split one chunk into an (input, target) pair for next-character training.

  The target is the input shifted one position to the left, so the element at
  position i of the target is the character that follows position i of the
  input. For the chunk "hello" this gives ("hell", "ello").

  Args:
    chunk: a sliceable sequence of length n (n = seq_length + 1 in this notebook).

  Returns:
    A tuple (input_text, target_text), each of length n - 1.
  """
  input_text = chunk[:-1]  # everything except the last element: "hell"
  # Everything except the FIRST element: "ello". The earlier chunk[:1] kept
  # only the first element, so the target had length 1 instead of n - 1.
  target_text = chunk[1:]
  return input_text, target_text

# map applies split_input_target to every entry of sequences
dataset = sequences.map(split_input_target)

In [41]:
# Finally we make training batches

BATCH_SIZE = 64 # number of sequences per training batch
VOCAB_SIZE = len(vocab) # number of unique characters in the corpus
EMBEDDING_DIM = 256 # passed to build_model as embedding_dim
RNN_UNITS = 1024 # passed to build_model as rnn_units

# Buffer size handed to Dataset.shuffle
BUFFER_SIZE = 10000

# Shuffle the (input, target) pairs, then group them into batches of
# BATCH_SIZE; drop_remainder=True is passed so an incomplete last batch is dropped.
data = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

In [None]:
# BUILDING THE MODEL

In [42]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: number of distinct characters; used as the embedding input
      size and as the number of units of the final Dense layer.
    embedding_dim: second argument of the Embedding layer.
    rnn_units: number of units of the LSTM layer.
    batch_size: fixed batch dimension placed in batch_input_shape.

  Returns:
    The tf.keras.Sequential model (not compiled).
  """
  embedding = tf.keras.layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size, None],
  )
  recurrent = tf.keras.layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform',
  )
  # The final layer has as many nodes as there are characters in the vocabulary
  projection = tf.keras.layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding, recurrent, projection])

# Build the training model with the batch dimension fixed to BATCH_SIZE,
# then print its layer summary
model = build_model(VOCAB_SIZE,EMBEDDING_DIM,RNN_UNITS, BATCH_SIZE)
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (64, None, 256)           16640     
                                                                 
 lstm_1 (LSTM)               (64, None, 1024)          5246976   
                                                                 
 dense_1 (Dense)             (64, None, 65)            66625     
                                                                 
Total params: 5330241 (20.33 MB)
Trainable params: 5330241 (20.33 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# CREATING A LOSS FUNCTION

In [43]:
# Run the model (not yet fitted at this point) on a single batch to inspect
# the shape of its output
for input_example_batch, target_example_batch in data.take(1):
  example_batch_predictions = model(input_example_batch) # ask our model for a prediction on our first batch of training data
  print(example_batch_predictions.shape, "# batch_size, sequence_length, vocab_size")

(64, 100, 65) # batch_size, sequence_length, vocab_size


In [44]:
# The prediction holds 64 entries along its first axis, one for each sequence
# in the data batch
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[ 4.32584528e-03  5.59760723e-03 -2.51257652e-03 ...  4.29108879e-03
   -8.86508962e-04  1.80023839e-03]
  [-5.60800254e-04  6.68838155e-03 -5.95131656e-04 ...  6.69392385e-03
    3.56369710e-04  9.47710592e-03]
  [ 4.74301865e-03  7.26400688e-03  7.94523396e-04 ...  5.55570936e-03
   -5.91270672e-03  5.68142347e-03]
  ...
  [ 6.36301283e-03  6.48797629e-03 -9.39564256e-04 ...  2.02074344e-03
    9.66945756e-03  3.94868571e-03]
  [-7.02205580e-04  4.19305824e-03  2.04360532e-03 ... -9.19538317e-04
    1.28374733e-02 -2.42654677e-03]
  [ 1.35589624e-03  2.54341448e-03  4.55738883e-03 ... -3.77844996e-03
    5.81055367e-03  3.82342050e-03]]

 [[ 4.32584528e-03  5.59760723e-03 -2.51257652e-03 ...  4.29108879e-03
   -8.86508962e-04  1.80023839e-03]
  [ 8.16028751e-03  5.23368688e-03  2.07741023e-03 ... -4.98526962e-04
    3.43174161e-03  4.56149876e-03]
  [ 4.91527375e-03  3.52195045e-03 -5.85988397e-04 ... -3.49345850e-04
    1.34529024e-02  6.01829379e-05]
  ...
  [-3.373

In [45]:
# Let's examine the prediction for the first sequence of the batch

pred = example_batch_predictions[0]
print(len(pred))
print(pred)
# Notice this is a 2D array of length 100, where each interior array is the
# prediction for the next character at that time step

100
tf.Tensor(
[[ 0.00432585  0.00559761 -0.00251258 ...  0.00429109 -0.00088651
   0.00180024]
 [-0.0005608   0.00668838 -0.00059513 ...  0.00669392  0.00035637
   0.00947711]
 [ 0.00474302  0.00726401  0.00079452 ...  0.00555571 -0.00591271
   0.00568142]
 ...
 [ 0.00636301  0.00648798 -0.00093956 ...  0.00202074  0.00966946
   0.00394869]
 [-0.00070221  0.00419306  0.00204361 ... -0.00091954  0.01283747
  -0.00242655]
 [ 0.0013559   0.00254341  0.00455739 ... -0.00377845  0.00581055
   0.00382342]], shape=(100, 65), dtype=float32)


In [46]:
# And finally we'll look at the prediction at the first time step

time_pred = pred[0]
print(len(time_pred))
print(time_pred)
# One score per vocabulary character for what comes next. These are the raw
# outputs of the final Dense layer (logits), not normalised probabilities.

65
tf.Tensor(
[ 4.3258453e-03  5.5976072e-03 -2.5125765e-03  4.1197753e-03
 -3.4910841e-03  3.6050547e-03  1.0486305e-04 -2.3626315e-04
 -6.5949507e-04 -2.0033177e-03 -5.4357667e-04  4.2696614e-03
 -2.5809519e-03 -3.6794036e-03 -5.8355602e-04 -2.3284929e-03
  6.7891547e-04 -1.1506937e-03 -8.1230057e-03  3.0838451e-03
  1.6705417e-03  1.1093356e-03  2.8018698e-03  2.9910645e-03
 -6.2163742e-03  4.5172041e-03  3.8738800e-03  5.5065635e-03
 -2.4454365e-04 -2.6354841e-03 -3.6530435e-04 -2.9564758e-03
  2.5105155e-03 -2.4029268e-03 -6.2869559e-03 -2.3293348e-03
  3.5876057e-03  1.9545201e-05 -1.3108503e-03  1.2362455e-03
 -3.7216083e-03  3.0789541e-03  5.7961355e-04 -8.1888709e-04
  1.5301621e-03  3.9867556e-04 -1.9524782e-03  8.1027374e-03
 -4.7259363e-03 -3.1354972e-03 -1.6754896e-03 -2.7377964e-03
 -1.1958119e-03 -2.8424165e-03 -1.0403573e-03 -6.9103664e-04
 -4.3955279e-04 -2.9225172e-03  4.6739134e-04  1.2209989e-03
 -6.5522722e-04 -2.4823514e-03  4.2910888e-03 -8.8650896e-04
  1.800238

In [47]:
# To determine the predicted characters we sample from the output
# distribution (pick an index at random, weighted by the model's scores)
sampled_indices = tf.random.categorical(pred, num_samples=1)

# Now we can flatten that array and convert the integer indices to characters
sampled_indices = np.reshape(sampled_indices, (1,-1),)[0]
predicted_chars = int_to_text(sampled_indices)

predicted_chars # this is what the model predicted for training sequence 1

"w;EHfm'VMYL-rl TOAUoLUX'VAvePtMJ?Tx!IFAjLMfGBRnXQ c bV?ANsmmYlnZc\n mKuduBlG,zemcV 3V3hsCM,ho&.VfsxGJ"

In [48]:
def loss(labels,logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True is passed because the model's final Dense layer has no
  activation, so its outputs are unnormalised scores.
  """
  crossentropy = tf.keras.losses.sparse_categorical_crossentropy
  return crossentropy(labels, logits, from_logits=True)

In [None]:
# COMPILING THE MODEL

In [49]:
# Configure training: Adam optimizer with the custom loss function defined above
model.compile(optimizer="adam",loss=loss)

In [None]:
# CREATING CHECKPOINTS

In [50]:
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files; '{epoch}' is a placeholder left in the path for
# ModelCheckpoint to fill in
checkpoint_prefix = os.path.join(checkpoint_dir,'ckpt_{epoch}')

# save_weights_only=True: only the model weights are written, not the full model.
# NOTE(review): newer Keras releases (Keras 3) appear to require the filepath to
# end in '.weights.h5' when save_weights_only=True -- confirm against the
# installed version before upgrading.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_prefix, save_weights_only=True)

In [None]:
# TRAINING

# Train for 40 epochs on the batched dataset; checkpoint_callback is registered
# so weights are written during training. The returned object is kept in history.
history = model.fit(data, epochs=40,callbacks=[checkpoint_callback])