In [None]:
from keras.preprocessing import sequence
import keras
import tensorflow as tf
import os
import numpy as np

In [None]:
# Download the Shakespeare corpus (Keras caches it locally) and keep the local path.
path_to_file = tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt',
)


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [None]:
# Use this if you want to upload your own text document; for now we're using Shakespeare.
'''
from google.colab import files
path_to_file=list(files.upload().keys())[0]
'''

In [None]:
# Read the corpus as raw bytes and decode it as UTF-8. The context manager
# closes the file handle as soon as the read finishes instead of leaking it.
with open(path_to_file,'rb') as corpus_file:
  text=corpus_file.read().decode(encoding='utf-8')

# length of the text is the number of characters in it
print('Length of text : {} characters'.format(len(text)))

Length of text : 1115394 characters


In [None]:
# Preview the first 300 characters of the corpus.
print(text[:300])

**Encoding**

Each unique character is encoded as a different integer.

In [None]:
vocab=sorted(set(text))           # sorted list of every unique character in the text

# Lookup tables in both directions: character -> integer id, and integer id -> character.
char2idx={u:i for i,u in enumerate(vocab)}
idx2char=np.array(vocab)          # NumPy array, so an array of ids can index it directly

def text_to_int(text):
  """Encode a string as a NumPy array of integer character ids.

  Each character is looked up in the module-level `char2idx` table; a
  character missing from that table raises `KeyError`.
  """
  encoded=[]
  for character in text:
    encoded.append(char2idx[character])
  return np.array(encoded)

# Encode the whole corpus as an array of integer character ids.
text_as_int=text_to_int(text)

In [None]:
# Show a short sample of the text next to its integer encoding.
print("Text: ", text[:13])
print("Encoded: ", text_to_int(text[:13]))

In [None]:
def int_to_text(ints):
  """Decode integer character ids back into a string.

  Args:
    ints: the ids to decode. Either something that can index the
      module-level `idx2char` NumPy array directly (an integer array or
      list), or an object exposing a `.numpy()` method, which is
      converted first.

  Returns:
    The decoded characters joined into a single `str`.
  """
  # Convert only when the object actually offers .numpy(). The previous bare
  # `except: pass` also hid unrelated failures (even KeyboardInterrupt).
  to_numpy=getattr(ints,'numpy',None)
  if callable(to_numpy):
    ints=to_numpy()
  return ''.join(idx2char[ints])

# Round-trip check: decode the first 13 encoded ids back into text.
print(int_to_text(text_as_int[:13]))

First Citizen


Creating Training Examples

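Rather than feeding the entire text at once, we train on fixed-length sequences. For each input sequence, the label is the same text shifted one character to the right, so at every position the model learns to predict the next character (for example, input "hell" → label "ello").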

In [None]:
seq_length=100                                  # characters per training input
# number of full (seq_length+1)-character chunks that fit in the text
examples_per_epoch=len(text)//(seq_length+1)

# creating training examples/targets: start from a dataset that yields one character id at a time
char_dataset=tf.data.Dataset.from_tensor_slices(text_as_int)

In [None]:
# Group the character stream into chunks of seq_length+1 ids (one extra so that each chunk
# can be split into an input and a target); drop_remainder discards a final shorter chunk.
sequences=char_dataset.batch(seq_length+1,drop_remainder=True)   #putting the dataset into batches of desired length

Now we split each 101-character sequence into an input (its first 100 characters) and a target (its last 100 characters).

In [None]:
def split_input_target(chunk):
  """Split one chunk into an (input, target) pair offset by one position.

  The input is the chunk without its last element and the target is the
  chunk without its first element, e.g. "hello" -> ("hell", "ello").
  """
  return chunk[:-1],chunk[1:]

dataset=sequences.map(split_input_target)       # apply split_input_target to every chunk, yielding (input, target) pairs

In [None]:
# Print the first two (input, target) pairs as text to check the one-character offset.
for x,y in dataset.take(2):
  print("\n\nEXAMPLE\n")
  print("INPUT")
  print(int_to_text(x))
  print("\nOUTPUT")
  print(int_to_text(y))



EXAMPLE

INPUT
First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You

OUTPUT
irst Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You 


EXAMPLE

INPUT
are all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you 

OUTPUT
re all resolved rather to die than to famish?

All:
Resolved. resolved.

First Citizen:
First, you k


Making Training Batches

In [None]:
BATCH_SIZE=64              # sequences per training batch
VOCAB_SIZE=len(vocab)      # number of unique characters
EMBEDDING_DIM=256          # dimension of vectors in embedding layer
RNN_UNITS=1024             # number of units in the LSTM layer

#buffer size to shuffle the elements
BUFFER_SIZE=10000

# Shuffle the examples, then group them into batches of BATCH_SIZE; drop_remainder discards
# a final partial batch so every batch has exactly BATCH_SIZE sequences.
data=dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

**Building the Model**

In [None]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
  """Assemble the character model: Embedding -> stateful LSTM -> Dense.

  Args:
    vocab_size: number of distinct character ids; used as the embedding
      input size and as the number of output units.
    embedding_dim: size of each embedding vector.
    rnn_units: number of LSTM units.
    batch_size: fixed batch size baked into the model's input shape.

  Returns:
    An uncompiled `tf.keras.Sequential` model. The final Dense layer has no
    activation, so it outputs one raw score per character id.
  """
  layers=tf.keras.layers
  embedding=layers.Embedding(
      vocab_size,
      embedding_dim,
      batch_input_shape=[batch_size,None])
  recurrent=layers.LSTM(
      rnn_units,
      return_sequences=True,
      stateful=True,
      recurrent_initializer='glorot_uniform')
  scores=layers.Dense(vocab_size)
  return tf.keras.Sequential([embedding,recurrent,scores])

# Build the training model with the training batch size and print its layer summary.
model=build_model(VOCAB_SIZE, EMBEDDING_DIM, RNN_UNITS, BATCH_SIZE)
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
lstm (LSTM)                  (64, None, 1024)          5246976   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 5,330,241
Trainable params: 5,330,241
Non-trainable params: 0
_________________________________________________________________


Creating a Loss Function

In [None]:
# Run the (still untrained) model on the first batch to check the output shape.
# The target batch is unpacked but not needed for this check.
for input_example_batch,target_example_batch in data.take(1):
  example_batch_predictions=model(input_example_batch)         #prediction on 1st batch
  print(example_batch_predictions.shape, "# (batch_size,sequence_length,vocab_size)")       #output shape

(64, 100, 65) # (batch_size,sequence_length,vocab_size)


In [None]:
# Inspect the raw output for the whole batch; len() gives the size of the first (batch) dimension.
print(len(example_batch_predictions))
print(example_batch_predictions)

64
tf.Tensor(
[[[-4.35322337e-03 -5.96316718e-03  6.21498155e-04 ...  3.95013369e-04
   -6.28710538e-03 -9.82082449e-04]
  [-4.83203260e-03 -4.38144570e-03 -2.91602360e-03 ... -8.14423838e-04
   -2.05587968e-03  1.07849506e-03]
  [-8.50506965e-03 -8.61323997e-03 -8.97047110e-03 ... -1.11458125e-03
    1.71006855e-03  3.99824185e-03]
  ...
  [-1.11014172e-02 -1.05707645e-02 -7.37554859e-03 ... -1.31539116e-03
    2.21319683e-03  1.75144635e-02]
  [-6.24691788e-03 -1.39642358e-02 -4.13247105e-03 ...  4.61515505e-04
   -7.84620584e-04  1.24724451e-02]
  [-4.68615489e-03 -9.03265644e-03 -5.08794514e-03 ... -9.38251615e-05
    3.46405292e-03  1.59397833e-02]]

 [[-4.40974766e-03  5.57443919e-03  3.52374371e-03 ...  1.95894716e-03
    1.94849411e-03 -2.75645498e-03]
  [-6.44375756e-03  4.97705815e-03 -4.72289074e-04 ... -5.91672142e-04
    3.82385333e-03  7.59961782e-04]
  [-8.88469163e-03  9.09097679e-03  3.25556146e-03 ...  1.59917842e-03
    4.28472925e-03 -2.46180221e-04]
  ...
  [ 2.918

In [None]:
# examining one prediction: the output for the first sequence in the batch
pred=example_batch_predictions[0]
print(len(pred))
print(pred)

100
tf.Tensor(
[[-4.3532234e-03 -5.9631672e-03  6.2149815e-04 ...  3.9501337e-04
  -6.2871054e-03 -9.8208245e-04]
 [-4.8320326e-03 -4.3814457e-03 -2.9160236e-03 ... -8.1442384e-04
  -2.0558797e-03  1.0784951e-03]
 [-8.5050697e-03 -8.6132400e-03 -8.9704711e-03 ... -1.1145812e-03
   1.7100686e-03  3.9982419e-03]
 ...
 [-1.1101417e-02 -1.0570765e-02 -7.3755486e-03 ... -1.3153912e-03
   2.2131968e-03  1.7514464e-02]
 [-6.2469179e-03 -1.3964236e-02 -4.1324710e-03 ...  4.6151550e-04
  -7.8462058e-04  1.2472445e-02]
 [-4.6861549e-03 -9.0326564e-03 -5.0879451e-03 ... -9.3825161e-05
   3.4640529e-03  1.5939783e-02]], shape=(100, 65), dtype=float32)


In [None]:
# now we'll look at a prediction at the first timestep
time_pred=pred[0]
print(len(time_pred))
print(time_pred)
# the 65 values are unnormalized scores (logits), one per vocabulary character, for the
# character occurring next; they are not probabilities until a softmax is applied

65
tf.Tensor(
[-4.3532234e-03 -5.9631672e-03  6.2149815e-04 -2.9003234e-03
  5.3185475e-04 -7.8967959e-04  6.2883417e-03 -4.5606815e-03
 -2.5071555e-03 -9.2325509e-03  8.9550321e-04 -4.5694583e-03
 -4.2928578e-03 -3.1235784e-03  1.0440724e-03 -5.4096943e-04
  9.2637405e-04  3.9796107e-03 -2.7889125e-03 -3.3533820e-03
 -3.2813624e-03  5.2086795e-03  4.4036568e-03 -3.9666593e-03
  1.6528995e-03  3.2263470e-04  1.4312507e-03  2.7853320e-04
 -2.5721374e-03 -1.4299988e-03 -8.4274582e-04  2.0258185e-03
  6.1624189e-05  8.5192733e-03 -7.5394055e-05 -1.1111780e-03
  3.2387360e-04 -9.2128254e-03 -3.2711274e-03  1.6333652e-03
  1.3942006e-03 -2.1381353e-03  4.0205340e-03  6.5847137e-03
  1.4907643e-03 -5.2932780e-03 -4.9783862e-03 -6.0028774e-03
 -4.6517234e-03  1.2986710e-03  2.7053084e-03  1.1229775e-03
  3.2225118e-03 -1.0363717e-03  4.1581118e-03  3.0091717e-03
 -1.4300719e-03 -3.5022711e-03 -1.1557562e-04 -1.4216393e-03
 -9.3663606e-05  6.0275284e-04  3.9501337e-04 -6.2871054e-03
 -9.820824

In [None]:
# Draw one character id per timestep from the distribution defined by the logits.
sampled_indices=tf.random.categorical(logits=pred, num_samples=1)

# Flatten the (timesteps, 1) samples into a 1-D array of ids, then decode them.
sampled_indices=np.reshape(sampled_indices, (-1,))
predicted_chars=int_to_text(sampled_indices)

predicted_chars

"Mkl$WEBXR!\nkKcwpHLQ.KR:a?knqtxWQRmusOhmdinMhg-uAYd'UiToqTnvbeT3Tmo dkqskzTJpfqYAdUhM'E3MupwSkLazP,Va"

In [None]:
def loss(labels,logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  `from_logits=True` is required because the model outputs unnormalized
  scores rather than probabilities.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels,
      y_pred=logits,
      from_logits=True)

Compiling the model

In [None]:
# Configure training: Adam optimizer with the from-logits cross-entropy `loss` function.
model.compile(optimizer='adam',loss=loss)

Creating Checkpoints

Now we will set up our model to save checkpoints as it trains. This will allow us to load our model from a checkpoint and continue training it.

In [None]:
checkpoint_dir="./training_checkpoints" # directory where it'll be saved
# Name of the checkpoint files; "{epoch}" is a placeholder filled in with the epoch number
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

# Callback passed to model.fit that writes checkpoints to checkpoint_prefix;
# save_weights_only=True stores just the weights rather than the full model.
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

**Training**

In [None]:
# Train for 40 epochs with the checkpoint callback attached; fit() returns a History object.
history=model.fit(data, epochs=40, callbacks=[checkpoint_callback])

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


Loading the model

In [None]:
# Rebuild the same architecture with batch_size=1 so text can be generated from a single
# sequence (the training model was built for a fixed batch of BATCH_SIZE sequences).
model=build_model(VOCAB_SIZE,EMBEDDING_DIM,RNN_UNITS,batch_size=1)

In [None]:
# for getting the most recent checkpoint
# NOTE(review): tf.train.latest_checkpoint presumably returns None when the directory holds
# no checkpoint, which would make load_weights fail — confirm training has run first.
model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))
model.build(tf.TensorShape([1,None]))

In [None]:
# for getting a checkpoint of a specific epoch
# (load_weights takes the checkpoint path itself; tf.train.load_checkpoint returns a
# reader object, not a path, so it must not be passed to load_weights)
'''
checkpoint_num=10
model.load_weights("./training_checkpoints/ckpt_"+str(checkpoint_num))
model.build(tf.TensorShape([1,None]))
'''

**GENERATING TEXT**

In [None]:
def generate_text(model, start_string, num_generate=800, temperature=1.0):
  """Generate text one character at a time, seeded with `start_string`.

  Args:
    model: the trained model; it is called on a tensor with a leading batch
      dimension of 1 and its output is squeezed on that dimension, so it
      must have been built with batch_size=1.
    start_string: non-empty seed text. Every character must be a key of the
      module-level `char2idx` table.
    num_generate: number of characters to generate after the seed
      (default 800).
    temperature: positive divisor applied to the logits before sampling.
      Low temperature gives more predictable text, high gives more
      surprising text (default 1.0).

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
    KeyError: if `start_string` contains a character missing from `char2idx`.
  """
  # Fail early with a clear message instead of an obscure tensor-shape error.
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature<=0:
    raise ValueError("temperature must be positive")

  # Encode the seed and add the batch dimension: shape (1, len(start_string)).
  input_eval=[char2idx[s] for s in start_string]
  input_eval=tf.expand_dims(input_eval,0)

  text_generated=[]

  # Clear the recurrent state left over from any previous call.
  model.reset_states()
  for _ in range(num_generate):
    predictions=model(input_eval)

    # Remove the batch dimension: (1, timesteps, vocab) -> (timesteps, vocab).
    predictions=tf.squeeze(predictions,0)

    # Scale the logits, then sample; [-1,0] keeps the sample for the last timestep.
    predictions=predictions/temperature
    predicted_id=tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Only the new character is fed back in; the stateful LSTM carries the earlier context.
    input_eval=tf.expand_dims([predicted_id],0)
    text_generated.append(idx2char[predicted_id])

  return start_string+''.join(text_generated)

In [None]:
# Ask the user for a seed string and print it followed by the generated continuation.
inp=input("Type a starting string")
print(generate_text(model,inp))

Type a starting stringRomeo
Romeou chides; thit has he doth awhile,
And most all dead, that lift such ventle deeds,
They have been still begin to dry have your names:
Provost, a fool, issue, which was so abrace?

First Murderer:
Now, in good time: how love to her King of
Eath, strings musician in the sun:
So shall you be a Richard, I am pains
For this most precedee.
I should have done thy grey have need of much ado.

KING RICHARD III:
I will not miss the play so. Where is her high me ha?

DUCHESS OF YORK:
Why, York, what wilt thou do?

GREMIO:
Northumberland, redees him hither; and so wide as deeply
Letturders:
Look that by God's fance unbrothed i' the body.

MENENIUS:
I am a great n particular saint,
Were tender y an imprisonment in foul swift and masks.
I confess you,
Let them have given to beat by the shepherd;
Reform'
