In [None]:
import tensorflow as tf
import numpy as np
import os
import time

In [None]:
#Fetch the Shakespeare corpus; get_file returns the path of the local copy.
path_to_file=tf.keras.utils.get_file(
    fname='shakespeare.txt',
    origin='https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt


In [None]:
#Show where the corpus file lives locally.
path_to_file

'/root/.keras/datasets/shakespeare.txt'

In [None]:
#Read the corpus as raw bytes and decode it explicitly as UTF-8, so the result does
#not depend on the platform's default text encoding. The with-block guarantees the
#file handle is closed once the contents have been read.
with open(path_to_file,'rb') as corpus_file:
  text=corpus_file.read().decode(encoding='utf-8')
print(f'Length of text:{len(text)}')

Length of text:1115394


In [None]:
#Preview the first 100 characters of the corpus.
print(text[:100])

First Citizen:
Before we proceed any further, hear me speak.

All:
Speak, speak.

First Citizen:
You


In [None]:
#The vocabulary is every distinct character of the corpus, in sorted order.
vocab=list(set(text))
vocab.sort()
print('{} unique characters'.format(len(vocab)))

65 unique characters


Process the text

In [None]:
#Lookup tables between characters and integer ids:
#  char2idx: character -> position of that character in vocab
#  idx2char: numpy array, so an id (or an array of ids) can index it directly
char2idx=dict(zip(vocab,range(len(vocab))))
idx2char=np.array(vocab)

#Encode the whole corpus (a plain str) as one integer id per character.
text_as_int=np.array([char2idx[ch] for ch in text])

In [None]:
#One id per character, so the length matches len(text).
text_as_int.shape

(1115394,)

In [None]:
#Inspect the full character -> id mapping.
char2idx

{'\n': 0,
 ' ': 1,
 '!': 2,
 '$': 3,
 '&': 4,
 "'": 5,
 ',': 6,
 '-': 7,
 '.': 8,
 '3': 9,
 ':': 10,
 ';': 11,
 '?': 12,
 'A': 13,
 'B': 14,
 'C': 15,
 'D': 16,
 'E': 17,
 'F': 18,
 'G': 19,
 'H': 20,
 'I': 21,
 'J': 22,
 'K': 23,
 'L': 24,
 'M': 25,
 'N': 26,
 'O': 27,
 'P': 28,
 'Q': 29,
 'R': 30,
 'S': 31,
 'T': 32,
 'U': 33,
 'V': 34,
 'W': 35,
 'X': 36,
 'Y': 37,
 'Z': 38,
 'a': 39,
 'b': 40,
 'c': 41,
 'd': 42,
 'e': 43,
 'f': 44,
 'g': 45,
 'h': 46,
 'i': 47,
 'j': 48,
 'k': 49,
 'l': 50,
 'm': 51,
 'n': 52,
 'o': 53,
 'p': 54,
 'q': 55,
 'r': 56,
 's': 57,
 't': 58,
 'u': 59,
 'v': 60,
 'w': 61,
 'x': 62,
 'y': 63,
 'z': 64}

In [None]:
#Show the first 13 characters next to their integer encoding.
print('{}---characters mapped to int---->{}'.format(text[:13],text_as_int[:13]))

First Citizen---characters mapped to int---->[18 47 56 57 58  1 15 47 58 47 64 43 52]


In [None]:
#Number of input characters per training example.
seq_length=100
#Count of whole (seq_length+1)-character chunks available in the corpus.
examples_per_epoch=len(text)//(seq_length+1)

#Dataset that yields the encoded corpus one character id at a time.
char_dataset=tf.data.Dataset.from_tensor_slices(text_as_int)

In [None]:
#Decode the first five elements back to characters as a sanity check.
for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

F
i
r
s
t


In [None]:
#Group the character stream into chunks of seq_length+1 ids; drop_remainder=True
#discards a final chunk that would be shorter than that.
sequences=char_dataset.batch(seq_length+1,drop_remainder=True)

#repr() gives the printable representation of the string, so a newline shows up
#as the two characters \n instead of starting a new line.
for item in sequences.take(2):
  print(repr(''.join(idx2char[item.numpy()])))

'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '
'are all resolved rather to die than to famish?\n\nAll:\nResolved. resolved.\n\nFirst Citizen:\nFirst, you k'


In [None]:
#Inspect the dataset's element shape and dtype.
sequences

<BatchDataset shapes: (101,), types: tf.int64>

In [None]:
def split_input_target(chunk):
  """Turn one chunk into an (input, target) pair offset by one position.

  The input is the chunk without its last element and the target is the chunk
  without its first element, so target[k] is the element that follows input[k].
  """
  return chunk[:-1],chunk[1:]

#Apply split_input_target to every chunk, giving a dataset of (input, target) pairs.
dataset=sequences.map(split_input_target)

In [None]:
#Decode the first (input, target) pair back to text. The loop variables
#input_example and target_example stay defined afterwards and are reused by a
#later cell.
for input_example,target_example in dataset.take(1):
    print('Input data: ', repr(''.join(idx2char[input_example.numpy()])))
    print('Target data:', repr(''.join(idx2char[target_example.numpy()])))

Input data:  'First Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou'
Target data: 'irst Citizen:\nBefore we proceed any further, hear me speak.\n\nAll:\nSpeak, speak.\n\nFirst Citizen:\nYou '


In [None]:
#Look at the raw tensors of the first (input, target) pair.
next(iter(dataset))

(<tf.Tensor: shape=(100,), dtype=int64, numpy=
 array([18, 47, 56, 57, 58,  1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 14, 43,
        44, 53, 56, 43,  1, 61, 43,  1, 54, 56, 53, 41, 43, 43, 42,  1, 39,
        52, 63,  1, 44, 59, 56, 58, 46, 43, 56,  6,  1, 46, 43, 39, 56,  1,
        51, 43,  1, 57, 54, 43, 39, 49,  8,  0,  0, 13, 50, 50, 10,  0, 31,
        54, 43, 39, 49,  6,  1, 57, 54, 43, 39, 49,  8,  0,  0, 18, 47, 56,
        57, 58,  1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 37, 53, 59])>,
 <tf.Tensor: shape=(100,), dtype=int64, numpy=
 array([47, 56, 57, 58,  1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 14, 43, 44,
        53, 56, 43,  1, 61, 43,  1, 54, 56, 53, 41, 43, 43, 42,  1, 39, 52,
        63,  1, 44, 59, 56, 58, 46, 43, 56,  6,  1, 46, 43, 39, 56,  1, 51,
        43,  1, 57, 54, 43, 39, 49,  8,  0,  0, 13, 50, 50, 10,  0, 31, 54,
        43, 39, 49,  6,  1, 57, 54, 43, 39, 49,  8,  0,  0, 18, 47, 56, 57,
        58,  1, 15, 47, 58, 47, 64, 43, 52, 10,  0, 37, 53, 59,  1])>)

In [None]:
#Walk through the first five time steps: at each step the model receives one
#input character and is expected to predict the character that follows it.
#input_example and target_example come from the earlier dataset.take(1) loop,
#so that cell must have been run first.
for i,(input_idx,target_idx) in enumerate(zip(input_example[:5],target_example[:5])):
  print('Step:{}'.format(i))
  print('Input:{}({})'.format(input_idx,repr(idx2char[input_idx])))
  print('Expected output:{}({})'.format(target_idx,repr(idx2char[target_idx])))

Step:0
Input:18('F')
Expected output:47('i')
Step:1
Input:47('i')
Expected output:56('r')
Step:2
Input:56('r')
Expected output:57('s')
Step:3
Input:57('s')
Expected output:58('t')
Step:4
Input:58('t')
Expected output:1(' ')


In [None]:
#Number of (input, target) sequences per training batch.
BATCH_SIZE=64
#Size of the buffer that shuffle() draws elements from.
BUFFER_SIZE=10000

#Shuffle the sequences, then group them into batches of exactly BATCH_SIZE;
#drop_remainder=True discards a final batch that would be smaller.
dataset=dataset.shuffle(BUFFER_SIZE)
dataset=dataset.batch(BATCH_SIZE,drop_remainder=True)
dataset

<BatchDataset shapes: ((64, 100), (64, 100)), types: (tf.int64, tf.int64)>

In [None]:
#Number of distinct characters; used as the embedding input size and as the
#number of logits the final Dense layer produces.
vocab_size=len(vocab)
#Length of the embedding vector learned for each character.
embedding_dim=256
#Number of units in the GRU layer.
rnn_units=1024

In [None]:
#Character-level model: ids -> embedding vectors -> GRU -> one logit per vocabulary
#entry at every time step. The batch size is fixed to BATCH_SIZE while the sequence
#length is left open (None).
model=tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=vocab_size,output_dim=embedding_dim,
                                    batch_input_shape=(BATCH_SIZE,None)))
model.add(tf.keras.layers.GRU(units=rnn_units,return_sequences=True,stateful=True,
                              recurrent_initializer='glorot_uniform'))
model.add(tf.keras.layers.Dense(units=vocab_size))


In [None]:
#Run a single batch through the model to check the output shape. The loop variables
#and example_batch_predictions stay defined and are used by the following cells.
for input_example_batch,target_example_batch in dataset.take(1):
  example_batch_predictions=model(input_example_batch)
  print(example_batch_predictions.shape)   #(batch_size, sequence_length, vocab_size)

(64, 100, 65)


In [None]:
#Index of the largest logit at every position of every sequence in the batch.
#Despite the name, these are ids chosen from the model's output, not the training
#targets; model.fit has not been called yet at this point in the notebook.
target_tokens=tf.argmax(example_batch_predictions,axis=-1)
target_tokens

<tf.Tensor: shape=(64, 100), dtype=int64, numpy=
array([[33, 54, 54, ..., 62, 61, 27],
       [27, 29, 11, ..., 50, 50, 46],
       [62,  9, 29, ..., 54, 43, 46],
       ...,
       [ 6, 55, 50, ..., 43, 54, 29],
       [35, 54, 14, ..., 25,  3, 21],
       [29, 50, 24, ..., 23, 12, 54]])>

In [None]:
#Decode the ids picked for the first sequence of the batch back into characters.
target_char=list(idx2char[target_tokens[0].numpy()])
print(''.join(target_char))

UppP$pUpeLQQlp.$.$!PLDp!H;;;KpP,lll$$eb.L!!x..Q...!ll$k??-$pQkQPLDp::FkAQLkL$b!DwJ::XUH-K-lXHpBCPxwO


In [None]:
#Print the layers with their output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding (Embedding)        (64, None, 256)           16640     
_________________________________________________________________
gru (GRU)                    (64, None, 1024)          3938304   
_________________________________________________________________
dense (Dense)                (64, None, 65)            66625     
Total params: 4,021,569
Trainable params: 4,021,569
Non-trainable params: 0
_________________________________________________________________


In [None]:
def loss(labels,logits):
  """Sparse categorical cross-entropy between integer labels and raw logits.

  from_logits=True tells the loss that `logits` are unnormalised scores rather
  than probabilities.
  """
  return tf.keras.losses.sparse_categorical_crossentropy(
      y_true=labels,y_pred=logits,from_logits=True)

#Evaluate the loss on the example batch produced earlier and report its mean.
example_batch_loss=loss(target_example_batch,example_batch_predictions)
print("Prediction shape:", example_batch_predictions.shape)
print("scalar_loss:", example_batch_loss.numpy().mean())

Prediction shape: (64, 100, 65)
scalar_loss: 4.1750875


In [None]:
#Configure training: Adam optimizer, the custom loss defined above, and accuracy
#reported as an additional metric.
model.compile(optimizer='adam',loss=loss,metrics=['acc'])

In [None]:
#Directory where the checkpoints will be saved
checkpoint_dir='./training_checkpoints'
#Name of the checkpoint files; the {epoch} placeholder is left for the callback
#to fill in (the saved file name ends in the epoch number, e.g. ckpt_10)
checkpoint_prefix=os.path.join(checkpoint_dir,'ckpt_{epoch}')

#save_weights_only=True stores just the weights, not the whole model
checkpoint_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_prefix,
    save_weights_only=True)

In [None]:
#Train for 10 epochs; checkpoint_callback writes weight checkpoints along the way.
history=model.fit(dataset,epochs=10,callbacks=[checkpoint_callback])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


Generate text

In [None]:
#Path prefix of the most recent checkpoint found in checkpoint_dir.
tf.train.latest_checkpoint(checkpoint_dir)

'./training_checkpoints/ckpt_10'

In [None]:
#Rebuild the same architecture as the training model, but with a batch size of 1
#instead of BATCH_SIZE, so text can be generated from a single seed sequence.
new_model=tf.keras.Sequential([
      tf.keras.layers.Embedding(vocab_size,embedding_dim,
                                batch_input_shape=(1,None)),
      tf.keras.layers.GRU(rnn_units,return_sequences=True,stateful=True,
                          recurrent_initializer='glorot_uniform'),
      tf.keras.layers.Dense(vocab_size)])

#Restore the trained weights from the latest checkpoint.
new_model.load_weights(tf.train.latest_checkpoint(checkpoint_dir))

#Build for a batch of one sequence of unspecified length.
new_model.build(tf.TensorShape([1,None]))

In [None]:
def generate_text(model,start_string,num_generate=1000,temperature=1.0):
  """Sample text from a trained character-level model, one character at a time.

  Args:
    model: model that maps a (1, n) batch of character ids to (1, n, vocab_size)
      logits and exposes reset_states(). It is expected to be stateful and built
      for a batch size of 1, so its hidden state carries over between calls.
    start_string: non-empty seed text; every character must be a key of char2idx.
    num_generate: number of characters to sample after the seed.
    temperature: positive factor the logits are divided by before sampling.
      Low temperature results in more predictable text, higher temperature in
      more surprising text.

  Returns:
    start_string followed by the num_generate sampled characters.

  Raises:
    ValueError: if start_string is empty or temperature is not positive.
    KeyError: if start_string contains a character that is not in char2idx.
  """
  if not start_string:
    raise ValueError('start_string must not be empty')
  if temperature<=0:
    raise ValueError('temperature must be positive')

  #Encode the seed as ids and add the batch dimension: (len(start_string),) -> (1,len(start_string))
  input_eval=[char2idx[s] for s in start_string]
  input_eval=tf.expand_dims(input_eval,0)

  #Characters sampled so far
  text_generated=[]

  #Start from a clean hidden state so earlier calls do not influence this one
  model.reset_states()

  for _ in range(num_generate):
    predictions=model(input_eval)                   #(1,n,vocab_size)
    #Remove the batch dimension
    predictions=tf.squeeze(predictions,0)           #(n,vocab_size)

    #Scale the logits by the temperature. The result must be assigned back to the
    #variable that is sampled from, otherwise the temperature has no effect.
    predictions=predictions/temperature
    #Sample one id per position and keep the one for the last position
    predicted_id=tf.random.categorical(predictions,num_samples=1)[-1,0].numpy()

    #Pass the predicted character as the next input to the model;
    #the previous context lives in the model's hidden state
    input_eval=tf.expand_dims([predicted_id],0)     #(1,1)
    text_generated.append(idx2char[predicted_id])

  return start_string+''.join(text_generated)


In [None]:
#Generate text with the restored model, seeded with a character name.
print(generate_text(new_model,start_string=u'ROMEO:'))

ROMEO:
Nay, or not with all rrbuse to rain.

PAULINAM:
Montague than these say, make her heart as it to leave it.

HENRY BOLINGBROKE:
Is it, my soul's blood, when I must have in thy life;
More, make an assarel
Until him frown for shame,
Which got before he will keep what I shall find my heart.

YORKELO:
You do hold for brother's blows, the princes, unhappy straight,
On him thee.
Have I not make a sense is.

Messenger:
I thank you, sir, be crow'st, that I enjoy by the youngest.

ANGELO:
No.

CORIOLANUS:
Ye solemn. You come to do
As twhip the Ty-Vaiter love, and man:
But bid he shall be will determing pate;
Inful maid, we winge, makes for assail:
How lougg me, do I' these you.

CORIOLANUS:

ANTENONIUS:
Romeo, do for all corrue of lanful thee:
But I'll play appear but me.

TYBALT:
A for what dost thou keep one doors:
But, sir, sir, have us boutest and spoke dream.

Nurse:
Amen, so wandrall is he, for use you.
Nay, is he vow, my woman, but brothers for such
Thousand fellow 'stain'd:' quoth