# Recurrent Neural Networks

In [1]:
import tensorflow as tf
import numpy as np
import os
import time

In [5]:
file = '1400-0.txt'
url = 'https://www.gutenberg.org/files/1400/1400-0.txt' #Great expectations by Charles Dickens

In [6]:
path = tf.keras.utils.get_file(file, url)

In [13]:
text = open(path, encoding = 'utf-8').read()
print('Text length : %d'%len(text))
text = text[1836:200000]
print(text[:300])

Text length : 1014396
 and my Christian name Philip, my
infant tongue could make of both names nothing longer or more explicit
than Pip. So, I called myself Pip, and came to be called Pip.

I give Pirrip as my father’s family name, on the authority of his
tombstone and my sister,—Mrs. Joe Gargery, who married the blacksm


In [15]:
vocabulary = sorted(set(text))
print('{} unique characters.'.format(len(vocabulary)))
print(vocabulary)

73 unique characters.
['\n', ' ', '!', '(', ')', ',', '-', '.', '2', '4', ':', ';', '?', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', ']', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '—', '‘', '’', '“', '”']


In [16]:
char_to_index = {char:index for index, char in enumerate(vocabulary)}
print(char_to_index)

{'\n': 0, ' ': 1, '!': 2, '(': 3, ')': 4, ',': 5, '-': 6, '.': 7, '2': 8, '4': 9, ':': 10, ';': 11, '?': 12, 'A': 13, 'B': 14, 'C': 15, 'D': 16, 'E': 17, 'F': 18, 'G': 19, 'H': 20, 'I': 21, 'J': 22, 'K': 23, 'L': 24, 'M': 25, 'N': 26, 'O': 27, 'P': 28, 'Q': 29, 'R': 30, 'S': 31, 'T': 32, 'U': 33, 'V': 34, 'W': 35, 'X': 36, 'Y': 37, 'Z': 38, '[': 39, ']': 40, '_': 41, 'a': 42, 'b': 43, 'c': 44, 'd': 45, 'e': 46, 'f': 47, 'g': 48, 'h': 49, 'i': 50, 'j': 51, 'k': 52, 'l': 53, 'm': 54, 'n': 55, 'o': 56, 'p': 57, 'q': 58, 'r': 59, 's': 60, 't': 61, 'u': 62, 'v': 63, 'w': 64, 'x': 65, 'y': 66, 'z': 67, '—': 68, '‘': 69, '’': 70, '“': 71, '”': 72}


In [17]:
index_to_char = np.array(vocabulary)
print(index_to_char)
print(len(text))

['\n' ' ' '!' '(' ')' ',' '-' '.' '2' '4' ':' ';' '?' 'A' 'B' 'C' 'D' 'E'
 'F' 'G' 'H' 'I' 'J' 'K' 'L' 'M' 'N' 'O' 'P' 'Q' 'R' 'S' 'T' 'U' 'V' 'W'
 'X' 'Y' 'Z' '[' ']' '_' 'a' 'b' 'c' 'd' 'e' 'f' 'g' 'h' 'i' 'j' 'k' 'l'
 'm' 'n' 'o' 'p' 'q' 'r' 's' 't' 'u' 'v' 'w' 'x' 'y' 'z' '—' '‘' '’' '“'
 '”']
198164


In [18]:
text_as_int = np.array([char_to_index[char] for char in text])

In [20]:
for char, _ in zip(char_to_index, range(20)):
    print('{:5s}: {:4d},'.format(repr(char), char_to_index[char]))
print('...\n')

'\n' :    0,
' '  :    1,
'!'  :    2,
'('  :    3,
')'  :    4,
','  :    5,
'-'  :    6,
'.'  :    7,
'2'  :    8,
'4'  :    9,
':'  :   10,
';'  :   11,
'?'  :   12,
'A'  :   13,
'B'  :   14,
'C'  :   15,
'D'  :   16,
'E'  :   17,
'F'  :   18,
'G'  :   19,
...



In [21]:
print('{} -----> {}'.format(repr(text[:17]), text_as_int[:17]))

' and my Christian' -----> [ 1 42 55 45  1 54 66  1 15 49 59 50 60 61 50 42 55]


In [22]:
seq_length = 100
examples_per_epoch = len(text)//seq_length
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)
for char in char_dataset.take(5):
    print(index_to_char[char.numpy()])


a
n
d
 


### Making the sequences according to the batch length that is required.

In [24]:
sequences = char_dataset.batch(seq_length+1, drop_remainder = True)

In [25]:
def split_input_target(chunk):
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

In [26]:
dataset = sequences.map(split_input_target)
dataset

<MapDataset shapes: ((100,), (100,)), types: (tf.int32, tf.int32)>

In [27]:
for input_example, target_example in dataset.take(2):
    print('input data: ', repr(''.join(index_to_char[input_example.numpy()])))
    print('target data: ', repr(''.join(index_to_char[target_example.numpy()])))
    print('_______________________________________________________________________________________________________________________')

input data:  ' and my Christian name Philip, my\ninfant tongue could make of both names nothing longer or more expl'
target data:  'and my Christian name Philip, my\ninfant tongue could make of both names nothing longer or more expli'
_______________________________________________________________________________________________________________________
input data:  'cit\nthan Pip. So, I called myself Pip, and came to be called Pip.\n\nI give Pirrip as my father’s fami'
target data:  'it\nthan Pip. So, I called myself Pip, and came to be called Pip.\n\nI give Pirrip as my father’s famil'
_______________________________________________________________________________________________________________________


In [31]:
batch = 64
steps_per_epoch = examples_per_epoch//batch
buffer = 1000
dataset = dataset.shuffle(buffer).batch(batch, drop_remainder = True)
print(dataset)
dataset = dataset.repeat()
print(dataset)

<BatchDataset shapes: ((64, 64, 64, 100), (64, 64, 64, 100)), types: (tf.int32, tf.int32)>
<RepeatDataset shapes: ((64, 64, 64, 100), (64, 64, 64, 100)), types: (tf.int32, tf.int32)>


In [32]:
vocabulary_length = len(vocabulary)
embedding_dimension = 256
recurrent_nn_units = 1024

In [33]:
if tf.test.is_gpu_available():
    recurrent_nn = tf.compat.v1.keras.layers.CuDNNGRU
    print("GPU in use.")
else:
    import functools
    recurrent_nn = functools.partial(tf.keras.layers.GRU, recurrent_activation = 'sigmoid')
    print('CPU in use.')

W0706 21:35:00.028807  4860 deprecation.py:323] From <ipython-input-33-9a1621f8d24e>:1: is_gpu_available (from tensorflow.python.framework.test_util) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.
CPU in use.


In [37]:
def build_model(vocabulary_size, embedding_dimension, recurrent_nn_units, batch_size):
    model = tf.keras.Sequential([tf.keras.layers.Embedding(vocabulary_size, embedding_dimension, batch_input_shape = [batch_size, None]), recurrent_nn(recurrent_nn_units, return_sequences = True, recurrent_initializer = 'GlorotUniform', stateful = True), tf.keras.layers.Dense(vocabulary_length)])
    return model

In [38]:
model = build_model(len(vocabulary), embedding_dimension = embedding_dimension, recurrent_nn_units = recurrent_nn_units, batch_size = batch)

In [39]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (64, None, 256)           18688     
_________________________________________________________________
gru_1 (GRU)                  (64, None, 1024)          3938304   
_________________________________________________________________
dense_1 (Dense)              (64, None, 73)            74825     
Total params: 4,031,817
Trainable params: 4,031,817
Non-trainable params: 0
_________________________________________________________________


### Sanity check

In [94]:
for batch_input_example, batch_target_example in dataset.take(1):
    batch_predictions_example = model(batch_input_example)
    print(batch_predictions_example.shape, "#(batch, seq_l, vocab_l)")

(64, 100, 92) #(batch, seq_l, vocab_l


In [98]:
sampled_indices = tf.random.categorical(logits = batch_predictions_example[0], num_samples = 1)
sampled_indices = tf.squeeze(sampled_indices, axis = -1).numpy()
print(sampled_indices)

[58  6 47 52 13 91 74 80 15 40 60 77 32 88 91 42 82 59 88 56 91  5 46 54
 82 85 60 73 29 31 28 39 52 33 66 77 46 39 79 16 91 71 52 64 70 59  8  5
 43 32 90 28  0  0 38 20 58 37 45 83 65 55 69 55 42 44 25  2 71 54  6 28
 64 58  7 18 73  8 17  5 47 77 21 70  9 68 34 79 81 63 91 46 66 73  7 60
 22 65  3 75]


In [99]:
print('input : \n', repr("".join(index_to_char[batch_input_example[0]])))
print('next char predictions: \n', repr("".join(index_to_char[sampled_indices])))

input : 
 'm had wished me to\ncome and see Estella. To which he replied, “Ah! Very fine young lady!”\nThen he pu'
next char predictions: 
 "_%RW-”pv/KbsC‘”Mxa‘[”$QYxêbo@B?JWDhsQJu0”mWfla'$NC“?\t\tI4_HPygZkZMO9 mY%?f_&2o'1$Rs5l(jEuwe”Qho&b6g!q"


In [101]:
def loss(labels, logits):
    return tf.keras.losses.sparse_categorical_crossentropy(labels, logits, from_logits=True)

### Sanity check for the loss

In [110]:
batch_loss_example = tf.compat.v1.losses.sparse_softmax_cross_entropy(batch_target_example, batch_predictions_example)
print('prediction shape : %s #%r'%(batch_predictions_example.shape, ''.join(str((batch, seq_length, len(vocabulary))))))
print('scalar loss = %.3f'%batch_loss_example.numpy())

prediction shape : (64, 100, 92) #'(64, 100, 92)'
scalar loss = 4.523


In [113]:
model.compile(optimizer = tf.optimizers.Adam(), loss = loss)

### Saving the model weights

In [114]:
directory = './checkpoints'

file_prefix = os.path.join(directory, "ckpt_{epoch}")

callback = [tf.keras.callbacks.ModelCheckpoint(filepath = file_prefix, save_weights_only=True)]

In [115]:
epochs = 4

history = model.fit(dataset, epochs = epochs, steps_per_epoch = steps_per_epoch, callbacks = callback)

Train for 158 steps
Epoch 1/4
 24/158 [===>..........................] - ETA: 24:30 - loss: 3.8476

KeyboardInterrupt: 

### Get latest checkpoint

In [None]:
tf.train.latest_checkpoint(directory)

In [None]:
model = build_model(len(vocabulary), embedding_dimension, recurrent_nn_units, batch_size = 1)
model.load_weights(tf.train.latest_checkpoint(directory))
model.build(tf.TensorShape([1, None]))
model.summary()