<a href="https://colab.research.google.com/github/KimDaeUng/AI-Agent/blob/master/Week13_Deep_Learning_and_TensorFlow_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Week13 Deep Learning and TensorFlow 2

# 1. Convolutional Neural Networks

## Image Classification with CNNs

### Import TensorFlow and other libraries

In [0]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

### Load and Preprocess the Fashion-MNIST Dataset 

In [0]:
# Handle to the Fashion-MNIST loader bundled with Keras.
fashion_mnist = tf.keras.datasets.fashion_mnist

# load_data() returns two (images, labels) pairs: the training split first,
# then the test split.
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()


In [0]:
# Divide the pixel values of both splits by 255 before training.
X_train = X_train / 255.0
X_test = X_test / 255.0
num_classes = 10

# Append a channel axis of size 1 so the images match the (28, 28, 1) input
# shape declared on the first Conv2D layer.
X_train = X_train.reshape((len(X_train), 28, 28, 1))
X_train.shape

In [0]:
# Append a channel axis of size 1 to the test images as well.
X_test = X_test.reshape((len(X_test), 28, 28, 1))
X_test.shape

In [0]:
# Reshape the training labels into a column: one row per example.
y_train = y_train.reshape((len(y_train), 1))
y_train.shape

In [0]:
# Reshape the test labels into a column: one row per example.
y_test = y_test.reshape((len(y_test), 1))
y_test.shape

In [0]:
y_train

### Train the model

In [0]:
layers = tf.keras.layers

# Baseline CNN: two 3x3 convolutions, a 2x2 max-pool, then a dense classifier
# with one softmax output per class.
base_model = tf.keras.Sequential([
    layers.Conv2D(
        32,
        kernel_size=(3, 3),
        padding="same",
        input_shape=(28, 28, 1),
        activation="relu",
    ),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPool2D(pool_size=2),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax"),
])



In [0]:
# The sparse variant of categorical cross-entropy takes labels as class
# indices rather than one-hot vectors; accuracy is tracked as an extra metric.
base_model.compile(loss='sparse_categorical_crossentropy',
                   optimizer='adam',
                   metrics=['accuracy'])

In [0]:
# Train for 10 epochs. Note that the test split doubles as the validation data
# here, so the 'val_*' curves are computed on the test set.
base_history = base_model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

In [0]:
base_model.evaluate(X_train, y_train)

In [0]:
base_model.evaluate(X_test, y_test)

## Dropout regularization

In [0]:
# Dropout Model
# Same layer stack as the baseline CNN, with a Dropout(0.5) layer inserted
# between the hidden dense layer and the softmax output.
dropout_model = tf.keras.Sequential([
    tf.keras.layers.Conv2D(
        32,
        kernel_size=(3, 3),
        padding="same",
        input_shape=(28, 28, 1),
        activation="relu",
    ),
    tf.keras.layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    tf.keras.layers.MaxPool2D(pool_size=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation="relu"),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(10, activation="softmax"),
])

# Identical training configuration to the baseline so the two runs are comparable.
dropout_model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [0]:
# Train for 10 epochs with the test split passed as validation data.
drop_history = dropout_model.fit(X_train, y_train, epochs=10, validation_data=(X_test, y_test))

In [0]:
dropout_model.evaluate(X_train, y_train)

In [0]:
dropout_model.evaluate(X_test, y_test)

### Plotting the learning curve

In [0]:
def plot_history(histories, key='loss'):
    """Plot training and validation curves of one metric for several runs.

    Args:
        histories: iterable of `(name, history)` pairs, where `history` is the
            object returned by `model.fit` (it must expose `.epoch` and a
            `.history` dict containing `key` and `'val_' + key`).
        key: name of the metric to plot. The validation series is read from
            `'val_' + key`.

    The x-axis spans epoch 0 to the largest epoch index found in ANY of the
    histories. Nothing is drawn (and no error is raised) for an empty
    `histories`.
    """
    histories = list(histories)
    plt.figure(figsize=(16,10))

    last_epoch = 0
    for name, history in histories:
        # Validation curve is dashed; the training curve reuses its colour so
        # both curves of a run are visually paired.
        val = plt.plot(history.epoch, history.history['val_'+key],
                       '--', label=name.title()+' Val')
        plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')
        if history.epoch:
            last_epoch = max(last_epoch, max(history.epoch))

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    if histories:
        plt.legend()
    # Skip the degenerate [0, 0] range produced by a single-epoch run.
    if last_epoch > 0:
        plt.xlim([0, last_epoch])

plot_history([('Base CNNs', base_history),
              ('Dropout CNNs', drop_history)])



# 2. Recurrent Neural Networks

## Character-level Language Model with RNNs

### Import TensorFlow and other libraries

In [0]:
import tensorflow as tf
import numpy as np
import os
import time

### Load and preprocess the Shakespeare dataset

In [0]:
# 1. Download the Shakespeare text file
path_to_file = tf.keras.utils.get_file('shakespeare.txt',
'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Load whole text file as a string, then decode.
# The context manager closes the file handle once the bytes are read.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

In [0]:
# Take a look at the first 250 characters
print(text[:250])

# Sorted list of the unique characters in the file; its length is the
# vocabulary size used later.
vocab = sorted(set(text))
# Visual separator between the text sample and the count
print("="*90)
print ('{} unique characters'.format(len(vocab)))

In [0]:
# 2. Vectorize the text
# Reverse lookup (index -> character) and forward lookup (character -> index);
# both follow the order of `vocab`.
idx2char = np.array(vocab)
char2idx = dict(zip(vocab, range(len(vocab))))

# Encode the whole corpus as an integer array, one index per character
text_as_int = np.array([char2idx[ch] for ch in text])

# Show how the first 13 characters from the text are mapped to integers
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

In [0]:
# 3. Creating training task
# The maximum length sentence we want for a single input in characters
seq_length = 100
# Number of (seq_length + 1)-character chunks that fit in the text.
# NOTE(review): not referenced again in the visible part of the notebook.
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
# The dataset yields one character index per element.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Sanity check: decode and print the first five elements.
for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

In [0]:
# The 'batch' method converts the individual characters into sequences of
# seq_length + 1 elements; drop_remainder=True discards a shorter final chunk.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

# Decode and print the first five sequences as text.
for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

In [0]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i] in the original chunk.
    """
    return chunk[:-1], chunk[1:]

# The 'map' method applies split_input_target to every sequence, producing
# (input, target) pairs.
dataset = sequences.map(split_input_target)

# Print the first example, decoded back to text, to show the one-character shift
for input_ex, target_ex in  dataset.take(1):
  print ('Input : ', repr(''.join(idx2char[input_ex.numpy()])))
  print ('Target :', repr(''.join(idx2char[target_ex.numpy()])))

In [0]:
# 4. Create training batches
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
BUFFER_SIZE = 10000

# Shuffle the (input, target) pairs, then group them into batches of
# BATCH_SIZE; drop_remainder=True keeps every batch the same size.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Display the dataset object
dataset

### Build the model

- 3 layers are used to define this model
    1. `tf.keras.layers.Embedding`: The input layer, a trainable lookup table that maps the index of each character to a vector with `embedding_dim` dimensions.
    2. `tf.keras.layers.SimpleRNN`: A recurrent layer with `units=rnn_units`.
    3. `tf.keras.layers.Dense`: The output layer, with `vocab_size` outputs.

In [0]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level model: Embedding -> SimpleRNN -> Dense.

    Args:
        vocab_size: number of distinct characters; used as the embedding's
            input dimension and as the width of the output layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the recurrent layer.
        batch_size: fixed batch size baked into the input shape (the
            recurrent layer is created with stateful=True).

    Returns:
        An uncompiled tf.keras.Sequential model. The final Dense layer has no
        activation.
    """
    keras_layers = tf.keras.layers

    char_embedding = keras_layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    # SimpleRNN can be swapped for LSTM or GRU. For a multi-layer RNN, create
    # further recurrent layers with the same arguments and add them to the
    # list passed to Sequential below.
    recurrent = keras_layers.SimpleRNN(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',  # Xavier Initialization
    )
    char_scores = keras_layers.Dense(vocab_size)

    return tf.keras.Sequential([char_embedding, recurrent, char_scores])

In [0]:
# Build the training model with the training batch size.
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

### Try the model
Before training the model, take a look at how the model works.

In [0]:
# Check the shape of the output
# Run a single batch through the model and keep the batch and its predictions
# for the cells that follow.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

In [0]:
# Check the model architecture
# Model can be run on inputs of any length 
model.summary()

In [0]:
# We need to sample from the output distribution, not to take the argmax of the distribution
# Draw one sample per position for the first example of the batch.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

# Predictions of the next character index 
sampled_indices

In [0]:
# Decode the input and the sampled predictions back to text.
# The model has not been trained yet at this point, so poor output is expected.
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

### Train the model

In [0]:
# 1. Attach an optimizer, and a loss function
# define the loss function
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    `from_logits=True` is needed because the model's final Dense layer is
    created without an activation, so its outputs are unnormalized scores.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels, y_pred=logits, from_logits=True)

# Test the loss function on the example batch captured earlier
example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
# The loss is not reduced to a scalar by `loss`, so average it here for display.
print("scalar_loss:      ", example_batch_loss.numpy().mean())

# Configure the training procedure
# Adam with clipnorm=5.0 (gradient clipping)
optimizer = tf.keras.optimizers.Adam(clipnorm=5.0)
model.compile(optimizer=optimizer, loss=loss)

In [0]:
# 2. Configure the checkpoints
# `tf.keras.callbacks.ModelCheckpoint` : The callback function to save the model checkpoint

# Directory where the model weights will be saved
ckpt_dir = './training_rnns_ckpts'

# Checkpoint name; the "{epoch}" placeholder is left for the callback to fill in
ckpt_prefix = os.path.join(ckpt_dir, "ckpt_rnns_{epoch}")

# Callback function to save the model weights
# (save_weights_only=True: weights only, not the full model)
ckpt_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=ckpt_prefix,
    save_weights_only=True)

In [0]:
# 3. Execute the training
# The checkpoint callback is attached so weights are written during training.
EPOCHS=10
rnn_history = model.fit(dataset, epochs=EPOCHS, callbacks=[ckpt_callback])

### Generate text

In [0]:
# Check the latest checkpoint
tf.train.latest_checkpoint(ckpt_dir)

In [0]:
# To run the model with one sample (not with batch_size of samples),
# we rebuild the model with batch_size=1 and load the weights from the saved checkpoint.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the weights from the latest checkpoint found in ckpt_dir.
model.load_weights(tf.train.latest_checkpoint(ckpt_dir))

In [0]:
# Check the model summary
model.summary()

In [0]:
# The prediction loop
def generate_text(model, start_string, n_generate=1000, temperature=1.0):
  """Sample text from the character model, one character at a time.

  Args:
    model: stateful model built with batch_size=1 that maps a batch of
      character indices to per-position scores over the vocabulary.
    start_string: non-empty seed text. Every character must be a key of
      `char2idx`; an unknown character raises KeyError.
    n_generate: number of characters to generate after the seed.
    temperature: positive factor the scores are divided by before sampling.
      Low temperatures -> more predictable text.
      Higher temperatures -> more surprising text.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Converting start_string to indices (vectorizing) and adding a batch axis
  input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

  # Generated characters are collected here and joined at the end
  text_generated = []

  # Here batch size == 1; clear any recurrent state left by a previous call
  model.reset_states()
  for _ in range(n_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # using a categorical distribution to predict the character;
    # only the sample drawn for the last time step is kept
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Passing the predicted character as the next input to the model;
    # the previous hidden state is carried inside the stateful model
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [0]:
# It shows poor performance 
# Sometimes it prints out a series of meaningless characters
# -> Due to vanishing gradients problem
print(generate_text(model, start_string=u"ROMEO: "))

## Text Classification with RNNs 

### Import TensorFlow and other libraries

In [0]:
import tensorflow_datasets as tfds
import tensorflow as tf
import matplotlib.pyplot as plt

### Load and preprocess the IMDb dataset

In [0]:
# Using the tfds library, load the IMDb reviews dataset (subword-encoded variant).
# NOTE: this rebinds `dataset`, which previously held the character dataset.
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True,
                          as_supervised=True)
# Pull the train and test splits out of the returned mapping.
train_dataset, test_dataset = dataset['train'], dataset['test']

In [0]:
# The info object carries the encoder (lookup table between tokens and indices)
# attached to the 'text' feature.
encoder = info.features['text'].encoder
print('Vocabulary size: {}'.format(encoder.vocab_size))

In [0]:
# Test the encoder
sample_string = 'Hello TensorFlow.'

encoded_string = encoder.encode(sample_string)
print('Encoded string is {}'.format(encoded_string))

original_string = encoder.decode(encoded_string)
print('The original string: "{}"'.format(original_string))

In [0]:
# Test the encoder
for index in encoded_string:
  print('{} ----> {}'.format(index, encoder.decode([index])))

In [0]:
# Creating training task
BUFFER_SIZE = 10000
BATCH_SIZE = 64

# Shuffle the training examples, then group them into padded batches.
train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(BATCH_SIZE)

# The test split is batched the same way but not shuffled.
test_dataset = test_dataset.padded_batch(BATCH_SIZE)

In [0]:
# Check the shape of batches
for x, y in train_dataset.take(2):
    print(x)
    print(y)
    print("-"*90)

### Build the model

In [0]:
# Embedding -> bidirectional LSTM -> dense hidden layer -> single output unit.
# The last layer has no activation; the loss below is configured with
# from_logits=True accordingly.
# NOTE: this rebinds `model`, which previously held the character model.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(encoder.vocab_size, 64),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1)
])

In [0]:
# Binary cross-entropy on raw logits, Adam with learning rate 1e-4,
# accuracy tracked as an extra metric.
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])
model.summary()

### Train the model

In [0]:
# Train for 5 epochs. Validation runs on the test split, limited to
# 30 batches per epoch by validation_steps.
history = model.fit(train_dataset, epochs=5,
                    validation_data=test_dataset, 
                    validation_steps=30)

### Evaluate the model

In [0]:
# Evaluate on the full test split; evaluate() returns the loss followed by the
# metrics given to compile() (accuracy here).
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))

In [0]:
# helper function
def plot_graphs(history, metric):
  """Draw the training and validation curves of `metric` and show the figure.

  Args:
    history: object returned by `model.fit`; its `.history` dict must hold
      both `metric` and `'val_' + metric`.
    metric: name of the metric to plot, e.g. 'accuracy' or 'loss'.
  """
  curves = history.history
  val_metric = 'val_'+metric

  plt.plot(curves[metric])
  plt.plot(curves[val_metric], '')
  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, val_metric])
  plt.show()

In [0]:
plot_graphs(history, 'accuracy')

In [0]:
plot_graphs(history, 'loss')

In [0]:
def sample_pred(text, model):
    """Classify one review and print the predicted positive probability.

    Args:
        text: raw review string; it is encoded with the notebook-level `encoder`.
        model: classifier whose output for a single example is one logit.

    Returns:
        "Positive" if the sigmoid of the logit is at least 0.5, else "Negative".
    """
    token_ids = tf.cast(encoder.encode(text), tf.float32)
    # Add a leading batch axis so the model sees a batch of one example.
    single_batch = tf.expand_dims(token_ids, 0)
    logits = model.predict(single_batch)
    prob = tf.sigmoid(logits)[0][0].numpy()
    print('Prob : ', prob)
    return "Positive" if prob >= 0.5 else "Negative"

In [0]:
sample_pred_text = 'You should watch this movie, this movie is excellent'
sample_pred(sample_pred_text, model)

# Quiz 1 : Image Classification Model on the CIFAR-10
- Build the Convolutional Neural Networks
    - Build the model following the model summary below
    - Apply the dropout regularization to the model and compare the results
- Compare the performance with that of the model built last week


### Import TensorFlow and other libraries

In [0]:
import tensorflow as tf

from tensorflow.keras import datasets, models
import matplotlib.pyplot as plt

### Load the CIFAR-10 Dataset

In [0]:
# load_data() returns the training pair followed by the test pair.
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

In [0]:
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

plt.figure(figsize=(10,10))
# Show the first 25 training images on a 5x5 grid, each captioned with its class.
for slot, (image, label) in enumerate(zip(train_images[:25], train_labels[:25]), start=1):
    plt.subplot(5, 5, slot)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(image, cmap=plt.cm.binary)
    # The CIFAR labels happen to be arrays, so take element 0 before the lookup.
    plt.xlabel(class_names[label[0]])
plt.show()

### Build the model

In [0]:
layers = tf.keras.layers

# Baseline CIFAR-10 CNN: two conv + max-pool stages, then a dense classifier
# with one softmax output per class.
cifar_model = tf.keras.Sequential([
    layers.Conv2D(
        32,
        kernel_size=(3, 3),
        padding="same",
        input_shape=(32, 32, 3),
        activation="relu",
    ),
    layers.MaxPool2D(pool_size=2),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPool2D(pool_size=2),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dense(10, activation="softmax"),
])



In [0]:
cifar_model.summary()

In [0]:
# Compile the model (set optimizer, loss function and metrics)
# from_logits=False because the last layer already applies softmax.
cifar_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

### Train the model

In [0]:
# Train for 20 epochs with the test split passed as validation data.
cifar_history = cifar_model.fit(train_images, train_labels, epochs=20, 
                    validation_data=(test_images, test_labels))

### Apply the dropout to the model

In [0]:
# Same layer stack as the baseline CIFAR-10 model, with a Dropout(0.5) layer
# inserted between the hidden dense layer and the softmax output.
cifar_drop_model = tf.keras.Sequential([
    layers.Conv2D(
        32,
        kernel_size=(3, 3),
        padding="same",
        input_shape=(32, 32, 3),
        activation="relu",
    ),
    layers.MaxPool2D(pool_size=2),
    layers.Conv2D(64, kernel_size=(3, 3), activation="relu"),
    layers.MaxPool2D(pool_size=2),
    layers.Flatten(),
    layers.Dense(128, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(10, activation="softmax"),
])



In [0]:
cifar_drop_model.summary()

In [0]:
# Compile the model (set optimizer, loss function and metrics)
# from_logits=False because the last layer already applies softmax.
cifar_drop_model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])

In [0]:
# Train for 20 epochs with the test split passed as validation data.
cifar_drop_history = cifar_drop_model.fit(train_images, train_labels, epochs=20, 
                    validation_data=(test_images, test_labels))

In [0]:
def plot_history(histories, key='loss'):
    """Plot training and validation curves of one metric for several runs.

    Args:
        histories: iterable of `(name, history)` pairs, where `history` is the
            object returned by `model.fit` (it must expose `.epoch` and a
            `.history` dict containing `key` and `'val_' + key`).
        key: name of the metric to plot. The validation series is read from
            `'val_' + key`.

    The x-axis spans epoch 0 to the largest epoch index found in ANY of the
    histories. Nothing is drawn (and no error is raised) for an empty
    `histories`.
    """
    histories = list(histories)
    plt.figure(figsize=(16,10))

    last_epoch = 0
    for name, history in histories:
        # Validation curve is dashed; the training curve reuses its colour so
        # both curves of a run are visually paired.
        val = plt.plot(history.epoch, history.history['val_'+key],
                       '--', label=name.title()+' Val')
        plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')
        if history.epoch:
            last_epoch = max(last_epoch, max(history.epoch))

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    if histories:
        plt.legend()
    # Skip the degenerate [0, 0] range produced by a single-epoch run.
    if last_epoch > 0:
        plt.xlim([0, last_epoch])

plot_history([('Base CNNs', cifar_history),
              ('Dropout CNNs', cifar_drop_history)])

In [0]:
# NOTE: this re-definition replaces any earlier `plot_history`; the only
# interface difference is the default `key`, which is 'accuracy' here.
def plot_history(histories, key='accuracy'):
    """Plot training and validation curves of one metric for several runs.

    Args:
        histories: iterable of `(name, history)` pairs, where `history` is the
            object returned by `model.fit` (it must expose `.epoch` and a
            `.history` dict containing `key` and `'val_' + key`).
        key: name of the metric to plot. The validation series is read from
            `'val_' + key`.

    The x-axis spans epoch 0 to the largest epoch index found in ANY of the
    histories. Nothing is drawn (and no error is raised) for an empty
    `histories`.
    """
    histories = list(histories)
    plt.figure(figsize=(16,10))

    last_epoch = 0
    for name, history in histories:
        # Validation curve is dashed; the training curve reuses its colour so
        # both curves of a run are visually paired.
        val = plt.plot(history.epoch, history.history['val_'+key],
                       '--', label=name.title()+' Val')
        plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')
        if history.epoch:
            last_epoch = max(last_epoch, max(history.epoch))

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    if histories:
        plt.legend()
    # Skip the degenerate [0, 0] range produced by a single-epoch run.
    if last_epoch > 0:
        plt.xlim([0, last_epoch])

plot_history([('Base CNNs', cifar_history),
              ('Dropout CNNs', cifar_drop_history)])

# Quiz 2 : Character-level Language Model
- Build the Character-level Language Model with LSTM
- Compare the generated text to the one generated by RNNs


### Import TensorFlow and other libraries

In [0]:
import tensorflow as tf
import numpy as np
import os
import time

### Load and preprocess the Shakespeare dataset

In [0]:
# 1. Download the Shakespeare text file
path_to_file = tf.keras.utils.get_file('shakespeare.txt',
'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Load whole text file as a string, then decode.
# The context manager closes the file handle once the bytes are read.
with open(path_to_file, 'rb') as corpus_file:
    text = corpus_file.read().decode(encoding='utf-8')

# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

In [0]:
# Take a look at the first 250 characters
print(text[:250])

# Sorted list of the unique characters in the file; its length is the
# vocabulary size used later.
vocab = sorted(set(text))
# Visual separator between the text sample and the count
print("="*90)
print ('{} unique characters'.format(len(vocab)))

In [0]:
# 2. Vectorize the text
# Reverse lookup (index -> character) and forward lookup (character -> index);
# both follow the order of `vocab`.
idx2char = np.array(vocab)
char2idx = dict(zip(vocab, range(len(vocab))))

# Encode the whole corpus as an integer array, one index per character
text_as_int = np.array([char2idx[ch] for ch in text])

# Show how the first 13 characters from the text are mapped to integers
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

In [0]:
# 3. Creating training task
# The maximum length sentence we want for a single input in characters
seq_length = 100
# Number of (seq_length + 1)-character chunks that fit in the text.
# NOTE(review): not referenced again in the visible part of the notebook.
examples_per_epoch = len(text)//(seq_length+1)

# Create training examples / targets
# The dataset yields one character index per element.
char_dataset = tf.data.Dataset.from_tensor_slices(text_as_int)

# Sanity check: decode and print the first five elements.
for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

In [0]:
# The 'batch' method converts the individual characters into sequences of
# seq_length + 1 elements; drop_remainder=True discards a shorter final chunk.
sequences = char_dataset.batch(seq_length+1, drop_remainder=True)

# Decode and print the first five sequences as text.
for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

In [0]:
def split_input_target(chunk):
    """Split a chunk into an (input, target) pair offset by one position.

    The input is every element except the last and the target is every
    element except the first, so target[i] is the element that follows
    input[i] in the original chunk.
    """
    return chunk[:-1], chunk[1:]

# The 'map' method applies split_input_target to every sequence, producing
# (input, target) pairs.
dataset = sequences.map(split_input_target)

# Print the first example, decoded back to text, to show the one-character shift
for input_ex, target_ex in  dataset.take(1):
  print ('Input : ', repr(''.join(idx2char[input_ex.numpy()])))
  print ('Target :', repr(''.join(idx2char[target_ex.numpy()])))

In [0]:
# 4. Create training batches
# Batch size
BATCH_SIZE = 64

# Buffer size to shuffle the dataset
BUFFER_SIZE = 10000

# Shuffle the (input, target) pairs, then group them into batches of
# BATCH_SIZE; drop_remainder=True keeps every batch the same size.
dataset = dataset.shuffle(BUFFER_SIZE).batch(BATCH_SIZE, drop_remainder=True)

# Display the dataset object
dataset

### Build the model

- 3 layers are used to define this model
    1. `tf.keras.layers.Embedding`: The input layer, a trainable lookup table that maps the index of each character to a vector with `embedding_dim` dimensions.
    2. `tf.keras.layers.LSTM`: An LSTM layer with `units=rnn_units`.
    3. `tf.keras.layers.Dense`: The output layer, with `vocab_size` outputs.

In [0]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 256

# Number of RNN units
rnn_units = 1024

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Assemble the character-level model: Embedding -> LSTM -> Dense.

    Args:
        vocab_size: number of distinct characters; used as the embedding's
            input dimension and as the width of the output layer.
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the LSTM layer.
        batch_size: fixed batch size baked into the input shape (the
            LSTM layer is created with stateful=True).

    Returns:
        An uncompiled tf.keras.Sequential model. The final Dense layer has no
        activation.
    """
    keras_layers = tf.keras.layers

    char_embedding = keras_layers.Embedding(
        input_dim=vocab_size,
        output_dim=embedding_dim,
        batch_input_shape=[batch_size, None],
    )
    # The recurrent layer of this variant is an LSTM.
    recurrent = keras_layers.LSTM(
        rnn_units,
        return_sequences=True,
        stateful=True,
        recurrent_initializer='glorot_uniform',  # Xavier Initialization
    )
    char_scores = keras_layers.Dense(vocab_size)

    return tf.keras.Sequential([char_embedding, recurrent, char_scores])

In [0]:
# Build the training model with the training batch size.
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

### Try the model
Before training the model, take a look at how the model works.

In [0]:
# Check the shape of the output
# Run a single batch through the model and keep the batch and its predictions
# for the cells that follow.
for input_example_batch, target_example_batch in dataset.take(1):
  example_batch_predictions = model(input_example_batch)
  print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")

In [0]:
# Check the model architecture
# Model can be run on inputs of any length 
model.summary()

In [0]:
# We need to sample from the output distribution, not to take the argmax of the distribution
# Draw one sample per position for the first example of the batch.
sampled_indices = tf.random.categorical(example_batch_predictions[0], num_samples=1)
# Drop the trailing num_samples axis and convert to a NumPy array.
sampled_indices = tf.squeeze(sampled_indices,axis=-1).numpy()

# Predictions of the next character index 
sampled_indices

In [0]:
# Decode the input and the sampled predictions back to text.
# The model has not been trained yet at this point, so poor output is expected.
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

### Train the model

In [0]:
# 1. Attach an optimizer, and a loss function
# define the loss function
def loss(labels, logits):
    """Sparse categorical cross-entropy between integer labels and raw logits.

    `from_logits=True` is needed because the model's final Dense layer is
    created without an activation, so its outputs are unnormalized scores.
    """
    return tf.keras.losses.sparse_categorical_crossentropy(
        y_true=labels, y_pred=logits, from_logits=True)

# Test the loss function on the example batch captured earlier
example_batch_loss  = loss(target_example_batch, example_batch_predictions)
print("Prediction shape: ", example_batch_predictions.shape, " # (batch_size, sequence_length, vocab_size)")
# The loss is not reduced to a scalar by `loss`, so average it here for display.
print("scalar_loss:      ", example_batch_loss.numpy().mean())

# Configure the training procedure
# Adam with clipnorm=5.0 (gradient clipping)
optimizer = tf.keras.optimizers.Adam(clipnorm=5.0)
model.compile(optimizer=optimizer, loss=loss)

In [0]:
# 2. Configure the checkpoints
# `tf.keras.callbacks.ModelCheckpoint` : The callback function to save the model checkpoint

# Directory where the model weights will be saved
ckpt_dir = './training_lstm_ckpts'

# Checkpoint name; the "{epoch}" placeholder is left for the callback to fill in
ckpt_prefix = os.path.join(ckpt_dir, "ckpt_lstm_{epoch}")

# Callback function to save the model weights
# (save_weights_only=True: weights only, not the full model)
ckpt_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=ckpt_prefix,
    save_weights_only=True)

In [0]:
# 3. Execute the training
# NOTE(review): the result is stored in `rnn_history`, the same name used for
# the SimpleRNN run, so that earlier history is overwritten here.
EPOCHS=10
rnn_history = model.fit(dataset, epochs=EPOCHS, callbacks=[ckpt_callback])

### Generate text

In [0]:
# Check the latest checkpoint
tf.train.latest_checkpoint(ckpt_dir)

In [0]:
# To run the model with one sample (not with batch_size of samples),
# we rebuild the model with batch_size=1 and load the weights from the saved checkpoint.
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

# Restore the weights from the latest checkpoint found in ckpt_dir.
model.load_weights(tf.train.latest_checkpoint(ckpt_dir))

In [0]:
# Check the model summary
model.summary()

In [0]:
# The prediction loop
def generate_text(model, start_string, n_generate=1000, temperature=1.0):
  """Sample text from the character model, one character at a time.

  Args:
    model: stateful model built with batch_size=1 that maps a batch of
      character indices to per-position scores over the vocabulary.
    start_string: non-empty seed text. Every character must be a key of
      `char2idx`; an unknown character raises KeyError.
    n_generate: number of characters to generate after the seed.
    temperature: positive factor the scores are divided by before sampling.
      Low temperatures -> more predictable text.
      Higher temperatures -> more surprising text.

  Returns:
    `start_string` followed by the generated characters.

  Raises:
    ValueError: if `start_string` is empty or `temperature` is not positive.
  """
  if not start_string:
    raise ValueError("start_string must contain at least one character")
  if temperature <= 0:
    raise ValueError("temperature must be positive")

  # Converting start_string to indices (vectorizing) and adding a batch axis
  input_eval = tf.expand_dims([char2idx[s] for s in start_string], 0)

  # Generated characters are collected here and joined at the end
  text_generated = []

  # Here batch size == 1; clear any recurrent state left by a previous call
  model.reset_states()
  for _ in range(n_generate):
    predictions = model(input_eval)
    # remove the batch dimension
    predictions = tf.squeeze(predictions, 0)

    # using a categorical distribution to predict the character;
    # only the sample drawn for the last time step is kept
    predictions = predictions / temperature
    predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

    # Passing the predicted character as the next input to the model;
    # the previous hidden state is carried inside the stateful model
    input_eval = tf.expand_dims([predicted_id], 0)

    text_generated.append(idx2char[predicted_id])

  return (start_string + ''.join(text_generated))

In [0]:
# NOTE(review): the three comment lines below are identical to the ones in the
# SimpleRNN section; confirm they still describe the LSTM output before
# relying on them.
# It shows poor performance 
# Sometimes it prints out a series of meaningless characters
# -> Due to vanishing gradients problem
print(generate_text(model, start_string=u"ROMEO: "))