# Week13 Deep Learning and TensorFlow 2

# 1. Convolutional Neural Networks

### Image Classification with CNNs

In [0]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

### Load and Preprocess the Fashion-MNIST Dataset 

In [0]:
fashion_mnist = tf.keras.datasets.fashion_mnist

(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()


In [0]:
X_train, X_test = X_train / 255.0, X_test / 255.0
num_classes = 10

X_train = X_train.reshape(X_train.shape[0], 28, 28, 1) # Expand dimension (Channel dim)
X_train.shape

In [0]:
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
X_test.shape

In [0]:
y_train = y_train.reshape(y_train.shape[0], 1) 
y_train.shape

In [0]:
y_test = y_test.reshape(y_test.shape[0], 1) 
y_test.shape

In [0]:
y_train

### Build the CNN model

In [0]:
layers = tf.keras.layers
base_model = None

base_model.summary()

In [0]:
None

### Train the model

In [0]:
base_history = None

In [0]:
None

In [0]:
None

### The model with dropout regularization

In [0]:
# Dropout Model
dropout_model = None

None

In [0]:
drop_history = None

In [0]:
None

In [0]:
None

### Plotting the learning curve

In [0]:
def plot_history(histories, key='loss'):
    """Plot training and validation curves of several runs on one figure.

    Args:
        histories: iterable of ``(name, history)`` pairs. Each ``history``
            must expose ``.epoch`` (list of epoch indices) and ``.history``
            (dict holding the series ``key`` and ``'val_' + key``).
        key: name of the metric to plot; the validation series is looked up
            under ``'val_' + key``.

    The x-axis spans epoch 0 to the largest epoch index found in *any* of
    the given histories, so runs of different length are all fully visible.
    """
    plt.figure(figsize=(16,10))

    # Track the longest run ourselves instead of relying on the loop
    # variable after the loop: that would only reflect the last history and
    # would be undefined when `histories` is empty.
    last_epoch = 0
    for name, history in histories:
        val = plt.plot(history.epoch, history.history['val_'+key],
                       '--', label=name.title()+' Val')
        # Reuse the validation line's colour so both curves of a run match.
        plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')
        if history.epoch:
            last_epoch = max(last_epoch, max(history.epoch))

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    plt.legend()
    # A zero-width range is not a meaningful limit; let matplotlib autoscale.
    if last_epoch > 0:
        plt.xlim([0, last_epoch])

plot_history([('Base CNNs', base_history),
              ('Dropout CNNs', drop_history)])

# 2. Recurrent Neural Networks

### Character-level Language Model with RNNs

In [0]:
import tensorflow as tf
import numpy as np
import os
import time

### Load and preprocess the Shakespeare dataset

**1. Download the Shakespeare's Sonnet dataset**

In [0]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt',
'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Load whole text file as bytes, then decode.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open(path_to_file, 'rb') as text_file:
    text = text_file.read().decode(encoding='utf-8')

# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

In [0]:
# We'll use the subset
text = text[:14592]
len(text)

In [0]:
# Take a look first 250 characters
print(text[:250])

In [0]:
# The unique characters in the file
vocab = None
print ('{} unique characters'.format(len(vocab)))

**2. Vectorize the text**
- **text_as_int** : a vector with shape of (14592, ) 

In [0]:
# Creating a mapping from unique characters to indices, and vice versa
char2idx = None
idx2char = None

# Convert the characters to the indices
text_as_int = None

# Show how the first 13 characters from the text are mapped to integers
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

**3. Creating training task**
- **char_dataset** : `tf.data.Dataset` object, shape = (14592, )
   - .take(*count*) : Creates a `Dataset` with at most *count* elements from this dataset.
   - .batch(*count*) : Combines consecutive elements of this dataset into batches.

In [0]:
# The maximum length sentence we want for a single input in characters
seq_length = 100

# Create training examples / targets
char_dataset = None

for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

In [0]:
# Check the shape : char_dataset
np.array(list(char_dataset.as_numpy_iterator())).shape

- **sequences** : `tf.data.Dataset` object, shape = (144, 101)
   - `.map(map_func)` : Maps `map_func` across the elements of this dataset.
- **dataset** : `tf.data.Dataset` object, shape = (144, 2, 100) 

In [0]:
# 'batch' method convert these individual characters to sequences of the desired size
sequences = None

for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

In [0]:
# Check the shape : sequences
np.array(list(sequences.as_numpy_iterator())).shape

In [0]:
# map_func
def split_input_target(chunk):
    """Split one sequence into an (input, target) pair for next-char training.

    The target is the input shifted one position to the left, so that at
    every time step the model is asked to predict the following character.

    Args:
        chunk: a sliceable sequence of length ``L`` (the notebook feeds
            chunks of ``seq_length + 1`` character ids).

    Returns:
        ``(input_text, target_text)``, each of length ``L - 1``:
        ``chunk[:-1]`` and ``chunk[1:]``.
    """
    # input text is shifted to form the target text
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

# 'map' method lets us easily apply a simple function to each batch
dataset = None

# Print the examples 
for input_ex, target_ex in  dataset.take(1):
    print ('Input : ', repr(''.join(idx2char[input_ex.numpy()])))
    print ('Target :', repr(''.join(idx2char[target_ex.numpy()])))

In [0]:
# Check the shape : dataset - (1)
np.array(list(dataset.as_numpy_iterator())).shape

**4. Creating training batches**
- **dataset** : `tf.data.Dataset` object, shape = (9, 2, 16, 100)

In [0]:
# Batch size
BATCH_SIZE = 16

# Buffer size to shuffle the dataset
BUFFER_SIZE = 100

dataset = None

In [0]:
# Check the shape : dataset - (2)
np.array(list(dataset.as_numpy_iterator())).shape

### Build the model

- 3 layers are used to define this model
    1. `tf.keras.layers.Embedding`: The input layer, A trainable lookup table that will map the numbers of each character to a vector with `embedding_dim` dimensions;
    2. `tf.keras.layers.RNN`: An RNN layer with size `units=rnn_units`
    3. `tf.keras.layers.Dense`: The output layer, with `vocab_size` outputs.

In [0]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 128

# Number of RNN units
rnn_units = 256

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build the character-level language model (exercise stub).

    The notebook text describes the intended network as three layers: an
    Embedding layer mapping character ids to ``embedding_dim``-dimensional
    vectors, a recurrent layer with ``rnn_units`` units, and a Dense output
    layer with ``vocab_size`` outputs.

    Args:
        vocab_size: number of distinct characters (``len(vocab)`` in the notebook).
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the recurrent layer.
        batch_size: batch size the model is built for; the notebook builds it
            with ``BATCH_SIZE`` for training and with ``1`` for generation.

    Returns:
        Currently always ``None``: the model construction has not been filled
        in, so callers that use the result (e.g. ``model.summary()``) will fail
        until it is.
    """
    # Short alias for the Keras layers namespace (unused until the stub is filled in).
    layers = tf.keras.layers
    # TODO: replace this placeholder with the actual model definition.
    model = None
    return model

In [0]:
# Build the model 
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

# Check the model architecture
# Model can be run on inputs of any length 
model.summary()

### Try the model
Before training the model, take a look at how the model works.

In [0]:
# Check the shape of the input, output
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = None

print(input_example_batch.shape)
print(target_example_batch.shape)
print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")


In [0]:
# We need to sample from the output distribution, not to take the argmax of the distribution
sampled_indices = None
sampled_indices = None

# Predictions of the next character index 
sampled_indices

In [0]:
# Decode the predictions, the model shows poor performance 
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

### Train the model

In [0]:
# 1. Attach an optimizer, and a loss function
# define the loss function 
def loss(labels, logits):
    """Loss between the target labels and the model outputs (exercise stub).

    Args:
        labels: the targets (presumably next-character ids - confirm against
            the dataset cell).
        logits: the raw model outputs; per the note below these are logits,
            not probabilities.

    Returns:
        Currently always ``None``: the loss computation has not been filled in.
    """
    # TODO: because the output of the model is a logit, the loss implemented
    # here has to be computed from logits rather than from probabilities.
    return None

# Configure the training procedure
optimizer = None

None

In [0]:
# 2. Configure the checkpoints
# `tf.keras.callbacks.ModelCheckpoint` : The callback function to save the model checkpoint

# Directory where the model weights will be saved
ckpt_dir = './training_rnns_ckpts'

# Checkpoint name
ckpt_prefix = os.path.join(ckpt_dir, "ckpt_rnns_{epoch}")

# Callback function to save the model weights
ckpt_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=ckpt_prefix,
    save_weights_only=True)

In [0]:
# 3. Execute the training
EPOCHS=10
rnn_history = None

### Generate text

In [0]:
# Check the latest checkpoint
tf.train.latest_checkpoint(ckpt_dir)

In [0]:
# To run the model with one sample(not with batch_size of samples),
# We rebuild the model, and load the weights from the saved checkpoint. 
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(ckpt_dir))

In [0]:
# Check the model summary
model.summary()

In [0]:
# The prediction loop
def generate_text(model, start_string, n_generate=1000, temperature=1.0):
    """Generate text by repeatedly sampling the next character from ``model``.

    The start string is fed to the model once; after that each sampled
    character is fed back as the next single-step input. The model's states
    are reset once before generation starts.

    Args:
        model: callable taking a ``(1, T)`` batch of character ids and
            returning per-step logits of shape ``(1, T, vocab_size)``; must
            provide ``reset_states()``.
        start_string: non-empty seed text; every character must be a key of
            the notebook-level ``char2idx`` mapping.
        n_generate: number of characters to generate (default 1000).
        temperature: positive divisor applied to the logits before sampling.
            Values below 1 make the output more predictable, values above 1
            more surprising; the default 1.0 leaves the distribution unchanged.

    Returns:
        ``start_string`` followed by the ``n_generate`` generated characters.

    Raises:
        ValueError: if ``start_string`` is empty or ``temperature`` is not positive.
        KeyError: if ``start_string`` contains a character missing from ``char2idx``.
    """
    # An empty seed would produce a zero-length input with nothing to sample
    # from, so fail early with a clear message.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Converting start_strings to index (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Making the empty list to store results
    text_generated = []

    # Here batch size == 1
    model.reset_states()
    for _ in range(n_generate):
        predictions = model(input_eval)

        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # scale the logits; dividing by 1.0 keeps the original distribution
        predictions = predictions / temperature

        # using a categorical distribution to predict the character;
        # only the sample for the last time step is kept
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        # Passing the predicted character as the next input to the model along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))

In [0]:
# Generate text from start string
print(generate_text(model, start_string="All: "))

# Text Classification with RNNs 

### Import TensorFlow and other libraries

In [0]:
import tensorflow_datasets as tfds
import tensorflow as tf
import matplotlib.pyplot as plt

### Load and preprocess the IMDb dataset

In [0]:
# using tfds library, load the imdb dataset
dataset, info = tfds.load('imdb_reviews/subwords8k', with_info=True,
                          as_supervised=True)
train_dataset, test_dataset = dataset['train'], dataset['test']

In [0]:
# info object has the lookup table(encoder) of token and index
encoder = info.features['text'].encoder
print('Vocabulary size: {}'.format(encoder.vocab_size))

In [0]:
# Test the encoder
sample_string = 'Hello TensorFlow.'

encoded_string = encoder.encode(sample_string)
print('Encoded string is {}'.format(encoded_string))

original_string = encoder.decode(encoded_string)
print('The original string: "{}"'.format(original_string))

In [0]:
# Test the encoder
for index in encoded_string:
  print('{} ----> {}'.format(index, encoder.decode([index])))

In [0]:
# Creating training task
BUFFER_SIZE = 10000
BATCH_SIZE = 64

train_dataset = train_dataset.shuffle(BUFFER_SIZE)
train_dataset = train_dataset.padded_batch(BATCH_SIZE)

test_dataset = test_dataset.padded_batch(BATCH_SIZE)

In [0]:
# Check the shape of batches
for x, y in train_dataset.take(2):
    print(x)
    print(y)
    print("-"*90)

### Build the model

In [0]:
layers = tf.keras.layers
model = tf.keras.Sequential([
    layers.Embedding(encoder.vocab_size, 64),
    layers.Bidirectional(layers.LSTM(64)),
    layers.Dense(64, activation='relu'),
    layers.Dense(1)
])

In [0]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
              optimizer=tf.keras.optimizers.Adam(1e-4),
              metrics=['accuracy'])
model.summary()

### Train the model

In [0]:
history = model.fit(train_dataset, epochs=5,
                    validation_data=test_dataset, 
                    validation_steps=30)

### Evaluate the model

In [0]:
test_loss, test_acc = model.evaluate(test_dataset)

print('Test Loss: {}'.format(test_loss))
print('Test Accuracy: {}'.format(test_acc))

In [0]:
# helper function 
def plot_graphs(history, metric):
  """Plot a training metric and its validation counterpart per epoch.

  Args:
    history: object whose ``.history`` dict holds the series ``metric`` and
      ``'val_' + metric``.
    metric: name of the training metric to plot (also used as y-axis label).
  """
  val_metric = 'val_'+metric
  series = history.history

  # Training curve first, then validation, so the legend order matches.
  plt.plot(series[metric])
  plt.plot(series[val_metric], '')

  plt.xlabel("Epochs")
  plt.ylabel(metric)
  plt.legend([metric, val_metric])
  plt.show()

In [0]:
plot_graphs(history, 'accuracy')

In [0]:
plot_graphs(history, 'loss')

In [0]:
def sample_pred(text, model):
    """Classify one review as "Positive" or "Negative".

    The text is encoded with the notebook-level ``encoder``, passed to
    ``model.predict`` as a batch of one, and the resulting logit is turned
    into a probability with a sigmoid. The probability is printed.

    Args:
        text: raw review text.
        model: object with a ``predict`` method returning one logit per sample.

    Returns:
        "Positive" when the probability is at least 0.5, otherwise "Negative".
    """
    token_ids = tf.cast(encoder.encode(text), tf.float32)
    # Add the batch dimension expected by predict().
    single_batch = tf.expand_dims(token_ids, 0)
    logits = model.predict(single_batch)

    prob = tf.sigmoid(logits)[0][0].numpy()
    print('Prob : ', prob)
    return "Positive" if prob >= 0.5 else "Negative"

In [0]:
sample_pred_text = 'You should watch this movie, this movie is excellent'
sample_pred(sample_pred_text, model)

# Quiz 1 : Image Classification Model on the CIFAR-10
- Build the Convolutional Neural Networks
    - Build the model following the model summary below
    - Apply the dropout regularization to the model and compare the result
- Compare the performance of the model built last week


### Import TensorFlow and other libraries

In [0]:
import tensorflow as tf

from tensorflow.keras import datasets, models
import matplotlib.pyplot as plt

### Load the CIFAR-10 Dataset

In [0]:
(train_images, train_labels), (test_images, test_labels) = datasets.cifar10.load_data()

# Normalize pixel values to be between 0 and 1
train_images, test_images = train_images / 255.0, test_images / 255.0

In [0]:
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

plt.figure(figsize=(10,10))
for i in range(25):
    plt.subplot(5,5,i+1)
    plt.xticks([])
    plt.yticks([])
    plt.grid(False)
    plt.imshow(train_images[i], cmap=plt.cm.binary)
    # The CIFAR labels happen to be arrays, 
    # which is why you need the extra index
    plt.xlabel(class_names[train_labels[i][0]])
plt.show()

### Build the model

In [0]:
layers = tf.keras.layers

cifar_model = None


In [0]:
cifar_model.summary()

In [0]:
# Compile the model(set optimizer, loss function and metrics)
None

### Train the model

In [0]:
cifar_history = None

### Apply the dropout to the model

In [0]:
cifar_drop_model = None

In [0]:
cifar_drop_model.summary()

In [0]:
# Compile the model(set optimizer, loss function and metrics)
None

In [0]:
cifar_drop_history = None

In [0]:
def plot_history(histories, key='loss'):
    """Plot training and validation curves of several runs on one figure.

    Args:
        histories: iterable of ``(name, history)`` pairs. Each ``history``
            must expose ``.epoch`` (list of epoch indices) and ``.history``
            (dict holding the series ``key`` and ``'val_' + key``).
        key: name of the metric to plot; the validation series is looked up
            under ``'val_' + key``.

    The x-axis spans epoch 0 to the largest epoch index found in *any* of
    the given histories, so runs of different length are all fully visible.
    """
    plt.figure(figsize=(16,10))

    # Track the longest run ourselves instead of relying on the loop
    # variable after the loop: that would only reflect the last history and
    # would be undefined when `histories` is empty.
    last_epoch = 0
    for name, history in histories:
        val = plt.plot(history.epoch, history.history['val_'+key],
                       '--', label=name.title()+' Val')
        # Reuse the validation line's colour so both curves of a run match.
        plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')
        if history.epoch:
            last_epoch = max(last_epoch, max(history.epoch))

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    plt.legend()
    # A zero-width range is not a meaningful limit; let matplotlib autoscale.
    if last_epoch > 0:
        plt.xlim([0, last_epoch])

plot_history([('Base CNNs', cifar_history),
              ('Dropout CNNs', cifar_drop_history)])

In [0]:
def plot_history(histories, key='accuracy'):
    """Plot training and validation curves of several runs on one figure.

    Same plotting routine as the loss variant defined in the previous cell,
    but with ``'accuracy'`` as the default metric.

    Args:
        histories: iterable of ``(name, history)`` pairs. Each ``history``
            must expose ``.epoch`` (list of epoch indices) and ``.history``
            (dict holding the series ``key`` and ``'val_' + key``).
        key: name of the metric to plot; the validation series is looked up
            under ``'val_' + key``.

    The x-axis spans epoch 0 to the largest epoch index found in *any* of
    the given histories, so runs of different length are all fully visible.
    """
    plt.figure(figsize=(16,10))

    # Track the longest run ourselves instead of relying on the loop
    # variable after the loop: that would only reflect the last history and
    # would be undefined when `histories` is empty.
    last_epoch = 0
    for name, history in histories:
        val = plt.plot(history.epoch, history.history['val_'+key],
                       '--', label=name.title()+' Val')
        # Reuse the validation line's colour so both curves of a run match.
        plt.plot(history.epoch, history.history[key], color=val[0].get_color(),
                 label=name.title()+' Train')
        if history.epoch:
            last_epoch = max(last_epoch, max(history.epoch))

    plt.xlabel('Epochs')
    plt.ylabel(key.replace('_',' ').title())
    plt.legend()
    # A zero-width range is not a meaningful limit; let matplotlib autoscale.
    if last_epoch > 0:
        plt.xlim([0, last_epoch])

plot_history([('Base CNNs', cifar_history),
              ('Dropout CNNs', cifar_drop_history)])

# Quiz 2 : Character-level Language Model
- Build the Character-level Language Model with LSTM
- Compare the generated text to the one generated by RNNs


### Character-level Language Model with RNNs

In [0]:
import tensorflow as tf
import numpy as np
import os
import time

### Load and preprocess the Shakespeare dataset

**1. Download the Shakespeare's Sonnet dataset**

In [0]:
path_to_file = tf.keras.utils.get_file('shakespeare.txt',
'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt')

# Load whole text file as bytes, then decode.
# The context manager closes the file handle as soon as the read finishes
# instead of leaving it open until garbage collection.
with open(path_to_file, 'rb') as text_file:
    text = text_file.read().decode(encoding='utf-8')

# length of text is the number of characters in it
print ('Length of text: {} characters'.format(len(text)))

In [0]:
# We'll use the subset
text = text[:14592]
len(text)

In [0]:
# Take a look first 250 characters
print(text[:250])

In [0]:
# The unique characters in the file
vocab = None
print ('{} unique characters'.format(len(vocab)))

**2. Vectorize the text**
- **text_as_int** : a vector with shape of (14592, ) 

In [0]:
# Creating a mapping from unique characters to indices, and vice versa
char2idx = None
idx2char = None

# Convert the characters to the indices
text_as_int = None

# Show how the first 13 characters from the text are mapped to integers
print ('{} ---- characters mapped to int ---- > {}'.format(repr(text[:13]), text_as_int[:13]))

**3. Creating training task**
- **char_dataset** : `tf.data.Dataset` object, shape = (14592, )
   - .take(*count*) : Creates a `Dataset` with at most *count* elements from this dataset.
   - .batch(*count*) : Combines consecutive elements of this dataset into batches.

In [0]:
# The maximum length sentence we want for a single input in characters
seq_length = 100

# Create training examples / targets
char_dataset = None

for i in char_dataset.take(5):
  print(idx2char[i.numpy()])

In [0]:
# Check the shape : char_dataset
np.array(list(char_dataset.as_numpy_iterator())).shape

- **sequences** : `tf.data.Dataset` object, shape = (144, 101)
   - `.map(map_func)` : Maps `map_func` across the elements of this dataset.
- **dataset** : `tf.data.Dataset` object, shape = (144, 2, 100) 

In [0]:
# 'batch' method convert these individual characters to sequences of the desired size
sequences = None

for item in sequences.take(5):
  print(repr(''.join(idx2char[item.numpy()])))

In [0]:
# Check the shape : sequences
np.array(list(sequences.as_numpy_iterator())).shape

In [0]:
# map_func
def split_input_target(chunk):
    """Split one sequence into an (input, target) pair for next-char training.

    The target is the input shifted one position to the left, so that at
    every time step the model is asked to predict the following character.

    Args:
        chunk: a sliceable sequence of length ``L`` (the notebook feeds
            chunks of ``seq_length + 1`` character ids).

    Returns:
        ``(input_text, target_text)``, each of length ``L - 1``:
        ``chunk[:-1]`` and ``chunk[1:]``.
    """
    # input text is shifted to form the target text
    input_text = chunk[:-1]
    target_text = chunk[1:]
    return input_text, target_text

# 'map' method lets us easily apply a simple function to each batch
dataset = None

# Print the examples 
for input_ex, target_ex in  dataset.take(1):
    print ('Input : ', repr(''.join(idx2char[input_ex.numpy()])))
    print ('Target :', repr(''.join(idx2char[target_ex.numpy()])))

In [0]:
# Check the shape : dataset - (1)
np.array(list(dataset.as_numpy_iterator())).shape

**4. Creating training batches**
- **dataset** : `tf.data.Dataset` object, shape = (9, 2, 16, 100)

In [0]:
# Batch size
BATCH_SIZE = 16

# Buffer size to shuffle the dataset
BUFFER_SIZE = 100

dataset = None

In [0]:
# Check the shape : dataset - (2)
np.array(list(dataset.as_numpy_iterator())).shape

### Build the model

- 3 layers are used to define this model
    1. `tf.keras.layers.Embedding`: The input layer, A trainable lookup table that will map the numbers of each character to a vector with `embedding_dim` dimensions;
    2. `tf.keras.layers.RNN`: An RNN layer with size `units=rnn_units`
    3. `tf.keras.layers.Dense`: The output layer, with `vocab_size` outputs.

In [0]:
# Length of the vocabulary in chars
vocab_size = len(vocab)

# The embedding dimension
embedding_dim = 128

# Number of RNN units
rnn_units = 256

In [0]:
def build_model(vocab_size, embedding_dim, rnn_units, batch_size):
    """Build the character-level language model for Quiz 2 (exercise stub).

    The quiz heading asks for an LSTM-based model; the notebook text lists
    three layers: an Embedding layer mapping character ids to
    ``embedding_dim``-dimensional vectors, a recurrent layer with
    ``rnn_units`` units, and a Dense output layer with ``vocab_size`` outputs.

    Args:
        vocab_size: number of distinct characters (``len(vocab)`` in the notebook).
        embedding_dim: size of each character embedding vector.
        rnn_units: number of units in the recurrent layer.
        batch_size: batch size the model is built for; the notebook builds it
            with ``BATCH_SIZE`` for training and with ``1`` for generation.

    Returns:
        Currently always ``None``: the model construction has not been filled
        in, so callers that use the result (e.g. ``model.summary()``) will fail
        until it is.
    """
    # Short alias for the Keras layers namespace (unused until the stub is filled in).
    layers = tf.keras.layers
    # TODO: replace this placeholder with the actual model definition.
    model = None
    return model

In [0]:
# Build the model 
model = build_model(
  vocab_size = vocab_size,
  embedding_dim=embedding_dim,
  rnn_units=rnn_units,
  batch_size=BATCH_SIZE)

# Check the model architecture
# Model can be run on inputs of any length 
model.summary()

### Try the model
Before training the model, take a look at how the model works.

In [0]:
# Check the shape of the input, output
for input_example_batch, target_example_batch in dataset.take(1):
    example_batch_predictions = None

print(input_example_batch.shape)
print(target_example_batch.shape)
print(example_batch_predictions.shape, "# (batch_size, sequence_length, vocab_size)")


In [0]:
# We need to sample from the output distribution, not to take the argmax of the distribution
sampled_indices = None
sampled_indices = None

# Predictions of the next character index 
sampled_indices

In [0]:
# Decode the predictions, the model shows poor performance 
print("Input: \n", repr("".join(idx2char[input_example_batch[0]])))
print()
print("Next Char Predictions: \n", repr("".join(idx2char[sampled_indices ])))

### Train the model

In [0]:
# 1. Attach an optimizer, and a loss function
# define the loss function 
def loss(labels, logits):
    """Loss between the target labels and the model outputs (exercise stub).

    Args:
        labels: the targets (presumably next-character ids - confirm against
            the dataset cell).
        logits: the raw model outputs; per the note below these are logits,
            not probabilities.

    Returns:
        Currently always ``None``: the loss computation has not been filled in.
    """
    # TODO: because the output of the model is a logit, the loss implemented
    # here has to be computed from logits rather than from probabilities.
    return None

# Configure the training procedure
optimizer = None

None

In [0]:
# 2. Configure the checkpoints
# `tf.keras.callbacks.ModelCheckpoint` : The callback function to save the model checkpoint

# Directory where the model weights will be saved
ckpt_dir = './training_lstm_ckpts'

# Checkpoint name
ckpt_prefix = os.path.join(ckpt_dir, "ckpt_lstm_{epoch}")

# Callback function to save the model weights
ckpt_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=ckpt_prefix,
    save_weights_only=True)

In [0]:
# 3. Execute the training
EPOCHS=10
rnn_history = None

### Generate text

In [0]:
# Check the latest checkpoint
tf.train.latest_checkpoint(ckpt_dir)

In [0]:
# To run the model with one sample(not with batch_size of samples),
# We rebuild the model, and load the weights from the saved checkpoint. 
model = build_model(vocab_size, embedding_dim, rnn_units, batch_size=1)

model.load_weights(tf.train.latest_checkpoint(ckpt_dir))

In [0]:
# Check the model summary
model.summary()

In [0]:
# The prediction loop
def generate_text(model, start_string, n_generate=1000, temperature=1.0):
    """Generate text by repeatedly sampling the next character from ``model``.

    The start string is fed to the model once; after that each sampled
    character is fed back as the next single-step input. The model's states
    are reset once before generation starts.

    Args:
        model: callable taking a ``(1, T)`` batch of character ids and
            returning per-step logits of shape ``(1, T, vocab_size)``; must
            provide ``reset_states()``.
        start_string: non-empty seed text; every character must be a key of
            the notebook-level ``char2idx`` mapping.
        n_generate: number of characters to generate (default 1000).
        temperature: positive divisor applied to the logits before sampling.
            Values below 1 make the output more predictable, values above 1
            more surprising; the default 1.0 leaves the distribution unchanged.

    Returns:
        ``start_string`` followed by the ``n_generate`` generated characters.

    Raises:
        ValueError: if ``start_string`` is empty or ``temperature`` is not positive.
        KeyError: if ``start_string`` contains a character missing from ``char2idx``.
    """
    # An empty seed would produce a zero-length input with nothing to sample
    # from, so fail early with a clear message.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Converting start_strings to index (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Making the empty list to store results
    text_generated = []

    # Here batch size == 1
    model.reset_states()
    for _ in range(n_generate):
        predictions = model(input_eval)

        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # scale the logits; dividing by 1.0 keeps the original distribution
        predictions = predictions / temperature

        # using a categorical distribution to predict the character;
        # only the sample for the last time step is kept
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        # Passing the predicted character as the next input to the model along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))

In [0]:
# Generate text from start string
print(generate_text(model, start_string="All: "))

In [0]:
# The prediction loop
def generate_text(model, start_string, n_generate=1000, temperature=1.0):
    """Generate text by repeatedly sampling the next character from ``model``.

    NOTE(review): this cell re-defines ``generate_text``, which is already
    defined earlier in this quiz section; the later definition is the one in
    effect once the cell has run.

    The start string is fed to the model once; after that each sampled
    character is fed back as the next single-step input. The model's states
    are reset once before generation starts.

    Args:
        model: callable taking a ``(1, T)`` batch of character ids and
            returning per-step logits of shape ``(1, T, vocab_size)``; must
            provide ``reset_states()``.
        start_string: non-empty seed text; every character must be a key of
            the notebook-level ``char2idx`` mapping.
        n_generate: number of characters to generate (default 1000).
        temperature: positive divisor applied to the logits before sampling.
            Values below 1 make the output more predictable, values above 1
            more surprising; the default 1.0 leaves the distribution unchanged.

    Returns:
        ``start_string`` followed by the ``n_generate`` generated characters.

    Raises:
        ValueError: if ``start_string`` is empty or ``temperature`` is not positive.
        KeyError: if ``start_string`` contains a character missing from ``char2idx``.
    """
    # An empty seed would produce a zero-length input with nothing to sample
    # from, so fail early with a clear message.
    if not start_string:
        raise ValueError("start_string must contain at least one character")
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    # Converting start_strings to index (vectorizing)
    input_eval = [char2idx[s] for s in start_string]
    input_eval = tf.expand_dims(input_eval, 0)

    # Making the empty list to store results
    text_generated = []

    # Here batch size == 1
    model.reset_states()
    for _ in range(n_generate):
        predictions = model(input_eval)

        # remove the batch dimension
        predictions = tf.squeeze(predictions, 0)

        # scale the logits; dividing by 1.0 keeps the original distribution
        predictions = predictions / temperature

        # using a categorical distribution to predict the character;
        # only the sample for the last time step is kept
        predicted_id = tf.random.categorical(predictions, num_samples=1)[-1,0].numpy()

        # Passing the predicted character as the next input to the model along with the previous hidden state
        input_eval = tf.expand_dims([predicted_id], 0)

        text_generated.append(idx2char[predicted_id])

    return (start_string + ''.join(text_generated))

In [0]:
# Generate text from start string
print(generate_text(model, start_string="All: "))

In [0]:
# 2. Configure the checkpoints
# `tf.keras.callbacks.ModelCheckpoint` : The callback function to save the model checkpoint
#
# NOTE(review): this cell repeats, with identical values, the checkpoint
# configuration cell that appears before training in this quiz section, and
# it sits after the text-generation cells. Nothing following it uses
# `ckpt_callback`; it looks like a leftover copy - confirm and remove.

# Directory where the model weights will be saved
ckpt_dir = './training_lstm_ckpts'

# Checkpoint name; `{epoch}` is left as a placeholder in the file name
ckpt_prefix = os.path.join(ckpt_dir, "ckpt_lstm_{epoch}")

# Callback function to save the model weights
ckpt_callback=tf.keras.callbacks.ModelCheckpoint(
    filepath=ckpt_prefix,
    save_weights_only=True)