<a href="https://colab.research.google.com/github/AnHaiTrinh/handson-ml/blob/main/TrainingDNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import sklearn

%matplotlib inline
import matplotlib.pyplot as plt
from tensorflow import keras
import tensorflow as tf

In [None]:
from keras.datasets import mnist
from sklearn.model_selection import train_test_split
# Load MNIST and rescale the pixel values by 1/255.
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()
X_train_full, X_test = X_train_full / 255, X_test / 255

# First task: keep only the samples whose label is below 5.
low_digit_train_mask = y_train_full < 5
low_digit_test_mask = y_test < 5
X_subset_train_full = X_train_full[low_digit_train_mask]
y_subset_train_full = y_train_full[low_digit_train_mask]
X_subset_test = X_test[low_digit_test_mask]
y_subset_test = y_test[low_digit_test_mask]

# Hold out 20% of the filtered training data for validation.
X_subset_train, X_subset_valid, y_subset_train, y_subset_valid = train_test_split(
    X_subset_train_full,
    y_subset_train_full,
    test_size=0.2,
)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [None]:
# Baseline classifier: flattened 28x28 input, five 100-unit ELU hidden layers
# with He-normal initialization, and a 5-way softmax output.
model = keras.models.Sequential([
    keras.layers.Flatten(input_shape=[28, 28]),
    *(
        keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal")
        for _ in range(5)
    ),
    keras.layers.Dense(5, activation="softmax"),
])

In [None]:
# Print the layer-by-layer structure and parameter counts of the baseline model.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 784)               0         
                                                                 
 dense (Dense)               (None, 100)               78500     
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 100)               10100     
                                                                 
 dense_3 (Dense)             (None, 100)               10100     
                                                                 
 dense_4 (Dense)             (None, 100)               10100     
                                                                 
 dense_5 (Dense)             (None, 5)                 5

In [None]:
# Configure training for the baseline model: sparse categorical cross-entropy
# loss, Adam optimizer, accuracy as the reported metric.
model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Stop training after 10 epochs without improvement of the callback's default
# monitored quantity. This callback is reused by later training cells.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10)

# Only the best model seen so far is kept in the checkpoint file.
model_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "my_mnist_model.h5",
    save_best_only=True,
)

history = model.fit(
    X_subset_train,
    y_subset_train,
    epochs=50,
    validation_data=(X_subset_valid, y_subset_valid),
    callbacks=[early_stopping_cb, model_checkpoint_cb],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50


In [None]:
import pandas as pd
# Plot every quantity recorded in the training history, one curve per key.
pd.DataFrame(history.history).plot(figsize=(8, 6))

In [None]:
# Replace the in-memory model with the checkpoint saved by ModelCheckpoint
# (save_best_only=True), then score it on the test samples with labels below 5.
model = keras.models.load_model("my_mnist_model.h5")
model.evaluate(X_subset_test, y_subset_test)

In [None]:
# Same depth and width as the baseline, but with batch normalization applied
# to the flattened input and between each Dense layer and its ELU activation.
bn_layers = [
    keras.layers.Flatten(input_shape=[28, 28]),
    keras.layers.BatchNormalization(),
]
for _ in range(5):
  bn_layers += [
      keras.layers.Dense(100, kernel_initializer="he_normal"),
      keras.layers.BatchNormalization(),
      keras.layers.Activation("elu"),
  ]
bn_layers.append(keras.layers.Dense(5, activation="softmax"))
batch_normalized_model = keras.models.Sequential(bn_layers)

In [None]:
# Print the layer-by-layer structure of the batch-normalized model.
batch_normalized_model.summary()

In [None]:
# Same loss, optimizer and metric as the baseline model.
batch_normalized_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Checkpoint the batch-normalized model to its own file, keeping only the best.
new_model_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "batch_normalized_model.h5",
    save_best_only=True,
)

# Note: `history` is overwritten here; the baseline run's history is no longer available.
history = batch_normalized_model.fit(
    X_subset_train,
    y_subset_train,
    epochs=50,
    validation_data=(X_subset_valid, y_subset_valid),
    callbacks=[early_stopping_cb, new_model_checkpoint_cb],
)

In [None]:
# Reload the best checkpoint of the batch-normalized model and evaluate it on
# the test subset.
batch_normalized_model = keras.models.load_model("batch_normalized_model.h5")
batch_normalized_model.evaluate(X_subset_test, y_subset_test)

In [None]:
# Same architecture as the batch-normalized network, with an extra Dropout
# layer (rate 0.1) after each hidden activation.
# The model is now constructed once; the original built a Sequential twice and
# discarded the first instance.
dropout_model = keras.models.Sequential()
dropout_model.add(keras.layers.Flatten(input_shape=[28, 28]))
dropout_model.add(keras.layers.BatchNormalization())
for _ in range(5):
  dropout_model.add(keras.layers.Dense(100, kernel_initializer="he_normal"))
  dropout_model.add(keras.layers.BatchNormalization())
  dropout_model.add(keras.layers.Activation("elu"))
  dropout_model.add(keras.layers.Dropout(rate=0.1))
dropout_model.add(keras.layers.Dense(5, activation="softmax"))

In [None]:
# Print the layer-by-layer structure of the dropout model.
dropout_model.summary()

In [None]:
# Same loss, optimizer and metric as the previous two models.
dropout_model.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# Checkpoint the dropout model to its own file, keeping only the best.
dropout_model_checkpoint_cb = keras.callbacks.ModelCheckpoint(
    "dropout_model.h5",
    save_best_only=True,
)

# Note: `history` is overwritten again by this run.
history = dropout_model.fit(
    X_subset_train,
    y_subset_train,
    epochs=50,
    validation_data=(X_subset_valid, y_subset_valid),
    callbacks=[early_stopping_cb, dropout_model_checkpoint_cb],
)

In [None]:
# Reload the best checkpoint of the dropout model and evaluate it on the test subset.
dropout_model = keras.models.load_model("dropout_model.h5")
dropout_model.evaluate(X_subset_test, y_subset_test)

In [None]:
# Work on a copy of the baseline model: clone its architecture, then copy the
# weights over explicitly so the clone starts from the same parameters.
cloned_model = keras.models.clone_model(model)
cloned_model.set_weights(model.get_weights())

In [None]:
# Transfer learning: reuse every cloned layer except the old output layer and
# attach a fresh 5-way softmax head.
new_model = keras.models.Sequential(cloned_model.layers[:-1])
new_model.add(keras.layers.Dense(5, activation="softmax"))

# Freeze the reused layers (everything but the new head) before compiling.
for layer in new_model.layers[:-1]:
  layer.trainable = False

new_model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [None]:
# Print the structure of the transfer-learning model.
new_model.summary()

In [None]:
# Second task: samples whose label is 5 or more, with labels shifted down by 5
# so they start at 0. The X_subset_* / y_subset_* names from the first task are
# overwritten here.
high_digit_train_mask = y_train_full >= 5
high_digit_test_mask = y_test >= 5

# Only the first 625 matching training samples are kept.
X_subset_train_full = X_train_full[high_digit_train_mask][:625]
y_subset_train_full = y_train_full[high_digit_train_mask][:625] - 5
X_subset_test = X_test[high_digit_test_mask]
y_subset_test = y_test[high_digit_test_mask] - 5

# Hold out 20% of the small training set for validation.
X_subset_train, X_subset_valid, y_subset_train, y_subset_valid = train_test_split(
    X_subset_train_full,
    y_subset_train_full,
    test_size=0.2,
)

In [None]:
# Shorter patience for the small dataset; the best weights are restored when
# training stops early.
short_early_stopping = keras.callbacks.EarlyStopping(
    patience=5,
    restore_best_weights=True,
)

# First phase: only the new output layer is trainable.
new_model.fit(
    X_subset_train,
    y_subset_train,
    epochs=30,
    validation_data=(X_subset_valid, y_subset_valid),
    callbacks=[short_early_stopping],
)

In [None]:
# Second phase: unfreeze the reused layers and fine-tune the whole network.
for layer in new_model.layers[:-1]:
  layer.trainable = True

# Changes to `trainable` only take effect once the model is compiled again;
# without this call the reused layers would stay frozen during the fit below.
# A learning rate below Adam's default is used so fine-tuning does not wreck
# the reused weights (tunable).
new_model.compile(loss="sparse_categorical_crossentropy",
                  optimizer=keras.optimizers.Adam(learning_rate=1e-4),
                  metrics=["accuracy"])
new_model.fit(X_subset_train, y_subset_train, epochs=50, validation_data=(X_subset_valid, y_subset_valid), callbacks=[short_early_stopping])

In [None]:
# Evaluate on the test samples with original labels 5-9 (shifted down by 5).
new_model.evaluate(X_subset_test, y_subset_test)

In [None]:
# Two-input network: each 28x28 image goes through its own branch, and the
# branch outputs are concatenated and reduced to a single sigmoid unit.
input_a = keras.layers.Input(shape=[28, 28], name="input_a")
input_b = keras.layers.Input(shape=[28, 28], name="input_b")
flatten_a = keras.layers.Flatten()(input_a)
flatten_b = keras.layers.Flatten()(input_b)

# Settings shared by every ELU Dense layer below.
elu_dense_kwargs = dict(activation="elu", kernel_initializer="he_normal")

# Branch A: five separate 100-unit layers (no weights are shared with branch B).
hidden_5_a = flatten_a
for _ in range(5):
  hidden_5_a = keras.layers.Dense(100, **elu_dense_kwargs)(hidden_5_a)

# Branch B: same structure, built from its own layer instances.
hidden_5_b = flatten_b
for _ in range(5):
  hidden_5_b = keras.layers.Dense(100, **elu_dense_kwargs)(hidden_5_b)

concat = keras.layers.concatenate([hidden_5_a, hidden_5_b])
dense = keras.layers.Dense(10, **elu_dense_kwargs)(concat)
output = keras.layers.Dense(
    1,
    activation="sigmoid",
    kernel_initializer="he_normal",
    name="output",
)(dense)
my_model = keras.models.Model(inputs=[input_a, input_b], outputs=[output])

In [None]:
# Print the structure of the two-input model.
my_model.summary()

In [None]:
# Split the full (all ten digits) training set, setting aside 5000 samples as X_valid / y_valid.
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, test_size=5000)

In [None]:
# Group the training images by label: digits_data[d] holds every image of digit d.
digits_data = [X_train[y_train == digit] for digit in range(10)]
def generate_training_data(data_size=100000, digits=None):
  """Build labelled image pairs for a same-class / different-class task.

  Every iteration appends two pairs, in this order:
    1. two distinct images drawn from one randomly chosen class (label 0);
    2. one image from each of two distinct randomly chosen classes (label 1).
  The result therefore contains ``2 * data_size`` pairs.

  Args:
    data_size: Number of iterations (each contributes one pair of each kind).
    digits: Optional sequence holding one array of images per class. When
      omitted, the notebook-level ``digits_data`` list is used, which matches
      the original behavior.

  Returns:
    A tuple ``(inputs_a, inputs_b, outputs)`` of NumPy arrays. ``outputs`` has
    shape ``(2 * data_size, 1)`` and holds the 0/1 labels described above.

  Raises:
    ValueError: If fewer than two classes are supplied, or (raised by
      ``np.random.choice``) if a chosen class holds fewer than two images.
  """
  if digits is None:
    digits = digits_data
  n_classes = len(digits)
  if n_classes < 2:
    raise ValueError("at least two classes are required to build different-class pairs")

  inputs_a = []
  inputs_b = []
  outputs = []
  for _ in range(data_size):
    # Same-class pair: two different images of one class -> label 0.
    rand = np.random.choice(np.arange(0, n_classes))
    data = digits[rand]
    idx_1, idx_2 = np.random.choice(np.arange(0, len(data)), size=2, replace=False)
    inputs_a.append(data[idx_1])
    inputs_b.append(data[idx_2])
    outputs.append(0)

    # Different-class pair: one image from each of two distinct classes -> label 1.
    rnd1, rnd2 = np.random.choice(np.arange(0, n_classes), size=2, replace=False)
    data1, data2 = digits[rnd1], digits[rnd2]
    idx1 = np.random.choice(np.arange(0, len(data1)))
    idx2 = np.random.choice(np.arange(0, len(data2)))
    inputs_a.append(data1[idx1])
    inputs_b.append(data2[idx2])
    outputs.append(1)
  return np.array(inputs_a), np.array(inputs_b), np.array(outputs).reshape(-1, 1)

In [None]:
from sklearn.utils import shuffle
# Each of the 60000 iterations emits two pairs (one labelled 0, one labelled 1),
# so this yields 120000 labelled pairs.
inputs_a, inputs_b, outputs = generate_training_data(60000)
# Shuffle the three arrays together so the generated 0/1 alternation is broken up.
inputs_a, inputs_b, outputs = shuffle(inputs_a, inputs_b, outputs)

In [None]:
# Sanity check: display the first image of the pair at index 2, without axes.
plt.imshow(inputs_a[2], cmap="binary", interpolation="nearest")
plt.axis("off")

In [None]:
# Sanity check: display the second image of the same pair (index 2).
plt.imshow(inputs_b[2], cmap="binary", interpolation="nearest")
plt.axis("off")

In [None]:
# Label of the pair shown above: 0 = same digit, 1 = different digits.
outputs[2]

In [None]:
# The output layer already applies a sigmoid, so the loss is told it receives
# probabilities rather than logits (from_logits=False).
my_model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False), optimizer="nadam", metrics=["binary_accuracy"])

In [None]:
# Train the pair model; the tuple order follows the model's inputs=[input_a, input_b].
# No validation data is passed to this fit call.
my_model.fit((inputs_a, inputs_b), outputs, batch_size=500, epochs=20)

In [None]:
# Copy the trained pair model: clone the architecture, then copy the weights across.
clone_dnn = keras.models.clone_model(my_model)
clone_dnn.set_weights(my_model.get_weights())

In [None]:
# Build a 10-class classifier from layers of the cloned pair model: every second
# entry of clone_dnn.layers at positions 2..13, followed by a new softmax head.
# NOTE(review): the slice presumably selects the Flatten and five Dense layers of
# a single branch; this relies on how Keras orders clone_dnn.layers — confirm
# with clone_dnn.summary() before changing the architecture above.
dnn = keras.models.Sequential(clone_dnn.layers[2:14:2])
dnn.add(keras.layers.Dense(10, activation="softmax"))
# Freeze the reused layers so only the new head is trainable at first.
for layer in dnn.layers[:-1]:
  layer.trainable = False

In [None]:
# Compile after freezing so that the frozen state is what training uses.
dnn.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

In [None]:
# X_valid / y_valid (the 5000 samples set aside earlier) serve as a small
# training set here.
dnn.fit(X_valid, y_valid, epochs=20)
# Evaluated on the same data it was just trained on, so this is a training score.
dnn.evaluate(X_valid, y_valid)

In [None]:
# Unfreeze the reused layers and fine-tune the whole classifier.
for layer in dnn.layers[:-1]:
  layer.trainable = True

# Changes to `trainable` only take effect once the model is compiled again;
# without this call the reused layers would stay frozen during the fit below.
# A learning rate below Adam's default is used so fine-tuning does not wreck
# the reused weights (tunable).
dnn.compile(loss="sparse_categorical_crossentropy",
            optimizer=keras.optimizers.Adam(learning_rate=1e-4),
            metrics=["accuracy"])
dnn.fit(X_valid, y_valid, epochs=20)
# Evaluated on the same data it was trained on, so this is a training score.
dnn.evaluate(X_valid, y_valid)

In [None]:
# Final score on the full ten-digit MNIST test set.
dnn.evaluate(X_test, y_test)