## 1. Deep Learning.

In [None]:

a. Build a DNN with five hidden layers of 100 neurons each, He initialization, and the
ELU activation function.
b. Using Adam optimization and early stopping, try training it on MNIST but only on
digits 0 to 4, as we will use transfer learning for digits 5 to 9 in the next exercise. You
will need a softmax output layer with five neurons, and as always make sure to save
checkpoints at regular intervals and save the final model so you can reuse it later.
c. Tune the hyperparameters using cross-validation and see what precision you can
achieve.
d. Now try adding Batch Normalization and compare the learning curves: is it
converging faster than before? Does it produce a better model?
e. Is the model overfitting the training set? Try adding dropout to every layer and try
again. Does it help?

In [1]:
import tensorflow as tf

model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=[28, 28]))
for _ in range(5):
    model.add(tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
model.add(tf.keras.layers.Dense(5, activation="softmax"))



In [2]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split

# Load and preprocess the MNIST dataset
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, stratify=y_train_full)
# Select only digits 0 to 4
mask_train = y_train < 5
mask_val = y_val < 5
mask_test = y_test < 5
X_train, y_train = X_train[mask_train], y_train[mask_train]
X_val, y_val = X_val[mask_val], y_val[mask_val]
X_test, y_test = X_test[mask_test], y_test[mask_test]

# Compile and train the model
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("dnn_mnist.h5", save_best_only=True)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
history = model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=100, callbacks=[checkpoint_cb, early_stopping_cb])

# Save the final model
model.save("dnn_mnist_final.h5")


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz

KeyboardInterrupt: 

In [3]:
import tensorflow as tf
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

# Define a function to create the model
def create_model(learning_rate=0.001):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Flatten(input_shape=[28, 28]))
    for _ in range(5):
        model.add(tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
    model.add(tf.keras.layers.Dense(5, activation="softmax"))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])
    return model

# Create the KerasClassifier
model = KerasClassifier(build_fn=create_model, verbose=0)

# Define the hyperparameters to tune
param_grid = {
    "learning_rate": [0.001, 0.01, 0.1],
    "batch_size": [16, 32, 64],
    "epochs": [50, 100, 150]
}

# Perform grid search cross-validation
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=3)
grid_result = grid.fit(X_train, y_train)

# Print the best hyperparameters and corresponding accuracy
print("Best Hyperparameters: ", grid_result.best_params_)
print("Best Accuracy: ", grid_result.best_score_)


ModuleNotFoundError: No module named 'tensorflow.keras.wrappers'

In [4]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=[28, 28]))
for _ in range(5):
    model.add(tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
    model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(5, activation="softmax"))


In [5]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=[28, 28]))
for _ in range(5):
    model.add(tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(0.5))  # Dropout rate can be adjusted
model.add(tf.keras.layers.Dense(5, activation="softmax"))


## 2. Transfer learning.


In [None]:
a. Create a new DNN that reuses all the pretrained hidden layers of the previous
model, freezes them, and replaces the softmax output layer with a new one.
b. Train this new DNN on digits 5 to 9, using only 100 images per digit, and time how
long it takes. Despite this small number of examples, can you achieve high precision?
c. Try caching the frozen layers, and train the model again: how much faster is it now?
d. Try again reusing just four hidden layers instead of five. Can you achieve a higher
precision?
e. Now unfreeze the top two hidden layers and continue training: can you get the
model to perform even better?

In [6]:
import tensorflow as tf

# Load the pretrained model
pretrained_model = tf.keras.models.load_model("dnn_mnist.h5")

# Freeze the pretrained hidden layers
for layer in pretrained_model.layers:
    layer.trainable = False

# Create a new model with the pretrained hidden layers and a new softmax output layer
new_model = tf.keras.models.Sequential(pretrained_model.layers[:-1])  # Remove the original output layer
new_model.add(tf.keras.layers.Dense(5, activation="softmax"))  # Add a new softmax output layer


OSError: No file or directory found at dnn_mnist.h5

In [7]:
import tensorflow as tf
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split

# Load and preprocess the MNIST dataset
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()
X_train_full, X_test = X_train_full / 255.0, X_test / 255.0
X_train, X_val, y_train, y_val = train_test_split(X_train_full, y_train_full, test_size=0.1, stratify=y_train_full)

# Select only digits 5 to 9
mask_train = y_train >= 5
mask_val = y_val >= 5
X_train, y_train = X_train[mask_train][:100], y_train[mask_train][:100]
X_val, y_val = X_val[mask_val][:100], y_val[mask_val][:100]

# Compile and train the new model
new_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
new_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10)

# Evaluate the model on the test set
new_model.evaluate(X_test, y_test)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz

KeyboardInterrupt: 

In [8]:
import tensorflow as tf

# Load the pretrained model
pretrained_model = tf.keras.models.load_model("dnn_mnist.h5")

# Freeze the pretrained hidden layers and cache them
for layer in pretrained_model.layers[:-1]:
    layer.trainable = False

# Create a new model with the cached frozen layers and a new softmax output layer
new_model = tf.keras.models.Sequential(pretrained_model.layers[:-1])  # Remove the original output layer
new_model.add(tf.keras.layers.Dense(5, activation="softmax"))  # Add a new softmax output layer

# Compile and train the new model
new_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
new_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10)


OSError: No file or directory found at dnn_mnist.h5

In [9]:
import tensorflow as tf

# Load the pretrained model
pretrained_model = tf.keras.models.load_model("dnn_mnist.h5")

# Freeze only the first four hidden layers
for layer in pretrained_model.layers[:-5]:
    layer.trainable = False

# Create a new model with the first four frozen hidden layers and a new softmax output layer
new_model = tf.keras.models.Sequential(pretrained_model.layers[:-5])  # Remove the last four hidden layers
new_model.add(tf.keras.layers.Dense(5, activation="softmax"))  # Add a new softmax output layer

# Compile and train the new model
new_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
new_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10)


OSError: No file or directory found at dnn_mnist.h5

In [10]:
import tensorflow as tf

# Unfreeze the top two hidden layers
for layer in pretrained_model.layers[-2:]:
    layer.trainable = True

# Compile and train the model
new_model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])
new_model.fit(X_train, y_train, validation_data=(X_val, y_val), epochs=10)


NameError: name 'pretrained_model' is not defined

## 3. Pretraining on an auxiliary task.


In [None]:
a. In this exercise you will build a DNN that compares two MNIST digit images and
predicts whether they represent the same digit or not. Then you will reuse the lower
layers of this network to train an MNIST classifier using very little training data. Start
by building two DNNs (let’s call them DNN A and B), both similar to the one you built
earlier but without the output layer: each DNN should have five hidden layers of 100
neurons each, He initialization, and ELU activation. Next, add one more hidden layer
with 10 units on top of both DNNs. To do this, you should use
TensorFlow’s concat() function with axis=1 to concatenate the outputs of both DNNs
for each instance, then feed the result to the hidden layer. Finally, add an output
layer with a single neuron using the logistic activation function.
b. Split the MNIST training set in two sets: split #1 should containing 55,000 images,
and split #2 should contain contain 5,000 images. Create a function that generates a
training batch where each instance is a pair of MNIST images picked from split #1.
Half of the training instances should be pairs of images that belong to the same
class, while the other half should be images from different classes. For each pair, the

training label should be 0 if the images are from the same class, or 1 if they are from
different classes.
c. Train the DNN on this training set. For each image pair, you can simultaneously feed
the first image to DNN A and the second image to DNN B. The whole network will
gradually learn to tell whether two images belong to the same class or not.
d. Now create a new DNN by reusing and freezing the hidden layers of DNN A and
adding a softmax output layer on top with 10 neurons. Train this network on split #2
and see if you can achieve high performance despite having only 500 images per
class.

In [11]:
import tensorflow as tf

# Define the DNN architecture
def create_dnn():
    dnn = tf.keras.models.Sequential([
        tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
        tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
        tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
        tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
        tf.keras.layers.Dense(100, activation="elu", kernel_initializer="he_normal"),
    ])
    return dnn

# Build DNN A and B
dnn_a = create_dnn()
dnn_b = create_dnn()

# Add a hidden layer with 10 units on top of both DNNs
concat_layer = tf.keras.layers.Concatenate(axis=1)
output_layer = tf.keras.layers.Dense(1, activation="sigmoid")

inputs_a = tf.keras.Input(shape=(10,))
inputs_b = tf.keras.Input(shape=(10,))

concat = concat_layer([dnn_a(inputs_a), dnn_b(inputs_b)])
outputs = output_layer(concat)

model = tf.keras.Model(inputs=[inputs_a, inputs_b], outputs=outputs)


In [12]:
import numpy as np
from sklearn.utils import shuffle

def generate_training_batch(X_train, y_train, batch_size=32):
    X_train, y_train = shuffle(X_train, y_train)
    half_batch = batch_size // 2

    same_class_indices = np.random.choice(np.where(y_train[:-1] == y_train[1:])[0], size=half_batch)
    different_class_indices = np.random.choice(np.where(y_train[:-1] != y_train[1:])[0], size=half_batch)

    X_same_class = np.stack([X_train[same_class_indices], X_train[same_class_indices + 1]], axis=1)
    X_diff_class = np.stack([X_train[different_class_indices], X_train[different_class_indices + 1]], axis=1)
    X_batch = np.concatenate([X_same_class, X_diff_class], axis=0)

    y_batch = np.concatenate([np.zeros(half_batch), np.ones(half_batch)])

    return shuffle(X_batch, y_batch)

# Split the MNIST training set into split #1 and split #2
split1_size = 55000
split2_size = 5000

X_train_split1 = X_train[:split1_size]
y_train_split1 = y_train[:split1_size]

X_train_split2 = X_train[split1_size:split1_size + split2_size]
y_train_split2 = y_train[split1_size:split1_size + split2_size]

# Generate a training batch from split #1
X_batch, y_batch = generate_training_batch(X_train_split1, y_train_split1)


NameError: name 'X_train' is not defined

In [13]:
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
model.fit([X_batch[:, 0], X_batch[:, 1]], y_batch, epochs=10)


NameError: name 'X_batch' is not defined

In [14]:
# Freeze the hidden layers of DNN A
for layer in dnn_a.layers:
    layer.trainable = False

# Create a new model by reusing the hidden layers of DNN A and adding a softmax output layer
inputs_split2 = tf.keras.Input(shape=(10,))
concat_split2 = concat_layer([dnn_a(inputs_split2), dnn_b(inputs_split2)])
outputs_split2 = tf.keras.layers.Dense(10, activation="softmax")(concat_split2)

model_split2 = tf.keras.Model(inputs=inputs_split2, outputs=outputs_split2)

# Compile and train the new model on split #2
model_split2.compile(loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
model_split2.fit(X_train_split2, y_train_split2, epochs=10)


NameError: name 'X_train_split2' is not defined