In [None]:
import tensorflow as tf
import os
import numpy as np
from sklearn.datasets import load_iris
from sklearn.linear_model import Perceptron
from tensorflow import keras
import pandas as pd
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import root_mean_squared_error, mean_squared_error, hinge_loss
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from tensorflow.keras.utils import plot_model
from tensorflow.keras import activations, models, layers, losses, optimizers, metrics, regularizers

In [None]:
# Build an L1 regularizer with factor 0.01. The instance is not assigned to a
# name, so it only appears as this cell's output.
regularizers.L1(0.01)

In [None]:
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

In [None]:
# Default font sizes for axis labels and tick labels, set once for later figures.
for rc_group, label_size in (('axes', 14), ('xtick', 12), ('ytick', 12)):
    mpl.rc(rc_group, labelsize=label_size)

In [None]:
# Where to save the figures: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "ann"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the directory up front; exist_ok=True makes re-running this cell harmless.
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current Matplotlib figure under IMAGES_PATH.

    Args:
        fig_id: File name without extension; also echoed in the progress message.
        tight_layout: When true, plt.tight_layout() is called before saving.
        fig_extension: File extension, also passed to savefig as the format.
        resolution: Passed to savefig as dpi.
    """
    file_name = ".".join((fig_id, fig_extension))
    path = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(path, format=fig_extension, dpi=resolution)

In [None]:
iris = load_iris()
X = iris.data[:, (2, 3)]  # petal length, petal width
# Binary target: 1 where the class label is 0, otherwise 0. The decision-boundary
# plot further down labels the positive class "Iris-Setosa".
y = (iris.target == 0).astype(np.int8)

#### Perceptron

Note: we set `max_iter` and `tol` explicitly to avoid warnings that their default values will change in future versions of Scikit-Learn.

In [None]:
# Hold out 20% of the samples as a test set; random_state pins the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

In [None]:
# Fit a Perceptron on the training split.
per_clf = Perceptron(max_iter=1000, tol=1e-3, random_state=42)
per_clf.fit(X_train, y_train)

y_pred = per_clf.predict(X_test)

# Cross-validate on the training split. cross_val_score fits the estimator on
# folds of whatever data it is given, so passing the test split would spend the
# held-out samples on fitting and ignore the training data altogether.
cv_score = cross_val_score(per_clf, X_train, y_train, cv=3, scoring="accuracy")
cv_score

In [None]:
# SGDClassifier configured with the perceptron loss, a constant learning rate
# of 1.0 and no penalty term.
sdg_clf = SGDClassifier(
    loss="perceptron",
    penalty=None,
    learning_rate="constant",
    eta0=1.0,
    max_iter=1000,
    tol=1e-3,
    random_state=42,
)
sdg_clf.fit(X_train, y_train)

y_pred = sdg_clf.predict(X_test)

# Fraction of test samples whose prediction equals the label.
accuracy = (y_pred == y_test).mean()
accuracy

In [None]:
# Decision boundary w0*x0 + w1*x1 + intercept = 0, rewritten as x1 = a*x0 + b.
w0, w1 = per_clf.coef_[0]
a = -w0 / w1                  # slope
b = -per_clf.intercept_ / w1  # y-intercept

axes = [0, 5, 0, 2] # x_min, x_max, y_min, y_max
x_min, x_max, y_min, y_max = axes

# Evaluate the classifier on a regular grid that covers the plotted region.
x0, x1 = np.meshgrid(
        np.linspace(x_min, x_max, 500).reshape(-1, 1),
        np.linspace(y_min, y_max, 200).reshape(-1, 1),
    )
X_new = np.column_stack((x0.ravel(), x1.ravel()))  # one (x0, x1) pair per grid point
y_predict = per_clf.predict(X_new)
zz = y_predict.reshape(x0.shape)                   # back to the grid's 2-D layout

plt.figure(figsize=(10, 4))           # width, height
# Scatter the samples of each class with its own marker and legend entry.
for class_value, marker, class_label in ((0, "bs", "Not Iris-Setosa"),
                                         (1, "yo", "Iris-Setosa")):
    members = y == class_value
    plt.plot(X[members, 0], X[members, 1], marker, label=class_label)

# Straight line through the boundary's values at the left and right plot edges.
plt.plot([x_min, x_max], [a * x_min + b, a * x_max + b], "k-", linewidth=3)

custom_cmap = ListedColormap(['#9898ff', '#fafab0'])

plt.contourf(x0, x1, zz, cmap=custom_cmap)
plt.xlabel("Petal length", fontsize=14)
plt.ylabel("Petal width", fontsize=14)
plt.legend(loc="lower right", fontsize=14)
plt.axis(axes)

save_fig("perceptron_iris_plot")
plt.show()

#### Activation functions

In [None]:
def sigmoid(z):
    """Logistic sigmoid 1 / (1 + exp(-z)), evaluated with NumPy."""
    decay = np.exp(-z)
    return 1 / (1 + decay)

def relu(z):
    """Rectified linear unit: the element-wise maximum of 0 and z."""
    floor = 0
    return np.maximum(floor, z)

def derivative(f, z, eps=0.000001):
    """Central-difference estimate of f's derivative at z.

    Args:
        f: Callable evaluated at z + eps and z - eps.
        z: Point (or array of points) at which to differentiate.
        eps: Half-width of the difference interval.

    Returns:
        (f(z + eps) - f(z - eps)) / (2 * eps).
    """
    ahead = f(z + eps)
    behind = f(z - eps)
    return (ahead - behind) / (2 * eps)

In [None]:
z = np.linspace(-5, 5, 200)

# (function, line format, line width, legend label) for each plotted curve.
curves = (
    (np.sign, "r-", 1, "Step"),
    (sigmoid, "g--", 2, "Sigmoid"),
    (np.tanh, "b-", 2, "Tanh"),
    (relu, "m-.", 2, "ReLU"),
)

plt.figure(figsize=(11,4))

# Left panel: the functions themselves.
plt.subplot(121)
for func, fmt, width, label in curves:
    plt.plot(z, func(z), fmt, linewidth=width, label=label)
plt.grid(True)
plt.legend(loc="center right", fontsize=14)
plt.title("Activation functions", fontsize=14)
plt.axis((-5, 5, -1.2, 1.2))

# Right panel: numerical derivatives of the same functions.
plt.subplot(122)
for func, fmt, width, label in curves:
    plt.plot(z, derivative(func, z), fmt, linewidth=width, label=label)
    if func is np.sign:
        # Two red markers at the origin, drawn right after the step curve.
        plt.plot(0, 0, "ro", markersize=5)
        plt.plot(0, 0, "rx", markersize=10)
plt.grid(True)
plt.title("Derivatives", fontsize=14)
plt.axis((-5, 5, -0.2, 1.2))

save_fig("activation_functions_plot")
plt.show()

In [None]:
def heaviside(z):
    """Step function: 1 where z >= 0 and 0 elsewhere, returned in z's own dtype."""
    is_non_negative = z >= 0
    return is_non_negative.astype(z.dtype)

def mlp_xor(x1, x2, activation=heaviside):
    """Tiny two-layer network for XOR, built from fixed weights.

    Two hidden units threshold the sum x1 + x2 at 1.5 and at 0.5; the output
    unit applies the activation to (low unit - high unit - 0.5).

    Args:
        x1, x2: Inputs (scalars or arrays NumPy can add together).
        activation: Function applied in both the hidden and the output layer.
    """
    total = x1 + x2
    high_unit = activation(total - 1.5)
    low_unit = activation(total - 0.5)
    return activation(-high_unit + low_unit - 0.5)

In [None]:
# Evaluate the XOR network on a 100x100 grid over [-0.2, 1.2] x [-0.2, 1.2].
x1s = np.linspace(-0.2, 1.2, 100)
x2s = np.linspace(-0.2, 1.2, 100)
x1, x2 = np.meshgrid(x1s, x2s)

z1 = mlp_xor(x1, x2, activation=heaviside)
z2 = mlp_xor(x1, x2, activation=sigmoid)

plt.figure(figsize=(10,4))

# One panel per activation: filled contours of the network output with the four
# corner inputs (0,0), (1,1), (0,1) and (1,0) marked on top.
panels = (
    (121, z1, "Activation function: heaviside"),
    (122, z2, "Activation function: sigmoid"),
)
for position, surface, panel_title in panels:
    plt.subplot(position)
    plt.contourf(x1, x2, surface)
    plt.plot([0, 1], [0, 1], "gs", markersize=20)
    plt.plot([0, 1], [1, 0], "y^", markersize=20)
    plt.title(panel_title, fontsize=14)
    plt.grid(True)

## Building an Image Classifier

- Let's start by loading the fashion MNIST dataset.
- Keras has a number of functions to load popular datasets in keras.datasets.
- The dataset is already split for you between a training set and a test set, but it can be useful to split the training set further to have a validation set:

In [None]:
# load_data() returns a (train, test) pair, each an (inputs, labels) tuple.
fashion_mnist = keras.datasets.fashion_mnist
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()

In [None]:
# Shape of the full training input array.
X_train_full.shape

In [None]:
# Element dtype of the full training input array.
X_train_full.dtype

In [None]:
# The first 5000 samples become the validation set, the remainder the training
# set. Inputs are divided by 255 (presumably mapping 8-bit pixel intensities to
# the [0, 1] range — confirm against the dtype shown above).
X_valid, X_train = X_train_full[:5000] / 255., X_train_full[5000:] / 255.
y_valid, y_train = y_train_full[:5000], y_train_full[5000:]
X_test = X_test / 255.

In [None]:
# You can plot an image using Matplotlib's imshow() function, with a 'binary' color map:
plt.imshow(X_train[0], cmap="binary")  # first training image
plt.axis('off')                        # hide the axes around the image
plt.show()

In [None]:
# The labels are the class IDs (represented as uint8), from 0 to 9:
y_train

In [None]:
# Here are the corresponding class names, ordered so that class_names[label]
# is the name for an integer label:
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
               "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

In [None]:
# Class name of the first training sample.
class_names[y_train[0]]

In [None]:
# Let's take a look at a sample of the images in the dataset:
n_rows = 4
n_cols = 10
plt.figure(figsize=(n_cols * 1.2, n_rows * 1.2))
# Walk the grid in row-major order: index is both the sample number and the
# zero-based subplot slot (index == n_cols * row + col).
for index, (row, col) in enumerate(np.ndindex(n_rows, n_cols)):
    plt.subplot(n_rows, n_cols, index + 1)
    plt.imshow(X_train[index], cmap="binary", interpolation="nearest")
    plt.axis('off')
    plt.title(class_names[y_train[index]], fontsize=12)
plt.subplots_adjust(wspace=0.2, hspace=0.5)
save_fig('fashion_mnist_plot', tight_layout=False)
plt.show()

In [None]:
# create a DNN model: 28x28 input -> flatten -> two ReLU hidden layers -> 10-way softmax

model = keras.models.Sequential([
    keras.layers.InputLayer(shape=[28, 28]), # input layer: one 28x28 array per sample
    keras.layers.Flatten(),  # reshape each 28x28 input into a 784-element vector
    keras.layers.Dense(300, activation="relu"),  # hidden layer 1
    keras.layers.Dense(100, activation="relu"),  # hidden layer 2
    keras.layers.Dense(10, activation="softmax") # output layer: one unit per class name
])

In [None]:
# Print the layer-by-layer summary of the model.
model.summary()

In [None]:
# NOTE(review): this assumes model.layers[1] is the first Dense layer; whether the
# InputLayer is counted in model.layers depends on the Keras version — confirm.
hidden1 = model.layers[1]
weights, biases = hidden1.get_weights()  # the layer must return exactly two arrays
weights

In [None]:
# The "sparse" loss variant is used with the integer class IDs held in y_train.
model.compile(loss="sparse_categorical_crossentropy", # loss function
              optimizer="sgd",                        # optimizer
              metrics=["accuracy"])                   # metrics to monitor

In [None]:
# Train for 30 epochs, passing the held-out validation split for monitoring.
history = model.fit(X_train,
                    y_train,
                    epochs=30,
                    validation_data=(X_valid, y_valid),
                    verbose=2)

In [None]:
# Parameters recorded on the History object returned by fit().
history.params

In [None]:
# Plot every series stored in history.history, one line per column.
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0.2, 0.95)  # fixed y-range; values outside it are clipped from view
save_fig("keras_learning_curves_plot")
plt.show()

In [None]:
# Evaluate the trained classifier on the test split.
model.evaluate(X_test, y_test)

In [None]:
X_new = X_test[:3]             # pretend we have 3 new images
y_proba = model.predict(X_new) # one row of softmax outputs per image
y_proba.round(2)               # rounded to two decimals for display

In [None]:
# Index of the largest output along the last axis, i.e. the predicted class ID.
y_pred = np.argmax(model.predict(X_new), axis=-1)
y_pred

In [None]:
# Translate the predicted class IDs into class names.
np.array(class_names)[y_pred]

In [None]:
# Labels of the same three test samples used for X_new.
y_new = y_test[:3]
y_new

In [None]:
# Show the three "new" images side by side, each titled with its test label.
plt.figure(figsize=(7.2, 2.4))
for index in range(len(X_new)):
    image = X_new[index]
    plt.subplot(1, 3, index + 1)
    plt.imshow(image, cmap="binary", interpolation="nearest")
    plt.axis('off')
    plt.title(class_names[y_test[index]], fontsize=12)
plt.subplots_adjust(wspace=0.2, hspace=0.5)
save_fig('fashion_mnist_images_plot', tight_layout=False)
plt.show()

# Regression MLP
- Let's load, split and scale the California housing dataset (the original one, not the modified one as in chapter 2):

In [None]:
housing = fetch_california_housing()

# Two successive splits: full-train/test first, then train/validation.
X_train_full, X_test, y_train_full, y_test = train_test_split(housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, random_state=42)

# Fit the scaler on the training split only, then apply the same transform to
# the validation and test splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)

In [None]:
# Seed NumPy's and TensorFlow's global random generators.
np.random.seed(42)
tf.random.set_seed(42)

In [None]:
shape = X_train.shape[1:]  # per-sample input shape (everything after the first axis)
model = keras.models.Sequential([
    keras.layers.InputLayer(shape=shape),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)  # single output unit, no activation
])
model.compile(loss="mean_squared_error", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)
X_new = X_test[:3]  # first three test rows stand in for new samples
y_pred = model.predict(X_new)

In [None]:
# Plot every series recorded in history.history (no legend is added).
plt.plot(pd.DataFrame(history.history))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()

In [None]:
# Predictions for the three rows in X_new.
y_pred

#### Functional API
Not all neural network models are simply sequential. Some may have complex topologies. Some may have multiple inputs and/or multiple outputs. For example, a Wide & Deep neural network (see paper) connects all or part of the inputs directly to the output layer.

In [None]:
# Wide & Deep with a single input: the raw input is concatenated with the output
# of the second hidden layer before the final Dense layer.
input_ = keras.layers.Input(shape=X_train.shape[1:])
hidden1 = keras.layers.Dense(30, activation="relu")(input_)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_, hidden2])  # wide path joined with deep path
output = keras.layers.Dense(1)(concat)
model = keras.models.Model(inputs=[input_], outputs=[output])


model.summary()

In [None]:
# Same loss, optimizer and epoch count as the Sequential regression model above.
model.compile(loss="mean_squared_error",
              optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)
y_pred = model.predict(X_new)  # X_new is reused from the earlier regression cell

In [None]:
# Plot every series recorded in history.history (no legend is added).
plt.plot(pd.DataFrame(history.history))
plt.grid(True)
plt.gca().set_ylim(0.35, 1.1)
plt.show()

In [None]:
# What if you want to send different subsets of input features through the wide or deep paths?
# We will send 5 features through the wide path (features 0 to 4), and 6 through
# the deep path (features 2 to 7).
# Note that 3 features will go through both (features 2, 3 and 4).
input_A = layers.Input(shape=[5], name="wide_input")
input_B = layers.Input(shape=[6], name="deep_input")
hidden1 = layers.Dense(30, activation=activations.relu)(input_B)
hidden2 = layers.Dense(30, activation=activations.relu)(hidden1)
concat = layers.concatenate([input_A, hidden2])
output = layers.Dense(1, name="output")(concat)
model = models.Model(inputs=[input_A, input_B], outputs=[output])

model.compile(loss="mse",
              optimizer=keras.optimizers.SGD(learning_rate=1e-3))

# Column slices matching the two inputs: [:, :5] for the wide path and [:, 2:]
# for the deep path.
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]

# Inputs are passed as (wide, deep) tuples, in the order of the model's inputs list.
history = model.fit((X_train_A, X_train_B),
                    y_train,
                    epochs=20,
                    validation_data=((X_valid_A, X_valid_B), y_valid))
mse_test = model.evaluate((X_test_A, X_test_B), y_test)
y_pred = model.predict((X_new_A, X_new_B))

In [None]:
# Plot every series recorded in history.history (no legend is added).
plt.plot(pd.DataFrame(history.history))
plt.grid(True)
plt.gca().set_ylim(0.35, 1.1)
plt.show()

In [None]:
# Adding an auxiliary output for regularization:
np.random.seed(42)
tf.random.set_seed(42)

input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="main_output")(concat)
aux_output = keras.layers.Dense(1, name="aux_output")(hidden2)  # reads the deep path only
model = keras.models.Model(inputs=[input_A, input_B],
                           outputs=[output, aux_output])

# One loss per output, listed in the same order as outputs=[output, aux_output];
# the weights 0.9 / 0.1 are given in that order too.
model.compile(loss=["mse", "mse"],
              loss_weights=[0.9, 0.1],
              optimizer=keras.optimizers.SGD(learning_rate=1e-3))

# Both outputs are trained against the same target, so the labels are passed twice.
history = model.fit([X_train_A, X_train_B], [y_train, y_train], epochs=20,
                    validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))

# NOTE(review): unpacking three values assumes evaluate() returns the total loss
# followed by one loss per output — confirm for the installed Keras version.
total_loss, main_loss, aux_loss = model.evaluate(
    [X_test_A, X_test_B], [y_test, y_test])
y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])

plt.plot(pd.DataFrame(history.history))
plt.grid(True)
plt.gca().set_ylim(0.35, 1.1)
plt.show()

### The subclassing API

In [None]:
class WideAndDeepModel(keras.models.Model):
    """Wide & Deep model with a main output and an auxiliary output.

    The deep input passes through two Dense layers. The main output reads the
    wide input concatenated with the second hidden layer; the auxiliary output
    reads the second hidden layer alone.
    """

    def __init__(self, units=30, activation="relu", **kwargs):
        """Create the four Dense layers; extra kwargs go to keras.models.Model."""
        super().__init__(**kwargs)
        dense = keras.layers.Dense
        self.hidden1 = dense(units, activation=activation)
        self.hidden2 = dense(units, activation=activation)
        self.main_output = dense(1)
        self.aux_output = dense(1)

    def call(self, inputs):
        """Return (main_output, aux_output) for an (input_A, input_B) pair."""
        wide, deep = inputs
        deep_features = self.hidden2(self.hidden1(deep))
        merged = keras.layers.concatenate([wide, deep_features])
        return self.main_output(merged), self.aux_output(deep_features)

model = WideAndDeepModel(30, activation="relu")

# Two MSE losses weighted 0.9 / 0.1 (presumably matched positionally to the
# (main_output, aux_output) tuple returned by call() — confirm).
model.compile(loss=["mse", "mse"],
              loss_weights=[0.9, 0.1],
              optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit((X_train_A, X_train_B),
                    (y_train, y_train),
                    epochs=10,
                    validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)))
# NOTE(review): unpacking three values assumes evaluate() returns the total loss
# followed by one loss per output — confirm for the installed Keras version.
total_loss, main_loss, aux_loss = model.evaluate((X_test_A, X_test_B), (y_test, y_test))
y_pred_main, y_pred_aux = model.predict((X_new_A, X_new_B))

plt.plot(pd.DataFrame(history.history))
plt.grid(True)
plt.gca().set_ylim(0.52, 5)
plt.show()

#### Saving and Restoring

In [None]:
np.random.seed(42)
tf.random.set_seed(42)

# Small Sequential regressor that the following cells save and reload.
model = keras.models.Sequential([
    keras.layers.InputLayer(shape=[8]),  # eight input features, hard-coded
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(30, activation="relu"),
    keras.layers.Dense(1)
])

model.compile(loss="mse",
              optimizer=keras.optimizers.SGD(learning_rate=1e-3))
history = model.fit(X_train,
                    y_train,
                    epochs=10,
                    validation_data=(X_valid, y_valid))
mse_test = model.evaluate(X_test, y_test)

In [None]:
# Save the whole model to a .keras file in the current working directory.
model.save("my_keras_model.keras")

In [None]:
# Load from the path the previous cell saved to (the working directory). The
# former "models/" prefix pointed at a file that nothing has written yet at this
# point of a fresh run.
model = keras.models.load_model("my_keras_model.keras")

In [None]:
# Predict with the reloaded model on the three rows kept in X_new.
model.predict(X_new)

In [None]:
# Save only the model's weights to a file in the working directory.
model.save_weights("my_keras_weights.weights.h5")

In [None]:
# Load the weights back from the file written by the previous cell.
model.load_weights("my_keras_weights.weights.h5")

### Using Callbacks during Training

In [None]:
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

model = keras.models.Sequential([
    keras.layers.InputLayer(shape=[8]),
    keras.layers.Dense(32, activation=activations.relu),
    keras.layers.Dense(16, activation=activations.relu),
    keras.layers.Dense(1, activation=activations.softplus)  # softplus output
])

model.compile(loss=losses.Huber(),
              optimizer=optimizers.Adam(learning_rate=1e-3, epsilon=0.001),
              metrics=[metrics.RootMeanSquaredError()])
# Checkpoint to models/my_keras_model.keras; save_best_only=True is set.
checkpoint_cb = keras.callbacks.ModelCheckpoint("models/my_keras_model.keras", save_best_only=True)
history = model.fit(X_train,
                    y_train,
                    epochs=10,
                    batch_size=32,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb])
model = keras.models.load_model("models/my_keras_model.keras") # rollback to best model
# NOTE(review): the model is compiled with a Huber loss and an RMSE metric, so
# despite its name mse_test is not an MSE here (presumably a [loss, metric] list
# — confirm).
mse_test = model.evaluate(X_test, y_test)

In [None]:
# display model history metrics by epoch (one line per column of history.history)
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()

In [None]:
# Recompile the reloaded model with an MSE loss and plain SGD, then keep training.
model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=1e-3))
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10,
                                                  restore_best_weights=True)
# NOTE(review): checkpoint_cb is the instance created in the previous cell, where
# the loss was Huber; confirm that reusing it after switching the loss is intended.
history = model.fit(X_train, y_train, epochs=100,
                    validation_data=(X_valid, y_valid),
                    callbacks=[checkpoint_cb,      # Save model checkpoints
                               early_stopping_cb]) # Stop if no progress for 10 epochs
mse_test = model.evaluate(X_test, y_test)

In [None]:
class PrintValTrainRatioCallback(keras.callbacks.Callback):
    """Keras callback that reports validation loss divided by training loss."""

    def on_epoch_end(self, epoch, logs):
        """Print logs["val_loss"] / logs["loss"] once an epoch has finished."""
        ratio = logs["val_loss"] / logs["loss"]
        print("\nval/train: {:.2f}".format(ratio))

val_train_ratio_cb = PrintValTrainRatioCallback() # create the callback instance
# A single epoch is enough to see the callback's printout; validation_data is
# required because the callback reads logs["val_loss"].
history = model.fit(X_train,
                    y_train,
                    epochs=1,
                    validation_data=(X_valid, y_valid),
                    callbacks=[val_train_ratio_cb]) # use the callback during training

# Wide & Deep model

In [None]:

np.random.seed(42)
tf.random.set_seed(42)


input_ = layers.Input(shape=X_train.shape[1:])
hidden1 = layers.Dense(30, activation=activations.relu)(input_)  # first hidden layer reads the input
hidden2 = layers.Dense(30, activation=activations.relu)(hidden1) # second hidden layer reads the first
concat = layers.Concatenate()([input_, hidden2])                 # concatenate input and hidden layer 2
output = layers.Dense(1, activation=activations.softplus)(concat) # output layer with softplus activation

model = models.Model(inputs=[input_], outputs=[output])          # create the model

# Huber loss with Adam, tracking RMSE as an additional metric.
model.compile(loss=losses.Huber(),
              optimizer=optimizers.Adam(learning_rate=1e-3,
                                        epsilon=0.001),
              metrics=[metrics.RootMeanSquaredError()])

history = model.fit(X_train,
                    y_train,
                    epochs=10,
                    batch_size=32,
                    validation_data=(X_valid, y_valid),
                    )

# display model history metrics by epoch
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()

# Additional Output for DNN model

In [None]:
# Column slices for the two inputs: [:, :5] feeds the wide input and [:, 2:]
# the deep input.
X_train_A, X_train_B = X_train[:, :5], X_train[:, 2:]
X_valid_A, X_valid_B = X_valid[:, :5], X_valid[:, 2:]
X_test_A, X_test_B = X_test[:, :5], X_test[:, 2:]
X_new_A, X_new_B = X_test_A[:3], X_test_B[:3]

input_A = keras.layers.Input(shape=[5], name="wide_input")
input_B = keras.layers.Input(shape=[6], name="deep_input")
hidden1 = keras.layers.Dense(30, activation="relu")(input_B)
hidden2 = keras.layers.Dense(30, activation="relu")(hidden1)
concat = keras.layers.concatenate([input_A, hidden2])
output = keras.layers.Dense(1, name="main_output")(concat)
aux_output = keras.layers.Dense(1, name="aux_output")(hidden2)  # reads the deep path only
model = keras.models.Model(inputs=[input_A, input_B],
                           outputs=[output, aux_output])

# Losses and weights are listed in the same order as outputs=[output, aux_output].
model.compile(loss=["mse", "mse"], loss_weights=[0.9, 0.1], optimizer=keras.optimizers.SGD(learning_rate=1e-3))

# Optional architecture diagram, left disabled:
# plot_model(
#     model,
#     to_file="wide_deep_model.png",
#     show_shapes=True,      # show tensor shapes
#     show_layer_names=True, # show layer names
#     expand_nested=True
# )

# Both outputs are trained against the same target, so the labels are passed twice.
history = model.fit([X_train_A, X_train_B], [y_train, y_train], epochs=20,
                    validation_data=([X_valid_A, X_valid_B], [y_valid, y_valid]))

y_pred_main, y_pred_aux = model.predict([X_new_A, X_new_B])

# display model history metrics by epoch
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()

# Dynamic Models by Subclassing (experimental; cannot be saved/cloned/loaded)

In [None]:
class WideAndDeepModel(keras.models.Model):
    """Subclassed Wide & Deep model returning a (main, auxiliary) output pair.

    NOTE(review): a class with this same name is already defined in an earlier
    cell of the notebook; this definition replaces it once the cell runs.
    """

    def __init__(self, units=30, activation="relu", **kwargs):
        """Build two hidden Dense layers and two single-unit output layers."""
        super().__init__(**kwargs)
        hidden_options = {"activation": activation}
        self.hidden1 = keras.layers.Dense(units, **hidden_options)
        self.hidden2 = layers.Dense(units, **hidden_options)
        self.main_output = keras.layers.Dense(1)
        self.aux_output = keras.layers.Dense(1)

    def call(self, inputs):
        """Run the deep path on the second input and combine it with the first."""
        wide_in, deep_in = inputs
        first = self.hidden1(deep_in)
        second = self.hidden2(first)
        # Main head sees wide input + deep features; auxiliary head sees deep features only.
        main = self.main_output(layers.concatenate([wide_in, second]))
        aux = self.aux_output(second)
        return main, aux

# Instantiate with 30 units per hidden layer and ReLU activations.
model = WideAndDeepModel(30, activation="relu")

# Callbacks

In [None]:
# NOTE(review): the heading of the preceding section states that subclassed
# models cannot be saved; confirm that checkpointing this model to a .keras file
# works as intended.
checkpoint_callback = keras.callbacks.ModelCheckpoint(
    "models/my_keras_model.keras",
    save_best_only=True
)
early_stopping_callback = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True
)

model.compile(
    loss=["mse", "mse"],
    loss_weights=[0.9, 0.1],
    optimizer=keras.optimizers.SGD(learning_rate=1e-3)
)

# The return value of fit() is not captured here, so `history` keeps whatever an
# earlier cell assigned to it.
model.fit(
    (X_train_A, X_train_B),
    (y_train, y_train),
    epochs=100,
    validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)),
    callbacks=[checkpoint_callback, # Model Checkpoint ( save best model )
               early_stopping_callback]) # Early Stopping ( stop if no progress for 10 epochs )

class PrintValTrainRatioCallback(keras.callbacks.Callback):
    """Callback that logs training progress and the validation/training loss ratio."""

    def on_epoch_end(self, epoch, logs=None):
        """Print val_loss / loss at the end of an epoch.

        Args:
            epoch: Zero-based epoch index supplied by the training loop.
            logs: Mapping of metric names to values. It defaults to None like the
                other hooks of this class.

        Nothing is printed when either loss is missing (for example when no
        validation data was given to fit()) or when the training loss is zero,
        so the callback cannot abort training with a KeyError or a division by
        zero.
        """
        logs = logs or {}
        val_loss = logs.get("val_loss")
        train_loss = logs.get("loss")
        if val_loss is None or not train_loss:
            return
        print("\nval/train: {:.2f}".format(val_loss / train_loss))

    def on_train_begin(self, logs=None):
        """Announce the start of training."""
        print("Starting training...")

    def on_train_end(self, logs=None):
        """Announce the end of training."""
        print("Training finished.")

    def on_epoch_begin(self, epoch, logs=None):
        """Announce the start of an epoch, numbered from 1."""
        print(f"Starting epoch {epoch + 1}...")

### TensorBoard

In [None]:
import os

# Parent directory for all TensorBoard runs: <current dir>/logs
root_logdir = os.path.join(os.curdir, "logs")

def get_run_logdir():
    """Return a run directory under root_logdir named after the current local time."""
    from time import strftime
    timestamp = strftime("run_%Y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, timestamp)

run_logdir = get_run_logdir() # example: "./logs/run_2024_06_01-12_00_00"

# Write TensorBoard logs for this run into its own timestamped directory.
tensorboard_callback = keras.callbacks.TensorBoard(
    log_dir=run_logdir,
    histogram_freq=1,
    profile_batch=0
)

history = model.fit(
    (X_train_A, X_train_B),
    (y_train, y_train),
    epochs=20,
    validation_data=((X_valid_A, X_valid_B), (y_valid, y_valid)),
    callbacks=[tensorboard_callback]
)

# Run TensorBoard in Jupyter Notebook

In [None]:
%load_ext tensorboard
%tensorboard --logdir=./logs --port=600

# Hyperparameter Tuning

## Using SKLearnRegressor wrapper without RandomSearch

In [None]:
from tensorflow.keras.wrappers import SKLearnRegressor
# Reset Keras' global state and reseed NumPy and TensorFlow before building models.
keras.backend.clear_session()
np.random.seed(42)
tf.random.set_seed(42)

def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3, input_shape=[8], X=None, y=None):
    """Build and compile an MLP regressor.

    Args:
        n_hidden: Number of hidden Dense layers.
        n_neurons: Units in each hidden layer.
        learning_rate: Learning rate given to the SGD optimizer.
        input_shape: Shape passed to the InputLayer.
        X, y: Accepted but unused (presumably part of the calling convention of
            the scikit-learn wrapper that invokes this builder — confirm).

    Returns:
        A Sequential model compiled with an "mse" loss.
    """
    stack = [keras.layers.InputLayer(shape=input_shape)]
    stack.extend(keras.layers.Dense(n_neurons, activation="relu") for _ in range(n_hidden))
    stack.append(keras.layers.Dense(1))
    model = keras.models.Sequential(stack)
    model.compile(loss="mse", optimizer=keras.optimizers.SGD(learning_rate=learning_rate))
    return model


# Wrap the builder so the Keras model can be driven through fit()/predict().
keras_reg = SKLearnRegressor(build_model)

# Extra keyword arguments are given to fit(); presumably the wrapper forwards
# them to the underlying Keras model's fit() — confirm against its documentation.
keras_reg.fit(X_train,
              y_train,
              epochs=100,
              verbose=0,
              validation_data=(X_valid, y_valid),
              callbacks=[keras.callbacks.EarlyStopping(patience=10)]
              )

y_pred = keras_reg.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
rmse = root_mean_squared_error(y_test, y_pred)

# Compare the first ten predictions with the corresponding targets.
y_real = y_test[:10]
y_pred_sample = y_pred[:10]

print(f'Predicted values: {y_pred_sample}')
print(f'Real values: {y_real}')

print(f'RMSE: {rmse}')
print(f'MSE Test: {mse}')

## Using KerasRegressor (SciKeras) with RandomizedSearchCV

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from scikeras.wrappers import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from scipy.stats import reciprocal
from sklearn.datasets import fetch_california_housing

# Reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Data: reload the housing set and re-split it with test_size=0.2 at both
# stages. This overwrites the X_*/y_* splits and the scaler created earlier in
# the notebook.
housing = fetch_california_housing()
X_full, X_test, y_full, y_test = train_test_split(
    housing.data, housing.target, test_size=0.2, random_state=42
)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_full, y_full, test_size=0.2, random_state=42
)
# Fit the scaler on the training split only and reuse it for the other splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_valid = scaler.transform(X_valid)
X_test = scaler.transform(X_test)
input_shape = X_train.shape[1:]  # read as a global by build_model below

# Model builder
def build_model(n_hidden=1, n_neurons=30, learning_rate=3e-3):
    """Build and compile an MLP regressor for the search.

    The input shape comes from the module-level `input_shape` variable.

    Args:
        n_hidden: Number of hidden Dense layers.
        n_neurons: Units in each hidden layer.
        learning_rate: Learning rate given to the SGD optimizer.

    Returns:
        A keras.Sequential model compiled with an "mse" loss.
    """
    net = keras.Sequential()
    net.add(keras.layers.InputLayer(shape=input_shape))
    hidden_layers = [
        keras.layers.Dense(n_neurons, activation="relu") for _ in range(n_hidden)
    ]
    for hidden_layer in hidden_layers:
        net.add(hidden_layer)
    net.add(keras.layers.Dense(1))
    sgd = keras.optimizers.SGD(learning_rate=learning_rate)
    net.compile(loss="mse", optimizer=sgd)
    return net

# SciKeras wrapper
reg = KerasRegressor(
    model=build_model,
    verbose=0
)

# Hyperparameter distributions (model build args + training args).
# Keys prefixed with "model__" match build_model's parameter names; the
# remaining keys are training settings.
param_distribs = {
    "model__n_hidden": [1, 2, 3],
    "model__n_neurons": [16, 32, 64],
    "model__learning_rate": reciprocal(1e-4, 1e-2),
    "batch_size": [32, 64],
    "epochs": [5, 10, 20]
}

# NOTE(review): with patience=10 and at most 20 epochs per candidate, early
# stopping can only trigger in the longest runs — confirm this is intended.
early_stop = keras.callbacks.EarlyStopping(
    patience=10,
    restore_best_weights=True
)

# 5 sampled candidates x 3 folds, scored by negative RMSE.
search = RandomizedSearchCV(
    estimator=reg,
    param_distributions=param_distribs,
    n_iter=5,
    cv=3,
    scoring="neg_root_mean_squared_error",
    verbose=2,
    random_state=42
)

# The extra keyword arguments are presumably forwarded to each candidate's fit()
# — confirm against the RandomizedSearchCV and SciKeras documentation.
search.fit(
    X_train,
    y_train,
    validation_data=(X_valid, y_valid),
    callbacks=[early_stop]
)

print("Best params:", search.best_params_)
best_reg = search.best_estimator_
y_pred = best_reg.predict(X_test)
# mean_squared_error returns the MSE; take its square root so the value matches
# the "RMSE" label printed below.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print("Test RMSE:", rmse)

# Correct use of BatchNormalization

In [None]:
# Each hidden block is Dense (no bias) -> BatchNormalization -> ReLU.
# "he_normal" is a weight initializer, so it belongs in kernel_initializer, and
# the number of units is Dense's first argument (`units`). The activation is
# applied as its own layer after the normalization.
model = keras.models.Sequential([
    keras.layers.InputLayer(shape=[8]),
    keras.layers.BatchNormalization(),
    keras.layers.Dense(30,
                       kernel_initializer="he_normal", # He initialization for ReLU
                       use_bias=False),                # no bias when using BatchNorm
    keras.layers.BatchNormalization(),
    keras.layers.Activation("relu"),
    keras.layers.Dense(30,
                       kernel_initializer="he_normal",
                       use_bias=False),
    keras.layers.BatchNormalization(),
    keras.layers.Activation("relu"),
    keras.layers.Dense(1)
])

# DNN Transfer Learning Example (makes sense for large DNN models and large datasets)

In [None]:
# NOTE(review): no visible cell of this notebook writes models/my_keras_model_a.keras;
# the file must already exist for this cell to run.
model_A = keras.models.load_model("models/my_keras_model_a.keras") # pre-trained model A
# The new Sequential is built from model_A's own layer objects, not copies.
model_B_ON_A = keras.models.Sequential(model_A.layers[:-1])        # all layers except the output layer
model_B_ON_A.add(keras.layers.Dense(1, activation='sigmoid'))      # new output layer

### But model_B_ON_A shares its layers with model_A, so if model_A is changed, model_B_ON_A will also change (and training model_B_ON_A modifies model_A)! To avoid this problem, we can clone model_A:

In [None]:
model_A_clone = keras.models.clone_model(model_A)                  # clone architecture
model_A_clone.set_weights(model_A.get_weights())                   # copy weights
model_B_ON_A = keras.models.Sequential(model_A_clone.layers[:-1])  # all layers except the output layer
model_B_ON_A.add(keras.layers.Dense(1, activation='sigmoid'))      # new output layer

# To avoid disturbing the reused weights at the start, freeze every layer except
# the new output layer:
for layer in model_B_ON_A.layers[:-1]:
    layer.trainable = False

# compile() is called after the trainable flags are changed.
model_B_ON_A.compile(loss="binary_crossentropy",
                       optimizer=keras.optimizers.SGD(learning_rate=1e-3),
                       metrics=["accuracy"])

# Train for a few epochs with the reused layers frozen. Afterwards all layers
# are unfrozen and training continues with a lower learning rate.
# NOTE(review): X_train / y_train are whatever earlier cells left in scope (the
# housing regression split in the visible notebook) while the loss is binary
# cross-entropy — confirm the intended dataset.
history = model_B_ON_A.fit(X_train,
                             y_train,
                             epochs=5,
                             validation_data=(X_valid, y_valid))

# Unfreeze every layer for fine-tuning.
for layer in model_B_ON_A.layers:
    layer.trainable = True

optimizer = keras.optimizers.SGD(learning_rate=1e-4) # lower learning rate
# Recompile after changing the trainable flags.
model_B_ON_A.compile(loss="binary_crossentropy",
                       optimizer=optimizer,
                       metrics=["accuracy"])

history = model_B_ON_A.fit(X_train,
                             y_train,
                             epochs=10,
                             validation_data=(X_valid, y_valid))