# Introduction to Artificial Neural Networks with Keras

### Regression MLPs with Scikit-Learn

In [1]:
from sklearn.datasets import fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

mlp_reg = MLPRegressor(hidden_layer_sizes=[50, 50, 50], random_state=42)
pipeline = make_pipeline(StandardScaler(), mlp_reg)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_valid)
rmse = mean_squared_error(y_valid, y_pred, squared=False)

print(rmse)

0.6193725725047629


### Classification MLPs with Keras

In [3]:
import tensorflow as tf

fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]

# Preprocessing of data
X_train, X_valid, X_test = X_train / 255., X_valid / 255., X_test / 255.
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
 "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

# MLPs with Keras
tf.keras.utils.set_random_seed(42)
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Input(shape=[28,28]))
# model.add(tf.keras.layers.Flatten())
# model.add(tf.keras.layers.Dense(300, activation="relu"))
# model.add(tf.keras.layers.Dense(100, activation="relu"))
# model.add(tf.keras.layers.Dense(10, activation="softmax"))

model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=[28, 28]),
    tf.keras.layers.Dense(300, activation="relu"),
    tf.keras.layers.Dense(100, activation="relu"),
    tf.keras.layers.Dense(10, activation="softmax")
])

# model.summary()
# model.layers

# hidden1 = model.layers[1]
# hidden1.name

# weights, biases = hidden1.get_weights()
# weights.shape

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="sgd",
    metrics=["accuracy"]
)

history = model.fit(X_train, y_train, epochs=30, validation_data=(X_valid, y_valid))


2025-03-04 07:08:02.604008: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-04 07:08:02.607607: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-04 07:08:02.642774: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-04 07:08:02.642895: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-04 07:08:02.644339: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

Using model to do prediction for new instances: 

In [11]:
model.evaluate(X_test, y_test)

X_new = X_test[:3]
y_proba = model.predict(X_new)
y_proba.round(2)

y_proba_index = y_proba.argmax(axis=-1)
y_proba_index

y_new = y_test[:3]
y_new



array([9, 2, 1], dtype=uint8)

### Regression MLPs with Keras

In [27]:
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Preparing the data
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

# Preprocessing the data
tf.keras.utils.set_random_seed(42)
norm_layer = tf.keras.layers.Normalization(input_shape=X_train.shape[1:])
model = tf.keras.Sequential([
    norm_layer,
    tf.keras.layers.Dense(50, activation="relu"),
    tf.keras.layers.Dense(50, activation="relu"),
    tf.keras.layers.Dense(50, activation="relu"),
    tf.keras.layers.Dense(1)
])
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(loss="mse", optimizer=optimizer, metrics=["RootMeanSquaredError"])
norm_layer.adapt(X_train)

# Training the model
history = model.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid))

# Evaluating the model
mse_test, rmse_test = model.evaluate(X_test, y_test)
X_new = X_test[:3]
y_pred = model.predict(X_new)
y_pred

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


array([[0.43979013],
       [1.0232092 ],
       [4.885579  ]], dtype=float32)

### Keras Functinal APIs for building Complex Models

In [35]:
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Preparing the data
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

# Preprocessing the data
tf.keras.utils.set_random_seed(42)
norm_layer = tf.keras.layers.Normalization()

# Building the model
hidden_layer1 = tf.keras.layers.Dense(30, activation="relu")
hidden_layer2 = tf.keras.layers.Dense(30, activation="relu")
concat_layer = tf.keras.layers.Concatenate()
output_layer = tf.keras.layers.Dense(1)

input_ = tf.keras.layers.Input(shape=X_train.shape[1:])
normalized = norm_layer(input_)
hidden1 = hidden_layer1(normalized)
hidden2 = hidden_layer2(hidden1)
concat = concat_layer([input_, hidden2])
output = output_layer(concat)

model = tf.keras.Model(inputs=[input_], outputs=[output])

model.summary()



Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_2 (InputLayer)        [(None, 8)]                  0         []                            
                                                                                                  
 normalization_6 (Normaliza  (None, 8)                    17        ['input_2[0][0]']             
 tion)                                                                                            
                                                                                                  
 dense_22 (Dense)            (None, 30)                   270       ['normalization_6[0][0]']     
                                                                                                  
 dense_23 (Dense)            (None, 30)                   930       ['dense_22[0][0]']      

With multiple outputs: 

In [14]:
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Preparing the data
housing = fetch_california_housing()
X_train_full, X_test, y_train_full, y_test = train_test_split(
    housing.data, housing.target, random_state=42)
X_train, X_valid, y_train, y_valid = train_test_split(
    X_train_full, y_train_full, random_state=42)

# Preprocessing the data
tf.keras.utils.set_random_seed(42)

# Building the model
norm_layer = tf.keras.layers.Normalization()

input_wide = tf.keras.layers.Input(
    shape=[5],
    name="input_wide"
)
input_deep = tf.keras.layers.Input(
    shape=[6],
    name="input_deep"
)

norm_layer_wide = tf.keras.layers.Normalization()
norm_layer_deep = tf.keras.layers.Normalization()

norm_wide = norm_layer_wide(input_wide)
norm_deep = norm_layer_deep(input_deep)

hidden1 = tf.keras.layers.Dense(30, activation="relu")(norm_deep)
hidden2 = tf.keras.layers.Dense(30, activation="relu")(hidden1)
concat = tf.keras.layers.concatenate([norm_wide, hidden2])
output = tf.keras.layers.Dense(1, name="output")(concat)
aux_output = tf.keras.layers.Dense(1, name="aux_output")(hidden2)

# model = tf.keras.Model(inputs=[input_wide, input_deep], outputs=[output])

# For multiple outputs
model = tf.keras.Model(inputs=[input_wide, input_deep], outputs=[output, aux_output])
model.summary()

# Training the model
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

# model.compile(loss="mse", optimizer=optimizer, metrics=["RootMeanSquaredError"])
# For multiple outputs
model.compile(
    loss=["mse", "mse"],
    loss_weights=[0.9, 0.1],
    optimizer=optimizer, metrics=["RootMeanSquaredError"]
)


X_train_wide, X_train_deep = X_train[:, :5], X_train[:, 2:]
X_valid_wide, X_valid_deep = X_valid[:, :5], X_valid[:, 2:]
X_test_wide, X_test_deep = X_test[:, :5], X_test[:, 2:]
X_new_wide, X_new_deep = X_test[:3, :5], X_test[:3, 2:]

norm_layer_wide.adapt(X_train_wide)
norm_layer_deep.adapt(X_train_deep)

# history = model.fit(
#     {
#         "input_wide": X_train_wide,
#         "input_deep": X_train_deep
#     },
#     y_train,
#     epochs=20,
#     validation_data=(
#         {
#             "input_wide": X_valid_wide,
#             "input_deep": X_valid_deep
#         },
#         y_valid
#     )
# )

checkpoint_cb = tf.keras.callbacks.ModelCheckpoint("my_checkpoints", save_weights_only=True)
early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)

history = model.fit(
    {
        "input_wide": X_train_wide,
        "input_deep": X_train_deep
    },
    {
        "output": y_train,
        "aux_output": y_train
    },
    epochs=20,
    validation_data=(
        {
            "input_wide": X_valid_wide,
            "input_deep": X_valid_deep
        },
        {
            "output": y_valid,
            "aux_output": y_valid
        }
    ),
    callbacks=[
        checkpoint_cb,
        early_stopping_cb
    ]
)

# mse_test = model.evaluate(
#     {
#         "input_wide": X_test_wide,
#         "input_deep": X_test_deep
#     },
#     y_test
# )

eval_result = model.evaluate(
    {
        "input_wide": X_test_wide,
        "input_deep": X_test_deep
    },
    {
        "output": y_test,
        "aux_output": y_test
    }
)

# y_pred = model.predict(
#     {
#         "input_wide": X_new_wide,
#         "input_deep": X_new_deep
#     }
# )

y_pred_main, y_pred_aux = model.predict(
    {
        "input_wide": X_new_wide,
        "input_deep": X_new_deep
    }
)

print(y_pred_main)
print(y_pred_aux)


Model: "model_4"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_deep (InputLayer)     [(None, 6)]                  0         []                            
                                                                                                  
 normalization_20 (Normaliz  (None, 6)                    13        ['input_deep[0][0]']          
 ation)                                                                                           
                                                                                                  
 input_wide (InputLayer)     [(None, 5)]                  0         []                            
                                                                                                  
 dense_17 (Dense)            (None, 30)                   210       ['normalization_20[0][0]

### Fine-Tuning with Keras Tuner by build_model()

In [20]:
%pip install -q -U keras-tuner

import keras_tuner as kt
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

def build_model(hp):
    n_hidden = hp.Int("n_hidden", min_value=0, max_value=8, default=2)
    n_neurons = hp.Int("n_neurons", min_value=16, max_value=256)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    optimizer = hp.Choice("optimizer", values=["sgd", "adam"])
    if optimizer == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten())
    for _ in range(n_hidden):
        model.add(tf.keras.layers.Dense(n_neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

# Preparing the data
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]
X_train, X_valid, X_test = X_train / 255., X_valid / 255., X_test / 255.
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
 "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

# Preprocessing the data
tf.keras.utils.set_random_seed(42)

# Training models
random_search_tuner = kt.RandomSearch(
    build_model, objective="val_accuracy", max_trials=3, overwrite=True,
    directory="my_fashion_mnist", project_name="my_rnd_search", seed=42
)

random_search_tuner.search(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

# Retrieving the best model
top3_models = random_search_tuner.get_best_models(num_models=3)
best_model = top3_models[0]

# Retrieving the best params
top3_params = random_search_tuner.get_best_hyperparameters(num_trials=3)
best_params = top3_params[0]
best_params.values

# Retrieving the best trial
best_trial = random_search_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()


Trial 3 Complete [00h 01m 26s]
val_accuracy: 0.004392764996737242

Best val_accuracy So Far: 0.004392764996737242
Total elapsed time: 00h 03m 45s
Trial 0 summary
Hyperparameters:
n_hidden: 5
n_neurons: 25
learning_rate: 0.0006562536901904111
optimizer: sgd
Score: 0.004392764996737242


### Fine-Tuning with Keras Tuner by HyperModel

In [3]:
%pip install -q -U keras-tuner

import keras_tuner as kt
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Function for model building
def build_model(hp):
    n_hidden = hp.Int("n_hidden", min_value=0, max_value=8, default=2)
    n_neurons = hp.Int("n_neurons", min_value=16, max_value=256)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    optimizer = hp.Choice("optimizer", values=["sgd", "adam"])
    if optimizer == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten())
    for _ in range(n_hidden):
        model.add(tf.keras.layers.Dense(n_neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

# Class for tweaking model fitting parameters
class MyClassificationHyperModel(kt.HyperModel):
    # Building a model
    def build(self, hp):
        return build_model(hp)
    # Fitting a model and return the history object
    def fit(self, hp, model, X, y, **kwargs):
        if hp.Boolean("normalize"):
            norm_layer = tf.keras.layers.Normalization()
            X = norm_layer(X)
        return model.fit(X, y, **kwargs)

# Preparing the data
fashion_mnist = tf.keras.datasets.fashion_mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist
X_train, y_train = X_train_full[:-5000], y_train_full[:-5000]
X_valid, y_valid = X_train_full[-5000:], y_train_full[-5000:]

# Preprocessing of data
X_train, X_valid, X_test = X_train / 255., X_valid / 255., X_test / 255.
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
 "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

# Preprocessing the data
tf.keras.utils.set_random_seed(42)

# Training models
hyperband_tuner = kt.Hyperband(
    MyClassificationHyperModel(),
    objective="val_accuracy",
    max_epochs=10,
    seed=42,
    factor=3,
    hyperband_iterations=2,
    overwrite=True,
    directory="my_fashion_mnist",
    project_name="hyperband"
)

bayesian_opt_tuner = kt.BayesianOptimization(
    MyClassificationHyperModel(),
    objective="val_accuracy",
    seed=42,
    max_trials=10,
    alpha=1e-4,
    beta=2.6,
    overwrite=True,
    directory="my_fashion_mnist",
    project_name="bayesian_opt"
)

hyperband_tuner.search(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

# Retrieving the best model
top3_models = hyperband_tuner.get_best_models(num_models=3)
best_model = top3_models[0]

# Retrieving the best params
top3_params = hyperband_tuner.get_best_hyperparameters(num_trials=3)
best_params = top3_params[0]
best_params.values

# Retrieving the best trial
best_trial = hyperband_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()

Trial 60 Complete [00h 01m 06s]
val_accuracy: 0.8425999879837036

Best val_accuracy So Far: 0.8880000114440918
Total elapsed time: 00h 38m 34s
Trial 0055 summary
Hyperparameters:
n_hidden: 7
n_neurons: 247
learning_rate: 0.0003987720809096887
optimizer: adam
normalize: True
tuner/epochs: 10
tuner/initial_epoch: 4
tuner/bracket: 1
tuner/round: 1
tuner/trial_id: 0049
Score: 0.8880000114440918


### Training MNIST Dataset

In [1]:
%pip install -q -U keras-tuner

import keras_tuner as kt
import tensorflow as tf
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Function for model building
def build_model(hp):
    n_hidden = hp.Int("n_hidden", min_value=0, max_value=8, default=2)
    n_neurons = hp.Int("n_neurons", min_value=16, max_value=256)
    learning_rate = hp.Float("learning_rate", min_value=1e-4, max_value=1e-2, sampling="log")
    optimizer = hp.Choice("optimizer", values=["sgd", "adam"])
    if optimizer == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    else:
        optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Flatten())
    for _ in range(n_hidden):
        model.add(tf.keras.layers.Dense(n_neurons, activation="relu"))
    model.add(tf.keras.layers.Dense(10, activation="softmax"))
    model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
    return model

# Class for tweaking model fitting parameters
class MyClassificationHyperModel(kt.HyperModel):
    # Building a model
    def build(self, hp):
        return build_model(hp)
    # Fitting a model and return the history object
    def fit(self, hp, model, X, y, **kwargs):
        if hp.Boolean("normalize"):
            norm_layer = tf.keras.layers.Normalization()
            X = norm_layer(X)
        return model.fit(X, y, **kwargs)

# Preparing the data
fashion_mnist = tf.keras.datasets.mnist.load_data()
(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist
X_train, y_train = X_train_full[:-10000], y_train_full[:-10000]
X_valid, y_valid = X_train_full[-10000:], y_train_full[-10000:]

# Preprocessing of data
X_train, X_valid, X_test = X_train / 255., X_valid / 255., X_test / 255.
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat",
 "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]

# Preprocessing the data
tf.keras.utils.set_random_seed(42)

# Training models
bayesian_opt_tuner = kt.BayesianOptimization(
    MyClassificationHyperModel(),
    objective="val_accuracy",
    seed=42,
    max_trials=10,
    alpha=1e-4,
    beta=2.6,
    overwrite=True,
    directory="my_fashion_mnist",
    project_name="bayesian_opt"
)

bayesian_opt_tuner.search(X_train, y_train, epochs=10, validation_data=(X_valid, y_valid))

# Retrieving the best model
top3_models = bayesian_opt_tuner.get_best_models(num_models=3)
best_model = top3_models[0]

# Retrieving the best params
top3_params = bayesian_opt_tuner.get_best_hyperparameters(num_trials=3)
best_params = top3_params[0]
best_params.values

# Retrieving the best trial
best_trial = bayesian_opt_tuner.oracle.get_best_trials(num_trials=1)[0]
best_trial.summary()

Trial 10 Complete [00h 02m 24s]
val_accuracy: 0.7185999751091003

Best val_accuracy So Far: 0.9779000282287598
Total elapsed time: 00h 17m 52s
Trial 02 summary
Hyperparameters:
n_hidden: 2
n_neurons: 251
learning_rate: 0.001715074355925934
optimizer: adam
normalize: False
Score: 0.9779000282287598


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=968d3c27-50e7-4d42-bdd9-442f6904c1c2' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>