# Training Deep Neural Networks

### Training a DNN on the CIFAR-10 image dataset

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
import math

# Class for one cycle scheduler
class OneCycleScheduler(keras.callbacks.Callback):
    """Keras callback that sets the optimizer learning rate on a 1cycle schedule.

    The rate is updated at the start of every batch and follows three linear
    phases, indexed by the number of batches seen so far (``self.iteration``):

    1. ``[0, half_iteration)``: ramp up from ``start_rate`` to ``max_rate``.
    2. ``[half_iteration, 2 * half_iteration)``: ramp back down from
       ``max_rate`` to ``start_rate``.
    3. ``[2 * half_iteration, iterations)``: anneal from ``start_rate`` down
       to ``last_rate``.

    Once ``iterations`` batches have been seen, the rate is held at
    ``last_rate``.

    Args:
        iterations: Total number of training batches the schedule spans.
        max_rate: Peak learning rate reached at the end of phase 1.
        start_rate: Initial learning rate. Falsy values (``None`` or ``0``)
            select the default ``max_rate / 10``.
        last_iterations: Number of batches reserved for the final anneal.
            Falsy values select the default ``iterations // 10 + 1``.
        last_rate: Final learning rate. Falsy values select the default
            ``start_rate / 1000``.
    """

    def __init__(self, iterations, max_rate, start_rate=None, last_iterations=None, last_rate=None):
        # Let the base Callback set up its own state before adding ours.
        super().__init__()
        self.iterations = iterations
        self.max_rate = max_rate
        self.start_rate = start_rate or max_rate / 10
        self.last_iterations = last_iterations or iterations // 10 + 1
        # Phases 1 and 2 split whatever is left after the final anneal evenly.
        self.half_iteration = (iterations - self.last_iterations) // 2
        self.last_rate = last_rate or self.start_rate / 1000
        # Number of batches seen so far; advanced in on_batch_begin.
        self.iteration = 0

    def _interpolate(self, iter1, iter2, rate1, rate2):
        """Linearly interpolate between (iter1, rate1) and (iter2, rate2) at self.iteration."""
        return (rate2 - rate1) * (self.iteration - iter1) / (iter2 - iter1) + rate1

    def on_batch_begin(self, batch, logs=None):
        """Compute the rate for the current batch and assign it to the optimizer."""
        ramp_end = 2 * self.half_iteration
        if self.iteration < self.half_iteration:
            lr = self._interpolate(0, self.half_iteration, self.start_rate, self.max_rate)
        elif self.iteration < ramp_end:
            # Compare the running counter (iteration), not the total (iterations).
            lr = self._interpolate(self.half_iteration, ramp_end, self.max_rate, self.start_rate)
        elif self.iteration < self.iterations:
            # The final anneal starts where the ramp-down ended, so the
            # schedule is continuous at the phase boundary.
            lr = self._interpolate(ramp_end, self.iterations, self.start_rate, self.last_rate)
        else:
            # Past the planned schedule: hold the final rate instead of
            # extrapolating the line further (which would eventually go negative).
            lr = self.last_rate
        self.iteration += 1
        self.model.optimizer.learning_rate = lr

# def build_model(hp):
#     n_hidden = 20
#     n_neurons = 100

#     # Build model
#     model = keras.Sequential()
#     model.add(layers.Flatten(input_shape=(32, 32, 3)))
#     for _ in range(n_hidden):
#         model.add(layers.BatchNormalization())
#         model.add(layers.Dense(n_neurons, activation="swish", kernel_initializer="he_normal"))
#     model.add(layers.BatchNormalization())
#     model.add(layers.Dense(10, activation="softmax"))
    
#     optimizer = keras.optimizers.Nadam(learning_rate=1e-3)
#     model.compile(loss="sparse_categorical_crossentropy", optimizer=optimizer, metrics=["accuracy"])
#     return model

def build_model(hp):
    """Build and compile a deep SELU classifier for 32x32x3 images, 10 classes.

    The network flattens the input, stacks 20 Dense layers of 100 units each
    (SELU activation, LeCun-normal initialization), applies a single
    AlphaDropout (rate 0.1) and ends with a 10-way softmax layer. It is
    compiled with sparse categorical cross-entropy, a default SGD optimizer
    and the accuracy metric.

    Args:
        hp: Unused by this function; kept as part of the signature.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    depth = 20
    width = 100

    model = keras.Sequential()
    model.add(layers.Flatten(input_shape=(32, 32, 3)))

    # No Normalization layer is added inside the model.
    # Hidden stack: each layer is created lazily and added right away, so
    # construction and insertion stay interleaved.
    hidden_stack = (
        layers.Dense(width, activation="selu", kernel_initializer="lecun_normal")
        for _ in range(depth)
    )
    for dense in hidden_stack:
        model.add(dense)

    # One AlphaDropout after the whole hidden stack, then the classifier head.
    model.add(layers.AlphaDropout(rate=0.1))
    model.add(layers.Dense(10, activation="softmax"))

    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer=keras.optimizers.SGD(),
        metrics=["accuracy"],
    )
    return model

# Set random seed
tf.keras.utils.set_random_seed(42)

# Load data set
(X_train_full, y_train_full), (X_test, y_test) = tf.keras.datasets.cifar10.load_data()

# Hold out the first 5,000 training images as the validation set
X_train = X_train_full[5000:]
y_train = y_train_full[5000:]
X_valid = X_train_full[:5000]
y_valid = y_train_full[:5000]

# Standardize every split with statistics computed over axis 0 of the
# training split only, so no validation/test information leaks in
X_means = X_train.mean(axis=0)
X_stds = X_train.std(axis=0)
X_train_scaled = (X_train - X_means) / X_stds
X_valid_scaled = (X_valid - X_means) / X_stds
X_test_scaled = (X_test - X_means) / X_stds

# Train the model
batch_size = 128
n_epochs = 15
# NOTE: patience (20) is larger than n_epochs (15), so this callback can
# never stop the run early with the current settings.
early_stopping_cb = keras.callbacks.EarlyStopping(patience=20, restore_best_weights=True)
model_checkpoint_cb = keras.callbacks.ModelCheckpoint("my_cifar10_model.keras", save_best_only=True)
# The schedule spans every batch of every epoch: batches per epoch * epochs
onecycle = OneCycleScheduler(
    math.ceil(len(X_train_scaled) / batch_size) * n_epochs,
    max_rate=0.05,
)
model = build_model(None)
model.fit(
    X_train_scaled,
    y_train,
    epochs=n_epochs,
    validation_data=(X_valid_scaled, y_valid),
    batch_size=batch_size,
    callbacks=[early_stopping_cb, model_checkpoint_cb, onecycle],
)

# evaluate() returns the loss followed by the compiled metrics; the model is
# compiled with metrics=["accuracy"], so the pair is (loss, accuracy)
loss_test, accuracy_test = model.evaluate(X_test_scaled, y_test)


2025-03-12 07:30:53.672939: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-03-12 07:30:53.678837: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-12 07:30:53.709032: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-03-12 07:30:53.709096: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-03-12 07:30:53.709973: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to

<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=968d3c27-50e7-4d42-bdd9-442f6904c1c2' target="_blank">
 </img>
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>