In [5]:
import tensorflow.keras as keras

# shape of a single, un-flattened input sample
INPUT_SHAPE = [32, 32, 3]
# length of one sample after flattening (32 * 32 * 3 = 3072), derived from INPUT_SHAPE
N_INPUT_FEATURES = INPUT_SHAPE[0] * INPUT_SHAPE[1] * INPUT_SHAPE[2]
# width of the softmax output layer
N_OUTPUT_CLASSES = 10

def create_dnn_model(n_hidden=20, n_neurons=100, activation="elu", kernel_initializer="he_normal", lr=0.001,
                     use_batch_norm=False, flatten=True):
    """
    Build and compile a fully connected softmax classifier.

    Parameters
    ----------
    n_hidden : number of hidden Dense layers.
    n_neurons : number of units in every hidden Dense layer.
    activation : activation passed to every hidden Dense layer.
    kernel_initializer : kernel initializer passed to every hidden Dense layer.
    lr : learning rate handed to the Nadam optimizer.
    use_batch_norm : when True, a BatchNormalization layer is placed right after
        the input layer and after every hidden Dense layer.
    flatten : when True, the model starts with a Flatten layer taking samples of
        shape INPUT_SHAPE; when False, it starts with an InputLayer taking
        vectors of length N_INPUT_FEATURES.

    Returns
    -------
    A compiled keras Sequential model (sparse categorical cross-entropy loss,
    accuracy metric, Nadam optimizer).
    """
    layers = keras.layers

    # input layer: either flatten the samples inside the model or accept pre-flattened vectors
    if flatten:
        stack = [layers.Flatten(input_shape=INPUT_SHAPE)]
    else:
        stack = [layers.InputLayer(input_shape=[N_INPUT_FEATURES])]
    if use_batch_norm:
        stack.append(layers.BatchNormalization())

    # hidden layers, each optionally followed by batch normalization
    for _ in range(n_hidden):
        stack.append(layers.Dense(n_neurons, activation=activation, kernel_initializer=kernel_initializer))
        if use_batch_norm:
            stack.append(layers.BatchNormalization())

    # output layer: one softmax unit per class
    stack.append(layers.Dense(N_OUTPUT_CLASSES, activation="softmax"))

    # assemble the layers in the order they were created, then configure training
    model = keras.models.Sequential(stack)
    model.compile(optimizer=keras.optimizers.Nadam(learning_rate=lr),
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model

In [2]:
from tensorflow.keras.datasets import cifar10

# load CIFAR-10, which arrives already divided into a training part and a test part
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

In [3]:
from sklearn.model_selection import train_test_split

# create a validation set: hold out 20% of the training data, with a fixed seed so the split is repeatable
X_train_small, X_val, y_train_small, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

In [4]:
import os
import time

# root directory under which every training run gets its own log sub-directory
LOG_ROOT = 'train_logs'

def generate_log_dir(model_name):
    """
    Build the log directory path for one training run of a model.

    Parameters
    ----------
    model_name : label placed at the start of the run id; it is used verbatim.

    Returns
    -------
    The path LOG_ROOT/<model_name>_<YYYY_MM_DD-HH_MM_SS>, where the timestamp
    is taken at the moment of the call. The directory itself is not created.
    """
    # format only the timestamp: passing model_name through strftime would
    # expand (or choke on) any '%' characters the name happens to contain
    timestamp = time.strftime("%Y_%m_%d-%H_%M_%S")
    run_id = f"{model_name}_{timestamp}"
    return os.path.join(LOG_ROOT, run_id)

In [6]:
# create and train a basic DNN model
model = create_dnn_model(lr=5e-5)

model_name = "cifar10_dnn"
model_filepath = f"models/{model_name}.h5"

# make sure the checkpoint directory exists before training starts; saving the
# .h5 file may fail on a fresh checkout if it is missing (os is imported above)
os.makedirs(os.path.dirname(model_filepath), exist_ok=True)

callbacks = []
# stop once the monitored quantity has gone 20 epochs without improving
callbacks.append(keras.callbacks.EarlyStopping(patience=20))
# write TensorBoard logs to a fresh, timestamped directory for this run
callbacks.append(keras.callbacks.TensorBoard(log_dir=generate_log_dir(model_name)))
# keep only the best model seen so far on disk
callbacks.append(keras.callbacks.ModelCheckpoint(model_filepath, save_best_only=True))

# epochs=1000 is just an upper bound; early stopping is expected to end the run
model.fit(X_train_small, y_train_small, validation_data=(X_val, y_val), epochs=1000, callbacks=callbacks, verbose=2)

Train on 40000 samples, validate on 10000 samples
Epoch 1/1000
40000/40000 - 12s - loss: 5.5842 - accuracy: 0.1652 - val_loss: 2.2526 - val_accuracy: 0.1906
Epoch 2/1000
40000/40000 - 9s - loss: 2.1346 - accuracy: 0.2286 - val_loss: 2.0641 - val_accuracy: 0.2432
Epoch 3/1000
40000/40000 - 9s - loss: 2.0065 - accuracy: 0.2670 - val_loss: 1.9579 - val_accuracy: 0.2883
Epoch 4/1000
40000/40000 - 9s - loss: 1.9329 - accuracy: 0.2944 - val_loss: 1.9297 - val_accuracy: 0.3004
Epoch 5/1000
40000/40000 - 9s - loss: 1.8681 - accuracy: 0.3178 - val_loss: 1.8475 - val_accuracy: 0.3380
Epoch 6/1000
40000/40000 - 9s - loss: 1.8211 - accuracy: 0.3372 - val_loss: 1.8262 - val_accuracy: 0.3478
Epoch 7/1000
40000/40000 - 9s - loss: 1.7750 - accuracy: 0.3544 - val_loss: 1.7791 - val_accuracy: 0.3562
Epoch 8/1000
40000/40000 - 9s - loss: 1.7384 - accuracy: 0.3683 - val_loss: 1.7690 - val_accuracy: 0.3637
Epoch 9/1000
40000/40000 - 9s - loss: 1.7056 - accuracy: 0.3849 - val_loss: 1.7052 - val_accuracy: 0.

<tensorflow.python.keras.callbacks.History at 0x7f20ced265c0>

In [7]:
# reload the checkpoint file of the basic model and score it on the validation set
model = keras.models.load_model("models/cifar10_dnn.h5")
model.evaluate(X_val, y_val)



[1.5774483856201171, 0.4432]

The model with the lowest validation loss reaches 44.3% validation accuracy. It took 46 epochs to reach this result. Each epoch took about 9s on my machine.

Let's see if Batch Normalization will improve the performance.

In [11]:
# create and train a DNN model with Batch Normalization
model = create_dnn_model(lr=1e-4, use_batch_norm=True)

model_name = "cifar10_dnn_bn"
model_filepath = f"models/{model_name}.h5"

# make sure the checkpoint directory exists before training starts; saving the
# .h5 file may fail on a fresh checkout if it is missing (os is imported above)
os.makedirs(os.path.dirname(model_filepath), exist_ok=True)

callbacks = []
# stop once the monitored quantity has gone 20 epochs without improving
callbacks.append(keras.callbacks.EarlyStopping(patience=20))
# write TensorBoard logs to a fresh, timestamped directory for this run
callbacks.append(keras.callbacks.TensorBoard(log_dir=generate_log_dir(model_name)))
# keep only the best model seen so far on disk
callbacks.append(keras.callbacks.ModelCheckpoint(model_filepath, save_best_only=True))

# epochs=1000 is just an upper bound; early stopping is expected to end the run
model.fit(X_train_small, y_train_small, validation_data=(X_val, y_val), epochs=1000, callbacks=callbacks, verbose=2)

Train on 40000 samples, validate on 10000 samples
Epoch 1/1000
40000/40000 - 25s - loss: 2.1563 - accuracy: 0.2367 - val_loss: 1.8890 - val_accuracy: 0.3200
Epoch 2/1000
40000/40000 - 20s - loss: 1.8734 - accuracy: 0.3256 - val_loss: 1.7508 - val_accuracy: 0.3692
Epoch 3/1000
40000/40000 - 19s - loss: 1.7729 - accuracy: 0.3629 - val_loss: 1.6817 - val_accuracy: 0.3964
Epoch 4/1000
40000/40000 - 18s - loss: 1.7079 - accuracy: 0.3895 - val_loss: 1.6201 - val_accuracy: 0.4185
Epoch 5/1000
40000/40000 - 19s - loss: 1.6550 - accuracy: 0.4089 - val_loss: 1.5821 - val_accuracy: 0.4375
Epoch 6/1000
40000/40000 - 18s - loss: 1.6121 - accuracy: 0.4264 - val_loss: 1.5470 - val_accuracy: 0.4446
Epoch 7/1000
40000/40000 - 19s - loss: 1.5734 - accuracy: 0.4396 - val_loss: 1.5181 - val_accuracy: 0.4570
Epoch 8/1000
40000/40000 - 19s - loss: 1.5430 - accuracy: 0.4531 - val_loss: 1.5041 - val_accuracy: 0.4583
Epoch 9/1000
40000/40000 - 20s - loss: 1.5153 - accuracy: 0.4602 - val_loss: 1.4955 - val_accu

<tensorflow.python.keras.callbacks.History at 0x7f1ffcc43cc0>

In [12]:
# reload the checkpoint file of the Batch Normalization model and score it on the validation set
model = keras.models.load_model("models/cifar10_dnn_bn.h5")
model.evaluate(X_val, y_val)



[1.3895051637649536, 0.5081]

The model with the lowest validation loss has 50.8% validation accuracy. It took 41 epochs, with each epoch taking about 18 seconds on my machine. Batch Normalization adds extra parameters and extra computation to every layer, so each epoch takes about twice as long as before. In terms of convergence, both models seem to be stuck in local optima.

Let's see if using SELU and LeCun normal initialization will improve the performance.

In [8]:
from sklearn.preprocessing import StandardScaler

# SELU requires that the input features are standardized
std_scaler = StandardScaler()

# flatten every training sample to one row, then fit the scaler on those rows and scale them
X_train_small_scaled = std_scaler.fit_transform(X_train_small.reshape(len(X_train_small), -1))

# flatten the validation samples the same way and scale them with the already-fitted scaler
X_val_scaled = std_scaler.transform(X_val.reshape(len(X_val), -1))

In [30]:
# create and train a DNN model with SELU and LeCun normal initialization
# (flatten=False because the scaled inputs are already flat vectors)
model = create_dnn_model(lr=5e-5, use_batch_norm=False, activation="selu", kernel_initializer="lecun_normal", flatten=False)

model_name = "cifar10_dnn_selu"
model_filepath = f"models/{model_name}.h5"

# make sure the checkpoint directory exists before training starts; saving the
# .h5 file may fail on a fresh checkout if it is missing (os is imported above)
os.makedirs(os.path.dirname(model_filepath), exist_ok=True)

callbacks = []
# stop once the monitored quantity has gone 20 epochs without improving
callbacks.append(keras.callbacks.EarlyStopping(patience=20))
# write TensorBoard logs to a fresh, timestamped directory for this run
callbacks.append(keras.callbacks.TensorBoard(log_dir=generate_log_dir(model_name)))
# keep only the best model seen so far on disk
callbacks.append(keras.callbacks.ModelCheckpoint(model_filepath, save_best_only=True))

# epochs=1000 is just an upper bound; early stopping is expected to end the run
model.fit(X_train_small_scaled, y_train_small, validation_data=(X_val_scaled, y_val), epochs=1000, callbacks=callbacks, verbose=2)

Train on 40000 samples, validate on 10000 samples
Epoch 1/1000
40000/40000 - 11s - loss: 1.8826 - accuracy: 0.3271 - val_loss: 1.7336 - val_accuracy: 0.3759
Epoch 2/1000
40000/40000 - 9s - loss: 1.6556 - accuracy: 0.4076 - val_loss: 1.6553 - val_accuracy: 0.4166
Epoch 3/1000
40000/40000 - 9s - loss: 1.5541 - accuracy: 0.4470 - val_loss: 1.5867 - val_accuracy: 0.4342
Epoch 4/1000
40000/40000 - 9s - loss: 1.4836 - accuracy: 0.4717 - val_loss: 1.5727 - val_accuracy: 0.4418
Epoch 5/1000
40000/40000 - 9s - loss: 1.4279 - accuracy: 0.4904 - val_loss: 1.5561 - val_accuracy: 0.4485
Epoch 6/1000
40000/40000 - 9s - loss: 1.3777 - accuracy: 0.5077 - val_loss: 1.5371 - val_accuracy: 0.4551
Epoch 7/1000
40000/40000 - 9s - loss: 1.3303 - accuracy: 0.5283 - val_loss: 1.5241 - val_accuracy: 0.4656
Epoch 8/1000
40000/40000 - 10s - loss: 1.2848 - accuracy: 0.5424 - val_loss: 1.5331 - val_accuracy: 0.4676
Epoch 9/1000
40000/40000 - 9s - loss: 1.2444 - accuracy: 0.5557 - val_loss: 1.5321 - val_accuracy: 0

<tensorflow.python.keras.callbacks.History at 0x7f1ead0c9550>

In [9]:
# reload the checkpoint file of the SELU model and score it on the standardized validation set
model = keras.models.load_model("models/cifar10_dnn_selu.h5")
model.evaluate(X_val_scaled, y_val)



[1.5240588359832763, 0.4656]

The model using SELU and LeCun normal initialization reaches a validation accuracy of 46.6%: worse than the Batch Norm model (50.8%), but slightly better than the original ELU model (44.3%).