A: Build a DNN with 20 hidden layers of 100 neurons each. Use He initialization and the ELU activation function.

B: Train the network on the CIFAR10 dataset.
<br>
keras.datasets.cifar10.load_data()
<br>
The dataset consists of 60k color images of 32 by 32 pixels in 10 classes. Use 50k for training and 10k for testing.
<br> 
Use Nadam optimization and Early Stopping. Output layer should have 10 neurons and use softmax.

In [None]:
# Import TensorFlow and the dataset
import tensorflow as tf
from tensorflow import keras

# Handle to the CIFAR-10 loader that ships with Keras.
cifar = keras.datasets.cifar10

# load_data() returns two (images, labels) pairs: the training split first,
# then the test split.
(X_train, y_train), (X_test, y_test) = (
    cifar.load_data()
)

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


In [None]:
# Dataset properties: array shape on the first line, element dtype on the second.
print(X_train.shape, X_train.dtype, sep="\n")

(50000, 32, 32, 3)
uint8


In [None]:
# Human-readable class names; the list position is the integer label.
class_names = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

In [None]:
# Build classification MLP with twenty hidden layers, 100 neurons each.

# Start from an empty Sequential model and give it a Flatten input layer;
# the 20 dense layers are appended in the next cell.
model = keras.models.Sequential()
model.add(keras.layers.Flatten())

In [None]:
# Append the 20 hidden blocks. Each block is BatchNormalization -> Dropout -> Dense.
x = 0
while x < 20:
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dropout(rate=0.2))
    hidden = keras.layers.Dense(
        100,
        activation="elu",
        kernel_initializer="he_normal",
    )
    model.add(hidden)
    x += 1
x = 19  # keep the loop variable at the value the original for-loop left behind

# Output layer: one softmax unit per class.
model.add(keras.layers.Dense(10, activation="softmax"))

In [None]:
# Early-stopping callback: waits 10 epochs without improvement, then restores
# the best weights seen so far.
early_stopping = keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=10,
)

# Compile with the optimizer (Keras resolves the built-in Nadam from its string name).
compile_options = {
    "optimizer": 'nadam',
    "loss": 'sparse_categorical_crossentropy',
    "metrics": ['accuracy'],
}
model.compile(**compile_options)

In [None]:
# Flatten every 32x32x3 image into a single 3072-element row.
# Reshaping is safe to repeat: an already-flat array keeps its shape.
X_train = X_train.reshape(-1, 32 * 32 * 3)

# Scale pixel values to [0, 1]. The division is only applied while the array
# still holds raw uint8 pixels, so re-running this cell cannot divide by 255
# a second time and silently shrink the inputs.
if X_train.dtype == "uint8":
    X_train = X_train / 255.0

In [None]:
# Train model.
# The EarlyStopping callback created above only takes effect when it is passed
# to fit(). It monitors the validation loss, so 10% of the training data is
# held out as a validation set for it to watch.
history = model.fit(
    X_train,
    y_train,
    epochs=30,
    validation_split=0.1,
    callbacks=[early_stopping],
)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Will use tutorial from: https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/
# to combat scikit-learn being unable to clone the model object because of it missing the scikit-learn standard get_params method.

# Use scikit-learn to rnd search the batch size and epochs
import numpy
from sklearn.model_selection import RandomizedSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import Nadam
# Function to create model, required for KerasClassifier
# Need to figure out how to use early stopping & power scheduling. 
# Also unsure how I'm going to build the model with one type of dropout, train it, and switch without re-instantiating it.
def create_model(batch_normalization=False, dropout_layer=False, dropout_type=None,
                 activation="elu", kernel_initializer="he_normal",
                 early_stopping=True, patience=5,
                 learn_rate=0.1, power_scheduling=False):
    """Build and compile the 20-hidden-layer MLP used by the hyperparameter searches.

    Args:
        batch_normalization: If True, put a BatchNormalization layer in front
            of every hidden Dense layer.
        dropout_layer: If True, put a dropout layer (rate 0.2) in front of
            every hidden Dense layer.
        dropout_type: Only read when ``dropout_layer`` is True. ``"alpha"``
            selects ``AlphaDropout``; any other value (including ``None`` and
            ``"mc"``) selects the standard ``Dropout`` layer.
        activation: Activation function of the hidden Dense layers.
        kernel_initializer: Weight initializer of the hidden Dense layers.
        early_stopping: Accepted for interface compatibility but not used
            here; callbacks cannot be attached while building a model.
        patience: Accepted for interface compatibility but not used here
            (see ``early_stopping``).
        learn_rate: Learning rate given to the Nadam optimizer.
        power_scheduling: Accepted for interface compatibility; not
            implemented yet.

    Returns:
        A compiled ``keras.models.Sequential`` model ending in a 10-unit
        softmax layer, using sparse categorical cross-entropy loss and the
        accuracy metric.
    """
    # Create model with Flatten layer, then add the 20 hidden blocks.
    model = keras.models.Sequential([keras.layers.Flatten()])

    # Build classification MLP with twenty hidden layers, 100 neurons each.
    for _ in range(20):
        if batch_normalization:
            model.add(keras.layers.BatchNormalization())
        if dropout_layer:
            if dropout_type == "alpha":
                model.add(keras.layers.AlphaDropout(rate=0.2))
            else:
                model.add(keras.layers.Dropout(rate=0.2))
        model.add(keras.layers.Dense(100, activation=activation,
                                     kernel_initializer=kernel_initializer))

    # Output layer: one softmax unit per class. The sparse categorical
    # cross-entropy loss below needs class probabilities, not the raw
    # activations of the last hidden layer.
    model.add(keras.layers.Dense(10, activation="softmax"))

    # Compile model. The optimizer comes from the same `keras` namespace as
    # the layers so that model and optimizer belong to one Keras package, and
    # `learning_rate` replaces the deprecated `lr` keyword.
    optimizer = keras.optimizers.Nadam(learning_rate=learn_rate)
    model.compile(optimizer=optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# RandomizedSearchCV for learning rate optimization
# Number of training epochs used for every candidate. It has to be handed to
# the wrapper, otherwise the value is never used.
epochs = 50

# Create model using the KerasClassifier wrapper
model = KerasClassifier(build_fn=create_model, epochs=epochs, verbose=1)

# Define the rnd search parameters, pass them in, and train.
learn_rate = [0.01, 0.05, 0.1, 0.2, 0.3]
param_dist = dict(learn_rate=learn_rate)
rnd = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=5, n_jobs=-1, cv=3, verbose=1, random_state=42)
rnd_result = rnd.fit(X_train, y_train)

# Summarize results: best candidate first, then mean/std test score of each one.
print("Best: %f using %s" % (rnd_result.best_score_, rnd_result.best_params_))
means = rnd_result.cv_results_['mean_test_score']
stds = rnd_result.cv_results_['std_test_score']
params = rnd_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 3 folds for each of 5 candidates, totalling 15 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 2 concurrent workers.


KeyboardInterrupt: ignored

In [None]:
# Save best parameters from previous step (except for learn_rate, which we will rnd search for after adding BatchNormalization.
# Running all hyperparameters again would be ideal but unrealistic)

# `params[0]` is the parameter dict of the first candidate, not an epoch
# count, so the values are read from the best candidate instead. A value that
# was not part of the search falls back to the current `epochs` setting, or to
# 32 for the batch size (the Keras default).
best_params = rnd_result.best_params_
epochs = best_params.get("epochs", epochs)
batch_size = best_params.get("batch_size", 32)
# printing to confirm I assigned it right
print(epochs)
print(batch_size)

In [None]:
# Try with Batch Normalization
# Considering naming the models appropriately instead of overwriting my previous one (model_BN for example)

# Create model using the KerasClassifier wrapper
model = KerasClassifier(build_fn=create_model, verbose=1)

# Define the rnd search parameters, pass them in, and train.
# Every entry of param_distributions must be a list (or a distribution), so
# settings that stay fixed are wrapped in one-element lists.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
param_dist = dict(batch_size=[batch_size], epochs=[epochs], batch_normalization=[True], learn_rate=learn_rate)
# Only len(learn_rate) distinct candidates exist, so asking for more
# iterations would just trigger a warning.
rnd = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=len(learn_rate), n_jobs=-1, cv=3, verbose=1, random_state=42)
rnd_result = rnd.fit(X_train, y_train)

# Summarize results
print("Best: %f using %s" % (rnd_result.best_score_, rnd_result.best_params_))
means = rnd_result.cv_results_['mean_test_score']
stds = rnd_result.cv_results_['std_test_score']
params = rnd_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Try with SELU instead of Batch Normalization

# Create model using the KerasClassifier wrapper
model = KerasClassifier(build_fn=create_model, verbose=1)

# Define the rnd search parameters, pass them in, and train.
# Every entry of param_distributions must be a list (or a distribution).
# A bare string such as "selu" is not a valid entry, so fixed settings are
# wrapped in one-element lists.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
param_dist = dict(batch_size=[batch_size], epochs=[epochs], activation=["selu"], kernel_initializer=["lecun_normal"], learn_rate=learn_rate)
# Only len(learn_rate) distinct candidates exist, so asking for more
# iterations would just trigger a warning.
rnd = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=len(learn_rate), n_jobs=-1, cv=3, verbose=1, random_state=42)
rnd_result = rnd.fit(X_train, y_train)

# Summarize results
print("Best: %f using %s" % (rnd_result.best_score_, rnd_result.best_params_))
means = rnd_result.cv_results_['mean_test_score']
stds = rnd_result.cv_results_['std_test_score']
params = rnd_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Try with alpha dropout
dropout_layer=True
dropout_type="alpha" #check docs for name

# Create model using the KerasClassifier wrapper
model = KerasClassifier(build_fn=create_model, verbose=1)

# Define the rnd search parameters, pass them in, and train.
# The dropout settings above are plain variables; they only reach create_model
# when they are listed in param_dist. Every entry must be a list (or a
# distribution), so fixed settings are wrapped in one-element lists.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
param_dist = dict(batch_size=[batch_size], epochs=[epochs],
                  dropout_layer=[dropout_layer], dropout_type=[dropout_type],
                  activation=["selu"], kernel_initializer=["lecun_normal"], learn_rate=learn_rate)
# Only len(learn_rate) distinct candidates exist, so asking for more
# iterations would just trigger a warning.
rnd = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=len(learn_rate), n_jobs=-1, cv=3, verbose=1, random_state=42)
rnd_result = rnd.fit(X_train, y_train)

# Summarize results
print("Best: %f using %s" % (rnd_result.best_score_, rnd_result.best_params_))
means = rnd_result.cv_results_['mean_test_score']
stds = rnd_result.cv_results_['std_test_score']
params = rnd_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Try with MC Dropout instead of alpha dropout.
# MC Dropout trains with ordinary Dropout layers; the Monte Carlo part happens
# at prediction time (calling the model with training=True and averaging
# several predictions), so the model does not need a different architecture.
dropout_layer=True
dropout_type="mc" #check docs for name

# Create model using the KerasClassifier wrapper
model = KerasClassifier(build_fn=create_model, verbose=1)

# Define the rnd search parameters, pass them in, and train.
batch_normalization=False

# The settings above are plain variables; they only reach create_model when
# they are listed in param_dist. Every entry must be a list (or a
# distribution), so fixed settings are wrapped in one-element lists.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
param_dist = dict(batch_size=[batch_size], epochs=[epochs],
                  batch_normalization=[batch_normalization],
                  dropout_layer=[dropout_layer], dropout_type=[dropout_type],
                  activation=["selu"], kernel_initializer=["lecun_normal"], learn_rate=learn_rate)
# Only len(learn_rate) distinct candidates exist, so asking for more
# iterations would just trigger a warning.
rnd = RandomizedSearchCV(estimator=model, param_distributions=param_dist, n_iter=len(learn_rate), n_jobs=-1, cv=3, verbose=1, random_state=42)
rnd_result = rnd.fit(X_train, y_train)

# Summarize results
print("Best: %f using %s" % (rnd_result.best_score_, rnd_result.best_params_))
means = rnd_result.cv_results_['mean_test_score']
stds = rnd_result.cv_results_['std_test_score']
params = rnd_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
# Try with power scheduling
