In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
import numpy as np
from tensorflow import keras
from sklear.model_selection import train_test_split

In [79]:
(X_train_full, y_train_full), (X_test, y_test) = keras.datasets.cifar10.load_data()
# Max entry is 255
X_train_full, X_test = X_train_full/255, X_test/255

X_train, X_valid, y_train, y_valid = train_test_split(X_train_full, y_train_full, test_size=0.2)

In [83]:
X_train.shape, X_valid.shape, y_train.shape, y_valid.shape

((40000, 32, 32, 3), (10000, 32, 32, 3), (40000, 1), (10000, 1))

In [102]:
def build_model(BN: bool, lr, neurons: int, layerz: int, activation: str, initializer: str):
    model = keras.Sequential([keras.layers.Flatten(input_shape=(32, 32, 3))])
    for _ in range(layerz):
        if BN: model.add(keras.layers.BatchNormalization())
        model.add(keras.layers.Dense(neurons, activation = activation, kernel_initializer = initializer))
    if BN: model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Dense(10, activation = 'softmax'))
    model.compile(loss='sparse_categorical_crossentropy', metrics=['accuracy'], optimizer = keras.optimizers.Nadam(lr=lr))
    return model

In [103]:
model_no_BN = build_model(BN=False, lr=0.001, 
                     neurons=100, layerz=20, activation='elu', initializer='he_normal')
# Default lr on Nadam is 0.001

In [98]:
model_no_BN.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100,
                callbacks = [keras.callbacks.EarlyStopping(patience=10), keras.callbacks.ReduceLROnPlateau(factor=0.5, patience=8)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100


<tensorflow.python.keras.callbacks.History at 0x160bdf159a0>

In [100]:
model_no_BN.evaluate(X_test, y_test)[1]



0.420199990272522

In [119]:
evals = []
for k in range(2, 5):
    model = build_model(BN=False, lr=k/10**4, 
                     neurons=100, layerz=20, activation='elu', initializer='he_normal')
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100,
                callbacks = [keras.callbacks.EarlyStopping(patience=10)])
    evals.append((model.evaluate(X_test, y_test)[1], k/10**4))
    print(evals)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
[(0.4941999912261963, 0.0002)]
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100

KeyboardInterrupt: 

In [121]:
evals_no_BN = evals1 # ==> best lr is 2/10**4
evals_no_BN

[(0.10000000149011612, 0.1),
 (0.10000000149011612, 0.01),
 (0.3806999921798706, 0.001),
 (0.4918999969959259, 0.0001),
 (0.490200012922287, 0.0001),
 (0.48590001463890076, 1e-05),
 (0.4327000081539154, 1e-06),
 (0.4941999912261963, 0.0002)]

## Now using batch norm:

In [125]:
evals = []
for k in range(3, 6, 2):
    model = build_model(BN=True, lr=k/10**3, 
                     neurons=100, layerz=20, activation='elu', initializer='he_normal')
    model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=100,
                callbacks = [keras.callbacks.EarlyStopping(patience=10)])
    evals.append((model.evaluate(X_test, y_test)[1], k/10**3))
    print(evals)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
[(0.5097000002861023, 0.003)]
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100


Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
[(0.5097000002861023, 0.003), (0.49810001254081726, 0.005)]


In [128]:
evals_BN ==>  # 0.001 is best lr

[(0.5238000154495239, 0.001),
 (0.5127999782562256, 0.0001),
 (0.5097000002861023, 0.003),
 (0.49810001254081726, 0.005),
 (0.5097000002861023, 0.003),
 (0.49810001254081726, 0.005)]