In [1]:
# save the final model to file
from keras.datasets import cifar10
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dense
from keras.layers import Flatten
from keras.optimizers import SGD

# load train and test dataset
def load_dataset():
	# load dataset
	(trainX, trainY), (testX, testY) = cifar10.load_data()
	# one hot encode target values
	trainY = to_categorical(trainY)
	testY = to_categorical(testY)
	return trainX, trainY, testX, testY

# scale pixels
def prep_pixels(train, test):
	# convert from integers to floats
	train_norm = train.astype('float32')
	test_norm = test.astype('float32')
	# normalize to range 0-1
	train_norm = train_norm / 255.0
	test_norm = test_norm / 255.0
	# return normalized images
	return train_norm, test_norm

# define cnn model
def define_model():
	model = Sequential()
	model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
	model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(MaxPooling2D((2, 2)))
	model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(MaxPooling2D((2, 2)))
	model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(MaxPooling2D((2, 2)))
	model.add(Flatten())
	model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
	model.add(Dense(10, activation='softmax'))
	# compile model
	opt = SGD(lr=0.001, momentum=0.9)
	model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
	return model

# run the test harness for evaluating a model
def run_test_harness():
	# load dataset
	trainX, trainY, testX, testY = load_dataset()
	# prepare pixel data
	trainX, testX = prep_pixels(trainX, testX)
	# define model
	model = define_model()
	# fit model
	model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
	# save model
	model.save('final_model.h5')

2023-05-18 11:52:46.397573: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-05-18 11:52:47.222965: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2023-05-18 11:52:47.223066: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/

In [2]:
# entry point, run the test harness
run_test_harness()

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz


2023-05-18 11:52:59.606431: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-18 11:52:59.618346: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-18 11:52:59.620003: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-05-18 11:52:59.622426: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild

In [3]:
# evaluate the deep model on the test dataset
from keras.datasets import cifar10
from keras.models import load_model
from keras.utils import to_categorical

# load train and test dataset
def load_dataset():
	# load dataset
	(trainX, trainY), (testX, testY) = cifar10.load_data()
	# one hot encode target values
	trainY = to_categorical(trainY)
	testY = to_categorical(testY)
	return trainX, trainY, testX, testY

# scale pixels
def prep_pixels(train, test):
	# convert from integers to floats
	train_norm = train.astype('float32')
	test_norm = test.astype('float32')
	# normalize to range 0-1
	train_norm = train_norm / 255.0
	test_norm = test_norm / 255.0
	# return normalized images
	return train_norm, test_norm

# run the test harness for evaluating a model
def run_test_harness():
	# load dataset
	trainX, trainY, testX, testY = load_dataset()
	# prepare pixel data
	trainX, testX = prep_pixels(trainX, testX)
	# load model
	model = load_model('final_model.h5')
	# evaluate model on test dataset
	_, acc = model.evaluate(testX, testY, verbose=0)
	print('> %.3f' % (acc * 100.0))

# entry point, run the test harness
run_test_harness()

> 72.990


In [4]:

# See if normalizing the data to have zero mean and standard deviation 1 improves performance

def prep_pixels_2(train, test):
	# convert from integers to floats
	train_norm = train.astype('float32')
	test_norm = test.astype('float32')
	# calculate mean and standard deviation
	train_mean, train_std = train_norm.mean(), train_norm.std()
	test_mean, test_std = test_norm.mean(), test_norm.std()
	# global standardization of pixels
	train_norm = (train_norm - train_mean) / train_std
	test_norm = (test_norm - test_mean) / test_std
	# return normalized images
	return train_norm, test_norm

# run the test harness for evaluating a model
def run_test_harness_1task():
	# load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # define model
    model = define_model()
    # fit model
    model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
    # save model
    model.save('final_model_1task.h5')

# entry point, run the test harness
run_test_harness_1task()

# evaluate the deep model on the test dataset
from keras.datasets import cifar10
from keras.models import load_model
from keras.utils import to_categorical

def run_test_harness_load_1task():
	# load dataset
	trainX, trainY, testX, testY = load_dataset()
	# prepare pixel data
	trainX, testX = prep_pixels_2(trainX, testX)
	# load model
	model = load_model('final_model_1task.h5')
	# evaluate model on test dataset
	_, acc = model.evaluate(testX, testY, verbose=0)
	print('> %.3f' % (acc * 100.0))
	
# entry point, run the test harness
print("Task 1 performance: ")
run_test_harness_load_1task()

Task 1 performance: 
> 74.840


In [5]:
## task 2, Replace the SGD + momentum optimizer with Adam and then AdamW. Do these optimizers lead to better performance and/or faster convergence?
import keras.optimizers


def define_model_2task(optimizer='SGD'):
	model = Sequential()
	model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
	model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(MaxPooling2D((2, 2)))
	model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(MaxPooling2D((2, 2)))
	model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
	model.add(MaxPooling2D((2, 2)))
	model.add(Flatten())
	model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
	model.add(Dense(10, activation='softmax'))
	# compile model
	if optimizer == 'SGD':
		opt = SGD(lr=0.001, momentum=0.9)
	elif optimizer == 'Adam':
		opt = keras.optimizers.Adam()
	model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
	return model

# run the test harness for evaluating a model

def run_test_harness_2task_adam():
	# load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # define model
    model = define_model_2task('Adam')
    # fit model
    model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
    # save model
    model.save('final_model_2task_adam.h5')

# entry point, run the test harness
run_test_harness_2task_adam()

# evaluate the deep model on the test dataset


def run_test_harness_load_2task_adam():
	# load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # load model
    model = load_model('final_model_2task_adam.h5')
    # evaluate model on test dataset
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> %.3f' % (acc * 100.0))

# entry point, run the test harness
print("Task 2 Adam performance: ")

run_test_harness_load_2task_adam()

In [10]:
# AdamW

import tensorflow_addons as tfa

def define_model_2task(optimizer='SGD'):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(10, activation='softmax'))
    # compile model
    if optimizer == 'SGD':
        opt = SGD(lr=0.001, momentum=0.9)
    elif optimizer == 'Adam':
        opt = keras.optimizers.Adam()
    elif optimizer == 'AdamW':
        opt = tfa.optimizers.AdamW(learning_rate=0.001, weight_decay=0.0001)		
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# run the test harness for evaluating a model

def run_test_harness_2task_adam2():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # define model
    model = define_model_2task('AdamW')
    # fit model
    model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
    # save model
    model.save('final_model_2task_adamW.h5')

# entry point, run the test harness
run_test_harness_2task_adam2()

# evaluate the deep model on the test dataset

def run_test_harness_load_2task_adamW():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # load model
    model = load_model('final_model_2task_adamW.h5')
    # evaluate model on test dataset
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> %.3f' % (acc * 100.0))

# entry point, run the test harness
print("Task 2 AdamW performance: ")
run_test_harness_load_2task_adamW()

Task 2 AdamW performance: 
> 79.880


In [14]:
from keras.layers import Dropout, BatchNormalization
from keras.layers import Activation

# task 4 Check if changing the order to Batch Norm then Dropout has an effect on performance.
# Also check if the Dropout and Batch Norm are complementary ie having both Dropout and Batch Norm in the network
# is better or worse than having a network that just has one of these regularization techniques.
def define_model_4(optimizer='SGD', order='BD', use_both=True):
    model = Sequential()
    model.add(Conv2D(32, (3, 3), kernel_initializer='he_uniform', padding='same', input_shape=(32, 32, 3)))
    if use_both or order == 'BD':
        model.add(BatchNormalization())
    if use_both or order == 'DB':
        model.add(Dropout(0.2))
    model.add(Activation('relu'))
    
    model.add(Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    
    model.add(Conv2D(64, (3, 3), kernel_initializer='he_uniform', padding='same'))
    if use_both or order == 'BD':
        model.add(BatchNormalization())
    if use_both or order == 'DB':
        model.add(Dropout(0.2))
    model.add(Activation('relu'))

    model.add(Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))

    model.add(Conv2D(128, (3, 3), kernel_initializer='he_uniform', padding='same'))
    if use_both or order == 'BD':
        model.add(BatchNormalization())
    if use_both or order == 'DB':
        model.add(Dropout(0.2))
    model.add(Activation('relu'))

    model.add(Conv2D(128, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    
    model.add(Flatten())
    model.add(Dense(128, activation='relu', kernel_initializer='he_uniform'))
    model.add(Dense(10, activation='softmax'))
    
    # compile model
    if optimizer == 'SGD':
        opt = SGD(lr=0.001, momentum=0.9)
    elif optimizer == 'Adam':
        opt = keras.optimizers.Adam()
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    
    return model




In [19]:
def run_test_harness_4BD():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # define model
    model = define_model_4(optimizer='Adam', order='BD', use_both=True)
    # fit model
    model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
    # save model
    model.save('final_model_task4BD.h5')

# entry point, run the test harness
#run_test_harness_4BD()

# evaluate the deep model on the test dataset

def run_test_harness_load_4_BD():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # load model
    model = load_model('final_model_task4BD.h5')
    # evaluate model on test dataset
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> %.3f' % (acc * 100.0))


print("BatchNorm then Dropout:")
run_test_harness_load_4_BD()

def run_test_harness_4_DB():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # define model
    model = define_model_4(optimizer='Adam', order='BD', use_both=True)
    # fit model
    model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
    # save model
    model.save('final_model_task4_DB.h5')

# entry point, run the test harness
#run_test_harness_4_DB()

# evaluate the deep model on the test dataset

def run_test_harness_load_4_DB():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # load model
    model = load_model('final_model_task4_DB.h5')
    # evaluate model on test dataset
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> %.3f' % (acc * 100.0))

print("Dropout then BatchNorm:")
run_test_harness_load_4_DB()

BatchNorm then Dropout:
> 82.720
Dropout then BatchNorm:
> 82.150


In [17]:
def run_test_harness_4_B_only():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # define model
    model = define_model_4(optimizer='Adam', order='BD', use_both=True)
    # fit model
    model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
    # save model
    model.save('final_model_task4_B_only.h5')

# entry point, run the test harness
run_test_harness_4_B_only()

# evaluate the deep model on the test dataset

def run_test_harness_load_4_B_only():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # load model
    model = load_model('final_model_task4_B_only.h5')
    # evaluate model on test dataset
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> %.3f' % (acc * 100.0))

In [18]:
print("Only BatchNorm:")
run_test_harness_load_4_B_only()


def run_test_harness_4_D_only():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # define model
    model = define_model_4(optimizer='Adam', order='BD', use_both=True)
    # fit model
    model.fit(trainX, trainY, epochs=100, batch_size=64, verbose=0)
    # save model
    model.save('final_model_task4_D_only.h5')

# entry point, run the test harness
run_test_harness_4_D_only()

# evaluate the deep model on the test dataset

def run_test_harness_load_4_D_only():
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    # load model
    model = load_model('final_model_task4_D_only.h5')
    # evaluate model on test dataset
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> %.3f' % (acc * 100.0))



print("Only Dropout:")
run_test_harness_load_4_D_only()

Only BatchNorm:
> 82.020


2023-05-18 13:33:44.379263: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_13/dropout_16/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Only Dropout:
> 81.880


In [30]:

def evaluate_model(model):
    # load dataset
    trainX, trainY, testX, testY = load_dataset()
    # prepare pixel data
    trainX, testX = prep_pixels_2(trainX, testX)
    _, acc = model.evaluate(testX, testY, verbose=0)
    print('> Test Accuracy: %.3f' % (acc * 100.0))

    #loss = model.evaluate(testX, testY, verbose=0)
    print('> Test Loss: %.3f' % _)

In [31]:
# 1. Learning Rate Warm-Up + Cosine Annealing:
#Best model so far with accuracy of 82.72 so far
model = load_model('final_model_task4BD.h5')


import tensorflow as tf
import tensorflow_addons as tfa

initial_learning_rate = 0.01

lr_schedule = tfa.optimizers.CyclicalLearningRate(
    initial_learning_rate,
    maximal_learning_rate=0.1,
    step_size=2000,
    scale_fn=lambda x: 1.,
    scale_mode="cycle",
    name="MyCyclicScheduler")

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

evaluate_model(model)


> Test Accuracy: 82.720
> Test Loss: 1.194


In [32]:
#Best model so far with accuracy of 82.72 so far
model = load_model('final_model_task4BD.h5')


# 2. Step Decay:
import numpy as np

def step_decay_schedule(initial_lr=0.1, decay_factor=0.75, step_size=10):
    def schedule(epoch):
        return initial_lr * (decay_factor ** np.floor(epoch/step_size))
    return tf.keras.callbacks.LearningRateScheduler(schedule)

lr_sched = step_decay_schedule(initial_lr=0.1, decay_factor=0.75, step_size=2)

model.compile(optimizer=tf.keras.optimizers.Adam(), loss='categorical_crossentropy', metrics=['accuracy'])

trainX, trainY, testX, testY = load_dataset()
# prepare pixel data
trainX, testX = prep_pixels_2(trainX, testX)

model.fit(trainX, trainY, callbacks=[lr_sched])


evaluate_model(model)




2023-05-18 14:10:21.456580: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape insequential_8/dropout_1/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


> Test Accuracy: 10.000
> Test Loss: 2.310


In [33]:
# 3. Cosine Annealing with Restarts (SGDR):
#Best model so far with accuracy of 82.72 so far
model = load_model('final_model_task4BD.h5')




lr_schedule = tfa.optimizers.sgdr_learning_rate_cycle(min_lr=1e-5,
                                                      max_lr=1e-2,
                                                      cycle_length=1000,
                                                      lr_decay=0.9,
                                                      mult_factor=1.5)

optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


AttributeError: module 'tensorflow_addons.optimizers' has no attribute 'sgdr_learning_rate_cycle'