## **Milestone 1 - Hyperparameter Tuning for Convolutional Neural Network** 




Author: Xiaotian Han

Modified by Shreesha Jagadeesh

In this milestone, you will implement hyperparameter search for a convolutional neural network (CNN) used for image classification. The steps are as follows:

1. Code to conduct the grid search
2. Grid search the hyperparameters for CNN
3. Compare the model performance with the original model and searched model

Instructions:
1. Complete the code between # Your code here
2. The output of the cell is the expected output of the code 



In [1]:
import tensorflow as tf
tf.random.set_seed(42)

from tensorflow.keras.datasets import mnist, cifar10
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import Model, Input
from tensorflow.keras.layers import Layer, Dense, ReLU, Softmax, MaxPooling2D, Flatten, Conv2D

%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt

In [2]:
# Show the TensorFlow version in use so the results can be tied to a version
print( "tensorflow=",tf.__version__ )

tensorflow= 2.4.1


# Load and preprocess dataset

In [3]:
# Load the CIFAR-10 train / test splits (images and labels).
# These four arrays are notebook-level globals: train_func reads x_train / y_train directly.
(x_train, y_train), (x_test, y_test) =  cifar10.load_data()

# scale: cast to float32 and divide by 255
# (presumably the raw pixels are 8-bit, so this maps them into [0, 1] -- confirm)
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.

# reshape: enforce the (num_images, 32, 32, 3) layout that build_cnn_model's Input expects
# NOTE(review): likely a no-op if load_data already returns this shape -- confirm
x_train = x_train.reshape(-1, 32, 32, 3)
x_test = x_test.reshape(-1, 32, 32, 3)

# Sanity check: print the shapes of all four arrays
print('Training image shape:', x_train.shape)
print('Training label shape:', y_train.shape)
print('Testing image shape:', x_test.shape)
print('Testing label shape:', y_test.shape)

Training image shape: (50000, 32, 32, 3)
Training label shape: (50000, 1)
Testing image shape: (10000, 32, 32, 3)
Testing label shape: (10000, 1)


# Build CNN model with a config dictionary

In [4]:
#### Build function

In [5]:
def build_cnn_model( model_config ):
    '''
    Construct and compile the CNN described by ``model_config``.

    Args:
        model_config: dict with the keys "hp1", "hp2", "hp3" (filter counts of
            the three 3x3 convolution layers, in order) and "hp4" (number of
            units in the hidden dense layer).

    Returns:
        A compiled, untrained Keras ``Model`` named "cnn_model" that maps a
        (32, 32, 3) input to a 10-way softmax output.
    '''
    # Read all four hyperparameters up front, in the order the layers use them.
    conv_filter_counts = [model_config[key] for key in ("hp1", "hp2", "hp3")]
    hidden_units = model_config["hp4"]

    image_input = Input(shape=(32, 32, 3))

    # Convolution stack: every convolution except the last one is followed
    # by a 2x2 max pooling layer.
    feature_map = image_input
    final_conv_index = len(conv_filter_counts) - 1
    for conv_index, filter_count in enumerate(conv_filter_counts):
        feature_map = Conv2D(filters=filter_count, kernel_size=3, activation='relu')(feature_map)
        if conv_index != final_conv_index:
            feature_map = MaxPooling2D((2, 2))(feature_map)

    # Classifier head: flatten -> hidden dense layer -> 10-class softmax.
    flat_features = Flatten()(feature_map)
    hidden = Dense(units=hidden_units, activation='relu')(flat_features)
    class_probs = Dense(10, activation='softmax')(hidden)

    network = Model(image_input, class_probs, name = "cnn_model")

    # Sparse categorical cross-entropy is used as the loss, with accuracy tracked as a metric.
    network.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(),
        metrics=['accuracy'],
    )

    return network

#### Train function

In [6]:
def train_func( model, epochs=1, batch_size=128, verbose = 1, x=None, y=None, validation_split=0.20):
    '''
    Fit ``model`` and return its validation accuracy averaged over all epochs.

    Args:
        model: compiled model exposing a Keras-style ``fit`` method and an
            'accuracy' metric (so the history contains 'val_accuracy').
        epochs: number of training epochs.
        batch_size: mini-batch size passed to ``fit``.
        verbose: verbosity level passed to ``fit``.
        x: training inputs. When None (default), the notebook-level
            ``x_train`` is used, which is the original behaviour.
        y: training labels. When None (default), the notebook-level
            ``y_train`` is used, which is the original behaviour.
        validation_split: fraction of the training data that ``fit`` holds
            out for validation (default 0.20).

    Returns:
        float: mean of the per-epoch 'val_accuracy' values. This is an
        average, so it can be lower than the last or the best epoch.

    Raises:
        ValueError: if no validation accuracy was recorded (for example
            ``epochs=0`` or a model compiled without the 'accuracy' metric).
    '''
    # Fall back to the notebook globals only when no data is passed explicitly.
    if x is None:
        x = x_train
    if y is None:
        y = y_train

    training_history = model.fit(x, y,
                        epochs=epochs, batch_size=batch_size,
                        validation_split=validation_split, verbose = verbose)

    # Fail with a clear message instead of a KeyError / ZeroDivisionError.
    val_acc_list = training_history.history.get('val_accuracy', [])
    if not val_acc_list:
        raise ValueError(
            "No 'val_accuracy' values were recorded; check that epochs >= 1 and "
            "that the model was compiled with the 'accuracy' metric." )

    return sum( val_acc_list ) / len( val_acc_list )

#### Build the model 

In [7]:
# Baseline hyperparameters consumed by build_cnn_model:
#   hp1, hp2, hp3 -> filter counts of the three Conv2D layers
#   hp4           -> number of units in the hidden Dense layer
model_config = {
    "hp1": 16,
    "hp2": 32,
    "hp3": 16,
    "hp4": 64,
# Note: unlike the starter code, hp4 is set to 64 and is passed into the model architecture as a parameter
}

cnn_model = build_cnn_model(model_config)

cnn_model.summary()

# Training is run in a separate cell further down: train_func( cnn_model )

Model: "cnn_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 30, 30, 16)        448       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 15, 15, 16)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 13, 13, 32)        4640      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 6, 6, 32)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 4, 4, 16)          4624      
_________________________________________________________________
flatten (Flatten)            (None, 256)               0 

In [8]:
# (Note that you need to assume you know all the input dimensions and the hyperparameters before you can roll up your sleeves
# to calculate the number of trainable weights
 
# The number of params are calculated as follows

# Conv_0 (Filter width x Filter height * previous layer channel filter + 1) * current layer filters = (3*3*3 + 1)*16 = 448

# Conv 1 The previous layer has 16  filters. The current one has 32 3x3 kernel hence(3*3*16 + 1)*32 = 4640

# Conv 2 The previous layer has 32 filters. The current one has 16 3x3 kernels, hence (3*3*32 + 1)*16 = 4624

# Flatten to the first hidden layer: 
# Treat the 256 long vector as a set of features into the dense layer. 256 x 64 + 64 (bias node) = 16448

# First hidden to the last layer:
# 64x 10 + 10 = 650

# Note that the 3,3 for the filter width and height are not explicitly stated

### Train the model

In [9]:
# Train the baseline model with train_func's defaults (a single epoch); the value shown is the mean validation accuracy
train_func( cnn_model )
# Because train_func averages over epochs, the result can be lower than the last or the best epoch's
# validation accuracy when more than one epoch is used (with a single epoch all three are equal)



0.41909998655319214

# Grid Search

In [9]:
from itertools import product

# Alternatively pass in a dictionary hyperparam_dict which contains the hp1, hp2, hp3 and hp4 as keys
# then your for loop will be different too
#     for i, hyperparameter_list in enumerate(list(product(*(list(hyperparam_dict.values()))))):


def grid_search_function(hp1s, hp2s, hp3s, hp4s, epochs=1):
    '''
    Exhaustively search every combination of the candidate hyperparameters.

    Args:
        hp1s, hp2s, hp3s: iterables of candidate filter counts for the first,
            second and third convolution layers.
        hp4s: iterable of candidate unit counts for the hidden dense layer.
        epochs: number of epochs each candidate model is trained for
            (default 1, the original behaviour).

    Logic:
        For each combination, build a fresh model with build_cnn_model, train
        it with train_func, and keep the configuration with the highest
        returned validation accuracy. Ties keep the earliest combination.

    Returns:
        dict: the best configuration, with keys "hp1".."hp4". An empty dict
        is returned only when there are no combinations to try.
    '''

    # init variables for best model
    # -inf (rather than 0) so that a run where every score is 0.0 still
    # returns a usable configuration instead of {}.
    best_val_acc = float("-inf")
    best_model_config = {}

    # hyperparameter_list holds one combination of the 4 hyperparameters and
    # changes on every iteration; model_config is its dictionary form.
    for i, hyperparameter_list in enumerate(product(hp1s, hp2s, hp3s, hp4s)):
        model_config = dict(zip( ["hp1", "hp2", "hp3", "hp4"], hyperparameter_list ) )

        # Release Keras' global state left over from the previous candidate so
        # memory does not grow with the number of models built in this loop.
        tf.keras.backend.clear_session()

        # Rebuild and train the model
        cnn_model = build_cnn_model(model_config)
        val_acc = train_func( cnn_model, epochs = epochs )

        # save best hyperparameters
        if val_acc > best_val_acc:
            best_model_config = model_config
            best_val_acc = val_acc
        print( "{}-th Search:\tCurrent:\tmodel_config:{},\tval_acc:{} \n\t\tBest:   \tmodel_config:{},\tval_acc:{}\n".format(
            i+1, model_config, val_acc, best_model_config, best_val_acc ) )

    return best_model_config

In [12]:
# Candidate values for each hyperparameter; you can change these lists.
# Grid search trains one model per combination (2 * 2 * 2 * 2 = 16 with the lists below).
hp1s = [64, 256]
hp2s = [64, 256]
hp3s = [64, 256]
hp4s = [64, 256]

best_model_config = grid_search_function(hp1s, hp2s, hp3s, hp4s)

1-th Search:	Current:	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 64, 'hp4': 64},	val_acc:0.46619999408721924 
		Best:   	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 64, 'hp4': 64},	val_acc:0.46619999408721924

2-th Search:	Current:	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 64, 'hp4': 256},	val_acc:0.4731999933719635 
		Best:   	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 64, 'hp4': 256},	val_acc:0.4731999933719635

3-th Search:	Current:	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 256, 'hp4': 64},	val_acc:0.5054000020027161 
		Best:   	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 256, 'hp4': 64},	val_acc:0.5054000020027161

4-th Search:	Current:	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 256, 'hp4': 256},	val_acc:0.4684999883174896 
		Best:   	model_config:{'hp1': 64, 'hp2': 64, 'hp3': 256, 'hp4': 64},	val_acc:0.5054000020027161

5-th Search:	Current:	model_config:{'hp1': 64, 'hp2': 256, 'hp3': 64, 'hp4': 64},	val_acc:0.4867999851703644 
		Best:   	model_config:{'hp1': 64, 'hp2': 64, 'hp3':

In [13]:
best_model_config
# Note: hp4 does matter. build_cnn_model uses it as the unit count of the hidden Dense layer,
# and grid_search_function iterates over hp4s just like the other three hyperparameters.

{'hp1': 64, 'hp2': 256, 'hp3': 256, 'hp4': 64}

In [15]:
# Build a fresh, untrained model from best_model_config (as set by the most recently run search cell)
best_cnn_model = build_cnn_model(best_model_config)

best_cnn_model.summary()

# I reduced the epochs to 3 instead of 10
# (the value displayed is train_func's validation accuracy averaged over those epochs)
train_func( best_cnn_model, epochs=3, verbose=1 )

Model: "cnn_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_18 (InputLayer)        [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_51 (Conv2D)           (None, 30, 30, 64)        1792      
_________________________________________________________________
max_pooling2d_34 (MaxPooling (None, 15, 15, 64)        0         
_________________________________________________________________
conv2d_52 (Conv2D)           (None, 13, 13, 256)       147712    
_________________________________________________________________
max_pooling2d_35 (MaxPooling (None, 6, 6, 256)         0         
_________________________________________________________________
conv2d_53 (Conv2D)           (None, 4, 4, 256)         590080    
_________________________________________________________________
flatten_17 (Flatten)         (None, 4096)              0 

0.6022666692733765

In [16]:
# tf.keras.utils.plot_model(best_cnn_model, show_layer_names=False, show_shapes=True, expand_nested=True, rankdir='TB', dpi=48)

# I am getting errors when trying to run the utils for plotting, hence commented it out. 

('Failed to import pydot. You must `pip install pydot` and install graphviz (https://graphviz.gitlab.io/download/), ', 'for `pydotprint` to work.')


In [17]:
# Evaluate the retrained model on the test split, which was not passed to train_func
test_loss, test_acc = best_cnn_model.evaluate(x_test,  y_test, verbose=1)
print('Test loss: ', test_loss)
print('Test accuracy: ', test_acc)

Test loss:  1.0072462558746338
Test accuracy:  0.6478000283241272


# Random Search

In [10]:
import random

def random_search_function(search_num, hp1s, hp2s, hp3s, hp4s, seed=None, epochs=1):
    '''
    Randomly sample hyperparameter configurations and keep the best one.

    Args:
        search_num: number of random configurations to try.
        hp1s, hp2s, hp3s: [lower, upper] integer bounds (both inclusive) for
            the filter counts of the first, second and third convolution layers.
        hp4s: [lower, upper] integer bounds (both inclusive) for the unit
            count of the hidden dense layer.
        seed: optional seed for the sampler. When given, a private
            random.Random(seed) is used so the sampled configurations are
            reproducible. When None (default), the module-level ``random``
            generator is used, which is the original behaviour.
        epochs: number of epochs each candidate model is trained for
            (default 1, the original behaviour).

    Returns:
        dict: the sampled configuration (keys "hp1".."hp4") with the highest
        validation accuracy returned by train_func. Ties keep the earliest
        sample. An empty dict is returned only when search_num is 0.
    '''
    rng = random if seed is None else random.Random(seed)

    # init variables for best model
    # -inf (rather than 0) so that a run where every score is 0.0 still
    # returns a usable configuration instead of {}.
    best_val_acc = float("-inf")
    best_model_config = {}

    # Insertion order fixes the draw order hp1 -> hp4 for every trial.
    hp_bounds = {"hp1": hp1s, "hp2": hp2s, "hp3": hp3s, "hp4": hp4s}

    for i in range(search_num):
        # For each hp, pick a random integer between the lower and upper bound (inclusive)
        model_config = { name: rng.randint( bounds[0], bounds[1] )
                         for name, bounds in hp_bounds.items() }

        # Release Keras' global state left over from the previous candidate so
        # memory does not grow with the number of models built in this loop.
        tf.keras.backend.clear_session()

        cnn_model = build_cnn_model(model_config)
        val_acc = train_func( cnn_model, epochs = epochs )

        # save best hyperparameters
        if val_acc > best_val_acc:
            best_model_config = model_config
            best_val_acc = val_acc
        print( "{}-th Seach:\tCurrent:\tmodel_config:{},\tval_acc:{} \n\t\tBest:   \tmodel_config:{},\tval_acc:{}\n".format(
            i+1, model_config, val_acc, best_model_config, best_val_acc ) )

    return best_model_config

In [11]:
# You can change the range of values for each hyperparameter.
# Each list is [lower bound, upper bound]; random_search_function draws an integer from that range.
hp1s = [16, 64]
hp2s = [16, 64]
hp3s = [16, 64]
hp4s = [16, 64]

# I reduced the search space and also the number of searches in the interest of run time.
# It's not a fair comparison with the grid search because of these differences.
best_model_config = random_search_function( 10 ,hp1s, hp2s, hp3s, hp4s)

1-th Seach:	Current:	model_config:{'hp1': 56, 'hp2': 25, 'hp3': 59, 'hp4': 50},	val_acc:0.43630000948905945 
		Best:   	model_config:{'hp1': 56, 'hp2': 25, 'hp3': 59, 'hp4': 50},	val_acc:0.43630000948905945

2-th Seach:	Current:	model_config:{'hp1': 23, 'hp2': 59, 'hp3': 26, 'hp4': 32},	val_acc:0.4415000081062317 
		Best:   	model_config:{'hp1': 23, 'hp2': 59, 'hp3': 26, 'hp4': 32},	val_acc:0.4415000081062317

3-th Seach:	Current:	model_config:{'hp1': 34, 'hp2': 47, 'hp3': 25, 'hp4': 50},	val_acc:0.4189000129699707 
		Best:   	model_config:{'hp1': 23, 'hp2': 59, 'hp3': 26, 'hp4': 32},	val_acc:0.4415000081062317

4-th Seach:	Current:	model_config:{'hp1': 33, 'hp2': 16, 'hp3': 18, 'hp4': 42},	val_acc:0.41449999809265137 
		Best:   	model_config:{'hp1': 23, 'hp2': 59, 'hp3': 26, 'hp4': 32},	val_acc:0.4415000081062317

5-th Seach:	Current:	model_config:{'hp1': 56, 'hp2': 37, 'hp3': 30, 'hp4': 56},	val_acc:0.4226999878883362 
		Best:   	model_config:{'hp1': 23, 'hp2': 59, 'hp3': 26, 'hp4': 

In [12]:
# Build a fresh, untrained model from best_model_config (as set by the most recently run search cell)
best_cnn_model = build_cnn_model(best_model_config)

best_cnn_model.summary()

# Train for 10 epochs; the value displayed is train_func's validation accuracy averaged over those epochs
train_func( best_cnn_model, epochs=10, verbose=1 )

Model: "cnn_model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_12 (InputLayer)        [(None, 32, 32, 3)]       0         
_________________________________________________________________
conv2d_33 (Conv2D)           (None, 30, 30, 63)        1764      
_________________________________________________________________
max_pooling2d_22 (MaxPooling (None, 15, 15, 63)        0         
_________________________________________________________________
conv2d_34 (Conv2D)           (None, 13, 13, 32)        18176     
_________________________________________________________________
max_pooling2d_23 (MaxPooling (None, 6, 6, 32)          0         
_________________________________________________________________
conv2d_35 (Conv2D)           (None, 4, 4, 50)          14450     
_________________________________________________________________
flatten_11 (Flatten)         (None, 800)               0 

0.5908700048923492

In [14]:
# Evaluate the retrained model on the test split, which was not passed to train_func
test_loss, test_acc = best_cnn_model.evaluate(x_test,  y_test, verbose=1)
print('Test loss: ', test_loss)
print('Test accuracy: ', test_acc)

Test loss:  1.0080499649047852
Test accuracy:  0.6496999859809875
