# VGG ARCHITECTURE

This notebook implements the VGG architecture for image recognition using a CNN.
You can find the paper describing this architecture by following the link:
https://arxiv.org/pdf/1409.1556.pdf

**Here is the global architecture of a VGG network:**
<img src="images/vgg.png" style="width:470px;height:276px;">

In [None]:
import numpy as np
import math
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops

In [None]:
%run Utilities.ipynb

In [None]:
# Load the images and their labels through loadFile (not defined in this notebook;
# presumably provided by the %run of Utilities.ipynb above).
# NOTE(review): array shapes are not visible here; later cells index trainSet with four axes.
trainSet = loadFile('images.npy')
labelTrainSet = loadFile('labels.npy')

In [None]:
# Visual sanity check of the loaded data: hand trainSet and position 1999 to showImage
# (not defined in this notebook; presumably it displays that image — confirm in Utilities.ipynb).
showImage(inTainSet= trainSet, atPosition=1999)

# First step: Pre-processing

The article describes the only pre-processing step we need to apply:
"The only pre-processing we do is subtracting the mean RGB value, computed on the training set, from each pixel"

In [None]:
# Apply the pre-processing described above to the training images.
# NOTE(review): the return value is not assigned, so this only changes trainSet if
# normalize_image works in place — confirm in Utilities.ipynb.
normalize_image(trainingSet=trainSet)

In [None]:
# Map every raw label to its rank among the sorted distinct labels (0 .. n_classes - 1).
# np.unique(..., return_inverse=True) produces those indices in one vectorised pass, which
# replaces the per-element np.argwhere scan and leaves labelTrainSet itself untouched, so
# this cell gives the same result every time it is re-run.
unique_array, label_indices = np.unique(labelTrainSet, return_inverse=True)

# Column vector of class indices, shape (m, 1).
reshapeLabels = label_indices.reshape(-1, 1)

# One-hot encode the indices. The transposes adapt to convert_to_one_hot (not defined in
# this notebook); the result is expected to have shape (m, n_classes) — verify against Utilities.ipynb.
labelSet = convert_to_one_hot(reshapeLabels.T.astype(np.int32), len(unique_array)).T
                              

In [None]:
# Inspect the encoded label row of the first example.
labelSet[0]

In [None]:
# Draw one random ordering of the example indices and apply it to both arrays,
# so that every image stays paired with its label row.
s = np.random.permutation(trainSet.shape[0])
trainSet, labelSet = trainSet[s], labelSet[s]

In [None]:
# Hold out the last TEST_SIZE (already shuffled) examples for evaluation and train on the rest.
TEST_SIZE = 200

X_train = trainSet[:-TEST_SIZE]
Y_train = labelSet[:-TEST_SIZE]
Y_tain = Y_train  # misspelled legacy name, kept because the model(...) call at the end of the notebook uses it

X_test = trainSet[-TEST_SIZE:]
Y_test = labelSet[-TEST_SIZE:]

In [None]:
# Check the shape of the training images after the hold-out split.
X_train.shape

In [None]:
# Inspect the label row of one held-out example (index 5).
Y_test[5]

In [None]:
# Same call as before the shuffle; trainSet has been reordered since, so position 1999
# now refers to whichever example the random permutation placed there.
showImage(inTainSet= trainSet, atPosition=1999)

# Second step: create variables and placeholders

In [None]:
def create_weights(shape, name):
    """
    Create (or fetch) a trainable weight variable.

    Arguments:
    shape -- shape of the variable, forwarded to tf.get_variable
    name -- name of the variable inside the current variable scope

    Returns:
    the variable, initialised from a truncated normal distribution with stddev 0.05
    """
    weight_initializer = tf.initializers.truncated_normal(stddev=0.05)
    return tf.get_variable(name=name, shape=shape, initializer=weight_initializer)
 
def create_biases(size, name):
    """
    Create (or fetch) a trainable bias vector.

    Arguments:
    size -- number of entries of the vector (the variable has shape [size])
    name -- name of the variable inside the current variable scope

    Returns:
    the variable, initialised with zeros
    """
    bias_shape = [size]
    return tf.get_variable(name=name, shape=bias_shape, initializer=tf.zeros_initializer())

In [None]:
def create_placeholders(img_size, num_channels, num_classes):
    """
    Create the input and label placeholders of the TensorFlow graph.

    Arguments:
    img_size -- scalar, used for both spatial dimensions of an image (images are taken to be square)
    num_channels -- scalar, number of channels of an image
    num_classes -- scalar, number of columns of the label placeholder

    Returns:
    X -- float32 placeholder named 'X', of shape [None, img_size, img_size, num_channels]
    Y -- int32 placeholder named 'Y', of shape [None, num_classes]

    Tips:
    - The first dimension is None so that batches of any size can be fed:
      the number of examples differs between training and testing.
    """
    image_shape = [None, img_size, img_size, num_channels]
    label_shape = [None, num_classes]

    return (tf.placeholder(name='X', dtype=tf.float32, shape=image_shape),
            tf.placeholder(name='Y', dtype=tf.int32, shape=label_shape))

# Third step: create convolutional layers

In [None]:
def create_convolutional_layer(input,
               num_input_channels,
               conv_filter_size,
               num_filters,
               padding='SAME',
               strides=None,
               layer_number=None, use_bias = True):
    """
    Create one convolutional layer together with its trainable variables.

    TF's conv2d function calculates convolutions in batches:
    the input is [batch, in_height, in_width, in_channels] and
    the filter is [filter_height, filter_width, in_channels, out_channels].

    Arguments:
    input -- input tensor of the layer
    num_input_channels -- number of channels of `input`
    conv_filter_size -- height and width of the (square) filters
    num_filters -- number of filters, i.e. number of output channels
    padding -- 'SAME' or 'VALID'. Defaults to 'SAME', which together with the default
               strides keeps the spatial resolution, as VGG convolutions do.
    strides -- [data, height, width, channel]; defaults to [1, 1, 1, 1]
    layer_number -- suffix of the variable names ('Wconv_<n>' / 'Bconv_<n>'); required
    use_bias -- when False the bias variable is still created but not added to the output

    Returns:
    (layer, weights, biases) -- the output tensor and the two variables. The network
    builders in this notebook unpack exactly these three values, and the tuple mirrors
    create_fully_connected_layer.

    Raises:
    TypeError -- if layer_number is not given
    """
    # padding/strides gained defaults because no network builder passes them; Python then
    # requires a default on layer_number too, so its required-ness is enforced by hand.
    if layer_number is None:
        raise TypeError("create_convolutional_layer() missing required argument: 'layer_number'")
    if strides is None:
        strides = [1, 1, 1, 1]

    suffix = str(layer_number)

    ## We shall define the weights that will be trained using create_weights function.
    weights = create_weights(name='Wconv_' + suffix,
                             shape=[conv_filter_size, conv_filter_size, num_input_channels, num_filters])
    ## We create biases using the create_biases function. These are also trained.
    biases = create_biases(name='Bconv_' + suffix, size=num_filters)

    ## Creating the convolutional layer
    layer = tf.nn.conv2d(input=input,
                         filter=weights,
                         strides=strides,
                         padding=padding)

    if use_bias:
        layer = layer + biases

    return layer, weights, biases

In [None]:
def create_pooling_layer(input, parameters):
    """
    Apply max-pooling to `input`.
    "Max-pooling is performed over a 2x2 pixel window, with stride 2".

    Arguments:
    input -- tensor to pool
    parameters -- dictionary with the keys
        'size_pooling'    -- pooling window, e.g. [1, 2, 2, 1] for [batch_size, img_size, img_size, number of channel]
        'strides_pooling' -- strides, e.g. [1, 2, 2, 1], same layout as the window
        'padding'         -- 'SAME' or 'VALID'

    Returns:
    the pooled tensor
    """
    return tf.nn.max_pool(value=input,
                          ksize=parameters['size_pooling'],
                          strides=parameters['strides_pooling'],
                          padding=parameters['padding'])

In [None]:
def create_activation_layer(input):
    """
    Apply the ReLU non-linearity to `input`.
    "All hidden layers are equipped with the rectification non-linearity"

    Returns:
    the activated tensor
    """
    return tf.nn.relu(input)

# Fourth step: create fully connected layers

In [None]:
def create_fully_connected_layer(input, num_input_channels, num_output_channel, layer_number):
    """
    Create a fully connected layer, implemented as a 'VALID' convolution whose filter
    covers the whole spatial extent of `input` (so the output is 1x1 spatially).

    Arguments:
    input -- input tensor; dimensions 1 and 2 of its shape give the filter height and width
    num_input_channels -- number of channels of `input`
    num_output_channel -- number of output units
    layer_number -- string suffix of the variable names ('Wfc_<n>' / 'Bfc_<n>')

    Returns:
    (layer, weights, biases) -- the output tensor (convolution + biases) and the two variables
    """
    ## We shall define the weights that will be trained using create_weights function.
    weights = create_weights(name='Wfc_'+layer_number,
                             shape=[input.shape[1], input.shape[2], num_input_channels, num_output_channel])
    ## We create biases using the create_biases function. These are also trained.
    biases = create_biases(name='Bfc_'+layer_number, size=num_output_channel)

    layer = tf.nn.conv2d(input=input,
                         filter=weights,
                         strides=[1, 1, 1, 1],
                         padding='VALID')

    # Plain affine output. The former `layer += layer + biases` evaluated to
    # 2 * layer + biases, silently doubling the pre-activation.
    layer = layer + biases

    return layer, weights, biases

# Fifth step: create softmax layer

In [None]:
def create_softmax_layer(networkOutput, Y):
    """
    Apply softmax to the network output and compare it with the labels.

    Arguments:
    networkOutput -- output of forward propagation (output of the last LINEAR unit), used as logits
    Y -- "true" labels placeholder

    Returns:
    the softmax cross entropy tensor (not yet averaged, see compute_cost)
    """
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=networkOutput)
    return cross_entropy

# Sixth step: compute cost

In [None]:
def compute_cost(softmaxOutput):
    """
    Reduce the output of the softmax layer to a single cost value.

    Arguments:
    softmaxOutput -- output of the softmax layer

    Returns:
    Tensor of the cost function: the mean over all elements of softmaxOutput
    """
    return tf.reduce_mean(softmaxOutput)

# Seventh step: Choose your number of layers and train the model

<img src="images/vgg_configurations.png" style="width:520px;height:525px;">

In [None]:
def get_parameters_configurationTest():
    """
    Hyper-parameters of the small test network (two convolution/pooling stages).

    Returns:
    a new dictionary with the filter sizes, the numbers of filters, the pooling settings
    and the number of output classes. Both pooling entries refer to the same dictionary.
    """
    # 2x2 max-pooling window moved with stride 2, shared by every pooling stage.
    pooling = {
        'size_pooling': [1, 2, 2, 1],
        'strides_pooling': [1, 2, 2, 1],
        'padding': 'SAME',
    }

    return {
        'filter_size_conv1': 3,
        'filter_size_conv2': 3,
        'num_filters_conv1': 64,
        'num_filters_conv2': 128,
        'parameters_pooling1': pooling,
        'parameters_pooling2': pooling,
        'num_output_class': 4,
    }

In [None]:
def configuration_Test(X, architecture):
    """
    Build the small test network:
    CONV -> RELU -> MAXPOOL -> CONV -> RELU -> MAXPOOL -> FULLY CONNECTED.

    The tensor shapes are printed along the way. The last layer has no activation: its
    output is meant to be used as logits. It is built by create_fully_connected_layer,
    which takes the place of an explicit flatten followed by a dense layer.

    Arguments:
    X -- input placeholder; dimension 3 of its shape is the number of input channels
    architecture -- dictionary as returned by get_parameters_configurationTest()

    Returns:
    (layer_fc1, parameters) -- output of the fully connected layer and a dictionary with
    the weight variables under the keys 'W1', 'W2' and 'Wfc1'
    """
    # Every hyper-parameter is read before the first layer is created.
    conv1_size = architecture['filter_size_conv1']
    conv2_size = architecture['filter_size_conv2']
    conv1_filters = architecture['num_filters_conv1']
    conv2_filters = architecture['num_filters_conv2']
    pool1_spec = architecture['parameters_pooling1']
    pool2_spec = architecture['parameters_pooling2']
    n_classes = architecture['num_output_class']
    in_channels = X.shape[3]

    parameters = {}

    # Stage 1
    print(X.shape)
    conv1, parameters['W1'], _ = create_convolutional_layer(input=X,
                                                            conv_filter_size=conv1_size,
                                                            num_filters=conv1_filters,
                                                            num_input_channels=in_channels,
                                                            layer_number='1')
    act1 = create_activation_layer(conv1)
    print(act1.shape)
    pool1 = create_pooling_layer(input=act1, parameters=pool1_spec)
    print(pool1.shape)

    # Stage 2
    conv2, parameters['W2'], _ = create_convolutional_layer(input=pool1,
                                                            conv_filter_size=conv2_size,
                                                            num_filters=conv2_filters,
                                                            num_input_channels=conv1_filters,
                                                            layer_number='2')
    act2 = create_activation_layer(conv2)
    print(act2.shape)
    pool2 = create_pooling_layer(input=act2, parameters=pool2_spec)
    print(pool2.shape)

    # Classifier
    layer_fc1, parameters['Wfc1'], _ = create_fully_connected_layer(input=pool2,
                                                                    num_input_channels=conv2_filters,
                                                                    num_output_channel=n_classes,
                                                                    layer_number='3')

    return layer_fc1, parameters

In [None]:
def get_parameters_configuration0():
    """
    Hyper-parameters of 'configuration 0' (four convolutions, three pooling stages,
    two fully connected layers).

    Returns:
    a new dictionary with the filter sizes, the numbers of filters, the pooling settings,
    the width of the first fully connected layer and the number of output classes.
    All pooling entries refer to the same dictionary.
    """
    # 2x2 max-pooling window moved with stride 2, shared by every pooling stage.
    pooling = {
        'size_pooling': [1, 2, 2, 1],
        'strides_pooling': [1, 2, 2, 1],
        'padding': 'SAME',
    }

    return {
        'filter_size_conv1': 3,
        'filter_size_conv2': 3,
        'filter_size_conv3': 3,
        'filter_size_conv3bis': 3,
        'num_filters_conv1': 64,
        'num_filters_conv2': 128,
        'num_filters_conv3': 256,
        'num_filters_conv3bis': 256,
        'parameters_pooling1': pooling,
        'parameters_pooling2': pooling,
        'parameters_pooling3': pooling,
        'num_filters_fc1': 512,
        'num_output_class': 4,
    }

In [None]:
def configuration_0(X, architecture):
    """
    Build 'configuration 0':
    CONV-RELU-POOL -> CONV-RELU-POOL -> CONV-RELU-CONV-RELU-POOL -> FC-RELU -> FC.

    Arguments:
    X -- input placeholder; dimension 3 of its shape is the number of input channels
    architecture -- dictionary as returned by get_parameters_configuration0()

    Returns:
    the output of the last fully connected layer (no activation), to be used as logits
    """
    # Every hyper-parameter is read before the first layer is created.
    conv_suffixes = ('1', '2', '3', '3bis')
    filter_sizes = [architecture['filter_size_conv' + suffix] for suffix in conv_suffixes]
    filter_counts = [architecture['num_filters_conv' + suffix] for suffix in conv_suffixes]
    pooling_specs = [architecture['parameters_pooling%d' % stage] for stage in (1, 2, 3)]
    fc1_width = architecture['num_filters_fc1']
    n_classes = architecture['num_output_class']

    def conv_relu(inputs, in_channels, index):
        # One convolution followed by its ReLU; the variables are named after conv_suffixes[index].
        conv, _, _ = create_convolutional_layer(input=inputs,
                                                conv_filter_size=filter_sizes[index],
                                                num_filters=filter_counts[index],
                                                num_input_channels=in_channels,
                                                layer_number=conv_suffixes[index])
        return create_activation_layer(conv)

    # Stage 1
    net = conv_relu(X, X.shape[3], 0)
    net = create_pooling_layer(input=net, parameters=pooling_specs[0])

    # Stage 2
    net = conv_relu(net, filter_counts[0], 1)
    net = create_pooling_layer(input=net, parameters=pooling_specs[1])

    # Stage 3: two stacked convolutions before pooling
    net = conv_relu(net, filter_counts[1], 2)
    net = conv_relu(net, filter_counts[2], 3)
    net = create_pooling_layer(input=net, parameters=pooling_specs[2])

    # Classifier
    fc1, _, _ = create_fully_connected_layer(input=net,
                                             num_input_channels=filter_counts[3],
                                             num_output_channel=fc1_width,
                                             layer_number='1')
    fc1 = create_activation_layer(fc1)
    logits, _, _ = create_fully_connected_layer(input=fc1,
                                                num_input_channels=fc1_width,
                                                num_output_channel=n_classes,
                                                layer_number='2')

    return logits

In [None]:
def get_parameters_configurationA():
    """
    Hyper-parameters of VGG configuration A (eight convolutions, five pooling stages,
    three fully connected layers).

    Returns:
    a new dictionary with, in this order: one 'filter_size_conv<n>' and one
    'num_filters_conv<n>' entry per convolution, 'parameters_pooling1' .. 'parameters_pooling5'
    (all referring to the same dictionary), 'num_filters_fc1', 'num_filters_fc2' and
    'num_output_class'.
    """
    # (name suffix, number of filters) of every convolution, in network order.
    conv_layers = [('1', 64), ('2', 128),
                   ('3', 256), ('3bis', 256),
                   ('4', 512), ('4bis', 512),
                   ('5', 512), ('5bis', 512)]

    # 2x2 max-pooling window moved with stride 2, shared by every pooling stage.
    pooling = {
        'size_pooling': [1, 2, 2, 1],
        'strides_pooling': [1, 2, 2, 1],
        'padding': 'SAME',
    }

    # Every convolution uses 3x3 filters.
    config = {'filter_size_conv' + suffix: 3 for suffix, _ in conv_layers}
    config.update(('num_filters_conv' + suffix, count) for suffix, count in conv_layers)
    config.update(('parameters_pooling%d' % stage, pooling) for stage in range(1, 6))
    config['num_filters_fc1'] = 4096
    config['num_filters_fc2'] = 4096
    config['num_output_class'] = 30

    return config

In [None]:
def configuration_A(X, parameters):
    """
    Build the forward pass of VGG configuration A:
    five convolutional stages (1, 1, 2, 2 and 2 convolutions), each closed by a max-pooling
    layer, followed by three fully connected layers.

    "All hidden layers are equipped with the rectification non-linearity": a ReLU follows
    every convolution and the first two fully connected layers, as in configuration_0.
    The last fully connected layer has no activation, its output is used as logits.

    Arguments:
    X -- input placeholder; dimension 3 of its shape is the number of input channels
    parameters -- dictionary as returned by get_parameters_configurationA()

    Returns:
    the output tensor of the last fully connected layer
    """

    def conv_relu(inputs, in_channels, suffix):
        # One stride-1, 'SAME'-padded convolution followed by its ReLU.
        out_channels = parameters['num_filters_conv' + suffix]
        result = create_convolutional_layer(input=inputs,
                                            num_input_channels=in_channels,
                                            conv_filter_size=parameters['filter_size_conv' + suffix],
                                            num_filters=out_channels,
                                            padding='SAME',
                                            strides=[1, 1, 1, 1],
                                            layer_number=suffix)
        # Call sites in this notebook treat the helper both as returning the bare layer and
        # as returning (layer, weights, biases); accept either form.
        conv = result[0] if isinstance(result, tuple) else result
        return create_activation_layer(conv), out_channels

    # Convolutions of each stage, identified by the suffix used in the parameter keys
    # and in the variable names.
    stages = [('1',), ('2',), ('3', '3bis'), ('4', '4bis'), ('5', '5bis')]

    layer = X
    channels = X.shape[3]
    for stage_index, suffixes in enumerate(stages, start=1):
        for suffix in suffixes:
            layer, channels = conv_relu(layer, channels, suffix)
        layer = create_pooling_layer(input=layer,
                                     parameters=parameters['parameters_pooling%d' % stage_index])

    # (name suffix, number of units, followed by a ReLU?) of every fully connected layer.
    classifier = [('1', parameters['num_filters_fc1'], True),
                  ('2', parameters['num_filters_fc2'], True),
                  ('3', parameters['num_output_class'], False)]

    for suffix, units, hidden in classifier:
        # create_fully_connected_layer returns (layer, weights, biases); only the layer is chained.
        layer, _, _ = create_fully_connected_layer(input=layer,
                                                   num_input_channels=channels,
                                                   num_output_channel=units,
                                                   layer_number=suffix)
        if hidden:
            layer = create_activation_layer(layer)
        channels = units

    return layer

In [None]:
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.01, configuration= 'configuration_A',
          num_epochs = 100, minibatch_size = 32, momentum = 0.9, use_nesterov = False, print_cost = True, beta = 0.001):
    """
    Build the selected network, train it with momentum SGD, plot the cost per epoch and
    print the accuracy on the training and test sets.

    Arguments:
    X_train -- training images, 4-D array whose shape is read as (m, n_w, n_h, n_c)
    Y_train -- training labels, 2-D array with one column per class
    X_test -- test images, same layout as X_train
    Y_test -- test labels, same layout as Y_train
    learning_rate -- learning rate of the optimization
    configuration -- 'configuration_A', 'configuration_0' or 'configuration_Test'
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    momentum, use_nesterov -- forwarded to tf.train.MomentumOptimizer
    print_cost -- True to print the cost after every epoch
    beta -- currently unused, kept so that existing calls stay valid

    Returns:
    train_accuracy -- accuracy measured on the training set after the last epoch

    Raises:
    ValueError -- if `configuration` is not one of the names listed above
    """

    builders = {
        'configuration_A': (get_parameters_configurationA, configuration_A),
        'configuration_0': (get_parameters_configuration0, configuration_0),
        'configuration_Test': (get_parameters_configurationTest, configuration_Test),
    }
    if configuration not in builders:
        raise ValueError("Unknown configuration %r; expected one of %s" % (configuration, sorted(builders)))
    get_architecture, build_network = builders[configuration]

    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    (_, n_w, _, n_c) = X_train.shape                  # number of examples, image width, image height, number of channels
    n_y = Y_train.shape[1]                            # n_y : output size
    costs = []                                        # cost of every epoch

    # Create Placeholders
    X, Y = create_placeholders(img_size=n_w, num_channels=n_c, num_classes=n_y)

    # Forward propagation
    architecture = get_architecture()
    # The output layer needs exactly one unit per label column, otherwise the cross
    # entropy cannot be computed; the value hard-coded in the configuration is overridden.
    architecture['num_output_class'] = n_y
    # Positional call: the builders do not agree on the name of their second parameter.
    network = build_network(X, architecture)
    # configuration_Test returns (output, parameters); only the output tensor is needed here.
    configuration_output = network[0] if isinstance(network, tuple) else network

    # Create softmax layer
    softmax_layer = create_softmax_layer(Y=Y, networkOutput=configuration_output)

    # Cost function
    cost = compute_cost(softmaxOutput=softmax_layer)

    # Backpropagation: define the training operation.
    train_op = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                          momentum=momentum,
                                          use_nesterov=use_nesterov).minimize(cost)

    # Accuracy: share of examples whose highest-scoring class matches the label.
    # The reshape flattens any singleton dimensions left in the network output.
    predict_op = tf.reshape(tf.argmax(configuration_output, -1), [-1])
    true_label = tf.argmax(Y, 1)
    correct_prediction = tf.equal(predict_op, true_label)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # Initialize variables
    init = tf.global_variables_initializer()

    # Start the session
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            minibatches = random_mini_batches(X_train, Y_train, minibatch_size)

            # Average over the minibatches actually processed. Dividing by
            # int(m / minibatch_size) fails when the training set is smaller than one
            # minibatch and is off whenever the number of batches differs from it.
            cost_sum = 0.
            batch_count = 0
            for (minibatch_X, minibatch_Y) in minibatches:
                _, minibatch_cost = sess.run([train_op, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                cost_sum += minibatch_cost
                batch_count += 1
            epoch_cost = cost_sum / batch_count if batch_count else 0.

            # Always record the cost so that the plot does not depend on print_cost.
            costs.append(epoch_cost)
            if print_cost:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))

        # plot the cost
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Calculate accuracy on the training and test sets
        train_accuracy = sess.run(accuracy, feed_dict={X: X_train, Y: Y_train})
        test_accuracy = sess.run(accuracy, feed_dict={X: X_test, Y: Y_test})
        print("Train Accuracy:", train_accuracy)
        print("Test Accuracy:", test_accuracy)

        return train_accuracy

In [None]:
# Train the small test configuration for 20 epochs with minibatches of 128 examples.
# Y_tain (sic) is the training-label array created in the train/test split cell.
model(X_train=X_train, Y_train=Y_tain, X_test=X_test, Y_test=Y_test, configuration='configuration_Test', momentum= 0.9, num_epochs=20,learning_rate=0.01, minibatch_size=128, use_nesterov=False, print_cost= True)

