# Inception (GoogLeNet) ARCHITECTURE

This notebook implements the Inception (GoogLeNet) architecture for image recognition using a CNN.
You can find the paper describing this architecture by following the link:
https://arxiv.org/pdf/1409.4842.pdf

**Here is the global architecture of an Inception network:**
<img src="images/inception_network.png" style="width:1984px;height:584px;">

In [None]:
import numpy as np
import math
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.python.framework import ops

In [None]:
%run Utilities.ipynb

In [None]:
def unpickle(file):
    """Load and return the object stored in the pickle file at path ``file``.

    The file is opened in binary mode and un-pickled with ``encoding='bytes'``.
    """
    import pickle

    # NOTE: un-pickling can execute arbitrary code; only load trusted files.
    with open(file, 'rb') as handle:
        return pickle.load(handle, encoding='bytes')

# Create Inception module


**Here is the architecture of an Inception unit block:**
<img src="images/inception_block.png" style="width:664px;height:354px;">

In [None]:
def create_1x1_convolution(input, num_input_channels, num_filters, padding, strides,
                           layer_number, use_bias=True):
    """Build a convolution layer with a 1x1 kernel.

    Thin wrapper: every argument is forwarded unchanged to
    ``create_convolutional_layer`` with ``conv_filter_size`` fixed to 1, and
    whatever that helper returns is returned as-is.
    """
    layer_options = dict(
        num_input_channels=num_input_channels,
        num_filters=num_filters,
        conv_filter_size=1,
        padding=padding,
        strides=strides,
        layer_number=layer_number,
        use_bias=use_bias,
    )
    return create_convolutional_layer(input, **layer_options)
    

In [None]:
def create_3x3_convolution(input, num_input_channels, num_filters, padding, strides,
                           layer_number, use_bias=True):
    """Build a convolution layer with a 3x3 kernel.

    Thin wrapper: every argument is forwarded unchanged to
    ``create_convolutional_layer`` with ``conv_filter_size`` fixed to 3, and
    whatever that helper returns is returned as-is.
    """
    layer_options = dict(
        num_input_channels=num_input_channels,
        num_filters=num_filters,
        conv_filter_size=3,
        padding=padding,
        strides=strides,
        layer_number=layer_number,
        use_bias=use_bias,
    )
    return create_convolutional_layer(input, **layer_options)
    

In [None]:
def create_5x5_convolution(input, num_input_channels, num_filters, padding, strides,
                           layer_number, use_bias=True):
    """Build a convolution layer with a 5x5 kernel.

    Thin wrapper: every argument is forwarded unchanged to
    ``create_convolutional_layer`` with ``conv_filter_size`` fixed to 5, and
    whatever that helper returns is returned as-is.
    """
    layer_options = dict(
        num_input_channels=num_input_channels,
        num_filters=num_filters,
        conv_filter_size=5,
        padding=padding,
        strides=strides,
        layer_number=layer_number,
        use_bias=use_bias,
    )
    return create_convolutional_layer(input, **layer_options)
    

In [None]:
def create_max_pooling_layer(input, parameters=None, ksize=None):
    """Apply max pooling to ``input`` with ``tf.nn.max_pool``.

    Arguments:
    input -- tensor to pool
    parameters -- optional dict; 'strides_pooling' (e.g. [1, 2, 2, 1]) and
                  'padding' ('SAME' or 'VALID') are read from it. Missing keys,
                  or ``parameters=None``, fall back to stride 1 and 'SAME'
                  padding, which keeps the spatial size unchanged.
    ksize -- optional pooling window; defaults to [1, 3, 3, 1]. Any size entry
             in ``parameters`` is NOT consulted, exactly as before.

    Returns:
    the pooled tensor
    """
    if parameters is None:
        parameters = {}
    if ksize is None:
        # The window is a fixed 3x3 unless the caller overrides it explicitly.
        ksize = [1, 3, 3, 1]

    strides = parameters.get('strides_pooling', [1, 1, 1, 1])
    padding = parameters.get('padding', 'SAME')

    # The tensor is passed positionally because the name of this first
    # argument differs between TensorFlow releases (`value` vs `input`).
    return tf.nn.max_pool(input, ksize=ksize, strides=strides, padding=padding)

In [None]:
def create_activation_layer(input):
    """Return ``input`` passed through the ReLU non-linearity (``tf.nn.relu``)."""
    return tf.nn.relu(input)

In [None]:
def concatenate(input1, input2, input3, input4):
    """Join four tensors along their last axis and return the resulting tensor.

    ``tf.concat`` is used rather than ``np.concatenate`` so that the join is
    part of the TensorFlow graph; NumPy cannot concatenate symbolic tensors.
    All four inputs must agree on every dimension except the last one.
    """
    return tf.concat([input1, input2, input3, input4], axis=-1)

In [None]:
def create_inception_module(input, num_filters=64, layer_number=None):
    """Build one Inception block and return the concatenation of its branches.

    Four branches are computed from the same ``input``:
      1. 1x1 convolution
      2. 1x1 convolution (reduce) followed by a 3x3 convolution
      3. 1x1 convolution (reduce) followed by a 5x5 convolution
      4. 3x3 max pooling followed by a 1x1 convolution (projection)
    Every convolution is followed by a ReLU. All layers use stride 1 and
    'SAME' padding so the four outputs share the same spatial size and can be
    joined on the last axis.

    Arguments:
    input -- input tensor; its channel count is read from ``input.shape[3]``
    num_filters -- number of filters of every convolution in the block, so the
                   output has ``4 * num_filters`` channels
    layer_number -- name prefix handed to the convolution helpers (suffixed per
                    layer). When None, a prefix that is unique within the
                    default graph is generated, so that repeated calls do not
                    hand out the same layer names twice.

    Returns:
    tensor holding the four branch outputs concatenated on the last axis
    """
    if layer_number is None:
        # Graph.unique_name returns 'inception', 'inception_1', ... on
        # successive calls for the same graph.
        layer_number = tf.get_default_graph().unique_name('inception')

    padding = 'SAME'
    strides = [1, 1, 1, 1]
    num_input_channels = input.shape[3]

    def conv_relu(conv_fn, tensor, in_channels, suffix):
        # One convolution of the block followed by its ReLU.
        conv = conv_fn(input=tensor,
                       num_input_channels=in_channels,
                       num_filters=num_filters,
                       padding=padding,
                       strides=strides,
                       layer_number=layer_number + '_' + suffix)
        return create_activation_layer(conv)

    # Branch 1: plain 1x1 convolution.
    branch_1x1 = conv_relu(create_1x1_convolution, input, num_input_channels, '1x1')

    # Branch 2: 1x1 reduction, then 3x3 convolution on the activated reduction.
    reduce_3x3 = conv_relu(create_1x1_convolution, input, num_input_channels, '3x3_reduce')
    branch_3x3 = conv_relu(create_3x3_convolution, reduce_3x3, num_filters, '3x3')

    # Branch 3: 1x1 reduction, then 5x5 convolution on the activated reduction.
    reduce_5x5 = conv_relu(create_1x1_convolution, input, num_input_channels, '5x5_reduce')
    branch_5x5 = conv_relu(create_5x5_convolution, reduce_5x5, num_filters, '5x5')

    # Branch 4: max pooling (stride 1, so the size is preserved), then 1x1 projection.
    pooled = create_max_pooling_layer(input=input,
                                      parameters={'strides_pooling': strides, 'padding': padding})
    branch_pool = conv_relu(create_1x1_convolution, pooled, num_input_channels, 'pool_proj')

    return tf.concat([branch_1x1, branch_3x3, branch_5x5, branch_pool], axis=-1)

# Create fully connected layers

In [None]:
def create_fully_connected_layer(input, num_input_channels, num_output_channel, layer_number):
    """Build a fully connected layer expressed as a 'VALID' convolution.

    The kernel covers the whole spatial extent of ``input``
    (``input.shape[1]`` x ``input.shape[2]``), so the convolution yields one
    value per output channel for each example.

    Arguments:
    input -- input tensor; dimensions 1 and 2 give the kernel size
    num_input_channels -- number of channels of ``input``
    num_output_channel -- number of output units
    layer_number -- string suffix used to name the variables
                    ('Wfc_<layer_number>' and 'Bfc_<layer_number>')

    Returns:
    (layer, weights, biases) -- the output tensor and the two trained variables
    """
    kernel_shape = [input.shape[1], input.shape[2], num_input_channels, num_output_channel]

    # Trainable kernel and bias, created by the shared helper functions.
    weights = create_weights(name='Wfc_' + layer_number, shape=kernel_shape)
    biases = create_biases(name='Bfc_' + layer_number, size=num_output_channel)

    # input and kernel are passed positionally: the keyword for the kernel is
    # `filter` in some TensorFlow releases and `filters` in others.
    layer = tf.nn.conv2d(input, weights, strides=[1, 1, 1, 1], padding='VALID')

    # Add the bias exactly once (the previous `layer += layer + biases`
    # doubled the convolution output).
    layer = layer + biases

    return layer, weights, biases

# Create softmax layer

In [None]:
def create_softmax_layer(networkOutput, Y):
    """
    Builds the softmax cross-entropy op between the network output and the labels.

    Arguments:
    networkOutput -- output of the last linear layer of the network, passed as `logits`
    Y -- "true" labels placeholder, passed as `labels`

    Returns:
    the result of tf.nn.softmax_cross_entropy_with_logits_v2 for these logits and labels
    """
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(labels=Y, logits=networkOutput)
    return cross_entropy

# Compute Cost

In [None]:
def compute_cost(softmaxOutput):
    """
    Reduces the output of the softmax layer to a single cost value.

    Arguments:
    softmaxOutput -- output of the softmax layer

    Returns:
    Tensor holding the mean (tf.reduce_mean) of softmaxOutput
    """
    return tf.reduce_mean(softmaxOutput)

# Create average pooling

In [None]:
def create_average_pooling_layer(input, parameters):
    """Apply average pooling to ``input`` with ``tf.nn.avg_pool``.

    Arguments:
    input -- tensor to pool
    parameters -- dict with the keys
                  'ksize'           pooling window, e.g. [1, 2, 2, 1]
                  'strides_pooling' strides, e.g. [1, 2, 2, 1]
                  'padding'         'SAME' or 'VALID'

    Returns:
    the pooled tensor
    """
    # The tensor is passed positionally: the first argument of tf.nn.avg_pool
    # is called `value` in older TensorFlow 1.x releases, which reject the
    # `input=` keyword that was used here before.
    return tf.nn.avg_pool(input,
                          ksize=parameters['ksize'],
                          strides=parameters['strides_pooling'],
                          padding=parameters['padding'])

# Create architectures

In [None]:
def configuration_Test(X, architecture):
    """Build the forward pass of the test Inception configuration.

    Layout: 9x9 convolution -> batch normalisation -> ReLU ->
    2 Inception modules -> max pooling -> 5 Inception modules -> max pooling ->
    2 Inception modules -> average pooling -> dropout -> flatten ->
    linear fully connected layer.

    Arguments:
    X -- input placeholder; the number of channels is read from ``X.shape[3]``
    architecture -- dict; the keys read here are
                    'parameters_avg_pooling' (forwarded to
                    create_average_pooling_layer) and
                    'num_output_class' (number of output units)

    Returns:
    (layer_fc1, parameters) -- the un-activated network output (logits) and a
                               dict that is currently always empty
    """
    parameters = {}

    parameters_avg_pooling = architecture['parameters_avg_pooling']
    num_output_class = architecture['num_output_class']

    # Parameters shared by both max-pooling layers.
    # NOTE(review): create_max_pooling_layer only reads 'strides_pooling' and
    # 'padding'; 'size_pooling' is kept for reference but is not consulted.
    parameters_pooling = {
        'size_pooling': [1, 2, 2, 1],
        'strides_pooling': [1, 2, 2, 1],
        'padding': 'SAME',
    }

    # Stem: convolution, batch normalisation, ReLU.
    layer_conv1 = create_convolutional_layer(input=X, conv_filter_size=9, num_input_channels=X.shape[3],
                                             num_filters=64, padding='VALID', strides=[1, 1, 1, 1],
                                             layer_number='conv1')
    # NOTE(review): tf.layers.batch_normalization defaults to training=False
    # and its moving-average update ops are not run by the optimizer unless
    # they are added explicitly -- confirm this is intended.
    layer_bn1 = tf.layers.batch_normalization(layer_conv1)
    layer = create_activation_layer(layer_bn1)

    # Stage 3 (modules 3a-3b).
    for _ in range(2):
        layer = create_inception_module(input=layer)
    layer = create_max_pooling_layer(input=layer, parameters=parameters_pooling)

    # Stage 4 (modules 4a-4e).
    for _ in range(5):
        layer = create_inception_module(input=layer)
    layer = create_max_pooling_layer(input=layer, parameters=parameters_pooling)

    # Stage 5 (modules 5a-5b).
    for _ in range(2):
        layer = create_inception_module(input=layer)

    layer_avg = create_average_pooling_layer(input=layer, parameters=parameters_avg_pooling)
    # NOTE(review): tf.layers.dropout defaults to training=False, in which case
    # it returns its input unchanged -- confirm whether dropout should be active.
    layer_dropout = tf.layers.dropout(inputs=layer_avg, rate=0.7)

    # Flatten, then a fully connected layer WITHOUT activation: the softmax is
    # applied later by the loss.
    flattened = tf.contrib.layers.flatten(layer_dropout)
    layer_fc1 = tf.contrib.layers.fully_connected(flattened, num_outputs=num_output_class, activation_fn=None)

    return layer_fc1, parameters

In [None]:
def parallele_exit(input, parameters_avg_pooling=None, num_output_class=4):
    """Build an auxiliary ("parallel exit") classifier branch on top of ``input``.

    Layout: average pooling -> 1x1 convolution (128 filters) -> ReLU ->
    fully connected (1024 units) -> dropout -> fully connected
    (``num_output_class`` units).

    Arguments:
    input -- tensor the branch is attached to
    parameters_avg_pooling -- optional dict for create_average_pooling_layer
                              (keys 'ksize', 'strides_pooling', 'padding').
                              Defaults to a 5x5 window with stride 3 and
                              'VALID' padding, the setting the GoogLeNet paper
                              describes for its auxiliary classifiers.
    num_output_class -- number of output units of the last layer (default 4)

    Returns:
    output tensor of the last fully connected layer (no activation applied)
    """
    if parameters_avg_pooling is None:
        parameters_avg_pooling = {
            'ksize': [1, 5, 5, 1],
            'strides_pooling': [1, 3, 3, 1],
            'padding': 'VALID',
        }

    layer_avg = create_average_pooling_layer(input=input, parameters=parameters_avg_pooling)
    layer_conv1 = create_1x1_convolution(input=layer_avg,
                                         num_input_channels=layer_avg.shape[3],
                                         num_filters=128,
                                         padding='SAME',
                                         strides=[1, 1, 1, 1],
                                         layer_number='conv_exit_1')
    layer_act1 = create_activation_layer(input=layer_conv1)

    # create_fully_connected_layer returns (layer, weights, biases); only the
    # output tensor is needed to keep building the branch.
    layer_fc1, _, _ = create_fully_connected_layer(input=layer_act1,
                                                   num_input_channels=layer_act1.shape[3],
                                                   num_output_channel=1024,
                                                   layer_number='fc1_exit1')
    # NOTE(review): tf.layers.dropout defaults to training=False, in which case
    # it returns its input unchanged -- confirm whether dropout should be active.
    layer_dropout = tf.layers.dropout(inputs=layer_fc1, rate=0.7)
    layer_fc2, _, _ = create_fully_connected_layer(input=layer_dropout,
                                                   num_input_channels=layer_dropout.shape[3],
                                                   num_output_channel=num_output_class,
                                                   layer_number='fc2_exit1')

    return layer_fc2

# Model routine

In [None]:
def model(X_train, Y_train, X_test, Y_test, learning_rate = 0.01, configuration= 'configuration_A',
          num_epochs = 100, minibatch_size = 32, momentum = 0.9, use_nesterov = False, print_cost = True, beta = 0.001):
    """
    Builds the network selected by `configuration`, trains it with momentum SGD on
    mini-batches, plots the cost per epoch and prints the train / test accuracy.

    Arguments:
    X_train -- training inputs, 4-D array (number of examples, width, height, channels)
    Y_train -- training labels, 2-D array (number of examples, number of classes)
    X_test -- test inputs, same layout as X_train
    Y_test -- test labels, same layout as Y_train
    learning_rate -- learning rate of the optimization
    configuration -- name of the architecture to build; only 'configuration_Test' is implemented
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    momentum -- momentum of the optimizer
    use_nesterov -- True to use Nesterov momentum
    print_cost -- True to print the cost after every epoch
    beta -- currently unused

    Returns:
    train_accuracy -- accuracy measured on the training set after the last epoch

    Raises:
    ValueError -- if `configuration` does not name an implemented architecture
    """

    # Fail fast with a clear message instead of hitting an unbound
    # `configuration_output` further down.
    if configuration != 'configuration_Test':
        raise ValueError("Unknown configuration %r: only 'configuration_Test' is implemented."
                         % (configuration,))

    ops.reset_default_graph()                         # to be able to rerun the model without overwriting tf variables
    (_, n_w, _, n_c) = X_train.shape                  # (number of examples, image width, image height, number of channels)
    n_y = Y_train.shape[1]                            # n_y : output size
    costs = []                                        # To keep track of the cost

    # Create Placeholders
    X, Y = create_placeholders(img_size=n_w, num_channels=n_c, num_classes=n_y)

    # Forward propagation
    architecture = get_parameters_configurationTest()
    configuration_output, parameters = configuration_Test(X=X, architecture=architecture)

    # Softmax layer and cost function
    softmax_layer = create_softmax_layer(Y=Y, networkOutput=configuration_output)
    cost = compute_cost(softmaxOutput=softmax_layer)

    # Backpropagation: Define the tensorflow optimizer.
    optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=use_nesterov).minimize(cost)

    # Accuracy ops; they hold no variables, so they can be defined with the
    # rest of the graph before the session starts.
    predict_op = tf.reshape(tf.argmax(configuration_output, -1), [-1])
    true_label = tf.argmax(Y, 1)
    correct_prediction = tf.equal(predict_op, true_label)
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    # Initialize variables
    init = tf.global_variables_initializer()

    # Start the session
    with tf.Session() as sess:

        # Run the initialization
        sess.run(init)

        # Do the training loop
        for epoch in range(num_epochs):

            minibatches = random_mini_batches(X_train, Y_train, minibatch_size)

            cost_sum = 0.
            num_minibatches = 0
            for (minibatch_X, minibatch_Y) in minibatches:
                # Run the session: execute "optimizer" and "cost"
                _, minibatch_cost = sess.run([optimizer, cost], feed_dict={X: minibatch_X, Y: minibatch_Y})
                cost_sum += minibatch_cost
                num_minibatches += 1

            # Average over the minibatches that were actually run; this avoids a
            # division by zero when the train set is smaller than one minibatch.
            epoch_cost = cost_sum / max(num_minibatches, 1)

            # Always record the cost so the plot below is never empty.
            costs.append(epoch_cost)
            if print_cost:
                print ("Cost after epoch %i: %f" % (epoch, epoch_cost))

        # plot the cost (one point per epoch)
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('epochs')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()

        # Calculate accuracy on the train and test sets
        train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
        test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
        print("Train Accuracy:", train_accuracy)
        print("Test Accuracy:", test_accuracy)

        return train_accuracy

In [None]:
# Train and evaluate the test configuration (the label argument previously
# referenced the misspelled name `Y_tain`).
model(X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test,
      configuration='configuration_Test', momentum=0.9, num_epochs=50,
      learning_rate=0.01, minibatch_size=128, use_nesterov=False, print_cost=True)
