# Import

In [0]:
!pip install tensorflow-gpu==1.15.0 # GPU Version

Collecting tensorflow-gpu==1.15.0
[?25l  Downloading https://files.pythonhosted.org/packages/a5/ad/933140e74973fb917a194ab814785e7c23680ca5dee6d663a509fe9579b6/tensorflow_gpu-1.15.0-cp36-cp36m-manylinux2010_x86_64.whl (411.5MB)
[K     |████████████████████████████████| 411.5MB 29kB/s 
Installing collected packages: tensorflow-gpu
Successfully installed tensorflow-gpu-1.15.0


In [1]:
# import tensorflow.compat.v1 as tf
import tensorflow as tf
import time
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
# tf.disable_v2_behavior()

# Confirm Tensorflow can see the GPU
device_name = tf.test.gpu_device_name()
print('Found GPU at: {}'.format(device_name))
#load MNIST dataset 
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Found GPU at: /device:GPU:0
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use urllib or similar directly.
Successfully downloaded train-images-idx3-ubyte.gz 9912422 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Successfully downloaded train-labels-idx1-ubyte.gz 28881 bytes.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Successfully downloaded t10k-images-idx3-ubyte.gz 1648877 bytes.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Successfully downloaded t10k-labels-idx1-ubyte.gz 4542 bytes.
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as 

# Parameters

In [0]:
# The length of window in the pooling layer
k = 2

# Parameters
learning_rate = 0.005
training_epochs = 50
batch_size = 200
display_step = 1

# Define 2-d Convolution Function

In [0]:
def module_conv2d(x, weight_shape, bias_shape):
    """
    https://www.tensorflow.org/api_docs/python/tf/nn/conv2d
    Computes a 2 dimentional convolution given the 4d input and filter
    input:
        x: [batch, in_height, in_width, in_channels]
        weight: [filter_height, filter_width, in_channels, out_channels]
        bias: [out_channels]
    output:
        The relu activation of convolution
    """
    print([weight_shape[0], weight_shape[1], weight_shape[2], weight_shape[3]])
    sizeIn = weight_shape[0] * weight_shape[1] * weight_shape[2]
    
    # initialize weights with data generated from a normal distribution.
    # Sometimes, a smaller stddev can improve the accuracy significantly. Take some trials by yourself.
    weight_init = tf.random_normal_initializer(stddev=(2.0/sizeIn)**0.5)
    W = tf.get_variable("W", weight_shape, initializer=weight_init)
    
    # initialize bias with zeros
    bias_init = tf.constant_initializer(value=0)
    b = tf.get_variable("b", bias_shape, initializer=bias_init)
    
    # Specify the stride length to be one in all directions.
    # padding='SAME': pad enough so the output has the same dimensions as the input tensor.
    # strides: The stride of the sliding window for each dimension of input.
    # data_format: Defaults to "NHWC" [batch, height, width, channels].
    #"VALID" = without padding, "SAME" = with zero padding, if use a stride of 1, output size is the same as input size
    return tf.nn.relu(tf.nn.bias_add(tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME'), b))

# Define Layer Function

In [0]:
def layer(x, weight_shape, bias_shape):
    """
    Defines the network layers
    input:
        - x: input vector of the layer
        - weight_shape: shape of the weight maxtrix
        - bias_shape: shape of the bias vector
    output:
        - output vector of the layer after the matrix multiplication and transformation
    """
    
    weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**0.5)
    W = tf.get_variable("W", weight_shape, initializer=weight_init)
    
    bias_init = tf.constant_initializer(value=0)
    b = tf.get_variable("b", bias_shape, initializer=bias_init)
    
    return tf.nn.relu(tf.matmul(x, W) + b)

# Define Pooling Function

In [0]:
def pooling(x, k):
    """
    Extracts the main information of the conv layer by performs the max pooling on the input x.
    input:
        x: A 4-D Tensor. [batch, in_height, in_width, in_channels]
        k: The length of window
    """
    
    #value: A 4-D Tensor of the format specified by data_format. That is x in this case.
    #ksize: A 1-D int Tensor of 4 elements. The size of the window for each dimension of input
    #strides: A 1-D int Tensor of 4 elements. The stride of the sliding window for each dimension of input
    #padding: A string, either 'VALID' or 'SAME'. Difference of 'VALID' and 'SAME' in tf.nn.max_pool:
    #new_height=new_width=roundup(Size/Stride)
    #https://stackoverflow.com/questions/37674306/what-is-the-difference-between-same-and-valid-padding-in-tf-nn-max-pool-of-t
    return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], padding='SAME')

# Define Inference

In [0]:
def inference(x, keep_prob):
    """
    define the structure of the whole network
    input:
        - x: a batch of pictures 
        (input shape = (batch_size*image_size))
        - keep_prob: The keep_prob of dropout layer
    output:
        - a batch vector corresponding to the logits predicted by the network
        (output shape = (batch_size*output_size)) 
    """

    # Reshape the input into Nx28x28x1 (N # of examples & 1 due to Black-White)
    # flatten 
    # pass '[-1]' to flatten 't'
    # -1 can also be used to infer the shape, -1是缺省值，未定
    # 通过reshape生成的新的形状的数组和原始数组共用一个内存
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    
    with tf.variable_scope("convolutional_layer_1"):

        # convolutional layer with 32 filters and spatial extent e = 5
        # this causes in taking an input of volume with depth of 1 and producing an output tensor with 32 channels.
        # Nx28x28x32
        convolutional_1 = module_conv2d(x, [5, 5, 1, 32], [32])
        
        # output in passed to max-pooling to be compressed (k=2 non-overlapping).
        # Nx14x14x32
        pooling_1 = pooling(convolutional_1, k)

    with tf.variable_scope("convolutional_layer_2"):
        
        # convolutional layer with 64 filters with spatial extent e = 5
        # taking an input tensor with depth of 32 and 
        # producing an output tensor with depth 64
        # Nx14x14x64
        convolutional_2 = module_conv2d(pooling_1, [5, 5, 32, 64], [64])
        
        # output in passed to max-pooling to be compressed (k=2 non-overlapping).
        # Nx7x7x64
        pooling_2 = pooling(convolutional_2, k)

    with tf.variable_scope("convolutional_layer_3"):
        
        # convolutional layer with 128 filters with spatial extent e = 5
        # taking an input tensor with depth of 64 and 
        # producing an output tensor with depth 128
        # Nx7x7x128
        convolutional_3 = module_conv2d(pooling_2, [5, 5, 64, 128], [128])
        
        # output in passed to max-pooling to be compressed (k=2 non-overlapping).
        # Nx4x4x128
        pooling_3 = pooling(convolutional_3, k)

    with tf.variable_scope("convolutional_layer_4"):
        
        # convolutional layer with 256 filters with spatial extent e = 5
        # taking an input tensor with depth of 128 and 
        # producing an output tensor with depth 256
        # Nx4x4x256
        convolutional_4 = module_conv2d(pooling_3, [5, 5, 128, 256], [256])
        
        # output in passed to max-pooling to be compressed (k=2 non-overlapping).
        # Nx2x2x256
        pooling_4 = pooling(convolutional_4, k)

    with tf.variable_scope("fully_connected"):
        
        # pass the output of max-pooling into a Fully_Connected layer
        # use reshape to flatten the tensor
        # We have 64 filters
        # To find the height & width after max-pooling:
        # roundup((16-5)/2) + 1 = 7
        pool_2_flat = tf.reshape(pooling_4, [-1, 2*2*256])
        
        # after reshaping, use fully-connected layer to compress
        # the flattened representation into a hidden layer of size 784?
        # each feature map has a height & width of 7
        # 7*7*64/784=4
        fc_1 = layer(pool_2_flat, [2*2*256, 784], [784])
        
        # apply dropout. You may try to add drop out after every pooling layer.
        # outputs the input element scaled up by 1/keep_prob
        # The scaling is so that the expected sum is unchanged
        fc_1_drop = tf.nn.dropout(fc_1, keep_prob)

    with tf.variable_scope("output"):
        output = layer(fc_1_drop, [784, 10], [10])

    return output


# Define Loss Function

In [0]:
def loss(output, y):
    """
    Computes softmax cross entropy between logits and labels and then the loss 
    
    intput:
        - output: the output of the inference function 
        - y: true value of the sample batch
        
        the two have the same shape (batch_size * num_of_classes)
    output:
        - loss: loss of the corresponding batch (scalar tensor)
    
    """
    xentropy = tf.nn.softmax_cross_entropy_with_logits(logits=output, labels=y)    
    loss = tf.reduce_mean(xentropy)
    return loss

# Define the Optimizer and Training Target

In [0]:
def training(cost, global_step):
    """
    defines the necessary elements to train the network
    
    intput:
        - cost: the cost is the loss of the corresponding batch
        - global_step: number of batch seen so far, it is incremented by one each time the .minimize() function is called
    """
    tf.summary.scalar("cost", cost)
    # using Adam Optimizer 
    optimizer = tf.train.AdamOptimizer(learning_rate)
    train_op = optimizer.minimize(cost, global_step=global_step)
    return train_op

# Define evaluation method

In [0]:
def evaluate(output, y):
    """
    evaluates the accuracy on the validation set 
    input:
        -output: prediction vector of the network for the validation set
        -y: true value for the validation set
    output:
        - accuracy: accuracy on the validation set (scalar between 0 and 1)
    """
    #correct prediction is a binary vector which equals one when the output and y match
    #otherwise the vector equals 0
    #tf.cast: change the type of a tensor into another one
    #then, by taking the mean of the tensor, we directly have the average score, so the accuracy
    
    correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar("validation_error", (1.0 - accuracy))
    return accuracy

# Main Function

In [10]:
if __name__ == '__main__':
    
    #please, make sure you changed for your own path 
    log_files_path = './logs/CNNs/'
    start_time = time.time()
    
    with tf.Graph().as_default():

        with tf.variable_scope("MNIST_convolutional_model"):
            #neural network definition
            
            #the input variables are first define as placeholder 
            # a placeholder is a variable/data which will be assigned later 
            # MNIST data image of shape 28*28=784
            x = tf.placeholder("float", [None, 784]) 
            # 0-9 digits recognition
            y = tf.placeholder("float", [None, 10])  
            
            # dropout probability
            keep_prob = tf.placeholder(tf.float32) 
            #the network is defined using the inference function defined above in the code
            output = inference(x, keep_prob)
            cost = loss(output, y)
            #initialize the value of the global_step variable 
            # recall: it is incremented by one each time the .minimise() is called
            global_step = tf.Variable(0, name='global_step', trainable=False)
            train_op = training(cost, global_step)
            #evaluate the accuracy of the network (done on a validation set)
            eval_op = evaluate(output, y)
            summary_op = tf.summary.merge_all()
            saver = tf.train.Saver()
            sess = tf.Session()
            
            summary_writer = tf.summary.FileWriter(log_files_path, sess.graph)
            init_op = tf.global_variables_initializer()
            sess.run(init_op)
            
            # Training cycle
            for epoch in range(training_epochs):

                avg_cost = 0.0
                total_batch = int(mnist.train.num_examples/batch_size)
                
                # Loop over all batches
                for i in range(total_batch):
                    
                    minibatch_x, minibatch_y = mnist.train.next_batch(batch_size)
                    
                    # Fit training using batch data
                    sess.run(train_op, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5})
                    
                    # Compute average loss
                    avg_cost += sess.run(cost, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.5})/total_batch
                
                
                # Display logs per epoch step
                if epoch % display_step == 0:
                    
                    print("Epoch:", '%04d' % (epoch+1), "cost =", "{:0.9f}".format(avg_cost))
                    
                    #probability dropout of 1 during validation
                    accuracy = sess.run(eval_op, feed_dict={x: mnist.validation.images, y: mnist.validation.labels, keep_prob: 1})
                    print("Validation Error:", (1 - accuracy))
                    
                    # probability dropout of 0.25 during training
                    summary_str = sess.run(summary_op, feed_dict={x: minibatch_x, y: minibatch_y, keep_prob: 0.25})
                    summary_writer.add_summary(summary_str, sess.run(global_step))
                    
                    saver.save(sess, log_files_path + 'model-checkpoint', global_step=global_step)
                    
            print("Optimization Done")
                    
            accuracy = sess.run(eval_op, feed_dict={x: mnist.test.images, y: mnist.test.labels, keep_prob: 1})
            print("Test Accuracy:", accuracy)
                    
        elapsed_time = time.time() - start_time
        print('Execution time (seconds) was %0.3f' % elapsed_time)

[5, 5, 1, 32]
[5, 5, 32, 64]
[5, 5, 64, 128]
[5, 5, 128, 256]
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Epoch: 0001 cost = 1.550357307
Validation Error: 0.44859999418258667
Epoch: 0002 cost = 1.159402251
Validation Error: 0.40060001611709595
Epoch: 0003 cost = 1.006479060
Validation Error: 0.30720001459121704
Epoch: 0004 cost = 0.944815741
Validation Error: 0.3101999759674072
Epoch: 0005 cost = 0.940766630
Validation Error: 0.3222000002861023
Epoch: 0006 cost = 0.937866667
Validation Error: 0.3274000287055969
Instructions for updating:
Use standard file APIs to delete files with this prefix.
Epoch: 0007 cost = 0.937267811
Validation Error: 0.32260000705718994
Epoch: 0008 cost = 0.936099361
Validation Error: 0.3141999840736389
