# Applied Deep Learning - a use case based approach to understand deep neural networks

### Umberto Michelucci

Buy the book: https://www.apress.com/us/book/9781484237892

(C) Umberto Michelucci 2018-2019 - umberto.michelucci@gmail.com 

github repository: https://github.com/Apress/applied-deep-learning

# An example of a convolutional Neural Network

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import time
import pandas as pd

# Data Zalando

## Get the datafiles

To avoid to make the repository unecessarily big, you can get the two needed files

- fashion-mnist_train.csv
- fashion-mnist_test.csv

zipped in the Week 5 folder. Copy them in the same folder where this notebook is and unzip them before running the following cell.

## Loading and preparing the files

In [3]:
data_train = pd.read_csv('fashion-mnist_train.csv', header = 0)
data_test = pd.read_csv('fashion-mnist_test.csv', header = 0)

In [4]:
labels = data_train['label'].values.reshape(1, 60000)

labels_ = np.zeros((60000, 10))
labels_[np.arange(60000), labels] = 1
train = data_train.drop('label', axis=1)

labels_dev = data_test['label'].values.reshape(1, 10000)

labels_dev_ = np.zeros((10000, 10))
labels_dev_[np.arange(10000), labels_dev] = 1
dev = data_test.drop('label', axis=1)

In [5]:
print(labels_.shape)
print(labels_dev_.shape)

(60000, 10)
(10000, 10)


In [6]:
train = np.array(train / 255.0)
dev = np.array(dev / 255.0)
labels_ = np.array(labels_)
labels_dev_ = np.array(labels_dev_)

# Build the CNN

In [7]:
# Placeholder variable for the input images
x = tf.placeholder(tf.float32, shape=[None, 28*28], name='X')
# Reshape it into [num_images, img_height, img_width, num_channels]
x_image = tf.reshape(x, [-1, 28, 28, 1])

# Placeholder variable for the true labels associated with the images
y_true = tf.placeholder(tf.float32, shape=[None, 10], name='y_true')
y_true_cls = tf.argmax(y_true, axis=1)

In [8]:
def new_conv_layer(input, num_input_channels, filter_size, num_filters):
    
        # Shape of the filter-weights for the convolution
    shape = [filter_size, filter_size, num_input_channels, num_filters]

        # Create new weights (filters) with the given shape
    weights = tf.Variable(tf.truncated_normal(shape, stddev=0.05))

        # Create new biases, one for each filter
    biases = tf.Variable(tf.constant(0.05, shape=[num_filters]))

        # TensorFlow operation for convolution
    layer = tf.nn.conv2d(input=input, filter=weights, strides=[1, 1, 1, 1], padding='SAME')

        # Add the biases to the results of the convolution.
    layer += biases
        
    return layer, weights

In [9]:
def new_pool_layer(input):
    
        # TensorFlow operation for convolution
    layer = tf.nn.max_pool(value=input, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        
    return layer

In [10]:
def new_relu_layer(input):
    
    #with tf.variable_scope(name) as scope:
        # TensorFlow operation for convolution
    layer = tf.nn.relu(input)
        
    return layer

In [11]:
def new_fc_layer(input, num_inputs, num_outputs):
         # Create new weights and biases.
    weights = tf.Variable(tf.truncated_normal([num_inputs, num_outputs], stddev=0.05))
    biases = tf.Variable(tf.constant(0.05, shape=[num_outputs]))
        
        # Multiply the input and weights, and then add the bias-values.
    layer = tf.matmul(input, weights) + biases
        
    return layer

In [12]:
# Convolutional Layer 1
layer_conv1, weights_conv1 = new_conv_layer(input=x_image, num_input_channels=1, filter_size=5, num_filters=6)

# Pooling Layer 1
layer_pool1 = new_pool_layer(layer_conv1)

# RelU layer 1
layer_relu1 = new_relu_layer(layer_pool1)

# Convolutional Layer 2
layer_conv2, weights_conv2 = new_conv_layer(input=layer_relu1, num_input_channels=6, filter_size=5, num_filters=16)

# Pooling Layer 2
layer_pool2 = new_pool_layer(layer_conv2)

# RelU layer 2
layer_relu2 = new_relu_layer(layer_pool2)

# Flatten Layer
num_features = layer_relu2.get_shape()[1:4].num_elements()
layer_flat = tf.reshape(layer_relu2, [-1, num_features])

# Fully-Connected Layer 1
layer_fc1 = new_fc_layer(layer_flat, num_inputs=num_features, num_outputs=128)

# RelU layer 3
layer_relu3 = new_relu_layer(layer_fc1)

# Fully-Connected Layer 2
layer_fc2 = new_fc_layer(input=layer_relu3, num_inputs=128, num_outputs=10)

In [13]:
# Use Softmax function to normalize the output
y_pred = tf.nn.softmax(layer_fc2)
y_pred_cls = tf.argmax(y_pred, axis=1)

In [14]:
# Use Cross entropy cost function
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logits=layer_fc2, labels=y_true)
cost = tf.reduce_mean(cross_entropy)

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.



In [15]:
# Use Adam Optimizer
optimizer = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)

In [16]:
# Accuracy
correct_prediction = tf.equal(y_pred_cls, y_true_cls)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [18]:
num_epochs = 10
batch_size = 100

## Training the CNN with the Zalando dataset

In [19]:
with tf.Session() as sess:
    # Initialize all variables
    sess.run(tf.global_variables_initializer())
    
    # Add the model graph to TensorBoard
    #writer.add_graph(sess.graph)
    
    # Loop over number of epochs
    for epoch in range(num_epochs):
        
        start_time = time.time()
        train_accuracy = 0
        
        #for batch in range(0, int(len(labels_)/batch_size)):
            
        for i in range(0, train.shape[0], batch_size):
            if (i%5000 == 0):
                print('-->',i)
            x_batch = train[i:i + batch_size,:]
            y_true_batch = labels_[i:i + batch_size,:]
            
            # Put the batch into a dict with the proper names for placeholder variables
            feed_dict_train = {x: x_batch, y_true: y_true_batch}
            
            # Run the optimizer using this batch of training data.
            sess.run(optimizer, feed_dict=feed_dict_train)
            
            # Calculate the accuracy on the batch of training data
            train_accuracy += sess.run(accuracy, feed_dict=feed_dict_train)
            
            # Generate summary with the current batch of data and write to file
            #summ = sess.run(merged_summary, feed_dict=feed_dict_train)
            #writer.add_summary(summ, epoch*int(len(labels_)/batch_size) + batch)
        
          
        train_accuracy /= int(len(labels_)/batch_size)
        
        # Generate summary and validate the model on the entire validation set
        vali_accuracy = sess.run(accuracy, feed_dict={x:dev, y_true:labels_dev_})
        #writer1.add_summary(summ, epoch)
        

        end_time = time.time()
        
        print("Epoch "+str(epoch+1)+" completed : Time usage "+str(int(end_time-start_time))+" seconds")
        print("\tAccuracy:")
        print ("\t- Training Accuracy:\t{}".format(train_accuracy))
        print ("\t- Validation Accuracy:\t{}".format(vali_accuracy))

--> 0
--> 5000
--> 10000
--> 15000
--> 20000
--> 25000
--> 30000
--> 35000
--> 40000
--> 45000
--> 50000
--> 55000
Epoch 1 completed : Time usage 58 seconds
	Accuracy:
	- Training Accuracy:	0.7606666664034128
	- Validation Accuracy:	0.8374000191688538
--> 0
--> 5000
--> 10000
--> 15000
--> 20000
--> 25000
--> 30000
--> 35000
--> 40000
--> 45000
--> 50000
--> 55000
Epoch 2 completed : Time usage 62 seconds
	Accuracy:
	- Training Accuracy:	0.852583332558473
	- Validation Accuracy:	0.8628000020980835
--> 0
--> 5000
--> 10000
--> 15000
--> 20000
--> 25000
--> 30000
--> 35000
--> 40000
--> 45000
--> 50000
--> 55000
Epoch 3 completed : Time usage 65 seconds
	Accuracy:
	- Training Accuracy:	0.8724500008424123
	- Validation Accuracy:	0.8770999908447266
--> 0
--> 5000
--> 10000
--> 15000
--> 20000
--> 25000
--> 30000
--> 35000
--> 40000
--> 45000
--> 50000
--> 55000
Epoch 4 completed : Time usage 65 seconds
	Accuracy:
	- Training Accuracy:	0.8864500005046526
	- Validation Accuracy:	0.8817999958