# Chapter 5: Convolutional Neural Networks

Convolutional Neural Networks (CNNs), or ConvNets, are a special class of feedforward networks; they are primarily used for computer vision tasks, but have also been adapted to other domains with unstructured data, such as natural language processing. As they are feedforward networks, they are very similar to the simple networks that we just learned about; information passes through them in one direction, and they are made up of layers, weights, and biases.

In [None]:
## imports 
import pandas as pd 
import numpy as np
import tensorflow as tf
import os
import cv2

#### Preprocess the image data

In [None]:
## Directories that hold the dog and cat training images.
## Keep the trailing '/': file names are appended to these strings by plain
## concatenation when the image path lists are built.
dogs_dir = '/users/patricksmith/desktop/Chp5/dataset/training/dogs/'
cats_dir = '/users/patricksmith/desktop/Chp5/dataset/training/cats/'

In [None]:
## Collect the full path of every file whose name contains the class keyword;
## anything else found in the directory is ignored.
dog_images = [
    dogs_dir + file_name
    for file_name in os.listdir(dogs_dir)
    if 'dog' in file_name
]
cat_images = [
    cats_dir + file_name
    for file_name in os.listdir(cats_dir)
    if 'cat' in file_name
]

#### Preprocess the images
This preprocessing code for the images is from @jshin49 on GitHub

In [None]:
def process_image(img, img_size, pixel_depth):
    """Load an image file and turn it into a square, normalized RGB array.

    The image is read with OpenCV, resized so that its longer side equals
    ``img_size`` while keeping the aspect ratio, zero-padded on the bottom and
    right edges up to ``img_size`` x ``img_size``, normalized with
    ``normalize_image`` and finally converted from OpenCV's BGR channel order
    to RGB.

    Args:
        img: Path of the image file to load.
        img_size: Side length, in pixels, of the square output image.
        pixel_depth: Maximum pixel value, forwarded to ``normalize_image``.

    Returns:
        The array returned by ``normalize_image`` with its channel axis
        reversed, of shape ``(img_size, img_size, 3)``.

    Raises:
        ValueError: If the file cannot be read as an image.
    """
    image_path = img
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    ## cv2.imread signals failure by returning None instead of raising
    if img is None:
        raise ValueError('Could not read image file: %r' % (image_path,))

    height, width = img.shape[0], img.shape[1]
    ## Scale the longer side to img_size; clamp the shorter side to at least
    ## one pixel so extremely elongated images do not round down to zero
    if height >= width:
        new_height = img_size
        new_width = max(1, int(round(img_size * (float(width) / height))))
    else:
        new_height = max(1, int(round(img_size * (float(height) / width))))
        new_width = img_size

    ## cv2.resize expects the target size as (width, height)
    img = cv2.resize(img, (new_width, new_height), interpolation=cv2.INTER_CUBIC)

    ## Pad with black on the bottom/right.  The fill colour must be passed by
    ## keyword: the positional slot after borderType is the output array (dst)
    img = cv2.copyMakeBorder(
        img, 0, img_size - img.shape[0], 0, img_size - img.shape[1],
        cv2.BORDER_CONSTANT, value=0)

    img = normalize_image(img, pixel_depth)

    ## Reverse the channel axis: BGR (OpenCV) -> RGB
    return img[:, :, ::-1]

In [None]:
def normalize_image(image, pixel_depth):
    """Center and scale the first three channels of an image.

    Every value ``v`` in channels 0, 1 and 2 is replaced by
    ``(v - pixel_depth / 2) / pixel_depth``, so pixel values in
    ``[0, pixel_depth]`` end up in ``[-0.5, 0.5]``.  Any further channel is
    left as it is.  The input is copied, never modified.

    Args:
        image: Array-like image with at least three channels on its last axis.
        pixel_depth: Maximum pixel value (255.0 for 8-bit images).

    Returns:
        A new ``float32`` array with the same shape as ``image``.
    """
    normalized = np.array(image, dtype=np.float32)
    midpoint = pixel_depth / 2

    for channel in (0, 1, 2):
        ## Do the arithmetic in float64; the result is rounded to float32
        ## when it is written back into the output array
        plane = normalized[:, :, channel].astype(float)
        normalized[:, :, channel] = (plane - midpoint) / pixel_depth

    return normalized

#### Process the images

In [None]:
## Maximum pixel value, passed to process_image as pixel_depth
depth = 255.0
## Side length, in pixels, of the square images produced by process_image
image_size = 64

In [None]:
## Preprocess every image and pair it with its class label: 1 = dog, 0 = cat
training_dogs = [
    [np.array(process_image(image_path, image_size, depth)), 1]
    for image_path in dog_images
]

training_cats = [
    [np.array(process_image(image_path, image_size, depth)), 0]
    for image_path in cat_images
]

#### Split and define the batches

In [None]:
## Number of images per training batch (half dogs, half cats)
batch_size = 32

In [None]:
## Every batch is made of half a batch of dogs plus half a batch of cats, so
## both classes are equally represented in each training step.
half_batch = max(1, batch_size // 2)

## Derive the number of batches from the data that was actually loaded instead
## of assuming a fixed number of images per class.
num_batches = max(1, min(len(training_dogs), len(training_cats)) // half_batch)

## Split index ranges rather than the samples themselves: each sample is a
## ragged [image, label] pair, which recent NumPy versions refuse to convert
## into an array implicitly.
dog_data = [
    [training_dogs[i] for i in chunk]
    for chunk in np.array_split(np.arange(len(training_dogs)), num_batches)
]
cat_data = [
    [training_cats[i] for i in chunk]
    for chunk in np.array_split(np.arange(len(training_cats)), num_batches)
]

## Each entry of total_batch is a list of [image, label] pairs
total_batch = []
for dog_image, cat_image in zip(dog_data, cat_data):
    batch = dog_image + cat_image
    if batch:
        total_batch.append(batch)

#### Create the training and validation data

In [None]:
## Hold out the last 10% of the batches for validation.  Slice from an explicit
## start index: total_batch[-0:] would return *every* batch when
## validation_size rounds down to 0.
validation_size = int(len(total_batch) * 0.1)
validation_set = total_batch[len(total_batch) - validation_size:]

In [None]:
## Train on everything that precedes the validation batches.  The explicit end
## index keeps all batches when validation_size is 0 (total_batch[:-0] is empty).
training_batches = total_batch[:len(total_batch) - validation_size]

In [None]:
## Keep the validation batches out of the training data.  Assigning the whole
## of total_batch here would undo the hold-out split and let the network train
## on the batches reserved for validation.
training_batches = total_batch[:len(total_batch) - validation_size]

#### Define the network

In [None]:
## Number of colour channels per image; used for the input placeholder shape
## and the network's input reshape
color_channels = 3

In [None]:
def _conv_pool(inputs, filters):
    """Apply a 3x3 same-padded ReLU convolution followed by 2x2 max pooling."""
    convolution = tf.layers.conv2d(inputs=inputs, filters=filters, kernel_size=[3, 3],
            padding="same", kernel_initializer=initializer, kernel_regularizer=regularizer,
            use_bias=True, bias_initializer=initializer, bias_regularizer=regularizer,
            activation=tf.nn.relu)

    ## Stride-2 pooling halves the height and the width (rounding down)
    return tf.layers.max_pooling2d(inputs=convolution, pool_size=[2, 2], strides=(2, 2))


def convolutional_network(x, image_size, color_channels, dropout_rate=0.5, training=True):
    """Build the CNN graph for binary image classification.

    The network consists of three convolution + max-pooling stages (32, 32 and
    64 filters), a 1024-unit fully connected layer with dropout and a single
    sigmoid output unit.  The module-level ``initializer`` and ``regularizer``
    must be defined before this function is called.

    Args:
        x: Input tensor holding the images; it is reshaped to
            ``[-1, image_size, image_size, color_channels]``.
        image_size: Height and width, in pixels, of the square input images.
        color_channels: Number of channels per image.
        dropout_rate: Fraction of fully connected activations to drop.
        training: Whether dropout is active.  Pass ``False`` (or a boolean
            tensor) to disable it for evaluation.

    Returns:
        Tensor of shape ``[batch, 1]`` with the sigmoid output of the network.
    """
    ## Input Layer
    input_layer = tf.reshape(x, [-1, image_size, image_size, color_channels])

    ## Three convolutional layers, each followed by pooling
    pooling_one = _conv_pool(input_layer, filters=32)
    pooling_two = _conv_pool(pooling_one, filters=32)
    pooling_three = _conv_pool(pooling_two, filters=64)

    ## Flattening layer: three stride-2 poolings shrink each side to
    ## image_size // 8, and the last convolution has 64 feature maps
    pooled_size = image_size // 8
    flatten_layer = tf.reshape(pooling_three, [-1, pooled_size * pooled_size * 64])

    ## Fully Connected Layer with dropout
    fc_layer = tf.layers.dense(inputs=flatten_layer, units=1024, activation=tf.nn.relu,
            kernel_initializer=initializer, kernel_regularizer=regularizer,
            use_bias=True, bias_initializer=initializer, bias_regularizer=regularizer)
    dropout_layer = tf.layers.dropout(inputs=fc_layer, rate=dropout_rate, training=training)

    ## Output Layer
    output = tf.layers.dense(inputs=dropout_layer, units=1, activation=tf.nn.sigmoid)

    return output

#### Input Placeholders

In [None]:
## Batch of input images, shaped [batch, height, width, channels]
x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, color_channels])
## One label per image, shaped [batch, 1]
y = tf.placeholder(tf.float32, shape=[None, 1], name='labels')

#### Define the training parameters
- `image_size` (defined earlier) is the height and width, in pixels, of each square input image

In [None]:
## Learning rate handed to the optimizer
lr = 0.0001
## Number of passes over the training batches
training_epochs = 20
## Images per batch (same value as used when the batches were built)
batch_size = 32
## Print the training metrics every `display` epochs
display = 1
## Network outputs at or above this value count as a positive prediction
threshold = 0.5 

#### Define the network and optimizer before training

In [None]:
## Weight initializer and L2 penalty shared by the regularized layers
initializer = tf.contrib.layers.xavier_initializer()
regularizer = tf.contrib.layers.l2_regularizer(0.001)

## Construct the model
model_output = convolutional_network(x, image_size, color_channels)

## Define the loss function and the optimizer for the network.
## `loss` is the plain log loss and is what gets reported during training.
loss = tf.losses.log_loss(labels=y, predictions=model_output)

## The layer regularizers only register their penalties in a graph collection;
## they take effect only if they are added to the objective being minimized.
total_loss = loss + tf.losses.get_regularization_loss()
optimizer = tf.train.AdamOptimizer(learning_rate=lr).minimize(total_loss)

## Alternative optimizer:
## tf.train.RMSPropOptimizer(learning_rate=lr).minimize(total_loss)

## Define the accuracy metric.  The thresholds take the shape of the model
## output so the comparison is element-wise for any batch size, rather than
## broadcasting a [batch_size] vector against the [N, 1] output.
thresholds = tf.fill(tf.shape(model_output), threshold)
predictions = tf.greater_equal(model_output, thresholds)
correct_prediction = tf.equal(predictions, tf.cast(y, tf.bool))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

#### Run the training session

In [None]:
## Start the training session
with tf.Session() as sess:
    ## Initialize the variables
    sess.run(tf.global_variables_initializer())

    for epoch in range(training_epochs):
        ## Running totals, so the report reflects the whole epoch and not
        ## just whichever batch happened to come last
        loss_sum = 0.0
        accuracy_sum = 0.0
        examples_seen = 0

        for batch in training_batches:
            ## An empty batch cannot be unzipped into images and labels
            if len(batch) == 0:
                continue

            ## Each batch is a sequence of [image, label] pairs
            batch_images, batch_labels = map(list, zip(*batch))
            batch_images = np.array(batch_images)
            batch_labels = np.array(batch_labels).reshape(-1, 1)

            ## Run the training procedures
            _, l, acc = sess.run([optimizer, loss, accuracy], feed_dict={x: batch_images, y: batch_labels})

            ## Weight by batch size: batches are not all the same length
            examples = len(batch_labels)
            loss_sum += l * examples
            accuracy_sum += acc * examples
            examples_seen += examples

        ## Nothing to report if there were no training examples
        if examples_seen and epoch % display == 0:
            print('\nEpoch: %d, Loss: %f, Accuracy: %f' % (
                epoch + 1, loss_sum / examples_seen, accuracy_sum / examples_seen))