### Imports

In [1]:
import os
from PIL import Image
import tensorflow as tf
import pandas as pd
import numpy as np
import h5py
import tables as pt
import math

### Parameters

In [2]:
# Model Saver
checkpoint_file_name = 'cat-or-dog.ckpt'
use_saved_model = False
save_model = True

# Model
channels_nb = 3 # 1 for greyscale images as input, 3 for RGB
classes_nb = 2
img_height = 90
img_width = int(math.floor(1.15*img_height)) # Made to keep the 1.15 image size ratio
learning_rate = 0.0001
weights_init_stddev = 0.01
bias_init_val = 0.01
train_keep_prob =  0.25

# Total training images : 20996
# Total validation images : 2000
# Total test images : 2000

# Batch size is the same for training and evaluation
batch_size = 50

# Training
train_steps_nb = 10000 # For each image to be used only once for training, train_steps_nb=20996/batch_size=20996/50=419.92

# Evaluation
train_eval_steps_nb = 30
validation_eval_steps_nb = 30
test_eval_steps_nb = 40 # Max : total-test-image/batch_size= 2000/50 = 40

# Regular events while training
print_starting_batch_every_x_train_step = 20
eval_every_x_train_step = 100

### Constants

In [4]:
# Paths
cwd = os.getcwd()
images_dir = os.path.join(cwd, 'images/')
checkpoints_dir = os.path.join(cwd, 'checkpoints/')

# Storage
store = pd.HDFStore(cwd + '/store.h5')

# Load images data
images_df = store['images_df']
store.close()

# File to save model
checkpoint_file = checkpoints_dir + checkpoint_file_name

In [5]:
images_df.tail()

Unnamed: 0,full_path,label,width,height,ratio,set_name
24993,/home/hugo/Desktop/cat-or-dog/images/dog.661.jpg,1,240,179,1.340782,train
24994,/home/hugo/Desktop/cat-or-dog/images/dog.7429.jpg,1,399,500,0.798,train
24995,/home/hugo/Desktop/cat-or-dog/images/dog.10948...,1,499,375,1.330667,train
24996,/home/hugo/Desktop/cat-or-dog/images/dog.2499.jpg,1,499,375,1.330667,validation
24997,/home/hugo/Desktop/cat-or-dog/images/dog.8010.jpg,1,174,223,0.780269,train


### Data preparation 

In [6]:
train_images_labels_df = images_df[images_df['set_name'] == 'train'].reset_index(drop=True)
validation_images_labels_df = images_df[images_df['set_name'] == 'validation'].reset_index(drop=True)
test_images_labels_df = images_df[images_df['set_name'] == 'test'].reset_index(drop=True)

### Helpers

In [7]:
def get_filepaths_and_labels(set_name):
    if set_name == 'training':
        images_labels_df = train_images_labels_df
    elif set_name == 'validation':
        images_labels_df = validation_images_labels_df
    elif set_name == 'test':
        images_labels_df = test_images_labels_df
    else:
        print 'Error, no set corresponding to this name.'
    return list(images_labels_df['full_path'].values), list(images_labels_df['label'].values)
    
def get_eval_steps_nb_from_set_name(set_name):
    if set_name == 'training':
        return train_eval_steps_nb
    elif set_name == 'validation':
        return validation_eval_steps_nb
    elif set_name == 'test':
        return test_eval_steps_nb
    else:
        print 'Error, no set corresponding to this name.'

### Model

In [8]:
# Model helpers
def weight_init(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=weights_init_stddev))

def bias_init(shape):
    return tf.Variable(tf.constant(bias_init_val, shape=shape))

def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 3, 3, 1], padding='SAME')

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

def norm(x):
    return tf.nn.lrn(x, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75)

In [9]:
# Model 2
def do_inference(images, keep_prob):
    # CNN 1
    cnn1_output_length = 32
    weights_conv1 = weight_init([15, 15, channels_nb, cnn1_output_length])
    bias_conv1 = bias_init([cnn1_output_length])
    conv1 = tf.nn.relu(conv2d(images, weights_conv1) + bias_conv1)
    
    # Max pooling 1
    pool1 = max_pool_2x2(conv1)
    
    # Local response normalization 1
    norm1 = norm(conv1)
    
    # CNN 2
    cnn2_output_length = 64
    weights_conv2 = weight_init([11, 11, cnn1_output_length, cnn2_output_length])
    bias_conv2 = bias_init([cnn2_output_length])
    conv2 = tf.nn.relu(conv2d(pool1, weights_conv2) + bias_conv2)
    
    # Max pooling 2
    pool2 = max_pool_2x2(conv2)
    
    # Local response normalization 2
    norm2 = norm(pool2)
    
    # CNN 3
    cnn3_output_length = 128
    weights_conv3 = weight_init([8, 8, cnn2_output_length, cnn3_output_length])
    bias_conv3 = bias_init([cnn3_output_length])
    conv3 = tf.nn.relu(conv2d(norm2, weights_conv3) + bias_conv3)
    
    # Max pooling 3
    pool3 = max_pool_2x2(conv3)
    
    # Local response normalization 3
    norm3 = norm(pool3)
    
    # CNN 4
    cnn4_output_length = 256
    weights_conv4 = weight_init([5, 5, cnn3_output_length, cnn4_output_length])
    bias_conv4 = bias_init([cnn4_output_length])
    conv4 = tf.nn.relu(conv2d(norm3, weights_conv4) + bias_conv4)
    
    # Local response normalization 4
    norm4 = norm(conv4)
    
    # Max pooling 4
    pool4 = max_pool_2x2(norm4)
    
    # Reshape for fully connected layers
    pool4_flat = tf.reshape(pool4, [batch_size, -1])
    pool4_flat_output_length = pool4_flat.get_shape()[1].value
    
    # Fully connected 1
    fc1_units = 2048
    weights_fc1 = weight_init([pool4_flat_output_length, fc1_units])
    bias_fc1 = bias_init([fc1_units])
    fc1 = tf.nn.relu(tf.matmul(pool4_flat, weights_fc1) + bias_fc1)
    
    # Fully connected 2
    fc2_units = 2048
    weights_fc2 = weight_init([fc1_units, fc2_units])
    bias_fc2 = bias_init([fc2_units])
    fc2 = tf.nn.relu(tf.matmul(fc1, weights_fc2) + bias_fc2)
    
    # Drop out
    drop = tf.nn.dropout(fc2, keep_prob)
    
    # Fully connected 3
    fc3_units = 2048
    weights_fc3 = weight_init([fc2_units, fc3_units])
    bias_fc3 = bias_init([fc3_units])
    fc3 = tf.nn.relu(tf.matmul(drop, weights_fc3) + bias_fc3)
    
    # Fully connected 4
    fc4_units = 2048
    weights_fc4 = weight_init([fc3_units, fc4_units])
    bias_fc4 = bias_init([fc4_units])
    fc4 = tf.nn.relu(tf.matmul(fc3, weights_fc4) + bias_fc4)

    # Linear output
    weights_lin = weight_init([fc4_units, classes_nb])
    bias_lin = bias_init([classes_nb])
    y = tf.matmul(fc4, weights_lin) + bias_lin
    return y

### Operations helpers

In [10]:
def get_images_labels_batch(set_name, is_eval):
    # Set queue
    filenames_str, labels_str = get_filepaths_and_labels(set_name)
    # convert string into tensors
    filenames = tf.convert_to_tensor(filenames_str, dtype=tf.string)
    labels = tf.convert_to_tensor(labels_str, dtype=tf.int32)
    
    filename, label = tf.train.slice_input_producer([filenames, labels], shuffle=True)
    
    # Add image reader
    image_content = tf.read_file(filename)
    image = tf.image.decode_jpeg(image_content, channels=channels_nb)
    
    if is_eval:
        # Resize image to the fixed image size
        image_prepared = tf.image.resize_images(image, [img_height, img_width], method=1)
        
    else:
        # If is training, apply multiple images transformation to artificially multiply the number of training images
        image_prepared = tf.image.resize_images(image, [img_height, img_width], method=1)
        
        # Randomly flip the image horizontally.
        image_prepared = tf.image.random_flip_left_right(image_prepared)

        # Randomly change image brightness
        image_prepared = tf.image.random_brightness(image_prepared, max_delta=0.5)
        
        # Randomly change image contrast
        image_prepared = tf.image.random_contrast(image_prepared, lower=0.2, upper=1.8)

    # Labels must be in the one_hot format
    label_one_hot = tf.one_hot(label, classes_nb)
    
    # Create batches
    image_batch, label_batch = tf.train.batch([image_prepared, label_one_hot], batch_size=batch_size)
    
    return image_batch, label_batch
    

def get_train_op(ys, labels, learning_rate):
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(ys, labels))
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
    return train_op

def do_eval(sess, images_op, labels_op, keep_prob, accuracy_op, set_name, images, labels):
    # set_name can be 'training', 'validation' or 'test'
    print('\nEvaluating on {} set...'.format(set_name))
    eval_steps_nb = get_eval_steps_nb_from_set_name(set_name)
    
    accs = np.array([])
    for eval_step in range(eval_steps_nb):
        images_val, labels_val = sess.run([images_op, labels_op])
        batch_acc = sess.run([accuracy_op], feed_dict={
                        images: images_val,
                        labels: labels_val,
                        keep_prob: 1
                    })
        accs = np.append(accs, batch_acc)
    
    acc = np.mean(accs)
    print 'Accuracy on {} set is : {}'.format(set_name, acc)

### Train 

In [11]:
def do_train():
    with tf.Graph().as_default():
        # Create (image, label) tuples batches queue for training
        images_op, labels_op = get_images_labels_batch(set_name='training', is_eval=False)
        
        # Create (image, label) tuples batches queue for evaluation on each type
        eval_train_images_op, eval_train_labels_op = get_images_labels_batch(set_name='training', is_eval=True)
        eval_validation_images_op, eval_validation_labels_op = get_images_labels_batch(set_name='validation', is_eval=True)
        eval_test_images_op, eval_test_labels_op = get_images_labels_batch(set_name='test', is_eval=True)
        
        # Create placeholders
        images = tf.placeholder(tf.float32, shape=[batch_size, img_height, img_width, channels_nb])
        labels = tf.placeholder(tf.int32, shape=[batch_size, classes_nb])
        keep_prob = tf.placeholder(tf.float32)

        # Get model inference
        ys = do_inference(images, keep_prob)

        # Define train_op
        train_op = get_train_op(ys, labels, learning_rate)
        
        # Define accuracy_op
        correct_prediction = tf.equal(tf.argmax(ys,1), tf.argmax(labels,1))
        accuracy_op = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Create session and initialize the variables
        sess = tf.Session()
        
        # Create a saver for writing/loading training checkpoints.
        saver = tf.train.Saver()
        
        # Initialize the variables.
        if not use_saved_model:
            sess.run(tf.initialize_all_variables())
        else:
            print 'Loading saved model...'
            saver.restore(sess, checkpoint_file)
        

        # Coordinate the (image, label) batches loading from queue
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        # Train on the examples of the training set
        for train_step in range(train_steps_nb):
            if train_step % print_starting_batch_every_x_train_step == 0 and train_step !=0:
                print('Starting training on batch {} / {}'.format(train_step, train_steps_nb - 1))
            
            images_val, labels_val = sess.run([images_op, labels_op])
            
            sess.run([train_op], feed_dict={
                    images: images_val,
                    labels: labels_val,
                    keep_prob: train_keep_prob
                })
            
            # Regularly evaluate on the training and validation sets
            if train_step % eval_every_x_train_step == 0 :
                # Evaluate on training set
                do_eval(sess = sess,
                        images_op = eval_train_images_op,
                        labels_op = eval_train_labels_op,
                        images = images,
                        labels = labels,
                        keep_prob = keep_prob,
                        accuracy_op = accuracy_op,
                        set_name = 'training')
                
                # Evaluate on validation set
                do_eval(sess = sess,
                        images_op = eval_validation_images_op,
                        labels_op = eval_validation_labels_op,
                        images = images,
                        labels = labels,
                        keep_prob = keep_prob,
                        accuracy_op = accuracy_op,
                        set_name = 'validation')
                
                # Save model
                if save_model :
                    print('Saving model...')
                    saver.save(sess, checkpoint_file)
                    print 'Model saved.\n\n'
            
        # Evaluate on test set
        do_eval(sess = sess,
                images_op = eval_test_images_op,
                labels_op = eval_test_labels_op,
                images = images,
                labels = labels,
                keep_prob = keep_prob,
                accuracy_op = accuracy_op,
                set_name = 'test')
        
        # Save model
        if save_model :
            print('Saving model...')
            saver.save(sess, checkpoint_file)
            print 'Model saved.'
    
        # Stop the queue threads and close the session
        coord.request_stop()
        coord.join(threads)
        sess.close()
        print 'The end.'

In [12]:
do_train()


Evaluating on training set...
Accuracy on training set is : 0.477999999126

Evaluating on validation set...
Accuracy on validation set is : 0.501333328088
Saving model...
Model saved.


Starting training on batch 20 / 9999
Starting training on batch 40 / 9999
Starting training on batch 60 / 9999
Starting training on batch 80 / 9999
Starting training on batch 100 / 9999

Evaluating on training set...
Accuracy on training set is : 0.489333331585

Evaluating on validation set...
Accuracy on validation set is : 0.508000002305
Saving model...
Model saved.


Starting training on batch 120 / 9999
Starting training on batch 140 / 9999
Starting training on batch 160 / 9999
Starting training on batch 180 / 9999
Starting training on batch 200 / 9999

Evaluating on training set...
Accuracy on training set is : 0.519999996821

Evaluating on validation set...
Accuracy on validation set is : 0.494666665792
Saving model...
Model saved.


Starting training on batch 220 / 9999
Starting training on batc