# Convolutional Network 

In [2]:
import tensorflow as tf

import dataset
import tfutil as tfu

## Network Construction

In [3]:
# Passed as `alpha` to both tfu.fc_op layers in conv_inference_op, alongside
# `reg_terms` (presumably the regularisation strength -- confirm in tfutil).
ALPHA=100

def conv_inference_op(images, reg_terms, train=True, share=False):
    """Build the convolutional inference graph and return its output tensor.

    Args:
        images: flat input vectors; each is reshaped to
            `dataset.image_dim(include_channels=True)`.
        reg_terms: forwarded to `tfu.fc_op` for both fully connected layers
            (presumably collects regularisation terms -- confirm in tfutil).
        train: when True dropout keeps units with probability 0.5; otherwise
            dropout is disabled (keep probability 1.0).
        share: forwarded as `reuse` to the 'conv' variable scope.

    Returns:
        The output of the final `tfu.fc_op` layer 'out' (one output channel,
        `relu=False`).
    """
    # reshape flat input vectors to 3D 'images' (height x width x channel depth)
    h = tf.reshape(images, [-1] + list(dataset.image_dim(include_channels=True)))

    # dropout probability: 50% chance of dropout during training; disabled during evaluation/prediction
    keep_prob = 0.5 if train else 1.0

    with tf.variable_scope('conv', reuse=share):
        h = tfu.conv_op(h, size=10, channels=[3, 32], stride=1, name='conv1.1')
        h = tfu.conv_op(h, size=10, channels=[32, 80], stride=2, name='conv1.2')
        h = tfu.pool_op(h, size=2, stride=2, mode='max', name='pool1')

        h = tfu.conv_op(h, size=5, channels=[80, 80], stride=1, name='conv2.1')
        h = tfu.conv_op(h, size=5, channels=[80, 160], stride=2, name='conv2.2')
        h = tfu.pool_op(h, size=2, stride=2, mode='max', name='pool2')

        h = tfu.conv_op(h, size=3, channels=[160, 160], stride=1, name='conv3.1')
        h = tfu.conv_op(h, size=3, channels=[160, 160], stride=1, name='conv3.2')
        h = tfu.pool_op(h, size=2, stride=2, mode='max', name='pool3')

        h = tfu.conv_op(h, size=3, channels=[160, 160], stride=1, name='conv4.1')
        h = tfu.conv_op(h, size=3, channels=[160, 200], stride=1, name='conv4.2')

        h = tfu.conv_op(h, size=3, channels=[200, 300], stride=1, name='conv5.1')
        h = tfu.conv_op(h, size=3, channels=[300, 300], stride=1, name='conv5.2')
        h = tfu.pool_op(h, size=2, stride=2, mode='max', name='pool4')

        # Size of one flattened example. Prefer the statically inferred shape of
        # the last pooling output: a hard-coded size that disagrees with the real
        # one can make the reshape below silently move features into the batch
        # dimension. The original constant is kept as a fallback for the case
        # where the per-example shape is not statically known.
        FC_IN_SIZE = 5 * 5 * 300
        feature_shape = h.get_shape()[1:]
        if feature_shape.is_fully_defined():
            FC_IN_SIZE = int(feature_shape.num_elements())
        FC_HIDDEN_SIZE = 4096

        # flatten output back to 1D vector
        h = tf.reshape(h, [-1, FC_IN_SIZE])

        h = tf.nn.dropout(h, keep_prob=keep_prob, name='dropout1')
        # NOTE(review): relu=False on this hidden layer means fc1 and 'out' are
        # separated only by dropout, with no non-linearity -- confirm this is
        # intended. Left unchanged so existing checkpoints and results stay valid.
        h = tfu.fc_op(h, channels_in=FC_IN_SIZE, channels_out=FC_HIDDEN_SIZE, name='fc1', reg_terms=reg_terms, alpha=ALPHA, relu=False)

        h = tf.nn.dropout(h, keep_prob=keep_prob, name='dropout2')
        h = tfu.fc_op(h, channels_in=FC_HIDDEN_SIZE, channels_out=1, name='out', reg_terms=reg_terms, alpha=ALPHA, relu=False)

    return h

## Training and Evaluation

First, I set up some general settings and the arguments to be passed to the training, evaluation and prediction functions.

In [4]:
# Run name handed to every tfu.run_* helper in this notebook.
NAME = 'conv.100'

# Starts empty; handed to the inference op as its `reg_terms` argument.
conv_reg_terms = dict()

# Keyword arguments shared by the training, evaluation and prediction calls.
args = dict(
    name=NAME,
    inference_op=conv_inference_op,
    inputs=dataset.inputs,
    reg_terms=conv_reg_terms,
)

# Additional keyword arguments used by the training calls only.
training_args = dict(optimizer=tf.train.AdamOptimizer)

The cell below erases any saved logs, checkpoints and prediction files. Run this to start with the model from scratch instead of resuming from the most recent checkpoint.

In [13]:
# Wipe whatever was saved for this run name so the model is trained from scratch,
# then call run_setup (presumably recreates the run's directories -- confirm in tfutil).
tfu.run_cleanup(name=NAME)
tfu.run_setup(name=NAME)

Finally, the cells below run training itself. The helper library function `tfu.run_training` allows us to run training multiple times with different learning rates, each time picking up from the last checkpoint saved by the previous operation.

In [14]:
# First training stage: 10 epochs at learning rate 1e-4.
# The returned step is kept so it can be passed to the next training call.
final_step = tfu.run_training(
    learning_rate=1e-4,
    num_epochs=10,
    **training_args,
    **args,
)

Train Accuracy: 49.4%
Validation Accuracy: 51.6%
Train Loss: 418838.243
Validation Loss: 418838.238
Cross Entropy: 418838.812
Cross Entropy: 18511.455
Cross Entropy: 266.296
Cross Entropy: 1.738
Train Accuracy: 64.0%
Validation Accuracy: 66.0%
Train Loss: 0.715
Validation Loss: 0.721
Cross Entropy: 0.755
Cross Entropy: 0.805
Cross Entropy: 0.703
Cross Entropy: 0.670
Train Accuracy: 69.9%
Validation Accuracy: 71.8%
Train Loss: 0.677
Validation Loss: 0.675
Cross Entropy: 0.691
Cross Entropy: 0.717
Cross Entropy: 0.823
Cross Entropy: 0.830
Train Accuracy: 63.4%
Validation Accuracy: 61.1%
Train Loss: 0.740
Validation Loss: 0.742
Cross Entropy: 0.744
Cross Entropy: 0.609
Cross Entropy: 0.696
Cross Entropy: 0.519
Done training for 3944 steps.
Train Accuracy: 81.3%
Validation Accuracy: 82.1%
Train Loss: 0.593
Validation Loss: 0.594


In [15]:
# Second training stage: another 10 epochs at the lower learning rate 1e-5,
# passing the step returned by the previous training call as `step`.
final_step = tfu.run_training(
    learning_rate=1e-5,
    num_epochs=10,
    **training_args,
    **args,
    step=final_step,
)

Train Accuracy: 81.1%
Validation Accuracy: 80.4%
Train Loss: 0.467
Validation Loss: 0.473
Cross Entropy: 0.421
Cross Entropy: 0.455
Cross Entropy: 0.389
Cross Entropy: 0.613
Train Accuracy: 83.0%
Validation Accuracy: 83.2%
Train Loss: 0.415
Validation Loss: 0.433
Cross Entropy: 0.612
Cross Entropy: 0.571
Cross Entropy: 0.498
Cross Entropy: 0.419
Train Accuracy: 81.7%
Validation Accuracy: 83.2%
Train Loss: 0.399
Validation Loss: 0.424
Cross Entropy: 0.585
Cross Entropy: 0.391
Cross Entropy: 0.373
Cross Entropy: 0.548
Train Accuracy: 84.0%
Validation Accuracy: 83.7%
Train Loss: 0.399
Validation Loss: 0.417
Cross Entropy: 0.332
Cross Entropy: 0.377
Cross Entropy: 0.283
Cross Entropy: 0.430
Done training for 7889 steps.
Train Accuracy: 85.0%
Validation Accuracy: 85.2%
Train Loss: 0.374
Validation Loss: 0.416


Evaluation and prediction restore the checkpoint saved previously by training. Here, I finally evaluate on the test set to check that the model generalises, before generating submissions for Kaggle. Note that the Kaggle submission file is written in a random order (though with each prediction labelled by the image id). On Linux, one can sort it for submission using `sort -t, -nk1 file.csv > file_sorted.csv`.

In [16]:
# Evaluate the model, then write two prediction files:
# one with default settings and one with clip=True.
tfu.run_eval(**args)
tfu.run_prediction(**args)
tfu.run_prediction(**args, clip=True)

Train Accuracy: 84.9%
Validation Accuracy: 84.8%
Test Accuracy: 85.6%
Train Loss: 0.389
Validation Loss: 0.402
Test Loss: 0.382
Wrote predictions to ./data/conv.100.csv
Wrote predictions to ./data/conv.100_clipped.csv


For reference, my original network architecture is recorded below. Due to the large convolutional filters, this network seemed to have problems with overfitting.

In [7]:
# original convnet architecture that got ~80% on validation set
def conv_old_inference_op(images, train=True):
    """Earlier large-filter network, kept for reference; returns the logits tensor.

    Args:
        images: flat input vectors; each is reshaped to
            `dataset.image_dim(include_channels=True)`.
        train: when True dropout keeps units with probability 0.5 and the
            'conv' variable scope is opened with reuse=False; when False
            dropout is disabled and the scope is opened with reuse=True.

    Returns:
        Output of the final fully connected layer 'out' (built with relu=False).
    """
    image_shape = list(dataset.image_dim(include_channels=True))
    net = tf.reshape(images, [-1] + image_shape)

    # dropout is only active while training
    if train:
        keep_prob = 0.5
    else:
        keep_prob = 1.0

    with tf.variable_scope('conv', reuse=(not train)):
        net = tfu.conv_op(net, size=40, channels=[3, 16], stride=2, name='conv1')
        net = tfu.conv_op(net, size=25, channels=[16, 64], stride=2, name='conv2')
        net = tfu.pool_op(net, size=2, stride=2, mode='avg', name='pool1')
        # feature map is 38 x 38 x 64 at this point
        net = tfu.conv_op(net, size=16, channels=[64, 128], stride=1, name='conv3')
        net = tfu.conv_op(net, size=7, channels=[128, 256], stride=2, name='conv4', padding='VALID')
        net = tfu.pool_op(net, size=2, stride=2, mode='max', name='pool2')

        # flatten the 8 x 8 x 256 feature map into one vector per example
        flat_size = 8 * 8 * 256
        net = tf.reshape(net, [-1, flat_size])

        net = tfu.fc_op(net, flat_size, 1024, name='fc1')
        net = tfu.fc_op(net, 1024, 1024, name='fc2')
        net = tf.nn.dropout(net, keep_prob=keep_prob)

        logits = tfu.fc_op(net, 1024, 1, relu=False, name='out')

    return logits