# Convnet
Now we try adding convolutional layers, which should give better performance for image recognition. We follow the principle that we should keep adding layers as long as the network does not appear to be overfitting (validation error increasing while cross entropy on training batches continues to decrease). However, we also keep things fairly simple to keep computation times down.

In [1]:
import tensorflow as tf

import dataset
import tfutil as tfu

In [2]:
# original convnet that got ~80% (but now seems to be overfitting). name=conv_2
def conv_2_inference_op(images, train=True):
    """Build the ``conv_2`` network graph and return its logits.

    Four convolutions with two pooling stages feed two 1024-unit
    fully-connected layers, dropout, and a single-unit output layer.

    Args:
        images: Input batch; reshaped to ``[-1] + dataset.image_dim(...)``.
        train: When true, dropout keeps 50% of activations and the 'conv'
            variable scope is opened with ``reuse=False``. When false,
            dropout is disabled and the scope is opened with ``reuse=True``.

    Returns:
        The output of the final ``fc_op`` layer (built with ``relu=False``).
    """
    # Dropout is only active while training.
    if train:
        keep_prob = 0.5
    else:
        keep_prob = 1.0

    input_shape = [-1] + list(dataset.image_dim(include_channels=True))
    net = tf.reshape(images, input_shape)

    # Flattened feature size fed to the first fully-connected layer.
    fc_in_size = 8 * 8 * 256

    with tf.variable_scope('conv', reuse=(not train)):
        # Stage 1: two strided convolutions, then average pooling.
        net = tfu.conv_op(net, size=40, channels=[3, 16], stride=2, name='conv1')
        net = tfu.conv_op(net, size=25, channels=[16, 64], stride=2, name='conv2')
        net = tfu.pool_op(net, size=2, stride=2, mode='avg', name='pool1')
        # size is now 38 x 38 x 64

        # Stage 2: two more convolutions, then max pooling.
        net = tfu.conv_op(net, size=16, channels=[64, 128], stride=1, name='conv3')
        net = tfu.conv_op(
            net, size=7, channels=[128, 256], stride=2, name='conv4',
            padding='VALID')
        net = tfu.pool_op(net, size=2, stride=2, mode='max', name='pool2')

        # Classifier head: flatten, two FC layers, dropout, one output unit.
        net = tf.reshape(net, [-1, fc_in_size])
        net = tfu.fc_op(net, fc_in_size, 1024, name='fc1')
        net = tfu.fc_op(net, 1024, 1024, name='fc2')
        net = tf.nn.dropout(net, keep_prob=keep_prob)

        logits = tfu.fc_op(net, 1024, 1, relu=False, name='out')

    return logits

In [3]:
def conv_3_inference_op(images, train=True):
    """Build the ``conv_3`` network graph and return its logits.

    Same convolutional trunk as ``conv_2`` plus a fifth 3x3 convolution
    followed by dropout, then two 512-unit fully-connected layers, a second
    dropout, and a single-unit output layer.

    Args:
        images: Input batch; reshaped to ``[-1] + dataset.image_dim(...)``.
        train: When true, dropout keeps 50% of activations and the 'conv'
            variable scope is opened with ``reuse=False``. When false,
            dropout is disabled and the scope is opened with ``reuse=True``.

    Returns:
        The output of the final ``fc_op`` layer (built with ``relu=False``).
    """
    # Dropout is only active while training.
    if train:
        keep_prob = 0.5
    else:
        keep_prob = 1.0

    input_shape = [-1] + list(dataset.image_dim(include_channels=True))
    net = tf.reshape(images, input_shape)

    # Flattened feature size fed to the first fully-connected layer.
    fc_in_size = 6 * 6 * 256

    with tf.variable_scope('conv', reuse=(not train)):
        # Stage 1: two strided convolutions, then average pooling.
        net = tfu.conv_op(net, size=40, channels=[3, 16], stride=2, name='conv1')
        net = tfu.conv_op(net, size=25, channels=[16, 64], stride=2, name='conv2')
        net = tfu.pool_op(net, size=2, stride=2, mode='avg', name='pool1')
        # size is now 38 x 38 x 64

        # Stage 2: two more convolutions, then max pooling.
        net = tfu.conv_op(net, size=16, channels=[64, 128], stride=1, name='conv3')
        net = tfu.conv_op(
            net, size=7, channels=[128, 256], stride=2, name='conv4',
            padding='VALID')
        net = tfu.pool_op(net, size=2, stride=2, mode='max', name='pool2')
        # size is now 8 * 8 * 256

        # Stage 3: unpadded 3x3 convolution (8 -> 6 per side), then dropout.
        net = tfu.conv_op(
            net, size=3, channels=[256, 256], stride=1, name='conv5',
            padding='VALID')
        net = tf.nn.dropout(net, keep_prob=keep_prob)

        # Classifier head: flatten, two FC layers, dropout, one output unit.
        net = tf.reshape(net, [-1, fc_in_size])
        net = tfu.fc_op(net, fc_in_size, 512, name='fc1')
        net = tfu.fc_op(net, 512, 512, name='fc2')
        # NOTE: an additional 'fc3' layer was sketched here but is disabled.
        net = tf.nn.dropout(net, keep_prob=keep_prob)

        logits = tfu.fc_op(net, 512, 1, relu=False, name='out')

    return logits

In [5]:
# Run the conv_3 network through tfu.run_all with training enabled:
# 5 epochs at a learning rate of 1e-4, reading data from dataset.inputs.
tfu.run_all(
    name='conv_3',
    inference_op=conv_3_inference_op,
    inputs=dataset.inputs,
    do_training=True,
    total_epochs=5,
    learning_rate=1e-4,
)

Train accuracy: 49.6%
Validation accuracy: 51.7%
Cross Entropy: 0.73
Cross Entropy: 0.69
Cross Entropy: 0.69
Cross Entropy: 0.7
Train accuracy: 50.0%
Validation accuracy: 48.4%
Cross Entropy: 0.69
Cross Entropy: 0.69
Cross Entropy: 0.69
Cross Entropy: 0.69
Done training for 1972 steps.
Validation accuracy: 51.7%
Test accuracy: 51.2%
Wrote 12500 predictions to ./data/conv_3.csv


In [2]:
def _conv_relu(inputs, kernel, in_channels, out_channels, stride, padding):
    """Apply a square convolution, add a bias, and pass the result through ReLU.

    The weight stddev is always sqrt(2 / fan_in), with fan_in derived from the
    weight shape itself so the two can never disagree.
    """
    fan_in = kernel * kernel * in_channels
    weights = tfu.normal_weight_variable(
        [kernel, kernel, in_channels, out_channels],
        stddev=(2. / fan_in) ** 0.5)
    bias = tfu.constant_bias_variable([out_channels])
    conv = tf.nn.conv2d(
        inputs, weights, strides=[1, stride, stride, 1], padding=padding)
    return tf.nn.relu(conv + bias)


def conv_inference_op(images, train=True):
    """Build a hand-written four-layer convnet and return its logits.

    Fixes relative to the previous version of this cell:
      * no longer calls an un-imported ``sqrt`` (uses ``** 0.5`` instead);
      * the first ``max_pool`` now passes the required ``padding`` argument;
      * the third conv layer consumes the pooled output of the second layer
        (it used to read the pre-pool tensor), and its weight/bias channel
        counts agree (64) with what the fourth layer expects as input;
      * the fourth conv layer has its own 'conv4' scope instead of reusing
        'conv3';
      * every weight stddev uses the fan-in of its own weight shape;
      * the flattened size is taken from the tensor's static shape instead of
        a hard-coded ``5 * 5 * 128`` that did not match the feature map;
      * ``keep_prob`` is actually applied as dropout before the output layer,
        matching the other inference ops in this notebook;
      * the output layer uses ``variable_scope`` like every other layer.

    Args:
        images: Input batch; reshaped to ``[-1, 299, 299, 3]``.
        train: When true, dropout keeps 50% of activations and the 'conv'
            variable scope is opened with ``reuse=False``. When false,
            dropout is disabled and the scope is opened with ``reuse=True``.

    Returns:
        The un-activated output of the final single-unit linear layer.
    """
    images = tf.reshape(images, [-1, 299, 299, 3])
    keep_prob = 0.5 if train else 1.0
    pool_window = [1, 2, 2, 1]

    with tf.variable_scope('conv', reuse=(not train)):
        with tf.variable_scope('conv1'):
            h_out = _conv_relu(images, 40, 3, 16, stride=2, padding='SAME')

        with tf.variable_scope('conv2'):
            h_conv = _conv_relu(h_out, 40, 16, 32, stride=1, padding='SAME')
            h_out = tf.nn.max_pool(
                h_conv, ksize=pool_window, strides=pool_window, padding='SAME')

        with tf.variable_scope('conv3'):
            h_conv = _conv_relu(h_out, 15, 32, 64, stride=2, padding='SAME')
            h_out = tf.nn.avg_pool(
                h_conv, ksize=pool_window, strides=pool_window, padding='SAME')

        with tf.variable_scope('conv4'):
            h_conv = _conv_relu(h_out, 16, 64, 128, stride=1, padding='VALID')
            h_out = tf.nn.max_pool(
                h_conv, ksize=pool_window, strides=pool_window, padding='SAME')

        # Derive the flattened size from the static shape rather than
        # hard-coding it; for a 299x299 input this works out to 2 * 2 * 128.
        fc_in_size = 1
        for dim in h_out.get_shape().as_list()[1:]:
            fc_in_size *= dim
        h_in = tf.reshape(h_out, [-1, fc_in_size])

        with tf.variable_scope('fc1'):
            weights = tfu.normal_weight_variable(
                [fc_in_size, 1024], stddev=(2. / fc_in_size) ** 0.5)
            bias = tfu.constant_bias_variable([1024])
            h_out = tf.nn.relu(tf.matmul(h_in, weights) + bias)
            # Dropout is a no-op when train is false (keep_prob == 1.0).
            h_out = tf.nn.dropout(h_out, keep_prob=keep_prob)

        with tf.variable_scope('out'):
            weights_out = tfu.normal_weight_variable([1024, 1])
            bias_out = tfu.constant_bias_variable([1])
            y = tf.matmul(h_out, weights_out) + bias_out

    return y