# Deep Residual Networks with CIFAR-10 Dataset

In [1]:
import tensorflow as tf
import numpy as np
import cPickle
from nesterov import NesterovOptimizer
import time
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

## Load Data

In [2]:
def unpickle(file):
    """Load and return the object pickled in the file at path ``file``.

    The file is opened in binary mode and is closed again even when
    unpickling raises.
    """
    # NOTE: unpickling can execute arbitrary code; only call this on
    # trusted dataset files.
    with open(file, 'rb') as fo:
        return cPickle.load(fo)

def one_hot_vec(label, num_classes=10):
    """Return a one-hot encoding of ``label``.

    Args:
        label: integer index of the entry to set to 1.
        num_classes: length of the returned vector (defaults to 10, the
            value that used to be hard-coded).

    Returns:
        A 1-D numpy array of zeros of length ``num_classes`` with a 1 at
        position ``label``.
    """
    vec = np.zeros(num_classes)
    vec[label] = 1
    return vec

def load_data(data_dir="/Volumes/EXTRADRIVE/data/cifar-10-batches-py"):
    """Load the CIFAR-10 python batches and return train/test splits.

    Reads ``data_batch_1`` .. ``data_batch_5`` and ``test_batch`` from
    ``data_dir``, scales the pixel values by 1/255, rearranges every row
    into a 32x32x3 image and subtracts the per-pixel mean computed over the
    training images only.

    Args:
        data_dir: directory holding the unpickle-able batch files. Defaults
            to the location that used to be hard-coded.

    Returns:
        A tuple ``(X_train, Y_train, X_test, Y_test)``. The X values are
        numpy arrays of shape (num_images, 32, 32, 3); the Y values are
        lists of one-hot vectors produced by ``one_hot_vec``.
    """
    import os  # local import so the block does not rely on a file-level one

    x_all = []
    y_all = []
    for i in range(1, 6):
        d = unpickle(os.path.join(data_dir, "data_batch_%d" % i))
        x_all.append(d['data'])
        y_all.append(d['labels'])

    # Everything loaded so far is training data; remember where it ends so
    # the split below does not depend on a hard-coded 50000.
    num_train = sum(len(labels) for labels in y_all)

    d = unpickle(os.path.join(data_dir, 'test_batch'))
    x_all.append(d['data'])
    y_all.append(d['labels'])

    x = np.concatenate(x_all) / np.float32(255)
    y = np.concatenate(y_all)
    # Each row holds three consecutive 1024-value planes (presumably the
    # R, G and B channels -- confirm against the dataset description).
    # Stack them along a new last axis, then unfold the 1024 values into
    # a 32x32 grid.
    x = np.dstack((x[:, :1024], x[:, 1024:2048], x[:, 2048:]))
    x = x.reshape((x.shape[0], 32, 32, 3))

    # Centre the data with the mean of the training images only.
    pixel_mean = np.mean(x[:num_train], axis=0)
    x -= pixel_mean

    # A list comprehension (rather than map) keeps the result sliceable on
    # both Python 2 and Python 3.
    y = [one_hot_vec(label) for label in y]

    X_train = x[:num_train, :, :, :]
    Y_train = y[:num_train]
    X_test = x[num_train:, :, :, :]
    Y_test = y[num_train:]

    return (X_train, Y_train, X_test, Y_test)

## Helper Functions

In [3]:
def weight_variable(shape, name=None):
    """Return a ``tf.Variable`` of ``shape`` initialised from a truncated
    normal distribution with standard deviation 0.1.

    ``name`` is forwarded to ``tf.Variable`` unchanged.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)

def softmax_layer(inpt, shape):
    """Apply a fully connected layer followed by softmax to ``inpt``.

    ``shape`` is the weight-matrix shape; ``shape[1]`` is used as the size
    of the zero-initialised bias. Returns the softmax output tensor.
    """
    weights = weight_variable(shape)
    bias = tf.Variable(tf.zeros([shape[1]]))

    logits = tf.matmul(inpt, weights) + bias
    return tf.nn.softmax(logits)

def conv_layer(inpt, filter_shape, stride):
    """Build a convolution -> batch normalisation -> ReLU layer.

    Args:
        inpt: input tensor passed to ``tf.nn.conv2d``.
        filter_shape: shape of the convolution filter; its fourth entry is
            used as the number of output channels.
        stride: stride applied to the two middle dimensions of the input.

    Returns:
        The ReLU-activated, batch-normalised convolution output.
    """
    out_channels = filter_shape[3]

    filter_ = weight_variable(filter_shape)
    conv = tf.nn.conv2d(inpt, filter=filter_, strides=[1, stride, stride, 1], padding="SAME")

    # Normalisation statistics are taken over every axis except the last,
    # i.e. one mean/variance per output channel of the current batch.
    mean, var = tf.nn.moments(conv, axes=[0,1,2])
    beta = tf.Variable(tf.zeros([out_channels]), name="beta")
    # The scale starts at 1 so that batch normalisation is initially a pure
    # normalisation. It used to be drawn from a zero-mean truncated normal
    # (stddev 0.1), which shrank every activation and flipped the sign of
    # roughly half of the channels before the ReLU.
    gamma = tf.Variable(tf.ones([out_channels]), name="gamma")

    batch_norm = tf.nn.batch_norm_with_global_normalization(
        conv, mean, var, beta, gamma, 0.001,
        scale_after_normalization=True)

    out = tf.nn.relu(batch_norm)

    return out

def residual_block(inpt, output_depth, down_sample, projection=False):
    """Build a residual block of two 3x3 ``conv_layer`` calls plus a shortcut.

    Args:
        inpt: 4-D input tensor; its last dimension is the input depth.
        output_depth: number of output channels of both convolutions.
        down_sample: when true, ``inpt`` is first max-pooled with a 2x2
            window and stride 2; the convolutions and the shortcut then
            both operate on the pooled tensor.
        projection: only relevant when the depth changes. True selects a
            1x1 ``conv_layer`` projection shortcut (option B); False
            zero-pads the extra channels (option A).

    Returns:
        The sum of the second convolution's output and the shortcut.
    """
    input_depth = inpt.get_shape().as_list()[3]
    if down_sample:
        filter_ = [1,2,2,1]
        inpt = tf.nn.max_pool(inpt, ksize=filter_, strides=filter_, padding='SAME')

    conv1 = conv_layer(inpt, [3, 3, input_depth, output_depth], 1)
    conv2 = conv_layer(conv1, [3, 3, output_depth, output_depth], 1)

    if input_depth != output_depth:
        if projection:
            # Option B: Projection shortcut.
            # Stride must be 1: any down-sampling has already been applied
            # to inpt by the max-pool above, so a stride of 2 here would
            # halve the shortcut a second time and its height/width would
            # no longer match conv2 in the addition below.
            input_layer = conv_layer(inpt, [1, 1, input_depth, output_depth], 1)
        else:
            # Option A: Zero-padding of the channel dimension only.
            input_layer = tf.pad(inpt, [[0,0], [0,0], [0,0], [0, output_depth - input_depth]])
    else:
        input_layer = inpt

    res = conv2 + input_layer
    return res

## ResNet Model

In [4]:
# ResNet architectures used for CIFAR-10
def resnet(inpt, n):
    """Assemble the CIFAR-10 ResNet graph on top of ``inpt``.

    ``n`` is the network depth; it must be at least 20 and of the form
    ``20 + 12 * k``. For any other value a message is printed and ``None``
    is returned. Otherwise the softmax output tensor is returned.
    """
    depth_ok = n >= 20 and (n - 20) % 12 == 0
    if not depth_ok:
        print("ResNet depth invalid.")
        return

    # Number of residual-block pairs built in each of the three stages.
    num_conv = int((n - 20) / 12 + 1)

    with tf.variable_scope('conv1'):
        net = conv_layer(inpt, [3, 3, 3, 16], 1)

    # One row per stage: variable-scope prefix, output depth, the shape
    # (without the batch dimension) asserted after every block pair, and
    # whether the first pair of the stage down-samples its input.
    stages = [
        ('conv2', 16, [32, 32, 16], False),
        ('conv3', 32, [16, 16, 32], True),
        ('conv4', 64, [8, 8, 64], True),
    ]
    for prefix, depth, expected_shape, shrinks in stages:
        for idx in range(num_conv):
            down_sample = shrinks and idx == 0
            with tf.variable_scope('%s_%d' % (prefix, idx + 1)):
                first = residual_block(net, depth, down_sample)
                net = residual_block(first, depth, False)

            assert net.get_shape().as_list()[1:] == expected_shape

    with tf.variable_scope('fc'):
        # Average over the two spatial axes, leaving one value per channel.
        global_pool = tf.reduce_mean(net, [1, 2])
        assert global_pool.get_shape().as_list()[1:] == [64]

        out = softmax_layer(global_pool, [64, 10])

    return out

## Train Function

In [5]:
def train_model(n):
    """Build a depth-``n`` ResNet, train it, and print its test accuracy.

    Hyperparameters are read from the module-level ``FLAGS``; the data is
    read from the module-level ``X_train``, ``Y_train``, ``X_test`` and
    ``Y_test``. Only full batches are fed, because the placeholders have a
    fixed batch dimension of ``FLAGS.batch_size``.

    Args:
        n: network depth, forwarded to ``resnet``.

    Returns:
        A list with the training accuracy measured after every step.
    """
    batch_size = FLAGS.batch_size

    X = tf.placeholder("float", [batch_size, 32, 32, 3])
    Y = tf.placeholder("float", [batch_size, 10])

    # ResNet Models
    net = resnet(X, n)

    # Cross entropy loss.
    # NOTE(review): the mean is taken over every batch x class entry, and
    # tf.log is applied to the raw softmax output without clipping.
    loss = -tf.reduce_mean(Y*tf.log(net))

    # Optimize
    opt = tf.train.MomentumOptimizer(FLAGS.learning_rate, 0.9)
    train_op = opt.minimize(loss)

    # predict
    correct_prediction = tf.equal(tf.argmax(net, 1), tf.argmax(Y, 1))

    # Accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    train_acc = []
    test_acc = []

    # The context manager closes the session even if a step raises.
    with tf.Session() as sess:
        sess.run(tf.initialize_all_variables())

        print('Start training...\n')
        for epoch in range(FLAGS.epoch):
            print('Epoch %d \n' % (epoch))
            # Stop before a trailing partial batch: it would not fit the
            # fixed-size placeholders.
            for i in range(0, FLAGS.sample - batch_size + 1, batch_size):
                feed_dict = {
                    X: X_train[i:i + batch_size],
                    Y: Y_train[i:i + batch_size]}

                start_time = time.time()
                _, loss_value = sess.run([train_op, loss],
                                         feed_dict=feed_dict)
                duration = time.time() - start_time

                # Accuracy on the same batch, measured after the update.
                acc = sess.run(accuracy, feed_dict=feed_dict)
                step = i // batch_size
                print('Step %d: loss = %.3f Accuracy = %.3f (%.3f sec)' % (step, loss_value, acc, duration))
                train_acc.append(acc)

        print('Computing Test Accuracy ...\n')
        # Results go into their own list so the accuracy tensor is not
        # overwritten. Every full batch is evaluated exactly once,
        # including the last one.
        for i in range(0, FLAGS.sample_test - batch_size + 1, batch_size):
            test_acc.append(sess.run(accuracy, feed_dict={
                X: X_test[i:i + batch_size],
                Y: Y_test[i:i + batch_size]
            }))

    # Report NaN rather than failing when no full test batch was available.
    test_accuracy = np.mean(np.array(test_acc)) if test_acc else float('nan')
    print("Test Accuracy: %.3f" % (test_accuracy))
    return train_acc

# Model Hyperparameters

In [6]:
# Hyperparameters are registered as TensorFlow flags and read through FLAGS
# (train_model uses batch_size, learning_rate, epoch, sample and sample_test).
flags = tf.app.flags
FLAGS = flags.FLAGS
flags.DEFINE_float('learning_rate', 0.001, 'Learning rate')
flags.DEFINE_integer('batch_size', 128, 'Batch size')
flags.DEFINE_integer('epoch', 3, 'Number of epochs')
# The two sample counts default to small subsets (30 and 10 batches of 128).
flags.DEFINE_integer('sample', 128*30, 'Number of samples in trainset') # Full train data 50000
flags.DEFINE_integer('sample_test', 128*10, 'Number of samples in testset') # Full test data 10000

# Load CIFAR-10 Dataset

In [None]:
# Load the dataset once at module level; train_model reads X_train, Y_train,
# X_test and Y_test as globals.
print('Load data...\n')
X_train, Y_train, X_test, Y_test = load_data()

Load data...



# Train and Test ResNet

In [None]:
# Train and evaluate a depth-20 network; acc receives the list of per-step
# training accuracies. resnet() accepts depths of the form 20 + 12*k.
acc = train_model(20) # 32, 44, 56

Start training...

Epoch 0 

Step 0: loss = 0.229 Accuracy = 0.156 (2.624 sec)
Step 1: loss = 0.229 Accuracy = 0.133 (2.930 sec)
Step 2: loss = 0.230 Accuracy = 0.125 (3.091 sec)
Step 3: loss = 0.231 Accuracy = 0.102 (2.609 sec)
Step 4: loss = 0.230 Accuracy = 0.102 (3.133 sec)
