## Experiments on CIFAR-100

In [1]:
import os
import tensorflow as tf
import numpy as np
from mlp.data_providers import CIFAR10DataProvider, CIFAR100DataProvider
import matplotlib.pyplot as plt
import time

### CIFAR-100

In [2]:
# Fixed seed so that batch shuffling is repeatable between runs.
random_seed = 142857
rng = np.random.RandomState(random_seed)

# The notebook title and this section are about CIFAR-100, so use the
# CIFAR-100 provider (imported above) rather than the CIFAR-10 one.
# Both providers share the same RandomState; a batch size of 50 is used
# throughout the notebook.
train_data = CIFAR100DataProvider('train', batch_size=50, rng=rng)
valid_data = CIFAR100DataProvider('valid', batch_size=50, rng=rng)

### EXPERIMENT 1 - CNN

In [3]:
def data_augmentation(image, theight, twidth, whitten_image = True,phrase_train=True):
    """Turn a batch of flattened images into augmented NHWC image tensors.

    Each row of `image` is reshaped to 3x32x32 and moved to height x width x
    channel order (assumes the flat layout is channel-major, i.e. C,H,W --
    TODO confirm against the data provider).

    In training mode every image is zero-padded to 36x36, randomly flipped
    left/right and randomly cropped to `theight` x `twidth`; the flip and the
    crop offset are sampled per image each time the graph is run.  In
    evaluation mode images are only centre cropped/padded to the target size,
    so validation results are deterministic.

    Args:
        image: float tensor of shape [batch, 3 * 32 * 32].
        theight: Python int, output image height.
        twidth: Python int, output image width.
        whitten_image: if True, apply `tf.image.per_image_standardization`
            to every output image.
        phrase_train: Python bool or scalar `tf.bool` tensor selecting the
            training (augmenting) or evaluation (deterministic) path.

    Returns:
        Tensor of shape [batch, theight, twidth, 3].
    """
    channels = 3
    source_size = 32
    padded_size = 36  # 2 pixels of zero padding on every side before cropping

    # -1 keeps the batch dimension dynamic instead of hard-coding a size.
    chw_images = tf.reshape(image, [-1, channels, source_size, source_size])
    # An explicit permutation is required: tf.transpose without `perm`
    # reverses all axes, which would also swap image rows and columns.
    hwc_images = tf.transpose(chw_images, perm=[0, 2, 3, 1])

    def _augment_one(img):
        # Pad, flip and crop a single HWC image with run-time randomness.
        padded = tf.image.resize_image_with_crop_or_pad(img, padded_size, padded_size)
        flipped = tf.image.random_flip_left_right(padded)
        return tf.random_crop(flipped, [theight, twidth, channels])

    def _center_one(img):
        # Deterministic path: centre crop/pad to the requested size.
        return tf.image.resize_image_with_crop_or_pad(img, theight, twidth)

    def _train_batch():
        return tf.map_fn(_augment_one, hwc_images)

    def _eval_batch():
        return tf.map_fn(_center_one, hwc_images)

    if isinstance(phrase_train, bool):
        new_image = _train_batch() if phrase_train else _eval_batch()
    else:
        new_image = tf.cond(phrase_train, _train_batch, _eval_batch)

    if whitten_image:
        new_image = tf.map_fn(
            lambda img: tf.image.per_image_standardization(img), new_image)

    # Restore the static spatial/channel shape, which control-flow ops may drop.
    new_image.set_shape([None, theight, twidth, channels])
    return new_image
      

In [4]:
def kernel(name, shape, stddev, wd=None):
    """Create a convolution kernel variable with Xavier initialisation.

    Args:
        name: variable name passed to `tf.get_variable`.
        shape: kernel shape, [height, width, in_channels, out_channels].
        stddev: accepted for interface compatibility but unused; the Xavier
            initialiser determines the initial scale.
        wd: optional L2 weight-decay coefficient.  When given, the term
            `wd * l2_loss(kernel)` is added to the
            `tf.GraphKeys.REGULARIZATION_LOSSES` collection.  The training
            objective must sum that collection for the decay to take effect.

    Returns:
        The kernel variable itself (never modified by the decay term).
    """
    with tf.device('/cpu:0'):
        kernel_weights = tf.get_variable(
            name, shape, dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(kernel_weights), wd, name='weight_loss')
        # Register the penalty as a loss term.  Adding the scalar to the
        # weights themselves would shift every weight by the same amount
        # rather than regularise them.
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, weight_decay)
    return kernel_weights
    

In [5]:
def cnn_layer(inputs,kernel,strides,output_channel,nonlinearity=tf.nn.relu,dropout=False,keep_prob=1.0):
    """Convolution + bias + non-linearity, optionally followed by dropout.

    Args:
        inputs: NHWC input tensor.
        kernel: convolution filter tensor passed to `tf.nn.conv2d`.
        strides: list of four ints, the conv2d strides.
        output_channel: number of output channels (size of the bias vector).
        nonlinearity: activation applied after the bias is added.
        dropout: if True, apply `tf.nn.dropout` to the activations.
        keep_prob: keep probability used when `dropout` is True.

    Returns:
        The layer's output tensor ('SAME' padding is used for the convolution).
    """
    conv = tf.nn.conv2d(inputs, kernel, strides, padding='SAME')
    # `name` must be passed by keyword: the second positional argument of
    # tf.Variable is `trainable`, not the variable name.
    biases = tf.Variable(tf.zeros([output_channel]), name='biases')
    activations = nonlinearity(tf.nn.bias_add(conv, biases))
    if dropout:
        return tf.nn.dropout(activations, keep_prob)
    return activations

In [6]:
def BN_layer(inputs, input_dim, phase_train, scope='bn'):
    """Batch-normalise `inputs` using statistics taken over axis 0.

    Args:
        inputs: tensor to normalise; moments are computed over axis 0 only.
        input_dim: shape of the learnable offset ('beta') and scale ('gamma')
            variables.
        phase_train: scalar boolean tensor.  When true, the current batch
            statistics are used and their exponential moving averages
            (decay 0.5) are updated; when false, the moving averages are used.
        scope: variable scope under which the layer's variables are created.

    Returns:
        The normalised tensor (epsilon 1e-3).
    """
    with tf.variable_scope(scope):
        offset = tf.Variable(tf.constant(0.0, shape=input_dim), name='beta', trainable=True)
        scale = tf.Variable(tf.constant(1.0, shape=input_dim), name='gamma', trainable=True)

        batch_mean, batch_var = tf.nn.moments(inputs, [0], name='moments')
        averager = tf.train.ExponentialMovingAverage(decay=0.5)

        def _training_statistics():
            # Update the moving averages, then hand back the batch statistics;
            # the control dependency forces the update to run first.
            update_op = averager.apply([batch_mean, batch_var])
            with tf.control_dependencies([update_op]):
                return tf.identity(batch_mean), tf.identity(batch_var)

        def _inference_statistics():
            # Read the accumulated moving averages without updating them.
            return averager.average(batch_mean), averager.average(batch_var)

        mean, var = tf.cond(phase_train, _training_statistics, _inference_statistics)
        normed = tf.nn.batch_normalization(inputs, mean, var, offset, scale, 1e-3)
    return normed

In [7]:
def fully_connected_layer(inputs, input_dim, output_dim, nonlinearity=tf.nn.relu,dropout=False,keep_prob=1.0, wd=None):
    """Affine layer followed by a non-linearity and optional dropout.

    Args:
        inputs: 2-D tensor of shape [batch, input_dim].
        input_dim: number of input features.
        output_dim: number of output units.
        nonlinearity: activation applied to `inputs @ weights + biases`.
        dropout: if True, apply `tf.nn.dropout` to the activations.
        keep_prob: keep probability used when `dropout` is True.
        wd: optional L2 weight-decay coefficient.  When given, the term
            `wd * l2_loss(weights)` is added to the
            `tf.GraphKeys.REGULARIZATION_LOSSES` collection.  The training
            objective must sum that collection for the decay to take effect.

    Returns:
        The layer's output tensor of shape [batch, output_dim].
    """
    with tf.device('/cpu:0'):
        # `name` must be passed by keyword: the second positional argument of
        # tf.Variable is `trainable`, not the variable name.
        weights = tf.Variable(
            tf.truncated_normal(
                [input_dim, output_dim], stddev=2. / (input_dim + output_dim)**0.5),
            name='weights')
        biases = tf.Variable(tf.zeros([output_dim]), name='biases')

    if wd is not None:
        weight_decay = tf.multiply(tf.nn.l2_loss(weights), wd, name='weight_loss')
        # Register the penalty as a loss term instead of adding the scalar
        # to every weight, which would distort the layer's parameters.
        tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, weight_decay)

    activations = nonlinearity(tf.matmul(inputs, weights) + biases)
    if dropout:
        return tf.nn.dropout(activations, keep_prob)
    return activations

In [12]:
def residual_layer(inputs, previous_inputs,
                   kernel,strides,output_channel,nonlinearity=tf.nn.relu,
                   dropout=False,keep_prob=1.0,wd=None):
    """Pre-activation convolution with an optional additive shortcut.

    The non-linearity is applied to `inputs` first, then a 'SAME'-padded
    convolution and a bias; if `previous_inputs` is given it is added to the
    result.

    Args:
        inputs: NHWC input tensor.
        previous_inputs: tensor to add to the convolution output as a
            shortcut connection, or None for no shortcut.  It has no default
            because Python does not allow the required parameters that follow
            it to come after a defaulted one; callers pass it positionally.
        kernel: convolution filter tensor passed to `tf.nn.conv2d`.
        strides: list of four ints, the conv2d strides.
        output_channel: number of output channels (size of the bias vector).
        nonlinearity: activation applied to `inputs` before the convolution.
        dropout: accepted for signature parity with `cnn_layer`; unused.
        keep_prob: accepted for signature parity with `cnn_layer`; unused.
        wd: accepted but unused.

    Returns:
        The convolution output, plus `previous_inputs` when it is not None.
    """
    activation_output = nonlinearity(inputs)
    conv = tf.nn.conv2d(activation_output, kernel, strides, padding='SAME')
    # `name` must be passed by keyword: the second positional argument of
    # tf.Variable is `trainable`, not the variable name.
    biases = tf.Variable(tf.zeros([output_channel]), name='biases')
    outputs = tf.nn.bias_add(conv, biases)
    if previous_inputs is None:
        return outputs
    return tf.identity(previous_inputs) + outputs

In [10]:
def max_pooling_layer(inputs, window_size, strides):
    """Max-pool `inputs` with the given window and strides ('SAME' padding).

    Args:
        inputs: tensor passed to `tf.nn.max_pool`.
        window_size: pooling window, forwarded as `ksize`.
        strides: pooling strides.

    Returns:
        The pooled tensor.
    """
    return tf.nn.max_pool(
        inputs,
        ksize=window_size,
        strides=strides,
        padding='SAME')

In [11]:
def avg_pooling_layer(inputs, window_size, strides):
    """Average-pool `inputs` with the given window and strides ('VALID' padding).

    The pooling region is whatever `window_size` specifies; it only covers
    the whole feature map if the caller passes a window of that size.

    Args:
        inputs: tensor passed to `tf.nn.avg_pool`.
        window_size: pooling window, forwarded as `ksize`.
        strides: pooling strides.

    Returns:
        The pooled tensor.
    """
    return tf.nn.avg_pool(
        inputs,
        ksize=window_size,
        strides=strides,
        padding='VALID')

### Further experiment -- baseline learning with two residual layers

In [None]:
# Build the whole model in a fresh graph so the cell can be re-run safely.
tf.reset_default_graph()

inputs = tf.placeholder(tf.float32, [None,train_data.inputs.shape[1]], 'inputs')
targets = tf.placeholder(tf.float32, [None,train_data.num_classes], 'targets')

# Dropout keep probability (fed per run: < 1 for training, 1.0 for validation).
keep_prob = tf.placeholder(tf.float32)

# Switches batch normalisation between batch statistics and moving averages.
phrase_train = tf.placeholder(tf.bool,name='phase_train')

# The learning rate is fed manually so it can follow a hand-written schedule.
learning_rate = tf.placeholder(tf.float32)

with tf.name_scope('data_augmentation'):
    aug_inputs = data_augmentation(inputs, 32, 32, whitten_image=False,
                                   phrase_train = phrase_train)

# --- Stem: 3x3 convolution (3 -> 64 channels), batch norm, 2x2 max pooling ---
with tf.name_scope('kernel-1'):
    kernel_1 = kernel('kernel-1', [3,3,3,64], stddev=5e-2, wd=0.0001)
with tf.name_scope('cnn-layer-1'):
    cnn_1 = cnn_layer(aug_inputs, kernel_1, [1,1,1,1],64,dropout=False, keep_prob=keep_prob)
with tf.name_scope('bn-layer-1'):
    bn_1 = BN_layer(cnn_1, [32,32,64], phrase_train)

with tf.name_scope('max-pooling'):
    max_pool = max_pooling_layer(bn_1,window_size=[1, 2, 2, 1], strides=[1, 2, 2, 1])

# --- Four batch-norm -> ReLU -> conv stages on the 16x16x64 feature map ---
# NOTE(review): every residual_layer call below passes previous_inputs=None,
# so no shortcut connection is actually added anywhere in this network.
with tf.name_scope('kernel-2'):
    kernel_2 = kernel('kernel-2', [3,3,64,64], stddev=5e-2, wd=0.0001)
with tf.name_scope('bn-layer-2'):
    bn_2 = BN_layer(max_pool, [16,16,64], phrase_train)
with tf.name_scope('res-layer-2'):
    res_2 = residual_layer(bn_2, None, kernel_2, [1,1,1,1],64,dropout=False, keep_prob=keep_prob)

with tf.name_scope('kernel-3'):
    kernel_3 = kernel('kernel-3', [3,3,64,64], stddev=5e-2, wd=0.0001)
with tf.name_scope('bn-layer-3'):
    bn_3 = BN_layer(res_2, [16,16,64], phrase_train)
with tf.name_scope('res-layer-3'):
    res_3 = residual_layer(bn_3, None, kernel_3, [1,1,1,1],64,dropout=False, keep_prob=keep_prob)

with tf.name_scope('kernel-4'):
    kernel_4 = kernel('kernel-4', [3,3,64,64], stddev=5e-2, wd=0.0001)
with tf.name_scope('bn-layer-4'):
    bn_4 = BN_layer(res_3, [16,16,64], phrase_train)
with tf.name_scope('res-layer-4'):
    res_4 = residual_layer(bn_4, None, kernel_4, [1,1,1,1],64,dropout=False, keep_prob=keep_prob)

with tf.name_scope('kernel-5'):
    kernel_5 = kernel('kernel-5', [3,3,64,64], stddev=5e-2, wd=0.0001)
with tf.name_scope('bn-layer-5'):
    bn_5 = BN_layer(res_4, [16,16,64], phrase_train)
# Each stage gets its own scope name and Python variable; reusing
# 'res-layer-3' / 'bn-layer-4' / bn_4 here would shadow the earlier stages.
with tf.name_scope('res-layer-5'):
    res_5 = residual_layer(bn_5, None, kernel_5, [1,1,1,1],64,dropout=False, keep_prob=keep_prob)

with tf.name_scope('bn-layer-6'):
    bn_6 = BN_layer(res_5, [16,16,64], phrase_train)

with tf.name_scope('avg_pooling'):
    avg_pool = avg_pooling_layer(bn_6, window_size=[1, 2, 2, 1], strides=[1, 1, 1, 1])

# --- Classifier head ---
# A 2x2 'VALID' average pool with stride 1 on 16x16x64 gives 15x15x64 features.
flat_dim = 15 * 15 * 64  # = 14400
with tf.name_scope('hidden-layer-1'):
    # -1 lets the batch dimension follow whatever batch size is fed.
    out_of_cnn = tf.reshape(avg_pool, [-1, flat_dim])
    hidden_1 = fully_connected_layer(out_of_cnn, flat_dim, 1024, dropout=True, keep_prob=keep_prob)
with tf.name_scope('hidden-layer-2'):
    hidden_2 = fully_connected_layer(hidden_1, 1024, 1024, dropout=True, keep_prob=keep_prob)
with tf.name_scope('output-layer'):
    outputs = fully_connected_layer(hidden_2, 1024, train_data.num_classes, tf.identity)

with tf.name_scope('error'):
    # Keyword arguments are used because the positional order of
    # (logits, labels) is not accepted by every TensorFlow release.
    error = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=targets, logits=outputs))
with tf.name_scope('accuracy'):
    accuracy = tf.reduce_mean(tf.cast(
            tf.equal(tf.argmax(outputs, 1), tf.argmax(targets, 1)),
            tf.float32))
with tf.name_scope('train'):
    # Optimise the data error plus any terms registered in the
    # regularisation-loss collection; `error` itself stays the pure
    # cross-entropy so the reported numbers remain comparable.
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    if reg_losses:
        total_loss = error + tf.add_n(reg_losses)
    else:
        total_loss = error
    train_step = tf.train.MomentumOptimizer(
        learning_rate=learning_rate, momentum=0.9).minimize(total_loss)

init = tf.global_variables_initializer()

In [None]:
# Per-epoch training and validation statistics for this experiment.
train_log_exp2 = {'epoch_num':[],
             'running_error':[],
             'running_accuracy':[],
             'running_time':[]
            }
valid_log_exp2 = {'epoch_num':[],
             'running_error':[],
             'running_accuracy':[],
            }
train_data.reset()
valid_data.reset()

num_epochs = 50
lr_decay_every = 15      # divide the learning rate by lr_decay_factor every N epochs
lr_decay_factor = 10
train_keep_prob = 0.8    # dropout keep probability while training

schedule_learning_rate = 0.01
with tf.Session() as sess:
    # Variable initialisation does not depend on any placeholder.
    sess.run(init)
    for e in range(num_epochs):
        running_error = 0.
        running_accuracy = 0.
        start_time = time.time()
        # Skip epoch 0 so training really starts at the initial rate;
        # `e % N == 0` alone would decay it before the first update.
        if e > 0 and e % lr_decay_every == 0:
            schedule_learning_rate /= lr_decay_factor
        for input_batch, target_batch in train_data:
            _, batch_error, batch_acc = sess.run(
                [train_step, error, accuracy],
                feed_dict={inputs: input_batch, targets: target_batch,
                           keep_prob: train_keep_prob,
                           phrase_train: True, learning_rate: schedule_learning_rate})
            running_error += batch_error
            running_accuracy += batch_acc
        epoch_time = time.time() - start_time
        running_error /= train_data.num_batches
        running_accuracy /= train_data.num_batches
        train_log_exp2['epoch_num'].append(e+1)
        train_log_exp2['running_error'].append(running_error)
        train_log_exp2['running_accuracy'].append(running_accuracy)
        train_log_exp2['running_time'].append(epoch_time)
        print('End of epoch {0:02d}: err(train)= {1:.2f} acc(train)= {2:.2f} time(train)= {3:.2f}'
              .format(e + 1, running_error, running_accuracy, epoch_time))

        # Validate after every epoch: dropout disabled, batch norm in
        # inference mode.  No learning rate is fed because no training op runs.
        valid_error = 0.
        valid_accuracy = 0.
        for input_batch, target_batch in valid_data:
            batch_error, batch_acc = sess.run(
                [error, accuracy],
                feed_dict={inputs: input_batch, targets: target_batch, keep_prob:1.0,
                           phrase_train: False})
            valid_error += batch_error
            valid_accuracy += batch_acc
        valid_error /= valid_data.num_batches
        valid_accuracy /= valid_data.num_batches
        valid_log_exp2['epoch_num'].append(e+1)
        valid_log_exp2['running_error'].append(valid_error)
        valid_log_exp2['running_accuracy'].append(valid_accuracy)
        print('                 err(valid)= {0:.2f} acc(valid)= {1:.2f}'
               .format(valid_error, valid_accuracy))

### ==========================================================================

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
'''
#print train_log
one = []
valid_one = []
for i in range(50):
    one.append(1)
    valid_one.append(1)
    one[i] = one[i] - train_log['running_accuracy'][i]
    valid_one[i] = valid_one[i] - valid_log['running_accuracy'][i]
# print (train_log['running_accuracy'])



# ax1.plot(train_log['epoch_num'],one,label='train_acc')
# ax1.plot(valid_log['epoch_num'],valid_one,label='valid_acc')
# ax2.plot(train_log['epoch_num'],train_log['running_error'],color='b',ls='-',label='train_err on CIFAR-10')
# ax2.plot(valid_log['epoch_num'],valid_log['running_error'],color='b',ls='--',label='valid_err on CIFAR-10')
'''

fig = plt.figure(figsize=(8, 5))


# ax1 = fig.add_subplot(1, 1, 1)
# ax1.plot(train_log_exp['epoch_num'],train_log_exp['running_error'],color='b',ls='-',label='3x3x32')
# ax1.plot(valid_log_exp['epoch_num'],valid_log_exp['running_error'],color='b',ls='--')

# ax1.plot(train_log_exp2['epoch_num'],train_log_exp2['running_error'],color='g',ls='-',label='3x3x64')
# ax1.plot(valid_log_exp2['epoch_num'],valid_log_exp2['running_error'],color='g',ls='--')

# ax1.plot(train_log_exp3['epoch_num'],train_log_exp3['running_error'],color='r',ls='-',label='5x5x32')
# ax1.plot(valid_log_exp3['epoch_num'],valid_log_exp3['running_error'],color='r',ls='--')

# ax1.plot(train_log_exp4['epoch_num'],train_log_exp4['running_error'],color='k',ls='-',label='5x5x64')
# ax1.plot(valid_log_exp4['epoch_num'],valid_log_exp4['running_error'],color='k',ls='--')

ax2 = fig.add_subplot(1, 1, 1)
# ax2.plot(train_log_ag1['epoch_num'],train_log_ag1['running_error'],color='g',ls='-',label='flip+whitten')
# ax2.plot(valid_log_ag2['epoch_num'],valid_log_ag2['running_error'],color='g',ls='--',label='flip+whitten')

ax2.plot(train_log_exp['epoch_num'],train_log_exp['running_accuracy'],color='b',ls='-',label='3x3x32')
ax2.plot(valid_log_exp['epoch_num'],valid_log_exp['running_accuracy'],color='b',ls='--')

ax2.plot(train_log_exp2['epoch_num'],train_log_exp2['running_accuracy'],color='g',ls='-',label='3x3x64')
ax2.plot(valid_log_exp2['epoch_num'],valid_log_exp2['running_accuracy'],color='g',ls='--')

ax2.plot(train_log_exp3['epoch_num'],train_log_exp3['running_accuracy'],color='r',ls='-',label='5x5x32')
ax2.plot(valid_log_exp3['epoch_num'],valid_log_exp3['running_accuracy'],color='r',ls='--')

ax2.plot(train_log_exp4['epoch_num'],train_log_exp4['running_accuracy'],color='k',ls='-',label='5x5x64')
ax2.plot(valid_log_exp4['epoch_num'],valid_log_exp4['running_accuracy'],color='k',ls='--')


# ax1.legend(loc=0)
# ax1.set_xlabel('Epoch number')
# ax1.set_ylabel('Error')
# ax1.set_title('Evolution of Error')
ax2.legend(loc=0)
ax2.set_xlabel('Epoch number')
ax2.set_ylabel('Accuracy')
ax2.set_title('Evolution of accuracy')

plt.show()

fig.savefig('kernel_size_acc.png',api=200)

In [None]:
# Mean wall-clock time per training epoch for each experiment.
# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python-2-only print statement.
print(np.mean(train_log_exp['running_time']))
print(np.mean(train_log_exp2['running_time']))
print(np.mean(train_log_exp3['running_time']))
print(np.mean(train_log_exp4['running_time']))