In [0]:
import numpy as np
import tensorflow as tf


def to_one_hot(y, no_labels):
    arr = np.squeeze(np.eye(no_labels)[y])
    return arr

n_classes = 10
cifar10 = tf.keras.datasets.cifar10.load_data()
cifar10_train, cifar10_test = cifar10[0], cifar10[1]

# use only first 25000 images (full dataset 50k)
train_X = cifar10_train[0][:25000]
cifar10_train_y = cifar10_train[1][:25000]

test_X = cifar10_test[0][:25000]
cifar10_test_y = cifar10_test[1][:25000]

train_Y = to_one_hot(cifar10_train_y,n_classes)
test_Y  = to_one_hot(cifar10_test_y, n_classes)

In [0]:
# resunit from first paper, conv -> batch -> relu -> conv batch -> shortcut addition -> relu
def resUnit(input_layer,i):
    with tf.variable_scope("res_unit"+str(i)):
        part1 = slim.conv2d(input_layer,64,[3,3],activation_fn=None)
        part2 = slim.batch_norm(part1,activation_fn=None)
        part3 = tf.nn.relu(part2)
        part4 = slim.conv2d(part3,64,[3,3],activation_fn=None)
        part5 = slim.batch_norm(part4,activation_fn=None)
        shortcut = part5 + input_layer # remove input_layer here for 'plain' network
        output = tf.nn.relu(shortcut)
        return output

In [0]:
# resnet from follow up paper, batch -> relu -> conv -> batch -> relu -> conv -> shortcut addition
def resUnit2(input_layer,i):
    with tf.variable_scope("res_unit"+str(i)):
        part1 = slim.batch_norm(input_layer,activation_fn=None)
        part2 = tf.nn.relu(part1)
        part3 = slim.conv2d(part2,64,[3,3],activation_fn=None)
        part4 = slim.batch_norm(part3,activation_fn=None)
        part5 = tf.nn.relu(part4)
        part6 = slim.conv2d(part5,64,[3,3],activation_fn=None)
        output = part6 + input_layer
        return output

In [0]:
# this is not working right now
# tried to replace conv with dense layer
def denseResNet(input_layer, i):
    shortcut = input_layer
    with tf.variable_scope("res_unit"+str(i)):
        part1 = slim.batch_norm(input_layer,activation_fn=None)
        part2 = tf.nn.relu(part1)
        #part3 = slim.conv2d(part2,64,[3,3],activation_fn=None)
        part3 = tf.layers.dense(part2, part2.shape[1], activation=None)
        part4 = slim.batch_norm(part3,activation_fn=None)
        part5 = tf.nn.relu(part4)
        part6 = tf.layers.dense(part5, part5.shape[1], activation=None)
        #part6 = slim.conv2d(part5,64,[3,3],activation_fn=None)
        output =  part6 + shortcut
        print(input_layer.shape, part6.shape)

        return output

In [0]:
import tensorflow.contrib.slim as slim

device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

total_layers = 1 # not used atm, Specify how deep we want our network
units_between_stride = 2 # how many resUnits between a stride

tf.reset_default_graph()

learning_rate = 0.001
batch_size = 64

# CIFAR10 data input (img shape: 32*32*3)
n_input = 32
n_depth = 3

# input layer
x = tf.placeholder(shape=[None,n_input,n_input,n_depth],dtype=tf.float32,name='input')
label_layer = tf.placeholder(shape=[None],dtype=tf.int32)
y = slim.layers.one_hot_encoding(label_layer, n_classes)


layer1 = slim.conv2d(x,64,[3,3],normalizer_fn=slim.batch_norm,scope='conv_'+str(0))

# the following 2 lines are neccessary if you want to use denseResNet() in the next loop
# in this case, the above conv2d layer can be removed
#layer1 = slim.layers.flatten(x)
#layer1 = tf.layers.dense(layer1, 512, activation=tf.nn.relu)

for i in range(3):
    for j in range(units_between_stride):
        layer1 = resUnit2(layer1, j + (i*units_between_stride))
    layer1 = slim.conv2d(layer1,64,[3,3],stride=[2,2],normalizer_fn=slim.batch_norm, scope='conv_s_'+str(i))

#top = slim.conv2d(layer1,10,[3,3],normalizer_fn=slim.batch_norm,activation_fn=None,scope='conv_top')

layer1 = slim.layers.flatten(layer1)
# get 10 outputs for 10 classes
top = tf.layers.dense(layer1, n_classes, activation=tf.nn.relu)

output = slim.layers.softmax(top)

cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(output) + 1e-10, reduction_indices=[1]))
trainer = tf.train.AdamOptimizer(learning_rate=learning_rate)
update = trainer.minimize(cost)

correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
#calculate accuracy across all the given images and average them out. 
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()


### unused, ignore
def _log_gradients(num_layers):
    gr = tf.get_default_graph()
    for i in range(num_layers):
        weight = gr.get_tensor_by_name('conv_{}/weights:0'.format(i + 1))
        grad = tf.gradients(cost, weight)[0]
        mean = tf.reduce_mean(tf.abs(grad))
        print('mean_{}'.format(i + 1), mean)
        print('histogram_{}'.format(i + 1), str(grad))
        print('hist_weights_{}'.format(i + 1), str(grad))
        


Found GPU at: /device:GPU:0


In [0]:
import time
start = time.time()
training_iters = 10
with tf.Session() as sess:
    saver = tf.train.Saver()
    sess.run(init)
    # restore saved weights
    #saver.restore(sess, "./plain2-20")
    train_loss = []
    test_loss = []
    train_accuracy = []
    test_accuracy = []
    #summary_writer = tf.summary.FileWriter('./Output', sess.graph)
    
    for i in range(training_iters):
        iter_loss = []
        iter_accuracy = []
        
        # training
        for batch in range(len(train_X)//batch_size):
            batch_x = train_X[batch*batch_size:min((batch+1)*batch_size,len(train_X))]
            batch_y = train_Y[batch*batch_size:min((batch+1)*batch_size,len(train_Y))]
            # Run optimization op (backprop).
            # Calculate batch loss and accuracy
            opt = sess.run(update, feed_dict={x: batch_x, y: batch_y})
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})

            iter_loss.append(loss)
            iter_accuracy.append(acc)

        train_loss.append(sum(iter_loss)/(len(train_X)//batch_size))
        train_accuracy.append(sum(iter_accuracy)/(len(train_X)//batch_size))
        print("Iter " + str(i) + ", Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f} ".format(acc), end='')

        # Calculate accuracy for all test images
        iter_loss = []
        iter_accuracy = []
        
        # testing
        for batch in range(len(test_X)//batch_size):

            batch_x = test_X[batch*batch_size:min((batch+1)*batch_size,len(test_X))]
            batch_y = test_Y[batch*batch_size:min((batch+1)*batch_size,len(test_Y))]
            valid_loss, test_acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
            iter_loss.append(valid_loss)
            iter_accuracy.append(test_acc)

        
        test_loss.append(sum(iter_loss)/(len(test_X)//batch_size))
        test_accuracy.append(sum(iter_accuracy)/(len(test_X)//batch_size))
        print("Testing Accuracy:","{:.5f}".format(test_accuracy[i]))
        
    # save weights
    save_path = saver.save(sess, './orig2_resNet',global_step=training_iters)
    
    end = time.time()
    print("duration:", end-start)
    
    #summary_writer.close()
    
    #tvars = tf.trainable_variables()
    #tvars_vals = sess.run(tvars)

    #for var, val in zip(tvars, tvars_vals):
        #print(var.name, val)  # Prints the name of the variable alongside its value.

Iter 0, Loss= 2.087293, Training Accuracy= 0.21875 Testing Accuracy: 0.27214
Iter 1, Loss= 1.824421, Training Accuracy= 0.35938 Testing Accuracy: 0.35777
Iter 2, Loss= 1.608509, Training Accuracy= 0.48438 Testing Accuracy: 0.41647
Iter 3, Loss= 1.537956, Training Accuracy= 0.46875 Testing Accuracy: 0.44401
Iter 4, Loss= 1.441008, Training Accuracy= 0.51562 Testing Accuracy: 0.45713


KeyboardInterrupt: ignored