In [0]:
import numpy as np
import tensorflow as tf


def to_one_hot(y, no_labels):
    """Convert integer class labels to one-hot rows.

    Args:
        y: Integer label(s): a scalar, a sequence, or an array such as an
            (N, 1) column of labels.
        no_labels: Number of classes, i.e. the length of each one-hot row.

    Returns:
        Array (rows of ``np.eye(no_labels)``) whose last axis has length
        ``no_labels``. Singleton axes strictly between the first and last
        axis are dropped, so (N, 1) labels give an (N, no_labels) array.
    """
    encoded = np.eye(no_labels)[y]
    # A bare np.squeeze would also remove the batch axis when N == 1 (and the
    # class axis when no_labels == 1); only squeeze the axes in between.
    inner_singletons = tuple(
        axis for axis in range(1, encoded.ndim - 1) if encoded.shape[axis] == 1
    )
    if inner_singletons:
        encoded = np.squeeze(encoded, axis=inner_singletons)
    return encoded

n_classes = 10
# load_data() returns ((train_images, train_labels), (test_images, test_labels)).
cifar10 = tf.keras.datasets.cifar10.load_data()
cifar10_train, cifar10_test = cifar10[0], cifar10[1]

# Cap on the number of images taken from each split (full training set is 50k).
# The CIFAR-10 test split holds only 10k images, so the cap leaves it unchanged.
max_samples = 25000

train_X = cifar10_train[0][:max_samples]
cifar10_train_y = cifar10_train[1][:max_samples]

test_X = cifar10_test[0][:max_samples]
cifar10_test_y = cifar10_test[1][:max_samples]

# One-hot encode the integer labels.
train_Y = to_one_hot(cifar10_train_y, n_classes)
test_Y = to_one_hot(cifar10_test_y, n_classes)

In [0]:
# resunit from first paper, conv -> batch -> relu -> conv -> batch -> shortcut addition -> relu
def resUnit(input_layer,i):
    """Build one residual unit with the ReLU applied after the shortcut addition.

    Layer order: 3x3 conv (64 filters) -> batch norm -> ReLU -> 3x3 conv
    (64 filters) -> batch norm -> add ``input_layer`` -> ReLU.

    Args:
        input_layer: Tensor fed into the unit and added back as the shortcut.
            NOTE(review): the addition presumably requires it to already have
            64 channels - confirm against callers.
        i: Index appended to "res_unit" to form the variable scope name.

    Returns:
        The output tensor of the final ReLU.
    """
    scope_name = "res_unit" + str(i)
    with tf.variable_scope(scope_name):
        first_conv = slim.conv2d(input_layer, 64, [3, 3], activation_fn=None)
        hidden = tf.nn.relu(slim.batch_norm(first_conv, activation_fn=None))
        second_conv = slim.conv2d(hidden, 64, [3, 3], activation_fn=None)
        residual = slim.batch_norm(second_conv, activation_fn=None)
        # Drop `+ input_layer` to turn this into a 'plain' (non-residual) unit.
        return tf.nn.relu(residual + input_layer)

In [0]:
# resnet from follow up paper, batch -> relu -> conv -> batch -> relu -> conv -> shortcut addition
def resUnit2(input_layer,i):
    """Build one pre-activation residual unit.

    Layer order: batch norm -> ReLU -> 3x3 conv (64 filters) -> batch norm ->
    ReLU -> 3x3 conv (64 filters) -> add ``input_layer``. No activation is
    applied after the shortcut addition.

    Args:
        input_layer: Tensor fed into the unit and added back as the shortcut.
            NOTE(review): the addition presumably requires it to already have
            64 channels - confirm against callers.
        i: Index appended to "res_unit" to form the variable scope name.

    Returns:
        The sum of the second convolution's output and ``input_layer``.
    """
    scope_name = "res_unit" + str(i)
    with tf.variable_scope(scope_name):
        activated = tf.nn.relu(slim.batch_norm(input_layer, activation_fn=None))
        first_conv = slim.conv2d(activated, 64, [3, 3], activation_fn=None)
        activated = tf.nn.relu(slim.batch_norm(first_conv, activation_fn=None))
        second_conv = slim.conv2d(activated, 64, [3, 3], activation_fn=None)
        return second_conv + input_layer

In [0]:
# this is not working right now
# tried to replace conv with dense layer
def denseResNet(input_layer, i):
    """Pre-activation residual unit built from dense layers instead of convs.

    Layer order: batch norm -> ReLU -> dense -> batch norm -> ReLU -> dense ->
    add ``input_layer``. Prints the input and pre-addition shapes while the
    graph is being built (debugging aid).

    NOTE(review): the notebook flags this unit as "not working right now"; the
    cause is not recorded. One concrete issue handled here: the dense width is
    now read from the last axis, the axis tf.layers.dense transforms, so the
    output shape matches the shortcut for inputs of any rank. For rank-2
    inputs this is the same value as the previous ``shape[1]``.

    Args:
        input_layer: Tensor fed into the unit and added back as the shortcut.
        i: Index appended to "res_unit" to form the variable scope name.

    Returns:
        The sum of the second dense layer's output and ``input_layer``.
    """
    shortcut = input_layer
    with tf.variable_scope("res_unit"+str(i)):
        part1 = slim.batch_norm(input_layer,activation_fn=None)
        part2 = tf.nn.relu(part1)
        # Dense layers stand in for the 3x3 convs used by resUnit2; keep the
        # feature width unchanged so the shortcut addition stays valid.
        part3 = tf.layers.dense(part2, part2.shape[-1], activation=None)
        part4 = slim.batch_norm(part3,activation_fn=None)
        part5 = tf.nn.relu(part4)
        part6 = tf.layers.dense(part5, part5.shape[-1], activation=None)
        output =  part6 + shortcut
        print(input_layer.shape, part6.shape)

        return output

In [0]:
import tensorflow.contrib.slim as slim

# Abort early when no GPU is visible to TensorFlow.
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))

total_layers = 1 # not used atm, Specify how deep we want our network
units_between_stride = 2 # how many resUnits between a stride

# Start from an empty graph so re-running this cell does not accumulate ops.
tf.reset_default_graph()

learning_rate = 0.001
batch_size = 64

# CIFAR10 data input (img shape: 32*32*3)
n_input = 32
n_depth = 3

# input layer
x = tf.placeholder(shape=[None,n_input,n_input,n_depth],dtype=tf.float32,name='input')
# `y` is the one-hot encoding of `label_layer`. The training cell feeds
# one-hot arrays straight into `y`, so `label_layer` itself is never fed there.
label_layer = tf.placeholder(shape=[None],dtype=tf.int32)
y = slim.layers.one_hot_encoding(label_layer, n_classes)


# Fully connected network: flatten, then four sigmoid dense layers of width 3072.
# These are created first, so they get the default names dense .. dense_3.
layer1 = slim.layers.flatten(x)
layer1 = tf.layers.dense(layer1, 3072, activation=tf.nn.sigmoid)

for i in range(3):
      layer1 = tf.layers.dense(layer1, layer1.shape[1], activation=tf.nn.sigmoid)

# get 10 outputs for 10 classes
# NOTE(review): the sigmoid here bounds the values fed to softmax to (0, 1),
# which limits how peaked the softmax can become - confirm this is intended.
top = tf.layers.dense(layer1, n_classes, activation=tf.nn.sigmoid)

output = slim.layers.softmax(top)

# Cross-entropy averaged over the batch. The epsilon sits inside the log so a
# zero probability cannot produce log(0) = -inf (and 0 * -inf = NaN).
cost = tf.reduce_mean(-tf.reduce_sum(y * tf.log(output + 1e-10), reduction_indices=[1]))
trainer = tf.train.AdamOptimizer(learning_rate=learning_rate)
#update = trainer.minimize(cost)
# compute/apply are split (instead of minimize) so the gradient tensors in
# grads_and_vars can be fetched and inspected during training.
grads_and_vars = trainer.compute_gradients(cost)
update = trainer.apply_gradients(grads_and_vars)

correct_prediction = tf.equal(tf.argmax(output, 1), tf.argmax(y, 1))
#calculate accuracy across all the given images and average them out. 
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# Initializing the variables
init = tf.global_variables_initializer()

Found GPU at: /device:GPU:0


In [0]:
import time
start = time.time()
training_iters = 10
with tf.Session() as sess:
    saver = tf.train.Saver()
    sess.run(init)
    #saver.restore(sess, "./plain2-20")
    # Per-epoch averages, one entry per training iteration.
    train_loss = []
    test_loss = []
    train_accuracy = []
    test_accuracy = []
    #summary_writer = tf.summary.FileWriter('./Output', sess.graph)
    
    print("len", len(train_X)//batch_size, len(train_X), batch_size)
    # Gradient tensors in the same order as grads_and_vars, so they can all be
    # fetched with a single sess.run call.
    grad_tensors = [gv[0] for gv in grads_and_vars]
    for i in range(training_iters):
        iter_loss = []
        iter_accuracy = []
        printed = False # only print the gradients for one batch, but in each training iteration
        # Integer division: a trailing partial batch is skipped.
        for batch in range(len(train_X)//batch_size):
            batch_x = train_X[batch*batch_size:min((batch+1)*batch_size,len(train_X))]
            batch_y = train_Y[batch*batch_size:min((batch+1)*batch_size,len(train_Y))]
            # The gradients are only inspected for the first batch of each
            # iteration, so only evaluate them then (before the update step),
            # and fetch them all in one run instead of one run per variable.
            if not printed:
                grad_values = sess.run(grad_tensors, feed_dict={x: batch_x, y: batch_y})
                for grads, (_, var) in zip(grad_values, grads_and_vars):
                    #print(var.name, grads.shape)
                    if "dense_1/kernel" in var.name or "dense_4/kernel" in var.name:
                        print(str(grads[0][:10]) + " -", var.name, grads.shape, "\n")
                    if "dense_1/bias" in var.name or "dense_4/bias" in var.name:
                        print(str(grads[:10]) + " -", var.name, grads.shape, "\n")
                printed = True # do not print again for another batch, next time will be in the next training iteration

            # Run optimization op (backprop).
            opt = sess.run(update, feed_dict={x: batch_x, y: batch_y})
            # Calculate batch loss and accuracy (evaluated after the update).
            loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})

            iter_loss.append(loss)
            iter_accuracy.append(acc)

        train_loss.append(sum(iter_loss)/(len(train_X)//batch_size))
        train_accuracy.append(sum(iter_accuracy)/(len(train_X)//batch_size))
        # NOTE: `loss` and `acc` printed here come from the last mini-batch of
        # the iteration, not the averages stored in train_loss/train_accuracy.
        print("Iter " + str(i) + ", Loss= " + \
                      "{:.6f}".format(loss) + ", Training Accuracy= " + \
                      "{:.5f} ".format(acc), end='')

        # Calculate accuracy for all test images
        iter_loss = []
        iter_accuracy = []
        for batch in range(len(test_X)//batch_size):
            batch_x = test_X[batch*batch_size:min((batch+1)*batch_size,len(test_X))]
            batch_y = test_Y[batch*batch_size:min((batch+1)*batch_size,len(test_Y))]
            valid_loss, test_acc = sess.run([cost, accuracy], feed_dict={x: batch_x, y: batch_y})
            iter_loss.append(valid_loss)
            iter_accuracy.append(test_acc)

        
        test_loss.append(sum(iter_loss)/(len(test_X)//batch_size))
        test_accuracy.append(sum(iter_accuracy)/(len(test_X)//batch_size))
        print("Testing Accuracy:","{:.5f}".format(test_accuracy[i]))
    # Checkpoint the final weights; global_step is appended to the file name.
    save_path = saver.save(sess, './orig2_resNet',global_step=training_iters)
    
    end = time.time()
    print("duration:", end-start)
    
    #summary_writer.close()
    
    #tvars = tf.trainable_variables()
    #tvars_vals = sess.run(tvars)

    #for var, val in zip(tvars, tvars_vals):
        #print(var.name, val)  # Prints the name of the variable alongside its value.

len 390 25000 64
[-4.1575827e-22  8.5311281e-22 -7.9517210e-22  7.0842537e-23
  7.1489150e-22  2.4826940e-22  1.0753200e-21 -9.3620283e-23
 -1.5633921e-22  2.9381020e-22] - dense_1/kernel:0 (3072, 3072) 

[-3.3305082e-07  1.0113700e-05 -5.3744561e-06  9.9348358e-07
 -3.1010240e-06 -3.9314054e-06  3.2233872e-06 -1.3785571e-05
 -1.0149827e-05 -9.2219780e-06] - dense_1/bias:0 (3072,) 

[ 0.00635467 -0.0008898  -0.01240641 -0.00363719  0.00070055  0.00340486
  0.0045593  -0.00150823  0.0089436  -0.00486355] - dense_4/kernel:0 (3072, 10) 

[ 0.01142841 -0.00164186 -0.02232053 -0.00648938  0.00120292  0.00618264
  0.00814964 -0.00271715  0.01610724 -0.00872106] - dense_4/bias:0 (10,) 



KeyboardInterrupt: ignored