In [1]:
!rm -rf ./benchmark_resnet

In [1]:
%config IPCompleter.greedy=True
import numpy as np
import tensorflow as tf
import time

In [64]:
%run ./datagen.py

datagen, (x_train, y_train), (x_test, y_test) = data_preparation()
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(50000, 32, 32, 3)
(50000, 10)
(10000, 32, 32, 3)
(10000, 10)


In [4]:
path_logdir = './benchmark_resnet/'
path_model = './benchmark_resnet/'
num_gpu = 4
batch_size = 100
iterations = x_train.shape[0] // (batch_size * num_gpu)
epochs = 500
old_acc = 0
start_lr = 1e-2
end_lr = 5*1e-4
decay_rate = (end_lr / start_lr) ** (1 / epochs)

In [5]:
def act(x):
#     out = 0.5 + tf.multiply(0.133, x) - tf.multiply(0.0014, tf.pow(x, 3))
    out = 0.5 + tf.multiply(0.15012, x) - tf.multiply(0.0015930078, tf.pow(x, 3))
    return out

In [6]:
# resnet layer
def res_layer(inputs, filter_num, filter_size, stride, is_train, 
              conv_first=False, batch_norm=True, activation=True):
    
    x = inputs
    
    if conv_first:
        x = tf.layers.conv2d(inputs=x, filters=filter_num, 
                             kernel_size=filter_size, strides=stride, padding='same')
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = act(x)
    else:
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = act(x)
        x = tf.layers.conv2d(inputs=x, filters=filter_num, 
                             kernel_size=filter_size, strides=stride, padding='same')
    
    return x

In [7]:
def resnetv2(inputs, is_train):
    
    with tf.variable_scope("Conv1"):
        x = res_layer(inputs, 64, 3, 2, is_train, conv_first=True)
          
    # Res Blocks
    for stack in range(3):
        for block in range(6):
            with tf.variable_scope('ResBlock{}'.format(stack*6+block+1)):
                
                batch_norm = True
                activation = True
                stride = 1
                if stack == 0:
                    filter_num = 64
                    if block == 0:
                        batch_norm = False
                        activation = False
                else:
                    filter_num = 64*2*stack
                    if block == 0:
                        stride = 2
                
                residual_x = x
                with tf.variable_scope('conv1'):
                    x = res_layer(x, filter_num, 1, stride, is_train, 
                                  batch_norm=batch_norm, activation=activation)
                with tf.variable_scope('conv2'):
                    x = res_layer(x, filter_num, 3, 1, is_train, 
                                  batch_norm=batch_norm, activation=activation)
                with tf.variable_scope('conv3'):
                    x = res_layer(x, filter_num*4, 1, 1, is_train, 
                                  batch_norm=batch_norm, activation=activation)
                if block == 0:
                    with tf.variable_scope('residual'):
                        residual_x = res_layer(residual_x, filter_num*4, 1, stride, is_train, 
                                               batch_norm=False, activation=False)
                x = x + residual_x   
    
    #x.shape = (?, 4, 4, 1024)
    with tf.variable_scope("AfterResBlock"):
        x = tf.layers.batch_normalization(x, training=is_train)                
        x = act(x)
        x = 4 * tf.layers.average_pooling2d(x, pool_size=2, strides=2, padding='SAME', name='pool1')
    
    with tf.variable_scope("Flatten"):
        x = tf.transpose(x, perm=[0, 3, 1, 2])
        x = tf.layers.flatten(x)
    
    with tf.variable_scope("Prediction"):
        pred = tf.layers.dense(x, units=10)
        
    return pred

In [8]:
# def inference(inputs, is_train):

#     # L1: 2*128conv + pooling + bn
#     with tf.variable_scope("ConvBlock1"):
#         x = tf.layers.conv2d(inputs=inputs, 
#                    filters=128, 
#                    kernel_size=(3, 3),
#                    strides=(1, 1), 
#                    padding='same')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.relu(x)

#         x = tf.layers.conv2d(inputs=x,
#                    filters=128, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = 4 * tf.layers.average_pooling2d(x, pool_size=(2, 2), strides=(2, 2), padding='SAME')
#         x = tf.layers.batch_normalization(x, training=is_train)        
#         x = tf.nn.relu(x)
# #         x = tf.layers.dropout(x, 0.1)

#     # L2: 2*256conv + pooling + bn + dropout
#     with tf.variable_scope("ConvBlock2"):
#         x = tf.layers.conv2d(inputs=x,
#                    filters=256, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = tf.layers.batch_normalization(x, training=is_train)        
#         x = tf.nn.relu(x)

#         x = tf.layers.conv2d(inputs=x,
#                    filters=256, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = 4*tf.layers.average_pooling2d(x, pool_size=(2, 2), strides=(2, 2), padding='SAME')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.relu(x)
# #         x = tf.layers.dropout(x, 0.1)

#     # L3: 2*512conv + pooling + dropout
#     with tf.variable_scope("ConvBlock3"):
#         x = tf.layers.conv2d(inputs=x,
#                    filters=512, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.relu(x)
        
#         x = tf.layers.conv2d(inputs=x,
#                    filters=512, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = 4*tf.layers.average_pooling2d(x, pool_size=(2, 2), strides=(2, 2), padding='SAME')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.relu(x)    
# #         x = tf.layers.dropout(x, 0.1)

#     with tf.variable_scope("Flatten"):
#         x = tf.transpose(x, perm=[0, 3, 1, 2])
#         x = tf.layers.flatten(x)

#     # L4: 2*FC1024 + bn + dropout
#     with tf.variable_scope("FCBlock1"):
#         x = tf.layers.dense(x, units=1024)
#         x = tf.layers.batch_normalization(x, training=is_train)       
#         x = tf.nn.relu(x)

#         x = tf.layers.dense(x, units=1024)
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.relu(x)   
# #         x = tf.layers.dropout(x, 0.1)

#     # predict layer
#     with tf.variable_scope("Prediction"):
#         pred = tf.layers.dense(x, units=10)
    
#     return pred

In [9]:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = []
        for g, _ in grad_and_vars:
            expend_g = tf.expand_dims(g, 0)
            grads.append(expend_g)
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

In [10]:
with tf.device("/cpu:0"):
    
    inputs = tf.placeholder(tf.float32, [None, 32, 32, 3], name='input')
    outputs = tf.placeholder(tf.float32, [None, 10], name='output')
    is_train = tf.placeholder(tf.bool, name='is_train')
    
    global_step = tf.Variable(0, trainable=False)
    l_r = tf.train.exponential_decay(start_lr, global_step, iterations, decay_rate, staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        opt = tf.train.AdamOptimizer(learning_rate=l_r)

    tower_grads = []
    tower_acc = []
    tower_loss = []
    
    with tf.variable_scope(tf.get_variable_scope()) as variable_scope:
        for i in range(num_gpu):
            with tf.device('/gpu:%d' % (i+5)):
                with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                    x = inputs[i * batch_size:(i + 1) * batch_size]
                    y = outputs[i * batch_size:(i + 1) * batch_size]
                    pred = resnetv2(x, is_train)
                    
                    tf.get_variable_scope().reuse_variables()
                    
                    loss = tf.losses.softmax_cross_entropy(y, pred)
                    tower_loss.append(loss)
#                     loss = tf.reduce_mean(tf.nn.relu(tf.losses.hinge_loss(y, pred)))
                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)

                    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
                    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
                    tower_acc.append(accuracy)
    
    losses = tf.reduce_mean(tower_loss)
    grads = average_gradients(tower_grads)
    accs = tf.reduce_mean(tower_acc)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
#     print(update_ops)
    with tf.control_dependencies(update_ops):
        train_op = opt.apply_gradients(grads, global_step=global_step)
    
    saver = tf.train.Saver(tf.global_variables())
    tf.summary.scalar('loss', losses)
    tf.summary.scalar('accuracy', accs)
    tf.summary.scalar('learning_rate', l_r)
    merged = tf.summary.merge_all()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


In [11]:
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:

    print('*****************Training Start!*****************')
    train_writer = tf.summary.FileWriter(path_logdir+'train', sess.graph)
    
    sess.run(tf.global_variables_initializer())

    for m in range(epochs):
        start = time.time()
        batch_gen = datagen.flow(x_train, y_train, batch_size=batch_size*num_gpu)

        for i in range(iterations):
            x_batch, y_batch = next(batch_gen)
            _, loss_train, summary = sess.run([train_op, losses, merged],
                                              {inputs: x_batch, outputs: y_batch, is_train: True})
            train_writer.add_summary(summary, m * iterations + i)
        
        val_accs = []
        for i in range(5000//(batch_size*num_gpu)):
            val_acc = sess.run(accs,{inputs: x_test[i*batch_size*num_gpu:(i+1)*num_gpu*batch_size], 
                                     outputs: y_test[i*batch_size*num_gpu:(i+1)*num_gpu*batch_size], 
                                     is_train: False})
            val_accs.append(val_acc)

        if np.mean(val_accs) > old_acc:
            old_acc = np.mean(val_accs)
            saver.save(sess, path_model+'cifar10.ckpt')

        end = time.time()
        print('Epoch: {}'.format(m + 1),
              'Train_loss: {:.3f}'.format(loss_train),
              'Val_acc: {:.3f}'.format(np.mean(val_accs)),
              'Time consumed: {:.4f} s'.format(end - start))

    print('*****************Training End!*****************')

*****************Training Start!*****************
Epoch: 1 Train_loss: 17.275 Val_acc: 0.153 Time consumed: 99.6743 s
Epoch: 2 Train_loss: 23.583 Val_acc: 0.193 Time consumed: 56.4696 s
Epoch: 3 Train_loss: 13.616 Val_acc: 0.172 Time consumed: 52.7602 s
Epoch: 4 Train_loss: 6.844 Val_acc: 0.126 Time consumed: 52.7744 s
Epoch: 5 Train_loss: 5.061 Val_acc: 0.170 Time consumed: 52.8774 s
Epoch: 6 Train_loss: 6.090 Val_acc: 0.203 Time consumed: 56.7235 s
Epoch: 7 Train_loss: 2.924 Val_acc: 0.225 Time consumed: 57.2063 s
Epoch: 8 Train_loss: 2.101 Val_acc: 0.292 Time consumed: 57.5051 s
Epoch: 9 Train_loss: 2.475 Val_acc: 0.169 Time consumed: 53.5115 s
Epoch: 10 Train_loss: 2.309 Val_acc: 0.209 Time consumed: 52.5538 s
Epoch: 11 Train_loss: 2.568 Val_acc: 0.213 Time consumed: 52.7418 s
Epoch: 12 Train_loss: 3.240 Val_acc: 0.275 Time consumed: 53.0867 s
Epoch: 13 Train_loss: 1.957 Val_acc: 0.285 Time consumed: 53.8268 s
Epoch: 14 Train_loss: 2.183 Val_acc: 0.239 Time consumed: 51.1126 s
Epoc

Epoch: 121 Train_loss: 1.408 Val_acc: 0.490 Time consumed: 53.7729 s
Epoch: 122 Train_loss: 1.445 Val_acc: 0.367 Time consumed: 49.5391 s
Epoch: 123 Train_loss: 1.409 Val_acc: 0.423 Time consumed: 49.7690 s
Epoch: 124 Train_loss: 1.374 Val_acc: 0.486 Time consumed: 49.5165 s
Epoch: 125 Train_loss: 1.402 Val_acc: 0.379 Time consumed: 49.7055 s
Epoch: 126 Train_loss: 1.405 Val_acc: 0.459 Time consumed: 49.8529 s
Epoch: 127 Train_loss: 1.444 Val_acc: 0.429 Time consumed: 49.8329 s
Epoch: 128 Train_loss: 1.286 Val_acc: 0.473 Time consumed: 49.8932 s
Epoch: 129 Train_loss: 1.428 Val_acc: 0.491 Time consumed: 54.0098 s
Epoch: 130 Train_loss: 1.419 Val_acc: 0.448 Time consumed: 49.8179 s
Epoch: 131 Train_loss: 1.420 Val_acc: 0.461 Time consumed: 49.8512 s
Epoch: 132 Train_loss: 1.305 Val_acc: 0.479 Time consumed: 50.0178 s
Epoch: 133 Train_loss: 1.388 Val_acc: 0.483 Time consumed: 49.6348 s
Epoch: 134 Train_loss: 1.434 Val_acc: 0.473 Time consumed: 49.8702 s
Epoch: 135 Train_loss: 1.386 Val_a

Epoch: 240 Train_loss: 1.330 Val_acc: 0.490 Time consumed: 49.7941 s
Epoch: 241 Train_loss: 1.303 Val_acc: 0.469 Time consumed: 50.4371 s
Epoch: 242 Train_loss: 1.377 Val_acc: 0.487 Time consumed: 50.0857 s
Epoch: 243 Train_loss: 1.335 Val_acc: 0.493 Time consumed: 49.9199 s
Epoch: 244 Train_loss: 1.355 Val_acc: 0.480 Time consumed: 50.3705 s
Epoch: 245 Train_loss: 1.293 Val_acc: 0.474 Time consumed: 49.7327 s
Epoch: 246 Train_loss: 1.390 Val_acc: 0.493 Time consumed: 49.9281 s
Epoch: 247 Train_loss: 1.359 Val_acc: 0.492 Time consumed: 49.8771 s
Epoch: 248 Train_loss: 1.341 Val_acc: 0.484 Time consumed: 50.3819 s
Epoch: 249 Train_loss: 1.245 Val_acc: 0.490 Time consumed: 50.2625 s
Epoch: 250 Train_loss: 1.387 Val_acc: 0.476 Time consumed: 49.8972 s
Epoch: 251 Train_loss: 1.337 Val_acc: 0.489 Time consumed: 49.7912 s
Epoch: 252 Train_loss: 1.346 Val_acc: 0.493 Time consumed: 49.9778 s
Epoch: 253 Train_loss: 1.327 Val_acc: 0.510 Time consumed: 49.9682 s
Epoch: 254 Train_loss: 1.312 Val_a

Epoch: 359 Train_loss: 1.441 Val_acc: 0.500 Time consumed: 50.4857 s
Epoch: 360 Train_loss: 1.262 Val_acc: 0.503 Time consumed: 51.6426 s
Epoch: 361 Train_loss: 1.242 Val_acc: 0.500 Time consumed: 49.6378 s
Epoch: 362 Train_loss: 1.256 Val_acc: 0.494 Time consumed: 49.8281 s
Epoch: 363 Train_loss: 1.267 Val_acc: 0.487 Time consumed: 50.0152 s
Epoch: 364 Train_loss: 1.247 Val_acc: 0.496 Time consumed: 49.7647 s
Epoch: 365 Train_loss: 1.345 Val_acc: 0.487 Time consumed: 49.9101 s
Epoch: 366 Train_loss: 1.276 Val_acc: 0.510 Time consumed: 49.7500 s
Epoch: 367 Train_loss: 1.312 Val_acc: 0.500 Time consumed: 49.8721 s
Epoch: 368 Train_loss: 1.334 Val_acc: 0.500 Time consumed: 50.0665 s
Epoch: 369 Train_loss: 1.294 Val_acc: 0.496 Time consumed: 49.7992 s
Epoch: 370 Train_loss: 1.324 Val_acc: 0.498 Time consumed: 49.9638 s
Epoch: 371 Train_loss: 1.396 Val_acc: 0.497 Time consumed: 49.8231 s
Epoch: 372 Train_loss: 1.341 Val_acc: 0.500 Time consumed: 50.2470 s
Epoch: 373 Train_loss: 1.317 Val_a

Epoch: 478 Train_loss: 1.278 Val_acc: 0.503 Time consumed: 50.2957 s
Epoch: 479 Train_loss: 1.307 Val_acc: 0.488 Time consumed: 50.1434 s
Epoch: 480 Train_loss: 1.312 Val_acc: 0.499 Time consumed: 50.2255 s
Epoch: 481 Train_loss: 1.381 Val_acc: 0.489 Time consumed: 50.2426 s
Epoch: 482 Train_loss: 1.300 Val_acc: 0.495 Time consumed: 50.0194 s
Epoch: 483 Train_loss: 1.222 Val_acc: 0.511 Time consumed: 50.2722 s
Epoch: 484 Train_loss: 1.237 Val_acc: 0.511 Time consumed: 50.6186 s
Epoch: 485 Train_loss: 1.205 Val_acc: 0.496 Time consumed: 50.2295 s
Epoch: 486 Train_loss: 1.348 Val_acc: 0.514 Time consumed: 50.0404 s
Epoch: 487 Train_loss: 1.273 Val_acc: 0.507 Time consumed: 50.5372 s
Epoch: 488 Train_loss: 1.300 Val_acc: 0.510 Time consumed: 50.3514 s
Epoch: 489 Train_loss: 1.271 Val_acc: 0.501 Time consumed: 50.3793 s
Epoch: 490 Train_loss: 1.250 Val_acc: 0.495 Time consumed: 51.8231 s
Epoch: 491 Train_loss: 1.233 Val_acc: 0.511 Time consumed: 52.4650 s
Epoch: 492 Train_loss: 1.171 Val_a