In [1]:
!rm -rf ./binary_resnet

In [1]:
%config IPCompleter.greedy=True
import numpy as np
import tensorflow as tf
import time

In [3]:
%run ./datagen.py
#datagen, (x_train, y_train), (x_test, y_test) = data_preparation()

datagen, (x_train, y_train), (x_test, y_test) = data_preparation()
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(50000, 32, 32, 3)
(50000, 10)
(10000, 32, 32, 3)
(10000, 10)


In [4]:
path_logdir = './binary_resnet/'
path_model = './binary_resnet/'
num_gpu = 5
batch_size = 100
iterations = x_train.shape[0] // (batch_size * num_gpu)
epochs = 800
old_acc = 0
start_lr = 1e-2
end_lr = 1e-4
decay_rate = (end_lr / start_lr) ** (1 / epochs)

In [5]:
# %run ./binary_layer.py 
# # binary_dense: dense()
# # binary_conv2d: conv2d()

# def inference(inputs, is_train):

#     # L1: 2*128conv + pooling + bn
#     with tf.variable_scope("ConvBlock1"):
#         x = conv2d(inputs=inputs, 
#                    filters=128, 
#                    kernel_size=(3, 3),
#                    strides=(1, 1), 
#                    padding='same')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.sigmoid(x)

#         x = conv2d(inputs=x,
#                    filters=128, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = 4 * tf.layers.average_pooling2d(x, pool_size=(2, 2), strides=(2, 2), padding='SAME')
#         x = tf.layers.batch_normalization(x, training=is_train)        
#         x = tf.nn.sigmoid(x)
# #         x = tf.layers.dropout(x, 0.1)

#     # L2: 2*256conv + pooling + bn + dropout
#     with tf.variable_scope("ConvBlock2"):
#         x = conv2d(inputs=x,
#                    filters=256, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = tf.layers.batch_normalization(x, training=is_train)        
#         x = tf.nn.sigmoid(x)

#         x = conv2d(inputs=x,
#                    filters=256, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = 4*tf.layers.average_pooling2d(x, pool_size=(2, 2), strides=(2, 2), padding='SAME')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.sigmoid(x)
# #         x = tf.layers.dropout(x, 0.1)

#     # L3: 2*512conv + pooling + dropout
#     with tf.variable_scope("ConvBlock3"):
#         x = conv2d(inputs=x,
#                    filters=512, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.sigmoid(x)
        
#         x = conv2d(inputs=x,
#                    filters=512, 
#                    kernel_size=(3, 3), 
#                    strides=(1, 1), 
#                    padding='same')
#         x = 4*tf.layers.average_pooling2d(x, pool_size=(2, 2), strides=(2, 2), padding='SAME')
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.sigmoid(x)    
# #         x = tf.layers.dropout(x, 0.1)

#     with tf.variable_scope("Flatten"):
#         x = tf.transpose(x, perm=[0, 3, 1, 2])
#         x = tf.layers.flatten(x)

#     # L4: 2*FC1024 + bn + dropout
#     with tf.variable_scope("FCBlock1"):
#         x = dense(x, units=1024)
#         x = tf.layers.batch_normalization(x, training=is_train)       
#         x = tf.nn.sigmoid(x)

#         x = dense(x, units=1024)
#         x = tf.layers.batch_normalization(x, training=is_train)
#         x = tf.nn.sigmoid(x)   
# #         x = tf.layers.dropout(x, 0.1)

#     # predict layer
#     with tf.variable_scope("Prediction"):
#         pred = dense(x, units=10)
    
#     return pred

In [6]:
%run ./binary_layer.py 

# resnet layer
def res_layer(inputs, filter_num, filter_size, stride, is_train, 
              conv_first=False, batch_norm=True, activation=True):
    
    x = inputs
    
    if conv_first:
        x = conv2d(inputs=x, filters=filter_num, 
                   kernel_size=filter_size, strides=stride, padding='same')
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = tf.nn.sigmoid(x)
    else:
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = tf.nn.sigmoid(x)
        x = conv2d(inputs=x, filters=filter_num, 
                   kernel_size=filter_size, strides=stride, padding='same')
    
    return x

In [7]:
def resnetv2(inputs, is_train):
    
    with tf.variable_scope("Conv1"):
        x = res_layer(inputs, 64, 3, 2, is_train, conv_first=True)
          
    # Res Blocks
    for stack in range(3):
        for block in range(6):
            with tf.variable_scope('ResBlock{}'.format(stack*6+block+1)):
                
                batch_norm = True
                activation = True
                stride = 1
                if stack == 0:
                    filter_num = 64
                    if block == 0:
                        batch_norm = False
                        activation = False
                else:
                    filter_num = 64*2*stack
                    if block == 0:
                        stride = 2
                
                residual_x = x
                with tf.variable_scope('conv1'):
                    x = res_layer(x, filter_num, 1, stride, is_train, 
                                  batch_norm=batch_norm, activation=activation)
                with tf.variable_scope('conv2'):
                    x = res_layer(x, filter_num, 3, 1, is_train, 
                                  batch_norm=batch_norm, activation=activation)
                with tf.variable_scope('conv3'):
                    x = res_layer(x, filter_num*4, 1, 1, is_train, 
                                  batch_norm=batch_norm, activation=activation)
                if block == 0:
                    with tf.variable_scope('residual'):
                        residual_x = res_layer(residual_x, filter_num*4, 1, stride, is_train, 
                                               batch_norm=False, activation=False)
                x = x + residual_x   
    
    #x.shape = (?, 4, 4, 1024)
    with tf.variable_scope("AfterResBlock"):
        x = tf.layers.batch_normalization(x, training=is_train)                
        x = tf.nn.sigmoid(x)
        x = 4 * tf.layers.average_pooling2d(x, pool_size=2, strides=2, padding='SAME', name='pool1')
    
    with tf.variable_scope("Flatten"):
        x = tf.transpose(x, perm=[0, 3, 1, 2])
        x = tf.layers.flatten(x)
    
    with tf.variable_scope("Prediction"):
        pred = tf.layers.dense(x, units=10)
        
    return pred

In [8]:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = []
        for g, _ in grad_and_vars:
            expend_g = tf.expand_dims(g, 0)
            grads.append(expend_g)
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

In [9]:
with tf.device("/cpu:0"):
    
    inputs = tf.placeholder(tf.float32, [None, 32, 32, 3], name='input')
    outputs = tf.placeholder(tf.float32, [None, 10], name='output')
    is_train = tf.placeholder(tf.bool, name='is_train')
    
    global_step = tf.Variable(0, trainable=False)
    l_r = tf.train.exponential_decay(start_lr, global_step, iterations, decay_rate, staircase=True)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        opt = tf.train.AdamOptimizer(learning_rate=l_r)

    tower_grads = []
    tower_acc = []
    tower_loss = []
    
    with tf.variable_scope(tf.get_variable_scope()) as variable_scope:
#         print('-'*20)
#         print(variable_scope.name)
#         print('-'*20)
        for i in range(num_gpu):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                    x = inputs[i * batch_size:(i + 1) * batch_size]
                    y = outputs[i * batch_size:(i + 1) * batch_size]
                    pred = resnetv2(x, is_train)
                    
                    tf.get_variable_scope().reuse_variables()
                    
                    loss = tf.losses.softmax_cross_entropy(y, pred)
                    tower_loss.append(loss)
#                     loss = tf.reduce_mean(tf.nn.sigmoid(tf.losses.hinge_loss(y, pred)))
                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)

                    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
                    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
                    tower_acc.append(accuracy)
    
    losses = tf.reduce_mean(tower_loss)
    grads = average_gradients(tower_grads)
    accs = tf.reduce_mean(tower_acc)
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = opt.apply_gradients(grads, global_step=global_step)
    
    saver = tf.train.Saver(tf.global_variables())
    tf.summary.scalar('loss', losses)
    tf.summary.scalar('accuracy', accs)
    tf.summary.scalar('learning_rate', l_r)
    merged = tf.summary.merge_all()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


In [10]:
# gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:

    print('*****************Training Start!*****************')
    train_writer = tf.summary.FileWriter(path_logdir+'train', sess.graph)
#     val_writer = tf.summary.FileWriter(path_logdir+'val', sess.graph)
    sess.run(tf.global_variables_initializer())
#     saver.restore(sess, path_model+'cifar10.ckpt')

    for m in range(epochs):
        start = time.time()
        batch_gen = datagen.flow(x_train, y_train, batch_size=batch_size*num_gpu)

        for i in range(iterations):
            x_batch, y_batch = next(batch_gen)
            _, loss_train, summary = sess.run([train_op, losses, merged],
                                              {inputs: x_batch, outputs: y_batch, is_train: True})
            train_writer.add_summary(summary, m * iterations + i)
        
        val_accs = []
        for i in range(5000//(batch_size*num_gpu)):
            val_acc = sess.run(accs,{inputs: x_test[i*batch_size*num_gpu:(i+1)*num_gpu*batch_size], 
                                     outputs: y_test[i*batch_size*num_gpu:(i+1)*num_gpu*batch_size], 
                                     is_train: False})
#             val_writer.add_summary(summary, m)
            val_accs.append(val_acc)

        if np.mean(val_accs) > old_acc:
            old_acc = np.mean(val_accs)
            saver.save(sess, path_model+'cifar10.ckpt')

#         if loss_train == 0:
#             break

        end = time.time()
        print('Epoch: {}'.format(m + 1),
              'Train_loss: {:.3f}'.format(loss_train),
              'Val_acc: {:.3f}'.format(np.mean(val_accs)),
              'Time consumed: {:.4f} s'.format(end - start))

    print('*****************Training End!*****************')

*****************Training Start!*****************
Epoch: 1 Train_loss: 12.856 Val_acc: 0.146 Time consumed: 94.5805 s
Epoch: 2 Train_loss: 9.810 Val_acc: 0.185 Time consumed: 50.5846 s
Epoch: 3 Train_loss: 16.951 Val_acc: 0.160 Time consumed: 46.0162 s
Epoch: 4 Train_loss: 4.246 Val_acc: 0.168 Time consumed: 46.3472 s
Epoch: 5 Train_loss: 9.225 Val_acc: 0.123 Time consumed: 45.9266 s
Epoch: 6 Train_loss: 3.588 Val_acc: 0.196 Time consumed: 49.6013 s
Epoch: 7 Train_loss: 2.232 Val_acc: 0.233 Time consumed: 49.5704 s
Epoch: 8 Train_loss: 5.341 Val_acc: 0.138 Time consumed: 46.3116 s
Epoch: 9 Train_loss: 2.494 Val_acc: 0.203 Time consumed: 45.9447 s
Epoch: 10 Train_loss: 1.867 Val_acc: 0.247 Time consumed: 49.1349 s
Epoch: 11 Train_loss: 1.908 Val_acc: 0.228 Time consumed: 46.2065 s
Epoch: 12 Train_loss: 3.613 Val_acc: 0.213 Time consumed: 46.1352 s
Epoch: 13 Train_loss: 1.933 Val_acc: 0.180 Time consumed: 46.0883 s
Epoch: 14 Train_loss: 2.484 Val_acc: 0.251 Time consumed: 49.5479 s
Epoch

Epoch: 121 Train_loss: 1.093 Val_acc: 0.382 Time consumed: 46.7115 s
Epoch: 122 Train_loss: 1.158 Val_acc: 0.404 Time consumed: 46.1545 s
Epoch: 123 Train_loss: 1.161 Val_acc: 0.518 Time consumed: 45.9088 s
Epoch: 124 Train_loss: 1.182 Val_acc: 0.534 Time consumed: 45.9551 s
Epoch: 125 Train_loss: 1.064 Val_acc: 0.404 Time consumed: 46.2969 s
Epoch: 126 Train_loss: 1.062 Val_acc: 0.477 Time consumed: 46.5050 s
Epoch: 127 Train_loss: 1.142 Val_acc: 0.418 Time consumed: 46.5288 s
Epoch: 128 Train_loss: 1.142 Val_acc: 0.415 Time consumed: 46.3545 s
Epoch: 129 Train_loss: 1.083 Val_acc: 0.359 Time consumed: 45.9711 s
Epoch: 130 Train_loss: 1.133 Val_acc: 0.558 Time consumed: 50.7737 s
Epoch: 131 Train_loss: 1.222 Val_acc: 0.507 Time consumed: 45.9198 s
Epoch: 132 Train_loss: 1.121 Val_acc: 0.320 Time consumed: 45.9711 s
Epoch: 133 Train_loss: 1.164 Val_acc: 0.421 Time consumed: 46.0179 s
Epoch: 134 Train_loss: 1.069 Val_acc: 0.497 Time consumed: 45.9147 s
Epoch: 135 Train_loss: 1.146 Val_a

Epoch: 240 Train_loss: 1.020 Val_acc: 0.513 Time consumed: 46.4582 s
Epoch: 241 Train_loss: 0.852 Val_acc: 0.559 Time consumed: 45.7421 s
Epoch: 242 Train_loss: 0.964 Val_acc: 0.607 Time consumed: 45.9295 s
Epoch: 243 Train_loss: 0.971 Val_acc: 0.318 Time consumed: 45.8713 s
Epoch: 244 Train_loss: 1.007 Val_acc: 0.525 Time consumed: 46.2159 s
Epoch: 245 Train_loss: 0.954 Val_acc: 0.530 Time consumed: 45.9240 s
Epoch: 246 Train_loss: 1.011 Val_acc: 0.545 Time consumed: 46.1537 s
Epoch: 247 Train_loss: 1.095 Val_acc: 0.470 Time consumed: 46.1500 s
Epoch: 248 Train_loss: 1.001 Val_acc: 0.597 Time consumed: 45.9522 s
Epoch: 249 Train_loss: 1.011 Val_acc: 0.465 Time consumed: 46.2706 s
Epoch: 250 Train_loss: 1.105 Val_acc: 0.468 Time consumed: 45.8997 s
Epoch: 251 Train_loss: 0.984 Val_acc: 0.572 Time consumed: 46.6472 s
Epoch: 252 Train_loss: 1.040 Val_acc: 0.565 Time consumed: 46.8730 s
Epoch: 253 Train_loss: 1.002 Val_acc: 0.295 Time consumed: 46.4349 s
Epoch: 254 Train_loss: 1.028 Val_a

Epoch: 359 Train_loss: 0.950 Val_acc: 0.575 Time consumed: 45.4959 s
Epoch: 360 Train_loss: 0.958 Val_acc: 0.524 Time consumed: 45.7540 s
Epoch: 361 Train_loss: 0.973 Val_acc: 0.541 Time consumed: 47.4930 s
Epoch: 362 Train_loss: 0.930 Val_acc: 0.396 Time consumed: 48.2276 s
Epoch: 363 Train_loss: 0.893 Val_acc: 0.541 Time consumed: 49.7729 s
Epoch: 364 Train_loss: 0.951 Val_acc: 0.389 Time consumed: 49.1746 s
Epoch: 365 Train_loss: 0.929 Val_acc: 0.651 Time consumed: 54.6844 s
Epoch: 366 Train_loss: 0.953 Val_acc: 0.491 Time consumed: 47.5244 s
Epoch: 367 Train_loss: 0.979 Val_acc: 0.409 Time consumed: 46.0469 s
Epoch: 368 Train_loss: 0.908 Val_acc: 0.451 Time consumed: 46.2385 s
Epoch: 369 Train_loss: 0.936 Val_acc: 0.532 Time consumed: 46.2966 s
Epoch: 370 Train_loss: 0.864 Val_acc: 0.421 Time consumed: 45.9028 s
Epoch: 371 Train_loss: 0.923 Val_acc: 0.499 Time consumed: 46.2988 s
Epoch: 372 Train_loss: 0.930 Val_acc: 0.468 Time consumed: 45.9436 s
Epoch: 373 Train_loss: 0.975 Val_a

Epoch: 478 Train_loss: 0.893 Val_acc: 0.347 Time consumed: 46.4332 s
Epoch: 479 Train_loss: 0.845 Val_acc: 0.508 Time consumed: 46.4397 s
Epoch: 480 Train_loss: 0.832 Val_acc: 0.362 Time consumed: 45.9907 s
Epoch: 481 Train_loss: 0.960 Val_acc: 0.284 Time consumed: 46.2850 s
Epoch: 482 Train_loss: 0.940 Val_acc: 0.512 Time consumed: 45.7883 s
Epoch: 483 Train_loss: 0.925 Val_acc: 0.303 Time consumed: 45.9851 s
Epoch: 484 Train_loss: 0.988 Val_acc: 0.455 Time consumed: 46.4063 s
Epoch: 485 Train_loss: 0.837 Val_acc: 0.534 Time consumed: 46.1327 s
Epoch: 486 Train_loss: 0.940 Val_acc: 0.568 Time consumed: 45.8426 s
Epoch: 487 Train_loss: 0.837 Val_acc: 0.557 Time consumed: 46.3156 s
Epoch: 488 Train_loss: 0.896 Val_acc: 0.587 Time consumed: 46.1256 s
Epoch: 489 Train_loss: 0.877 Val_acc: 0.571 Time consumed: 46.3854 s
Epoch: 490 Train_loss: 0.870 Val_acc: 0.517 Time consumed: 46.4247 s
Epoch: 491 Train_loss: 0.835 Val_acc: 0.340 Time consumed: 46.3177 s
Epoch: 492 Train_loss: 0.867 Val_a

Epoch: 597 Train_loss: 0.830 Val_acc: 0.573 Time consumed: 46.2652 s
Epoch: 598 Train_loss: 0.913 Val_acc: 0.359 Time consumed: 47.4344 s
Epoch: 599 Train_loss: 0.914 Val_acc: 0.532 Time consumed: 45.8091 s
Epoch: 600 Train_loss: 0.900 Val_acc: 0.629 Time consumed: 45.7844 s
Epoch: 601 Train_loss: 0.943 Val_acc: 0.510 Time consumed: 45.8016 s
Epoch: 602 Train_loss: 0.807 Val_acc: 0.515 Time consumed: 46.2682 s
Epoch: 603 Train_loss: 0.917 Val_acc: 0.364 Time consumed: 45.9395 s
Epoch: 604 Train_loss: 0.951 Val_acc: 0.550 Time consumed: 46.4709 s
Epoch: 605 Train_loss: 0.878 Val_acc: 0.679 Time consumed: 50.4422 s
Epoch: 606 Train_loss: 0.912 Val_acc: 0.457 Time consumed: 46.0897 s
Epoch: 607 Train_loss: 0.857 Val_acc: 0.468 Time consumed: 45.8477 s
Epoch: 608 Train_loss: 0.799 Val_acc: 0.340 Time consumed: 45.9857 s
Epoch: 609 Train_loss: 0.944 Val_acc: 0.313 Time consumed: 45.7864 s
Epoch: 610 Train_loss: 0.889 Val_acc: 0.288 Time consumed: 46.4289 s
Epoch: 611 Train_loss: 0.838 Val_a

Epoch: 716 Train_loss: 0.860 Val_acc: 0.460 Time consumed: 46.1943 s
Epoch: 717 Train_loss: 0.928 Val_acc: 0.466 Time consumed: 46.2055 s
Epoch: 718 Train_loss: 0.837 Val_acc: 0.618 Time consumed: 45.9893 s
Epoch: 719 Train_loss: 0.883 Val_acc: 0.544 Time consumed: 46.2544 s
Epoch: 720 Train_loss: 0.797 Val_acc: 0.512 Time consumed: 46.2129 s
Epoch: 721 Train_loss: 0.809 Val_acc: 0.582 Time consumed: 46.3476 s
Epoch: 722 Train_loss: 0.756 Val_acc: 0.533 Time consumed: 46.1676 s
Epoch: 723 Train_loss: 0.848 Val_acc: 0.458 Time consumed: 46.4650 s
Epoch: 724 Train_loss: 0.866 Val_acc: 0.481 Time consumed: 46.0822 s
Epoch: 725 Train_loss: 0.910 Val_acc: 0.488 Time consumed: 46.3355 s
Epoch: 726 Train_loss: 0.915 Val_acc: 0.649 Time consumed: 46.0896 s
Epoch: 727 Train_loss: 0.875 Val_acc: 0.197 Time consumed: 46.2947 s
Epoch: 728 Train_loss: 0.919 Val_acc: 0.400 Time consumed: 46.3281 s
Epoch: 729 Train_loss: 0.834 Val_acc: 0.359 Time consumed: 46.1694 s
Epoch: 730 Train_loss: 0.864 Val_a