In [1]:
!rm -rf ./Resnet23_7block_binary2

In [2]:
%config IPCompleter.greedy=True
import numpy as np
import tensorflow as tf
import time

In [3]:
%run ./datagen.py
datagen, (x_train, y_train), (x_test, y_test) = data_preparation()

In [4]:
save_dir = './Resnet23_7block_binary2/'
num_gpu = 5
batch_size = 100
iterations = x_train.shape[0] // (batch_size * num_gpu)
epochs = 1500
old_acc = 0
start_lr = 1e-3
end_lr = 1e-4
decay_rate = (end_lr / start_lr) ** (1 / epochs)

In [5]:
%run ./binary_layer.py

# resnet layer
def res_layer(inputs, filter_num, filter_size, stride, is_train,
              binarized=False, batch_norm=True, activation=True):

    x = inputs

    if binarized:
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = 0.01*tf.square(x)
        x = conv2d(inputs=x, filters=filter_num, 
                   kernel_size=filter_size, strides=stride, padding='same')
    else:
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = 0.1*tf.square(x)
        x = tf.layers.conv2d(inputs=x, filters=filter_num, 
                             kernel_size=filter_size, strides=stride, padding='same')
        
    return x

In [6]:
def resnetv2(inputs, is_train, binarized):

    with tf.variable_scope("1st_Conv"):
        x = tf.layers.conv2d(inputs=inputs, filters=16,
                             kernel_size=3, strides=1, padding='same')
        x = tf.layers.batch_normalization(x, training=is_train)
        x = tf.nn.relu(x)
        tf.summary.histogram('activation', x)

    # Res Blocks
    a = [1, 2, 3, 1]
    block_num = 0

    for stack in range(len(a)):
        for block in range(a[stack]):

            block_num += 1

            with tf.variable_scope('ResBlock{}'.format(block_num)):

                batch_norm = True
                activation = True
                stride = 1
                if stack == 0:
                    filter_num = 16
                    if block == 0:
                        batch_norm = False
                        activation = False
                else:
                    filter_num = 16*(2**stack)
                    if block == 0:
                        stride = 2

                residual_x = x
                with tf.variable_scope('conv1'):
                    x = res_layer(x, filter_num, 1, stride, is_train, 
                                  binarized=binarized, batch_norm=batch_norm, activation=activation)
                with tf.variable_scope('conv2'):
                    x = res_layer(x, filter_num, 3, 1, is_train, binarized=binarized)
                with tf.variable_scope('conv3'):
                    x = res_layer(x, filter_num*4, 1, 1, is_train, binarized=binarized)
                if block == 0:
                    with tf.variable_scope('residual'):
                        residual_x = res_layer(residual_x, filter_num*4, 1, stride, is_train, 
                                               binarized=binarized, batch_norm=False, activation=False)
                x = x + residual_x
#                 tf.summary.histogram('block_output', x)

    with tf.variable_scope("AfterResBlock"):
        x = tf.layers.batch_normalization(x, training=is_train)
        x = tf.nn.relu(x)
        x = tf.layers.average_pooling2d(x, pool_size=2, strides=2, 
                                        padding='SAME', name='ave_pool')
        tf.summary.histogram('bn_relu_pooling', x)
    
    ######## current x.shape = (?, 2, 2, 512) ##########

    with tf.variable_scope("Flatten"):
        x = tf.transpose(x, perm=[0, 3, 1, 2])
        x = tf.layers.flatten(x)

    with tf.variable_scope("Prediction"):
        pred = tf.layers.dense(x, units=10) #, kernel_initializer=tf.initializers.he_normal()
        tf.summary.histogram('prediction', pred)
        
    return pred

In [7]:
def average_gradients(tower_grads):
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        grads = []
        for g, _ in grad_and_vars:
            expend_g = tf.expand_dims(g, 0)
            grads.append(expend_g)
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads

In [8]:
with tf.device("/cpu:0"):
    
    tf.reset_default_graph()
    
    inputs = tf.placeholder(tf.float32, [None, 32, 32, 3], name='input')
    outputs = tf.placeholder(tf.float32, [None, 10], name='output')
    is_train = tf.placeholder(tf.bool, name='is_train')

    global_step = tf.Variable(0, trainable=False)
    
    l_r = tf.train.exponential_decay(
        start_lr, global_step, iterations, decay_rate, staircase=True)
    tf.summary.scalar('learning_rate', l_r)
    
    opt = tf.train.AdamOptimizer(learning_rate=l_r)

    tower_grads = []
    tower_acc = []
    tower_loss = []

    with tf.variable_scope(tf.get_variable_scope()) as variable_scope:
        for i in range(num_gpu):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % ('tower', i)) as scope:
                    x = inputs[i * batch_size:(i + 1) * batch_size]
                    y = outputs[i * batch_size:(i + 1) * batch_size]
                    pred = resnetv2(x, is_train, binarized=True)

                    tf.get_variable_scope().reuse_variables()

                    loss = tf.losses.softmax_cross_entropy(y, pred)
                    tower_loss.append(loss)
#                     loss = tf.reduce_mean(tf.nn.sigmoid(tf.losses.hinge_loss(y, pred)))

                    grads = opt.compute_gradients(loss)
                    tower_grads.append(grads)

                    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1))
                    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
                    tower_acc.append(accuracy)

    losses = tf.reduce_mean(tower_loss)
    tf.summary.scalar('loss', losses)
    
    grads = average_gradients(tower_grads)
    capped_grads = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in grads]
    
    for grad, var in capped_grads:
        if grad is not None:
            tf.summary.histogram(var.name.split(":")[0] + '/gradients', grad)
    
    accs = tf.reduce_mean(tower_acc)
    tf.summary.scalar('accuracy', accs)
    
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = opt.apply_gradients(capped_grads, global_step=global_step)
    
    kernel_vars = []

    for i in tf.trainable_variables():
        if 'bin/kernel' in i.name:
            kernel_vars.append(i)
    
    with tf.control_dependencies(update_ops):
        with tf.control_dependencies([train_op]):
            kernel_clip_op = [tf.clip_by_value(var, -1, 1) for var in kernel_vars]

    saver = tf.train.Saver(tf.global_variables())
    
    def add_hist(train_vars):
        for i in train_vars:
            name = i.name.split(":")[0] + '/value'
            value = i.value()
            tf.summary.histogram(name, value)
    
    add_hist(tf.trainable_variables())
    
    merged = tf.summary.merge_all()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


In [9]:
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:

    print('*****************Training Start!*****************')
    train_writer = tf.summary.FileWriter(save_dir+'train', sess.graph)
    sess.run(tf.global_variables_initializer())

    for m in range(epochs):
        start = time.time()
        batch_gen = datagen.flow(
            x_train, y_train, batch_size=batch_size*num_gpu)

        for i in range(iterations):
            x_batch, y_batch = next(batch_gen)
            _, loss_train = sess.run([train_op, losses], 
                                     {inputs: x_batch, outputs: y_batch, is_train: True})
        
        summary = sess.run(merged, {inputs: x_batch, outputs: y_batch, is_train: False})
        train_writer.add_summary(summary, m*iterations + i + 1)

        val_accs = []
        for i in range(5000//(batch_size*num_gpu)):
            val_acc = sess.run(accs, {inputs: x_test[i*batch_size*num_gpu: (i+1)*num_gpu*batch_size],
                                      outputs: y_test[i*batch_size*num_gpu: (i+1)*num_gpu*batch_size],
                                      is_train: False})
            val_accs.append(val_acc)

        if np.mean(val_accs) > old_acc:
            old_acc = np.mean(val_accs)
            saver.save(sess, save_dir+'cifar10.ckpt')

        end = time.time()
        print('Epoch: {}'.format(m + 1),
              'Train_loss: {:.3f}'.format(loss_train),
              'Val_acc: {:.3f}'.format(np.mean(val_accs)),
              'Time consumed: {:.4f} s'.format(end - start))

    print('*****************Training End!*****************')

*****************Training Start!*****************
Epoch: 1 Train_loss: 1.798 Val_acc: 0.339 Time consumed: 77.8434 s
Epoch: 2 Train_loss: 1.672 Val_acc: 0.326 Time consumed: 34.3015 s
Epoch: 3 Train_loss: 1.746 Val_acc: 0.380 Time consumed: 35.3578 s
Epoch: 4 Train_loss: 1.590 Val_acc: 0.396 Time consumed: 35.0382 s
Epoch: 5 Train_loss: 1.613 Val_acc: 0.406 Time consumed: 34.7693 s
Epoch: 6 Train_loss: 1.577 Val_acc: 0.421 Time consumed: 35.2729 s
Epoch: 7 Train_loss: 1.648 Val_acc: 0.395 Time consumed: 32.8872 s
Epoch: 8 Train_loss: 1.643 Val_acc: 0.402 Time consumed: 33.2057 s
Epoch: 9 Train_loss: 1.623 Val_acc: 0.421 Time consumed: 32.8210 s
Epoch: 10 Train_loss: 1.569 Val_acc: 0.412 Time consumed: 33.5582 s
Epoch: 11 Train_loss: 1.571 Val_acc: 0.438 Time consumed: 35.1954 s


InvalidArgumentError: Nan in summary histogram for: tower_2/Prediction/prediction
	 [[node tower_2/Prediction/prediction (defined at <ipython-input-6-766a84fea596>:64) ]]
	 [[node tower_3/gradients/AddN_20 (defined at <ipython-input-8-811982b4d761>:35) ]]

Caused by op 'tower_2/Prediction/prediction', defined at:
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.5/dist-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 345, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1312, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 125, in _run
    self._callback(*self._args)
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3214, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-8-811982b4d761>", line 27, in <module>
    pred = resnetv2(x, is_train, binarized=True)
  File "<ipython-input-6-766a84fea596>", line 64, in resnetv2
    tf.summary.histogram('prediction', pred)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/summary/summary.py", line 177, in histogram
    tag=tag, values=values, name=scope)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_logging_ops.py", line 312, in histogram_summary
    "HistogramSummary", tag=tag, values=values, name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
    self._traceback = tf_stack.extract_stack()

InvalidArgumentError (see above for traceback): Nan in summary histogram for: tower_2/Prediction/prediction
	 [[node tower_2/Prediction/prediction (defined at <ipython-input-6-766a84fea596>:64) ]]
	 [[node tower_3/gradients/AddN_20 (defined at <ipython-input-8-811982b4d761>:35) ]]
