In [1]:
!rm -rf ./mixnet_bin_8

In [2]:
%config IPCompleter.greedy=True
import numpy as np
import tensorflow as tf
import time

In [3]:
%run ./datagen.py
datagen, (x_train, y_train), (x_test, y_test) = data_preparation()

In [4]:
save_dir = './mixnet_bin_8/'
batch_size = 100
iterations = x_train.shape[0] // batch_size
epochs = 1200
old_acc = 0
start_lr = 2.
end_lr = 0.5
decay_rate = (end_lr / start_lr) ** (1 / epochs)
k = 8

In [5]:
%run ./binary_layer.py

# resnet layer
def res_layer(inputs, filter_num, filter_size, stride, is_train,
              binarized=False, batch_norm=True, activation=True):

    x = inputs

    if binarized:
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = tf.square(x)
        x = conv2d(inputs=x, filters=filter_num, 
                   kernel_size=filter_size, strides=stride, padding='same')
    else:
        if batch_norm:
            x = tf.layers.batch_normalization(x, training=is_train)
        if activation:
            x = tf.square(x)
        x = tf.layers.conv2d(inputs=x, filters=filter_num, 
                             kernel_size=filter_size, strides=stride, padding='same')
        
    return x

In [6]:
def wide_resnet(inputs, k, is_train, binarized):

    with tf.variable_scope("1st_Conv"):
        x = tf.layers.conv2d(inputs=inputs, filters=16, 
                             kernel_size=3, strides=1, padding='same')
        x = tf.layers.batch_normalization(x, training=is_train)
        x = tf.nn.relu(x)
        tf.summary.histogram('activation', x)
    
    x_temp_0 = x
    
    with tf.variable_scope('ResBlock_%d_%d' % (1, 1)):
        
        with tf.variable_scope('conv1'):
            x = res_layer(x, 16*k, 3, 1, is_train, binarized=binarized, 
                          batch_norm=False, activation=False)
                    
        x = tf.layers.dropout(x, 0.1)
                
        with tf.variable_scope('conv2'):
            x = res_layer(x, 16*k, 3, 1, is_train, binarized=binarized)
    
        with tf.variable_scope('x_plus_shortcut'):
            shortcut = res_layer(x_temp_0, 16*k, 1, 1, is_train=False, 
                                 binarized=False, batch_norm=False, activation=False)
            x = x + shortcut 
            
        tf.summary.histogram('output', x)
        
    x_temp_1 = x
    
    with tf.variable_scope('ResBlock_%d_%d' % (2, 1)):
        
        with tf.variable_scope('conv1'):
            x = res_layer(x, 32*k, 3, 2, is_train, binarized=binarized)
                    
        x = tf.layers.dropout(x, 0.1)
                
        with tf.variable_scope('conv2'):
            x = res_layer(x, 32*k, 3, 1, is_train, binarized=binarized)
            
        with tf.variable_scope('x_plus_shortcut'):
            shortcut = res_layer(x_temp_0, 32*k, 1, 2, is_train=False, 
                                 binarized=False, batch_norm=False, activation=False)
            x = x + shortcut
            
            shortcut = res_layer(x_temp_1, 32*k, 1, 2, is_train=False, 
                                 binarized=False, batch_norm=False, activation=False)
            x = x + shortcut
            
        tf.summary.histogram('output', x)
        
    x_temp_2 = x    
    
    with tf.variable_scope('ResBlock_%d_%d' % (3, 1)):
        
        with tf.variable_scope('conv1'):
            x = res_layer(x, 64*k, 3, 2, is_train, binarized=binarized)
                    
        x = tf.layers.dropout(x, 0.1)
                
        with tf.variable_scope('conv2'):
            x = res_layer(x, 64*k, 3, 1, is_train, binarized=binarized)
            
        with tf.variable_scope('x_plus_shortcut'):
            
            shortcut = res_layer(x_temp_0, 64*k, 1, 4, is_train=False, 
                                     binarized=False, batch_norm=False, activation=False)
            x = x + shortcut

            shortcut = res_layer(x_temp_1, 64*k, 1, 4, is_train=False, 
                                     binarized=False, batch_norm=False, activation=False)
            x = x + shortcut

            shortcut = res_layer(x_temp_2, 64*k, 1, 2, is_train=False, 
                                     binarized=False, batch_norm=False, activation=False)
            x = x + shortcut
            
        tf.summary.histogram('output', x)

    with tf.variable_scope("AfterResBlock"):
        x = tf.layers.batch_normalization(x, training=is_train)
        x = tf.square(x)
        x = tf.layers.average_pooling2d(x, pool_size=8, strides=8, 
                                        padding='SAME', name='ave_pool')
        tf.summary.histogram('bn_relu_pooling', x)
    
    ######## current x.shape = (?, 1, 1, N) ##########

    with tf.variable_scope("Flatten"):
        x = tf.transpose(x, perm=[0, 3, 1, 2])
        x = tf.layers.flatten(x)

    with tf.variable_scope("Prediction"):
        pred = tf.layers.dense(x, units=10)
        tf.summary.histogram('prediction', pred)
        
    return pred

In [7]:
tf.reset_default_graph()

with tf.device('/GPU:7'):

    inputs = tf.placeholder(tf.float32, [None, 32, 32, 3], name='input')
    outputs = tf.placeholder(tf.float32, [None, 10], name='output')
    is_train = tf.placeholder(tf.bool, name='is_train')

    global_step = tf.Variable(0, trainable=False)

    l_r = tf.train.exponential_decay(
        start_lr, global_step, iterations, decay_rate, staircase=True)
    tf.summary.scalar('learning_rate', l_r)

    opt = tf.train.MomentumOptimizer(learning_rate=l_r, momentum=0.9)

    pred = wide_resnet(inputs, k, is_train, binarized=False)

    loss = tf.losses.softmax_cross_entropy(outputs, pred)
    grads = opt.compute_gradients(loss)

    ########################## LARS ##########################
    grads_norm = [(tf.norm(grad, ord=2), tf.norm(var, ord=2)) for grad, var in grads]
    eta = 1e-4
    local_lr = [tf.where(var_norm < 1e-3, 1e-3, eta*var_norm / (grad_norm + 1e-8)) 
                for grad_norm, var_norm in grads_norm]
    new_grads = [(local_lr[i]*grad, var) for i, (grad, var) in enumerate(grads)]
    ##########################################################

    correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(outputs, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    for grad, var in new_grads:
        if grad is not None:
            tf.summary.histogram(var.name.split(":")[0] + '/gradients', grad)

    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(update_ops):
        train_op = opt.apply_gradients(new_grads, global_step=global_step)

    kernel_vars = []

#     for i in tf.trainable_variables():
#         if 'bin/kernel' in i.name:
#             kernel_vars.append(i)

#     with tf.control_dependencies(update_ops):
#         with tf.control_dependencies([train_op]):
#             kernel_clip_op = [tf.clip_by_value(var, -1, 1) for var in kernel_vars]

saver = tf.train.Saver(tf.global_variables())

def add_hist(train_vars):
    for i in train_vars:
        name = i.name.split(":")[0] + '/value'
        value = i.value()
        tf.summary.histogram(name, value)

add_hist(tf.trainable_variables())

tf.summary.scalar('loss', loss)
tf.summary.scalar('accuracy', accuracy)
merged = tf.summary.merge_all()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.conv2d instead.
Instructions for updating:
Use keras.layers.batch_normalization instead.
Instructions for updating:
Use keras.layers.dropout instead.
Instructions for updating:
Use keras.layers.average_pooling2d instead.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Use tf.cast instead.


In [8]:
config = tf.ConfigProto(allow_soft_placement=True,
                        log_device_placement=True)
config.gpu_options.allow_growth = True

with tf.Session(config=config) as sess:

    print('*****************Training Start!*****************')
    train_writer = tf.summary.FileWriter(save_dir+'train', sess.graph)
    sess.run(tf.global_variables_initializer())

    for m in range(epochs):
        start = time.time()
        batch_gen = datagen.flow(
            x_train, y_train, batch_size=batch_size)

        for i in range(iterations):
            x_batch, y_batch = next(batch_gen)
            _, loss_train = sess.run([train_op, loss], 
                                     {inputs: x_batch, outputs: y_batch, is_train: True})
        
        summary = sess.run(merged, {inputs: x_batch, outputs: y_batch, is_train: False})
        train_writer.add_summary(summary, m*iterations + i + 1)

        val_accs = []
        for i in range(5000//(batch_size*5)):
            val_acc = sess.run(accuracy, {inputs: x_test[i*batch_size*5: (i+1)*batch_size*5],
                                          outputs: y_test[i*batch_size*5: (i+1)*batch_size*5],
                                          is_train: False})
            val_accs.append(val_acc)

        if np.mean(val_accs) > old_acc:
            old_acc = np.mean(val_accs)
            saver.save(sess, save_dir+'cifar10.ckpt')

        end = time.time()
        print('Epoch: {}'.format(m + 1),
              'Train_loss: {:.3f}'.format(loss_train),
              'Val_acc: {:.3f}'.format(np.mean(val_accs)),
              'Time consumed: {:.4f} s'.format(end - start))

    print('*****************Training End!*****************')

*****************Training Start!*****************
Epoch: 1 Train_loss: 2.532 Val_acc: 0.123 Time consumed: 53.1935 s
Epoch: 2 Train_loss: 2.196 Val_acc: 0.147 Time consumed: 47.6529 s
Epoch: 3 Train_loss: 2.146 Val_acc: 0.158 Time consumed: 47.5407 s
Epoch: 4 Train_loss: 2.099 Val_acc: 0.231 Time consumed: 47.7330 s
Epoch: 5 Train_loss: 2.094 Val_acc: 0.224 Time consumed: 47.2291 s
Epoch: 6 Train_loss: 1.925 Val_acc: 0.256 Time consumed: 47.7197 s
Epoch: 7 Train_loss: 2.008 Val_acc: 0.295 Time consumed: 47.6876 s
Epoch: 8 Train_loss: 2.020 Val_acc: 0.316 Time consumed: 47.6406 s
Epoch: 9 Train_loss: 1.823 Val_acc: 0.341 Time consumed: 47.6457 s
Epoch: 10 Train_loss: 1.846 Val_acc: 0.277 Time consumed: 47.3885 s
Epoch: 11 Train_loss: 1.731 Val_acc: 0.355 Time consumed: 47.7985 s
Epoch: 12 Train_loss: 1.613 Val_acc: 0.392 Time consumed: 47.7213 s
Epoch: 13 Train_loss: 1.647 Val_acc: 0.421 Time consumed: 47.7492 s
Epoch: 14 Train_loss: 1.606 Val_acc: 0.441 Time consumed: 47.6246 s
Epoch: 

Epoch: 121 Train_loss: 0.383 Val_acc: 0.778 Time consumed: 48.9641 s
Epoch: 122 Train_loss: 0.363 Val_acc: 0.778 Time consumed: 49.3757 s
Epoch: 123 Train_loss: 0.415 Val_acc: 0.767 Time consumed: 48.8459 s
Epoch: 124 Train_loss: 0.222 Val_acc: 0.784 Time consumed: 48.8555 s
Epoch: 125 Train_loss: 0.300 Val_acc: 0.772 Time consumed: 48.8047 s
Epoch: 126 Train_loss: 0.413 Val_acc: 0.783 Time consumed: 48.8618 s
Epoch: 127 Train_loss: 0.177 Val_acc: 0.778 Time consumed: 49.1619 s
Epoch: 128 Train_loss: 0.215 Val_acc: 0.771 Time consumed: 49.2026 s
Epoch: 129 Train_loss: 0.401 Val_acc: 0.692 Time consumed: 49.4872 s
Epoch: 130 Train_loss: 0.220 Val_acc: 0.744 Time consumed: 49.9006 s
Epoch: 131 Train_loss: 0.248 Val_acc: 0.777 Time consumed: 48.9874 s
Epoch: 132 Train_loss: 0.261 Val_acc: 0.774 Time consumed: 49.2879 s
Epoch: 133 Train_loss: 0.273 Val_acc: 0.747 Time consumed: 70.7541 s
Epoch: 134 Train_loss: 0.249 Val_acc: 0.715 Time consumed: 73.3175 s
Epoch: 135 Train_loss: 0.207 Val_a

Epoch: 240 Train_loss: 0.075 Val_acc: 0.801 Time consumed: 61.2470 s
Epoch: 241 Train_loss: 0.033 Val_acc: 0.817 Time consumed: 73.0914 s
Epoch: 242 Train_loss: 0.078 Val_acc: 0.781 Time consumed: 70.8305 s
Epoch: 243 Train_loss: 0.020 Val_acc: 0.812 Time consumed: 70.2995 s
Epoch: 244 Train_loss: 0.058 Val_acc: 0.803 Time consumed: 70.9504 s
Epoch: 245 Train_loss: 0.015 Val_acc: 0.822 Time consumed: 72.9191 s
Epoch: 246 Train_loss: 0.033 Val_acc: 0.809 Time consumed: 69.5617 s
Epoch: 247 Train_loss: 0.027 Val_acc: 0.800 Time consumed: 70.4731 s
Epoch: 248 Train_loss: 0.082 Val_acc: 0.814 Time consumed: 71.9119 s
Epoch: 249 Train_loss: 0.039 Val_acc: 0.818 Time consumed: 70.5169 s
Epoch: 250 Train_loss: 0.093 Val_acc: 0.798 Time consumed: 70.1527 s
Epoch: 251 Train_loss: 0.016 Val_acc: 0.814 Time consumed: 69.9684 s
Epoch: 252 Train_loss: 0.036 Val_acc: 0.813 Time consumed: 63.2381 s
Epoch: 253 Train_loss: 0.032 Val_acc: 0.814 Time consumed: 49.5967 s
Epoch: 254 Train_loss: 0.099 Val_a

Epoch: 359 Train_loss: 0.005 Val_acc: 0.822 Time consumed: 48.0844 s
Epoch: 360 Train_loss: 0.000 Val_acc: 0.814 Time consumed: 48.0675 s
Epoch: 361 Train_loss: 0.050 Val_acc: 0.824 Time consumed: 49.0183 s
Epoch: 362 Train_loss: 0.006 Val_acc: 0.833 Time consumed: 47.5593 s
Epoch: 363 Train_loss: 0.105 Val_acc: 0.827 Time consumed: 47.4479 s
Epoch: 364 Train_loss: 0.029 Val_acc: 0.831 Time consumed: 47.6171 s
Epoch: 365 Train_loss: 0.033 Val_acc: 0.820 Time consumed: 47.3928 s
Epoch: 366 Train_loss: 0.028 Val_acc: 0.829 Time consumed: 47.5710 s
Epoch: 367 Train_loss: 0.001 Val_acc: 0.822 Time consumed: 47.5851 s
Epoch: 368 Train_loss: 0.005 Val_acc: 0.829 Time consumed: 47.4877 s
Epoch: 369 Train_loss: 0.124 Val_acc: 0.830 Time consumed: 47.6076 s
Epoch: 370 Train_loss: 0.017 Val_acc: 0.830 Time consumed: 47.6204 s
Epoch: 371 Train_loss: 0.019 Val_acc: 0.823 Time consumed: 47.6905 s
Epoch: 372 Train_loss: 0.038 Val_acc: 0.829 Time consumed: 47.7266 s
Epoch: 373 Train_loss: 0.012 Val_a

Epoch: 478 Train_loss: 0.197 Val_acc: 0.836 Time consumed: 48.5095 s
Epoch: 479 Train_loss: 0.001 Val_acc: 0.834 Time consumed: 48.4625 s
Epoch: 480 Train_loss: 0.000 Val_acc: 0.842 Time consumed: 48.0576 s
Epoch: 481 Train_loss: 0.002 Val_acc: 0.839 Time consumed: 48.5668 s
Epoch: 482 Train_loss: 0.000 Val_acc: 0.833 Time consumed: 48.4843 s
Epoch: 483 Train_loss: 0.000 Val_acc: 0.847 Time consumed: 49.3506 s
Epoch: 484 Train_loss: 0.000 Val_acc: 0.829 Time consumed: 48.3719 s
Epoch: 485 Train_loss: 0.000 Val_acc: 0.840 Time consumed: 48.3584 s
Epoch: 486 Train_loss: 0.000 Val_acc: 0.825 Time consumed: 48.2495 s
Epoch: 487 Train_loss: 0.000 Val_acc: 0.841 Time consumed: 48.3220 s
Epoch: 488 Train_loss: 0.003 Val_acc: 0.840 Time consumed: 48.5141 s
Epoch: 489 Train_loss: 0.001 Val_acc: 0.836 Time consumed: 48.3534 s
Epoch: 490 Train_loss: 0.045 Val_acc: 0.835 Time consumed: 48.6125 s
Epoch: 491 Train_loss: 0.005 Val_acc: 0.833 Time consumed: 48.3869 s
Epoch: 492 Train_loss: 0.000 Val_a

Epoch: 597 Train_loss: 0.322 Val_acc: 0.846 Time consumed: 50.1409 s
Epoch: 598 Train_loss: 0.000 Val_acc: 0.840 Time consumed: 50.4646 s
Epoch: 599 Train_loss: 0.000 Val_acc: 0.838 Time consumed: 50.6667 s
Epoch: 600 Train_loss: 0.128 Val_acc: 0.848 Time consumed: 50.6603 s
Epoch: 601 Train_loss: 0.071 Val_acc: 0.844 Time consumed: 52.2489 s
Epoch: 602 Train_loss: 0.000 Val_acc: 0.847 Time consumed: 50.0518 s
Epoch: 603 Train_loss: 0.000 Val_acc: 0.846 Time consumed: 50.0925 s
Epoch: 604 Train_loss: 0.040 Val_acc: 0.841 Time consumed: 49.9479 s
Epoch: 605 Train_loss: 0.011 Val_acc: 0.835 Time consumed: 49.7460 s
Epoch: 606 Train_loss: 0.000 Val_acc: 0.843 Time consumed: 49.9617 s
Epoch: 607 Train_loss: 0.003 Val_acc: 0.832 Time consumed: 50.2401 s
Epoch: 608 Train_loss: 0.003 Val_acc: 0.847 Time consumed: 50.3029 s
Epoch: 609 Train_loss: 0.000 Val_acc: 0.834 Time consumed: 50.2938 s
Epoch: 610 Train_loss: 0.238 Val_acc: 0.838 Time consumed: 50.5044 s
Epoch: 611 Train_loss: 0.000 Val_a

Epoch: 716 Train_loss: 0.669 Val_acc: 0.850 Time consumed: 48.3485 s
Epoch: 717 Train_loss: 0.000 Val_acc: 0.846 Time consumed: 49.2213 s
Epoch: 718 Train_loss: 0.000 Val_acc: 0.840 Time consumed: 48.8804 s
Epoch: 719 Train_loss: 0.963 Val_acc: 0.837 Time consumed: 48.5790 s
Epoch: 720 Train_loss: 0.000 Val_acc: 0.838 Time consumed: 49.0862 s
Epoch: 721 Train_loss: 0.000 Val_acc: 0.844 Time consumed: 48.9946 s
Epoch: 722 Train_loss: 0.000 Val_acc: 0.823 Time consumed: 48.7675 s
Epoch: 723 Train_loss: 0.000 Val_acc: 0.841 Time consumed: 48.4026 s
Epoch: 724 Train_loss: 0.000 Val_acc: 0.821 Time consumed: 48.0892 s
Epoch: 725 Train_loss: 0.000 Val_acc: 0.841 Time consumed: 48.2995 s
Epoch: 726 Train_loss: 0.000 Val_acc: 0.839 Time consumed: 48.6484 s
Epoch: 727 Train_loss: 0.000 Val_acc: 0.842 Time consumed: 48.4357 s
Epoch: 728 Train_loss: 0.000 Val_acc: 0.844 Time consumed: 48.3833 s
Epoch: 729 Train_loss: 0.633 Val_acc: 0.835 Time consumed: 48.4856 s
Epoch: 730 Train_loss: 0.000 Val_a

Epoch: 835 Train_loss: 0.000 Val_acc: 0.851 Time consumed: 48.4346 s
Epoch: 836 Train_loss: 0.999 Val_acc: 0.847 Time consumed: 48.1448 s
Epoch: 837 Train_loss: 0.000 Val_acc: 0.853 Time consumed: 49.4031 s
Epoch: 838 Train_loss: 0.247 Val_acc: 0.852 Time consumed: 48.2759 s
Epoch: 839 Train_loss: 0.000 Val_acc: 0.854 Time consumed: 47.6215 s
Epoch: 840 Train_loss: 0.000 Val_acc: 0.834 Time consumed: 47.8583 s
Epoch: 841 Train_loss: 1.479 Val_acc: 0.849 Time consumed: 48.1685 s
Epoch: 842 Train_loss: 2.698 Val_acc: 0.846 Time consumed: 48.1867 s
Epoch: 843 Train_loss: 0.000 Val_acc: 0.845 Time consumed: 47.9546 s
Epoch: 844 Train_loss: 0.000 Val_acc: 0.851 Time consumed: 48.3050 s
Epoch: 845 Train_loss: 0.000 Val_acc: 0.849 Time consumed: 48.1537 s
Epoch: 846 Train_loss: 0.000 Val_acc: 0.834 Time consumed: 48.1580 s
Epoch: 847 Train_loss: 0.000 Val_acc: 0.842 Time consumed: 47.7832 s
Epoch: 848 Train_loss: 0.000 Val_acc: 0.828 Time consumed: 48.0855 s
Epoch: 849 Train_loss: 0.184 Val_a

Epoch: 954 Train_loss: 0.000 Val_acc: 0.844 Time consumed: 49.5218 s
Epoch: 955 Train_loss: 0.000 Val_acc: 0.859 Time consumed: 50.2180 s
Epoch: 956 Train_loss: 0.000 Val_acc: 0.849 Time consumed: 48.6447 s
Epoch: 957 Train_loss: 0.000 Val_acc: 0.846 Time consumed: 49.5983 s
Epoch: 958 Train_loss: 0.000 Val_acc: 0.847 Time consumed: 49.2663 s
Epoch: 959 Train_loss: 0.000 Val_acc: 0.856 Time consumed: 49.2405 s
Epoch: 960 Train_loss: 0.000 Val_acc: 0.860 Time consumed: 48.9589 s
Epoch: 961 Train_loss: 0.000 Val_acc: 0.837 Time consumed: 49.2825 s
Epoch: 962 Train_loss: 0.000 Val_acc: 0.845 Time consumed: 50.7517 s
Epoch: 963 Train_loss: 0.000 Val_acc: 0.849 Time consumed: 49.2684 s
Epoch: 964 Train_loss: 0.000 Val_acc: 0.848 Time consumed: 49.8369 s
Epoch: 965 Train_loss: 0.190 Val_acc: 0.840 Time consumed: 48.4604 s
Epoch: 966 Train_loss: 1.247 Val_acc: 0.851 Time consumed: 48.5579 s
Epoch: 967 Train_loss: 0.000 Val_acc: 0.837 Time consumed: 48.5325 s
Epoch: 968 Train_loss: 0.000 Val_a

Epoch: 1072 Train_loss: 0.000 Val_acc: 0.862 Time consumed: 47.7216 s
Epoch: 1073 Train_loss: 9.833 Val_acc: 0.847 Time consumed: 47.6403 s
Epoch: 1074 Train_loss: 0.000 Val_acc: 0.858 Time consumed: 47.5642 s
Epoch: 1075 Train_loss: 0.000 Val_acc: 0.851 Time consumed: 47.6526 s
Epoch: 1076 Train_loss: 0.000 Val_acc: 0.863 Time consumed: 47.9626 s
Epoch: 1077 Train_loss: 4.958 Val_acc: 0.842 Time consumed: 47.8374 s
Epoch: 1078 Train_loss: 0.000 Val_acc: 0.847 Time consumed: 47.5600 s
Epoch: 1079 Train_loss: 0.327 Val_acc: 0.852 Time consumed: 47.8479 s
Epoch: 1080 Train_loss: 0.000 Val_acc: 0.855 Time consumed: 47.6367 s
Epoch: 1081 Train_loss: 1.341 Val_acc: 0.843 Time consumed: 47.7556 s
Epoch: 1082 Train_loss: 0.000 Val_acc: 0.849 Time consumed: 47.6778 s
Epoch: 1083 Train_loss: 0.077 Val_acc: 0.855 Time consumed: 47.5914 s
Epoch: 1084 Train_loss: 0.562 Val_acc: 0.848 Time consumed: 47.6517 s
Epoch: 1085 Train_loss: 0.000 Val_acc: 0.851 Time consumed: 47.7374 s
Epoch: 1086 Train_lo

Epoch: 1190 Train_loss: 0.000 Val_acc: 0.852 Time consumed: 47.6914 s
Epoch: 1191 Train_loss: 0.000 Val_acc: 0.852 Time consumed: 47.6280 s
Epoch: 1192 Train_loss: 3.429 Val_acc: 0.860 Time consumed: 47.6528 s
Epoch: 1193 Train_loss: 0.000 Val_acc: 0.856 Time consumed: 47.7515 s
Epoch: 1194 Train_loss: 0.832 Val_acc: 0.857 Time consumed: 47.6332 s
Epoch: 1195 Train_loss: 0.000 Val_acc: 0.857 Time consumed: 47.6672 s
Epoch: 1196 Train_loss: 0.000 Val_acc: 0.850 Time consumed: 47.5882 s
Epoch: 1197 Train_loss: 0.000 Val_acc: 0.851 Time consumed: 47.6432 s
Epoch: 1198 Train_loss: 0.000 Val_acc: 0.860 Time consumed: 47.8188 s
Epoch: 1199 Train_loss: 2.245 Val_acc: 0.840 Time consumed: 47.8473 s
Epoch: 1200 Train_loss: 0.000 Val_acc: 0.857 Time consumed: 47.8791 s
*****************Training End!*****************
