In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.python.layers.base import Layer
import time

  return f(*args, **kwds)
  from ._conv import register_converters as _register_converters


In [2]:
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
train_imgs = mnist.train.images
train_labels = mnist.train.labels
test_imgs = mnist.test.images
test_labels = mnist.test.labels
valid_imgs = mnist.validation.images
valid_labels = mnist.validation.labels

Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz


In [3]:
batch_size = 32
img_width = 28
img_height = 28
n_channel = 1
dropout_rate = 0.1
output_size = 10

n_epoch = 100
n_train_iteration = len(train_imgs) // batch_size
n_eval_train = n_train_iteration // 3
n_eval_valid = len(valid_imgs) // batch_size
n_eval_test = len(test_imgs) // batch_size
print_every = 100

In [4]:
class conv_block(Layer):
    def __init__(self, num_filter, num_conv):
        self.num_filter = num_filter
        self.num_conv = num_conv
        self.batch_norm1 = tf.layers.BatchNormalization(axis=3, name='batch1')
        self.batch_norm2 = tf.layers.BatchNormalization(axis=3, name='batch2')
        self.batch_norm3 = tf.layers.BatchNormalization(axis=3, name='batch3')
        self.conv1 = tf.layers.Conv2D(filters=self.num_filter, kernel_size=3, padding='same', activation=tf.nn.relu, name='conv1')
        self.conv2 = tf.layers.Conv2D(filters=self.num_filter, kernel_size=3, padding='same', activation=tf.nn.relu, name='conv2')
        self.conv3 = tf.layers.Conv2D(filters=self.num_filter, kernel_size=3, padding='same', activation=tf.nn.relu, name='conv3')
        self.maxpool = tf.layers.MaxPooling2D(pool_size=2, strides=2, padding='valid', name='maxpool')
    
    def __call__(self, inputs, is_train):
        conv_layer = self.batch_norm1(inputs, training=is_train)
        conv_layer = self.conv1(conv_layer)
        conv_layer = self.batch_norm2(conv_layer, training=is_train)
        conv_layer = self.conv2(conv_layer)
        if self.num_conv == 3:
            conv_layer = self.batch_norm3(conv_layer, training=is_train)
            conv_layer = self.conv3(conv_layer)
        conv_layer = self.maxpool(conv_layer)
        return conv_layer

In [5]:
class VGG(Layer):
    def __init__(self):
        self.conv_64 = conv_block(64, 2)
        self.conv_128 = conv_block(128, 2)
        self.conv_256 = conv_block(256, 3)
        self.conv_512_1 = conv_block(512, 3)
        self.conv_512_2 = conv_block(512, 3)
        self.dens_1 = tf.layers.Dense(units=4096, activation=tf.nn.relu, name='dens1')
        self.dens_2 = tf.layers.Dense(units=4096, activation=tf.nn.relu, name='dens2')
        self.drop_1 = tf.layers.Dropout(rate=dropout_rate)
        self.drop_2 = tf.layers.Dropout(rate=dropout_rate)
        self.logit = tf.layers.Dense(units=output_size)
        
    def __call__(self, inputs, is_train):
        input_layer = tf.reshape(inputs, [batch_size, img_height, img_width, n_channel])
        input_resize = tf.image.resize_nearest_neighbor(input_layer, size=[224,224], name='resize')
        conv_layer1 = self.conv_64(input_resize, is_train)
        conv_layer2 = self.conv_128(conv_layer1, is_train)
        conv_layer3 = self.conv_256(conv_layer2, is_train)
        conv_layer4 = self.conv_512_1(conv_layer3, is_train)
        conv_layer5 = self.conv_512_2(conv_layer4, is_train)
        flatten_layer = tf.layers.flatten(conv_layer5)
        dens_layer1 = self.dens_1(flatten_layer)
        drop_layer1 = self.drop_1(dens_layer1, training=is_train)
        dens_layer2 = self.dens_2(drop_layer1)
        drop_layer2 = self.drop_2(dens_layer2, training=is_train)
        logits = self.logit(drop_layer2)
        return logits
        

In [6]:
vgg_model = VGG()

In [7]:
X = tf.placeholder(dtype=tf.float32, shape=[batch_size, 784])
y = tf.placeholder(dtype=tf.int32, shape = [batch_size])
is_train = tf.placeholder(dtype = tf.bool)

y_onehot = tf.one_hot(indices=tf.cast(y, tf.int32), depth=10)
logits = vgg_model(X, is_train)
loss = tf.losses.softmax_cross_entropy(y_onehot, logits)

optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
# optimizer = tf.train.RMSPropOptimizer(learning_rate=1e-3, momentum=0.9)

train_op = optimizer.minimize(loss, global_step=tf.train.get_global_step())
predictions = tf.argmax(logits, axis=1)
accuracy = tf.metrics.accuracy(y, predictions)

extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(extra_update_ops):
    train_op = optimizer.minimize(loss)

In [8]:
def train():
#     n_iteration = 2
    for ite in range(n_train_iteration):
        images,labels = mnist.train.next_batch(batch_size)
        _,_,loss_value,_ = sess.run([y_onehot, logits, loss, train_op],feed_dict={X:images, y:labels, is_train:True})
        if (ite+1)%print_every == 0:
            print('[ite %d]loss=%f'%(ite+1, loss_value))

def evaluation():
    train_loss,valid_loss,test_loss = 0.0,0.0,0.0
    train_acc,valid_acc,test_acc = 0.0,0.0,0.0
    for ite in range(n_eval_train):
        images,labels = mnist.train.next_batch(batch_size)
        _,_,loss_value,_,acc = sess.run([y_onehot, logits, loss, predictions, accuracy], feed_dict={X:images, y:labels, is_train:False})
        train_loss += loss_value
        train_acc += acc[0]

    train_loss /= n_eval_train
    train_acc /= n_eval_train
    
    for ite in range(n_eval_valid):
        images,labels = mnist.validation.next_batch(batch_size)
        _,_,loss_value,_,acc = sess.run([y_onehot, logits, loss, predictions, accuracy], feed_dict={X:images, y:labels, is_train:False})
        valid_loss += loss_value
        valid_acc += acc[0]
    valid_loss /= n_eval_valid
    valid_acc /= n_eval_valid
    
    for ite in range(n_eval_test):
        images,labels = mnist.test.next_batch(batch_size)
        _,_,loss_value,_,acc = sess.run([y_onehot, logits, loss, predictions, accuracy], feed_dict={X:images, y:labels, is_train:False})
        test_loss += loss_value
        test_acc += acc[0]
    test_loss /= n_eval_test
    test_acc /= n_eval_test
    
    return train_loss,valid_loss,test_loss,train_acc,valid_acc,test_acc

In [None]:
trainloss_list = []
validloss_list = []
testloss_list = []
trainacc_list = []
validacc_list = []
testacc_list = []

with tf.Session() as sess:
    print('Start:'+time.strftime("%H:%M:%S", time.localtime()))
    tf.global_variables_initializer().run()
    tf.local_variables_initializer().run()
    for epoch in range(n_epoch):
        train()
        trl,val,tel,tra,vaa,tea = evaluation()
        trainloss_list.append(trl)
        validloss_list.append(val)
        testloss_list.append(tel)
        trainacc_list.append(tra)
        validacc_list.append(vaa)
        testacc_list.append(tea)
        print('[Epoch %d]'%(epoch+1)+time.strftime("%H:%M:%S", time.localtime()))
        print('train_loss=%f, train_acc=%f, valid_loss=%f, valid_acc=%f, test_loss=%f, test_acc=%f'%(trl,tra,val,vaa,tel,tea))

Start:14:40:42
[ite 100]loss=0.594229
[ite 200]loss=0.251106
[ite 300]loss=0.361489
[ite 400]loss=0.446248
[ite 500]loss=0.243903
[ite 600]loss=0.099808
[ite 700]loss=0.074153
[ite 800]loss=0.156157
[ite 900]loss=0.051852
[ite 1000]loss=0.092173
[ite 1100]loss=0.030079
[ite 1200]loss=0.133780
[ite 1300]loss=0.059157
[ite 1400]loss=0.087693
[ite 1500]loss=0.071187
[ite 1600]loss=0.086596
[ite 1700]loss=0.177253
[Epoch 1]14:53:54
train_loss=0.082762, train_acc=0.977972, valid_loss=0.085654, valid_acc=0.979558, test_loss=0.079694, test_acc=0.979049
[ite 100]loss=0.044159
[ite 200]loss=0.014432
[ite 300]loss=0.047107
[ite 400]loss=0.013024
[ite 500]loss=0.006498
[ite 600]loss=0.073465
[ite 700]loss=0.075118
[ite 800]loss=0.048996
[ite 900]loss=0.052494
[ite 1000]loss=0.166511
[ite 1100]loss=0.027743
[ite 1200]loss=0.004289
[ite 1300]loss=0.017507
[ite 1400]loss=0.034557
[ite 1500]loss=0.140650
[ite 1600]loss=0.009237
[ite 1700]loss=0.046320
[Epoch 2]15:07:02
train_loss=0.034615, train_acc=

[ite 700]loss=0.000455
[ite 800]loss=0.000316
[ite 900]loss=0.000474
[ite 1000]loss=0.000762
[ite 1100]loss=0.000307
[ite 1200]loss=0.002288
[ite 1300]loss=0.001634
[ite 1400]loss=0.000821
[ite 1500]loss=0.001137
[ite 1600]loss=0.001877
[ite 1700]loss=0.001565
[Epoch 16]18:10:40
train_loss=0.000433, train_acc=0.993832, valid_loss=0.031319, valid_acc=0.993939, test_loss=0.024647, test_acc=0.993909
[ite 100]loss=0.000485
[ite 200]loss=0.000346
[ite 300]loss=0.000180
[ite 400]loss=0.000560
[ite 500]loss=0.004131
[ite 600]loss=0.000609
[ite 700]loss=0.002925
[ite 800]loss=0.001448
[ite 900]loss=0.000405
[ite 1000]loss=0.001278
[ite 1100]loss=0.000440
[ite 1200]loss=0.000491
[ite 1300]loss=0.000634
[ite 1400]loss=0.000837
[ite 1500]loss=0.001943
[ite 1600]loss=0.001667
[ite 1700]loss=0.001535
[Epoch 17]18:23:46
train_loss=0.000429, train_acc=0.993984, valid_loss=0.033640, valid_acc=0.994075, test_loss=0.024787, test_acc=0.994036
[ite 100]loss=0.000983
[ite 200]loss=0.001494
[ite 300]loss=0.