In [1]:
import tensorflow as tf
from matplotlib import pyplot as plt

In [2]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

In [3]:
x_val  = x_train[50000:60000]
x_train = x_train[0:50000]
y_val  = y_train[50000:60000]
y_train = y_train[0:50000]

In [4]:
print("train data has " + str(x_train.shape[0]) + " samples")
print("every train data is " + str(x_train.shape[1])
     + "*" + str(x_train.shape[2]) + " image")



train data has 50000 samples
every train data is 28*28 image


In [5]:
print("validation data has " + str(x_val.shape[0]) + " samples")
print("every train data is " + str(x_val.shape[1]) 
      + " * " + str(x_train.shape[2]) + " image")

validation data has 10000 samples
every train data is 28 * 28 image


In [6]:
# sample to show gray scale values
print(x_train[0][8])

[  0   0   0   0   0   0   0  18 219 253 253 253 253 253 198 182 247 241
   0   0   0   0   0   0   0   0   0   0]


In [7]:
# sample to show labels for first train data to 10th train data
print(y_train[0:9])

[5 0 4 1 9 2 1 3 1]


In [8]:
import numpy as np
x_train = np.reshape(x_train, (50000,28,28,1))
x_val = np.reshape(x_val, (10000,28,28,1))
x_test = np.reshape(x_test, (10000,28,28,1))

print(x_train.shape)
print(x_test.shape)

(50000, 28, 28, 1)
(10000, 28, 28, 1)


In [9]:
x_train = x_train.astype('float32')
x_val = x_val.astype('float32')
x_test = x_test.astype('float32')

gray_scale = 255
x_train /= gray_scale
x_val /= gray_scale
x_test /= gray_scale

In [10]:
num_classes = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_val = tf.keras.utils.to_categorical(y_val, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)

In [11]:
x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
phase_train = tf.placeholder(tf.bool)

In [12]:
# def weight_variable(shape):
#   initial = tf.truncated_normal(shape, stddev=0.1)
#   return tf.Variable(initial)

# def bias_variable(shape):
#   initial = tf.constant(0.1, shape=shape)
#   return tf.Variable(initial)

In [13]:
def conv_batch_norm(x, n_out, phase_train):
    beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32)
    gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32)
    
    beta = tf.get_variable("beta", [n_out], initializer=beta_init)
    gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init)    
    
    batch_mean, batch_var = tf.nn.monents(x, [0,1,2], name = 'moments')
    ema = tf.train.ExponentialMovingAverage(decay=0.9)
    ema_apply_op = ema.apply([batch_mean, batch_var])
    ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)
    
    def mean_var_with_update():
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)
    mean, var = control_flow_ops.cond(phase_train,
                                     mean_var_with_update, lambda: (ema_mean, ema_var))

    normed = tf.nn.bach_norm_with_global_normalization(x, mean, var, beta, gamma, 1e-3, True)
    return normd

In [14]:
def layer_batch_norm(x, nout, phase_train):
    beta_init = tf.constant_initializer(value=0.0, dtype=tf.float32)
    gamma_init = tf.constant_initializer(value=1.0, dtype=tf.float32)
    
    beta = tf.get_variable("beta", [n_out], initializer=beta_init)
    gamma = tf.get_variable("gamma", [n_out], initializer=gamma_init)    
    
    batch_mean, batch_var = tf.nn.monents(x, [0,1,2], name = 'moments')
    ema = tf.train.ExponentialMovingAverage(decay=0.9)
    ema_apply_op = ema.apply([batch_mean, batch_var])
    ema_mean, ema_var = ema.average(batch_mean), ema.average(batch_var)
    
    def mean_var_with_update():
        with tf.control_dependencies([ema_apply_op]):
            return tf.identity(batch_mean), tf.identity(batch_var)
    mean, var = control_flow_ops.cond(phase_train,
                                     mean_var_with_update, lambda: (ema_mean, ema_var))
    x_r = tf.reshape(x, [-1, 1, 1, n_out])
    normed = tf.nn.bach_norm_with_global_normalization(x_r, mean, var, beta, gamma, 1e-3, True)
    return normd

In [15]:
def conv2d(input, weight_shape, bias_shape, phase_train):
    incoming = weight_shape[0]*weight_shape[1]*weight_shape[2]
    weight_init = tf.random_normal_initializer(stddev=(2.0/incoming)**0.5)
    bias_init = tf.constant_initializer(value=0)
    
    W = tf.get_variable("W", weight_shape, initializer=weight_init)
    b = tf.get_variable("b", bias_shape, initializer=bias_init)
    logits = tf.nn.bias_add(tf.nn.conv2d(input,W,
                                        strides=[1,1,1,1], padding = 'SAME'), b)
        
    return tf.nn.relu(conv_batch_norm(logits, weight_shape[3], phase_train))

def layer(input, weight_shape, bias_shape, phase_train):
    weight_init = tf.random_normal_initializer(stddev=(2.0/weight_shape[0])**5)
    bias_init = tf.constant_initializer(value=0)
    
    W = tf.get_variable("W", weight_shape, initializer=weight_init)
    b = tf.get_variable("b", bias_shape, initializer=bias_init)
    logits = tf.matmul(input, W)+b
    return tf.nn.relu(layer_batch_norm(logits, weight_shape[1], phase_train))

def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                        strides=[1, 2, 2, 1], padding='SAME')

In [16]:
with tf.name_scope('ConLayer1') as scope:
#     W_conv1 = weight_variable([5, 5, 1, 16])
#     b_conv1 = bias_variable([16])
#     h_conv1 = tf.nn.relu(conv2d(x, W_conv1) + b_conv1)
    
    h_conv1 = conv2d(x, [5, 5, 1, 16], [16], phase_train)
#     W_hist1 = tf.summary.histogram('weight1', W_conv1)
#     b_hist1 = tf.summary.histogram('bias1', b_conv1)
#     h_hist1 = tf.summary.histogram('hlayer1', h_conv1)

    h_pool1 = max_pool_2x2(h_conv1)

AttributeError: module 'tensorflow.tools.api.generator.api.nn' has no attribute 'monents'

In [None]:
with tf.name_scope('ConLayer2') as scope:
    W_conv2 = weight_variable([5, 5, 16, 32])
    b_conv2 = bias_variable([32])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

    W_hist2 = tf.summary.histogram('weight2', W_conv2)
    b_hist2 = tf.summary.histogram('bias2', b_conv2)
    h_hist2 = tf.summary.histogram('hlayer2', h_conv2)
    
    h_pool2 = max_pool_2x2(h_conv2)

In [None]:
W_fc1 = weight_variable([7 * 7 * 32, 128])
b_fc1 = bias_variable([128])

h_fc1_flat = tf.reshape(h_pool2, [-1, 7*7*32])
h_fc1 = tf.nn.relu(tf.matmul(h_fc1_flat, W_fc1) + b_fc1)

In [None]:
W_fc2 = weight_variable([128, 10])
b_fc2 = bias_variable([10])

y_conv = tf.matmul(h_fc1, W_fc2) + b_fc2

In [None]:
cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=y_conv))
cost_summ= tf.summary.scalar('cost',cross_entropy)

In [None]:
train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)

In [None]:
correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(y_,1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [None]:
# from tqdm.notebook import tqdm
# from time import sleep

# initialize
init = tf.global_variables_initializer()

# train hyperparameters
epoch_cnt = 3
batch_size = 500
iteration = len(x_train) // batch_size

# Start training
with tf.Session() as sess:
    # Writer logs and show graph 
    tf.set_random_seed(777)
    merged_summary= tf.summary.merge_all()
    writer = tf.summary.FileWriter('./CNN_logs', sess.graph)
    # Run the initializer
    sess.run(init)
    
    
    for epoch in range(epoch_cnt):
        avg_loss = 0.
        start = 0; end = batch_size
        feed_dict = {x:x_train[start: end],y_: y_train[start: end]}
        
        for step in range(iteration):
          
            if step%10 == 0:
                summary = sess.run(merged_summary, feed_dict=feed_dict)
                writer.add_summary(summary, step)
                train_acc = accuracy.eval(feed_dict=feed_dict)
                
                print("step "+str(step)+ ": train acc: "+str(train_acc))
                
            train_step.run(
                feed_dict={
                    x:x_train[start: end], 
                    y_: y_train[start: end]})
            start += batch_size; end += batch_size    
        
        # Validate model
        val_accuracy = accuracy.eval(feed_dict={x:x_val, y_: y_val})
        print("validation accuracy: "+str(val_accuracy))
        
    test_accuracy = accuracy.eval(feed_dict={x:x_test, y_: y_test}) 
    print("test accuracy: "+str(test_accuracy))