In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
import numpy as np
import os 
import matplotlib.pyplot as plt

tf.enable_eager_execution()

In [2]:
learning_rate = 0.01
training_epochs = 6
batch_size = 50

In [3]:
cur_dir = os.getcwd()
model_dir_name = 'cnn_eager_mode'

checkpoint_dir = os.path.join(cur_dir, 'checkpoints', model_dir_name)
os.makedirs(checkpoint_dir, exist_ok=True)

checkpoint_prefix = os.path.join(checkpoint_dir, model_dir_name)

In [4]:
mnist = keras.datasets.mnist
(train_x, train_y), (test_x, test_y) = mnist.load_data()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

train_x = train_x.astype(np.float32) / 255
test_x = test_x.astype(np.float32) / 255

train_x = np.expand_dims(train_x, 3)
test_x = np.expand_dims(test_x, 3)

train_y = to_categorical(train_y, 10)
test_y = to_categorical(test_y, 10)

train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).shuffle(buffer_size=70000).batch(batch_size)
test_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y)).batch(batch_size)
print('after preprocessing')
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)
after preprocessing
(60000, 28, 28, 1) (60000, 10) (10000, 28, 28, 1) (10000, 10)


In [5]:
class CNNModel(keras.Model):
    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.conv1 = keras.layers.Conv2D(32, 3, padding='SAME', activation='relu')
        self.pool1 = keras.layers.MaxPool2D(padding='SAME')
        
        self.conv2 = keras.layers.Conv2D(64, 3, padding='SAME', activation=None)
        self.batch2 = keras.layers.BatchNormalization()
        self.relu2 = keras.layers.ReLU()
        self.pool2 = keras.layers.MaxPool2D(padding='SAME')
        
        self.conv3 = keras.layers.Conv2D(128, 3, padding='SAME', activation='relu')
        self.flatten = keras.layers.Flatten()
        self.dropout1 = keras.layers.Dropout(0.3)
        
        self.dense_fc = keras.layers.Dense(256, activation=None)
        self.batch_fc = keras.layers.BatchNormalization()
        self.relu_fc = keras.layers.ReLU()
        self.dropout_fc = keras.layers.Dropout(0.3)
        
        self.output_layer = keras.layers.Dense(10, activation=None)
        
    def call(self, inputs, training=False):
        net = self.conv1(inputs)
        net = self.pool1(net)
        
        net = self.conv2(net)
        net = self.batch2(net)
        net = self.relu2(net)
        net = self.pool2(net)
        
        net = self.conv3(net)
        net = self.flatten(net)
        net = self.dropout1(net)
        net = self.dense_fc(net)
        net = self.batch_fc(net)
        net = self.relu_fc(net)
        net = self.dropout_fc(net)
        
        net = self.output_layer(net)
        return net

In [6]:
model = CNNModel()

In [7]:
def loss_fn(model, x, labels):
    logits = model(x, training=True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))
    return loss

def grad(model, x, labels):
    with tf.GradientTape() as tape:
        loss = loss_fn(model, x, labels)
    return tape.gradient(loss, model.variables)

In [16]:
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate, epsilon=0.01)

def metric(model, x, labels):
    logits = model(x, training=False)
    pred = tf.equal(tf.argmax(logits,1), tf.argmax(labels,1))
    acc = tf.reduce_mean(tf.cast(pred, tf.float32))
    return acc

checkpoint = tf.train.Checkpoint(cnn=model)

In [18]:
num_train_data = train_x.shape[0]
num_test_data = test_x.shape[0]


status = checkpoint.restore(tf.train.latest_checkpoint(checkpoint_dir))
status.initialize_or_restore()

for epoch in range(training_epochs):
    avg_loss = 0
    avg_train_acc = 0
    avg_test_acc = 0
    train_step = 0
    test_step = 0
    
    for images, labels in train_dataset:
        grads = grad(model, images, labels)
        optimizer.apply_gradients(zip(grads, model.variables))
        loss = loss_fn(model, images, labels)
        acc = metric(model, images, labels)
        avg_loss += loss
        avg_train_acc += acc
        train_step += 1
    avg_loss /= train_step
    avg_train_acc /= train_step
    print("{}th Epoch".format(epoch+1))
    print("train_avg_loss: {}\t train_avg_acc: {}".format(avg_loss, avg_train_acc))
    
    if (epoch)%5 == 0:
        for images, labels in test_dataset:
            grads = grad(model, images, labels)
            acc = metric(model, images, labels)
            avg_test_acc += acc
            test_step += 1
        avg_test_acc /= test_step
        print("test_avg_acc: {}".format(avg_test_acc))
        checkpoint.save(file_prefix=checkpoint_prefix)
    
    
    

1th Epoch
train_avg_loss: 0.029715023934841156	 train_avg_acc: 0.9920313358306885
test_avg_acc: 0.9878000020980835
2th Epoch
train_avg_loss: 0.018695184960961342	 train_avg_acc: 0.9951484203338623
3th Epoch
train_avg_loss: 0.012028713710606098	 train_avg_acc: 0.9974321722984314
4th Epoch
train_avg_loss: 0.009232631884515285	 train_avg_acc: 0.9977321624755859
5th Epoch
train_avg_loss: 0.00614493852481246	 train_avg_acc: 0.9985994696617126
6th Epoch
train_avg_loss: 0.004174050875008106	 train_avg_acc: 0.9991827607154846
test_avg_acc: 0.9894000887870789
