In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.datasets import mnist
import os
tf.enable_eager_execution()

In [2]:
def load_mnist():
    (train_data, train_labels), (test_data, test_labels) = mnist.load_data()
    train_data = np.expand_dims(train_data, axis = -1)
    test_data = np.expand_dims(test_data, axis = -1)
    
    train_data, test_data = normalize(train_data, test_data)
    
    train_labels = to_categorical(train_labels, 10)
    test_labels = to_categorical(test_labels, 10)
    return train_data, train_labels, test_data, test_labels

def normalize(train_data, test_data):
    train_data = train_data.astype(np.float32)/255.0
    test_data = test_data.astype(np.float32)/255.0
    
    return train_data, test_data

In [3]:
def loss_fn(model, images, labels):
    logits = model(images, training = True)
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits = logits, labels = labels))
    return loss

def accuracy_fn(model, images, labels):
    logits = model(images, training = False)
    prediction = tf.equal(tf.argmax(logits, -1), tf.argmax(labels, -1))
    accuracy = tf.reduce_mean(tf.cast(prediction, tf.float32))
    return accuracy

def grad(model, images, labels):
    with tf.GradientTape() as tape:
        loss = loss_fn(model, images, labels)
    return tape.gradient(loss, model.variables)

In [4]:
def flatten():
    return tf.keras.layers.Flatten()

def dense(label_dim, weight_init):
    return tf.keras.layers.Dense(units = label_dim, use_bias= True, \
                                 kernel_initializer = weight_init)

def relu():
    return tf.keras.layers.Activation(tf.keras.activations.relu)

def dropout(rate):
    return tf.keras.layers.Dropout(rate)

def batch_norm():
    return tf.keras.layers.BatchNormalization()


In [5]:
class create_model(tf.keras.Model):
    def __init__(self, label_dim):
        super(create_model, self).__init__()
        weight_init = tf.keras.initializers.glorot_uniform()
        
        self.model = tf.keras.Sequential()
        self.model.add(flatten())
        
        for i in range(4):
            self.model.add(dense(512, weight_init))
            self.model.add(relu())
#             self.model.add(dropout(rate = 0.5))
        
        self.model.add(dense(label_dim, weight_init))
        
    def call(self, x, training = None, mask = None):
        x = self.model(x)
        
        return x

In [6]:
train_x, train_y, test_x, test_y = load_mnist()

learning_rate = 0.001
batch_size = 128

training_epochs = 1
training_iterations = len(train_x)//batch_size

label_dim = 10

train_flag = True

In [7]:
train_dataset = tf.data.Dataset.from_tensor_slices((train_x, train_y)).\
        shuffle(buffer_size = 100000).\
        prefetch(buffer_size = batch_size).\
        batch(batch_size).\
        repeat()

test_dataset = tf.data.Dataset.from_tensor_slices((test_x, test_y)).\
        shuffle(buffer_size = 100000).\
        prefetch(buffer_size = len(test_x)).\
        batch(len(test_x)).\
        repeat()

train_iterator = train_dataset.make_one_shot_iterator()
test_iterator = test_dataset.make_one_shot_iterator()


W0927 02:57:10.696192 4775978432 deprecation.py:323] From <ipython-input-7-0d43abf47789>:13: DatasetV1.make_one_shot_iterator (from tensorflow.python.data.ops.dataset_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `for ... in dataset:` to iterate over a dataset. If using `tf.estimator`, return the `Dataset` object directly from your input function. As a last resort, you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)`.


In [8]:
network = create_model(label_dim)

optimizer = tf.train.AdamOptimizer(learning_rate = learning_rate)

# checkpoint_dir = 'checkpoints'
# logs_dir = 'logs'

# model_dir = 'nn_deep'

# checkpoint_dir = os.path.join(checkpoint_dir, model_dir)
# check_folder(checkpoint_dir)
# checkpoint_prefix = os.path.join(checkpoint_dir, model_dir)
# logs_dir = os.path.join(logs_dir, model_dir)

In [13]:
for epoch in range(training_epochs):
    for idx in range(training_iterations):
        train_input, train_label = train_iterator.get_next()
        
        grads = grad(network, train_input, train_label)
        optimizer.apply_gradients(grads_and_vars = zip(grads, network.variables))
        
        train_loss = loss_fn(network, train_input, train_label)
        train_accuracy = accuracy_fn(network, train_input, train_label)
        
        test_input, test_label = test_iterator.get_next()
        
        test_accuracy = accuracy_fn(network, test_input, test_label)
        
        print("Epoch: [%2d] [%5d/%5d], train_loss: %.8f, train_accuracy: %.4f, test_Accuracy: %.4f"\
             % (epoch, idx, training_iterations, train_loss, train_accuracy, test_accuracy))


Epoch: [ 0] [    0/  468], train_loss: 0.10930698, train_accuracy: 0.9766, test_Accuracy: 0.9645
Epoch: [ 0] [    1/  468], train_loss: 0.12356662, train_accuracy: 0.9844, test_Accuracy: 0.9657
Epoch: [ 0] [    2/  468], train_loss: 0.08150953, train_accuracy: 0.9688, test_Accuracy: 0.9671
Epoch: [ 0] [    3/  468], train_loss: 0.09910454, train_accuracy: 0.9531, test_Accuracy: 0.9682
Epoch: [ 0] [    4/  468], train_loss: 0.04530586, train_accuracy: 0.9844, test_Accuracy: 0.9679
Epoch: [ 0] [    5/  468], train_loss: 0.02303698, train_accuracy: 1.0000, test_Accuracy: 0.9656
Epoch: [ 0] [    6/  468], train_loss: 0.09087033, train_accuracy: 0.9688, test_Accuracy: 0.9636
Epoch: [ 0] [    7/  468], train_loss: 0.03076844, train_accuracy: 1.0000, test_Accuracy: 0.9630
Epoch: [ 0] [    8/  468], train_loss: 0.07921658, train_accuracy: 0.9844, test_Accuracy: 0.9615
Epoch: [ 0] [    9/  468], train_loss: 0.13075106, train_accuracy: 0.9453, test_Accuracy: 0.9640
Epoch: [ 0] [   10/  468], tra

KeyboardInterrupt: 

In [10]:
print("test_Accuracy: %.4f" % (test_accuracy))

test_Accuracy: 0.1135
