In [5]:
import tensorflow as tf
import numpy as np

In [6]:
#Hyperparameter
learning_rate = 0.001
training_epochs = 10
batch_size = 100

In [7]:
#dataset processing
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
#data normalization
x_train, x_test = x_train/ 255.0, x_test/ 255.0

x_train = x_train[..., tf.newaxis]
x_test = x_test[..., tf.newaxis]

y_train = tf.keras.utils.to_categorical(y_train, 10)
y_test = tf.keras.utils.to_categorical(y_test, 10)

# Build dataset pipeline
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(buffer_size=100000).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(batch_size)

In [8]:
#build model

class mn_Model(tf.keras.Model):
  def __init__(self):
    super(mn_Model, self).__init__()    
    # init. your layers or inherit layer.   
    self.flat = tf.keras.layers.Flatten()
    self.dense1 = tf.keras.layers.Dense(units=256, activation=tf.nn.relu)
    self.dense2 = tf.keras.layers.Dense(units=10, activation=tf.nn.softmax)
  def call(self, inputs, training=False):
    #define your forwarding network
    net = self.flat(inputs)
    net = self.dense1(net)
    net = self.dense2(net)
    return net
  

model = mn_Model()

#simple print model
temp_inputs = tf.keras.Input(shape=(28, 28, 1))
model(temp_inputs)
model.summary()

Model: "mn__model_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 784)               0         
_________________________________________________________________
dense_4 (Dense)              (None, 256)               200960    
_________________________________________________________________
dense_5 (Dense)              (None, 10)                2570      
Total params: 203,530
Trainable params: 203,530
Non-trainable params: 0
_________________________________________________________________


In [9]:
# Loss function
def loss_func(model, images, labels):
    logits = model(images, training=True)
    loss = tf.reduce_mean(tf.keras.losses.sparse_categorical_crossentropy(y_true=tf.argmax(labels, 1), y_pred=logits))
    return loss

# Gradient Function
def grad(model, images, labels):
    with tf.GradientTape() as tape:
        loss = loss_func(model, images, labels)
    return tape.gradient(loss, model.trainable_variables)

# Evaluation Function
def evaluate(models, images, labels):    
    predicts = model(images, training=False)
    correct_predict = tf.equal(tf.argmax(predicts, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))
    return accuracy

# learning decay
lr_decay = tf.keras.optimizers.schedules.ExponentialDecay(learning_rate,decay_steps=x_train.shape[0] / batch_size * 5 * 5,decay_rate=0.5,staircase=True)

# Optimizer
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_decay)

In [10]:
for epoch in range(training_epochs):
    avg_loss = 0.
    avg_train_acc = 0.
    avg_test_acc = 0.
    train_step = 0
    test_step = 0
    
    for images, labels in train_ds:
        grads = grad(model, images, labels)                
        optimizer.apply_gradients(zip(grads, model.variables))
        loss = loss_func(model, images, labels)
        acc = evaluate(model, images, labels)
        avg_loss = avg_loss + loss
        avg_train_acc = avg_train_acc + acc
        train_step += 1
    avg_loss = avg_loss / train_step
    avg_train_acc = avg_train_acc / train_step
    
    for images, labels in test_ds:        
        acc = evaluate(model, images, labels)        
        avg_test_acc = avg_test_acc + acc
        test_step += 1    
    avg_test_acc = avg_test_acc / test_step    

    print('Epoch:', '{}'.format(epoch + 1), 'loss =', '{:.8f}'.format(avg_loss), 
          'train accuracy = ', '{:.4f}'.format(avg_train_acc), 
          'test accuracy = ', '{:.4f}'.format(avg_test_acc))



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Epoch: 1 loss = 0.27460861 train accuracy =  0.9234 test accuracy =  0.9550
Epoch: 2 loss = 0.11668385 train accuracy =  0.9672 test accuracy =  0.9684
Epoch: 3 loss = 0.07709118 train accuracy =  0.9782 test accuracy =  0.9744
Epoch: 4 loss = 0.05394432 train accuracy =  0.9853 test accuracy =  0.9748
Epoch: 5 loss = 0.03972222 train accuracy =  0.9893 test accuracy =  0.9789
Epoch: 6 loss = 0.03050468 train accuracy =  0.9923 test accuracy =  0.9785
Epoch: 7 loss = 0.02349371 train accuracy =  0.9946 test accuracy =  0.9771
Epoch: 8 loss = 0.01771048 train accuracy =  0.9964 test accuracy =  0.9789
Epoch: 9 loss = 0.01342475 train accuracy =  0.9976 test accuracy =  0.9788
Epoch: 10 loss 