In [1]:
import tensorflow as tf
import random

In [2]:
mnist = tf.keras.datasets.mnist
( x_train, y_train ), ( x_test, y_test ) = mnist.load_data()

# Nomalization
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = tf.cast( x_train, tf.float32 )
x_test = tf.cast( x_test, tf.float32 )

print( x_train.shape, y_train.shape )

(60000, 28, 28) (60000,)


In [3]:
nb_classes = 10

In [4]:
# Reshape
x_train = tf.reshape( x_train, [ -1, 784 ] )
x_test = tf.reshape( x_test, [ -1, 784 ] )
print( x_train.shape, x_test.shape )

(60000, 784) (10000, 784)


In [5]:
# one-hot Encoding
y_train = tf.keras.utils.to_categorical( y_train, nb_classes )
y_test = tf.keras.utils.to_categorical( y_test, nb_classes )

print( y_train.shape, y_test.shape )

(60000, 10) (10000, 10)


In [6]:
learning_rate = 0.001
training_epochs = 15
batch_size = 100

In [8]:
xavier = tf.keras.initializers.GlorotUniform()
W1 = tf.Variable( xavier( [ 784, 256 ] ) )
b1 = tf.Variable( tf.random.normal( [ 256 ] ) )
def Layer1( X ):
    return ( tf.nn.relu( tf.matmul( X, W1 ) + b1 ) )

W2 = tf.Variable( xavier( [ 256, 256 ] ) )
b2 = tf.Variable( tf.random.normal( [ 256 ] ) )
def Layer2( X ):
    return ( tf.nn.relu( tf.matmul( Layer1( X ), W2 ) + b2 ) )

W3 = tf.Variable( xavier( [ 256, nb_classes ] ) )
b3 = tf.Variable( tf.random.normal( [ nb_classes ] ) )
@tf.function
def Hypothesis( X ):
    return (  tf.matmul( Layer2( X ), W3 ) + b3 )

In [9]:
@tf.function
def Cost( X, Y ):
    return ( tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits = Hypothesis( X ), labels = Y )
    ) )

def Minimize( X, Y ):
    loss = lambda: Cost( X ,Y )
    
    tf.keras.optimizers.Adam( learning_rate ).minimize( loss, [ W1, W2, W3, b1, b2, b3 ] )

In [10]:
def CorrectPrediction( X, Y ):
    return ( tf.equal( tf.argmax( Hypothesis( X ), axis = 1 ),
                      tf.argmax( Y, axis = 1 ) ) )

def Accuracy( X, Y ):
    return ( tf.reduce_mean( tf.cast( CorrectPrediction( X, Y ), tf.float32 ) ) )

In [11]:
for epoch in range( training_epochs ):
    avg_cost = 0
    total_batch = int( len( x_train ) / batch_size )
    
    start_batch, end_batch = 0, batch_size
    for i in range( total_batch ):
        batch_xs, batch_ys = \
            x_train[ start_batch : end_batch ], y_train[start_batch : end_batch ]
        
        Minimize( batch_xs, batch_ys )
        cost_val = Cost( batch_xs, batch_ys )
        
        avg_cost += cost_val / total_batch
        
        start_batch = start_batch + batch_size
        end_batch = end_batch + batch_size
    
    print( 'Epoch: {:04d}, Cost: {:.9f}'.format( epoch + 1, avg_cost ) )
    
print( 'Learning finished' )
        
    

Epoch: 0001, Cost: 0.206731141
Epoch: 0002, Cost: 0.057357077
Epoch: 0003, Cost: 0.040945623
Epoch: 0004, Cost: 0.034725025
Epoch: 0005, Cost: 0.031204341
Epoch: 0006, Cost: 0.030347325
Epoch: 0007, Cost: 0.029546689
Epoch: 0008, Cost: 0.026658753
Epoch: 0009, Cost: 0.025776757
Epoch: 0010, Cost: 0.024853013
Epoch: 0011, Cost: 0.024018224
Epoch: 0012, Cost: 0.020779150
Epoch: 0013, Cost: 0.021980627
Epoch: 0014, Cost: 0.019082764
Epoch: 0015, Cost: 0.019101530
Learning finished


In [12]:
tf.print( 'Accuracy: ', Accuracy( x_test, y_test ))

Accuracy:  0.9705


In [13]:
r = random.randint( 0, len( x_test ) - 1 )
tf.print( 'Label: ', tf.argmax( y_test[ r: r + 1 ], axis = 1 ) )
tf.print( 
    'Prediction: ', tf.argmax(Hypothesis( x_test[ r: r + 1] ), axis = 1 ) )

Label:  [9]
Prediction:  [9]
