In [1]:
import tensorflow as tf
import random

In [2]:
mnist = tf.keras.datasets.mnist
( x_train, y_train ), ( x_test, y_test ) = mnist.load_data()

# Nomalization
x_train, x_test = x_train / 255.0, x_test / 255.0
x_train = tf.cast( x_train, tf.float32 )
x_test = tf.cast( x_test, tf.float32 )

print( x_train.shape, y_train.shape )

(60000, 28, 28) (60000,)


In [3]:
nb_classes = 10

In [4]:
# Reshape
x_train = tf.reshape( x_train, [ -1, 784 ] )
x_test = tf.reshape( x_test, [ -1, 784 ] )
print( x_train.shape, x_test.shape )

(60000, 784) (10000, 784)


In [5]:
# one-hot Encoding
y_train = tf.keras.utils.to_categorical( y_train, nb_classes )
y_test = tf.keras.utils.to_categorical( y_test, nb_classes )

print( y_train.shape, y_test.shape )

(60000, 10) (10000, 10)


In [6]:
learning_rate = 0.001
training_epochs = 15
batch_size = 100
total_batch = int( len( x_train ) / batch_size )

In [7]:
xavier = tf.keras.initializers.GlorotUniform()
W1 = tf.Variable( xavier( [ 784, 512 ] ) )
b1 = tf.Variable( tf.random.normal( [ 512 ] ) )
def Layer1( X ):
    return ( tf.nn.relu( tf.matmul( X, W1 ) + b1 ) )
def Dropout1( X, rate ):
    return ( tf.nn.dropout( Layer1( X ), rate ) )

W2 = tf.Variable( xavier( [ 512, 512 ] ) )
b2 = tf.Variable( tf.random.normal( [ 512 ] ) )
def Layer2( X, rate ):
    return ( tf.nn.relu( tf.matmul( Dropout1( X, rate ), W2 ) + b2 ) )
def Dropout2( X, rate ):
    return ( tf.nn.dropout( Layer2( X, rate ), rate ) )

W3 = tf.Variable( xavier( [ 512, 512 ] ) )
b3 = tf.Variable( tf.random.normal( [ 512 ] ) )
def Layer3( X, rate ):
    return ( tf.nn.relu( tf.matmul( Dropout2( X, rate ), W3 ) + b3 ) )
def Dropout3( X, rate ):
    return ( tf.nn.dropout( Layer3( X, rate ), rate ) )

W4 = tf.Variable( xavier( [ 512, 512 ] ) )
b4 = tf.Variable( tf.random.normal( [ 512 ] ) )
def Layer4( X, rate ):
    return ( tf.nn.relu( tf.matmul( Dropout3( X, rate ), W4 ) + b4 ) )
def Dropout4( X, rate ):
    return ( tf.nn.dropout( Layer4( X, rate ), rate ) )

W5 = tf.Variable( xavier( [ 512, nb_classes ] ) )
b5 = tf.Variable( tf.random.normal( [ nb_classes ] ) )

@tf.function
def Hypothesis( X, rate ):
    return (  tf.matmul( Dropout4( X, rate ), W5 ) + b5 )

In [8]:
@tf.function
def Cost( X, Y, rate ):
    return ( tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(
            logits = Hypothesis( X, rate ), labels = Y )
    ) )

def Minimize( X, Y, rate ):
    loss = lambda: Cost( X ,Y, rate )
    
    tf.keras.optimizers.Adam( learning_rate ).minimize( loss, [ W1, W2, W3, W4, W5,
                                                               b1, b2, b3, b4, b5 ] )

In [9]:
def CorrectPrediction( X, Y, rate ):
    return ( tf.equal( tf.argmax( Hypothesis( X, rate ), axis = 1 ),
                      tf.argmax( Y, axis = 1 ) ) )

def Accuracy( X, Y, rate ):
    return ( tf.reduce_mean( tf.cast( CorrectPrediction( X, Y, rate ), tf.float32 ) ) )

In [10]:
for epoch in range( training_epochs ):
    avg_cost = 0
    
    start_batch, end_batch = 0, batch_size
    for i in range( total_batch ):
        batch_xs, batch_ys = \
            x_train[ start_batch : end_batch ], y_train[start_batch : end_batch ]
        
        Minimize( batch_xs, batch_ys, 0.3 )
        cost_val = Cost( batch_xs, batch_ys, 0.3 )
        
        avg_cost += cost_val / total_batch
        
        start_batch = start_batch + batch_size
        end_batch = end_batch + batch_size
    
    print( 'Epoch: {:04d}, Cost: {:.9f}'.format( epoch + 1, avg_cost ) )
    
print( 'Learning finished' )    

Epoch: 0001, Cost: 0.513381422
Epoch: 0002, Cost: 0.636638761
Epoch: 0003, Cost: 0.644196630
Epoch: 0004, Cost: 0.538972676
Epoch: 0005, Cost: 0.509360254
Epoch: 0006, Cost: 0.447476506
Epoch: 0007, Cost: 0.403946608
Epoch: 0008, Cost: 0.355715424
Epoch: 0009, Cost: 0.329701751
Epoch: 0010, Cost: 0.272371858
Epoch: 0011, Cost: 0.260264277
Epoch: 0012, Cost: 0.227569073
Epoch: 0013, Cost: 0.218906552
Epoch: 0014, Cost: 0.223903701
Epoch: 0015, Cost: 0.206756756
Learning finished


In [11]:
tf.print( 'Accuracy: ', Accuracy( x_test, y_test, 0 ))

Accuracy:  0.9781


In [14]:
r = random.randint( 0, len( x_test ) - 1 )
tf.print( 'Label: ', tf.argmax( y_test[ r: r + 1 ], axis = 1 ) )
tf.print( 
    'Prediction: ', tf.argmax( Hypothesis( x_test[ r: r + 1], 0 ), axis = 1 ) )

Label:  [2]
Prediction:  [2]
