In [1]:
import tensorflow as tf
import numpy as np

In [2]:
from tensorflow.keras.datasets import mnist
(x_train,y_train),(x_test,y_test)=mnist.load_data()

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [3]:
def get_batch(x_data,y_data,batch_size):
  idxs=np.random.randint(0,len(y_data),batch_size)
  return(x_data[idxs,:,:],y_data[idxs])

In [23]:
from numpy import random
epochs=12
batch_size=100

#normalize the input image dividing by 255.0
x_train=x_train/255.0
x_test=x_test/255.0

#convert x_test to tensor to pass through model (train data will be converted to
#tensors on the fly )
x_test=tf.Variable(x_test)

#now declare the weights connecting the hidden layer
w1=tf.Variable(tf.random.normal([784, 300], stddev=0.03), name='w1')
b1=tf.Variable(tf.random.normal([300]), name='b1')

#and the weights connecting the hidden layer to output layer
w2=tf.Variable(tf.random.normal([300, 10], stddev=0.03), name='w2')
b2=tf.Variable(tf.random.normal([10]), name='b2')

In [24]:
def nn_model(x_input,w1,b1,w2,b2):

  #flatten the image from 28 x 28 to 784
  x_input=tf.reshape(x_input,(x_input.shape[0],-1))
  x=tf.add(tf.matmul(tf.cast(x_input,tf.float32),w1),b1)
  x=tf.nn.relu(x)
  logits=tf.add(tf.matmul(x,w2),b2)
  return logits

def loss_fn(logits, labels):
  cross_entropy=tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels,logits=logits))
  return cross_entropy

In [25]:
#setup the optimizer
optimizer=tf.keras.optimizers.Adam()

total_batch = int(len(y_train) / batch_size)
for epoch in range(epochs):
  avg_loss=0
  for i in range(total_batch):
    batch_x,batch_y = get_batch(x_train, y_train, batch_size=batch_size)

    #create tensors
    batch_x = tf.Variable(batch_x)
    batch_y = tf.Variable(batch_y)
    #create one hot vector
    batch_y = tf.one_hot(batch_y, 10)
    with tf.GradientTape() as tape:
      logits = nn_model(batch_x, w1, b1, w2, b2)
      loss = loss_fn(logits,batch_y)
    gradients = tape.gradient(loss,[w1,b1,w2,b2])
    optimizer.apply_gradients(zip(gradients,[w1,b1,w2,b2]))
    avg_loss += loss/total_batch
    
  test_logits=nn_model(x_test,w1,b1,w2,b2)
  max_idxs=tf.argmax(test_logits,axis=1)
  test_acc=np.sum(max_idxs.numpy()== y_test) / len(y_test)
  print(f"Epoch:{epoch+1},   loss={avg_loss: .3f},    test set accuracy={test_acc*100:.3f}%")

print("\nTraining Complete!")

Epoch:1,   loss= 2.312,    test set accuracy=9.580%
Epoch:2,   loss= 2.308,    test set accuracy=10.320%
Epoch:3,   loss= 2.308,    test set accuracy=10.280%
Epoch:4,   loss= 2.309,    test set accuracy=9.580%
Epoch:5,   loss= 2.308,    test set accuracy=9.820%
Epoch:6,   loss= 2.308,    test set accuracy=9.580%
Epoch:7,   loss= 2.306,    test set accuracy=10.280%
Epoch:8,   loss= 2.304,    test set accuracy=11.350%
Epoch:9,   loss= 2.303,    test set accuracy=11.350%
Epoch:10,   loss= 2.300,    test set accuracy=9.580%
Epoch:11,   loss= 2.296,    test set accuracy=11.210%
Epoch:12,   loss= 2.287,    test set accuracy=20.570%

Training Complete!
