In [11]:
import tensorflow as tf
import numpy as np 
import matplotlib.pyplot as plt 
print('load data form MNIST')
mnist = tf.keras.datasets.mnist 
(x_train, y_train), (x_test,y_test) = mnist.load_data()
x_train = np.reshape(x_train,(60000,784))/255.0
x_test = np.reshape(x_test,(10000,784))/255.0
y_train = np.matrix(np.eye(10)[y_train])
y_test = np.matrix(np.eye(10)[y_test])

print("---------------------------------")
print(x_train.shape)
print(y_train.shape)


load data form MNIST
---------------------------------
(60000, 784)
(60000, 10)


In [12]:

def sigmoid(x):
  return 1./(1.+np.exp(-x))
  
def softmax(x):
  return np.divide(np.matrix(np.exp(x)),np.mat(np.sum(np.exp(x),axis=1)))

def relu(x):
  return (np.maximum(0,x))
# def relu(x):
#   data = [max(0,value) for value in x]
#   return np.array(data) 


In [13]:
def Forwardpass(X,Wh2,Wh1,bh1,bh2,Wo,bo):
  zh1 = X@Wh1.T + bh1
  a1 = relu(zh1)
  zh2 = a1@Wh2.T + bh2
  a2 = sigmoid(zh2)
  z=a2@Wo.T + bo
  o = softmax(z)
  return o

In [14]:
learningRate = 0.5
Epoch=10
NumTrainSamples=60000
NumTestSamples=10000
NumInputs=784
NumHiddenUnits=512


In [15]:
def AccTest(label,prediction): # calculate the matching score
  OutMaxArg=np.argmax(prediction,axis=1)
  LabelMaxArg=np.argmax(label,axis=1)
  Accuracy=np.mean(OutMaxArg==LabelMaxArg)
  return Accuracy

In [16]:
NumClasses=10
#hidden 1
Wh1=np.matrix(np.random.uniform(-0.5,0.5,(NumHiddenUnits,NumInputs)))
bh1= np.random.uniform(0,0.5,(1,NumHiddenUnits))
#hidden 2
Wh2=np.matrix(np.random.uniform(-0.5,0.5,(NumHiddenUnits,NumHiddenUnits)))
bh2= np.random.uniform(0,0.5,(1,NumHiddenUnits))

dWh1= np.zeros((NumHiddenUnits,NumInputs))
dWh2= np.zeros((NumHiddenUnits,NumHiddenUnits))
dbh1= np.zeros((1,NumHiddenUnits))
dbh2= np.zeros((1,NumHiddenUnits))
#Output layer
Wo=np.random.uniform(-0.5,0.5,(NumClasses,NumHiddenUnits))
bo= np.random.uniform(0,0.5,(1,NumClasses))
dWo= np.zeros((NumClasses,NumHiddenUnits))
dbo= np.zeros((1,NumClasses))

In [17]:
#test accuracy with random inititial weights 
prediction = Forwardpass(x_train,Wh2,Wh1,bh1,bh2,Wo,bo)
Acc = AccTest(y_train, prediction)
print(Acc)

0.10456666666666667


In [18]:
#mini batch SGD
from IPython.display import clear_output
loss = []
Acc = []
Batch_size = 200
Stochastic_samples = np.arange(NumTrainSamples) #0, 1,2,3,4,5...59999
def derivative_relu(x):
  x[x<=0] = 0
  x[x>0] = 1
  return x


for ep in range (Epoch):
  np.random.shuffle(Stochastic_samples)
  for ite in range (0,NumTrainSamples,Batch_size):
  #feed fordware propagation
    Batch_samples = Stochastic_samples[ite:ite+Batch_size]
    x = x_train[Batch_samples,:]
    y=y_train[Batch_samples,:]
    zh1 = x@Wh1.T + bh1
    a1 = relu(zh1)
    zh2 = a1@Wh2.T + bh2
    a2 = sigmoid(zh2)
    z=a2@Wo.T + bo
    o = softmax(z)
    #calculate loss
    loss.append(-np.sum(np.multiply(y,np.log10(o))))
    #calculate the error for the ouput layer
    d = o-y
    #Back propagate error
    dh = d@Wo
    dhs = np.multiply(np.multiply(dh,a2),(1-a2))
    dh1 = dhs@Wh2
    dhr = np.multiply(derivative_relu(a1),dh1)
    #update weight

    dWo = np.matmul(np.transpose(d),a2)
    dbo = np.mean(d) # consider a is 1 for bias
    dWh2 = np.matmul(np.transpose(dhs),a1)
    dbh2 = np.mean(dhs) # consider a is 1 for bias
    dWh1 = np.matmul(np.transpose(dhr),x)
    dbh1 = np.mean(dhr) # consider a is 1 for bias
    
    Wo =Wo - learningRate*dWo/Batch_size
    bo =bo - learningRate*dbo

    Wh2 =Wh2-learningRate*dWh2/Batch_size
    bh2 =bh2-learningRate*dbh2

    Wh1 =Wh1-learningRate*dWh1/Batch_size
    bh1 =bh1-learningRate*dbh1
  #Test accuracy with random innitial weights
  prediction = Forwardpass(x_test,Wh2,Wh1,bh1,bh2,Wo,bo)
  Acc.append(AccTest(y_test,prediction))
  # clear_output(wait=TrueqTrueq)
  # plt.plot([i for i, _ in enumerate(Acc)],Acc,'o')
  # plt.show()
  print('Epoch:', ep )
  print('Accuracy:',AccTest(y_test,prediction))
  

Epoch: 0
Accuracy: 0.927
Epoch: 1
Accuracy: 0.9421
Epoch: 2
Accuracy: 0.9489
Epoch: 3
Accuracy: 0.9542
Epoch: 4
Accuracy: 0.9574
Epoch: 5
Accuracy: 0.9577
Epoch: 6
Accuracy: 0.9605
Epoch: 7
Accuracy: 0.9614
Epoch: 8
Accuracy: 0.9632
Epoch: 9
Accuracy: 0.9625
