In [None]:
import tensorflow
from tensorflow import keras
from keras.datasets import mnist
import numpy as np

from matplotlib import pyplot

# Fetch/load the MNIST arrays: training images/labels and test images/labels.
(X, Y), (X_test, Y_test) = mnist.load_data()

# Shuffle every sample index once. The index range is taken from the loaded
# array instead of a hard-coded 60000 so the split always matches the data.
rand = np.arange(X.shape[0])
np.random.shuffle(rand)

# The first 50000 shuffled indices are used for training ...
train_no = rand[:50000]

# ... and every index that was not picked is held out for validation
# (np.setdiff1d returns them sorted). Computed once; the original repeated
# this statement a second time after the split for no effect.
val_no = np.setdiff1d(rand, train_no)

X_train, X_val = X[train_no, :, :], X[val_no, :, :]
Y_train, Y_val = Y[train_no], Y[val_no]

# Display the first nine training images as one 3x3 grid.
for i in range(9):
    pyplot.subplot(3, 3, i + 1)
    pyplot.imshow(X_train[i], cmap=pyplot.get_cmap('gray'))
# Show the figure once, after all nine subplots exist. Calling show() inside
# the loop displays the figure after each image instead of one complete grid.
pyplot.show()

# 3 layers: 784 input, 128 second, 10 last
def init(x,y):
    """Create an ``(x, y)`` float32 weight matrix of small random values.

    Entries are drawn uniformly from [-1, 1) with the global NumPy RNG and
    then divided by ``sqrt(x * y)``.
    """
    scale = np.sqrt(x * y)
    draws = np.random.uniform(low=-1., high=1., size=(x, y))
    return np.asarray(draws / scale, dtype=np.float32)

# Seed the global RNG so the initial weights are the same on every run, then
# create both weight matrices (l1 is drawn first, l2 second):
#   l1: 784 input pixels -> 128 hidden units
#   l2: 128 hidden units -> 10 output classes
np.random.seed(42)
l1, l2 = init(28 * 28, 128), init(128, 10)

# sigmoid function
def sigmoid(x):
    """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

    Only exponentials of non-positive numbers are evaluated, so inputs with a
    large negative value do not overflow ``np.exp`` (the direct formula
    produced ``inf`` and a RuntimeWarning there).

    Args:
        x: real scalar or NumPy array.

    Returns:
        Values in [0, 1] with the same shape as ``x``.
    """
    # x >= 0:  1 / (1 + exp(-x))        (numerator is exp(0) == 1)
    # x <  0:  exp(x) / (1 + exp(x))    (algebraically the same value)
    numerator = np.exp(np.minimum(x, 0))
    return numerator / (1 + np.exp(-np.abs(x)))

# derivative of sigmoid
def d_sigmoid(x):
    """Return the derivative of the logistic sigmoid element-wise.

    The derivative ``exp(-x) / (1 + exp(-x))**2`` is an even function of
    ``x``, so it is evaluated at ``-|x|``. That keeps the exponent
    non-positive; the direct formula overflowed to ``inf / inf`` and returned
    NaN for inputs with a large negative value.

    Args:
        x: real scalar or NumPy array.

    Returns:
        Values in [0, 0.25] with the same shape as ``x``.
    """
    decay = np.exp(-np.abs(x))
    return decay / (1 + decay) ** 2

# softmax - turns a vector of scores into non-negative values that sum to 1

# softmax
def softmax(x):
    """Return the softmax of ``x`` taken along its last axis.

    For a 1-D vector the whole vector is normalised. For a 2-D
    ``(samples, classes)`` array every row is normalised on its own, so each
    row sums to 1. (Normalising over axis 0 instead would mix the samples of
    a batch together.)

    Args:
        x: NumPy array of scores with at least one dimension.

    Returns:
        Array of the same shape as ``x`` whose entries along the last axis
        are non-negative and sum to 1.
    """
    # Subtract each row's own maximum before exponentiating: this leaves the
    # result unchanged but keeps np.exp from overflowing, and guarantees every
    # row has at least one exp(0) == 1 term so the denominator is never 0.
    shifted = x - x.max(axis=-1, keepdims=True)
    exp_element = np.exp(shifted)
    return exp_element / exp_element.sum(axis=-1, keepdims=True)

# derivative of softmax
def d_softmax(x):
    """Return the element-wise softmax derivative ``s * (1 - s)``.

    ``s`` is the softmax of ``x`` along its last axis, so a 2-D
    ``(samples, classes)`` array is handled row by row. Only the diagonal of
    the softmax Jacobian (d s_i / d x_i) is returned; the cross terms between
    different classes are not included.

    Args:
        x: NumPy array of scores with at least one dimension.

    Returns:
        Array of the same shape as ``x``.
    """
    # Per-row max shift for numerical stability (does not change the softmax).
    shifted = x - x.max(axis=-1, keepdims=True)
    exp_element = np.exp(shifted)
    probs = exp_element / exp_element.sum(axis=-1, keepdims=True)
    return probs * (1 - probs)

# forward backward pass
def forward_backward_pass(x,y):
    """Run one forward pass and compute the weight gradients for a batch.

    Reads the module-level weight matrices ``l1`` and ``l2`` without
    modifying them.

    Args:
        x: batch of inputs, one row per sample (it is multiplied by ``l1``).
        y: integer class labels, one per row of ``x``; they are used as column
            indices into a 10-column one-hot matrix.

    Returns:
        Tuple ``(out, update_l1, update_l2)``: the network output and the
        gradient terms for ``l1`` and ``l2``.
    """
    # One-hot encode the labels: 1 in the column of the correct digit, 0 elsewhere.
    n_samples = len(y)
    one_hot = np.zeros((n_samples, 10), np.float32)
    one_hot[np.arange(n_samples), y] = 1

    # Forward pass: input -> l1 -> sigmoid -> l2 -> softmax.
    hidden_in = x.dot(l1)
    hidden_out = sigmoid(hidden_in)
    logits = hidden_out.dot(l2)
    out = softmax(logits)    # network prediction

    # Output-layer error: derivative of the squared error, divided by the
    # batch size, times the softmax derivative.
    delta_out = 2 * (out - one_hot) / out.shape[0] * d_softmax(logits)
    update_l2 = hidden_out.T @ delta_out

    # Propagate the error back through l2 and the sigmoid activation.
    delta_hidden = (l2.dot(delta_out.T)).T * d_sigmoid(hidden_in)
    update_l1 = x.T @ delta_hidden

    return out, update_l1, update_l2


# training

# Hyperparameters. Each "epoch" below is one mini-batch update, not a full
# pass over the training set.
epochs = 10000
lr = 0.001
batch = 128

losses, accuracies, val_accuracies = [], [], []

# Flatten the validation images once; this reshape does not depend on the
# loop variable, so there is no reason to redo it during training.
X_val = X_val.reshape((-1, 28 * 28))

for i in range(epochs):
    # Draw `batch` random row indices into X_train (with replacement) and
    # fetch the matching flattened images and labels.
    sample = np.random.randint(0, X_train.shape[0], size=(batch))
    x = X_train[sample].reshape((-1, 28 * 28))
    y = Y_train[sample]

    # Forward + backward pass: network output and both weight gradients.
    out, update_l1, update_l2 = forward_backward_pass(x, y)

    # Predicted class per sample and the accuracy on this batch.
    category = np.argmax(out, axis=1)
    accuracy = (category == y).mean()
    accuracies.append(accuracy)

    # Squared-error loss between the network output and the one-hot labels,
    # summed over classes and averaged over the batch. This is the quantity
    # whose gradient forward_backward_pass uses; squaring the difference of
    # the digit labels themselves (e.g. predicting 9 for a 1 counting as 64)
    # does not measure the network's output error.
    one_hot = np.eye(out.shape[1], dtype=np.float32)[y]
    loss = ((out - one_hot) ** 2).sum(axis=1).mean()
    losses.append(loss.item())

    # Gradient-descent step on both weight matrices.
    l1 = l1 - lr * update_l1
    l2 = l2 - lr * update_l2

    # Every 20 updates, measure accuracy on the held-out validation set.
    if i % 20 == 0:
        val_out = np.argmax(softmax(sigmoid(X_val.dot(l1)).dot(l2)), axis=1)
        val_acc = (val_out == Y_val).mean()
        val_accuracies.append(val_acc.item())
    # 500 is a multiple of 20, so val_acc is always fresh when this prints.
    if i % 500 == 0:
        print(f'For {i}th epoch: train accuracy: {accuracy:.3f} | validation accuracy:{val_acc:.3f}')

# test         
# Hand-made 7x7 pattern (10 = ink, 0 = background); the strokes resemble a 7.
m = np.array([
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 10, 10, 10, 0, 0],
    [0, 0, 0, 0, 10, 0, 0],
    [0, 0, 0, 0, 10, 0, 0],
    [0, 0, 0, 0, 10, 0, 0],
    [0, 0, 0, 0, 10, 0, 0],
    [0, 0, 0, 0, 0, 0, 0],
])

# Enlarge every cell to a 4x4 patch (7x7 -> 28x28), then flatten the image
# into a single row so it can be multiplied by l1.
m = np.repeat(np.repeat(m, 4, axis=0), 4, axis=1)
m = m.reshape(1, -1)
pyplot.imshow(m.reshape(28, 28))
# Index of the largest output unit = predicted digit for the pattern.
x = np.argmax(sigmoid(m.dot(l1)).dot(l2), axis=1)
x