In [2]:
from sklearn.datasets import fetch_openml
X,y=fetch_openml('mnist_784',version=1,return_X_y=True)
X=X.values
y=y.astype(int).values

In [3]:
print(X.shape," ",y.shape)

(70000, 784)   (70000,)


In [4]:
X = ((X / 255.) - .5) * 2

In [6]:
from sklearn.model_selection import train_test_split

X_temp,X_test,y_temp,y_test=train_test_split(X,y,test_size=10000,random_state=123,stratify=y)
X_train,X_valid,y_train,y_valid=train_test_split(X_temp,y_temp,test_size=5000,random_state=123,stratify=y_temp)

In [7]:
import numpy as np

def sigmoid(z):
    return 1./1+np.exp(-z)

def int_to_onehot(y,num_labels):
    ary=np.zeros((y.shape[0],num_labels))
    for i,val in enumerate(y):
        ary[i,val]=1

    return ary    

In [13]:
class NeuralNetMLP:

    def __init__(self,num_features,num_hidden,num_classes,random_seed=123):
        super().__init__()
        self.num_classes=num_classes
        rng=np.random.RandomState(random_seed)
        self.weight_h=rng.normal(loc=0.0,scale=0.1,size=(num_hidden,num_features))
        self.bias_h=np.zeros(num_hidden)

        self.weight_out=rng.normal(loc=0.0,scale=0.1,size=(num_classes,num_hidden))
        self.bias_out=np.zeros(num_classes)
    
    def forward(self,x):
        z_h=np.dot(x,self.weight_h.T)+self.bias_h
        a_h=sigmoid(z_h)

        z_out=np.dot(a_h,self.weight_out.T)+self.bias_out
        a_out=sigmoid(z_out)
        return a_h,a_out
    
    def backward(self,x,a_h,a_out,y):
        y_onehot=int_to_onehot(y,self.num_classes)
        d_loss__d_a_out = 2.*(a_out - y_onehot) / y.shape[0]
        d_a_out__d_z_out=a_out+(1. - a_out)
        delta_out = d_loss__d_a_out * d_a_out__d_z_out
        d_z_out__dw_out = a_h
        d_loss__dw_out = np.dot(delta_out.T, d_z_out__dw_out)
        d_loss__db_out = np.sum(delta_out, axis=0)

        d_z_out__a_h=self.weight_out
        d_loss_a_h=np.dot(delta_out,d_z_out__a_h)
        d_a_h__d_z_h = a_h * (1. - a_h)
        d_z_h__d_w_h = x
        d_loss__d_w_h = np.dot((d_loss_a_h * d_a_h__d_z_h).T,
d_z_h__d_w_h)
        d_loss__d_b_h = np.sum((d_loss_a_h * d_a_h__d_z_h), axis=0)
        return (d_loss__dw_out, d_loss__db_out,
d_loss__d_w_h, d_loss__d_b_h)

        


In [14]:
model=NeuralNetMLP(num_features=28*28,num_hidden=50,num_classes=10)

In [17]:
import numpy as np
num_epochs=50
minibatch_size=100

def minibatch_generator(X,y,minibatch_size):
    indices=np.arange(X.shape[0])
    np.random.shuffle(indices)
    for start_idx in range(0,indices.shape[0]-minibatch_size+1,minibatch_size):
        batch_idx=indices[start_idx:start_idx+minibatch_size]
        yield X[batch_idx],y[batch_idx]

In [18]:
for i in range(num_epochs):
 minibatch_gen = minibatch_generator(
X_train, y_train, minibatch_size)

 for X_train_mini, y_train_mini in minibatch_gen:
  break
 break
print(X_train_mini.shape)
print(y_train_mini.shape)

(100, 784)
(100,)


In [19]:
def mse_loss(targets,probas,num_labels=10):
    onehot_targets=int_to_onehot(targets,num_labels=num_labels)
    return np.mean((onehot_targets-probas)**2)

def accuracy(targets,predicted_labels):
    return np.mean(predicted_labels==targets)


In [20]:
_,probas=model.forward(X_valid)
mse=mse_loss(y_valid,probas)
print(f'Initial validation MSE: {mse:.1f}')
predicted_labels = np.argmax(probas, axis=1)
acc = accuracy(y_valid, predicted_labels)
print(f'Initial validation accuracy: {acc*100:.1f}%')

Initial validation MSE: inf
Initial validation accuracy: 9.8%


  return 1./1+np.exp(-z)
  return np.mean((onehot_targets-probas)**2)
