In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [3]:
# Load the scikit-learn digits dataset and one-hot encode the integer labels.
dataset = load_digits()
X = dataset.data
labels = dataset.target
n_classes = int(labels.max()) + 1  # derive the class count from the labels instead of hard-coding 10
Y = np.eye(n_classes)[labels]  # row k of the identity matrix is the one-hot code for class k

# Fixed random_state so the train/test split is the same on every Restart & Run All.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=.2, random_state=42)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((1437, 64), (360, 64), (1437, 10), (360, 10))

In [4]:
def sigmoid(X):
  """Element-wise logistic function 1 / (1 + exp(-X)).

  The value is computed from exp(-|X|), so np.exp never receives a large
  positive argument. The naive form overflows (RuntimeWarning) when X is
  very negative.

  X: scalar or array-like of real numbers.
  Returns: ndarray with the same shape as X and values in [0, 1].
  """
  X = np.asarray(X, dtype=float)
  e = np.exp(-np.abs(X))  # in [0, 1], cannot overflow
  # X >= 0: 1 / (1 + exp(-X));  X < 0: exp(X) / (1 + exp(X)) -- same function, stable branch each side.
  return np.where(X >= 0, 1 / (1 + e), e / (1 + e))

def softmax(X):
  """Softmax over the last axis of X.

  The per-row maximum is subtracted before exponentiating. This leaves the
  result mathematically unchanged but keeps np.exp from overflowing to inf
  (which would produce nan) for large logits.

  Normalisation is done per row (last axis). For a 1-D vector or a single
  (1, N) row this is the same as normalising over the whole array; for a
  batch of rows each row sums to 1 independently.

  X: array-like of logits with at least one dimension.
  Returns: ndarray of the same shape as X.
  """
  X = np.asarray(X, dtype=float)
  shifted = X - np.max(X, axis=-1, keepdims=True)  # largest entry per row becomes 0
  exps = np.exp(shifted)
  return exps / np.sum(exps, axis=-1, keepdims=True)

def root_mean_squired_error(Y_gt, Y_pred):
  """Root-mean-square error between ground truth and predictions.

  The element-wise difference is squared, averaged over every element,
  and square-rooted, giving a single scalar.
  """
  residual = Y_gt - Y_pred
  mean_sq = np.mean(np.square(residual))
  return np.sqrt(mean_sq)

In [5]:
# Training hyper-parameters.
epochs = 80
η = 0.001  # learning rate

# Layer widths: input/output sizes are read from the data,
# the two hidden sizes are fixed by hand.
D_in, D_out = X_train.shape[1], Y_train.shape[1]
H1, H2 = 128, 32


In [6]:
# Weight matrices drawn from a standard normal, one per layer, shaped (inputs, outputs).
# Drawn in the order W1, W2, W3.
W1, W2, W3 = (
    np.random.randn(fan_in, fan_out)
    for fan_in, fan_out in ((D_in, H1), (H1, H2), (H2, D_out))
)

In [7]:
# Bias row vectors drawn from a standard normal, one per layer, shaped (1, outputs).
# Drawn in the order B1, B2, B3.
B1, B2, B3 = (np.random.randn(1, width) for width in (H1, H2, D_out))

In [8]:
def forward(x):
  """Forward pass of the three-layer network for a single sample.

  x: column vector of shape (D_in, 1).
  Reads the current module-level W1..W3 and B1..B3.
  Returns (out1, out2, y_pred): the two sigmoid hidden activations and the
  softmax output, each as a row vector.
  """
  out1 = sigmoid(x.T @ W1 + B1)        # layer 1
  out2 = sigmoid(out1 @ W2 + B2)       # layer 2
  y_pred = softmax(out2 @ W3 + B3)     # layer 3
  return out1, out2, y_pred


def accuracy(Y_gt, Y_pred):
  """Fraction of rows whose arg-max column agrees between Y_gt and Y_pred."""
  return np.mean(np.argmax(Y_gt, axis=1) == np.argmax(Y_pred, axis=1))


for epoch in range(epochs):

  # train: one parameter update per sample, in dataset order
  Y_pred_train = []
  for x, y in zip(X_train, Y_train):
    x = x.reshape(-1, 1)

    # forward
    out1, out2, y_pred = forward(x)
    Y_pred_train.append(y_pred)  # prediction made before this sample's update

    # backward
    # layer 3
    # -2 * (y - y_pred) is the derivative of (y - y_pred)**2 w.r.t. y_pred. It is
    # used directly as the layer-3 delta; the softmax Jacobian is not applied.
    # NOTE(review): this is 2x the softmax + cross-entropy gradient, not the exact
    # squared-error gradient through softmax -- confirm this is intended.
    delta3 = -2 * (y - y_pred)
    grad_B3 = delta3
    grad_W3 = out2.T @ delta3

    # layer 2: back-propagate through W3, then through the sigmoid (s' = s * (1 - s))
    delta2 = delta3 @ W3.T * out2 * (1 - out2)
    grad_B2 = delta2
    grad_W2 = out1.T @ delta2

    # layer 1
    delta1 = delta2 @ W2.T * out1 * (1 - out1)
    grad_B1 = delta1
    grad_W1 = x @ delta1

    # update (all gradients above were computed with the pre-update weights)
    W1 -= η * grad_W1
    B1 -= η * grad_B1

    W2 -= η * grad_W2
    B2 -= η * grad_B2

    W3 -= η * grad_W3
    B3 -= η * grad_B3

  # test: forward pass only, with the weights as they stand after this epoch
  Y_pred_test = [forward(x.reshape(-1, 1))[2] for x in X_test]

  # metrics; only the test accuracy is printed, the others are kept for inspection
  Y_pred_train = np.array(Y_pred_train).reshape(-1, D_out)
  loss_train = root_mean_squired_error(Y_train, Y_pred_train)
  accuracy_train = accuracy(Y_train, Y_pred_train)

  Y_pred_test = np.array(Y_pred_test).reshape(-1, D_out)
  loss_test = root_mean_squired_error(Y_test, Y_pred_test)
  accuracy_test = accuracy(Y_test, Y_pred_test)
  print('accuracy test : ',accuracy_test)


accuracy test :  0.21666666666666667
accuracy test :  0.33055555555555555
accuracy test :  0.45
accuracy test :  0.5305555555555556
accuracy test :  0.5861111111111111
accuracy test :  0.6416666666666667
accuracy test :  0.6916666666666667
accuracy test :  0.7194444444444444
accuracy test :  0.7388888888888889
accuracy test :  0.7611111111111111
accuracy test :  0.7666666666666667
accuracy test :  0.775
accuracy test :  0.7861111111111111
accuracy test :  0.8
accuracy test :  0.8055555555555556
accuracy test :  0.8111111111111111
accuracy test :  0.8166666666666667
accuracy test :  0.825
accuracy test :  0.8333333333333334
accuracy test :  0.8305555555555556
accuracy test :  0.8361111111111111
accuracy test :  0.8333333333333334
accuracy test :  0.8333333333333334
accuracy test :  0.8388888888888889
accuracy test :  0.8388888888888889
accuracy test :  0.8388888888888889
accuracy test :  0.8472222222222222
accuracy test :  0.8527777777777777
accuracy test :  0.8555555555555555
accuracy 