In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

In [17]:
dataset = load_digits()
x = dataset.data
y = dataset.target
y = np.eye(10)[y] # One hot

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)
x_train.shape, x_test.shape, y_train.shape, y_test.shape

((1437, 64), (360, 64), (1437, 10), (360, 10))

In [19]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def softmax(x):
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def root_mean_squired_error(y_gt, y_pred):
    return np.sqrt(np.mean(np.square(y_gt - y_pred)))

def cross_entropy(y, y_pred):
    return -np.sum(y * np.log(y_pred + 1e-9))

In [20]:
epochs = 80
lr = .001

d_in = x_train.shape[1]
h2 = 128
h1 = 32
d_out = y_train.shape[1]


In [21]:
w1 = np.random.randn(d_in, h1)
w2 = np.random.randn(h1, h2)
w3 = np.random.randn(h2, d_out)

In [22]:
b1 = np.random.randn(1, h1)
b2 = np.random.randn(1, h2)
b3 = np.random.randn(1, d_out)

In [23]:
for epochs in range(epochs):
    # train
    Y_pred_train = []
    for x, y in zip(x_train, y_train):

        x = x.reshape(1, -1)
        y = y.reshape(1, -1)
        # Forward

        # layer 1
        out1 = sigmoid(x @ w1 + b1)

        # layer 2
        out2 = sigmoid(out1 @ w2 + b2)

        # Layer 3 or predicted "y", because this layers is output layer
        # In classification situations, we usually use softmax activation function
        y_pred = softmax(out2 @ w3 + b3)

        # loss = cross_entropy(y, y_pred)
        error = y_pred - y
        Y_pred_train.append(y_pred)

        # backward: you must calculate derivative for each layer many times
        # layer 3
        error = y - y_pred
        grad_b3 = error
        grad_w3 = out2.T @ error


        # layer 2
        error = error @ w3.T * out2 * (1 - out2)
        grad_b2 = error
        grad_w2 = out1.T @ error

        # layer 1
        error = error @ w2.T * out1 * (1 - out1)
        grad_b1 = error
        grad_w1 = x.T @ error

        # update

        # layer 1
        w1 = w1 - (lr * grad_w1)
        b1 = b1 - (lr * grad_b1)

        # layer 2
        w2 = w2 - lr * grad_w2
        b2 = b2 - lr * grad_b2

        # layer 3
        w3 = w3 - lr * grad_w3
        b3 = b3 - lr * grad_b3

        # acc

        # evaluate

        acc = ...

    # test
    Y_pred_test = []
    for x, y in zip(x_test, y_test):
        # Forward

        # layer 1
        out1 = sigmoid(x @ w1 + b1)

        # layer 2
        out2 = sigmoid(out1 @ w2 + b2)

        # Layer 3 or predicted "y", because this layers is output layer
        # In classification situations, we usually use softmax activation function
        y_pred = softmax(out2 @ w3 + b3)

        # loss = cross_entropy(y, y_pred)
        error = y_pred - y
        Y_pred_test.append(y_pred)

    # train results
    Y_pred_train = np.array(Y_pred_train).reshape(-1, 10)
    loss_train = root_mean_squired_error(Y_pred_train, y_train)
    accuracy_train = np.sum(np.argmax(y_train, axis=1) == np.argmax(Y_pred_train , axis=1)) / len(y_train)
    print(f"loss_train: {loss_train}")
    print(f"accuracy_train: {accuracy_train}")

    # test results
    Y_pred_test = np.array(Y_pred_test).reshape(-1, 10)
    loss_test = root_mean_squired_error(Y_pred_test, y_test)
    accuracy_test = np.sum(np.argmax(y_test, axis=1) == np.argmax(Y_pred_test, axis=1)) / len(y_test)
    print(f"loss_test: {loss_test}")
    print(f"accuracy_test: {accuracy_test}")

loss_train: 0.34517048689459545
accuracy_train: 0.13987473903966596
loss_test: 0.309110103767163
accuracy_test: 0.24444444444444444
loss_train: 0.29491642242790966
accuracy_train: 0.3173277661795407
loss_test: 0.2844687700035382
accuracy_test: 0.3472222222222222
loss_train: 0.2713896831811188
accuracy_train: 0.4279749478079332
loss_test: 0.2656854160245652
accuracy_test: 0.4444444444444444
loss_train: 0.25331519651975937
accuracy_train: 0.534446764091858
loss_test: 0.25112237761885103
accuracy_test: 0.55
loss_train: 0.23832339709086092
accuracy_train: 0.5901183020180932
loss_test: 0.2393051508511419
accuracy_test: 0.5916666666666667
loss_train: 0.22536097141312922
accuracy_train: 0.639526791927627
loss_test: 0.2289058641454882
accuracy_test: 0.6277777777777778
loss_train: 0.2147813481684948
accuracy_train: 0.6840640222686152
loss_test: 0.22175207637737207
accuracy_test: 0.6527777777777778
loss_train: 0.2064943782907799
accuracy_train: 0.708420320111343
loss_test: 0.2168297853919547
acc

In [12]:
x

array([ 0.,  0.,  2., 13., 13.,  1.,  0.,  0.,  0.,  0.,  9., 13.,  5.,
        0.,  0.,  0.,  0.,  0., 13.,  5.,  0.,  0.,  0.,  0.,  0.,  0.,
       15.,  2.,  0.,  0.,  0.,  0.,  0.,  0., 15., 10.,  9.,  9.,  2.,
        0.,  0.,  0., 16., 11.,  8., 11., 12.,  0.,  0.,  1., 14., 11.,
        1.,  4., 13.,  0.,  0.,  0.,  3., 11., 16., 15.,  4.,  0.])

In [15]:
print(x)

[[ 0.]
 [ 0.]
 [ 2.]
 [13.]
 [13.]
 [ 1.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 9.]
 [13.]
 [ 5.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [13.]
 [ 5.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [15.]
 [ 2.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 0.]
 [15.]
 [10.]
 [ 9.]
 [ 9.]
 [ 2.]
 [ 0.]
 [ 0.]
 [ 0.]
 [16.]
 [11.]
 [ 8.]
 [11.]
 [12.]
 [ 0.]
 [ 0.]
 [ 1.]
 [14.]
 [11.]
 [ 1.]
 [ 4.]
 [13.]
 [ 0.]
 [ 0.]
 [ 0.]
 [ 3.]
 [11.]
 [16.]
 [15.]
 [ 4.]
 [ 0.]]


In [24]:
x.reshape(1, -1)

array([[ 0.,  0.,  0.,  5., 11.,  0.,  0.,  0.,  0.,  0.,  1., 14.,  9.,
         0.,  0.,  0.,  0.,  0.,  4., 14.,  1.,  0.,  0.,  0.,  0.,  0.,
        10.,  8.,  0.,  0.,  0.,  0.,  0.,  0., 13.,  8.,  4.,  6.,  2.,
         0.,  0.,  0., 11., 16., 13., 12., 13.,  0.,  0.,  0., 12., 14.,
         4.,  5., 16.,  2.,  0.,  0.,  1.,  8., 16., 13.,  9.,  1.]])