In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from mosaMLP import MLP
from easydict import EasyDict as edict

In [2]:
digits = load_digits()
X = digits.data
Y = digits.target
Y = np.eye(10)[Y]  # one hot
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3)

X_train.shape, Y_train.shape, X_test.shape, Y_test.shape

((1257, 64), (1257, 10), (540, 64), (540, 10))

In [3]:
MLPstructure = edict(
    D_in = X_train.shape[1],
    H1 = 128,
    H2 = 32,
    D_out = Y_train.shape[1],
    η = 0.001,
)

model = MLP(MLPstructure)
train_output = model.fit(X_train, Y_train, X_test, Y_test, epochs=200)

loss train: 0.3349964975557618 acc train: 0.1288782816229117
loss test: 0.3145453009096426 acc test: 0.12222222222222222
loss train: 0.2996322574354489 acc train: 0.23389021479713604
loss test: 0.29448821019084415 acc test: 0.2518518518518518
loss train: 0.28326435290198865 acc train: 0.3221957040572792
loss test: 0.28301287609071407 acc test: 0.32037037037037036
loss train: 0.2711251194153281 acc train: 0.4081145584725537
loss test: 0.272920100840897 acc test: 0.40185185185185185
loss train: 0.2602620053594599 acc train: 0.4805091487669053
loss test: 0.2646250815244694 acc test: 0.45
loss train: 0.24949537385706513 acc train: 0.5465393794749404
loss test: 0.2563483165296616 acc test: 0.5074074074074074
loss train: 0.23920350428664788 acc train: 0.6054097056483692
loss test: 0.2479577016265888 acc test: 0.55
loss train: 0.22957559997176782 acc train: 0.6523468575974543
loss test: 0.24000667657157013 acc test: 0.5796296296296296
loss train: 0.22030735868187598 acc train: 0.6992840095465

In [4]:
W1, W2, W3 = np.random.randn(D_in, H1), np.random.randn(H1, H2), np.random.randn(H2, D_out)
B1, B2, B3 = np.random.randn(H1), np.random.randn(H2), np.random.randn(D_out)

NameError: name 'D_in' is not defined

In [None]:
for epoch in range(epochs):

    # train
    
    Y_pred = []
    for x, y in zip(X_train, Y_train):

        # forward
        x = x.reshape(-1, 1)

        # layer 1
        net1 = x.T @ W1 + B1
        out1 = sigmoid(net1)

        # layer 2
        net2 = out1 @ W2 + B2
        out2 = sigmoid(net2)

        # layer 3
        net3 = out2 @ W3 + B3
        out3 = softmax(net3)

        y_pred = out3
        Y_pred.append(y_pred.T)

        # back propagation

        # layer 3
        error = -2 * (y - y_pred)
        grad_W3 = out2.T @ error
        grad_B3 = error

        # layer 2
        error = error @ W3.T * out2 * (1 - out2)
        grad_W2 = out1.T @ error
        grad_B2 = error

        # layer 1
        error = error @ W2.T * out1 * (1 - out1)
        grad_W1 = x @ error
        grad_B1 = error

        # update

        # layer 1
        W1 = W1 - η * grad_W1
        B1 = B1 - η * grad_B1
        
        # layer 2
        W2 = W2 - η * grad_W2
        B2 = B2 - η * grad_B2

        # layer 3
        W3 = W3 - η * grad_W3
        B3 = B3 - η * grad_B3

    Y_pred = np.array(Y_pred).reshape(-1, 10)
    loss_train = root_mean_squired_error(Y_pred, Y_train)
    acc_train = np.mean(np.argmax(Y_pred, axis=1) == np.argmax(Y_train, axis=1))
    
    # test

    Y_pred = []
    for x, y in zip(X_test, Y_test):

        # forward
        x = x.reshape(-1, 1)

        # layer 1
        net1 = x.T @ W1 + B1
        out1 = sigmoid(net1)

        # layer 2
        net2 = out1 @ W2 + B2
        out2 = sigmoid(net2)

        # layer 3
        net3 = out2 @ W3 + B3
        out3 = softmax(net3)

        y_pred = out3
        Y_pred.append(y_pred.T)

    Y_pred = np.array(Y_pred).reshape(-1, 10)
    loss_test = root_mean_squired_error(Y_pred, Y_test)
    acc_test = np.mean(np.argmax(Y_pred, axis=1) == np.argmax(Y_test, axis=1))

    print('loss train:', loss_train, 'acc train:', acc_train)
    print('loss test:', loss_test, 'acc test:', acc_test)

print('train completed!')

loss train: 0.32876991970936065 acc train: 0.15910898965791567
loss test: 0.2974473791285755 acc test: 0.27037037037037037
loss train: 0.28861985874357066 acc train: 0.3015115354017502
loss test: 0.27764417575961214 acc test: 0.3851851851851852
loss train: 0.26766636073440314 acc train: 0.4303898170246619
loss test: 0.2668043004484215 acc test: 0.4462962962962963
loss train: 0.2512767568865605 acc train: 0.5330151153540175
loss test: 0.2543257343808141 acc test: 0.512962962962963
loss train: 0.2360286431243093 acc train: 0.6062052505966588
loss test: 0.24101387070737132 acc test: 0.575925925925926
loss train: 0.22214937668116133 acc train: 0.6706443914081146
loss test: 0.23044991209608143 acc test: 0.6240740740740741
loss train: 0.2112238768078817 acc train: 0.7128082736674622
loss test: 0.22261298877347147 acc test: 0.6555555555555556
loss train: 0.20164746765740355 acc train: 0.7597454256165473
loss test: 0.2160182371619532 acc test: 0.6777777777777778
loss train: 0.19311878286327966

In [None]:
import cv2

image = cv2.imread("input/test4.png")
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = image.astype(np.float32)

x = image.reshape(-1, 1)

# layer 1
net1 = x.T @ W1 + B1
out1 = sigmoid(net1)

# layer 2
net2 = out1 @ W2 + B2
out2 = sigmoid(net2)

# layer 3
net3 = out2 @ W3 + B3
out3 = softmax(net3)

y_pred = out3
print(np.argmax(y_pred))

1


In [None]:
image

array([[ 0.,  6., 15., 15., 15., 16.,  4.,  0.],
       [ 0., 12., 15.,  2.,  0., 14., 12.,  0.],
       [ 0., 13., 14.,  0.,  0., 13., 14.,  0.],
       [ 0., 13., 14.,  0.,  0., 13., 14.,  0.],
       [ 0., 12., 14.,  0.,  0., 14., 13.,  0.],
       [ 0., 11., 15.,  0.,  2., 16., 13.,  0.],
       [ 0.,  5., 16.,  6., 13., 16.,  9.,  0.],
       [ 0.,  1., 14., 16., 16., 15.,  0.,  0.]], dtype=float32)