In [26]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder

In [27]:
dataset = load_digits()
X = dataset.data
Y = dataset.target
Y = np.eye(10)[Y]  #one hot

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
X_train.shape, X_test.shape, Y_train.shape, Y_test.shape

((1437, 64), (360, 64), (1437, 10), (360, 10))

epochs = 80

D_in = X_train.shape[1]
H1 = 128
H2 = 32
D_out = Y_train.shape[1]

In [28]:
class MLP:
    def __init__(self, D_in, H1, H2, D_out, learning_rate=0.001):
        self.learning_rate = learning_rate
        self.D_in = D_in
        self.D_out = D_out
        self.H1 = H1
        self.H2 = H2

        # Initialize weights and biases
        self.W1 = np.random.randn(D_in, H1)
        self.W2 = np.random.randn(H1, H2)
        self.W3 = np.random.randn(H2, D_out)

        self.B1 = np.random.randn(1, H1)
        self.B2 = np.random.randn(1, H2)
        self.B3 = np.random.randn(1, D_out)

    def sigmoid(self, X):
        return 1 / (1 + np.exp(-X))

    def softmax(self, X):
        return np.exp(X) / np.sum(np.exp(X))

    def root_mean_square_error(self, Y_gt, Y_pred):
        return np.sqrt(np.mean((Y_gt - Y_pred) ** 2))

    def forward(self, x):
        #layer1
        out1 = self.sigmoid(x.T @ self.W1 + self.B1)
        #print('out1.shape:', out1.shape)

        #layer2
        out2 = self.sigmoid(out1 @ self.W2 + self.B2)
        #print('out2.shape:', out2.shape)

        #layer3
        out3 = self.softmax(out2 @ self.W3 + self.B3)
        #print('out3.shape:', out3.shape)

        y_pred = out3
        return out1, out2, y_pred

    def backward(self,x ,y ,y_pred ,out1, out2):

        #layer3
        print('in backward y:', y)
        print('in backward y shape is:', y.shape)
        print('in backward y_pred:', y_pred)
        print('in backward y_pred shape is:', y_pred.shape)
        error = -2 * (y - y_pred) # error dar vaghe hamman grad_error yani moshtagh error hast
        grad_B3 = error
        grad_W3 = out2.T @ error

        #layer2
        error = error @ self.W3.T * out2 * (1 - out2)
        grad_B2 = error
        grad_W2 = out1.T @ error

        #layer1
        error = error @ self.W2.T * out1 * (1 - out1)
        grad_B1 = error
        grad_W1 = x @ error

        self.update(grad_W1, grad_B1, grad_W2, grad_B2, grad_W3, grad_B3)


    def update(self, grad_W1, grad_B1, grad_W2, grad_B2, grad_W3, grad_B3):
        #update

        #layer1
        self.W1 -= self.learning_rate * grad_W1
        self.B1 -= self.learning_rate * grad_B1

        #layer2
        self.W2 -= self.learning_rate * grad_W2
        self.B2 -= self.learning_rate * grad_B2

        #layer3
        self.W3 -= self.learning_rate * grad_W3
        self.B3 -= self.learning_rate * grad_B3

    #train
    def train_test(self, epochs, X_train, Y_train, X_test, Y_test):
        for epoch in range(epochs):
            Y_pred_train = []
            for x,y in zip(X_train, Y_train):

                x = x.reshape(-1, 1)
                print('x.shape:',x.shape)
                print('y.shape:', y.shape)

                #forward

                out1, out2, y_pred = self.forward(x)
                Y_pred_train.append(y_pred)

                #backward
                print('y_pred.shape:', y.shape)
                self.backward(x ,y ,y_pred ,out1, out2)


            Y_pred_test = []
            for x,y in zip(X_test, Y_test):

                x = x.reshape(-1, 1)
                print('x_test:',x,'x.shape:',x.shape)
                print('y_test:',y,'y.shape:', y.shape)
                #forward

                out1, out2, y_pred = self.forward(x)
                Y_pred_test.append(y_pred)


        Y_pred_train = np.array(Y_pred_train).reshape(-1, 10)

        loss_train = self.root_mean_square_error(Y_train, Y_pred_train)
        accuracy_train = np.sum(np.argmax(Y_train, axis=1) == np.argmax(Y_pred_train, axis=1)) / len(Y_train)
        print('----------------------------epoch:', epoch,'--------------------------------------------')
        print('loss train:',loss_train)
        print('accuracy train:',accuracy_train)

        Y_pred_test = np.array(Y_pred_test).reshape(-1, 10)

        loss_test = self.root_mean_square_error(Y_test, Y_pred_test)
        accuracy_test = np.sum(np.argmax(Y_test, axis=1) == np.argmax(Y_pred_test, axis=1)) / len(Y_test)

        print('loss test:',loss_test)
        print('accuracy test:',accuracy_test)

In [29]:
mlp_model = MLP(D_in=X_train.shape[1], H1=128, H2=32, D_out=X_test.shape[1])
mlp_model.train_test(epochs=80, X_train=X_train, Y_train=Y_train, X_test=X_test, Y_test=Y_test)

x.shape: (64, 1)
y.shape: (10,)
y_pred.shape: (10,)
in backward y: [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
in backward y shape is: (10,)
in backward y_pred: [[2.23364417e-01 2.50486776e-06 1.08869942e-08 3.03373099e-03
  3.53124547e-06 1.04154055e-03 1.84693079e-04 1.41609597e-06
  6.23977818e-06 5.06033848e-05 2.82705062e-05 1.58591445e-04
  4.19522843e-04 4.53084378e-06 1.79986138e-07 4.98992029e-04
  6.26251476e-10 5.90832173e-04 1.85351944e-05 8.11569257e-03
  1.41729675e-04 2.80616869e-03 3.43302763e-07 2.61498567e-06
  1.93801109e-06 4.14162014e-04 1.38284184e-05 2.62685957e-04
  1.25038090e-07 1.47011551e-06 2.06915034e-04 1.06579759e-07
  2.89540065e-01 1.40952697e-02 5.29165584e-05 3.10409689e-06
  6.26321374e-09 2.15701143e-04 6.60385392e-06 5.41884218e-05
  5.81479026e-09 2.81944865e-04 3.99580933e-05 2.29008038e-05
  9.75609969e-09 2.72822175e-01 6.30781228e-03 5.23358898e-05
  1.99650075e-05 2.52431387e-06 4.64777456e-07 1.05068360e-03
  3.97090967e-08 2.63563222e-03 2.77059987e-0

ValueError: operands could not be broadcast together with shapes (10,) (1,64) 

In [None]:
import cv2

image = cv2.imread("num2.png")
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
image = image.reshape(64, 1)

x = image
#forward

#layer1
out1 = mlp_model.sigmoid(x.T @ mlp_model.W1 + mlp_model.B1)

#layer2
out2 = mlp_model.sigmoid(out1 @ mlp_model.W2 + mlp_model.B2)

#layer3
out3 = mlp_model.softmax(out2 @ mlp_model.W3 + mlp_model.B3)
y_pred = out3
print(np.argmax(y_pred))