In [1]:
import numpy as np
import copy
import matplotlib.pyplot as plt

from keras.datasets import mnist

np.random.seed(1)

2023-09-20 10:43:48.929663: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
#loading the dataset
(train_X, train_y), (test_X, test_y) = mnist.load_data()

In [3]:
train_X.shape

(60000, 28, 28)

In [4]:
X_train = (np.array(train_X).reshape(train_X.shape[0], train_X.shape[1] * train_X.shape[2])).T
T_train = np.zeros((10, X_train.shape[1]))
for i, n in enumerate(train_y):
    T_train[n][i] = 1

In [5]:
X_test = (np.array(test_X).reshape(test_X.shape[0], test_X.shape[1] * test_X.shape[2])).T
T_test = np.zeros((10, X_test.shape[1]))
for i, n in enumerate(test_y):
    T_test[n][i] = 1

In [76]:
def NN(X: list, Y: list, eps: float, eta: float, print_rate=1, max_epoch=10000) -> list:
    print(f'X.shape: {X.shape}')
    print(f'Y.shape: {Y.shape}')
    epoch = 0
    history = list()
    W = np.random.rand(Y.shape[0], X.shape[0])
    print(f'W.shape: {W.shape}')
    while epoch < max_epoch:
        history.append(0)
        for i in range(X.shape[1]):
            v = W @ X[:, i:i+1]
            pred_value = np.argmax(v)

            if Y[pred_value, i] != 1:
                history[epoch] += 1

        if epoch % print_rate == 0:
            print(f'{epoch}: errors -> {history[epoch]}; % -> {history[epoch - 1] / X.shape[1]}')
            
        epoch += 1

        if (history[epoch - 1] / X.shape[1]) <= eps:
            break

        for i in range(X.shape[1]):
            W = W + eta * ((Y[:, i:i+1] - np.heaviside(W @ X[:, i:i+1], 0))) @ X[:, i:i+1].T

    plt.plot(history, marker = 'o')
    #plt.title("Curve of misprediction during epoches (n = 0.1, # of sample = 100)")
    plt.xlabel('Number of epochs')
    plt.ylabel('Number of misclassification')
    plt.grid()
    # plt.savefig('plot/history-50samples-eta1.png', dpi=400, bbox_inches="tight", transparent=True)
    plt.savefig(f'plot/history-{X.shape[1]}samples-eta{eta}.png', dpi=400, bbox_inches="tight")

    plt.clf()

    return W


In [56]:
def test(W: list, X: list, Y: list):
    errors = 0
    for i in range(X_test.shape[1]):
        v = np.heaviside(W @ X[:, i], 0)
        true_value = Y[:, i].tolist().index(1)
        if v[true_value] != 1:
            errors += 1
    print(f'Errors: {errors}; Percentage: {errors/Y.shape[1]:.2f}')

In [61]:
print(T_train[:, :50].shape)
W = NN(copy.deepcopy(X_train[:, :50]), copy.deepcopy(T_train[:, :50]), 0, 1)
test(W, X_test, T_test)

(10, 50)
X.shape: (784, 50)
Y.shape: (10, 50)
W.shape: (10, 784)
0: errors -> 48
1: errors -> 17
2: errors -> 4
3: errors -> 4
4: errors -> 0
Errors: 5553; Percentage: 0.56


<Figure size 640x480 with 0 Axes>

In [62]:
W = NN(copy.deepcopy(X_train[:, :1000]), copy.deepcopy(T_train[:1000]), 0, 1)
test(W, X_test, T_test)

X.shape: (784, 1000)
Y.shape: (10, 60000)
W.shape: (10, 784)
0: errors -> 922
1: errors -> 226
2: errors -> 187
3: errors -> 97
4: errors -> 74
5: errors -> 75
6: errors -> 77
7: errors -> 95
8: errors -> 33
9: errors -> 52
10: errors -> 24
11: errors -> 34
12: errors -> 40
13: errors -> 20
14: errors -> 30
15: errors -> 28
16: errors -> 35
17: errors -> 41
18: errors -> 18
19: errors -> 12
20: errors -> 29
21: errors -> 4
22: errors -> 39
23: errors -> 27
24: errors -> 7
25: errors -> 8
26: errors -> 4
27: errors -> 13
28: errors -> 5
29: errors -> 3
30: errors -> 5
31: errors -> 1
32: errors -> 8
33: errors -> 3
34: errors -> 12
35: errors -> 7
36: errors -> 5
37: errors -> 1
38: errors -> 0
Errors: 2137; Percentage: 0.21


<Figure size 640x480 with 0 Axes>

In [80]:
W = NN(copy.deepcopy(X_train[:, :]), copy.deepcopy(T_train[:, :]), 0.14, 1, print_rate=50)
test(W, X_test, T_test)

X.shape: (784, 60000)
Y.shape: (10, 60000)
W.shape: (10, 784)
0: errors -> 49601; % -> 0.8266833333333333
Errors: 2022; Percentage: 0.20


<Figure size 640x480 with 0 Axes>

In [81]:
eta = [0.01, 0.1, 10]

for i in range(3):
    W = NN(copy.deepcopy(X_train[:, :]), copy.deepcopy(T_train[:, :]), 0.14, eta[i], print_rate=50)
    test(W, X_test, T_test)

X.shape: (784, 60000)
Y.shape: (10, 60000)
W.shape: (10, 784)
0: errors -> 55760; % -> 0.9293333333333333
Errors: 2051; Percentage: 0.21
X.shape: (784, 60000)
Y.shape: (10, 60000)
W.shape: (10, 784)
0: errors -> 54443; % -> 0.9073833333333333
Errors: 2251; Percentage: 0.23
X.shape: (784, 60000)
Y.shape: (10, 60000)
W.shape: (10, 784)
0: errors -> 51633; % -> 0.86055
Errors: 1823; Percentage: 0.18


<Figure size 640x480 with 0 Axes>