In [154]:
import numpy as np
import pandas as pd
import math
from sklearn.preprocessing import normalize


mnist_train = pd.read_csv("mnist_data/mnist_train.csv")
mnist_train


Unnamed: 0,label,1x1,1x2,1x3,1x4,1x5,1x6,1x7,1x8,1x9,...,28x19,28x20,28x21,28x22,28x23,28x24,28x25,28x26,28x27,28x28
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,4,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59995,8,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59996,3,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59997,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
59998,6,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [155]:
mnist_train = mnist_train.to_numpy()
print(mnist_train)

[[5 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 0]
 [4 0 0 ... 0 0 0]
 ...
 [5 0 0 ... 0 0 0]
 [6 0 0 ... 0 0 0]
 [8 0 0 ... 0 0 0]]


In [156]:
X = normalize(np.array([i[1:] for i in mnist_train]), axis=0)
y = np.array([i[0] for i in mnist_train])

print(f"X: {X}")
print(f"X-shape: {X.shape}")
print(f"y: {y}")
print(f"y-len: {len(y)}")

print(f"Unique y-values: {len(np.unique(y))}") 


X: [[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]
X-shape: (60000, 784)
y: [5 0 4 ... 5 6 8]
y-len: 60000
Unique y-values: 10


In [172]:
class NeuralNetwork():
    def __init__(self, X, y, n_hidden=10, alpha=0.5, epochs=1):
        self.X = X # values
        self.y = y # labels
        self.n_hidden = n_hidden # number of hidden neurons
        self.alpha = alpha # learning rate
        self.epochs = epochs # n of iterations through all of the data

        self.m, self.n_input = X.shape  # m = amount of samples, n = amount of inputs per sample
        self.unique_y = len(np.unique(y));

        # A1 : layer 1 (hidden)
        self.W1 = np.random.randn(self.n_input, self.n_hidden)
        self.B1 = np.zeros((1, self.n_hidden))

        # A2 : layer 2 (output)
        self.W2 = np.random.randn(self.n_hidden, self.unique_y)
        self.B2 = np.zeros((1, self.unique_y))
    
    # activation function
    def reLU(self, Z):
        return np.maximum(Z, 0)
    
    def deriv_reLU(self, Z):
        return Z > 0
    
    def sigmoid(self, Z):
        return np.exp(Z) / 1 + np.exp(Z)
    
    def deriv_sigmoid(self, Z):
        return Z * (1-Z)
    
    def softmax(self, Z):
        z_max = np.max(Z, axis=1, keepdims=True)
        z_subtract = np.subtract(Z, z_max)
        z_exp = np.exp(z_subtract)
        z_sum = np.sum(z_exp, axis=1, keepdims=True)
        return z_exp / z_sum

    def forward_prop(self):
        # layer 1
        Z1 = self.X.dot(self.W1) + self.B1
        A1 = self.sigmoid(Z1)

        # layer 2
        Z2 = A1.dot(self.W2) + self.B2
        A2 = self.softmax(Z2)

        return Z1, A1, Z2, A2
    

    def one_hot_y(self):
        one_hot_y = np.zeros((self.m, self.unique_y))
        one_hot_y[np.arange(self.m), self.y] = 1
        return one_hot_y
    
    def back_prop(self, A2, Z2, A1, Z1):
        dZ2 = A2 - self.one_hot_y()       # cost of every output for each sample
        dW2 = 1/self.m * A1.T.dot(dZ2)       # how much to nudge weights 2
        dB2 = 1/self.m * np.sum(dZ2.T, axis=1, keepdims=True).T       # how much to nudge biases 2
        dZ1 = dZ2.dot(self.W2.T) * self.deriv_sigmoid(Z1)       # errors of weights and reverse of activation
        dW1 = 1/self.m * self.X.T.dot(dZ1)        # how much to nudge weights 1
        dB1 = 1/self.m * np.sum(dZ1.T, axis=1, keepdims=True).T       # how much to nudge biases 1


        return dW2, dB2, dW1, dB1, dZ2, dZ1
    
    def update_params(self, dW1, dW2, dB1, dB2):
        W1 = self.W1 - self.alpha * dW1
        W2 = self.W2 - self.alpha * dW2
        B1 = self.B1 - self.alpha * dB1
        B2 = self.B2 - self.alpha * dB2

        return W1, W2, B1, B2
    
    def save_model(self):
        model = {"w1": self.W1, "b1": self.B1, "w2": self.W2, "b2": self.B2}
        return model

    def train(self):
        for i in range(self.epochs):
            Z1, A1, Z2, A2 = self.forward_prop()
            dW2, dB2, dW1, dB1, dZ2, dZ1 = self.back_prop(A2, Z2, A1, Z1)
            self.W1, self.W2, self.B1, self.B2 = self.update_params(dW1, dW2, dB1, dB2)

            if i % 10 == 0:
                print("Iteration: ", i)
                print("Accuracy: ", np.sum(np.argmax(A2, 1) == self.y) / self.m)




    



In [173]:
net = NeuralNetwork(X, y, n_hidden=20, alpha=0.5, epochs=50)

Z1, A1, Z2, A2 = net.forward_prop()
dW2, dB2, dW1, dB1, dZ2, dZ1 = net.back_prop(A2, Z2, A1, Z1)
W1, B1, W2, B2 = net.W1, net.B1, net.W2, net.B2

# print(net.X.shape)
# print(net.W1.shape)
# print(net.X.dot(net.W1).shape)
# print(net.B1)
# print(net.X.dot(net.W1))
# print(net.X.dot(net.W1) + net.B1)
# print(Z1.shape)
# print(A1.shape)
# print(net.W2.shape)
print(Z2)
# print(A2.shape)


# print(A2.shape)
# print(net.one_hot_y().shape)
# print(dZ2)
# print(A1)
# print(A1.T.dot(dZ2))

print(B2)
print(dB2)





[[-14.31349531  -4.7508303  -10.00685919 ... -10.79826952 -17.68297921
    6.51311735]
 [-13.54388526  -4.08149473 -10.31741994 ...  -8.79979781 -17.73558093
    8.14126137]
 [-14.14536632  -3.51309939 -11.39131399 ...  -9.18500377 -19.27570492
    8.98249326]
 ...
 [-13.2609656   -4.6266844   -9.87296981 ...  -9.20600311 -16.66727103
    7.05623591]
 [-12.10954076  -3.56227126 -11.34370076 ...  -8.32094252 -18.37671321
    6.24726364]
 [-14.94254774  -4.18128566 -11.14027497 ...  -7.35719809 -17.73002727
    8.67309177]]
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
[[-0.09869101 -0.11230059 -0.09929998  0.4990033  -0.09730349 -0.08980661
  -0.09855611 -0.1044166  -0.09751667  0.29888776]]


In [174]:
net = NeuralNetwork(X, y, n_hidden=20, alpha=0.8, epochs=200)

# net.forward_prop()
# dZ2, dW2, dB2, dZ1, dW1, dB1 = net.back_prop()

# import joblib

net.train()
# joblib.dump(model, 'model.joblib')





Iteration:  0
Accuracy:  0.09881666666666666
Iteration:  10
Accuracy:  0.09718333333333333
Iteration:  20
Accuracy:  0.0721
Iteration:  30
Accuracy:  0.10086666666666666
Iteration:  40
Accuracy:  0.15115
Iteration:  50
Accuracy:  0.10698333333333333
Iteration:  60
Accuracy:  0.14255
Iteration:  70
Accuracy:  0.13363333333333333


KeyboardInterrupt: 