In [1]:
import numpy as np

from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [2]:

import pickle
def load_pickle(file):
    """Read and return the object pickled in ``file``.

    The path is opened in binary mode. If unpickling raises, the exception is
    printed and ``None`` is returned instead of the object. Errors raised while
    opening the file are not caught and propagate to the caller.

    NOTE: ``pickle.load`` can execute arbitrary code, so only point this at
    files from a trusted source.
    """
    with open(file, mode='rb') as handle:
        try:
            return pickle.load(handle)
        except Exception as err:
            # Best-effort: report the problem and fall through to ``None``.
            print(err)
    return None
# Load the feature and label arrays (presumably MNIST, going by the file names)
# from pickle files in the current working directory.
# load_pickle returns None when unpickling fails, so a corrupt file only
# surfaces later, when these names are sliced.
mnist_X = load_pickle('mnist_X.pickle')
mnist_y = load_pickle('mnist_y.pickle')

In [3]:
def sigmoid(x):
    """Element-wise logistic function ``1 / (1 + exp(-x))``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def deriv_sigmoid(x):
    """Element-wise derivative of the logistic function, ``s * (1 - s)`` with ``s = sigmoid(x)``."""
    s = sigmoid(x)
    return s*(1 - s)

In [4]:
def ReLU(x):
    """Rectified linear unit: keep entries that are strictly positive, replace the rest with 0."""
    is_positive = x > 0
    return np.where(is_positive, x, 0)

def deriv_ReLU(x):
    """Derivative of ReLU: 1 where the input is strictly positive, 0 elsewhere."""
    is_positive = x > 0
    return np.where(is_positive, 1, 0)

In [5]:
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    x : array of shape (n_rows, n_classes)
        Scores; normalisation runs along axis 1.

    Returns
    -------
    Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtracting each row's maximum leaves the result mathematically unchanged
    # but keeps np.exp from overflowing to inf (inf / inf -> nan) on large scores.
    shifted = x - np.max(x, axis=1, keepdims=True)
    ex = np.exp(shifted)
    return ex / np.sum(ex, axis=1, keepdims=True)

In [6]:
# Keep only the first 10000 samples and hold out 20% of them for evaluation;
# the fixed random_state makes the split repeatable.
train_X, test_X, train_y, test_y = train_test_split(
    mnist_X[:10000],
    mnist_y[:10000],
    test_size=0.2,
    random_state=43,
)

In [7]:
class Layer:
    """One fully connected layer with caches for manual backpropagation.

    Attributes
    ----------
    W : float32 array, shape (in_dim, out_dim), drawn uniformly from [-0.08, 0.08)
    b : float32 array, shape (out_dim,), initialised to zeros
    function : activation applied to the pre-activation ``u``
    deriv_function : derivative of ``function`` evaluated at ``u``; may be None
        for a layer on which ``b_prop`` is never called
    u, z, delta : pre-activation, activation and error term from the most recent
        forward / backward pass (None until the corresponding pass has run)
    """

    def __init__(self, in_dim, out_dim, function, deriv_function):
        self.W = np.random.uniform(low= -0.08, high= 0.08, size=(in_dim, out_dim)).astype("float32")
        self.b = np.zeros(out_dim).astype("float32")
        self.function = function
        self.deriv_function = deriv_function
        # Caches are created up front so that reading them before the first
        # pass yields None rather than raising AttributeError.
        self.u = None
        self.z = None
        self.delta = None

    def f_prop(self, x):
        """Forward pass: cache ``u = x @ W + b`` and ``z = function(u)``, return ``z``."""
        self.u = np.dot(x, self.W) + self.b
        self.z = self.function(self.u)
        return self.z

    def b_prop(self, delta, W):
        """Backward pass: compute, cache and return this layer's error term.

        ``delta`` and ``W`` are the error term and weight matrix supplied by the
        caller (the layer that follows this one in the forward direction).
        Uses the ``u`` cached by the latest ``f_prop`` call, so a forward pass
        must have run first, and ``deriv_function`` must be callable.
        """
        self.delta = self.deriv_function(self.u)*np.dot(delta, W.T)
        return self.delta
    
def f_props(layers, x):
    """Feed ``x`` through ``layers`` in order and return the last layer's output.

    Each layer's ``f_prop`` receives the previous layer's result; with no
    layers, ``x`` is returned unchanged.
    """
    activation = x
    for current in layers:
        activation = current.f_prop(activation)
    return activation

def b_props(layers, delta):
    """Propagate the output error backwards through ``layers``.

    The last layer simply stores ``delta`` as its error term. Every earlier
    layer, visited from back to front, gets its error term from ``b_prop``,
    called with the error term and weight matrix of the layer visited just
    before it. Nothing is returned; results are left on each layer's ``delta``.
    """
    downstream_W = None
    for depth, current in enumerate(reversed(layers)):
        if depth > 0:
            delta = current.b_prop(delta, downstream_W)
        else:
            current.delta = delta
        downstream_W = current.W

In [8]:
def train(X, t, eps=1., l=1e-5):
    """Run one gradient-descent step on the module-level ``layers`` for batch ``(X, t)``.

    Parameters
    ----------
    X : input batch fed to the first layer
    t : targets, subtracted from the network output to form the output error
    eps : step size applied to both weight and bias gradients
    l : coefficient of the ``l * W`` / ``l * b`` term added to the gradients

    The gradients are summed over the batch (not averaged). Each layer's ``W``
    and ``b`` are rebound to the updated arrays; nothing is returned.
    """
    output = f_props(layers, X)
    b_props(layers, output - t)

    layer_input = X
    for layer in layers:
        grad_W = np.dot(layer_input.T, layer.delta) + l*layer.W
        # Dotting with a vector of ones sums the error terms over the batch axis.
        grad_b = np.dot(np.ones(layer_input.shape[0]), layer.delta) + l*layer.b
        layer.W = layer.W - eps*grad_W
        layer.b = layer.b - eps*grad_b
        # The next layer's input is this layer's cached activation.
        layer_input = layer.z

def test(X, t):
    """Return the output of the module-level ``layers`` for input ``X``.

    ``t`` is accepted but not used.
    """
    return f_props(layers, X)

In [9]:
# Per-feature mean and standard deviation of the training split.
mean, std = train_X.mean(axis=0), train_X.std(axis=0)
# NOTE(review): despite the `_std` suffix, no standardisation is applied here;
# the raw arrays are re-bound under new names and `mean` / `std` are not used
# in this cell. Confirm that this is intentional.
train_X_std, test_X_std = train_X, test_X

In [10]:
# Two-layer network: 784 inputs -> 300 ReLU units -> 10 softmax outputs.
# `layers` is the module-level list that train() and test() read.
# The softmax layer gets no derivative function: b_props assigns the output
# layer's delta directly and never calls b_prop on it.
layers = [Layer(784, 300, ReLU, deriv_ReLU),
          Layer(300, 10, softmax, None)]
# Mini-batch size.
batch = 101
for epoch in range(100):
    # Sweep the training set in its stored order, one full mini-batch at a
    # time, stopping while at least one sample is still unprocessed.
    i = 0
    while i+batch < len(train_X_std):
        train(train_X_std[i:i+batch], train_y[i:i+batch],1e-2)
        i+=batch
    # Remaining tail: between 1 and `batch` samples for a non-empty training set.
    # NOTE(review): this call uses eps=1e-1, ten times the step size of the
    # full batches above. Confirm whether that is intentional.
    train(train_X_std[i:], train_y[i:],1e-1)
    # After each epoch print accuracy on the training split, then on the
    # held-out split. The argmax over axis 1 of the targets implies they are
    # 2-D (presumably one-hot encoded; verify against the pickled data).
    pred_train_y = test(train_X_std, train_y)
    print(accuracy_score(np.argmax(pred_train_y,axis=1), np.argmax(train_y,axis=1)))
    pred_y = test(test_X_std, test_y)
    print(accuracy_score(np.argmax(pred_y,axis=1), np.argmax(test_y,axis=1)))

0.41
0.398
0.801
0.7795
0.950375
0.9255
0.9715
0.9355
0.979375
0.939
0.984
0.943
0.989875
0.9465
0.993125
0.948
0.995625
0.95
0.9975
0.9505
0.9985
0.95
0.999625
0.9515
0.999875
0.953
0.999875
0.954
1.0
0.954
1.0
0.955
1.0
0.9545
1.0
0.9545
1.0
0.955
1.0
0.955
1.0
0.955
1.0
0.9555
1.0
0.955
1.0
0.955
1.0
0.9555
1.0
0.955
1.0
0.9545
1.0
0.954
1.0
0.954
1.0
0.954
1.0
0.954
1.0
0.954
1.0
0.954
1.0
0.954
1.0
0.9535
1.0
0.954
1.0
0.953
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.953
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9535
1.0
0.9

In [11]:
# Display the bias vector of the first layer (the 784 -> 300 ReLU layer) after training.
layers[0].b

array([ -3.34396594e-02,  -4.34336850e-03,  -4.14485873e-02,
        -3.26082680e-02,  -4.23745605e-02,  -5.14595779e-02,
        -2.58717311e-02,  -6.50979058e-02,  -5.25351105e-02,
        -5.24963235e-02,   8.34838498e-02,  -7.02306604e-02,
         1.88557036e-02,  -2.39676807e-02,  -1.80786709e-02,
         4.56097360e-02,  -1.28623516e-02,  -2.39203432e-02,
        -1.21870521e-01,  -1.07637423e-01,   3.82132457e-01,
        -9.05877776e-03,  -3.41983688e-02,  -2.92019772e-02,
         2.83706656e-02,  -1.08072862e-01,  -2.68558471e-02,
        -1.03561002e-02,  -3.12769945e-02,  -1.25578417e-02,
        -4.15110825e-02,   1.16731099e-02,   5.57172545e-02,
        -1.34237050e-01,   8.47518535e-03,   6.68806131e-02,
         1.69284642e-02,  -5.76905444e-02,   4.42302962e-02,
        -6.21315911e-04,  -5.49578683e-02,  -1.46421210e-02,
        -6.58683640e-02,  -6.91390869e-02,   9.21345919e-02,
        -1.96618806e-03,  -8.77059932e-02,  -5.52221987e-04,
         6.52231186e-02,