## Multi Layer Perceptron

In [43]:
import numpy as np

from sklearn.utils import shuffle
from sklearn.metrics import accuracy_score
from sklearn.datasets import fetch_mldata
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [30]:
# Load MNIST, then shuffle images and labels together with a fixed seed.
# NOTE(review): fetch_mldata is not available in recent scikit-learn releases
# (fetch_openml is the documented replacement) — confirm the installed version
# before re-running this cell.
mnist = fetch_mldata('MNIST original')
mnist_X, mnist_y = shuffle(
    mnist.data,
    mnist.target.astype('int32'),
    random_state=42,
)

# Divide by 255 (presumably mapping 0-255 pixel intensities into [0, 1]) and
# keep only the first 1000 shuffled samples so the notebook runs quickly.
mnist_X = mnist_X / 255.0
mnist_X = mnist_X[:1000]
mnist_y = mnist_y[:1000]

# 80/20 train/test split with its own fixed seed.
train_X, test_X, train_y, test_y = train_test_split(
    mnist_X, mnist_y, test_size=0.2, random_state=43)

# Convert the training labels to a one-of-K (one-hot) representation.
# test_y is left as integer class labels for the accuracy computation.
train_y = np.eye(10)[train_y]

In [9]:
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), evaluated without overflow.

    The textbook form calls exp(-x), which overflows (RuntimeWarning) for
    large negative x. Here exp is only ever applied to non-positive values:
      x >= 0:  1      / (1 + exp(-x))
      x <  0:  exp(x) / (1 + exp(x))
    Both cases share the denominator 1 + exp(-|x|).

    Args:
        x: scalar or NumPy array.

    Returns:
        Element-wise sigmoid of x, same shape as x.
    """
    numerator = np.exp(np.minimum(x, 0))   # 1 for x >= 0, exp(x) for x < 0
    denominator = 1 + np.exp(-np.abs(x))   # always in (1, 2]
    return numerator / denominator

def deriv_sigmoid(x):
    """Derivative of the sigmoid at x: sigmoid(x) * (1 - sigmoid(x)).

    Args:
        x: scalar or NumPy array (the pre-activation, not the activation).

    Returns:
        Element-wise derivative, same shape as x.
    """
    s = sigmoid(x)  # evaluate once and reuse
    return s * (1 - s)

In [20]:
class Layer:
    """A fully connected layer: affine map followed by an element-wise activation.

    Attributes:
        W: weight matrix of shape (in_dim, out_dim), float32, initialised
            uniformly in [-0.08, 0.08).
        b: bias vector of shape (out_dim,), float32, initialised to zeros.
        function: activation applied to the pre-activation `u`.
        deriv_function: derivative of the activation, evaluated at `u`.
        u: pre-activation from the most recent `f_prop` (None before that).
        z: activation output from the most recent `f_prop` (None before that).
        delta: error signal stored during back-propagation (None before that).
    """

    def __init__(self, in_dim, out_dim, function, deriv_function):
        """Create a layer with random weights and zero biases.

        Args:
            in_dim: number of input features.
            out_dim: number of output units.
            function: callable activation, applied to an array.
            deriv_function: callable derivative of `function`, applied to
                the pre-activation array.
        """
        self.W = np.random.uniform(low=-0.08, high=0.08,
                                   size=(in_dim, out_dim)).astype("float32")
        self.b = np.zeros(out_dim).astype("float32")
        self.function = function
        self.deriv_function = deriv_function
        # Forward/backward caches. All three are declared here so that
        # reading them before a pass yields None instead of AttributeError.
        self.u = None
        self.z = None
        self.delta = None

    def f_prop(self, x):
        """Forward pass: cache and return function(x . W + b).

        Args:
            x: input array whose last axis has length in_dim.

        Returns:
            The activation output, also stored in `self.z`.
        """
        self.u = np.dot(x, self.W) + self.b
        self.z = self.function(self.u)
        return self.z

    def b_prop(self, delta, W):
        """Backward pass: compute and cache this layer's error signal.

        Must be called after `f_prop`, because it reads the cached `self.u`.

        Args:
            delta: error signal of the layer that follows this one.
            W: weight matrix of the layer that follows this one.

        Returns:
            deriv_function(u) * (delta . W^T), also stored in `self.delta`.
        """
        self.delta = self.deriv_function(self.u) * np.dot(delta, W.T)
        return self.delta
    
def f_props(layers, x):
    """Run a forward pass through `layers` in order.

    Each layer's `f_prop` receives the previous layer's output; the first
    layer receives `x`. With no layers, `x` is returned as is.

    Args:
        layers: sequence of objects exposing `f_prop(input)`.
        x: input to the first layer.

    Returns:
        Output of the last layer.
    """
    activation = x
    for current in layers:
        activation = current.f_prop(activation)
    return activation

def b_props(layers, delta):
    """Back-propagate `delta` through `layers`, from last to first.

    The last layer stores `delta` directly as its error signal. Every earlier
    layer calls `b_prop` with the error signal and weight matrix of the layer
    that follows it, which stores the result on that layer.

    Args:
        layers: sequence of layers in forward order; each exposes `W`,
            `delta` and `b_prop(delta, W)`.
        delta: error signal at the network output.

    Returns:
        None. Results are left in each layer's `delta` attribute.
    """
    remaining = iter(layers[::-1])
    top = next(remaining, None)
    if top is None:
        return

    # Output layer: the supplied error signal is used as is.
    top.delta = delta
    W_above = top.W

    # Hidden layers: each needs the delta and weights of the layer above it.
    for layer in remaining:
        delta = layer.b_prop(delta, W_above)
        W_above = layer.W

In [21]:
# Network: 784 inputs -> 100 hidden units -> 10 outputs, sigmoid throughout.
# Layers are constructed in forward order, one per (in_dim, out_dim) pair.
layers = [
    Layer(in_dim, out_dim, sigmoid, deriv_sigmoid)
    for in_dim, out_dim in [(784, 100), (100, 10)]
]

$$ E ( {\bf \theta} ) =  -\sum^N_{n=1} \sum^K_{k=1} \left[ t_{nk} \log y_k ({\bf x}_n ; {\bf \theta}) + (1 - t_{nk}) \log \{ 1 - y_k ({\bf x}_n ; {\bf \theta}) \}\right] $$

In [28]:
def train(X, t, eps=1.0):
    """Apply one gradient-descent update to the module-level `layers`.

    Runs a forward pass on X, uses (output - t) as the output-layer error
    signal, back-propagates it, and then updates every layer's W and b in
    place of the old values.

    Args:
        X: 2-D input array, one sample per row (the notebook passes a
            single row at a time).
        t: target array with the same shape as the network output.
        eps: learning rate multiplying each gradient.

    Returns:
        None. The layers in `layers` are modified.
    """
    # Forward pass, then push the output error back through the network.
    output = f_props(layers, X)
    b_props(layers, output - t)

    # Each layer's gradient uses the input that layer saw in the forward pass.
    layer_input = X
    for layer in layers:
        grad_W = np.dot(layer_input.T, layer.delta)
        # Dotting with a ones vector sums delta over the sample axis.
        grad_b = np.dot(np.ones(len(layer_input)), layer.delta)
        layer.W = layer.W - eps * grad_W
        layer.b = layer.b - eps * grad_b
        layer_input = layer.z

def test(X, t):
    """Return the network output for X using the module-level `layers`.

    Args:
        X: input array passed to the first layer.
        t: targets; accepted but not used by this function.

    Returns:
        The output of the last layer for X.
    """
    return f_props(layers, X)

In [47]:
# Train with per-sample (online) gradient descent and report held-out
# accuracy after every epoch.
n_epochs = 10

# Wrapping the iterable lets tqdm advance the bar after each epoch's work is
# done and close it when the loop ends.
for epoch in tqdm(range(n_epochs)):
    for x, y in zip(train_X, train_y):
        # train() expects 2-D inputs, so add a leading sample axis.
        train(x[np.newaxis, :], y[np.newaxis, :])

    # Predicted class = index of the largest output unit in each row.
    pred_y = np.argmax(test(test_X, test_y), axis=1)

    # accuracy_score takes (y_true, y_pred). tqdm.write prints the value
    # without breaking the progress bar rendering.
    tqdm.write(str(accuracy_score(test_y, pred_y)))


  from ipykernel import kernelapp as app

 20%|██        | 2/10 [00:00<00:01,  5.86it/s]

0.765


[A
 30%|███       | 3/10 [00:00<00:01,  4.83it/s]

0.73


[A
 40%|████      | 4/10 [00:00<00:01,  4.43it/s]

0.755


[A
 50%|█████     | 5/10 [00:01<00:01,  4.21it/s]

0.785


[A
 60%|██████    | 6/10 [00:01<00:00,  4.02it/s]

0.77


[A
 70%|███████   | 7/10 [00:01<00:00,  3.87it/s]

0.745


[A
 80%|████████  | 8/10 [00:01<00:00,  3.81it/s]

0.765


[A
 90%|█████████ | 9/10 [00:02<00:00,  3.79it/s]

0.775


[A
100%|██████████| 10/10 [00:02<00:00,  3.59it/s]

0.75


[A

0.76



