# Multilayer perceptron - numpy

In [1]:
import numpy as np
import pandas as pd

## MNIST dataset

Load data from https://www.openml.org/d/554

In [2]:
df = pd.read_csv('mnist_784.csv')
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
X = X / 255.

In [3]:
X_train, X_test = X[:60000], X[60000:]
y_train, y_test = y[:60000], y[60000:]

## One-hot Encode

In [4]:
def one_hot(data, class_num):
    data = np.array(data)
    x = np.zeros((data.shape[0], class_num))
    for i in range(data.shape[0]):
        x[i][int(data[i])] = 1
        
    return x

## Hidden Layer

In [5]:
class Hidden_Layer():
    def __init__(self, input_channel, output_channel, optimizer):
        self.weight = np.float64(np.random.rand(output_channel, input_channel) * 0.01)
        self.bias = np.zeros((output_channel, 1), dtype=np.float64)
        self.input_data = None
        self.weight_grad = None
        self.bias_grad = None
        self.optimizer = optimizer
    
    def forward(self, data):
        self.input_data = data
        output = np.dot(self.weight, data) + self.bias
        
        return output
    
    def backward(self, grad):
        data_grad = np.dot(self.weight.T, grad)
        self.weight_grad = np.dot(grad, self.input_data.T)
        self.bias_grad = np.sum(grad, axis=1).reshape(-1, 1)
        
        return data_grad
    
    def update(self):
        self.weight += self.optimizer(self.weight_grad)
        self.bias += self.optimizer(self.bias_grad)

## Activation Function

### ReLU

$ReLU(x)=max\{0,x\}$

$ReLU'(x)=
 \begin{cases}
   0 &\mbox{if  $ReLU(x) \leq 0$}\\
   1 &\mbox{if  $ReLU(x) > 0$}
   \end{cases}$

In [6]:
class ReLU():
    def __init__(self):
        self.result = None
    
    def forward(self, data):
        self.result = data
        self.result[data < 0] = 0
        
        return self.result
    
    def backward(self, grad):
        relu_grad = self.result
        relu_grad[grad > 0] = 1
        
        return relu_grad * grad
    
    def update(self):
        pass

### Sigmoid

$\sigma(x)=\frac{1}{1+e^{-x}}$

$\sigma'(x)=\sigma(x)(1-\sigma(x))$

In [7]:
class Sigmoid():
    def __init__(self):
        self.result = None
    
    def forward(self, data):
        self.result = 1 / (1 + np.exp(-data))
        
        return self.result
    
    def backward(self, grad):
        grad *= self.result * (1 - self.result)
        
        return grad
    
    def update(self):
        pass

## Cost Function

<font size=4>
$J(y, \hat{y})=-\frac{1}{m}\sum_{i=1}^m(y^ilog(\hat{y}^i)+(1-y^i)log(1-\hat{y}^i))$

$\frac{\partial{J}}{\partial{\hat{y}}}=-\frac{1}{m}(\frac{y}{\hat{y}}-\frac{1-y}{1-\hat{y}})$
</font>

In [8]:
def CostFunction(y, y_hat):
    m = y.shape[0]
    J = - (1 / m) * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))
    dJ = - (1 / m) * (y / y_hat + (1 - y) / (1 - y_hat))
    return dJ

## Optimizer

### SGD

<font size=4>
$w = w - \alpha\frac{\partial{J}}{\partial{w}}$
</font>

In [9]:
class SGD():
    def __init__(self, learning_rate):
        super(SGD, self).__init__()
        self.learning_rate = learning_rate
        
    def __call__(self, grad):
        return -self.learning_rate * grad

## Minibatches

In [10]:
def minibatches(length, batch_size=200):
    from random import  shuffle
    index_list = list(range(length))
    shuffle(index_list)
    batches = []
    while len(index_list):
        batches.append(list(index_list[:batch_size]))
        del index_list[:batch_size]
    return batches

## Accuracy

In [11]:
def accuracy(y_hat, lable):
    y_hat = np.argmax(y_hat, axis=0)
    return np.mean((y_hat == lable))

## MLP

In [12]:
class MLP():
    def __init__(self, mlp_list):
        self.mlp = mlp_list
        
    def forward(self, x):
        for layer in self.mlp:
            x = layer.forward(x)
        return x
    
    def backward(self, grad):
        last_grad = grad.copy()
        for layer in self.mlp[::-1]:
            last_grad = layer.backward(last_grad)
        return last_grad
    
    def update(self):
        for layer in self.mlp:
            layer.update()

## Do it!

In [13]:
class_num = 10
input_channel = 784
epochs = 10
sgd = SGD(0.001)

mlp_list = [Hidden_Layer(input_channel, class_num, sgd),
               Sigmoid(),
               ]

In [14]:
mlp = MLP(mlp_list)

for e in range(epochs):
    batches = minibatches(X_train.shape[0])
    for b, batch in enumerate(batches):
        data = X_train.iloc[batch]
        label = y_train.iloc[batch]
        result = mlp.forward(data.T)
        grad = CostFunction(result.T, one_hot(label, class_num))
        print('epoch ' + str(e) + ' batch ' + str(b) + ' accuracy: ' + str(accuracy(result, label)))
        mlp.backward(grad.T)
        mlp.update()

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.


epoch 0 batch 0 accuracy: 0.17
epoch 0 batch 1 accuracy: 0.08
epoch 0 batch 2 accuracy: 0.085
epoch 0 batch 3 accuracy: 0.08
epoch 0 batch 4 accuracy: 0.08
epoch 0 batch 5 accuracy: 0.12
epoch 0 batch 6 accuracy: 0.07
epoch 0 batch 7 accuracy: 0.115
epoch 0 batch 8 accuracy: 0.09
epoch 0 batch 9 accuracy: 0.125
epoch 0 batch 10 accuracy: 0.135
epoch 0 batch 11 accuracy: 0.12
epoch 0 batch 12 accuracy: 0.09
epoch 0 batch 13 accuracy: 0.115
epoch 0 batch 14 accuracy: 0.075
epoch 0 batch 15 accuracy: 0.16
epoch 0 batch 16 accuracy: 0.085
epoch 0 batch 17 accuracy: 0.105
epoch 0 batch 18 accuracy: 0.105
epoch 0 batch 19 accuracy: 0.115
epoch 0 batch 20 accuracy: 0.11
epoch 0 batch 21 accuracy: 0.17
epoch 0 batch 22 accuracy: 0.09
epoch 0 batch 23 accuracy: 0.09
epoch 0 batch 24 accuracy: 0.09
epoch 0 batch 25 accuracy: 0.135
epoch 0 batch 26 accuracy: 0.125
epoch 0 batch 27 accuracy: 0.1
epoch 0 batch 28 accuracy: 0.09
epoch 0 batch 29 accuracy: 0.115
epoch 0 batch 30 accuracy: 0.115
epoch

epoch 0 batch 274 accuracy: 0.12
epoch 0 batch 275 accuracy: 0.1
epoch 0 batch 276 accuracy: 0.11
epoch 0 batch 277 accuracy: 0.09
epoch 0 batch 278 accuracy: 0.105
epoch 0 batch 279 accuracy: 0.075
epoch 0 batch 280 accuracy: 0.095
epoch 0 batch 281 accuracy: 0.115
epoch 0 batch 282 accuracy: 0.11
epoch 0 batch 283 accuracy: 0.095
epoch 0 batch 284 accuracy: 0.07
epoch 0 batch 285 accuracy: 0.055
epoch 0 batch 286 accuracy: 0.085
epoch 0 batch 287 accuracy: 0.1
epoch 0 batch 288 accuracy: 0.105
epoch 0 batch 289 accuracy: 0.095
epoch 0 batch 290 accuracy: 0.085
epoch 0 batch 291 accuracy: 0.075
epoch 0 batch 292 accuracy: 0.1
epoch 0 batch 293 accuracy: 0.11
epoch 0 batch 294 accuracy: 0.055
epoch 0 batch 295 accuracy: 0.095
epoch 0 batch 296 accuracy: 0.07
epoch 0 batch 297 accuracy: 0.085
epoch 0 batch 298 accuracy: 0.09
epoch 0 batch 299 accuracy: 0.05
epoch 1 batch 0 accuracy: 0.06
epoch 1 batch 1 accuracy: 0.1
epoch 1 batch 2 accuracy: 0.135
epoch 1 batch 3 accuracy: 0.095
epoch 

epoch 1 batch 249 accuracy: 0.1
epoch 1 batch 250 accuracy: 0.105
epoch 1 batch 251 accuracy: 0.105
epoch 1 batch 252 accuracy: 0.1
epoch 1 batch 253 accuracy: 0.105
epoch 1 batch 254 accuracy: 0.125
epoch 1 batch 255 accuracy: 0.11
epoch 1 batch 256 accuracy: 0.105
epoch 1 batch 257 accuracy: 0.1
epoch 1 batch 258 accuracy: 0.08
epoch 1 batch 259 accuracy: 0.085
epoch 1 batch 260 accuracy: 0.075
epoch 1 batch 261 accuracy: 0.1
epoch 1 batch 262 accuracy: 0.085
epoch 1 batch 263 accuracy: 0.09
epoch 1 batch 264 accuracy: 0.07
epoch 1 batch 265 accuracy: 0.135
epoch 1 batch 266 accuracy: 0.125
epoch 1 batch 267 accuracy: 0.09
epoch 1 batch 268 accuracy: 0.055
epoch 1 batch 269 accuracy: 0.105
epoch 1 batch 270 accuracy: 0.12
epoch 1 batch 271 accuracy: 0.135
epoch 1 batch 272 accuracy: 0.13
epoch 1 batch 273 accuracy: 0.07
epoch 1 batch 274 accuracy: 0.085
epoch 1 batch 275 accuracy: 0.085
epoch 1 batch 276 accuracy: 0.095
epoch 1 batch 277 accuracy: 0.085
epoch 1 batch 278 accuracy: 0.

epoch 2 batch 205 accuracy: 0.06
epoch 2 batch 206 accuracy: 0.155
epoch 2 batch 207 accuracy: 0.07
epoch 2 batch 208 accuracy: 0.11
epoch 2 batch 209 accuracy: 0.115
epoch 2 batch 210 accuracy: 0.095
epoch 2 batch 211 accuracy: 0.09
epoch 2 batch 212 accuracy: 0.12
epoch 2 batch 213 accuracy: 0.12
epoch 2 batch 214 accuracy: 0.105
epoch 2 batch 215 accuracy: 0.095
epoch 2 batch 216 accuracy: 0.065
epoch 2 batch 217 accuracy: 0.1
epoch 2 batch 218 accuracy: 0.115
epoch 2 batch 219 accuracy: 0.105
epoch 2 batch 220 accuracy: 0.085
epoch 2 batch 221 accuracy: 0.105
epoch 2 batch 222 accuracy: 0.105
epoch 2 batch 223 accuracy: 0.095
epoch 2 batch 224 accuracy: 0.09
epoch 2 batch 225 accuracy: 0.105
epoch 2 batch 226 accuracy: 0.095
epoch 2 batch 227 accuracy: 0.085
epoch 2 batch 228 accuracy: 0.11
epoch 2 batch 229 accuracy: 0.08
epoch 2 batch 230 accuracy: 0.155
epoch 2 batch 231 accuracy: 0.085
epoch 2 batch 232 accuracy: 0.115
epoch 2 batch 233 accuracy: 0.13
epoch 2 batch 234 accuracy

epoch 3 batch 185 accuracy: 0.095
epoch 3 batch 186 accuracy: 0.095
epoch 3 batch 187 accuracy: 0.1
epoch 3 batch 188 accuracy: 0.125
epoch 3 batch 189 accuracy: 0.08
epoch 3 batch 190 accuracy: 0.11
epoch 3 batch 191 accuracy: 0.1
epoch 3 batch 192 accuracy: 0.09
epoch 3 batch 193 accuracy: 0.105
epoch 3 batch 194 accuracy: 0.085
epoch 3 batch 195 accuracy: 0.08
epoch 3 batch 196 accuracy: 0.095
epoch 3 batch 197 accuracy: 0.125
epoch 3 batch 198 accuracy: 0.12
epoch 3 batch 199 accuracy: 0.075
epoch 3 batch 200 accuracy: 0.11
epoch 3 batch 201 accuracy: 0.15
epoch 3 batch 202 accuracy: 0.1
epoch 3 batch 203 accuracy: 0.11
epoch 3 batch 204 accuracy: 0.09
epoch 3 batch 205 accuracy: 0.11
epoch 3 batch 206 accuracy: 0.115
epoch 3 batch 207 accuracy: 0.125
epoch 3 batch 208 accuracy: 0.1
epoch 3 batch 209 accuracy: 0.09
epoch 3 batch 210 accuracy: 0.125
epoch 3 batch 211 accuracy: 0.075
epoch 3 batch 212 accuracy: 0.11
epoch 3 batch 213 accuracy: 0.075
epoch 3 batch 214 accuracy: 0.11
e

epoch 4 batch 150 accuracy: 0.08
epoch 4 batch 151 accuracy: 0.085
epoch 4 batch 152 accuracy: 0.1
epoch 4 batch 153 accuracy: 0.075
epoch 4 batch 154 accuracy: 0.09
epoch 4 batch 155 accuracy: 0.11
epoch 4 batch 156 accuracy: 0.09
epoch 4 batch 157 accuracy: 0.1
epoch 4 batch 158 accuracy: 0.115
epoch 4 batch 159 accuracy: 0.11
epoch 4 batch 160 accuracy: 0.095
epoch 4 batch 161 accuracy: 0.085
epoch 4 batch 162 accuracy: 0.1
epoch 4 batch 163 accuracy: 0.1
epoch 4 batch 164 accuracy: 0.115
epoch 4 batch 165 accuracy: 0.09
epoch 4 batch 166 accuracy: 0.075
epoch 4 batch 167 accuracy: 0.085
epoch 4 batch 168 accuracy: 0.09
epoch 4 batch 169 accuracy: 0.105
epoch 4 batch 170 accuracy: 0.1
epoch 4 batch 171 accuracy: 0.1
epoch 4 batch 172 accuracy: 0.12
epoch 4 batch 173 accuracy: 0.115
epoch 4 batch 174 accuracy: 0.09
epoch 4 batch 175 accuracy: 0.1
epoch 4 batch 176 accuracy: 0.075
epoch 4 batch 177 accuracy: 0.095
epoch 4 batch 178 accuracy: 0.08
epoch 4 batch 179 accuracy: 0.065
epoc

epoch 5 batch 104 accuracy: 0.12
epoch 5 batch 105 accuracy: 0.13
epoch 5 batch 106 accuracy: 0.11
epoch 5 batch 107 accuracy: 0.09
epoch 5 batch 108 accuracy: 0.09
epoch 5 batch 109 accuracy: 0.155
epoch 5 batch 110 accuracy: 0.12
epoch 5 batch 111 accuracy: 0.095
epoch 5 batch 112 accuracy: 0.145
epoch 5 batch 113 accuracy: 0.14
epoch 5 batch 114 accuracy: 0.09
epoch 5 batch 115 accuracy: 0.095
epoch 5 batch 116 accuracy: 0.07
epoch 5 batch 117 accuracy: 0.125
epoch 5 batch 118 accuracy: 0.075
epoch 5 batch 119 accuracy: 0.115
epoch 5 batch 120 accuracy: 0.1
epoch 5 batch 121 accuracy: 0.11
epoch 5 batch 122 accuracy: 0.105
epoch 5 batch 123 accuracy: 0.11
epoch 5 batch 124 accuracy: 0.1
epoch 5 batch 125 accuracy: 0.09
epoch 5 batch 126 accuracy: 0.08
epoch 5 batch 127 accuracy: 0.095
epoch 5 batch 128 accuracy: 0.1
epoch 5 batch 129 accuracy: 0.09
epoch 5 batch 130 accuracy: 0.075
epoch 5 batch 131 accuracy: 0.095
epoch 5 batch 132 accuracy: 0.095
epoch 5 batch 133 accuracy: 0.08
e

epoch 6 batch 72 accuracy: 0.1
epoch 6 batch 73 accuracy: 0.095
epoch 6 batch 74 accuracy: 0.105
epoch 6 batch 75 accuracy: 0.09
epoch 6 batch 76 accuracy: 0.105
epoch 6 batch 77 accuracy: 0.09
epoch 6 batch 78 accuracy: 0.135
epoch 6 batch 79 accuracy: 0.105
epoch 6 batch 80 accuracy: 0.085
epoch 6 batch 81 accuracy: 0.125
epoch 6 batch 82 accuracy: 0.08
epoch 6 batch 83 accuracy: 0.105
epoch 6 batch 84 accuracy: 0.09
epoch 6 batch 85 accuracy: 0.09
epoch 6 batch 86 accuracy: 0.115
epoch 6 batch 87 accuracy: 0.085
epoch 6 batch 88 accuracy: 0.095
epoch 6 batch 89 accuracy: 0.1
epoch 6 batch 90 accuracy: 0.135
epoch 6 batch 91 accuracy: 0.1
epoch 6 batch 92 accuracy: 0.085
epoch 6 batch 93 accuracy: 0.095
epoch 6 batch 94 accuracy: 0.085
epoch 6 batch 95 accuracy: 0.15
epoch 6 batch 96 accuracy: 0.105
epoch 6 batch 97 accuracy: 0.155
epoch 6 batch 98 accuracy: 0.075
epoch 6 batch 99 accuracy: 0.11
epoch 6 batch 100 accuracy: 0.08
epoch 6 batch 101 accuracy: 0.08
epoch 6 batch 102 accur

epoch 7 batch 39 accuracy: 0.09
epoch 7 batch 40 accuracy: 0.085
epoch 7 batch 41 accuracy: 0.095
epoch 7 batch 42 accuracy: 0.12
epoch 7 batch 43 accuracy: 0.11
epoch 7 batch 44 accuracy: 0.09
epoch 7 batch 45 accuracy: 0.085
epoch 7 batch 46 accuracy: 0.115
epoch 7 batch 47 accuracy: 0.105
epoch 7 batch 48 accuracy: 0.105
epoch 7 batch 49 accuracy: 0.14
epoch 7 batch 50 accuracy: 0.125
epoch 7 batch 51 accuracy: 0.08
epoch 7 batch 52 accuracy: 0.075
epoch 7 batch 53 accuracy: 0.1
epoch 7 batch 54 accuracy: 0.125
epoch 7 batch 55 accuracy: 0.1
epoch 7 batch 56 accuracy: 0.075
epoch 7 batch 57 accuracy: 0.095
epoch 7 batch 58 accuracy: 0.14
epoch 7 batch 59 accuracy: 0.105
epoch 7 batch 60 accuracy: 0.085
epoch 7 batch 61 accuracy: 0.11
epoch 7 batch 62 accuracy: 0.165
epoch 7 batch 63 accuracy: 0.125
epoch 7 batch 64 accuracy: 0.07
epoch 7 batch 65 accuracy: 0.09
epoch 7 batch 66 accuracy: 0.095
epoch 7 batch 67 accuracy: 0.16
epoch 7 batch 68 accuracy: 0.115
epoch 7 batch 69 accuracy

epoch 8 batch 0 accuracy: 0.09
epoch 8 batch 1 accuracy: 0.12
epoch 8 batch 2 accuracy: 0.06
epoch 8 batch 3 accuracy: 0.11
epoch 8 batch 4 accuracy: 0.12
epoch 8 batch 5 accuracy: 0.065
epoch 8 batch 6 accuracy: 0.13
epoch 8 batch 7 accuracy: 0.095
epoch 8 batch 8 accuracy: 0.11
epoch 8 batch 9 accuracy: 0.1
epoch 8 batch 10 accuracy: 0.14
epoch 8 batch 11 accuracy: 0.105
epoch 8 batch 12 accuracy: 0.1
epoch 8 batch 13 accuracy: 0.1
epoch 8 batch 14 accuracy: 0.1
epoch 8 batch 15 accuracy: 0.095
epoch 8 batch 16 accuracy: 0.095
epoch 8 batch 17 accuracy: 0.12
epoch 8 batch 18 accuracy: 0.105
epoch 8 batch 19 accuracy: 0.07
epoch 8 batch 20 accuracy: 0.055
epoch 8 batch 21 accuracy: 0.095
epoch 8 batch 22 accuracy: 0.13
epoch 8 batch 23 accuracy: 0.085
epoch 8 batch 24 accuracy: 0.135
epoch 8 batch 25 accuracy: 0.095
epoch 8 batch 26 accuracy: 0.065
epoch 8 batch 27 accuracy: 0.095
epoch 8 batch 28 accuracy: 0.075
epoch 8 batch 29 accuracy: 0.085
epoch 8 batch 30 accuracy: 0.09
epoch 8

epoch 8 batch 262 accuracy: 0.075
epoch 8 batch 263 accuracy: 0.085
epoch 8 batch 264 accuracy: 0.11
epoch 8 batch 265 accuracy: 0.145
epoch 8 batch 266 accuracy: 0.105
epoch 8 batch 267 accuracy: 0.08
epoch 8 batch 268 accuracy: 0.105
epoch 8 batch 269 accuracy: 0.08
epoch 8 batch 270 accuracy: 0.1
epoch 8 batch 271 accuracy: 0.1
epoch 8 batch 272 accuracy: 0.095
epoch 8 batch 273 accuracy: 0.13
epoch 8 batch 274 accuracy: 0.1
epoch 8 batch 275 accuracy: 0.11
epoch 8 batch 276 accuracy: 0.11
epoch 8 batch 277 accuracy: 0.095
epoch 8 batch 278 accuracy: 0.085
epoch 8 batch 279 accuracy: 0.11
epoch 8 batch 280 accuracy: 0.085
epoch 8 batch 281 accuracy: 0.085
epoch 8 batch 282 accuracy: 0.09
epoch 8 batch 283 accuracy: 0.12
epoch 8 batch 284 accuracy: 0.115
epoch 8 batch 285 accuracy: 0.115
epoch 8 batch 286 accuracy: 0.085
epoch 8 batch 287 accuracy: 0.07
epoch 8 batch 288 accuracy: 0.11
epoch 8 batch 289 accuracy: 0.085
epoch 8 batch 290 accuracy: 0.095
epoch 8 batch 291 accuracy: 0.1

epoch 9 batch 212 accuracy: 0.1
epoch 9 batch 213 accuracy: 0.105
epoch 9 batch 214 accuracy: 0.13
epoch 9 batch 215 accuracy: 0.08
epoch 9 batch 216 accuracy: 0.13
epoch 9 batch 217 accuracy: 0.115
epoch 9 batch 218 accuracy: 0.08
epoch 9 batch 219 accuracy: 0.08
epoch 9 batch 220 accuracy: 0.135
epoch 9 batch 221 accuracy: 0.135
epoch 9 batch 222 accuracy: 0.085
epoch 9 batch 223 accuracy: 0.14
epoch 9 batch 224 accuracy: 0.085
epoch 9 batch 225 accuracy: 0.085
epoch 9 batch 226 accuracy: 0.095
epoch 9 batch 227 accuracy: 0.1
epoch 9 batch 228 accuracy: 0.1
epoch 9 batch 229 accuracy: 0.13
epoch 9 batch 230 accuracy: 0.08
epoch 9 batch 231 accuracy: 0.08
epoch 9 batch 232 accuracy: 0.075
epoch 9 batch 233 accuracy: 0.095
epoch 9 batch 234 accuracy: 0.08
epoch 9 batch 235 accuracy: 0.1
epoch 9 batch 236 accuracy: 0.095
epoch 9 batch 237 accuracy: 0.11
epoch 9 batch 238 accuracy: 0.09
epoch 9 batch 239 accuracy: 0.115
epoch 9 batch 240 accuracy: 0.11
epoch 9 batch 241 accuracy: 0.095
e

## Test

In [15]:
result = mlp.forward(X_test.T)
print('Test set accuracy: ' + str(accuracy(result, y_test)))

Test set accuracy: 0.098


In [16]:
result

array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       ...,
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan]])