In [39]:
import os
import numpy as np
from sklearn.datasets import fetch_openml

In [64]:
class Affine:
    def __init__(self, w, b):
        self.w = w
        self.b = b
        self.dw = None
        self.db = None
        self.input = None
    
    def forward(self, x):
        self.input = x
        y = np.dot(self.w, x) + self.b
        return y
    
    def backward(self, dx):
        self.dw = np.dot(dx ,self.input.T)
        self.db = np.sum(dx, axis=1, keepdims=True)
        return np.dot(self.w.T, dx)
    
    def update(self, lr=0.1):
        self.w -= self.dw * lr
        self.b -= self.db * lr
    
class Sigmoid:
    def __init__(self):
        self.output = None
        
    def forward(self, x):
        y = 1 / (1 + np.exp(-x))
        self.output = y
        return y
    
    def backward(self, dx):
        return dx * self.output * (1.0 - self.output)
    
class MSE:
    def __init__(self):
        self.output = None
        self.t = None
    
    def forward(self, z, t):
        data_num = z.shape[-1]
        loss = np.sum((z - t) ** 2) / (2 * data_num)
        self.z = z
        self.t = t
        return loss
    
    def backward(self):
        return self.z - self.t
    
class SigmoidAndCrossEntropy:
    def __init__(self):
        self.y = None
        self.t = None
        
    def forward(self, x, t):
        y = 1 / (1 + np.exp(-x))
        data_num = y.shape[-1]
        loss = -1 * (t * np.log(y) + (1 - t) * np.log(1 - y)).mean()
        self.y = y
        self.t = t
        return loss
        
    def backward(self):
        return self.y - self.t

## Exercise4.1-1

In [21]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim = 2
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = SigmoidAndCrossEntropy()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    loss = layer4.forward(q, t)
    
    dq = layer4.backward()
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    layer3.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', layer4.y)

epoch 0 loss 0.706575883796967 y [[0.587688   0.53509165 0.58682017 0.54249822]]
epoch 100 loss 0.6904405173924182 y [[0.50075134 0.52039929 0.47843631 0.49171858]]
epoch 200 loss 0.21629958627642565 y [[0.20741608 0.88634178 0.72301019 0.17118378]]
epoch 300 loss 0.0234047518994882 y [[0.02277354 0.97216757 0.97750472 0.01941201]]
epoch 400 loss 0.011920514314600485 y [[0.01151172 0.9850331  0.98898454 0.00989765]]
epoch 500 loss 0.007947573541286022 y [[0.0076516  0.98983841 0.99275481 0.00660224]]
epoch 600 loss 0.005946770652306348 y [[0.00571502 0.99232783 0.99461493 0.00494204]]
epoch 700 loss 0.004745125096296916 y [[0.00455449 0.99384518 0.99571994 0.00394472]]
epoch 800 loss 0.003944749204570458 y [[0.00378266 0.99486506 0.99645101 0.00328031]]
epoch 900 loss 0.0033739198761100543 y [[0.00323279 0.99559694 0.99697005 0.00280637]]


## Exercise4.1-2

In [22]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(output_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(output_dim, 1) - 1.0


layer1 = Affine(w, b)
layer2 = SigmoidAndCrossEntropy()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    loss = layer2.forward(p, t)
    
    dp = layer2.backward()
    dx = layer1.backward(dp)
    
    layer1.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', layer2.y)

epoch 0 loss 0.7424335089477943 y [[0.30996943 0.28534938 0.45070116 0.42173493]]
epoch 100 loss 0.6931471805599454 y [[0.5 0.5 0.5 0.5]]
epoch 200 loss 0.6931471805599452 y [[0.5 0.5 0.5 0.5]]
epoch 300 loss 0.6931471805599453 y [[0.5 0.5 0.5 0.5]]
epoch 400 loss 0.6931471805599453 y [[0.5 0.5 0.5 0.5]]
epoch 500 loss 0.6931471805599453 y [[0.5 0.5 0.5 0.5]]
epoch 600 loss 0.6931471805599453 y [[0.5 0.5 0.5 0.5]]
epoch 700 loss 0.6931471805599453 y [[0.5 0.5 0.5 0.5]]
epoch 800 loss 0.6931471805599453 y [[0.5 0.5 0.5 0.5]]
epoch 900 loss 0.6931471805599453 y [[0.5 0.5 0.5 0.5]]


## Exercise4.1-3

In [23]:
x = np.array([[0, 0, 0, 0, 1, 1, 1, 1],
              [0, 0, 1, 1, 0, 0, 1, 1],
              [0, 1, 0, 1, 0, 1, 0, 1]])
t = np.array([0, 1, 1, 0, 1, 0, 0, 1])

input_dim = 3
hidden_dim = 3
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = SigmoidAndCrossEntropy()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    loss = layer4.forward(q, t)
    
    dq = layer4.backward()
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    layer3.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', layer4.y)

epoch 0 loss 0.7569672267475516 y [[0.68545861 0.6991134  0.6918391  0.70733958 0.63240986 0.64532427
  0.64019012 0.65242227]]
epoch 100 loss 0.7073718682628555 y [[0.53652657 0.59627517 0.60144254 0.61633766 0.59582246 0.6147338
  0.61597021 0.62012729]]
epoch 200 loss 0.6724121053595216 y [[0.27892378 0.9027918  0.93097588 0.78106731 0.90232495 0.74981821
  0.7807005  0.70198355]]
epoch 300 loss 0.2946331684145387 y [[0.0154804  0.98377857 0.98341429 0.26103761 0.98371482 0.26104629
  0.26099229 0.25045144]]
epoch 400 loss 0.15213501825753073 y [[0.01807941 0.98631608 0.98653915 0.18770692 0.9864161  0.19438622
  0.18458219 0.58876505]]
epoch 500 loss 0.018658455420743073 y [[0.00813286 0.99146177 0.99143895 0.01927338 0.99147439 0.01926457
  0.01927361 0.94460283]]
epoch 600 loss 0.009111424717798097 y [[0.00623282 0.99464088 0.99461915 0.00893111 0.99464553 0.00892677
  0.0089311  0.9766841 ]]
epoch 700 loss 0.006058859138923263 y [[0.00494459 0.99609562 0.99607875 0.00573614 0.99

## Exercise4.1-4

In [26]:
x = np.array([[0, 0, 0, 0, 1, 1, 1, 1],
              [0, 0, 1, 1, 0, 0, 1, 1],
              [0, 1, 0, 1, 0, 1, 0, 1]])
t = np.array([0, 1, 1, 0, 1, 0, 0, 1])

input_dim = 3
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = SigmoidAndCrossEntropy()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    loss = layer4.forward(q, t)
    
    dq = layer4.backward()
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    layer3.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', layer4.y)

epoch 0 loss 0.9147069950881772 y [[0.18132317 0.20212241 0.19277975 0.2141089  0.18903368 0.21034223
  0.20094485 0.22196107]]
epoch 100 loss 0.6799459912079164 y [[0.2238357  0.41961782 0.42089837 0.44748853 0.4176084  0.44611058
  0.44698556 0.4480957 ]]
epoch 200 loss 0.31889976174503487 y [[0.04768285 0.95429884 0.95419229 0.27965573 0.95431132 0.279603
  0.27965005 0.25210487]]
epoch 300 loss 0.29268989371340814 y [[0.01671347 0.98719876 0.98717467 0.25770721 0.98720166 0.2576925
  0.25770563 0.24858294]]
epoch 400 loss 0.2878071102047187 y [[0.00976738 0.99276831 0.99275731 0.25432396 0.99276964 0.25431711
  0.25432322 0.2489568 ]]
epoch 500 loss 0.2857938320621163 y [[0.00684852 0.99501346 0.99500686 0.25297237 0.99501426 0.25296831
  0.25297193 0.24919429]]
epoch 600 loss 0.28470357514924327 y [[0.00525623 0.99621417 0.99620964 0.25225178 0.99621472 0.25224905
  0.25225148 0.24934629]]
epoch 700 loss 0.2840225558607353 y [[0.00425744 0.99695782 0.99695446 0.25180624 0.99695823

## Exercise4.1-5

In [28]:
x = np.array([[0, 0, 0, 0, 1, 1, 1, 1],
              [0, 0, 1, 1, 0, 0, 1, 1],
              [0, 1, 0, 1, 0, 1, 0, 1]])
t = np.array([0, 1, 1, 0, 1, 0, 0, 1])

input_dim = 3
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = SigmoidAndCrossEntropy()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    loss = layer2.forward(p, t)
    
    dp = layer2.backward()
    dx = layer1.backward(dp)
    
    layer1.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', layer2.y)

epoch 0 loss 0.7553541323511623 y [[0.50464905 0.65204633 0.61957603 0.74973485 0.39398684 0.54459751
  0.50964084 0.65656362]
 [0.45240716 0.57220675 0.27511962 0.3806027  0.30495321 0.41532105
  0.16774805 0.24603684]]
epoch 100 loss 1.6560977441839155 y [[0.85757581 0.93908255 0.939082   0.97528834 0.93907841 0.97528683
  0.9752866  0.99019947]
 [0.1424422  0.06092417 0.06091703 0.02471053 0.06091784 0.02471087
  0.02470786 0.00979801]]
epoch 200 loss 1.6560541576131014 y [[0.85759341 0.93908164 0.93908164 0.97528433 0.93908164 0.97528433
  0.97528433 0.990197  ]
 [0.14240664 0.06091836 0.06091836 0.02471566 0.06091836 0.02471566
  0.02471566 0.00980299]]
epoch 300 loss 1.6560540436134215 y [[0.85759345 0.93908164 0.93908164 0.97528433 0.93908164 0.97528433
  0.97528433 0.990197  ]
 [0.14240655 0.06091836 0.06091836 0.02471567 0.06091836 0.02471567
  0.02471567 0.009803  ]]
epoch 400 loss 1.65605404331527 y [[0.85759345 0.93908164 0.93908164 0.97528433 0.93908164 0.97528433
  0.9752

## Exercise4.2

### MNISTデータダウンロード

In [4]:
data_dir = os.path.join(os.path.abspath('..'), 'data')
if not os.path.exists(data_dir):
    os.makedirs(data_dir)

X, Y = fetch_openml('mnist_784', data_home=data_dir, return_X_y=True)

### 学習設定・データローダー実装

In [57]:
train_x, train_y = (X[:60000], Y[:60000])
test_x, test_y = (X[60000:], Y[60000:])

train_y = np.identity(10)[train_y.astype(np.int)]
test_y = np.identity(10)[test_y.astype(np.int)]

epochs = 10
batch_size = 100

input_dim = 784
hidden_dim = 10
output_dim = 10

class DataLoader:
    def __init__(self, inputs, outputs, batch_size=100):
        if not isinstance(inputs, np.ndarray):
            raise TypeError('{} must be numpy.ndarray'.format('inputs'))
        if not isinstance(outputs, np.ndarray):
            raise TypeError('{} must be numpy.ndarray'.format('outputs'))
        if inputs.shape[0] != outputs.shape[0]:
            raise ValueError('The shapes of inputs and outputs must be same.')
        self._inputs = inputs
        self._outputs = outputs
        self._batch_size = batch_size
        self._i = 0
        
    def __iter__(self):
        return self
    
    def __next__(self):
        if (self._i * self._batch_size) >= len(self._inputs):
            raise StopIteration()
        
        if ((self._i + 1) * self._batch_size) < len(self._inputs):
            x, y = self._inputs[self._i * self._batch_size:(self._i + 1) * self._batch_size], \
                self._outputs[self._i * self._batch_size:(self._i + 1) * self._batch_size]
        else:
            x, y = self._inputs[self._i * self._batch_size:], \
                self._outputs[self._i * self._batch_size:]
        self._i += 1
        
        return x, y

### モデル学習

In [65]:
w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

for epoch in range(epochs):
    for x, y in DataLoader(train_x, train_y, batch_size=batch_size):
        p = layer1.forward(x.T)
        y = layer2.forward(p)
        q = layer3.forward(y)
        z = layer4.forward(q)
        loss = layer5.forward(z, y)
        
        dz = layer5.backward()
        dq = layer4.backward(dz)
        dy = layer3.backward(dq)
        dp = layer2.backward(dy)
        dx = layer1.backward(dp)
        
        layer1.update()
        layer3.update()
    print('epoch:{epoch} loss:{loss}'.format(epoch=epoch, loss=loss))

  y = 1 / (1 + np.exp(-x))


epoch:0 loss:0.002510053070384594
epoch:1 loss:0.0009945945638161454
epoch:2 loss:0.0005158744987348487
epoch:3 loss:0.000338962218386652
epoch:4 loss:0.0002395038986965784
epoch:5 loss:0.00013654707009375124
epoch:6 loss:0.00014226613123414448
epoch:7 loss:0.00011749993955195664
epoch:8 loss:0.0001325618325690182
epoch:9 loss:9.852824521709377e-05


### モデル推論

In [26]:
isinstance([0], np.ndarray)

False