In [23]:
import numpy as np
np.random.seed(seed=777)

## Exercise3.1

In [34]:
class Affine:
    def __init__(self, w, b):
        self.w = w
        self.b = b
        self.dw = None
        self.db = None
        self.input = None
    
    def forward(self, x):
        self.input = x
        y = np.dot(self.w, x) + self.b
        return y
    
    def backward(self, dx):
        self.dw = np.dot(dx ,self.input.T)
        self.db = np.sum(dx, axis=1, keepdims=True)
        return np.dot(self.w.T, dx)
    
    def update(self, lr=0.1):
        self.w -= self.dw * lr
        self.b -= self.db * lr
    
class Sigmoid:
    def __init__(self):
        self.output = None
        
    def forward(self, x):
        y = 1 / (1 + np.exp(-x))
        self.output = y
        return y
    
    def backward(self, dx):
        return dx * self.output * (1.0 - self.output)

In [25]:
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
w = np.array([[ 1.1, 0.7],
              [0.3, 0.5]])
b = np.array([[0.1],
              [ 3]])
u = np.array([[ 2, 2.1]])
c = np.array([0.9])

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)

print(z)

[[0.98111578 0.98685385 0.98859632 0.99093144]]


In [26]:
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
w = np.array([[ 1, 0.5],
              [0.11,1]])
b = np.array([[0.3],
              [ 2]])
u = np.array([[ 1, 1.5]])
c = np.array([0.6])


layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)

print(z)

[[0.92383547 0.93812899 0.93840737 0.9475542 ]]


## Exercise3.2

In [27]:
class MSE:
    def __init__(self):
        self.output = None
        self.t = None
    
    def forward(self, z, t):
        data_num = z.shape[-1]
        loss = np.sum((z - t) ** 2) / (2 * data_num)
        self.z = z
        self.t = t
        return loss
    
    def backward(self):
        return self.z - self.t

In [28]:
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])
w = np.array([[ 1, 0.5],
              [0.11, 1]])
b = np.array([[0.3],
              [ 2]])
u = np.array([[ 1, 1.5]])
c = np.array([0.6])


layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)
loss = layer5.forward(z, t)

print(loss)

0.2198690776011607


## Exercise3.3

In [29]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim = 2
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)
loss = layer5.forward(z, t)

print(loss)

0.1254586464038719


## Exercise3.4

In [30]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 0, 0, 1])

input_dim = 2
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(1, 2) - 1.0
b = 2.0 * np.random.rand(1, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = MSE()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    loss = layer3.forward(y, t)
    
    dy = layer3.backward()
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', y)

epoch 0 loss 0.11059875819525353 y [[0.30140249 0.16058501 0.36526844 0.20329552]]
epoch 100 loss 0.07264510880272602 y [[0.22688731 0.27025334 0.39495912 0.45168125]]
epoch 200 loss 0.0527648595295762 y [[0.14751532 0.29044564 0.34971152 0.55988491]]
epoch 300 loss 0.0417154667463111 y [[0.09939268 0.28169867 0.30972685 0.61456522]]
epoch 400 loss 0.03449939597385088 y [[0.07045038 0.26694585 0.28070435 0.65218267]]
epoch 500 loss 0.029323894473012743 y [[0.05202754 0.25143319 0.25849141 0.68086392]]
epoch 600 loss 0.025403024800220022 y [[0.03971709 0.23684354 0.24062763 0.70394218]]
epoch 700 loss 0.022326384745484495 y [[0.03115796 0.22363927 0.22575441 0.72312334]]
epoch 800 loss 0.019851247873327545 y [[0.02500759 0.21184582 0.21307449 0.73941371]]
epoch 900 loss 0.017821589787150982 y [[0.02046322 0.20134291 0.20208214 0.75346842]]


In [31]:
print(layer1.w)
print(layer1.b)

[[2.62166652 2.61870924]]
[[-4.05594338]]


## Exercise3.5

In [46]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim = 2
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    z = layer4.forward(q)
    loss = layer5.forward(z, t)
    
    dz = layer5.backward()
    dq = layer4.backward(dz)
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    layer3.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.15204323825415406 z [[0.27212618 0.25355694 0.27947781 0.25683413]]
epoch 100 loss 0.12491812302528318 z [[0.4974564  0.47701241 0.51375045 0.49186123]]
epoch 200 loss 0.12486327922122517 z [[0.50061296 0.4810215  0.51545798 0.49413889]]
epoch 300 loss 0.12481235152507161 z [[0.50103826 0.48223945 0.51448467 0.49361762]]
epoch 400 loss 0.12475999489193047 z [[0.50138341 0.48334983 0.51353687 0.49307288]]
epoch 500 loss 0.12470479051775153 z [[0.50167645 0.48439872 0.51262477 0.49252387]]
epoch 600 loss 0.1246452849893923 z [[0.50191949 0.4854047  0.51173512 0.4919636 ]]
epoch 700 loss 0.12457991090224935 z [[0.50211313 0.48638417 0.51085588 0.49138427]]
epoch 800 loss 0.12450690297160244 z [[0.50225678 0.4873522  0.50997619 0.49077718]]
epoch 900 loss 0.12442420165686195 z [[0.50234852 0.48832301 0.50908609 0.4901324 ]]


In [47]:
print(layer1.w)
print(layer1.b)
print(layer3.w)
print(layer3.b)

[[ 0.86372139 -0.53982643]
 [-0.50788136  0.73945878]]
[[0.68495925]
 [0.37703061]]
[[-0.30449143 -0.57104264]]
[[0.55069743]]


## Exercise3.6

In [21]:
x = np.array([[0, 0, 0, 0, 1, 1, 1, 1],
              [0, 0, 1, 1, 0, 0, 1, 1],
              [0, 1, 0, 1, 0, 1, 0, 1]])
t = np.array([0, 0, 0, 0, 0, 0, 0, 1])

input_dim = 3
hidden_dim = 3
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    z = layer4.forward(q)
    loss = layer5.forward(z, t)
    
    dz = layer5.backward()
    dq = layer4.backward(dz)
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    layer3.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.17690235209354777 z [[0.62928399 0.57178164 0.61500756 0.56010094 0.66933126 0.61392665
  0.65486437 0.59775619]]
epoch 100 loss 0.052713013162974214 z [[0.11659838 0.10989886 0.117338   0.11243693 0.17479033 0.16268824
  0.17256007 0.1606378 ]]
epoch 200 loss 0.04931787762367572 z [[0.09855812 0.10819367 0.11218716 0.12304287 0.1645086  0.17825294
  0.18168291 0.19495647]]
epoch 300 loss 0.0456762582078188 z [[0.08217233 0.1072837  0.10878269 0.14025589 0.1577561  0.19899507
  0.19596753 0.23962946]]
epoch 400 loss 0.042464826817693896 z [[0.06474862 0.10163783 0.10146459 0.15459146 0.14623711 0.21354401
  0.2050499  0.27771944]]
epoch 500 loss 0.03946374387171253 z [[0.04920858 0.09324442 0.09239715 0.16621272 0.13172221 0.22363489
  0.21119627 0.31285167]]
epoch 600 loss 0.03647316948172048 z [[0.03639883 0.08311034 0.08252411 0.17554266 0.11513199 0.22975958
  0.21501158 0.34774374]]
epoch 700 loss 0.03343941497473246 z [[0.02636951 0.07196478 0.07234721 0.18257792 0