In [2]:
import numpy as np
# Seed NumPy's legacy global RNG so that the np.random.rand draws in later
# cells are repeatable when the notebook is run top to bottom on a fresh kernel.
np.random.seed(seed=777)

## Exercise 3.1

In [132]:
class Affine:
    """Fully connected layer computing ``y = w @ x + b``.

    Batches are laid out with one sample per column: ``x`` is
    ``(in_features, n_samples)`` and ``w`` is ``(out_features, in_features)``.
    The bias is held as a column ``(out_features, 1)`` so that it broadcasts
    across samples.

    Attributes:
        w: Weight matrix (the very object passed in; updated in place).
        b: Bias. A 1-D bias is stored as an ``(out_features, 1)`` view.
        dw: Gradient w.r.t. ``w`` from the latest ``backward`` call.
        db: Gradient w.r.t. ``b`` from the latest ``backward`` call,
            shape ``(out_features, 1)``.
        input: The ``x`` seen by the latest ``forward`` call.
    """

    def __init__(self, w, b):
        """Store the parameters.

        Args:
            w: Weight matrix, ``(out_features, in_features)``.
            b: Bias, either a column ``(out_features, 1)`` or a 1-D vector of
                length ``out_features``.
        """
        self.w = w
        # backward() always produces db with shape (out_features, 1). A 1-D
        # bias cannot receive that in place (update() would raise), and for
        # out_features > 1 it would broadcast along the sample axis in
        # forward(). Reshaping an ndarray yields a view, so in-place updates
        # still write through to the caller's array. Any other bias is kept
        # exactly as given.
        self.b = np.asarray(b).reshape(-1, 1) if np.ndim(b) == 1 else b
        self.dw = None
        self.db = None
        self.input = None

    def forward(self, x):
        """Return ``w @ x + b`` and remember ``x`` for ``backward``.

        Args:
            x: Input batch, ``(in_features, n_samples)``.

        Returns:
            Array of shape ``(out_features, n_samples)``.
        """
        self.input = x
        y = np.dot(self.w, x) + self.b
        return y

    def backward(self, dx):
        """Back-propagate the upstream gradient ``dx``.

        Stores the parameter gradients in ``self.dw`` / ``self.db`` (both are
        sums over the sample axis) and returns the gradient w.r.t. the input
        of the latest ``forward`` call.

        Args:
            dx: Gradient w.r.t. this layer's output,
                ``(out_features, n_samples)``.

        Returns:
            Gradient w.r.t. the layer input, ``(in_features, n_samples)``.
        """
        self.dw = np.dot(dx, self.input.T)
        self.db = np.sum(dx, axis=1, keepdims=True)
        return np.dot(self.w.T, dx)

    def update(self, lr=1.0):
        """Take one gradient-descent step using the stored gradients.

        The parameters are modified in place, so the arrays handed to the
        constructor change as well.

        Args:
            lr: Learning rate multiplying ``dw`` and ``db``.
        """
        self.w -= self.dw * lr
        self.b -= self.db * lr
    
class Sigmoid:
    """Element-wise logistic activation that caches its result for backprop."""

    def __init__(self):
        # Result of the most recent forward(); backward() reads it.
        self.output = None

    def forward(self, x):
        """Return ``1 / (1 + exp(-x))`` element-wise and remember the result."""
        self.output = 1 / (1 + np.exp(-x))
        return self.output

    def backward(self, dx):
        """Scale the upstream gradient ``dx`` by ``s * (1 - s)``.

        ``s`` is the activation stored by the latest ``forward`` call.
        """
        activation = self.output
        return dx * activation * (1.0 - activation)

In [4]:
# Four input samples, one per column: (0,0), (0,1), (1,0), (1,1).
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
# Hand-picked parameters: w/b go to the first Affine layer, u/c to the second.
w = np.array([[ 1.1, 0.7],
              [0.3, 0.5]])
b = np.array([[0.1],
              [ 3]])
u = np.array([[ 2, 2.1]])
# c is 1-D while b is a column; with a single output row both shapes
# broadcast identically when added in forward().
c = np.array([0.9])

# Network: Affine -> Sigmoid -> Affine -> Sigmoid.
layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()

# Forward pass, keeping each intermediate result under its own name.
p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)

# One network output per input column.
print(z)

[[0.98111578 0.98685385 0.98859632 0.99093144]]


In [5]:
# Same four input columns as the previous cell, with a second parameter set.
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
# w/b parameterise the first Affine layer, u/c the second.
w = np.array([[ 1, 0.5],
              [0.11,1]])
b = np.array([[0.3],
              [ 2]])
u = np.array([[ 1, 1.5]])
# 1-D bias; broadcasts like a (1, 1) column for this single-output layer.
c = np.array([0.6])


# Fresh layer objects so nothing cached by the previous cell is reused.
layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()

# Forward pass: Affine -> Sigmoid -> Affine -> Sigmoid.
p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)

print(z)

[[0.92383547 0.93812899 0.93840737 0.9475542 ]]


## Exercise 3.2

In [108]:
class MSE:
    """Halved mean-squared-error loss over a batch.

    ``forward`` computes ``sum((z - t) ** 2) / (2 * N)`` where ``N`` is the
    size of the last axis of ``z``. ``backward`` by default returns the
    *unnormalised* error ``z - t``, which is ``N`` times the true derivative
    of that loss. Pass ``normalize=True`` to get the exact gradient.

    Attributes:
        z: Predictions from the latest ``forward`` call (``None`` before it).
        t: Targets from the latest ``forward`` call (``None`` before it).
        output: Never written by this class; kept only so existing code that
            reads the attribute keeps working.
    """

    def __init__(self):
        self.output = None
        # Declare z up front: backward() reads it, so it must exist even when
        # forward() has not run yet.
        self.z = None
        self.t = None

    def forward(self, z, t):
        """Return the scalar loss and cache ``z`` and ``t`` for ``backward``.

        Args:
            z: Predictions; the last axis indexes samples.
            t: Targets, broadcastable against ``z``.

        Returns:
            ``sum((z - t) ** 2) / (2 * N)`` with ``N = z.shape[-1]``.
        """
        data_num = z.shape[-1]
        loss = np.sum((z - t) ** 2) / (2 * data_num)
        self.z = z
        self.t = t
        return loss

    def backward(self, normalize=False):
        """Return the gradient of the loss w.r.t. the cached predictions.

        Args:
            normalize: When ``False`` (default, the historical behaviour)
                return ``z - t``; the missing ``1 / N`` factor is in effect
                folded into the learning rate. When ``True`` return
                ``(z - t) / N``, the exact derivative of ``forward``'s loss.

        Returns:
            Array with the broadcast shape of ``z`` and ``t``.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if self.z is None:
            raise RuntimeError("MSE.forward() must be called before backward()")
        error = self.z - self.t
        if normalize:
            return error / self.z.shape[-1]
        return error

In [7]:
# Four input samples, one per column: (0,0), (0,1), (1,0), (1,1).
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
# Targets per column; these equal the XOR of the two input rows.
t = np.array([0, 1, 1, 0])
# Fixed parameters: w/b for the first Affine layer, u/c for the second.
w = np.array([[ 1, 0.5],
              [0.11, 1]])
b = np.array([[0.3],
              [ 2]])
u = np.array([[ 1, 1.5]])
# 1-D bias; broadcasts like a (1, 1) column for this single-output layer.
c = np.array([0.6])


# Network Affine -> Sigmoid -> Affine -> Sigmoid, followed by the MSE loss.
layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

# Forward pass through the network, then the loss against t.
p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)
# z is (1, 4) and t is (4,); the subtraction inside MSE broadcasts them.
loss = layer5.forward(z, t)

print(loss)

0.2198690776011607


## Exercise 3.3

In [65]:
# Four input samples, one per column, with targets equal to the XOR of the rows.
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

# Layer sizes used for the parameter shapes below.
input_dim = 2
hidden_dim = 2
output_dim = 1

# np.random.rand draws from [0, 1); 2 * r - 1 maps that to [-1, 1).
# The four draws consume the global RNG stream in this order, so reordering
# these lines changes which values each parameter receives.
w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

# Network Affine -> Sigmoid -> Affine -> Sigmoid, followed by the MSE loss.
layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

# Single forward pass with the untrained random parameters.
p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)
loss = layer5.forward(z, t)

print(loss)

0.15823737724358658


## Exercise 3.4

In [134]:
# Four input samples, one per column; targets are the logical AND of the rows.
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 0, 0, 1])

# This model is a single Affine + Sigmoid unit, so there is no hidden layer
# and only the input and output sizes are needed.
input_dim = 2
output_dim = 1

# Uniform initialisation in [-1, 1). The shapes come from the size constants
# (previously hard-coded as (1, 2) and (1, 1)); the shapes and the number of
# RNG draws are unchanged.
w = 2.0 * np.random.rand(output_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = MSE()

# Full-batch gradient descent: every iteration uses all four samples.
epoch = 1000
for i in range(epoch):
    # Forward pass and loss.
    p = layer1.forward(x)
    y = layer2.forward(p)
    loss = layer3.forward(y, t)
    
    # Backward pass in reverse layer order; layer1.backward also stores dw/db.
    dy = layer3.backward()
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    # Gradient step with update()'s default learning rate.
    layer1.update()
    
    # The reported loss and outputs are the values from before this step.
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', y)

epoch 0 loss 0.12274927579404002 y [[0.42118093 0.36994107 0.26797782 0.22803347]]
epoch 100 loss 0.016585570256661034 y [[0.01791774 0.19488418 0.19474379 0.76238997]]
epoch 200 loss 0.008009002282706441 y [[0.00486048 0.13626446 0.13626083 0.83594703]]
epoch 300 loss 0.005099666614781936 y [[0.00224043 0.10894766 0.1089473  0.86941207]]
epoch 400 loss 0.0036895097409325598 y [[0.00130266 0.09276136 0.0927613  0.88907186]]
epoch 500 loss 0.0028703283529771884 y [[8.60775155e-04 8.18689834e-02 8.18689663e-02 9.02240971e-01]]
epoch 600 loss 0.0023394297756996665 y [[6.16368144e-04 7.39428175e-02 7.39428117e-02 9.11795813e-01]]
epoch 700 loss 0.0019692527718526013 y [[4.66255568e-04 6.78625991e-02 6.78625968e-02 9.19110320e-01]]
epoch 800 loss 0.0016972884097868988 y [[3.67003943e-04 6.30180961e-02 6.30180951e-02 9.24929288e-01]]
epoch 900 loss 0.0014894932708022076 y [[2.97695256e-04 5.90463740e-02 5.90463735e-02 9.29694175e-01]]


In [122]:
# Show the most recent weight and bias gradients stored on layer1.
# This relies on layer1 from whichever training cell ran last in the kernel.
print(layer1.dw, layer1.db, sep='\n')

[[-0.01216047 -0.01210709]]
[[0.0178823]]


## Exercise 3.5

In [137]:
# Four input samples, one per column, with targets equal to the XOR of the rows.
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

# Layer sizes used for the parameter shapes below.
input_dim = 2
hidden_dim = 2
output_dim = 1

# Uniform initialisation in [-1, 1); the order of these draws fixes which
# random values each parameter receives from the global RNG stream.
w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

# Network Affine -> Sigmoid -> Affine -> Sigmoid, followed by the MSE loss.
layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

# Full-batch gradient descent: every iteration uses all four samples.
epoch = 1000
for i in range(epoch):
    # Forward pass and loss.
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    z = layer4.forward(q)
    loss = layer5.forward(z, t)
    
    # Backward pass in reverse layer order; each Affine.backward stores its
    # own dw/db. dx (gradient w.r.t. the inputs) is not used afterwards.
    dz = layer5.backward()
    dq = layer4.backward(dz)
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    # Both updates happen after the full backward pass, with update()'s
    # default learning rate.
    layer1.update()
    layer3.update()
    
    # The reported loss and outputs are the values from before this step.
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.1377542423135002 z [[0.34117964 0.37766343 0.30479485 0.33914214]]
epoch 100 loss 0.12369464342718157 z [[0.48652369 0.51932947 0.48629532 0.50785352]]
epoch 200 loss 0.10597456474695832 z [[0.37441855 0.55458263 0.54203068 0.54724285]]
epoch 300 loss 0.055305278217205975 z [[0.21395756 0.68947782 0.68782394 0.45031819]]
epoch 400 loss 0.0150238597696577 z [[0.13022495 0.83569979 0.83574558 0.22194203]]
epoch 500 loss 0.006414430188250171 z [[0.0912224  0.89236481 0.89239939 0.14082148]]
epoch 600 loss 0.003790164049349883 z [[0.07231166 0.91719764 0.9172223  0.10669563]]
epoch 700 loss 0.002619634635755314 z [[0.06117187 0.93113792 0.93115699 0.08794165]]
epoch 800 loss 0.0019765163366099953 z [[0.05374175 0.94017492 0.94019048 0.0759456 ]]
epoch 900 loss 0.0015758089467432785 z [[0.04837598 0.94657752 0.9465907  0.06752571]]


## Exercise 3.6

In [138]:
# Eight input samples, one per column, enumerating every 3-bit combination
# from (0,0,0) to (1,1,1).
x = np.array([[0, 0, 0, 0, 1, 1, 1, 1],
              [0, 0, 1, 1, 0, 0, 1, 1],
              [0, 1, 0, 1, 0, 1, 0, 1]])
# Target is 1 only for the (1,1,1) column, i.e. the AND of the three rows.
t = np.array([0, 0, 0, 0, 0, 0, 0, 1])

# Layer sizes used for the parameter shapes below.
input_dim = 3
hidden_dim = 3
output_dim = 1

# Uniform initialisation in [-1, 1); the order of these draws fixes which
# random values each parameter receives from the global RNG stream.
w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

# Network Affine -> Sigmoid -> Affine -> Sigmoid, followed by the MSE loss.
layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

# Full-batch gradient descent: every iteration uses all eight samples.
epoch = 1000
for i in range(epoch):
    # Forward pass and loss.
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    z = layer4.forward(q)
    loss = layer5.forward(z, t)
    
    # Backward pass in reverse layer order; each Affine.backward stores its
    # own dw/db. dx (gradient w.r.t. the inputs) is not used afterwards.
    dz = layer5.backward()
    dq = layer4.backward(dz)
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    # Both updates happen after the full backward pass, with update()'s
    # default learning rate.
    layer1.update()
    layer3.update()
    
    # The reported loss and outputs are the values from before this step.
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.1011810174725194 z [[0.44590255 0.40003897 0.44845481 0.40332604 0.43142076 0.38703963
  0.43493192 0.39076845]]
epoch 100 loss 0.04597226955867939 z [[0.08342249 0.11351742 0.13081018 0.17096784 0.15514303 0.19351316
  0.21496189 0.25057077]]
epoch 200 loss 0.011145032855742615 z [[0.00501565 0.0215071  0.0182419  0.1600938  0.01742877 0.1652226
  0.16074945 0.68626864]]
epoch 300 loss 0.002875006594077605 z [[0.00104237 0.00444909 0.00402101 0.08572154 0.00330181 0.0868778
  0.08420441 0.84519097]]
epoch 400 loss 0.0013486099237470706 z [[5.27304693e-04 2.09325310e-03 2.04331226e-03 5.95167163e-02
  1.55151202e-03 5.96330998e-02 5.81600724e-02 8.94711935e-01]]
epoch 500 loss 0.0008258504277519683 z [[3.53312981e-04 1.33329766e-03 1.37129908e-03 4.68894508e-02
  9.92197197e-04 4.66375359e-02 4.57031609e-02 9.17863649e-01]]
epoch 600 loss 0.0005782922473226347 z [[2.68071489e-04 9.75664601e-04 1.04163841e-03 3.94022932e-02
  7.28693465e-04 3.89885975e-02 3.83448035e-02 9