In [11]:
import numpy as np
# Fix the seed of NumPy's global random generator so that the randomly
# initialised weights in later cells are the same on every fresh run.
SEED = 7
np.random.seed(SEED)

## Exercise 3.1

In [12]:
class Affine:
    """Fully connected (affine) layer computing ``y = w . x + b``.

    Inputs are laid out column-wise: ``x`` has one column per sample, ``w`` has
    shape ``(out_features, in_features)`` and ``b`` is broadcast over columns.

    Attributes:
        w: Weight matrix, stored as a private float copy of the argument.
        b: Bias, stored as a private float column vector ``(out_features, 1)``.
        dw: Gradient of the loss w.r.t. ``w`` (set by ``backward``).
        db: Gradient of the loss w.r.t. ``b`` (set by ``backward``).
        input: The ``x`` seen by the most recent ``forward`` call.
    """

    def __init__(self, w, b):
        """Store the initial parameters.

        Args:
            w: Array-like weight matrix of shape ``(out_features, in_features)``.
            b: Array-like bias with ``out_features`` entries; a 1-D array or a
                scalar is accepted and reshaped to a column vector.
        """
        # update() modifies the parameters in place. Working on float copies
        # keeps the caller's arrays untouched and avoids the casting error
        # NumPy raises when a float step is subtracted from an integer array.
        self.w = np.array(w, dtype=float)
        # A column vector adds one bias value per output row and has the same
        # shape as db, so the in-place subtraction in update() always fits.
        self.b = np.array(b, dtype=float).reshape(-1, 1)
        self.dw = None
        self.db = None
        self.input = None

    def forward(self, x):
        """Return ``w . x + b`` and remember ``x`` for the backward pass.

        Args:
            x: Array of shape ``(in_features, n_samples)``.

        Returns:
            Array of shape ``(out_features, n_samples)``.
        """
        self.input = x
        y = np.dot(self.w, x) + self.b
        return y

    def backward(self, dx):
        """Compute parameter gradients and return the gradient w.r.t. the input.

        Args:
            dx: Gradient of the loss w.r.t. this layer's output, shape
                ``(out_features, n_samples)``.

        Returns:
            Gradient of the loss w.r.t. the layer input, shape
            ``(in_features, n_samples)``.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if self.input is None:
            raise RuntimeError("Affine.backward() called before Affine.forward()")
        self.dw = np.dot(dx, self.input.T)
        # Sum over the sample axis; keepdims keeps db a column like b.
        self.db = np.sum(dx, axis=1, keepdims=True)
        return np.dot(self.w.T, dx)

    def update(self, lr=0.1):
        """Take one gradient-descent step of size ``lr`` on ``w`` and ``b``.

        Raises:
            RuntimeError: If ``backward`` has not been called yet.
        """
        if self.dw is None or self.db is None:
            raise RuntimeError("Affine.update() called before Affine.backward()")
        self.w -= self.dw * lr
        self.b -= self.db * lr
    
class Sigmoid:
    """Element-wise logistic activation ``1 / (1 + exp(-x))``."""

    def __init__(self):
        # Result of the latest forward() call; backward() reuses it.
        self.output = None

    def forward(self, x):
        """Apply the sigmoid to ``x`` and cache the result for backward()."""
        self.output = 1 / (1 + np.exp(-x))
        return self.output

    def backward(self, dx):
        """Scale the incoming gradient ``dx`` by ``y * (1 - y)``.

        ``y`` is the output cached by the most recent forward() call.
        """
        activation = self.output
        return dx * activation * (1.0 - activation)

In [13]:
# Four two-feature input vectors, one per column.
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# Parameters of the first affine layer (w, b) and of the second one (u, c).
w = np.array([[1.1, 0.7], [0.3, 0.5]])
b = np.array([[0.1], [3]])
u = np.array([[2, 2.1]])
c = np.array([0.9])

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()

# Forward pass: feed each layer the previous layer's output, then expose every
# intermediate result under its own name.
outputs = [x]
for layer in (layer1, layer2, layer3, layer4):
    outputs.append(layer.forward(outputs[-1]))
p, y, q, z = outputs[1:]

print(z)

[[0.98111578 0.98685385 0.98859632 0.99093144]]


In [14]:
# Same four input columns as before, evaluated with a second parameter set.
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# First affine layer.
w = np.array([[1, 0.5], [0.11, 1]])
b = np.array([[0.3], [2]])

# Second affine layer.
u = np.array([[1, 1.5]])
c = np.array([0.6])

layer1, layer2 = Affine(w, b), Sigmoid()
layer3, layer4 = Affine(u, c), Sigmoid()

# affine -> sigmoid -> affine -> sigmoid
outputs = [x]
for layer in (layer1, layer2, layer3, layer4):
    outputs.append(layer.forward(outputs[-1]))
p, y, q, z = outputs[1:]

print(z)

[[0.92383547 0.93812899 0.93840737 0.9475542 ]]


## Exercise 3.2

In [15]:
class MSE:
    """Mean-squared-error loss, ``sum((z - t) ** 2) / (2 * n_samples)``.

    Attributes:
        z: Predictions passed to the most recent ``forward`` call.
        t: Targets passed to the most recent ``forward`` call.
        output: Never written by this class; kept only so existing code that
            reads the attribute keeps working.
    """

    def __init__(self):
        self.output = None
        # backward() reads z, so it is initialised here alongside t.
        self.z = None
        self.t = None

    def forward(self, z, t):
        """Return the scalar loss and cache ``z`` and ``t`` for backward().

        Args:
            z: Predictions; the last axis indexes the samples.
            t: Targets, broadcastable against ``z``.

        Returns:
            The summed squared error divided by twice the number of samples.
        """
        data_num = z.shape[-1]
        loss = np.sum((z - t) ** 2) / (2 * data_num)
        self.z = z
        self.t = t
        return loss

    def backward(self, normalize=False):
        """Return the gradient of the loss with respect to ``z``.

        Args:
            normalize: When False (the default, and the historical behaviour)
                return ``z - t``. That is ``n_samples`` times the exact
                gradient of the value returned by forward(), so it acts like
                a learning rate scaled by the number of samples. When True,
                return the exact gradient ``(z - t) / n_samples``.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        if self.z is None or self.t is None:
            raise RuntimeError("MSE.backward() called before MSE.forward()")
        grad = self.z - self.t
        if normalize:
            grad = grad / self.z.shape[-1]
        return grad

In [16]:
# Inputs (one sample per column) and the target value for each sample.
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

# Fixed parameters: (w, b) for the first affine layer, (u, c) for the second.
w = np.array([[1, 0.5], [0.11, 1]])
b = np.array([[0.3], [2]])
u = np.array([[1, 1.5]])
c = np.array([0.6])

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()
layer5 = MSE()

# Run the network, then score its output against the targets.
outputs = [x]
for layer in (layer1, layer2, layer3, layer4):
    outputs.append(layer.forward(outputs[-1]))
p, y, q, z = outputs[1:]
loss = layer5.forward(z, t)

print(loss)

0.2198690776011607


## Exercise 3.3

In [17]:
# Inputs (one sample per column) and the target value for each sample.
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim, hidden_dim, output_dim = 2, 2, 1

# Draw w, b, u, c (in that order) uniformly from [-1, 1).
shapes = (
    (hidden_dim, input_dim),   # w
    (hidden_dim, 1),           # b
    (output_dim, hidden_dim),  # u
    (output_dim, 1),           # c
)
w, b, u, c = (2.0 * np.random.rand(*shape) - 1.0 for shape in shapes)

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()
layer5 = MSE()

# Forward pass through the four layers, followed by the loss.
outputs = [x]
for layer in (layer1, layer2, layer3, layer4):
    outputs.append(layer.forward(outputs[-1]))
p, y, q, z = outputs[1:]
loss = layer5.forward(z, t)

print(loss)

0.14895786372912956


## Exercise 3.4

In [18]:
# Four two-feature samples, one per column. The targets equal the logical AND
# of the two input rows.
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 0, 0, 1])

# This model is a single affine layer followed by a sigmoid, so there is no
# hidden layer; the parameter shapes below are derived from these two sizes
# instead of being hard-coded.
input_dim = 2
output_dim = 1

# Initial parameters drawn uniformly from [-1, 1).
w = 2.0 * np.random.rand(output_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = MSE()

epoch = 1000
for i in range(epoch):
    # Forward pass.
    p = layer1.forward(x)
    y = layer2.forward(p)
    loss = layer3.forward(y, t)

    # Backward pass. layer1.backward() is called for its side effect of
    # storing dw/db; the gradient it returns w.r.t. x is not needed.
    dy = layer3.backward()
    dp = layer2.backward(dy)
    layer1.backward(dp)

    layer1.update()

    # Report the loss and predictions computed before this step's update.
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', y)

epoch 0 loss 0.17984365730733037 y [[0.6473653  0.72430913 0.64731165 0.7242622 ]]
epoch 100 loss 0.07646778591085235 y [[0.25767515 0.36920048 0.32705439 0.45038902]]
epoch 200 loss 0.055273262753735314 y [[0.16072115 0.33450184 0.31516072 0.54707942]]
epoch 300 loss 0.043399149694522174 y [[0.10699714 0.30486142 0.29579914 0.60590967]]
epoch 400 loss 0.03569916807029434 y [[0.0751324  0.28008397 0.27567597 0.64573428]]
epoch 500 loss 0.03022033415647614 y [[0.05507433 0.25957375 0.25733276 0.67576119]]
epoch 600 loss 0.02609702841548001 y [[0.04179032 0.24234113 0.24114997 0.69975466]]
epoch 700 loss 0.02287827579236778 y [[0.03262121 0.22763927 0.22697877 0.71960195]]
epoch 800 loss 0.020299413696406685 y [[0.02607236 0.21493478 0.2145539  0.73639953]]
epoch 900 loss 0.0181917419395414 y [[0.02125843 0.20383877 0.20361113 0.7508529 ]]


In [19]:
# Show the learned weight matrix and bias, one after the other.
print(layer1.w, layer1.b, sep='\n')

[[2.59552786 2.5964267 ]]
[[-4.02027178]]


## Exercise 3.5

In [25]:
# Four two-feature samples, one per column. The targets equal the exclusive OR
# of the two input rows.
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim, hidden_dim, output_dim = 2, 2, 1

# Draw w, b, u, c (in that order) uniformly from [-1, 1).
shapes = (
    (hidden_dim, input_dim),   # w
    (hidden_dim, 1),           # b
    (output_dim, hidden_dim),  # u
    (output_dim, 1),           # c
)
w, b, u, c = (2.0 * np.random.rand(*shape) - 1.0 for shape in shapes)

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()
layer5 = MSE()

epoch = 1000
for i in range(epoch):
    # Forward pass through the four layers, then the loss.
    outputs = [x]
    for layer in (layer1, layer2, layer3, layer4):
        outputs.append(layer.forward(outputs[-1]))
    p, y, q, z = outputs[1:]
    loss = layer5.forward(z, t)

    # Backward pass: start from the loss gradient and walk the layers in
    # reverse, handing each one the gradient produced by its successor.
    grads = [layer5.backward()]
    for layer in (layer4, layer3, layer2, layer1):
        grads.append(layer.backward(grads[-1]))
    dz, dq, dy, dp, dx = grads

    # Only the affine layers have parameters to update.
    for layer in (layer1, layer3):
        layer.update(1.0)

    if not i % 100:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.14865152481975835 z [[0.30973394 0.29240501 0.27691086 0.26406156]]
epoch 100 loss 0.12493005821637666 z [[0.51293776 0.50855688 0.49162716 0.4861852 ]]
epoch 200 loss 0.12465843369022318 z [[0.51062719 0.51422939 0.48651222 0.48670789]]
epoch 300 loss 0.12365131549983245 z [[0.51059522 0.53086825 0.47145759 0.47860356]]
epoch 400 loss 0.11606619291149047 z [[0.49631644 0.6006541  0.42374649 0.43663975]]
epoch 500 loss 0.0929762025847744 z [[0.45748439 0.74956163 0.39729043 0.32945326]]
epoch 600 loss 0.04646288191252964 z [[0.35850376 0.79248915 0.60704202 0.21377882]]
epoch 700 loss 0.009311833893073414 z [[0.14334216 0.86844748 0.8509094  0.12005676]]
epoch 800 loss 0.004461025559716306 z [[0.09645524 0.90192461 0.90219031 0.08484735]]
epoch 900 loss 0.0028436916336632635 z [[0.07621343 0.91943909 0.92374011 0.06808389]]


In [26]:
# Show the learned weights and bias of the first affine layer, then the second.
for param in (layer1.w, layer1.b, layer3.w, layer3.b):
    print(param)

[[ 4.22786955 -4.5941637 ]
 [ 5.50647153 -5.78792623]]
[[-2.25110444]
 [ 3.17054042]]
[[ 7.18034835 -6.66447265]]
[[3.03835718]]


## Exercise 3.6

In [22]:
# All eight combinations of three binary inputs, one per column. The target is
# 1 only for the column where all three inputs are 1.
x = np.array([
    [0, 0, 0, 0, 1, 1, 1, 1],
    [0, 0, 1, 1, 0, 0, 1, 1],
    [0, 1, 0, 1, 0, 1, 0, 1],
])
t = np.array([0, 0, 0, 0, 0, 0, 0, 1])

input_dim, hidden_dim, output_dim = 3, 3, 1

# Draw w, b, u, c (in that order) uniformly from [-1, 1).
shapes = (
    (hidden_dim, input_dim),   # w
    (hidden_dim, 1),           # b
    (output_dim, hidden_dim),  # u
    (output_dim, 1),           # c
)
w, b, u, c = (2.0 * np.random.rand(*shape) - 1.0 for shape in shapes)

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()
layer5 = MSE()

epoch = 1000
for i in range(epoch):
    # Forward pass through the four layers, then the loss.
    outputs = [x]
    for layer in (layer1, layer2, layer3, layer4):
        outputs.append(layer.forward(outputs[-1]))
    p, y, q, z = outputs[1:]
    loss = layer5.forward(z, t)

    # Backward pass in reverse layer order, starting from the loss gradient.
    grads = [layer5.backward()]
    for layer in (layer4, layer3, layer2, layer1):
        grads.append(layer.backward(grads[-1]))
    dz, dq, dy, dp, dx = grads

    # Gradient step on both affine layers with the default learning rate.
    for layer in (layer1, layer3):
        layer.update()

    if not i % 100:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.20561897103820503 z [[0.6644826  0.6611677  0.68830996 0.68519603 0.66937829 0.66616216
  0.69265339 0.68959471]]
epoch 100 loss 0.055582011550532565 z [[0.14231997 0.14143149 0.17639407 0.17552511 0.11108769 0.11033755
  0.13872199 0.1378099 ]]
epoch 200 loss 0.05424170092587373 z [[0.12697608 0.13155549 0.16410898 0.16905545 0.10355733 0.10701018
  0.13591852 0.14076965]]
epoch 300 loss 0.05287675984563447 z [[0.12067616 0.13131137 0.1644597  0.17544722 0.10381013 0.11284863
  0.1446393  0.15622151]]
epoch 400 loss 0.05142679109765186 z [[0.11396193 0.13149581 0.16688459 0.18468494 0.10382358 0.12003371
  0.15558898 0.17465411]]
epoch 500 loss 0.04981690998482133 z [[0.10548966 0.13078278 0.16947179 0.19591026 0.10273703 0.12781021
  0.16785491 0.1958413 ]]
epoch 600 loss 0.04787568620553225 z [[0.09491987 0.12868696 0.17088008 0.20939326 0.10049125 0.13658202
  0.18164291 0.22176607]]
epoch 700 loss 0.0454279543232675 z [[0.08208315 0.12435929 0.16867913 0.22445125 0.