In [2]:
import numpy as np
np.random.seed(seed=777)

## Exercise3.1

In [132]:
class Affine:
    """Fully connected layer computing ``np.dot(w, x) + b``.

    ``w`` multiplies the input from the left and ``db`` is summed over
    axis 1, so samples are laid out along axis 1 of the input.

    Attributes:
        w: Weight array; a private copy of the constructor argument.
        b: Bias array; a private copy of the constructor argument.
        dw: Gradient w.r.t. ``w`` set by ``backward`` (None before that).
        db: Gradient w.r.t. ``b`` set by ``backward`` (None before that).
        input: Input cached by the most recent ``forward`` (None before that).
    """

    def __init__(self, w, b):
        """Store private floating-point copies of the parameters.

        Args:
            w: Initial weights (array-like).
            b: Initial bias (array-like).
        """
        # update() subtracts in place. Without a copy that would silently
        # modify the caller's arrays, and for integer-typed arrays the
        # in-place float subtraction raises a casting error.
        self.w = self._as_float_copy(w)
        self.b = self._as_float_copy(b)
        self.dw = None
        self.db = None
        self.input = None

    @staticmethod
    def _as_float_copy(values):
        """Return an independent copy of ``values`` with an inexact dtype."""
        arr = np.array(values)  # np.array copies its argument by default
        if not np.issubdtype(arr.dtype, np.inexact):
            arr = arr.astype(float)
        return arr

    def forward(self, x):
        """Cache ``x`` and return ``np.dot(w, x) + b``."""
        self.input = x
        y = np.dot(self.w, x) + self.b
        return y

    def backward(self, dx):
        """Store the parameter gradients and return the input gradient.

        Args:
            dx: Gradient of the loss with respect to this layer's output.

        Returns:
            ``np.dot(w.T, dx)``, the gradient with respect to the cached input.
        """
        self.dw = np.dot(dx, self.input.T)
        # Sum over axis 1; keepdims keeps the result a column.
        self.db = np.sum(dx, axis=1, keepdims=True)
        return np.dot(self.w.T, dx)

    def update(self, lr=1.0):
        """Take one gradient-descent step using the stored ``dw`` and ``db``.

        Args:
            lr: Step size multiplying both gradients.
        """
        self.w -= self.dw * lr
        self.b -= self.db * lr
    
class Sigmoid:
    """Element-wise logistic activation that remembers its last output."""

    def __init__(self):
        # Filled in by forward(); backward() reads it.
        self.output = None

    def forward(self, x):
        """Apply ``1 / (1 + exp(-x))``, cache the result and return it."""
        self.output = 1 / (1 + np.exp(-x))
        return self.output

    def backward(self, dx):
        """Scale the incoming gradient ``dx`` by ``output * (1 - output)``."""
        activated = self.output
        return dx * activated * (1.0 - activated)

In [4]:
# Forward pass of a 2-2-1 sigmoid network with hand-picked parameters.
# The four columns of x are the input pairs (0,0), (0,1), (1,0), (1,1).
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# First Affine layer: 2x2 weights and a column bias.
w = np.array([[1.1, 0.7], [0.3, 0.5]])
b = np.array([[0.1], [3]])
# Second Affine layer: 1x2 weights and a one-element bias.
u = np.array([[2, 2.1]])
c = np.array([0.9])

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()

# Push the batch through the layers in order, keeping every intermediate
# result under its usual name (p, y, q, z).
outputs = []
signal = x
for layer in (layer1, layer2, layer3, layer4):
    signal = layer.forward(signal)
    outputs.append(signal)
p, y, q, z = outputs

print(z)

[[0.98111578 0.98685385 0.98859632 0.99093144]]


In [5]:
# Same 2-2-1 sigmoid network as a forward pass, with a second parameter set.
# The four columns of x are the input pairs (0,0), (0,1), (1,0), (1,1).
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])

# First Affine layer: 2x2 weights and a column bias.
w = np.array([[1, 0.5], [0.11, 1]])
b = np.array([[0.3], [2]])
# Second Affine layer: 1x2 weights and a one-element bias.
u = np.array([[1, 1.5]])
c = np.array([0.6])

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()

# Push the batch through the layers in order, keeping every intermediate
# result under its usual name (p, y, q, z).
outputs = []
signal = x
for layer in (layer1, layer2, layer3, layer4):
    signal = layer.forward(signal)
    outputs.append(signal)
p, y, q, z = outputs

print(z)

[[0.92383547 0.93812899 0.93840737 0.9475542 ]]


## Exercise3.2

In [108]:
class MSE:
    """Squared-error loss: ``sum((z - t) ** 2) / (2 * data_num)``.

    ``data_num`` is the size of the last axis of ``z``.

    Attributes:
        z: Prediction cached by the most recent ``forward`` (None before that).
        t: Target cached by the most recent ``forward`` (None before that).
        output: Never assigned by this class; kept so existing attribute
            reads keep working.
    """

    def __init__(self):
        self.output = None
        # forward() stores z and backward() reads it, so declare it here
        # alongside t instead of creating it lazily.
        self.z = None
        self.t = None

    def forward(self, z, t):
        """Cache ``z`` and ``t`` and return the scalar loss.

        Args:
            z: Predictions; the size of the last axis is used as the sample count.
            t: Targets, broadcast against ``z``.

        Returns:
            ``np.sum((z - t) ** 2) / (2 * data_num)``.
        """
        data_num = z.shape[-1]
        loss = np.sum((z - t) ** 2) / (2 * data_num)
        self.z = z
        self.t = t
        return loss

    def backward(self):
        """Return ``z - t`` for the values cached by ``forward``.

        NOTE(review): this is the gradient of the un-normalised sum
        ``0.5 * sum((z - t) ** 2)``, i.e. ``data_num`` times the gradient of
        the value ``forward`` returns. Left as is because existing training
        runs (and their step size) depend on it.
        """
        return self.z - self.t

In [7]:
# Forward pass of the 2-2-1 sigmoid network followed by the MSE loss.
# Columns of x are the input pairs (0,0), (0,1), (1,0), (1,1); t holds one
# target per column.
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

# First Affine layer: 2x2 weights and a column bias.
w = np.array([[1, 0.5], [0.11, 1]])
b = np.array([[0.3], [2]])
# Second Affine layer: 1x2 weights and a one-element bias.
u = np.array([[1, 1.5]])
c = np.array([0.6])

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()
layer5 = MSE()

# Push the batch through the four network layers in order, keeping every
# intermediate result under its usual name (p, y, q, z).
outputs = []
signal = x
for layer in (layer1, layer2, layer3, layer4):
    signal = layer.forward(signal)
    outputs.append(signal)
p, y, q, z = outputs

# Compare the network output with the targets.
loss = layer5.forward(z, t)

print(loss)

0.2198690776011607


## Exercise3.3

In [65]:
# Loss of the 2-2-1 sigmoid network with randomly initialised parameters.
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim, hidden_dim, output_dim = 2, 2, 1

# Every parameter is drawn as 2 * rand - 1. The draws happen in the order
# w, b, u, c, so the values for a given seed stay the same.
param_shapes = [
    (hidden_dim, input_dim),   # w
    (hidden_dim, 1),           # b
    (output_dim, hidden_dim),  # u
    (output_dim, 1),           # c
]
w, b, u, c = [2.0 * np.random.rand(*shape) - 1.0 for shape in param_shapes]

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()
layer5 = MSE()

# Push the batch through the four network layers in order, keeping every
# intermediate result under its usual name (p, y, q, z).
outputs = []
signal = x
for layer in (layer1, layer2, layer3, layer4):
    signal = layer.forward(signal)
    outputs.append(signal)
p, y, q, z = outputs

loss = layer5.forward(z, t)

print(loss)

0.15823737724358658


## Exercise3.4

In [134]:
# Train a single Affine + Sigmoid unit with gradient descent.
# Columns of x are the input pairs (0,0), (0,1), (1,0), (1,1).
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
# Targets are the logical AND of each input pair.
t = np.array([0, 0, 0, 1])

input_dim = 2
hidden_dim = 2  # not used by this single-layer model
output_dim = 1

# Parameters are drawn as 2 * rand - 1, i.e. uniformly from [-1, 1).
# The shapes are derived from the dimensions declared above rather than
# repeating the literals 1 and 2.
w = 2.0 * np.random.rand(output_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = MSE()

epoch = 1000
for i in range(epoch):
    # Forward pass: affine -> sigmoid -> loss.
    p = layer1.forward(x)
    y = layer2.forward(p)
    loss = layer3.forward(y, t)

    # Backward pass in reverse order; layer1.backward stores dw and db.
    dy = layer3.backward()
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)

    layer1.update()

    # The reported loss and y were computed before this iteration's update.
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', y)

epoch 0 loss 0.12274927579404002 y [[0.42118093 0.36994107 0.26797782 0.22803347]]
epoch 100 loss 0.016585570256661034 y [[0.01791774 0.19488418 0.19474379 0.76238997]]
epoch 200 loss 0.008009002282706441 y [[0.00486048 0.13626446 0.13626083 0.83594703]]
epoch 300 loss 0.005099666614781936 y [[0.00224043 0.10894766 0.1089473  0.86941207]]
epoch 400 loss 0.0036895097409325598 y [[0.00130266 0.09276136 0.0927613  0.88907186]]
epoch 500 loss 0.0028703283529771884 y [[8.60775155e-04 8.18689834e-02 8.18689663e-02 9.02240971e-01]]
epoch 600 loss 0.0023394297756996665 y [[6.16368144e-04 7.39428175e-02 7.39428117e-02 9.11795813e-01]]
epoch 700 loss 0.0019692527718526013 y [[4.66255568e-04 6.78625991e-02 6.78625968e-02 9.19110320e-01]]
epoch 800 loss 0.0016972884097868988 y [[3.67003943e-04 6.30180961e-02 6.30180951e-02 9.24929288e-01]]
epoch 900 loss 0.0014894932708022076 y [[2.97695256e-04 5.90463740e-02 5.90463735e-02 9.29694175e-01]]


In [122]:
# Show the weight and bias gradients stored by the last backward() call of
# whichever Affine instance is currently bound to layer1.
print(layer1.dw, layer1.db, sep='\n')

[[-0.01216047 -0.01210709]]
[[0.0178823]]


## Exercise3.5

In [133]:
# Train the 2-2-1 sigmoid network with gradient descent.
# Columns of x are the input pairs (0,0), (0,1), (1,0), (1,1).
x = np.array([[0, 0, 1, 1], [0, 1, 0, 1]])
# Targets are the exclusive OR of each input pair.
t = np.array([0, 1, 1, 0])

input_dim, hidden_dim, output_dim = 2, 2, 1

# Every parameter is drawn as 2 * rand - 1. The draws happen in the order
# w, b, u, c, so the values for a given seed stay the same.
param_shapes = [
    (hidden_dim, input_dim),   # w
    (hidden_dim, 1),           # b
    (output_dim, hidden_dim),  # u
    (output_dim, 1),           # c
]
w, b, u, c = [2.0 * np.random.rand(*shape) - 1.0 for shape in param_shapes]

layer1, layer2, layer3, layer4 = Affine(w, b), Sigmoid(), Affine(u, c), Sigmoid()
layer5 = MSE()

epoch = 1000
for i in range(epoch):
    # Forward: affine -> sigmoid -> affine -> sigmoid -> loss.
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    z = layer4.forward(q)
    loss = layer5.forward(z, t)

    # Backward: walk the same layers in reverse, handing each gradient on.
    dz = layer5.backward()
    dq = layer4.backward(dz)
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)

    # Only the Affine layers hold parameters to update.
    for trainable in (layer1, layer3):
        trainable.update()

    # The reported loss and z were computed before this iteration's update.
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.1439256472890366 z [[0.68729137 0.67988078 0.70781997 0.70084967]]
epoch 100 loss 0.12500178614539453 z [[0.49318656 0.50074094 0.49968683 0.50715721]]
epoch 200 loss 0.12497692079132813 z [[0.49379354 0.49920332 0.50148016 0.50662013]]
epoch 300 loss 0.12489211893116936 z [[0.49358058 0.49585361 0.50539987 0.5066777 ]]
epoch 400 loss 0.12437281087467442 z [[0.49304896 0.48652225 0.51677071 0.50469324]]
epoch 500 loss 0.11920089374138372 z [[0.48292057 0.45119859 0.56978457 0.48386628]]
epoch 600 loss 0.08183306141600848 z [[0.38008431 0.45053299 0.73022088 0.3681109 ]]
epoch 700 loss 0.01761637060788426 z [[0.1895397  0.76735063 0.84616433 0.16496825]]
epoch 800 loss 0.006505045206132975 z [[0.11562799 0.86410165 0.89954902 0.10055729]]
epoch 900 loss 0.0036990246437935423 z [[0.08712833 0.89897438 0.92246743 0.07604852]]
