In [1]:
import numpy as np
np.random.seed(seed=777)

## Exercise3.1

In [2]:
class Affine:
    def __init__(self, w, b):
        self.w = w
        self.b = b
        self.dw = None
        self.db = None
        self.input = None
    
    def forward(self, x):
        self.input = x
        y = np.dot(self.w, x) + self.b
        return y
    
    def backward(self, dx):
        self.dw = np.dot(dx ,self.input.T)
        self.db = np.sum(dx, axis=1, keepdims=True)
        return np.dot(self.w.T, dx)
    
    def update(self, lr=1.0):
        self.w -= self.dw * lr
        self.b -= self.db * lr
    
class Sigmoid:
    def __init__(self):
        self.output = None
        
    def forward(self, x):
        y = 1 / (1 + np.exp(-x))
        self.output = y
        return y
    
    def backward(self, dx):
        return dx * self.output * (1.0 - self.output)

In [3]:
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
w = np.array([[ 1.1, 0.7],
              [0.3, 0.5]])
b = np.array([[0.1],
              [ 3]])
u = np.array([[ 2, 2.1]])
c = np.array([0.9])

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)

print(z)

[[0.98111578 0.98685385 0.98859632 0.99093144]]


In [4]:
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
w = np.array([[ 1, 0.5],
              [0.11,1]])
b = np.array([[0.3],
              [ 2]])
u = np.array([[ 1, 1.5]])
c = np.array([0.6])


layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)

print(z)

[[0.92383547 0.93812899 0.93840737 0.9475542 ]]


## Exercise3.2

In [5]:
class MSE:
    def __init__(self):
        self.output = None
        self.t = None
    
    def forward(self, z, t):
        data_num = z.shape[-1]
        loss = np.sum((z - t) ** 2) / (2 * data_num)
        self.z = z
        self.t = t
        return loss
    
    def backward(self):
        return self.z - self.t

In [6]:
x = np.array([[ 0, 0, 1, 1],
              [ 0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])
w = np.array([[ 1, 0.5],
              [0.11, 1]])
b = np.array([[0.3],
              [ 2]])
u = np.array([[ 1, 1.5]])
c = np.array([0.6])


layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)
loss = layer5.forward(z, t)

print(loss)

0.2198690776011607


## Exercise3.3

In [7]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim = 2
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

p = layer1.forward(x)
y = layer2.forward(p)
q = layer3.forward(y)
z = layer4.forward(q)
loss = layer5.forward(z, t)

print(loss)

0.1254586464038719


## Exercise3.4

In [8]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 0, 0, 1])

input_dim = 2
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(1, 2) - 1.0
b = 2.0 * np.random.rand(1, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = MSE()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    loss = layer3.forward(y, t)
    
    dy = layer3.backward()
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'y', y)

epoch 0 loss 0.11059875819525353 y [[0.30140249 0.16058501 0.36526844 0.20329552]]
epoch 100 loss 0.01603109890229218 y [[0.01683037 0.19142871 0.19181786 0.76649093]]
epoch 200 loss 0.007857524503265288 y [[0.00470154 0.13497637 0.13498714 0.83752556]]
epoch 300 loss 0.005033642102344097 y [[0.00219162 0.10824434 0.10824542 0.87026787]]
epoch 400 loss 0.0036534755311751757 y [[0.0012816  0.09230959 0.09230979 0.88961883]]
epoch 500 loss 0.00284790866765143 y [[8.49775545e-04 8.15500325e-02 8.15500863e-02 9.02625861e-01]]
epoch 600 loss 0.002324238778257206 y [[6.09874652e-04 7.37032833e-02 7.37033016e-02 9.12084201e-01]]
epoch 700 loss 0.0019583259795641645 y [[4.62085108e-04 6.76747158e-02 6.76747232e-02 9.19336136e-01]]
epoch 800 loss 0.0016890744636436322 y [[3.64155515e-04 6.28659065e-02 6.28659098e-02 9.25111962e-01]]
epoch 900 loss 0.0014831060945209795 y [[2.95656574e-04 5.89200069e-02 5.89200085e-02 9.29845693e-01]]


In [9]:
print(layer1.w)
print(layer1.b)

[[5.47896682 5.4789668 ]]
[[-8.31114433]]


## Exercise3.5

In [10]:
x = np.array([[0, 0, 1, 1],
              [0, 1, 0, 1]])
t = np.array([0, 1, 1, 0])

input_dim = 2
hidden_dim = 2
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    z = layer4.forward(q)
    loss = layer5.forward(z, t)
    
    dz = layer5.backward()
    dq = layer4.backward(dz)
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    layer3.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.1511950506231787 z [[0.27683793 0.28085116 0.26195186 0.2665165 ]]
epoch 100 loss 0.12469058166392134 z [[0.48881315 0.50095413 0.50338763 0.51275309]]
epoch 200 loss 0.12236396793827176 z [[0.4640924  0.51379481 0.50975341 0.53553037]]
epoch 300 loss 0.10662726735318692 z [[0.34994823 0.58056418 0.54407039 0.58886005]]
epoch 400 loss 0.08441147613040437 z [[0.20192356 0.69312628 0.56384439 0.59170557]]
epoch 500 loss 0.051144180910469815 z [[0.18455268 0.72762976 0.66882714 0.43730167]]
epoch 600 loss 0.012574234890169114 z [[0.13693242 0.84500565 0.8466402  0.18520507]]
epoch 700 loss 0.0055524877653663944 z [[0.10067005 0.89753476 0.89870016 0.11629558]]
epoch 800 loss 0.0033750296092984584 z [[0.08175318 0.92041547 0.92128683 0.08824508]]
epoch 900 loss 0.0023769992116009333 z [[0.07015272 0.93337913 0.9340749  0.07287064]]


## Exercise3.6

In [11]:
x = np.array([[0, 0, 0, 0, 1, 1, 1, 1],
              [0, 0, 1, 1, 0, 0, 1, 1],
              [0, 1, 0, 1, 0, 1, 0, 1]])
t = np.array([0, 0, 0, 0, 0, 0, 0, 1])

input_dim = 3
hidden_dim = 3
output_dim = 1

w = 2.0 * np.random.rand(hidden_dim, input_dim) - 1.0
b = 2.0 * np.random.rand(hidden_dim, 1) - 1.0
u = 2.0 * np.random.rand(output_dim, hidden_dim) -1.0
c = 2.0 * np.random.rand(output_dim, 1) - 1.0

layer1 = Affine(w, b)
layer2 = Sigmoid()
layer3 = Affine(u, c)
layer4 = Sigmoid()
layer5 = MSE()

epoch = 1000
for i in range(epoch):
    p = layer1.forward(x)
    y = layer2.forward(p)
    q = layer3.forward(y)
    z = layer4.forward(q)
    loss = layer5.forward(z, t)
    
    dz = layer5.backward()
    dq = layer4.backward(dz)
    dy = layer3.backward(dq)
    dp = layer2.backward(dy)
    dx = layer1.backward(dp)
    
    layer1.update()
    layer3.update()
    
    if i % 100 == 0:
        print('epoch', i, 'loss', loss, 'z', z)

epoch 0 loss 0.09878065470304687 z [[0.41560448 0.37502515 0.4532894  0.4116474  0.43901709 0.39845916
  0.47311903 0.43710806]]
epoch 100 loss 0.027184888566827395 z [[0.0235625  0.05928902 0.06759481 0.20402096 0.06728326 0.19543462
  0.24339796 0.46827645]]
epoch 200 loss 0.005710145438401555 z [[0.00206513 0.01070227 0.00777105 0.11794959 0.00758125 0.11505224
  0.11257245 0.77349619]]
epoch 300 loss 0.0021440680417222097 z [[6.34670562e-04 4.02296515e-03 2.75534076e-03 7.38441486e-02
  2.58430090e-03 7.16458394e-02 6.95558401e-02 8.62704219e-01]]
epoch 400 loss 0.001183598411317012 z [[3.32802035e-04 2.31513607e-03 1.55278045e-03 5.51844218e-02
  1.43984808e-03 5.35241702e-02 5.17692085e-02 8.98326938e-01]]
epoch 500 loss 0.0007834980057750519 z [[2.17649246e-04 1.59817846e-03 1.05924826e-03 4.49919360e-02
  9.81419314e-04 4.36874617e-02 4.21390998e-02 9.17400309e-01]]
epoch 600 loss 0.0005732685883029496 z [[1.59564847e-04 1.21436123e-03 7.98014419e-04 3.85135160e-02
  7.41999723