In [64]:
import numpy as np


#多層パーセプトロン
class MLP(object):
    
    def __init__(self, input_dim, hidden_dim, output_dim):
        
        self.l1 = Layer(input_dim = input_dim,
                       output_dim = hidden_dim,
                       activation = sigmoid,
                       dactivation = dsigmoid)
        
        self.l2 = Layer(input_dim = hidden_dim,
                       output_dim = output_dim,
                       activation = sigmoid,
                       dactivation = dsigmoid)
        
        self.layers = [self.l1, self.l2]
        
    def __call__(self, x):
        return self.forward(x)
    
    def forward(self, x):
        h = self.l1(x)
        y = self.l2(h)
        return y

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def dsigmoid(x):
    return sigmoid(x) * (1 - sigmoid(x))
        
        
        
class Layer(object):
    #層間の結合
    
    def __init__(self, input_dim, output_dim, activation, dactivation):
        self.W = np.random.normal(size = (input_dim, output_dim))
        self.b = np.zeros(output_dim)
        
        self.activation = activation
        self.dactivation = dactivation
        
    def __call__(self, x):
        return self.forward(x)
    
    def forward(self, x):
        self._input = x
        self._pre_activation = np.matmul(x, self.W) + self.b
        return self.activation(self._pre_activation)
    
    def backward(self, delta, W):
        delta = self.dactivation(self._pre_activation) * np.matmul(delta, W.T)
        return delta
    
    def compute_gradients(self, delta):
        dW = np.matmul(self._input.T, delta)
        db = np.matmul(np.ones(self._input.shape[0]), delta)
        return dW, db
    
    
    
    
#データの準備
#XOR
x = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
t = np.array([[0], [1], [1], [0]])



#モデルの構築
np.random.seed(123)
model = MLP(2, 2, 1)



#モデルの学習
def compute_loss(t, y):
    return (-t * np.log(y) - (1 - t) * np.log(1 - y)).sum()

def train_step(x, t):
    y = model(x)
    for i, layer in enumerate(model.layers[::-1]):
        if i == 0:
            delta = y - t
        else:
            delta = layer.backward(delta, W)
        
        dW, db = layer.compute_gradients(delta)
        layer.W = layer.W - 0.1 * dW
        layer.b = layer.b - 0.1 * db
        
        W = layer.W
    
    loss = compute_loss(t, y)
    return loss


epochs = 1000


for epoch in range(epochs):
    train_loss = train_step(x, t)
    
    if epoch % 100 == 0 or epoch == epochs - 1:
        print("epoch: {}, loss: {:.3f}".format(epoch+1, train_loss))
        
        
        
#モデルの評価
for input in x:
    print("{} => {:.3f}".format(input, model(input)[0]))

    
    
    

epoch: 1, loss: 2.940
epoch: 101, loss: 2.696
epoch: 201, loss: 2.572
epoch: 301, loss: 2.403
epoch: 401, loss: 2.250
epoch: 501, loss: 2.110
epoch: 601, loss: 1.849
epoch: 701, loss: 1.274
epoch: 801, loss: 0.738
epoch: 901, loss: 0.470
epoch: 1000, loss: 0.336
[0 0] => 0.096
[0 1] => 0.909
[1 0] => 0.944
[1 1] => 0.078


In [57]:
print(x, end = "\n\n")

print(model.l1.W, end = "\n\n")

print(model.l1.b, end = "\n\n")

print(model.l2.W, end = "\n\n")

print(model.l2.b, end = "\n\n")

[[0 0]
 [0 1]
 [1 0]
 [1 1]]

[[-4.42048635  5.55073125]
 [ 4.01363434 -5.62585416]]

[-2.16209935 -3.35415386]

[[6.27321307]
 [6.59263655]]

[-3.11766916]



In [63]:

print(model(x))

[[0.09555042]
 [0.90930218]
 [0.94395859]
 [0.07840033]]


In [65]:
print(abc)

[]
