In [1]:
import numpy as np
from collections import OrderedDict

In [93]:
def numerical_gradient(f, x):
    h = 1e-4 # 0.0001
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x) # f(x+h)

        x[idx] = tmp_val - h
        fxh2 = f(x) # f(x-h)
        grad[idx] = (fxh1 - fxh2) / (2*h)

        x[idx] = tmp_val # 값 복원
        it.iternext()
    return grad

def sigmoid(x):
    return 1 / (1 + np.exp(-x))


def sigmoid_grad(x):
    return (1.0 - sigmoid(x)) * sigmoid(x)

def MSELoss(self, y, t):
    return 0.5 * np.sum((y-t)**2)


In [102]:
class Sigmoid:
    def __init__(self):
        self.out = None

    def forward(self, x):
        out = sigmoid(x)
        self.out = out
        return out

    def backward(self, dout):
        dx = dout * (1.0 - self.out) * self.out

        return dx

class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b

        self.x = None
        self.original_x_shape = None
        # 가중치와 편향 매개변수의 미분
        self.dW = None
        self.db = None

    def forward(self, x):
        self.original_x_shape = x.shape
        x = x.reshape(x.shape[0], -1)
        self.x = x
        
        print('x = ',self.x, self.x.shape)
        print('w = ',self.W, self.W.shape)
        print('b = ',self.b, self.b.shape)
        out = self.x@self.W + self.b

        return out

    def backward(self, dout):
        dx = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        dx = dx.reshape(*self.original_x_shape)  # 입력 데이터 모양 변경(텐서 대응)
        return dx


In [103]:
class Network:
    def __init__(self, N, alpha):
        self.alpha = alpha

        self.params = {}
        
        self.params['W1'] = np.random.randn(2,N)
        self.params['b1'] = np.zeros(N)
        self.params['W2'] = np.random.randn(N,1)
        self.params['b2'] = np.zeros(1)

        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Sigmoid()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])
        
        self.lastLayer = Sigmoid()
        
        
    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)

        return x


    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y-t)
    
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads

    def gradient(self, x, t):
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)

        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # 결과 저장
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db

        return grads


    def training(self, X, y, iters):
        for i in range(iters):
            print(i)
            for (x, Y_) in zip(X, y):
                # self.numerical_gradient(x,Y_)
                print('x = ',x)
                print('y = ',Y_)
                print('W = ',self.params['W1'])
                print('b = ',self.params['b1'])
                self.predict(x)
                self.gradient(x,Y_)

In [104]:
Neuron = Network(3, 0.03)
print('W1 = ',Neuron.params['W1'])
print(Neuron.params['W1'].shape)
print('W2 = ',Neuron.params['W2'])
print(Neuron.params['W2'].shape)
print('b1 = ',Neuron.params['b1'])
print(Neuron.params['b1'].shape)
print('b2 = ',Neuron.params['b2'])
print(Neuron.params['b2'].shape)
print('\n\n\n')

#print(Neuron.diff2(Neuron.MSELoss,Neuron.params['W1']))

W1 =  [[ 0.23411074  1.37953705  1.1092132 ]
 [-0.05986979 -1.40967788 -1.02200051]]
(2, 3)
W2 =  [[-0.06894541]
 [ 0.22807299]
 [ 0.35436502]]
(3, 1)
b1 =  [0. 0. 0.]
(3,)
b2 =  [0.]
(1,)



In [105]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
Y = np.array([[0], [1], [1], [1]])
Neuron.training(X, Y, 1)

0
x =  [0 0]
y =  [0]
W =  [[ 0.23411074  1.37953705  1.1092132 ]
 [-0.05986979 -1.40967788 -1.02200051]]
b =  [0. 0. 0.]
x =  [[0]
 [0]] (2, 1)
w =  [[ 0.23411074  1.37953705  1.1092132 ]
 [-0.05986979 -1.40967788 -1.02200051]] (2, 3)
b =  [0. 0. 0.] (3,)


ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 2 is different from 1)

In [58]:
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]])
for x in X:
    print(Neuron.predict(x))
print(Neuron.params['W1'],Neuron.params['W1'].shape)
print(Neuron.params['b1'],Neuron.params['b1'].shape)

self.x: [0 0]
self.W: [[-0.2965295   0.43182406 -0.26991567]
 [ 0.77626428 -0.249126    0.29819814]]
self.b: [0. 0. 0.]
self.x: [0.5 0.5 0.5]
self.W: []
self.b: []
[]
self.x: [0 1]
self.W: [[-0.2965295   0.43182406 -0.26991567]
 [ 0.77626428 -0.249126    0.29819814]]
self.b: [0. 0. 0.]
self.x: [0.68487442 0.43803863 0.57400198]
self.W: []
self.b: []
[]
self.x: [1 0]
self.W: [[-0.2965295   0.43182406 -0.26991567]
 [ 0.77626428 -0.249126    0.29819814]]
self.b: [0. 0. 0.]
self.x: [0.42640609 0.60630915 0.4329278 ]
self.W: []
self.b: []
[]
self.x: [1 1]
self.W: [[-0.2965295   0.43182406 -0.26991567]
 [ 0.77626428 -0.249126    0.29819814]]
self.b: [0. 0. 0.]
self.x: [0.61768524 0.54554789 0.50707015]
self.W: []
self.b: []
[]
[[-0.2965295   0.43182406 -0.26991567]
 [ 0.77626428 -0.249126    0.29819814]] (2, 3)
[0. 0. 0.] (3,)
