In [2]:
import numpy as np

In [None]:
w1 = np.random.randn(2, 4)
b1 = np.random.randn(4)

x = np.random.randn(10, 2)
h = np.matmul(x, w1) + b1

## 계층으로 만들기

In [3]:
class Sigmoid :
    def __init__(self) :
        self.params = []
        
    def forward(self, x) :
        return 1 / (1 + np.exp(-x))

    
class Affine :
    def __init__(self, W, b) :
        self.params = [W, b]
        
    def forward(self, x) :
        W, b = self.params
        out = np.matmul(x, W)+ b
        return out

In [10]:
class TwoLayerNet :
    def __init__(self, input_size, hidden_size, output_size) :
        I, H, O = input_size, hidden_size, output_size
        
        W1 = np.random.randn(I, H)
        b1 = np.random.randn(H)
        W2 = np.random.randn(H, O)
        b2 = np.random.randn(O)
        
        self.layers = [
            Affine(W1, b1),
            Sigmoid(),
            Affine(W2, b2)
        ]
        
        self.params = []
        
        for layer in self.layers :
            self.params += layer.params
        
    def predict(self, x) :
        for layer in self.layers :
            x = layer.forward(x)
        return x

In [11]:
#input value
x = np.random.randn(10, 2)

model = TwoLayerNet(2, 4, 3)
s = model.predict(x)

s

array([[-0.04411292,  1.37784706, -0.9409082 ],
       [ 0.37810317,  1.16930838, -0.4991253 ],
       [ 0.14480866,  0.98764384,  0.23725392],
       [ 0.28351659,  1.25484844, -0.7991732 ],
       [ 0.43303004,  1.08457562, -0.18753145],
       [ 0.29096472,  1.20865834, -0.8157494 ],
       [ 0.18198885,  1.30350319, -0.9011623 ],
       [ 0.44016468,  1.13558073, -0.51785821],
       [-0.16376718,  1.30977163, -0.66596243],
       [ 0.30055334,  1.10461868, -0.46647481]])

## Repeat node

- 복제 노드, (D, 1) -> node -> (D, N)

In [4]:
D, N = 8, 7

x = np.random.randn(10)
y = np.repeat(x, N, axis=0)

x, y

(array([ 0.92013951, -1.1807143 ,  0.0422098 , -0.66389058, -2.43583411,
         0.19480744,  0.57837282, -0.64826315, -0.84184005,  1.03847774]),
 array([ 0.92013951,  0.92013951,  0.92013951,  0.92013951,  0.92013951,
         0.92013951,  0.92013951, -1.1807143 , -1.1807143 , -1.1807143 ,
        -1.1807143 , -1.1807143 , -1.1807143 , -1.1807143 ,  0.0422098 ,
         0.0422098 ,  0.0422098 ,  0.0422098 ,  0.0422098 ,  0.0422098 ,
         0.0422098 , -0.66389058, -0.66389058, -0.66389058, -0.66389058,
        -0.66389058, -0.66389058, -0.66389058, -2.43583411, -2.43583411,
        -2.43583411, -2.43583411, -2.43583411, -2.43583411, -2.43583411,
         0.19480744,  0.19480744,  0.19480744,  0.19480744,  0.19480744,
         0.19480744,  0.19480744,  0.57837282,  0.57837282,  0.57837282,
         0.57837282,  0.57837282,  0.57837282,  0.57837282, -0.64826315,
        -0.64826315, -0.64826315, -0.64826315, -0.64826315, -0.64826315,
        -0.64826315, -0.84184005, -0.84184005, -0

In [6]:
dy = np.random.randn(N, D)

dx = np.sum(dy, axis = 0, keepdims = True)

dx

array([[ 2.4977484 ,  1.09165039,  1.43433324,  3.55991852, -0.28218988,
        -1.86933281, -4.31023823,  7.2361679 ]])

## Sum node
- np.random.randn(N, D)
- forward : np.sum
- backward : np.repeat 함수를 통해 구현 가능

## MatMul 노드
- y = x*W 의 연산을 수행하는 노드
- dy / dx = W
- 전체 loss function : L
- $$ \cfrac{dL}{dx} = \cfrac{dL}{dy} * W^t $$
- $$ \cfrac{dL}{dw} = x^t * \cfrac{dL}{dy} $$

In [7]:
class MatMul :
    def __init__(self, W) :
        self.params = [W]
        self.grads = [np.zeros_like(W)]
        
    def forward(self, x) :
        W,  = self.params
        out = np.matmul(x, W)
        self.x = x
        
        return out
    
    def backward(self, dout) :
        W,  = self.params
        dx = np.matmul(dout, W, T)
        dW = np.matmul(self.x.T, dout)
        # ... 은 깊은 복사를 위한 생략 기호
        self.grads[0][...] = dW
        
        return dx

## 기울기 도출과 역전파 구현
- $$ y = \cfrac{1}{1+(-x)^e} $$

In [8]:
class Sigmoid :
    def __init__(self) :
        self.params, self.grads = [], []
        self.out = None
        
    def forward(self, x) :
        out = 1 / (1 + np.exp(-x))
        self.out = out
        
        return out
    
    def backward(self, dout) :
        dx = dout * (1.0 - self.out) * self.out
        return dx

## 가중치 갱신

In [9]:
class SGD :
    def __init__ (self, lr = 0.01) :
        self.lr = lr
        
    def update(self, params, grads) :
        for i in range(len(params)) :
            params[i] -= self.lr * grads[i]

### train_loop

In [10]:
max_epoch = 300
batch_size = 30
hidden_size = 10
learning_rate = 1.0

x, t = spiral.load_data()
model = TwoLayerNet(input_size=2, hidden_size=hidden_size, output_size=3)
optimizer = SGD(lr=learning_rate)

data_size = len(x)
max_iters = data_size // batch_size
total_loss = 0
loss_count = 0
loss_list = []

for epoch in range(max_epoch) :
    idx = np.random.permutation(data_size)
    x = x[idx]
    t = t[idx]
    
    for iters in range(max_iters) :
        batch_x = x[iters*batch_size : (iters+1)*batch_size]
        batch_t = t[iters*batch_size : (iters+1)*batch_size]
        
        loss = model.forward(batch_x, batch_t)
        model.backward()
        optimizer.update(model.params, model.grads)
        
        total_loss += loss
        loss_count += 1
        
        if (iters+1) % 10 == 0 :
            avg_loss = total_loss / loss_count
            print(epoch+1, iters+1, max_iters, avg_loss)
            
            loss_list.append(avg_loss)
            total_loss, loss_count = 0, 0

NameError: name 'spiral' is not defined

## GPU 사용
- 넘파이와 쿠파이... 쿠파이!