# 신경망 학습

In [2]:
import numpy as np
from dataset.mnist import load_mnist

## Helper functions

In [3]:
# Loss function (cross entropy)
def cross_entropy_error(y, t):
    """Return the batch-averaged cross-entropy between ``y`` and ``t``.

    ``t`` is multiplied element-wise with ``log(y)``, so it acts as a weight
    with the same layout as ``y`` (for example one-hot labels). When ``y`` is
    1-D, both arrays are reshaped to a single-row batch first.
    """
    eps = 1e-7  # small offset so log() never receives exactly 0

    if y.ndim == 1:
        # Treat a lone sample as a batch containing one row.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    n_samples = y.shape[0]
    weighted_log_sum = np.sum(t * np.log(y + eps))
    return -weighted_log_sum / n_samples

In [4]:
# Numerically estimates the partial derivative of f with respect to every
# element of an array (for example a weight matrix W of the network).
# The result is the gradient: an array of partial derivatives shaped like x.
def numerical_gradient(f, x):  # e.g. x with shape (784, 20) => grads with shape (784, 20)
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Every element of ``x`` is shifted in place by ``+h`` and then ``-h``;
    ``f`` is evaluated after each shift and the element is restored
    afterwards, even if ``f`` raises.

    Args:
        f: Callable invoked as ``f(x)``; its result is used as a scalar.
        x: NumPy array of any shape (zero-size arrays are allowed). It is
            modified temporarily while the gradient is being computed.

    Returns:
        An array created with ``np.zeros_like(x)`` whose entries are
        ``(f(x + h) - f(x - h)) / (2 * h)`` for the matching element of ``x``.
    """
    h = 1e-4  # 0.0001
    grads = np.zeros_like(x)

    # np.ndindex simply yields nothing for a zero-size array, whereas a plain
    # np.nditer over such an array raises ValueError.
    for idx in np.ndindex(x.shape):
        saved = x[idx]
        try:
            x[idx] = saved + h
            fxh1 = f(x)  # f(x + h)
            x[idx] = saved - h
            fxh2 = f(x)  # f(x - h)
        finally:
            # Always put the original value back so a failing f cannot leave
            # the caller's array perturbed.
            x[idx] = saved

        grads[idx] = (fxh1 - fxh2) / (2 * h)

    return grads

In [5]:
# Softmax
def softmax(x):
    """Convert scores to probabilities with the softmax function.

    A 2-D input is normalised row by row. Any other input is normalised over
    all of its elements at once. The maximum is subtracted before
    exponentiating so that ``exp`` cannot overflow.
    """
    if x.ndim == 2:
        # Row-wise: keepdims lets the per-row max and sum broadcast back.
        shifted = x - np.max(x, axis=1, keepdims=True)
        exps = np.exp(shifted)
        return exps / np.sum(exps, axis=1, keepdims=True)

    exps = np.exp(x - np.max(x))  # overflow guard
    return exps / np.sum(exps)

In [6]:
# Sigmoid
def sigmoid(x):
    """Apply the logistic function ``1 / (1 + exp(-x))`` to ``x``."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

## 2층 신경망 구현하기

In [17]:
class TwoLayerNet:
    """Two-layer fully connected network: sigmoid hidden layer, softmax output.

    All trainable parameters are stored in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.
        """
        # Example shapes for (784, 20, 10):
        # W1 (784, 20), b1 (20,), W2 (20, 10), b2 (10,)
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    def predict(self, x):
        """Run a forward pass and return the softmax output for ``x``."""
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1   # hidden pre-activation
        z1 = sigmoid(a1)          # hidden activation
        a2 = np.dot(z1, W2) + b2  # output scores
        return softmax(a2)        # class probabilities

    def loss(self, x, t):
        """Return the cross-entropy error of the prediction for ``x`` against ``t``."""
        y = self.predict(x)
        return cross_entropy_error(y, t)

    def numerical_gradient(self, x, t):
        """Return numerically estimated loss gradients for every parameter.

        The result is a dict with the same keys as ``self.params``; each
        value has the shape of the corresponding parameter.
        """
        def loss_at_current_params(_):
            # The argument is ignored on purpose: the module-level
            # numerical_gradient perturbs the parameter array in place, so
            # self.loss already sees the perturbed values via self.params.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_at_current_params, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def accuracy(self, x=None, t=None):
        """Return the fraction of samples whose predicted class matches ``t``.

        Args:
            x: Input sample or batch accepted by ``predict``.
            t: One-hot targets laid out like the output of ``predict``.

        Returns:
            A float in ``[0, 1]``, or ``None`` when ``x`` or ``t`` is omitted
            (a call without data keeps returning ``None``, as the former
            placeholder did).
        """
        if x is None or t is None:
            return None

        # atleast_2d lets a single 1-D sample be scored like a batch of one.
        y = np.atleast_2d(self.predict(x))
        t = np.atleast_2d(t)
        predicted = np.argmax(y, axis=1)
        actual = np.argmax(t, axis=1)
        return float(np.mean(predicted == actual))
    

In [18]:
# Load the MNIST train/test splits via the project's dataset.mnist helper.
# NOTE(review): the flag semantics are defined in dataset.mnist (not shown
# here) - presumably scaled pixels, flattened images and one-hot labels; confirm.
(X_train, y_train), (X_test, y_test) = load_mnist(
    normalize=True,
    flatten=True,
    one_hot_label=True,
)

In [19]:
# Build the network: 784 inputs, 20 hidden units, 10 outputs.
network = TwoLayerNet(
    input_size=784,
    hidden_size=20,
    output_size=10,
)

In [None]:
# Hyperparameters
iters_num = 1000
batch_size = 100  # mini-batch size
learning_rate = 0.1

# Take the sample count from the data instead of hard-coding it, so the loop
# keeps working if the training set or batch_size changes.
train_size = X_train.shape[0]

for i in range(iters_num):
    # Randomly chosen indices of this iteration's mini-batch
    # (np.random.choice samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = X_train[batch_mask]
    t_batch = y_train[batch_mask]

    # 1. Gradient of the loss with respect to every parameter
    grads = network.numerical_gradient(x_batch, t_batch)

    # 2. Gradient descent (update the model parameters)
    for key in ('W1', 'W2', 'b1', 'b2'):
        # W(new) <- W(old) - (lr * gradient)
        network.params[key] -= learning_rate * grads[key]

    # Loss on the same mini-batch, measured after the update.
    loss = network.loss(x_batch, t_batch)
    print(i, loss)

0 2.2994508537739495
1 2.301847412939289
2 2.2974665195961546
3 2.3017517466980126
4 2.299750466479626
5 2.293010337795922
6 2.2910569834661954
7 2.3059323653113952
8 2.2996606236225334
9 2.297703519134499
10 2.291573730719461
11 2.2976872859387525
12 2.301089186192442
13 2.3006563904555475
14 2.2951417491264077
15 2.299251368467017
16 2.280728978150129
17 2.3016078608253436
18 2.3015157623336844
19 2.302748785790564
20 2.3046901108950197
21 2.293777745267885
22 2.291815765236617
23 2.294805905707565
24 2.299722949758889
25 2.295240066430731
26 2.2982777149738784
27 2.289313999223268
28 2.2834870883098866
29 2.2865976989820935
30 2.2878849171365725
31 2.3048895335457336
32 2.3027215214049206
33 2.286920190749916
