# Model training with Backpropagation

- 막히는 부분 있으면, https://github.com/WegraLee/deep-learning-from-scratch/tree/master/ch05 참고!

In [27]:
import sys, os
sys.path.append(os.pardir)  # 상위 경로에 있는 functions, gradient 등을 import할 수 있게

import numpy as np

## 1. Define layers as class

* All layers have three methods
    - `__init__(self, ...)`
    - `forward(self, x)`
    - `backward(self, dout)`

### 1.1. Activation layers
- ReLU
- Sigmoid

<figure>

<img src="https://i.imgur.com/FrxDrr5.png" width="600">

<figcaption align="center"> - Backpropagation of relu node
</figcaption>
    
</figure>

In [10]:
class ReLU:
    def __init__(self):
        self.mask = None  # input에서 어떤 원소가 0 이하인지 저장하는 mask
    
    def forward(self, x):
        ### Type your code below ###
        
        return None
    
    def backward(self, dout):
        ### Type your code below ###

        return None

<figure>

<img src="https://i.imgur.com/riURjqG.png" width="500">

<figcaption align="center"> - Backpropagation of sigmoid node
</figcaption>
    
</figure>

In [28]:
class Sigmoid:
    def __init__(self):
        self.out = None
    
    def forward(self, x):
        ### Type your code below ###
        
        return None
    
    def backward(self, dout):
        ### Type your code below ###
        
        return None

### 1.2. Affine layer

<figure>

<img src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=http%3A%2F%2Fcfile7.uf.tistory.com%2Fimage%2F994510365B98F75122F136" width="600">

<figcaption align="center"> - Backpropagation of affine layer
</figcaption>
    
</figure>

In [12]:
class Affine:
    def __init__(self, W, b):
        self.W = W
        self.b = b
        self.x = None
        self.dW = None
        self.db = None

    def forward(self, x):
        ### Type your code below ###
        
        return None
    
    def backward(self, dout):
        # 역전파가 이어져야 할 것은 input X에 대한 미분값뿐. (X = 이전 layer의 output이므로)
        # W와 b에 대한 미분값은 이 layer의 W와 b를 업데이트할 때 쓰기 위해 이 layer에다 저장해놓으면 됨.
        
        ### Type your code below ###
        
        return None

### 1.3. Softmax-with-Loss layer

<figure>

<img src="https://img1.daumcdn.net/thumb/R1280x0/?scode=mtistory2&fname=https%3A%2F%2Fblog.kakaocdn.net%2Fdn%2FrWMeM%2FbtqQptySbcy%2FOcmx41ncd8SD6e7nPhVAkK%2Fimg.png" width="600">

<figcaption align="center"> - Backpropagation of softmax-with-loss layer
</figcaption>
    
</figure>

In [29]:
from functions import softmax, cross_entropy_error  # 지난번에 이미 구현했으므로 import해서 씁시다

class SoftmaxWithLoss:
    def __init__(self):
        self.loss = None
        self.y = None  # softmax의 출력 (확률 벡터)
        self.t = None  # 정답 label (one-hot vector)
    
    def forward(self, x, t):
        ### Type your code below ###
        
        return None

    def backward(self, dout=1):
        # 이 layer의 output이 최종 loss이므로, 상류에서 오는 미분값은 항상 1
        
        batch_size = self.t.shape[0]  # 구한 dx를 batch_size로 나누어 sample 1개당 오차를 앞 계층으로 전파하는 것 주의!
        
        ### Type your code below ###
        
        return None

## 2. Implement Deep Neural Network

In [20]:
from gradient import numerical_gradient  # 지난주에 구현했으니 import해서 사용
from collections import OrderedDict  # layer들을 순서대로 저장하기 위해 사용

class AnyLayerNet:
    """
    원하는 만큼 layer를 추가하여 구현해 보세요.
    """
    def __init__(self, input_size, hidden_size, output_size,
                 weight_init_std=0.01):
        # Initialize parameters
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        ### Type your code below ###

        # Create layers
        self.layers = OrderedDict()  # 순서를 기억하는 dictionary
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = ReLU()
        ### Type your code below ###

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x):
        ### Type your code below ###
        # Hint: SoftmaxWithLoss layer는 여기서 call하지 않음. (prediction 시에는 logit 값만 내놓으면 되니 그런 듯)
        # self.layers에 담긴 layer들의 forward만 call할 것.
        
        for layer in self.layers.values():
            pass
        
        return None

    def loss(self, x, t):
        # x: 입력 데이터, t: 정답 label
        
        ### Type your code below ###
        # Hint: self.predict에서 lastLayer는 call하지 않았으므로, 여기서 따로 call해줄 것.
        
        return None
    
    def accuracy(self, x, t):
        ### Type your code below ###
        
        return None
    
    def numerical_gradient(self, x, t):
        # 저번주에 구현했으니 그대로 씁시다
        """다만, Layer를 2층보다 더 추가했다면 아래 code에도 추가해주어야 함!"""
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads
    
    def gradient(self, x, t):
        ### Type your code below ###
        
        # 1. Forward path (calculate loss)
        pass

        # 2. Backpropagation
        # Hint: self.layers를 list로 만들어 순서를 뒤집은 뒤, for loop
  
        dout = 1  # dL/dL
        pass

        # grads dictionary에 gradient 저장
        grads = {}
        pass

        return grads

## 3. Compare numerical gradient & Backpropagation
- 배치 하나로 gradient를 구하는데 각각 얼마나 걸리는지 비교해봅시다

In [22]:
from mnist import load_mnist

(X_train, y_train), (X_test, y_test) = load_mnist(normalize=True, 
                                                  flatten=True, 
                                                  one_hot_label=True)

X_batch = X_train[:64]
y_batch = y_train[:64]

### Type your code below ###
network = AnyLayerNet(...)

In [25]:
%%time
# Get gradient with numerical_gradient

### Type your code below ###


CPU times: user 2 µs, sys: 0 ns, total: 2 µs
Wall time: 5.25 µs


In [26]:
%%time
# Get gradient with backrpopagation

### Type your code below ###


CPU times: user 7 µs, sys: 0 ns, total: 7 µs
Wall time: 13.8 µs


## 4. Train Network
    - Backpropagation을 통해 모델을 트레이닝하고,
    - Epoch별 Train accuracy, Test accuracy를 plotting해보세요.

In [30]:
# 데이터 읽기
(X_train, y_train), (X_test, y_test) = load_mnist(normalize=True,
                                                  flatten=True,
                                                  one_hot_label=True)


### Type your code below ###
network = AnyLayerNet(...)

iters_num = None
train_size = X_train.shape[0]
batch_size = None
learning_rate = None

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # 미니배치
    batch_mask = np.random.choice(train_size, batch_size)
    X_batch = X_train[batch_mask]
    y_batch = y_train[batch_mask]
    
    # 기울기 계산
    pass
    
    # 파라미터 갱신
    pass
    
    # Loss 계산하여 list에 추가
    loss = network.loss(X_batch, y_batch)
    train_loss_list.append(loss)
    
    if i % iter_per_epoch == 0:
        # 매 epoch마다, accuracy를 계산하여 list에 추가하고 print
        pass
    
        print(train_acc, test_acc)

TypeError: __init__() missing 2 required positional arguments: 'hidden_size' and 'output_size'

In [None]:
import matplotlib.pyplot as plt

### Type your code below ###

