In [1]:
import numpy as np

# Loss function을 정의해 봅시다

In [2]:
def sum_squares_error(y_pred, y_real):
    result = np.sum((y_real-y_pred)**2)
    return result

def cross_entropy_error(y_pred, y_real, is_onehot = True):
    if y_real.ndim==1:
        y_real = y_real.reshape(1,y_real.size)
        y_pred = y_pred.reshape(1,y_pred.size)

    batch_size = y_pred.shape[0]
    if is_onehot:
        result = -np.sum(y_real * np.log(y_pred+1e-7)) / batch_size # 입실론 트릭
    else:
        result = -np.sum(np.log(y_pred[np.arange(batch_size), y_real] + 1e-7)) / batch_size

    return result

In [3]:
def sse(pred, target):
    return np.sum((target-pred)**2)

def cee(pred, target):
    softmax_value = softmax(pred)
    return -np.sum(target*np.log(pred))

# 수치 미분 정의

In [4]:
def numerical_grad(f, x):
    h = 1e-4
    result = (f(x+h) - f(x-h))/(2*h)
    return result

In [5]:
# 편미분 
def numerical_grad_1d(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    
    for idx in range(x.size):
        x_i = x[idx]
        
        x[idx] = x_i + h
        f_x_plus_h = f(x)
        
        x[idx] = x_i-h
        f_x_minus_h = f(x)
        
        x[idx] = x_i
        grad[idx] = (f_x_plus_h - f_x_minus_h)/(2*h)
    
    return grad

def numerical_grad_2d(f,x): 
    if x.ndim == 1:
        grad = numerical_grad_1d(f,x)
    else:
        grad = np.zeros_like(x)
        for idx in range(x.shape[0]):
            grad[idx] = numerical_grad_1d(f,x[idx])
    return grad

In [6]:
def my_func(x):
    return x[0]**2 + x[1]**2

In [7]:
numerical_grad_1d(my_func, np.array([0.0,2.0]))

array([0., 4.])

# Gradient Descent 정의

In [8]:
def gradient_descent(f, init_weight, lr=0.01, step_num=100):
    weight = init_weight
    for i in range(step_num):
        grad = numerical_grad2d(f,weight)
        weight = weight - lr*grad
    return weight

# 다음과 같은 과정을 수행해보자.

- <span style = 'font-size:1.2em;line-height:1.5em'>1. 간단한 neural network class작성하기</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>2. 이 neural network에서 numerical gradient calculation으로 gradient descent하기</span>

In [9]:
from act_fn import *

In [10]:
# 1. 간단한 neural network 작성하기

class SimpleNet:
    # 이 SimpleNet은 입력층의 뉴런이 2개, 출력층 뉴런이 3개인 NN
    def __init__(self):
        self.W = np.random.randn(2,3)
        
    # 2차원 데이터 x가 들어오면 3차원 데이터인 result 반환
    def predict(self, x):
        result = np.dot(x, self.W)
        result = softmax_prev(result)
        print(result.shape)
        return result
    
    def loss(self, x, y_real, is_onehot=True):
        y_pred = self.predict(x)
        loss = cross_entropy_error(y_pred, y_real)
        return loss

In [11]:
net=SimpleNet()
print('Initial weight (W)')
print(net.W)
print()

x = np.array([0.6, 0.9])
y = np.array([0,0,1])

p = net.predict(x)
print('NN result with softmax activation')
print(p)
print()

print('Cross Entropy Loss')
print(net.loss(x, y))

Initial weight (W)
[[-0.06366021 -0.30344345  0.410768  ]
 [ 0.50913737  0.27970113  0.28726009]]

(3,)
NN result with softmax activation
[0.35802331 0.25220331 0.38977338]

Cross Entropy Loss
(3,)
0.9421895387204898


In [12]:
# 2. Gradient Descent 수행하기
f = lambda w: net.loss(x, y)
grad = numerical_grad_2d(f, net.W)

(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)
(3,)


In [13]:
net.W.dtype

dtype('float64')

In [14]:
grad

array([[ 0.21481393,  0.15132195, -0.36613588],
       [ 0.3222209 ,  0.22698292, -0.54920382]])

# 이제 MNIST를 분류하는 FFNN을 직접 학습해봅시다

<span style = 'font-size:1.2em;line-height:1.5em'>다음과 같은 과정을 수행해야 합니다.</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>1. neural network class 정의하기: 여기선, Two-layer FFNN을 정의하겠습니다.이 클래스에는 다음과 같은 속성과 메소드가 필요합니다.</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'><b>(1)params 속성: </b>Network의 weight들이 들어있는 dictionary.</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'><b>(2)predict() 메소드: </b>Forward Propagation을 진행하는 메소드</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'><b>(3)get_loss() 메소드: </b>predict()의 결과와 실제 레이블을 비교하여 cross entropy loss를 계산하는 메소드</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'><b>(4)get_grad() 메소드: </b>현재 W상태에서 gradient를 산출하는 메소드</span>
    - <span style = 'font-size:1.1em;line-height:1.5em'><b>(5)get_accuracy() 메소드: </b>(Optional) 현재 상태에서 네트워크가 inference하는 결과의 정확도를 산출하는 메소드</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>2. Mini-batch 데이터를 데이터에 입력</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>3. 현재 W상태에서 gradient를 계산</span>
- <span style = 'font-size:1.2em;line-height:1.5em'>4. gradient descent로 learning_rate만큼 이동</span>

In [15]:
from act_fn import *
from mnist import load_mnist

In [16]:
class MyNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = dict()
        self.params['W1'] = np.random.randn(input_size, hidden_size) * weight_init_std
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = np.random.randn(hidden_size, output_size) * weight_init_std
        self.params['b2'] = np.zeros(output_size)
        
    def predict(self, x):
        result = np.dot(x, self.params['W1']) + self.params['b1']
        result = sigmoid_prev(result)
        result = np.dot(result, self.params['W2']) + self.params['b2']
        result = softmax_prev(result)
        
        return result
    
    def get_loss(self, x, y_real, is_onehot):
        y_pred = self.predict(x)
        result = cross_entropy_error(y_pred, y_real, is_onehot=is_onehot)
        return result
    
    def get_grad(self, x, y_real, is_onehot=True):
        loss = lambda W: self.get_loss(x, y_real, is_onehot=is_onehot)
        grads = dict()
        grads['W1'] = numerical_grad_2d(loss, self.params['W1'])
        grads['b1'] = numerical_grad_2d(loss, self.params['b1'])
        grads['W2'] = numerical_grad_2d(loss, self.params['W2'])
        grads['b2'] = numerical_grad_2d(loss, self.params['b2'])
        return grads

In [17]:
is_onehot = True
(x_trn, y_trn), (x_tst, y_tst) = load_mnist(normalize=True, 
                                            flatten=True, 
                                            one_hot_label=is_onehot)

In [18]:
network = MyNet(784, 100, 10)

iters_num = 10000  # 반복횟수
train_size = x_trn.shape[0]
batch_size = 100  # 미니배치 크기
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

# 1에폭당 반복 수
iter_per_epoch = max(train_size / batch_size, 1)

for i in range(iters_num):
    # print(i)
    # 미니배치 획득
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_trn[batch_mask]
    t_batch = y_trn[batch_mask]

    # 오차역전파법으로 기울기 계산
    print('>> GEt Gradient')
    grad = network.get_grad(x_batch, t_batch)

    # 매개변수 갱신
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # 학습 경과 기록
    loss = network.get_loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # 1에폭 당 정확도 계산
    if i % iter_per_epoch == 0:
        train_acc = network.get_accuracy(x_trn, y_trn)
        test_acc = network.get_accuracy(x_tst, y_tst)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))


>> GEt Gradient


TypeError: get_loss() missing 1 required positional argument: 'is_onehot'