<h3> 1. 손실함수 구현하기 </h3>

In [1]:
import numpy as np

## sum_of_squares error의 경우에는 (y-t)^2 * 0.5인 것을 이용
def sum_squares_error(y, t):
    return 0.5 * np.sum((y-t) ** 2)

## cross_entropy_error의 경우에는 원-핫 인코딩의 형태로 바꿔주는 거 필요
def cross_entropy_error(y, t):
    delta = 1e-7
    if y.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)
        
    if y.size == t.size:
        t = np.argmax(t, axis = 1)
        
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size), t] + 1e-7)) / batch_size        

<h3> 2. 수치미분 구현하기 </h3>

In [4]:
def numerical_gradient(f, x):
    h = 1e-4 ## 0이 되는 거 방지
    grads = np.zeros_like(x) ## x의 shape와 동일한 것 생성 (이유 : gradient 저장)
    
    ## 편미분을 iterative하게 진행하기 위하여 nditer를 이용하여 미분 진행하기
    it = np.nditer(x, flags = ['multi_index'], op_flags = ['readwrite'])
    while not it.finished():
        idx = it.multi_index
        ## idx는 현재 인덱스임. ex) 2 x 3행렬에선 (1,1), (1, 2), (1, 3) 이런 식
        tmp_val = x[idx]
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)
        
        x[idx] = float(tmp_val) - h
        fxh2 = f(x)
        
        grads[idx] = (fxh1 - fxh2) / (2*h)
        return grads
    
## gradient descent 식에 맞춰서 구현하면 됨. 
def gradient_descent(f, init_x, lr, step_num):
    x = init_x
    
    for idx in range(step_num):
        grad = numerical_gradient(f, x) ## 각 idx 마다 미분값 도출
        x = x - lr * grad
        
    return x

In [5]:
t = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
y = np.array([0.1, 0.7, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.02, 0.06])

sum_squares_error(y, t), cross_entropy_error(y, t)

(0.05320000000000002, 0.3566748010815999)

<h3> 3. 2층 신경망을 통해 가중치 업데이트 하기 </h3>

In [7]:
import sys, os
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        ## 입력받는 숫자에 맞게 2층 신경망 구현하기
        self.params = {} ## 파라미터를 저장하는 딕셔너리 변수
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = weight_init_std * np.random.randn(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = weight_init_std * np.random.randn(output_size)
        
    ## init에 저장하였던 파라미터들을 이용하여 순전파 계산하기    
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2'] ## 각각 weight
        b1, b2 = self.params['b1'], self.params['b2'] ## 각각 편향
        
        
        a1 = np.dot(x, W1) + b1 ## 입력층 to 은닉층
        z1 = sigmoid(a1) ## 편향
        a2 = np.dot(z1, W2) + b2 ## 은닉층 to 출력층
        y = softmax(a2) ## softmax 이용하여 최종 라벨 도출
        
        return y
    
    ## 예측한 것 정확도 확인
    def accuracy(self, x, t):
        y = self.predict(x) ## 순전파 계산
        y = np.argmax(y, axis = 1) ## 라벨값(예측값) 도출
        t = np.argmax(t, axis = 1) ## 실제값
        
        accuracy = np.sum(y == t) / float(x.shape[0]) ## 정확도 계산
        
        return accuracy
    
    
    ## 손실 계산
    def loss(self, x, t):
        y = self.predict(x) ## 순전파 계산을 통해 예측값 도출
        
        return cross_entropy_error(y, t) ## 예측값과 실제값 사이에 gradient 도출
    
    ## gradient descent에 이용할 gradient 계산
    def numerical_gradient(self, x, t):
        loss_W = lambda W : self.loss(x, t) ## loss 계산하기
        
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads
    
net = TwoLayerNet(input_size = 784, hidden_size = 100, output_size = 10)
print(net.params['W1'].shape, net.params['b1'].shape, net.params['W2'].shape, net.params['b2'].shape)
    

(784, 100) (100,) (100, 10) (10,)


In [11]:
import numpy as np
import sys
sys.path.append('C://Users/revol/Deep Learning/HW/dataset')
from mnist import load_mnist
from common.two_layer_net import TwoLayerNet

(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label = True)

batch_size = 100
train_size = x_train.shape[0]
iter_num = 2
learning_rate = 0.1

train_loss_list = []
train_acc_list = []
test_acc_list = []

iter_per_epoch = max(train_size / batch_size, 1)
network = TwoLayerNet(input_size = 784, hidden_size = 100, output_size = 10)
init_W1 = network.params['W1']

for iter in range(iter_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    grad = network.numerical_gradient(x_batch, t_batch)
    
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
        
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    
    if iter % iter_per_epoch == 0:
        train_acc = network.accuracy(x_batch, t_batch)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print('train acc, test acc | ' + str(train_acc) + ', '+ str(test_acc))

train acc, test acc | 0.19, 0.1184


<h3> 4. 3층 신경망 구현하기 </h3>

In [12]:
import sys, os
from common.functions import *
from common.gradient import numerical_gradient

class ThreeLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        self.params = {} ## 2층 신경망과 동일한 방법으로 파라미터 초기화
        
        ## 은닉층이 2개가 포함되기 때문에, 배열을 이용하여 첫 번째와 두 번째 은닉층 구별
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size[0])
        self.params['b1'] = weight_init_std * np.random.randn(hidden_size[0])
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size[0], hidden_size[1])
        self.params['b2'] = weight_init_std * np.random.randn(hidden_size[1])
        self.params['W3'] = weight_init_std * np.random.randn(hidden_size[1], output_size)
        self.params['b3'] = weight_init_std * np.random.randn(output_size)
        
    def predict(self, x): ## 순전파 계산 진행 (2층 신경망과 동일한 방법)
        W1, W2, W3 = self.params['W1'], self.params['W2'], self.params['W3']
        b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']
        
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        z2 = sigmoid(a2)
        a3 = np.dot(z2, W3) + b3
        y = softmax(a3)
        
        return y
    
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis = 1)
        t = np.argmax(t, axis = 1)
        
        accuracy = np.sum(y == t) / float(x.shape[0])
        
        return accuracy
    
    
    def loss(self, x, t):
        y = self.predict(x)
        
        return cross_entropy_error(y, t)
    
    def numerical_gradient(self, x, t):
        loss_W = lambda W : self.loss(x, t)
        
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        grads['W3'] = numerical_gradient(loss_W, self.params['W3'])
        grads['b3'] = numerical_gradient(loss_W, self.params['b3'])
        
        return grads
    
net = ThreeLayerNet(input_size = 784, hidden_size = [100, 50], output_size = 10)
print(net.params['W2'].shape, net.params['b2'].shape, net.params['W3'].shape, net.params['b3'].shape)
    

(100, 50) (50,) (50, 10) (10,)


In [18]:
(x_train, t_train), (x_test, t_test) = load_mnist(normalize = True, one_hot_label = True)

batch_size = 128
train_size = x_train.shape[0]
iter_num = 1
learning_rate = 0.1

init_W1 = network.params['W1']
for iter in range(iter_num):
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    grad = net.numerical_gradient(x_batch, t_batch)
    
    for key in ('W1', 'b1', 'W2', 'b2', 'W3', 'b3'):
        net.params[key] -= learning_rate * grad[key]
        

print('업데이트된 W1의 가중치 : ', net.params['W1'] - init_W1)  

업데이트된 W1의 가중치 :  [[-0.02293887 -0.02129477 -0.00916254 ...  0.00477386 -0.00622735
  -0.0118936 ]
 [-0.0105399  -0.02015497 -0.00522983 ... -0.02797655 -0.00894673
   0.00579542]
 [ 0.01038812 -0.01574766 -0.00011639 ... -0.01864678 -0.02131355
   0.00587509]
 ...
 [-0.00774502  0.00380826 -0.00392613 ...  0.00920175 -0.01071928
  -0.00867553]
 [ 0.00557131  0.02119032  0.0091312  ...  0.00886956 -0.01722848
   0.02901   ]
 [-0.00112854 -0.00897207  0.02043213 ... -0.01263106  0.00472207
  -0.01842378]]
