### 교차 엔트로피 오차

In [1]:
from IPython.display import Image
import numpy as np
from dataset.mnist import load_mnist

In [2]:
def cross_entropy_error(y, t):
    """Return the cross-entropy error between prediction ``y`` and target ``t``.

    Computes ``-sum(t * log(y + eps))`` over every element; no averaging
    is applied.
    """
    eps = 1e-7  # keeps log() away from log(0) = -inf
    weighted_log = t * np.log(y + eps)
    return -np.sum(weighted_log)

In [3]:
# Written for batched 2-D input (one row per sample); a 1-D input is reshaped into a single-row batch below.

def cross_entropy_error(y, t):
    """Return the cross-entropy error averaged over the batch.

    ``y`` and ``t`` are arrays of the same shape. A 1-D pair is treated as
    a batch holding a single sample. The summed ``-t * log(y + eps)`` is
    divided by the number of rows in ``y``.
    """
    if y.ndim == 1:
        # Promote a lone sample to a (1, size) batch so shape[0] is the
        # batch size in every case.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    n_samples = y.shape[0]
    eps = 1e-7  # keeps log() away from log(0) = -inf
    total = np.sum(t * np.log(y + eps))
    return -total / n_samples

### 미분

$y = 0.01x^2 + 0.1x$

In [4]:
import matplotlib.pylab as plt

In [5]:
def function_1(x):
    """Evaluate the sample curve ``0.01*x**2 + 0.1*x`` at ``x``."""
    quadratic_term = 0.01 * x ** 2
    linear_term = 0.1 * x
    return quadratic_term + linear_term

In [6]:
def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` by a central difference.

    Uses a fixed step of 1e-4 and returns
    ``(f(x + step) - f(x - step)) / (2 * step)``.
    """
    step = 1e-4
    ahead = f(x + step)
    behind = f(x - step)
    return (ahead - behind) / (2 * step)

- 그래디언트(편미분의 벡터)

In [7]:
def function_2(x):
    """Return the sum of squares of the first two entries of ``x``."""
    first, second = x[0], x[1]
    return first ** 2 + second ** 2

- 경사하강법

In [8]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise ``f`` with plain gradient descent and return the final point.

    Args:
        f: Function taking the current point and returning a scalar.
        init_x: Starting point. A floating-point ndarray is used as given;
            anything else (an integer array, a list, ...) is converted to a
            float array first.
        lr: Learning rate multiplied into the gradient at every step.
        step_num: Number of update steps to perform.

    Returns:
        The point reached after ``step_num`` updates.

    The current point is printed after every step.
    """
    x = np.asarray(init_x)  # arbitrary initial value
    if not np.issubdtype(x.dtype, np.inexact):
        # The gradient is estimated by nudging elements by +/-1e-4 in place;
        # an integer array would truncate those nudges and give a wrong
        # gradient, so work in floating point.
        x = x.astype(float)

    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x = x - (lr * grad)
        print(x)
    return x

- 신경망에서의 기울기

In [9]:
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Each element of ``x`` is nudged by ``+h`` and ``-h`` in place, ``f(x)``
    is evaluated both times, and the element is put back afterwards.

    Args:
        f: Function called as ``f(x)`` that returns a scalar.
        x: ndarray of any shape. Floating-point arrays are perturbed in
            place (and restored), so ``f`` may also read the same array
            through a closure. Arrays of any other dtype are copied to
            float first, because they cannot store ``value +/- h``.

    Returns:
        A floating-point array with the same shape as ``x`` holding the
        partial derivative for every element.
    """
    h = 1e-4  # 0.0001
    if not np.issubdtype(x.dtype, np.inexact):
        # Writing value +/- h into an integer array truncates the nudge
        # and would also make ``grad`` an integer array.
        x = x.astype(float)
    grad = np.zeros_like(x)

    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    while not it.finished:
        idx = it.multi_index
        tmp_val = x[idx]
        try:
            x[idx] = float(tmp_val) + h
            fxh1 = f(x)  # f(x+h)

            x[idx] = tmp_val - h
            fxh2 = f(x)  # f(x-h)
        finally:
            # Restore the element even if f raises, so the caller's array
            # is never left perturbed.
            x[idx] = tmp_val

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        it.iternext()

    return grad

In [10]:
def softmax(x):
    """Return the softmax of ``x``.

    A 2-D array is normalised row by row, so each row sums to 1. Any other
    input is normalised over all of its elements at once.
    """
    if x.ndim != 2:
        shifted = x - np.max(x)  # guard against overflow in exp()
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    # Subtract each row's own maximum before exponentiating, again to
    # guard against overflow.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    return exps / np.sum(exps, axis=1, keepdims=True)

In [11]:
class simpleNet:
    """Single-layer network holding one 2x3 weight matrix ``W``."""

    def __init__(self):
        # Weights drawn from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the raw scores ``x . W``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return the cross-entropy error of the softmax of the scores against ``t``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, t)

In [12]:
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``, element-wise for arrays."""
    denominator = 1 + np.exp(-x)
    return 1 / denominator

- 2층 신경망 구현하기

In [13]:
class TwoLayerNet:
    """Two-layer fully connected network (sigmoid hidden layer, softmax output).

    All parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Weights are scaled standard-normal samples; biases start at zero.
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Run the forward pass: affine -> sigmoid -> affine -> softmax."""
        p = self.params
        hidden = sigmoid(np.dot(x, p['W1']) + p['b1'])
        scores = np.dot(hidden, p['W2']) + p['b2']
        return softmax(scores)

    def loss(self, x, t):
        """Return the cross-entropy error of the prediction for ``x`` against ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def numerical_gradient(self, x, t):
        """Return numerically estimated loss gradients, keyed like ``self.params``."""

        def loss_W(_):
            # The argument is ignored: the module-level numerical_gradient
            # perturbs the parameter array in place, and self.loss reads
            # that same array through self.params.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    # NOTE: a gradient() method was sketched here but never implemented.

In [14]:
# Load the data, then train on it and predict.
# NOTE(review): normalize=True / one_hot_label=True presumably request scaled
# pixel values and one-hot labels — confirm against dataset.mnist.

(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

In [15]:
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10) # input and output sizes are fixed by MNIST

In [20]:
# Hyperparameter settings
iters_num = 10  # number of training iterations
train_size = x_train.shape[0]  # length of the first axis of x_train
batch_size = 100  # number of samples drawn per iteration
learning_rate = 0.1  # factor applied to the gradient in each update

In [21]:
train_loss_list = []  # loss recorded after each parameter update

for i in range(iters_num): # run iters_num mini-batch gradient-descent steps
    # Draw batch_size random indices below train_size (np.random.choice
    # samples with replacement by default).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    # Gradient of the loss for every parameter, estimated numerically.
    grad = network.numerical_gradient(x_batch, t_batch)
    
    # Step each parameter against its gradient.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] = network.params[key] - (learning_rate * grad[key])
        
    # The loss is evaluated on the same batch, after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    
    print(i, loss)

0 2.288221612889668
1 2.2851582617870094
2 2.283489557517339
3 2.272522743214133
4 2.2591304301882467
5 2.3033296214220726
6 2.2576161634316847
7 2.297612903327835
8 2.281177840100554
9 2.2673175674924972


In [22]:
# 강사님 주신 코드는 따로 진행해 볼 것