In [1]:
import numpy as np

## 오차제곱합

$$E=\frac{1}{2} \sum_{k}(y_{k}-t_{k})^{2}$$

In [2]:
def sum_squares_error(y, t):
    """Return the sum-of-squares error ``0.5 * sum((y - t) ** 2)``.

    Args:
        y: Predicted values; a NumPy array or any array-like (list, tuple).
        t: Target values, broadcast-compatible with ``y``.

    Returns:
        Half of the summed squared element-wise differences.
    """
    # np.asarray returns ndarrays unchanged and converts plain sequences, so
    # callers no longer have to wrap their lists in np.array(...) themselves.
    y = np.asarray(y)
    t = np.asarray(t)
    return 0.5 * np.sum((y - t) ** 2)

In [6]:
# One-hot target whose correct class is index 2.
t = [0,0,1,0,0,0,0,0,0,0]

# Prediction with 0.6 on the correct class.
# np.isclose rather than ==: the last bits of a floating-point sum can differ
# between NumPy builds, which would fail a correct implementation.
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
assert np.isclose(sum_squares_error(np.array(y), np.array(t)), 0.09750000000000003), '오답입니다.'

# Prediction with 0.6 on a wrong class (index 7): the error is larger.
y = [0.1,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
assert np.isclose(sum_squares_error(np.array(y), np.array(t)), 0.5975), '오답입니다.'

## 교차 엔트로피 오차

$$E=-\sum_{k} t_{k}\log y_{k}$$

In [7]:
def cross_entropy_error(y, t):
    """Return the cross-entropy error ``-sum(t * log(y + delta))`` of one sample.

    Args:
        y: Predicted probabilities; a NumPy array or any array-like.
        t: Target values (e.g. a one-hot vector), broadcast-compatible with ``y``.

    Returns:
        The cross-entropy error as a scalar.
    """
    # Small offset so that log(0) = -inf never appears when a probability is 0.
    delta = 1e-7
    # Accept plain lists/tuples as well as ndarrays (ndarrays pass through as-is).
    y = np.asarray(y)
    t = np.asarray(t)
    return -np.sum(t * np.log(y + delta))

In [10]:
# One-hot target whose correct class is index 2.
t = [0,0,1,0,0,0,0,0,0,0]

# Prediction with 0.6 on the correct class.
# np.isclose rather than ==: np.log may differ in the last bit between NumPy
# builds, which would fail a correct implementation.
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 0.510825457099338), '오답입니다.'

# Prediction with only 0.1 on the correct class: the error is larger.
y = [0.1,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 2.302584092994546), '오답입니다.'

## 배치용 교차 엔트로피 오차

In [14]:
def cross_entropy_error(y, t):
    """Return the mean cross-entropy error of one sample or a mini-batch.

    Args:
        y: Predicted probabilities, either one sample (1-D) or a batch with one
            row per sample (2-D); a NumPy array or any array-like.
        t: Targets, either one-hot encoded (same number of elements as ``y``)
            or integer class indices, one per sample.

    Returns:
        The cross-entropy error averaged over the batch.
    """
    # Small offset so that log(0) = -inf never appears when a probability is 0.
    delta = 1e-7
    # Accept plain lists/tuples as well as ndarrays (ndarrays pass through as-is).
    y = np.asarray(y)
    t = np.asarray(t)

    # Promote a single sample to a batch of one so the code below is uniform.
    if y.ndim == 1:
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    # Same element count as y means one-hot targets: reduce them to class indices.
    if t.size == y.size:
        t = t.argmax(axis=1)

    batch_size = y.shape[0]
    # Pick, for every row, the probability assigned to that row's true class.
    return -np.sum(np.log(y[np.arange(batch_size), t] + delta)) / batch_size

In [16]:
# One-hot target whose correct class is index 2.
# np.isclose rather than == throughout: np.log may differ in the last bit
# between NumPy builds, which would fail a correct implementation.
t = [0,0,1,0,0,0,0,0,0,0]
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 0.510825457099338), '오답입니다.'

y = [0.1,0.05,0.1,0.0,0.05,0.1,0.0,0.6,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 2.302584092994546), '오답입니다.'

# Same target given as a class index instead of a one-hot vector.
t = [2]
y = [0.1,0.05,0.6,0.0,0.05,0.1,0.0,0.1,0.0,0.0]
assert np.isclose(cross_entropy_error(np.array(y), np.array(t)), 0.510825457099338), '오답입니다.'

## 수치 미분

$$\frac {df(x)} {dx} =\lim_{h \to 0} \frac{f(x+h)-f(x-h)}{2h}$$

In [17]:
def numerical_diff(f, x, h=1e-4):
    """Approximate ``df/dx`` at ``x`` with a central difference.

    Args:
        f: Function of one variable.
        x: Point at which the derivative is evaluated.
        h: Half-width of the difference interval. The default ``1e-4`` is the
            value that was previously hard-coded.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)``.
    """
    return (f(x+h) - f(x-h)) / (2*h)

In [20]:
def function_1(x):
    """Evaluate the sample quadratic ``0.01 * x**2 + 0.1 * x``."""
    quadratic_term = 0.01 * x ** 2
    linear_term = 0.1 * x
    return quadratic_term + linear_term

# The analytic derivative 0.02*x + 0.1 is 0.2 at x=5 and 0.3 at x=10; the
# central difference only approximates it, and its last digits depend on
# floating-point rounding, so compare with a tolerance instead of ==.
assert np.isclose(numerical_diff(function_1, 5), 0.1999999999990898), '미분 함수를 다시 확인하세요.'
assert np.isclose(numerical_diff(function_1, 10), 0.2999999999986347), '미분 함수를 다시 확인하세요.'

## 기울기

In [21]:
def numerical_gradient(f, x:np.ndarray, h=1e-4):
    """Approximate the gradient of ``f`` at ``x`` with central differences.

    Each element of ``x`` is perturbed in place by ``+h`` and ``-h`` in turn,
    ``f(x)`` is evaluated for both, and the element is restored afterwards.
    Because the perturbation is in place, ``f`` may also be a closure that
    reads the same array instead of using its argument.

    Args:
        f: Scalar-valued function that is called as ``f(x)``.
        x: Point at which the gradient is evaluated; an array of any shape.
            Integer and boolean input is evaluated on a float copy, because
            such an array cannot hold ``value + h``.
        h: Half-width of the difference interval. The default ``1e-4`` is the
            value that was previously hard-coded.

    Returns:
        Array with the shape of ``x`` holding the partial derivatives.
    """
    x = np.asarray(x)
    if x.dtype.kind in 'biu':
        # An integer array would truncate both the perturbation and the result.
        x = x.astype(np.float64)
    grad = np.zeros_like(x)

    # np.ndindex yields full index tuples, so arrays of any rank are handled;
    # range(x.size) with x[idx] is only correct for 1-D input.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]
        try:
            x[idx] = tmp_val + h
            fxh1 = f(x)

            x[idx] = tmp_val - h
            fxh2 = f(x)
        finally:
            # Restore the element even when f raises, so x is never left perturbed.
            x[idx] = tmp_val

        grad[idx] = (fxh1 - fxh2) / (2*h)
    return grad

function_2
$$f(x_{0}, x_{1})=x^{2}_{0} + x^{2}_{1}$$

In [29]:
def function_2(x):
    """Return ``x[0]**2 + x[1]**2`` for an indexable ``x``."""
    first_squared = x[0] ** 2
    second_squared = x[1] ** 2
    return first_squared + second_squared

# (evaluation point, expected gradient) pairs; the gradient of function_2 is 2*x.
for _point, _expected in (
    ([3.0, 4.0], [6., 8.]),
    ([0.0, 2.0], [0., 4.]),
    ([3.0, 0.0], [6., 0.]),
):
    y = numerical_gradient(function_2, np.array(_point))
    assert np.allclose(y,  np.array(_expected)), '오답입니다.'

## 경사 하강법

$$x_{0} = x_{0} - \eta \frac {\partial f}{\partial x_{0}}$$
$$x_{1} = x_{1} - \eta \frac {\partial f}{\partial x_{1}}$$

In [30]:
def graident_descent(f, init_x, lr=0.01, step_num=100):
    """Minimise ``f`` by plain gradient descent from ``init_x``.

    The (misspelled) function name is kept as is so existing calls keep working.

    Args:
        f: Scalar-valued function of an array, called as ``f(x)``.
        init_x: Starting point; it is copied and never modified.
        lr: Learning rate, i.e. the factor applied to the gradient in each step.
        step_num: Number of update steps.

    Returns:
        The point reached after ``step_num`` updates ``x -= lr * grad``.
    """
    # np.array copies by default; without the copy the in-place update below
    # would overwrite the caller's init_x.
    x = np.array(init_x)
    if x.dtype.kind in 'biu':
        # `x -= lr * grad` cannot store a float result in an integer array.
        x = x.astype(np.float64)

    for _ in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

In [33]:
init_x = np.array([-3.0, 4.0])
y = graident_descent(function_2, init_x=init_x, lr=0.1, step_num=100)
# The expected values are about 1e-10, far below np.allclose's default
# atol=1e-8, so the default would accept any result close to zero. Compare
# with a relative tolerance only.
assert np.allclose(y, np.array([-6.11110793e-10,  8.14814391e-10]), rtol=1e-6, atol=0.0), '오답입니다.'

## 2층 신경망 클래스

In [34]:
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:
    """Two-layer fully connected network: affine -> sigmoid -> affine -> softmax.

    All learnable arrays are stored in ``self.params`` under the keys
    ``'W1'``, ``'b1'``, ``'W2'`` and ``'b2'``.

    ``sigmoid``, ``softmax``, ``cross_entropy_error`` and ``numerical_gradient``
    are looked up at module level (presumably supplied by the ``common``
    package imported alongside this class).
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters.

        Weights are drawn from the global NumPy RNG (``np.random.randn``) and
        scaled by ``weight_init_std``; biases start at zero.

        Args:
            input_size: Number of input features.
            hidden_size: Number of hidden units.
            output_size: Number of output classes.
            weight_init_std: Scale applied to the random initial weights.
        """
        #### start ####
        # W1 is drawn before W2; results seeded with np.random.seed depend on this order.
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        #### end ####

    def predict(self, x):
        """Run a forward pass and return the ``softmax`` output for ``x``."""
        #### start ####
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        #### end ####
        return y

    def loss(self, x, t):
        """Return ``cross_entropy_error`` between ``predict(x)`` and targets ``t``."""
        #### start ####
        y = self.predict(x)
        #### end ####
        return cross_entropy_error(y, t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose predicted class matches ``t``.

        Args:
            x: Batch of inputs, one row per sample.
            t: Targets, either one-hot rows or a 1-D array of class indices.
        """
        #### start ####
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.asarray(t)
        # 1-D targets already hold class indices; only one-hot rows need argmax
        # (argmax over axis=1 would raise on a 1-D array).
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])
        #### end ####
        return accuracy

    def numerical_gradient(self, x, t):
        """Return the loss gradient for every parameter by numerical differentiation.

        Returns:
            Dict with the same keys as ``self.params``.
        """
        #### start ####
        # The argument W is ignored on purpose: the module-level
        # numerical_gradient is handed the very array stored in self.params,
        # so self.loss sees whatever changes it makes to that array.
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        for key in ('W1', 'b1', 'W2', 'b2'):
            grads[key] = numerical_gradient(loss_W, self.params[key])
        #### end ####
        return grads

In [45]:
# Seed the global NumPy RNG first: the network weights and the inputs below are
# both drawn from it, so the order of these statements determines the result.
np.random.seed(42)

net = TwoLayerNet(input_size=784, hidden_size=100, output_size=10)

x = np.random.rand(100, 784)
y = net.predict(x)

# Reference output for the first input row.
_expected_first_row = np.array([
    0.09875959, 0.0941718 , 0.09746621, 0.10482647, 0.09399622,
    0.09873031, 0.10032557, 0.10438459, 0.09940792, 0.10793132,
])
assert np.allclose(y[0], _expected_first_row), '오답입니다.'