## 오차제곱합

In [1]:
def sum_squares_error(y, t):
    """Return half of the summed squared difference between ``y`` and ``t``.

    Args:
        y: Predicted values (NumPy array).
        t: Target values, broadcastable against ``y``.

    Returns:
        ``0.5 * sum((y - t) ** 2)`` as a scalar.
    """
    residual = y - t
    squared_total = np.sum(residual ** 2)
    return 0.5 * squared_total

## 교차엔트로피 오차

In [2]:
def cross_entropy_error(y, t):
    """Return the cross-entropy ``-sum(t * log(y))`` for a single sample.

    Args:
        y: Predicted values (NumPy array).
        t: Target array with the same shape as ``y``.

    Returns:
        The summed cross-entropy as a scalar.
    """
    # Tiny offset added inside the log so that log(0) is never evaluated.
    eps = 1e-7
    log_pred = np.log(y + eps)
    return -np.sum(t * log_pred)

## 미니배치 학습

In [3]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

In [4]:
# Load the MNIST train/test splits through the project loader, requesting
# normalized inputs and one-hot encoded labels.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

print(x_train.shape)
print(t_train.shape) # ground-truth labels

(60000, 784)
(60000, 10)


In [5]:
# Pick a random mini-batch of `batch_size` samples from the training set.
train_size = x_train.shape[0] # 60000
batch_size = 10
# Random row indices in [0, train_size); NOTE(review): np.random.choice
# samples with replacement by default, so duplicates are possible.
batch_mask = np.random.choice(train_size, batch_size)
# Index inputs and labels with the same mask so they stay aligned.
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

In [6]:
# Demo: draw 10 random integers from the range 0..59999.
np.random.choice(60000, 10)

array([31629, 16416, 57962, 44650, 48822,  1268, 36231, 16035, 14114,
       21754])

In [7]:
def cross_entropy_error(y, t):
    """Return the cross-entropy error averaged over a batch.

    Args:
        y: Network output; either one sample (1-D) or a batch (2-D, one row
            per sample).
        t: Target labels with the same layout as ``y``.

    Returns:
        ``-sum(t * log(y + 1e-7))`` divided by the number of rows in ``y``.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one row.
        y = y.reshape(1, y.size)
        t = t.reshape(1, t.size)

    n_samples = y.shape[0]
    # 1e-7 keeps log() away from log(0).
    weighted_log_sum = np.sum(t * np.log(y + 1e-7))
    return -weighted_log_sum / n_samples

## 미분

In [8]:
def numerical_diff(f, x):
    """Approximate the derivative of ``f`` at ``x`` with a central difference.

    Args:
        f: Callable of one numeric argument.
        x: Point at which the derivative is estimated.

    Returns:
        ``(f(x + h) - f(x - h)) / (2 * h)`` with ``h = 1e-4``.
    """
    h = 1e-4
    # The parentheses matter: without them `/ 2*h` divides by 2 and then
    # multiplies by h, which is off by a factor of h**2.
    return (f(x + h) - f(x - h)) / (2 * h)

## 편미분

In [9]:
def function_2(x):
    """Return ``x[0]**2 + x[1]**2`` for an indexable ``x``."""
    first, second = x[0], x[1]
    return first ** 2 + second ** 2

In [10]:
def numerical_gradient(f, x):
    """Estimate the gradient of ``f`` at ``x`` with central differences.

    Each element of ``x`` is perturbed in turn by ``+h`` and ``-h``
    (``h = 1e-4``) and ``f`` is re-evaluated on the perturbed array.

    Args:
        f: Callable that takes the array ``x`` and returns a scalar.
        x: NumPy array of any shape. It should have a floating dtype, since
            the perturbed values are written back into it. It is modified
            in place during evaluation and every element is restored to its
            original value before the function returns.

    Returns:
        Array shaped like ``x`` (created with ``np.zeros_like``) holding the
        partial derivative estimate for each element.
    """
    h = 1e-4
    grad = np.zeros_like(x)

    # np.ndindex yields every index tuple of x, so 1-D vectors and
    # multi-dimensional weight matrices are handled the same way.
    for idx in np.ndindex(x.shape):
        tmp_val = x[idx]

        x[idx] = tmp_val + h
        fxh1 = f(x)  # f(x + h) along this coordinate

        x[idx] = tmp_val - h
        fxh2 = f(x)  # f(x - h) along this coordinate

        grad[idx] = (fxh1 - fxh2) / (2 * h)
        x[idx] = tmp_val  # restore the original value

    return grad

In [11]:
# Demo: gradient of function_2 evaluated at the point (3, 4).
print(numerical_gradient(function_2, np.array([3.0, 4.0])))

[6. 8.]


## 경사하강법

In [12]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    """Minimize ``f`` by plain gradient descent starting from ``init_x``.

    Each step prints the current point and the gradient estimated there,
    then moves the point by ``-lr * grad``.

    Args:
        f: Callable that takes a NumPy array and returns a scalar.
        init_x: NumPy array with the starting point. It is not modified.
        lr: Learning rate (step size multiplier).
        step_num: Number of update steps to perform.

    Returns:
        A new array holding the point reached after ``step_num`` updates.
    """
    # Work on a copy: the in-place update below would otherwise overwrite
    # the caller's init_x.
    x = init_x.copy()

    for _ in range(step_num):
        print(x)
        grad = numerical_gradient(f, x)
        x -= lr * grad
        print(grad)

    return x

In [13]:
# 경사법으로 함수 f의 최솟값을 구하라

def function_2(x):
    """Return the sum of the squares of the first two entries of ``x``."""
    squares = (x[0] ** 2, x[1] ** 2)
    return squares[0] + squares[1]

# Run 100 gradient-descent steps on function_2 from (-3, 4) with lr=0.1.
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=0.1, step_num=100)

[-3.  4.]
[-6.  8.]
[-2.4  3.2]
[-4.8  6.4]
[-1.92  2.56]
[-3.84  5.12]
[-1.536  2.048]
[-3.072  4.096]
[-1.2288  1.6384]
[-2.4576  3.2768]
[-0.98304  1.31072]
[-1.96608  2.62144]
[-0.786432  1.048576]
[-1.572864  2.097152]
[-0.6291456  0.8388608]
[-1.2582912  1.6777216]
[-0.50331648  0.67108864]
[-1.00663296  1.34217728]
[-0.40265318  0.53687091]
[-0.80530637  1.07374182]
[-0.32212255  0.42949673]
[-0.64424509  0.85899346]
[-0.25769804  0.34359738]
[-0.51539608  0.68719477]
[-0.20615843  0.27487791]
[-0.41231686  0.54975581]
[-0.16492674  0.21990233]
[-0.32985349  0.43980465]
[-0.1319414   0.17592186]
[-0.26388279  0.35184372]
[-0.10555312  0.14073749]
[-0.21110623  0.28147498]
[-0.08444249  0.11258999]
[-0.16888499  0.22517998]
[-0.06755399  0.09007199]
[-0.13510799  0.18014399]
[-0.0540432   0.07205759]
[-0.10808639  0.14411519]
[-0.04323456  0.05764608]
[-0.08646911  0.11529215]
[-0.03458765  0.04611686]
[-0.06917529  0.09223372]
[-0.02767012  0.03689349]
[-0.05534023  0.07378698]


array([-6.11110793e-10,  8.14814391e-10])

In [14]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.functions import softmax, cross_entropy_error
from common.gradient import numerical_gradient

In [15]:
class simpleNet:
    """Single-layer network holding one 2x3 weight matrix and no bias."""

    def __init__(self):
        # Weights drawn from a standard normal distribution.
        self.W = np.random.randn(2, 3)

    def predict(self, x):
        """Return the product of the input ``x`` and the weights ``W``."""
        scores = np.dot(x, self.W)
        return scores

    def loss(self, x, t):
        """Return the cross-entropy error of the softmax output against ``t``."""
        probabilities = softmax(self.predict(x))
        return cross_entropy_error(probabilities, t)

In [16]:
# Create the network and show its randomly initialized weight matrix.
net = simpleNet()
print(net.W)

[[-0.81545957  0.60626962 -1.76149833]
 [ 1.0127327   0.99372941  2.51080156]]


In [17]:
# Feed one two-feature input through the network and print its three outputs.
x = np.array([0.6, 0.9])
p = net.predict(x)
print(p)

[0.42218369 1.25811824 1.20282241]


In [18]:
np.argmax(p) # index of the largest value

1

In [19]:
# Target vector with a 1 at index 2; compute the loss for input x against it.
t = np.array([0, 0, 1])
net.loss(x, t)

0.9222592452197866

## 학습 알고리즘 구현하기

In [20]:
import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

In [21]:
class TwoLayerNet:
    """Two-layer fully connected network (sigmoid hidden layer, softmax output).

    Parameters live in ``self.params`` under the keys ``'W1'``, ``'b1'``,
    ``'W2'`` and ``'b2'``.
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters.

        Weights are standard-normal samples scaled by ``weight_init_std``;
        biases start at zero.
        """
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

    def predict(self, x):
        """Run the forward pass and return the softmax output for ``x``."""
        p = self.params

        hidden = sigmoid(np.dot(x, p['W1']) + p['b1'])
        scores = np.dot(hidden, p['W2']) + p['b2']

        return softmax(scores)

    def loss(self, x, t):
        """Return the cross-entropy error of the prediction for ``x`` against ``t``."""
        return cross_entropy_error(self.predict(x), t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose arg-max prediction matches ``t``.

        Both the prediction and ``t`` are reduced with ``argmax`` along
        axis 1 before being compared.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        expected = np.argmax(t, axis=1)

        n_correct = np.sum(predicted == expected)
        return n_correct / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return numerically estimated gradients of the loss per parameter.

        The result is a dict with the same keys as ``self.params``.
        """
        # The argument is ignored: the module-level numerical_gradient is
        # handed the live parameter array, and self.loss reads that array.
        def loss_of_params(_):
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_of_params, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

## 미니배치 학습 구현하기

In [24]:
import numpy as np
from dataset.mnist import load_mnist

# Load MNIST with normalized inputs and one-hot labels.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Loss value recorded after every iteration.
train_loss_list = []

# Hyperparameters
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    # Draw a mini-batch
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # Gradients by numerical differentiation.
    # NOTE(review): presumably very slow for a network of this size - confirm.
    grad = network.numerical_gradient(x_batch, t_batch)
    
    # Update each parameter in place: param -= learning_rate * gradient.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]
        
    # Record and print the loss on the same mini-batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    print(loss)

10000
2.2913691245532313
2.290514724048532
2.27531511929577
2.2905102113963896
2.3071452310448497
2.300860875030318
2.2894065338997422
2.290141978421042
2.2986255283757844
2.289106351646101
2.287014362830554


KeyboardInterrupt: 