# Loss Function

In [1]:
import numpy as np

In [2]:
def sum_squares_error(y, t):   # y: 예측값,   t: 실제 정답
    return 0.5 * np.sum((y-t)**2)

In [3]:
def cross_entropy_error(y, t):
    delta = 1e-7    # 0이 되지 않도록 delta를 자연로그의 변수에 더함
    return -np.sum(t * np.log(y+delta))

In [4]:
t = np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])    # 5번째에 정답
y = np.array([0.1, 0.05, 0.6, 0.0, 0.05, 0.1, 0.0, 0.1, 0.0, 0.0])   # 3번째에 정답이 있을 것으로 예측
print(sum_squares_error(y, t))
print(cross_entropy_error(y, t))

0.6475
2.9957302735559908


-> 오차가 크므로, SSE 값이 큼

In [5]:
t = np.array([0, 0, 0, 0, 1, 0, 0, 0, 0, 0])    # 5번째에 정답
y = np.array([0.1, 0.05, 0.05, 0.0, 0.7, 0.0, 0.0, 0.1, 0.0, 0.0])   # 5번째에 정답이 있을 것으로 예측
print(sum_squares_error(y, t))
print(cross_entropy_error(y, t))

0.057500000000000016
0.3566748010815999


-> 오차가 작으므로, SSE 값이 작음

# Numercial Differential

## 1-dimensional

In [6]:
def numerical_diff(f, x):
    h = 1e-4
    return (f(x+h) - f(x-h))/(2*h)

In [7]:
def function_1(x):
    return 0.01*(x**2) + 0.1*x

In [8]:
x = np.arange(0.0, 20.0, 0.1)
print(numerical_diff(function_1, 5))
print(numerical_diff(function_1, 10))

0.1999999999990898
0.2999999999986347


## multi-dimensional

In [9]:
def numerical_gradient(f, x):
    h = 1e-4
    grad = np.zeros_like(x)
    for idx in range(x.shape[0]):
        tmp_val = x[idx]
        x[idx] = tmp_val + h
        fxh1 = f(x)
        
        x[idx] = tmp_val - h
        fxh2 = f(x)
        
        x[idx] = tmp_val
        grad[idx] = (fxh1 - fxh2) / (2*h)
    return grad

In [10]:
def function_2(x):
    return x[0]**2 + x[1]**2
print(numerical_gradient(function_2, np.array([3.0, 4.0])))
print(numerical_gradient(function_2, np.array([0.0, 2.0])))

[6. 8.]
[0. 4.]


# Gradient Descent

In [11]:
def gradient_descent(f, init_x, lr=0.01, step_num=100):  # lr : learning rate
    x = init_x
    for i in range(step_num):
        grad = numerical_gradient(f, x)
        x -= lr * grad
    return x

In [12]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x)

array([-0.39785867,  0.53047822])

### 학습률이 너무 클 때

In [13]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=10.0, step_num=100)

array([-2.58983747e+13, -1.29524862e+12])

: 결과값이 발산

### 학습률이 너무 작을 때

In [14]:
init_x = np.array([-3.0, 4.0])
gradient_descent(function_2, init_x=init_x, lr=1e-10, step_num=100)

array([-2.99999994,  3.99999992])

: 결과값이 초기 데이터에서 거의 바뀌지 않았음

# Neural Network(Numerical Gradient)

## 데이터 불러오기

In [15]:
import pandas as pd
data = pd.read_csv("./Data/mnist_train_small.csv")
data

Unnamed: 0,6,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,...,0.581,0.582,0.583,0.584,0.585,0.586,0.587,0.588,0.589,0.590
0,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,7,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,5,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19994,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19995,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19996,2,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
19997,9,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
t = data.iloc[:, 0]
t.head()

0    5
1    7
2    9
3    5
4    2
Name: 6, dtype: int64

In [17]:
x = data.iloc[:, 1:]
x.head()

Unnamed: 0,0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,...,0.581,0.582,0.583,0.584,0.585,0.586,0.587,0.588,0.589,0.590
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## sigmoid, softmax 정의

In [18]:
def sigmoid(x):
    return np.exp(x) / (np.exp(x) + 1)

def softmax(a):
    c = np.max(a)
    exp_a = np.exp(a - c)
    sum_exp_a = np.sum(exp_a)
    y = exp_a / sum_exp_a
    return y

## TowLayer Network 정의

In [19]:
class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        self.params = {}  # params : 신경망의 매개변수(가중치)를 저장
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size) 
        #(input_size x hidden_size) 크기의 0~1사이의 entry 값을 갖는 행렬(변동으로 weight_init_std 사용)
        self.params['b1'] = np.zeros(hidden_size) #(hidden_size) 크기의 0의 entry값을 갖는 행렬
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)
        
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)
        
        return y
    
  # x : 입력 데이터, t : 정답 레이블
    def loss(self, x, t):
        y = self.predict(x)
        z = np.argmax(y, axis=1)
        return cross_entropy_error(z, t)
    
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1) # 각 열(데이터)에서 가장 큰 값의 index, 즉 정답의 위치 출력
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0]) # 정답 / 전체
        return accuracy

    def numerical_gradient(self, x, t):  # 가중치 매개변수의 기울기 계산
        loss_W = lambda W: self.loss(x, t)

        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])   # W1의 특정 기울기 변동 -> predict값 변화 -> loss 값 변화
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads

## 데이터에 network 적용

In [20]:
def learning():
    x = np.array(x)
    t = np.array(t)
    iter_num = 10000
    learning_rate = 0.1
    network = TwoLayerNet(input_size = 784, hidden_size=50, output_size=10)
    train_loss_list = []

    for i in range(iter_num):
        grad = network.numerical_gradient(x, t)
        for key in ('W1', 'b1', 'W2', 'b2'):
            self.params[key] -= learning_rate * grad[key]
        loss = network.loss(x, t)
        train_loss_list.append(loss)