In [1]:
import numpy as np

## 1.Sum of Squares for error

In [2]:
def sum_squares_error(y, t): 
    return 0.5 * np.sum((y-t)**2)

In [3]:
t = np.zeros(5)
t[1] = 1
print(t)

[0. 1. 0. 0. 0.]


In [4]:
#1일 확률이 제일 높다고 추정
y1 = [0.1,0.6,0.05,0.03,0.3]
#4일 확률이 제일 높다고 추정
y2 = [0.1,0.1,0.3,0.05,0.4]
print("y1 : ",sum_squares_error(y1,t))
print("y2 : ",sum_squares_error(y2,t))

y1 :  0.13170000000000004
y2 :  0.53625


## Cross Entropy error

In [6]:
def cross_entropy(y,t):
    delta = 1e-7
    return -np.sum(t*np.log(y + delta))

In [7]:
#1일 확률이 제일 높다고 추정
y1 = [0.1,0.6,0.05,0.03,0.3]
#4일 확률이 제일 높다고 추정
y2 = [0.1,0.1,0.3,0.05,0.4]
print("y1 : ",cross_entropy(np.array(y1),np.array(t)))
print("y2 : ",cross_entropy(np.array(y2),np.array(t)))

y1 :  0.510825457099338
y2 :  2.302584092994546


## Mini batch

In [8]:
import sys,os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

(x_train,t_train),(x_test,t_test) = load_mnist(normalize = True,one_hot_label = True)
print(x_train.shape)
print(t_train.shape)

(60000, 784)
(60000, 10)


In [9]:
train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size,batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

In [10]:
# 28*28 * 10(개)
x_batch.size

7840

In [11]:
#batch용 cross entropy error
# y = predict
# t = label(0,1,2,3,4,...)
def cross_entropy_error(y,t):
    # 1차원 배열 -> 2차원 배열 ( [a,b,c] --> [[a,b,c]])
    #형태를 통일
    if y.ndim == 1:
        t = t.reshape(1,t.size)
        y = y.reshape(1,y.size)
    
    batch_size = y.shape[0]
    return -np.sum(np.log(y[np.arange(batch_size),t] + 1e-7)) /  batch_size # t = 1 이므로 생략


In [12]:
y = [[0.1,0.6,0.05,0.03,0.3],[0.1,0.1,0.3,0.05,0.4]]
t = [[2,4]]
y = np.array(y)
t = np.array(t)
print(y[np.arange(2),t])
print(cross_entropy_error(y,t))

[[0.05 0.4 ]]
1.9560103777150886


## Gradient

In [26]:
#input : function / x(point)
def numerical_gradient(f,x):
    h = 1e-4
    grad = np.zeros_like(x)
    
    for idx in range(x.size):
        
        tem_val = x[idx]
        
        # x +h
        x[idx] =  tem_val + h
        # f(x + h) 계산
        fxh1 = f(x)
        
        x[idx] = tem_val - h
        fxh2 = f(x)
        
        grad[idx] = (fxh1 - fxh2) / (2*h)
        x[idx] = tem_val #값 복원
        
    return grad

def numerical_gradient_V2(f,X):
    h = 1e-4
    if X.ndim == 1 :
        return numerical_gradient(f,X)
    
    else:
        grad = np.zeros_like(X)
        for idx , x in enumerate(X):
            grad[idx] = numerical_gradient(f,x)
    
    return grad

#y = x1**2 = x2**2
def function_2(x):
    return x[0]**2 + x[1]**2

print(numerical_gradient(function_2,np.array([3.0,4.0]))) #Not np.array([3,4]) --> 소수점을 다 버리고 int로 Return

def gradient_descent(f,init_x,learning_rate = 0.01,step_num = 100):
    x = init_x
    
    for i in range(step_num):
        grad = numerical_gradient_V2(f,x)
        x = x - learning_rate*grad
    
    return x

print(gradient_descent(function_2,init_x = np.array([3.0,4.0]),learning_rate = 0.1,step_num = 100))

[6. 8.]
[6.11110793e-10 8.14814391e-10]


## Gradient Descent in Neural Network

In [15]:
import sys,os
sys.path.append(os.pardir)
import numpy as np

In [16]:
class simpleNet:
    
    def __init__(self):
        #정규분포로 초기화( 크기는 (2,3))
        self.W = np.random.randn(2,3)
    
    # WX
    def predict(self,x):
        return np.dot(x,self.W)
    
    def soft_max(self,x):
        c = np.max(x)
        a = np.exp(x-c)
        sum_a = np.sum(a)
        return a / sum_a
    
    def loss(self,x,t):
        z = self.predict(x)
        y = self.soft_max(z)
        loss = cross_entropy_error(y,t)
        
        return loss

net = simpleNet()
print(net.W)

[[ 0.53594649 -0.81226192  0.17115805]
 [-0.02069657 -0.75178745  0.43149146]]


In [17]:
# W의 size가 (2,3) 이므로 (1,2)로 사이즈 지정
x = np.array([0.6,0.9])
p = net.predict(x)
#랜덤 가중치에 의해 예측된 다음값
print(p)

[ 0.30294099 -1.16396585  0.49103714]


In [18]:
#어느것이 가장 정답일까?
np.argmax(p)

2

In [19]:
#정답데이터를 t로 입력하고 loss를 계산(예측 --> softmax --> cross entropy)
t = np.array([0,0,1])
net.loss(x,t)

4.139931573926755

In [20]:
#dummy function
def f(W):
    return net.loss(x,t)

In [27]:
#기울기 계산해보기
dW = numerical_gradient_V2(f,net.W)
print(dW)

[[-0.46156501 -0.42968843  0.89125344]
 [-0.69234752 -0.64453264  1.33688016]]


In [None]:
#기울기 갱신하기(Gradient Descent)