In [2]:
import sys, os
sys.path.append(os.pardir)
from common.functions import *
from common.gradient import numerical_gradient

class TwoLayerNet:
    def __init__(self, input_size, hidden_size, output_size,weight_init_std = 0.01):
        #가중치 초기화
        self.params={}
        self.params['W1'] = weight_init_std * np.random.randn(input_size,hidden_size) #배열생성 후 정규분포로 랜덤한 값
        self.params['b1'] = np.zeros(hidden_size) #출력값 사이즈와 동일하게
        self.params['W2'] = weight_init_std *  np.random.randn(hidden_size,output_size) #배열생성 후 정규분포로 랜덤한 값
        self.params['b2'] = np.zeros(output_size)
        
    def predict(self,x):
        W1,W2 = self.params['W1'],  self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        
        a1 = np.dot(x,W1) + b1
        z1 = sigmoid(a1)
        
        a2 = np.dot(z1,W2) + b2
        y = sigmoid(a2)
        
        return  y 
    
    def loss(self,x,t):
        y = self.predict(x)
        
        return cross_entropy_error(y,t)
    
    def accuracy(self,x,t):
        y = self.predict(x)
        y = np.argmax(y,axis=1)
        t = np.argmax(t,axis=1)
        
        accuracy = np.sum(y==t) / float(x.shape[0]) #각 행의 예측 label과 정답 label이 같은 경우를 합 / x의 row수로 나눈값
        return accuracy
    
    def numerical_gradient(self,x,t): #수치미분 방식으로 편미분을 구하는 함수
        loss_W = lambda W : self.loss(x,t)
        
        grads={}
        grads['W1'] = numerical_gradient(loss_W,self.params['W1']) #W1에 대한 편미분
        grads['b1'] = numerical_gradient(loss_W,self.params['b1']) #b1에 대한 편미분
        grads['W2'] = numerical_gradient(loss_W,self.params['W2']) #W2에 대한 편미분
        grads['b2'] = numerical_gradient(loss_W,self.params['b2']) #b2에 대한 편미분
        
        return grads

In [3]:
net = TwoLayerNet(input_size=784, hidden_size=100,output_size=10)
print(net.params['W1'].shape)
print(net.params['b1'].shape)
print(net.params['W2'].shape)
print(net.params['b2'].shape)

(784, 100)
(100,)
(100, 10)
(10,)


In [4]:
x = np.random.rand(100,784) #더미데이터 생성
y = net.predict(x)

In [5]:
x = np.random.rand(100,784) #더미 입력데이터 (100장)
t = np.random.rand(100,10)  #더미 정답레이블 (100장)

grads = net.numerical_gradient(x,t)

print(grads['W1'].shape)
print(grads['b1'].shape)
print(grads['W2'].shape)
print(grads['b2'].shape)

(784, 100)
(100,)
(100, 10)
(10,)


### 미니배치 학습 구현하기

In [6]:
import numpy as np
from dataset.mnist import load_mnist
from two_layer_net import TwoLayerNet

(X_train,t_train),(X_test,t_test) = load_mnist(normalize=True,one_hot_label=True)

train_loss_list=[]

iters_num = 10000
train_size = X_train.shape[0]
batch_size = 100
learning_rate = 0.1

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

for i in range(iters_num):
    #미니배치획득
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = X_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    #기울기 계산
    grad = network.numerical_gradient(x_batch,t_batch)
    #grad = network.gradient(x_batch,t_batch) 성능 개선
    
    #매개변수 갱신
    for key in ('W1','b1','W2','b2'):
        network.params[key] -= learning_rate * grad[key]
        
    
    #학습 경과 기록
    loss = network.loss(x_batch,t_batch)
    train_loss_list.append(loss)
    print(f"{i+1} epoch complete")

1 epoch complete
2 epoch complete
3 epoch complete
4 epoch complete
5 epoch complete
6 epoch complete
7 epoch complete


KeyboardInterrupt: 