In [4]:
from IPython.core.interactiveshell import InteractiveShell
# Echo the value of every top-level expression in a cell, not only the last
# one; later cells rely on this to show several results at once.
InteractiveShell.ast_node_interactivity = 'all'

import numpy as np
# NOTE(review): pandas is imported but not used in any cell visible here.
import pandas as pd
from mnist import load_mnist

# Load MNIST as two (inputs, labels) pairs: training and test.
# `load_mnist` comes from the project-local `mnist` module; the exact effect of
# `normalize` and `one_hot_label` is defined there. Presumably they scale pixel
# values and return one-hot label rows, respectively -- TODO confirm.
(x_train , t_train) , ( x_test , t_test) = load_mnist (normalize=True , one_hot_label = True)



## MNIST 테스트 

In [7]:
# Inspect the shapes of the loaded arrays. Each bare expression is echoed
# because ast_node_interactivity was set to 'all' in the setup cell.
x_train.shape
t_train.shape

x_test.shape

(60000, 784)

(60000, 10)

(10000, 784)

## 미니배치 테스트 

In [14]:
# Demonstrate drawing one random mini-batch of training rows.
train_size = x_train.shape[0]
batch_size = 10

# Pick `batch_size` random row indices from range(train_size).
# np.random.choice samples with replacement by default, so the same index can
# appear more than once in a batch.
batch_mask = np.random.choice( train_size , batch_size )

batch_mask

# Indexing with the integer array selects the sampled rows of x_train.
x_batch = x_train[batch_mask]

x_batch
                

array([ 4985, 37504,  6586, 24567, 35841, 13640, 48567,  7015, 31931,
         963])

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

## 교차엔트로피 오차 

In [16]:
from functions import cross_entropy_error

In [17]:
class Affine:
    """Fully connected layer: ``out = x . W + b``.

    The layer keeps references to the weight matrix ``W`` and bias ``b`` it is
    given (it does not copy them). After ``backward`` has run, the parameter
    gradients are available as ``dW`` and ``db``.
    """

    def __init__(self, W, b):
        """Store the parameters and reset the forward cache and gradients."""
        self.W, self.b = W, b

        # Filled in by forward(): the flattened input and its original shape.
        self.x = None
        self.original_x_shape = None
        # Filled in by backward(): gradients w.r.t. W and b.
        self.dW = None
        self.db = None

    def forward(self, x):
        """Return ``x . W + b``, flattening ``x`` to 2-D first.

        Every axis after the first is collapsed into one, so inputs with more
        than two dimensions are accepted. The original shape is remembered so
        that ``backward`` can restore it.
        """
        self.original_x_shape = x.shape
        self.x = x.reshape(x.shape[0], -1)
        return np.dot(self.x, self.W) + self.b

    def backward(self, dout):
        """Back-propagate ``dout`` through the layer.

        Stores the parameter gradients in ``self.dW`` / ``self.db`` and
        returns the gradient with respect to the input, reshaped to the shape
        the input had when ``forward`` was called.
        """
        grad_input = np.dot(dout, self.W.T)
        self.dW = np.dot(self.x.T, dout)
        # The bias is added to every row, so its gradient sums over rows.
        self.db = np.sum(dout, axis=0)

        return grad_input.reshape(*self.original_x_shape)


In [21]:
# Two layer deep learning architecture 구현하기

import numpy as np
from layers import *
from gradient import numerical_gradient
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer network: Affine1 -> Relu -> Affine2, followed by a loss layer.

    Attributes:
        params: dict with keys 'W1', 'b1', 'W2', 'b2' holding the parameters.
        layers: OrderedDict of the layers, in forward order.
        lastlayer: the ``SoftmaxWithLoss`` instance used to compute the loss.

    ``Relu`` and ``SoftmaxWithLoss`` are not defined in this class; they are
    expected to be in scope (the notebook pulls them in from ``layers``).
    """

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        """Create the parameters and wire up the layers.

        Weights are drawn from a standard normal distribution scaled by
        ``weight_init_std``; biases start at zero.
        """
        self.params = {
            'W1': weight_init_std * np.random.randn(input_size, hidden_size),
            'b1': np.zeros(hidden_size),
            'W2': weight_init_std * np.random.randn(hidden_size, output_size),
            'b2': np.zeros(output_size),
        }

        # The Affine layers receive the very same array objects that live in
        # self.params, so the two views of the parameters are shared.
        self.layers = OrderedDict([
            ('Affine1', Affine(self.params['W1'], self.params['b1'])),
            ('Relu', Relu()),
            ('Affine2', Affine(self.params['W2'], self.params['b2'])),
        ])

        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` forward through every layer in order (loss layer excluded)."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss of the network's prediction for ``x`` against ``t``."""
        scores = self.predict(x)
        return self.lastlayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows whose arg-max prediction equals the label.

        ``t`` may be a 1-D array of class indices; anything else is reduced
        to class indices with an arg-max over axis 1.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)

        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return parameter gradients computed by the external numerical routine.

        Delegates to the module-level ``numerical_gradient`` function (same
        name as this method, resolved from the global scope) once per
        parameter array.
        """
        def loss_W(W):
            # The argument is ignored: the loss is always evaluated on the
            # network's current parameters.
            return self.loss(x, t)

        return {
            key: numerical_gradient(loss_W, self.params[key])
            for key in ('W1', 'b1', 'W2', 'b2')
        }

    def gradient(self, x, t):
        """Return parameter gradients computed by back-propagation.

        A forward pass through ``loss`` lets the layers cache what they need;
        the gradient is then propagated from the loss layer back through
        the layers in reverse order. The result is a dict with keys
        'W1', 'b1', 'W2', 'b2' read from the two Affine layers.
        """
        # Forward pass (return value unused; run for its caching side effects).
        self.loss(x, t)

        # Backward pass, seeded with an upstream gradient of 1.
        dout = self.lastlayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        # Each Affine layer stored its own dW/db during backward().
        first, second = self.layers['Affine1'], self.layers['Affine2']
        return {
            'W1': first.dW, 'b1': first.db,
            'W2': second.dW, 'b2': second.db,
        }

In [22]:
## 학습 및 실행

import numpy as np
from mnist import load_mnist

# Load the data and build the network to train.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Hyper-parameters.
iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100
learning_rate = 0.1

# Per-iteration loss and per-epoch accuracies, kept for later inspection.
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Number of iterations that make up one epoch. Floor division keeps this an
# integer, so the `i % iter_per_epoch == 0` check below still fires once per
# epoch when train_size is not an exact multiple of batch_size.
iter_per_epoch = max(train_size // batch_size, 1)

for i in range(iters_num):
    # Sample a random mini-batch (indices are drawn with replacement).
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradients by back-propagation.
    grad = network.gradient(x_batch, t_batch)

    # Gradient-descent update of every parameter.
    # The update must be in place (`-=`): the Affine layers hold references to
    # these same arrays, so rebinding network.params[key] to a new array would
    # leave the layers using the old, un-updated weights.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Record the loss on this batch after the update.
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, measure accuracy on the full training and test sets.
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print(train_acc, test_acc)
        
    
    
    
    

0.09988333333333334 0.0994
0.9016 0.9075
0.9233333333333333 0.9243
0.9349166666666666 0.935
0.9455333333333333 0.9435
0.9511166666666667 0.9488
0.9557833333333333 0.9523
0.9604666666666667 0.9566
0.9643333333333334 0.9612
0.9653333333333334 0.9619
0.96825 0.9631
0.9709833333333333 0.9639
0.9738 0.9646
0.97325 0.966
0.9765833333333334 0.9678
0.9784 0.968
0.9782833333333333 0.9691


In [23]:
print(network.params['b2'])

[-0.35810582  0.42012936 -0.19795103 -0.04888804  0.20891777  0.32133481
 -0.16912143  0.16439903 -0.31297253 -0.02774213]
