In [2]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [7]:
import sys, os
import numpy as np
import pandas as pd
import seaborn as sn
import matplotlib.pyplot as plt
%matplotlib inline
import matplotlib as mpl
sn.set()

import matplotlib.font_manager as fm
fl = fm.FontProperties(fname ="C:\Windows\Fonts\malgun.ttf").get_name()
plt.rc('font',family=fl)

from patsy import dmatrices
import statsmodels.api as sm

from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.formula.api as smf

from mnist import load_mnist
from PIL import Image
import pickle

from collections import OrderedDict


# MNIST - 손글씨 판별 딥러닝 코딩

## 함수 설정 

In [14]:
import numpy as np


def sigmoid(x):
    return 1 / ( 1 + np.exp(-x))

def softmax(x):
    if x.ndim == 2: # x 크기는 2 
        x = x.T
        x = x - np.max(x, axis=0)
        y = np.exp(x) / np.sum(np.exp(x), axis=0)
        return y.T 

    x = x - np.max(x) # softmax 값이 너무 클때를 방지 # 오버플로우 
    return np.exp(x) / np.sum(np.exp(x))

def cross_entropy_error( y,t):
    if y.ndim == 1:
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)
        
    if t.size == y.size : # t.szie 와 y.size 는 값이 같아야 함
        t = t.armax(axis=1)
            
    batch_size = y.shape[0]
    return -np.sum(np.log ( y[ np.arrange(batch_size) , t ]+1e-7 ) )/ batch_size
    
##############################################################################################            
        
        
class Relu: 
    def __init__(self):
        self.mask = None
        
    def forward(self, x): # x 는 인풋값 
        self.mask = (x<=0) # 0 보다 작으면 
        out = x.copy() # x를 그대로 아웃
        out[self.mask] = 0 # 0 보다 작으면 0 으로 아웃
            
    def backward( self , dout ): # dout 은 미분의 곱, 미분의 토탈 
        dout[self.mask] = 0 
        dx = dout
        return dx 
    
class Relu:
    def __init__(self):
        self.mask = None

    def forward(self, x):
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0

        return out

    def backward(self, dout):
        dout[self.mask] = 0
        dx = dout

        return dx

    
##############################################################################################    
    
class Sigmoid:
    def __init__(self):
        self.out = None 
        
    def forward(self , x ): # x 를 시그모이드 적용후 바로 아웃 
        out = sigmoid(x)
        self.out = out
        return out 
    
    def backward( self , dout ): # dout 은 z->y 에서의 y 이다.
        dx = dout * ( 1.0 - self.out) * self.out
        return dx 
    
##############################################################################################    
    
class Affine:
    def __init__(self , W , b ): # Y = np.dot(X,W)+B 부분, 입력값 과 weight 값 두개 가 필요 
        self.W = W
        self.b = b
        
        self.x = None
        self.original_x_shape = None # transpose 안한 x shape
        
        self.dw = None
        self.db = None
        
    
    def forward(self , x):
        self.original_x_shape = x.shape  # x.shape [ 행 , 열 ]
        x = x.reshape(x.shape[0], -1) # x[행, 열]
        self.x = x
        out = np.dot(self.x , self.W)+self.b
        
        return out 
        
    def backward(self , dout):
        
        dx = np.dot(dout , self.W.T)
        
        self.dw = np.dot(self.x.T , dout )
        
        self.db = np.sum( dout , axis = 0)
        
        dx =  dx.reshape(*self.original_x_shape)
        
        return dx
        
##############################################################################################            

class SoftmaxWithLoss:
    def __init__ (self):
        self.loss = None
        self.y = None #   y 의 순전파값
        self.t = None # 0 또는 1,  실제값
        
    def forward( self , x ,t ):
        self.t = t 
        self.y = softmax(y)
        self.loss = cross_entropy_error( self.y , self.t)
        
        return self.loss
    
    def backwaord ( self , dout=1):
        batch_size = self.t.shape[0] 
        
        if self.t.size == self.y.size :
            dx = (self.y - self.x) / batch_size # 배치사이즈로 나누기 = 평균으로 나눔 
            # dx 는 t 사이즈 - y 사이즈 서로간의 차이의 평균 즉, dx = 미분값 
            
        else: # y 의 값이 0 또는 1 둘다 아닐경우 ( 에러방지)
            dx = self.y.copy()
            dx[ np.arrange(batch_size), self.t ] -= 1 
            dx = dx /batch_size
            
            

## 신경망 구조 설정

In [15]:
import numpy as np
from collections import OrderedDict  
# key 와 value 에서 key 값의 순서를 자동지정함으로써 Affine 계층 ( ex 1,2,3...), Relu 계층의 순서를 명확하게 함.

class TwoLayerNet:
    def __init__( self, input_size , hidden_size , output_size , weight_std = 0.01):
        # 가중치 초기화 
        
        self.params = {}
        self.params['W1']=weight_std * np.random.randn(input_size , hidden_size)
        self.params['b1']=np.zeros(hidden_size)
        self.params['W2']=weight_std * np.random.randn(hidden_size,output_size)
        self.params['b2']=np.zeros(output_size)
        
        # 계층 , layers
        
        self.layers = OrderedDict()
        self.layers['Affine1']=Affine(self.params['W1'] , self.params['b1'])
        self.layers['Relu']=Relu()
        self.layers['Affine2']=Affine(self.params['W2'] , self.params['b2'])
        self.layers['Relu']=Relu()
        
        self.lastlayer = SoftmaxWithLoss()
        
    def predict(self , x): # x가 들어올때, y 의 값은? 즉, SoftmaxWithloss 값은 무엇이냐 
        
        for layers in self.layers.values(): 
        # self.layers.values() = ['Affine1'] 과 ['Affine2']  의 결과값 을 이용해 매층마다 순전파로 Affine 계산 
            x = layers.forward(x)
            
        return x
    
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastlayer.forward(y,t)
    
    def accuracy( self ,  x , t ): # predict() 에서 x 값으로 결과값 출력하기 , t 는 테스트 셋 
        y = self.predict(x) 
        y = np.argmax(y, axis=1)
        
        if t.ndim !=1 : t = np.argmax(t, axis =1)
            
        
        accuracy = np.sum ( y == t) / float(x.shape[0]) # y 와 t 가 같은 값은 몇개인가 
        return accuracy
    
    # 오차역전파 
    def gradient ( self , x , t):
        # forward 
        self.loss(x,t)
        
        # backward
        
        dout = 1
        dout = self.lastlayer.backward(dout) # dout  은 편미분값 
        
        layers = list(self.layers.values()) # layers 는 Affine 이후 values 들 
        layers.reverse() # 역행 
        
        for layer in layers : 
            dout = layer.backward(dout) # 계층만큼 편미분 누적화 
            
        grads={} 
        grads['W1'] , grads['b1'] = self.layers['Affine1'].dW , self.layers['Affine1'].db # Affine 의 dw , db 
        grads['W2'] , grads['b2'] = self.layers['Affine2'].dW , self.layers['Affine2'].db
        
        return grads

In [16]:
# 학습 및 실행 
from mnist import load_mnist

(x_train , t_train) , ( x_test , t_test) = load_mnist(normalize=True , one_hot_label=True)

network =TwoLayerNet( input_size= 784 , hidden_size=50 , output_size=10)

iters_num = 10000
train_size = x_train.shape[0]
batch_size = 100  # 미니배치 
learning_rate = 0.1 

train_loss_list=[] # loss 값 넣을 준비
train_acc_list=[] # 트레이닝 값  넣을 준비 
test_acc_list=[] # 테스트 값 넣을 준비 

iter_per_ephoch = max(train_size/ batch_size,1)


##############################################################################################    

for i in range(iters_num):
    batch_mask = np.random.choice(train_size , batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]
    
    # 오차역전파 
    
    grad = network.gradient( x_batch , t_batch )
    
    # 업데이트 
    
##############################################################################################    
    
    for key in ('W1','b1','W2','b2'):
        network.params[key] -=learning_rate * grad[key]
        
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)
    
    if i % iter_per_ephoch == 0 :
        train_acc = network.accuracy(x_train , t_train)
        test_acc = network.accuracy( x_test , t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print( train_acc , test_acc )

NameError: name 'y' is not defined