## MNIST - 손글씨 판별 딥러닝 코딩 

In [85]:
import numpy as np


def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))``.

    ``x`` may be a scalar or a NumPy array; arrays are handled element-wise
    by ``np.exp``.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator
    
def softmax(x):
    """Return the softmax of ``x``.

    A 2-D array is normalised row by row (each row sums to 1). Any other
    array is normalised over all of its elements at once. The maximum is
    subtracted before exponentiating so that large inputs do not overflow.
    """
    if x.ndim != 2:
        # Non-batched input: one distribution over every element.
        shifted = x - np.max(x)
        exps = np.exp(shifted)
        return exps / np.sum(exps)

    # Batched input: transpose so each sample is a column, normalise along
    # axis 0, then transpose back to the original (rows = samples) layout.
    columns = x.T
    columns = columns - np.max(columns, axis=0)
    exps = np.exp(columns)
    probabilities = exps / np.sum(exps, axis=0)
    return probabilities.T

def cross_entropy_error(y, t):
    """Return the mean cross-entropy between predictions ``y`` and targets ``t``.

    ``y`` holds per-class probabilities, one row per sample (a 1-D ``y`` is
    treated as a single sample). ``t`` may be one-hot encoded (same number
    of elements as ``y``) or an array of class indices.
    """
    if y.ndim == 1:
        # Promote a single sample to a batch of one.
        t = t.reshape(1, t.size)
        y = y.reshape(1, y.size)

    # One-hot targets have as many elements as y; reduce them to indices.
    labels = t.argmax(axis=1) if t.size == y.size else t

    n_samples = y.shape[0]
    picked = y[np.arange(n_samples), labels]
    # 1e-7 keeps log() finite when a picked probability is exactly 0.
    return -np.sum(np.log(picked + 1e-7)) / n_samples


class Relu:
    """ReLU activation layer: keeps positive inputs and zeroes the rest."""

    def __init__(self):
        # Boolean array marking where the last forward input was <= 0;
        # written by forward() and read by backward().
        self.mask = None

    def forward(self, x):
        """Return a copy of ``x`` with every entry that is <= 0 set to 0.

        ``x`` itself is not modified. The positions that were zeroed are
        remembered in ``self.mask`` for the backward pass.
        """
        self.mask = (x <= 0)
        out = x.copy()
        out[self.mask] = 0
        return out

    def backward(self, dout):
        """Return ``dout`` with entries zeroed where the forward input was <= 0.

        The gradient is computed on a copy so the caller's ``dout`` array is
        left untouched (the layer producing ``dout`` may still hold it).
        """
        dx = dout.copy()
        dx[self.mask] = 0
        return dx
    
class Sigmoid:
    """Sigmoid activation layer that caches its output for the backward pass."""

    def __init__(self):
        # Output of the most recent forward() call; None until then.
        self.out = None

    def forward(self, x):
        """Apply ``sigmoid`` to ``x``, remember the result and return it."""
        self.out = sigmoid(x)
        return self.out

    def backward(self, dout):
        """Return ``dout * (1 - out) * out`` using the cached forward output."""
        scaled = dout * (1.0 - self.out)
        return scaled * self.out
    
class Affine:
    """Fully connected layer computing ``x @ W + b``."""

    def __init__(self, W, b):
        # Parameters are stored by reference, not copied.
        self.W, self.b = W, b

        # Cached by forward(): the input flattened to 2-D and its original
        # shape, so backward() can return a gradient of matching shape.
        self.x = None
        self.original_x_shape = None

        # Parameter gradients, filled in by backward().
        self.dW = None
        self.db = None

    def forward(self, x):
        """Flatten ``x`` to ``(x.shape[0], -1)`` and return ``x @ W + b``."""
        self.original_x_shape = x.shape
        self.x = x.reshape(x.shape[0], -1)
        return np.dot(self.x, self.W) + self.b

    def backward(self, dout):
        """Store ``dW``/``db`` and return the gradient w.r.t. the input.

        The returned array is reshaped to the shape the input had when it
        was passed to forward().
        """
        self.dW = np.dot(self.x.T, dout)
        self.db = np.sum(dout, axis=0)

        input_grad = np.dot(dout, self.W.T)
        return input_grad.reshape(*self.original_x_shape)

    
    
class SoftmaxWithLoss:
    """Output layer that applies softmax and then the cross-entropy loss."""

    def __init__(self):
        # Values cached by forward() for use in backward().
        self.loss = None  # most recent loss value
        self.y = None     # softmax output
        self.t = None     # targets (one-hot rows or class indices)

    def forward(self, x, t):
        """Return the cross-entropy loss of ``softmax(x)`` against ``t``."""
        self.t = t
        self.y = softmax(x)
        self.loss = cross_entropy_error(self.y, self.t)

        return self.loss

    def backward(self, dout=1):
        """Return the gradient of the loss w.r.t. the input of forward().

        The gradient is ``(y - t) / batch_size`` scaled by the upstream
        gradient ``dout``. A 1-D ``y`` is treated as a batch containing one
        sample, matching what ``cross_entropy_error`` does in forward().
        """
        y = self.y
        t = self.t
        is_single_sample = (y.ndim == 1)
        if is_single_sample:
            # Promote to a batch of one so the batch size is 1 rather than
            # the number of classes.
            y = y.reshape(1, y.size)
            t = np.asarray(t).reshape(1, -1)

        batch_size = y.shape[0]
        if t.size == y.size:
            # One-hot targets.
            dx = (y - t) / batch_size
        else:
            # Class-index targets: subtract 1 at each sample's true class.
            dx = y.copy()
            dx[np.arange(batch_size), t] -= 1
            dx = dx / batch_size

        # Chain rule: scale by the upstream gradient (1 by default).
        dx = dx * dout

        if is_single_sample:
            dx = dx.reshape(self.y.shape)
        return dx


In [86]:
# Two layer deep learning architecture 구현하기

import numpy as np
#from layers import *
from collections import OrderedDict

class TwoLayerNet:
    """Two-layer network: Affine -> ReLU -> Affine, with a softmax loss on top."""

    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialise weights: scaled Gaussian noise for W, zeros for b.
        # W1 is drawn before W2 so the random stream is consumed in order.
        w1 = weight_init_std * np.random.randn(input_size, hidden_size)
        b1 = np.zeros(hidden_size)
        w2 = weight_init_std * np.random.randn(hidden_size, output_size)
        b2 = np.zeros(output_size)
        self.params = {'W1': w1, 'b1': b1, 'W2': w2, 'b2': b2}

        # Build the layers; each Affine layer receives the very same array
        # objects that are stored in self.params.
        self.layers = OrderedDict([
            ('Affine1', Affine(w1, b1)),
            ('Relu', Relu()),
            ('Affine2', Affine(w2, b2)),
        ])

        self.lastlayer = SoftmaxWithLoss()

    def predict(self, x):
        """Run ``x`` through every layer in order and return the final output."""
        out = x
        for layer in self.layers.values():
            out = layer.forward(out)
        return out

    def loss(self, x, t):
        """Return the loss of the network's prediction for ``x`` against ``t``."""
        scores = self.predict(x)
        return self.lastlayer.forward(scores, t)

    def accuracy(self, x, t):
        """Return the fraction of rows in ``x`` whose arg-max prediction matches ``t``.

        ``t`` may be 1-D class indices; any other rank is reduced with
        ``argmax(axis=1)`` first.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        return np.sum(predicted == t) / float(x.shape[0])

    def gradient(self, x, t):
        """Return a dict with the gradients for 'W1', 'b1', 'W2' and 'b2'."""
        # Forward pass: populates the values each layer caches.
        self.loss(x, t)

        # Backward pass: start at the loss layer, then walk the layers in
        # reverse order.
        dout = self.lastlayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        first = self.layers['Affine1']
        second = self.layers['Affine2']
        return {
            'W1': first.dW,
            'b1': first.db,
            'W2': second.dW,
            'b2': second.db,
        }
            

In [87]:
## 학습 및 실행

import numpy as np
from mnist import load_mnist

# Load the MNIST splits via the project's load_mnist helper.
# NOTE(review): normalize=True / one_hot_label=True presumably yield scaled
# float pixels and one-hot label rows — confirm against the mnist module.
(x_train, t_train), (x_test, t_test)=load_mnist(normalize=True, one_hot_label=True)

network=TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

iters_num=10000
train_size=x_train.shape[0]
batch_size=100
learning_rate=0.1

train_loss_list=[]
train_acc_list=[]
test_acc_list=[]

# Number of iterations between accuracy reports. Integer division keeps this
# an int, so the `i % iter_per_epoch == 0` check below still fires
# periodically when batch_size does not divide train_size evenly.
iter_per_epoch=max(train_size//batch_size, 1)

for i in range(iters_num):
    # Draw a random mini-batch (np.random.choice samples with replacement
    # by default).
    batch_mask=np.random.choice(train_size, batch_size)
    x_batch=x_train[batch_mask]
    t_batch=t_train[batch_mask]

    # Compute the gradients with backpropagation.
    grad=network.gradient(x_batch, t_batch)

    # Update the weights. The in-place `-=` modifies the same array objects
    # the Affine layers hold, so the layers see the new values.
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key]-=learning_rate*grad[key]

    # Record the loss on this mini-batch after the update.
    loss =network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Periodically evaluate accuracy on the full train and test sets.
    if i % iter_per_epoch == 0:
        train_acc=network.accuracy(x_train, t_train)
        test_acc=network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)

        print(train_acc, test_acc)
        

0.08608333333333333 0.0815
0.9008333333333334 0.9048
0.92495 0.9289
0.9356333333333333 0.9358
0.9451833333333334 0.944
0.9518333333333333 0.9499
0.95655 0.954
0.9617 0.9587
0.9637 0.9601
0.9667333333333333 0.963
0.96955 0.9629
0.97255 0.9666
0.9745666666666667 0.9678
0.9755666666666667 0.9683
0.9770666666666666 0.9671
0.9790833333333333 0.9712
0.9802833333333333 0.97


In [53]:
import numpy as np
from mnist import load_mnist

# Debugging setup: load the data, build a fresh network and draw a single
# mini-batch so the individual layers can be inspected step by step.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Same hyper-parameters as the training cell.
iters_num, batch_size, learning_rate = 10000, 100, 0.1
train_size = x_train.shape[0]

train_loss_list, train_acc_list, test_acc_list = [], [], []

iter_per_epoch = max(train_size / batch_size, 1)

# One random mini-batch of inputs with the matching targets.
batch_mask = np.random.choice(train_size, batch_size)
x_batch, t_batch = x_train[batch_mask], t_train[batch_mask]
    

In [54]:
# Display the sampled mini-batch of inputs.
x_batch

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [55]:
# Check the dimensions of the mini-batch.
x_batch.shape

(100, 784)

In [59]:
# Run one forward/backward pass on the mini-batch.
# NOTE(review): the AttributeError recorded for this cell presumably comes
# from an earlier, buggy revision of the layer classes (see the Debugging
# notes at the end of the file) — confirm by re-running.
grad=network.gradient(x_batch, t_batch) 

AttributeError: 'NoneType' object has no attribute 'shape'

In [60]:
# Narrow the failure down to the forward pass alone.
# NOTE(review): the recorded AttributeError presumably stems from an earlier
# revision of the layer classes — confirm by re-running.
x=network.predict(x_batch)

AttributeError: 'NoneType' object has no attribute 'shape'

In [64]:
# Build a stand-alone Affine layer (784 inputs, 50 outputs) with freshly
# initialised parameters so it can be tested in isolation.
W1=0.01*np.random.randn(784, 50)
b1=np.zeros(50)
Affine1=Affine(W1, b1)


In [67]:
# Forward the mini-batch through the stand-alone Affine layer.
x=Affine1.forward(x_batch)
# NOTE(review): Relu.__init__ accepts no arguments, so this call raises the
# TypeError recorded for this cell; applying the activation would be
# Relu().forward(x).
x2=Relu(x)

TypeError: __init__() takes 1 positional argument but 2 were given

In [66]:
# Print x. NOTE(review): the values shown look like the output of the
# stand-alone Affine layer — confirm the cell execution order.
print(x)

[[ 0.08420629 -0.08232492 -0.03954939 ...  0.03508672  0.02525325
   0.0005405 ]
 [-0.00516788 -0.1412316  -0.03407288 ...  0.05796211 -0.06171116
   0.06419538]
 [ 0.06674986 -0.05767816 -0.02170107 ...  0.05289186 -0.08335637
   0.08633747]
 ...
 [ 0.02653229 -0.12284559 -0.01766063 ...  0.15730392 -0.07062759
  -0.07726711]
 [-0.04078751 -0.0915718   0.02009514 ...  0.05609301 -0.07002943
  -0.00428548]
 [ 0.08796555 -0.21347877  0.09168308 ...  0.01867811  0.07396485
   0.0044796 ]]


### Debugging

1. Affine 클래스에서 w와 W를 혼동하여 사용
2. Relu 클래스의 forward 메서드에 return 누락
3. Affine 클래스의 backward 메서드에 return 누락

확인완료!