In [18]:
import numpy as np

In [19]:
def get_mnist():
    train_mnist="./MNIST/mnist_train.csv"
    test_mnist="./MNIST/mnist_test.csv"
    train_data=np.loadtxt(fname=train_mnist,delimiter=',')
    test_data=np.loadtxt(fname=test_mnist,delimiter=',')
    train_label=train_data[:,0]
    all_train_data=train_data[:,1:]
    test_label=test_data[:,0]
    all_test_data=test_data[:,1:]
    train_label_1ofK=np.identity(10)[train_label.astype(np.int)]
    test_label_1ofK=np.identity(10)[test_label.astype(np.int)]
    
    return (train_label_1ofK,all_train_data/255,test_label_1ofK,all_test_data/255)

In [20]:
def relu(x):
    return np.maximum(0,x)

dif_relu=lambda x:np.where(x>0,1,0)

In [21]:
def softmax(vin):
    cor=np.max(vin)
    vout=np.exp(vin-cor)/np.tile(np.sum(np.exp(vin-cor),axis=1),(10,1)).T
    return vout

In [22]:
def cross_entropy(x,t):
    return -1*np.sum(t*np.log(x))/t.shape[1]

In [23]:
def batchnorm_forward(x,gamma,beta,eps):
    N,D=x.shape
    mu=1./N*np.sum(x,axis=0)
    xmu=x-mu
    sq=xmu**2
    var=1./N*np.sum(sq,axis=0)
    sqrtvar=np.sqrt(var+eps)
    ivar=1./sqrtvar
    xhat=xmu*ivar
    gammax=gamma*xhat
    out=gammax+beta
    cache=(xhat,gamma,xmu,ivar,sqrtvar,var,eps)
    return out,cache

In [24]:
def batchnorm_backward(dout,cache):
    xhat,gamma,xmu,ivar,sqrtvar,var,eps=cache
    N,D=dout.shape
    dbeta=np.sum(dout,axis=0)
    dgammax=dout
    dgamma=np.sum(dgammax*xhat,axis=0)
    dxhat=dgammax*gamma
    divar=np.sum(dxhat*xmu,axis=0)
    dxmu1=dxhat*ivar
    dsqrtvar=-1./(sqrtvar**2)*divar
    dvar=0.5*1./np.sqrt(var+eps)*dsqrtvar
    dsq=1./N*np.ones((N,D))*dvar
    dxmu=2*xmu*dsq
    dx1=(dxmu1+dxmu2)
    dmu=1.*np.sum(dxmu1+dxmu2,axis=0)
    dx2=1./N*np.ones((N,D))*dmu
    dx=dx1+dx2
    return dx,dgamma,dbeta

In [25]:
class NN:
    def __init__(self, input_size,hidden_size,output_size,batch_size):
        self.W1=0.2*(np.random.rand(input_size,hidden_size)-0.5)
        self.W2=0.2*(np.random.rand(hidden_size,output_size)-0.5)
        self.b1=np.zeros(hidden_size)
        self.b2=np.zeros(output_size)
        self.z1=None
        self.z2=None
        self.batch_size=batch_size
        self.input_size=input_size
        self.hidden_size=hidden_size
        self.output_size=output_size
        self.grad_b1=np.empty(hidden_size)
        self.grad_b2=np.empty(output_size)
        self.grad_W1=np.empty((input_size,hidden_size))
        self.grad_W2=np.empty((hidden_size,output_size))
        #self.beta=None
        #self.dbeta=None
        #self.gamma=None
        #self.dgamma=None
        #self.eps=None
        
    def bn_forward(self,x):
        gamma=1
        beta=0
        eps=10e-7
        out=np.dot(x, self.W1) - self.b1
        out=batchnorm_forward(out,gamma,beta,eps)
        self.z1=relu(out)
        out=np.dot(self.z1,self.W2) - self.b2
        out=batchnorm_forward(out,gamma,beta,eps)
        self.z2=softmax(out)

        
    def forward(self,x):
        self.z1=relu(np.dot(x, self.W1) - self.b1)
        self.z2=softmax(np.dot(self.z1,self.W2) - self.b2)
        
    def bn_calc_grad(self,input_data,teaching_data):
        self.bn_forward(input_data)
        cross_entropy_error=cross_entropy(self.z2,teaching_data)
        cross_entropy_error,dgamma,dbeta=batchnorm_backward(self.z2,cache)
        delta2=self.z2-teaching_data
        delta1=dif_relu(self.z1)*np.dot(delta2,self.W2.T)
        self.grad_b1=-1*np.sum(delta1,axis=0)/self.batch_size
        self.grad_b2=-1*np.sum(delta2,axis=0)/self.batch_size
        self.grad_W1=np.dot(input_data.T,delta1)
        self.grad_W2=np.dot(self.z1.T,delta2)
        return cross_entropy_error,dgamma,dbeta
    
    def calc_grad(self,input_data,teaching_data):
        self.forward(input_data)
        cross_entropy_error=cross_entropy(self.z2,teaching_data)
        delta2=self.z2-teaching_data
        delta1=dif_relu(self.z1)*np.dot(delta2,self.W2.T)
        self.grad_b1=-1*np.sum(delta1,axis=0)/self.batch_size
        self.grad_b2=-1*np.sum(delta2,axis=0)/self.batch_size
        self.grad_W1=np.dot(input_data.T,delta1)
        self.grad_W2=np.dot(self.z1.T,delta2)
        return cross_entropy_error
    
    def backward(self,rate):
        self.b1-=rate*self.grad_b1
        self.b2-=rate*self.grad_b2
        self.W1-=rate*self.grad_W1
        self.W2-=rate*self.grad_W2
        self.gamma-=rate*dgamma
        self.beta-=rate*dbeta
        
    def calc_accuracy(self,input_data,teaching_data):
        self.bn_forward(input_data)
        arg_z2=np.argmax(self.z2,axis=1)
        arg_z2_1ofK=np.identity(10)[arg_z2.astype(np.int)]
        return np.sum(arg_z2_1ofK*teaching_data)/self.batch_size
    
    def parameter_output(self):
        file_b1="./parameter/b1.csv"
        file_b2="./parameter/b2.csv"
        file_W1="./parameter/W1.csv"
        file_W2="./parameter/W2.csv"
        np.savetxt(fname=file_b1,X=self.b1,delimiter=',')
        np.savetxt(fname=file_b2,X=self.b2,delimiter=',')
        np.savetxt(fname=file_W1,X=self.W1,delimiter=',')
        np.savetxt(fname=file_W2,X=self.W2,delimiter=',')

In [26]:
def main():
    input_N=784
    hidden_N=200
    output_N=10
    batch_N=100
    learning_rate=0.01
    epoch=100

    parameter_output=False
    accuracy_output=False
    t_train,x_train,t_test,x_test=get_mnist()
    print("FileloadCompleted")
    
    train_data_N=t_train.shape[0]
    Neural_Network=NN(input_size=input_N,hidden_size=hidden_N,output_size=output_N,batch_size=batch_N)
    updates=train_data_N//batch_N+1
    epoch_cnt=0
    acc_err=[]
    
    for i in range (epoch*updates):
        choices_train=np.random.randint(0,train_data_N-1,batch_N)
        t_train_batch=t_train[choices_train]
        x_train_batch=x_train[choices_train]
        
        loss=Neural_Network.bn_calc_grad(x_train_batch,t_train_batch)
        Neural_Network.backward(learning_rate)
        if (i+1)%updates ==0 or i==0:
            accuracy=Neural_Network.calc_accuracy(x_test,t_test)
            print("accuracy",[epoch_cnt],"=",accuracy)
            print("loss=",loss)
            print()
            epoch_cnt+=1
            acc_err.append([accuracy,loss])
    if parameter_output : Neural_Network.parameter_output()
    if accuracy_output : np.savetxt(fname="./accuracy.csv",X=acc_err,delimiter=',')
            
if __name__=="__main__":
    main()

FileloadCompleted


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()