In [1]:
# two_layer_net.py 
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from collections import OrderedDict


class TwoLayerNet:

    def __init__(self, input_size, hidden_size, output_size, weight_init_std = 0.01):
        # 가중치 초기화
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size) 
        self.params['b2'] = np.zeros(output_size)

        # 계층 생성
        self.layers = OrderedDict()
        self.layers['Affine1'] = Affine(self.params['W1'], self.params['b1'])
        self.layers['Relu1'] = Relu()
        self.layers['Affine2'] = Affine(self.params['W2'], self.params['b2'])

        self.lastLayer = SoftmaxWithLoss()
        
    def predict(self, x):
        for layer in self.layers.values():
            x = layer.forward(x)
        
        return x
        
    # x : 입력 데이터, t : 정답 레이블
    def loss(self, x, t):
        y = self.predict(x)
        return self.lastLayer.forward(y, t)
    
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        if t.ndim != 1 : t = np.argmax(t, axis=1)
        
        accuracy = np.sum(y == t) / float(x.shape[0])
        return accuracy
        
    # x : 입력 데이터, t : 정답 레이블
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)
        
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])
        
        return grads
        
    def gradient(self, x, t):
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.lastLayer.backward(dout)
        
        layers = list(self.layers.values())
        layers.reverse()
        for layer in layers:
            dout = layer.backward(dout)

        # 결과 저장
        grads = {}
        grads['W1'], grads['b1'] = self.layers['Affine1'].dW, self.layers['Affine1'].db
        grads['W2'], grads['b2'] = self.layers['Affine2'].dW, self.layers['Affine2'].db

        return grads


In [2]:
####################################
# modified from train_neuralnet.py
import sys, os
sys.path.append(os.pardir)
import numpy as np

def train_neuralnet_mnist(x_train, t_train, x_test, t_test, 
                          input_size=64, hidden_size=10, output_size=10, 
                          iters_num = 1000, batch_size = 10, learning_rate = 0.1,
                          verbose=True):
    
    network = TwoLayerNet(input_size, hidden_size, output_size)

    # Train Parameters
    train_size = x_train.shape[0]
    iter_per_epoch = int(max(train_size / batch_size, 1))

    train_loss_list, train_acc_list, test_acc_list = [], [], []

    for step in range(1, iters_num+1):
        # get mini-batch
        batch_mask = np.random.choice(train_size, batch_size)
        x_batch = x_train[batch_mask]
        t_batch = t_train[batch_mask]

        # 기울기 계산
        #grad = network.numerical_gradient(x_batch, t_batch) # 수치 미분 방식
        grad = network.gradient(x_batch, t_batch) # 오차역전파법 방식(압도적으로 빠르다)

        # Update
        for key in ('W1', 'b1', 'W2', 'b2'):
            network.params[key] -= learning_rate * grad[key]

        # loss
        loss = network.loss(x_batch, t_batch)
        train_loss_list.append(loss)

        if verbose and step % iter_per_epoch == 0:
            train_acc = network.accuracy(x_train, t_train)
            test_acc = network.accuracy(x_test, t_test)
            train_acc_list.append(train_acc)
            test_acc_list.append(test_acc)
            print('Step: {:4d}\tTrain acc: {:.5f}\tTest acc: {:.5f}'.format(step, 
                                                                            train_acc,
                                                                            test_acc))
    tracc, teacc = network.accuracy(x_train, t_train), network.accuracy(x_test, t_test)
    if verbose:
        print('Optimization finished!')
        print('Training accuracy: %.2f' % tracc)
        print('Test accuracy: %.2f' % teacc)
    return tracc, teacc

In [3]:
from sklearn.datasets import load_digits

def load_mnist1(normalize=True, one_hot_label=False, shuffled=True):
    def _change_one_hot_label(X):
        T = np.zeros((X.size, 10))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
            
        return T

    def train_test_split(data, target, test_size, shuffled=True, seed=1004):
        import numpy as np
        
        test_num = int(data.shape[0] * test_size)
        train_num = data.shape[0] - test_num

        if shuffled:
            np.random.seed(seed)
            shuffled = np.random.permutation(data.shape[0])
            data = data[shuffled,:]
            target = target[shuffled]
        else:
            idx = np.argsort(target)
            data = data[idx]
            target = target[idx]

        x_train = data[:train_num]
        x_test = data[train_num:]
        t_train = target[:train_num]
        t_test = target[train_num:]

        return x_train, x_test, t_train, t_test

    data = load_digits().data
    target = load_digits().target
    

    x_train, x_test, t_train, t_test = train_test_split(data, target, test_size=0.4, shuffled = shuffled)
    if normalize:
        x_train = x_train / 16.
        x_test = x_test / 16.

    if one_hot_label:
        t_train = _change_one_hot_label(t_train)
        t_test = _change_one_hot_label(t_test)  

    return (x_train, t_train), (x_test, t_test)

(x_train, t_train), (x_test, t_test) = load_mnist1(shuffled=False)

train_neuralnet_mnist(x_train, t_train, x_test, t_test, 
                     input_size=64, hidden_size=10, output_size=10, 
                     iters_num = 3000, batch_size = 10, learning_rate = 0.1)

Step:  107	Train acc: 0.48285	Test acc: 0.00000
Step:  214	Train acc: 0.91381	Test acc: 0.00557
Step:  321	Train acc: 0.95922	Test acc: 0.00557
Step:  428	Train acc: 0.97034	Test acc: 0.00557
Step:  535	Train acc: 0.97220	Test acc: 0.00557
Step:  642	Train acc: 0.98054	Test acc: 0.00557
Step:  749	Train acc: 0.98703	Test acc: 0.00557
Step:  856	Train acc: 0.98703	Test acc: 0.00557
Step:  963	Train acc: 0.98981	Test acc: 0.00557
Step: 1070	Train acc: 0.98795	Test acc: 0.00557
Step: 1177	Train acc: 0.99166	Test acc: 0.00557
Step: 1284	Train acc: 0.99073	Test acc: 0.00557
Step: 1391	Train acc: 0.99537	Test acc: 0.00557
Step: 1498	Train acc: 0.99444	Test acc: 0.00557
Step: 1605	Train acc: 0.99629	Test acc: 0.00557
Step: 1712	Train acc: 0.99351	Test acc: 0.00557
Step: 1819	Train acc: 0.99444	Test acc: 0.00557
Step: 1926	Train acc: 0.99722	Test acc: 0.00557
Step: 2033	Train acc: 0.99537	Test acc: 0.00557
Step: 2140	Train acc: 0.99815	Test acc: 0.00557
Step: 2247	Train acc: 0.99815	Test acc: 

(0.9990732159406858, 0.005571030640668524)

In [16]:
# modified from: http://scikit-learn.org/stable/modules/naive_bayes.html
import numpy as np
from sklearn import datasets

from sklearn.datasets import load_digits

def load_mnist2(normalize=True, one_hot_label=False, shuffled=True):
    def _change_one_hot_label(X):
        T = np.zeros((X.size, 10))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
            
        return T

    def train_test_split(data, target, test_size, shuffled=True, seed=1004):
        import numpy as np
        
        test_num = int(data.shape[0] * test_size)
        train_num = data.shape[0] - test_num

        if shuffled:
            np.random.seed(seed)
            shuffled = np.random.permutation(data.shape[0])
            data = data[shuffled,:]
            target = target[shuffled]
        else:
            idx = np.argsort(target)
            data = data[idx]
            target = target[idx]

        idx = np.where(target == 0)
        data_split = data[idx]
        target_split = target[idx]

        test_num = int(data_split.shape[0] * test_size)
        train_num = data_split.shape[0] - test_num

        x_train = data_split[:train_num]
        x_test = data_split[train_num:]
        t_train = target_split[:train_num]
        t_test = target_split[train_num:]

        for i in range(1,10):
            idx = np.where(target == i)
            data_split = data[idx]
            target_split = target[idx]

            test_num = int(data_split.shape[0] * 0.4)
            train_num = data_split.shape[0] - test_num

            x_train = np.append(x_train, data_split[:train_num], axis=0)
            x_test = np.append(x_test, data_split[train_num:], axis=0)
            t_train = np.append(t_train, target_split[:train_num], axis=0)
            t_test = np.append(t_test, target_split[train_num:], axis=0)
        


        # x_train = data[:train_num]
        # x_test = data[train_num:]
        # t_train = target[:train_num]
        # t_test = target[train_num:]

        return x_train, x_test, t_train, t_test

    data = load_digits().data
    target = load_digits().target
    

    x_train, x_test, t_train, t_test = train_test_split(data, target, test_size=0.4, shuffled = shuffled)
    if normalize:
        x_train = x_train / 16.
        x_test = x_test / 16.

    if one_hot_label:
        t_train = _change_one_hot_label(t_train)
        t_test = _change_one_hot_label(t_test)  

    return (x_train, t_train), (x_test, t_test)

(x_train, t_train), (x_test, t_test) = load_mnist2(shuffled=False)


train_neuralnet_mnist(x_train, t_train, x_test, t_test, 
                     input_size=64, hidden_size=10, output_size=10, 
                     iters_num = 1000, batch_size = 10, learning_rate = 0.1)



Step:  108	Train acc: 0.18652	Test acc: 0.17647
Step:  216	Train acc: 0.29548	Test acc: 0.30532
Step:  324	Train acc: 0.82087	Test acc: 0.78291
Step:  432	Train acc: 0.87073	Test acc: 0.82493
Step:  540	Train acc: 0.90397	Test acc: 0.85434
Step:  648	Train acc: 0.93998	Test acc: 0.88095
Step:  756	Train acc: 0.95014	Test acc: 0.89216
Step:  864	Train acc: 0.95476	Test acc: 0.89076
Step:  972	Train acc: 0.96399	Test acc: 0.89636
Optimization finished!
Training accuracy: 0.96
Test accuracy: 0.90


(0.9630655586334257, 0.9019607843137255)

In [19]:
import numpy as np
from sklearn import datasets

from sklearn.datasets import load_digits

def load_mnist3(normalize=True, one_hot_label=False, shuffled=True):
    def _change_one_hot_label(X):
        T = np.zeros((X.size, 10))
        for idx, row in enumerate(T):
            row[X[idx]] = 1
            
        return T

    def train_test_split(data, target, test_size, shuffled=True, seed=1004):
        import numpy as np
        
        test_num = int(data.shape[0] * test_size)
        train_num = data.shape[0] - test_num

        if shuffled:
            np.random.seed(seed)
            shuffled = np.random.permutation(data.shape[0])
            data = data[shuffled,:]
            target = target[shuffled]
        else:
            idx = np.argsort(target)
            data = data[idx]
            target = target[idx]

        idx = np.where(target == 0)
        data_split = data[idx]
        target_split = target[idx]
        
        np.random.seed(seed)
        shuffled = np.random.permutation(data_split.shape[0])
        data_split = data_split[shuffled,:]
        target_split = target_split[shuffled]

        test_num = int(data_split.shape[0] * test_size)
        train_num = data_split.shape[0] - test_num

        x_train = data_split[:train_num]
        x_test = data_split[train_num:]
        t_train = target_split[:train_num]
        t_test = target_split[train_num:]

        for i in range(1,10):
            idx = np.where(target == i)
            data_split = data[idx]
            target_split = target[idx]

            np.random.seed(seed)
            shuffled = np.random.permutation(data_split.shape[0])
            data_split = data_split[shuffled,:]
            target_split = target_split[shuffled]

            test_num = int(data_split.shape[0] * 0.4)
            train_num = data_split.shape[0] - test_num

            x_train = np.append(x_train, data_split[:train_num], axis=0)
            x_test = np.append(x_test, data_split[train_num:], axis=0)
            t_train = np.append(t_train, target_split[:train_num], axis=0)
            t_test = np.append(t_test, target_split[train_num:], axis=0)
    


        # x_train = data[:train_num]
        # x_test = data[train_num:]
        # t_train = target[:train_num]
        # t_test = target[train_num:]

        return x_train, x_test, t_train, t_test

    data = load_digits().data
    target = load_digits().target
    

    x_train, x_test, t_train, t_test = train_test_split(data, target, test_size=0.4, shuffled = shuffled)
    if normalize:
        x_train = x_train / 16.
        x_test = x_test / 16.

    if one_hot_label:
        t_train = _change_one_hot_label(t_train)
        t_test = _change_one_hot_label(t_test)  

    return (x_train, t_train), (x_test, t_test)

(x_train, t_train), (x_test, t_test) = load_mnist3(shuffled=False)


train_neuralnet_mnist(x_train, t_train, x_test, t_test, 
                     input_size=64, hidden_size=100, output_size=10, 
                     iters_num = 10000, batch_size = 20, learning_rate = 0.1)


'''
# training and testing
gnb = GaussianNB().fit(iris.data[Itr,:], iris.target[Itr])
y_pred = gnb.predict(iris.data[Ite,:])
nmisses = (iris.target[Ite] != y_pred).sum()
print('Number of mislabeled out of a total %d samples : %d (%.2f%%)'
        % (sum(Ite), nmisses, float(nmisses)/sum(Ite)*100.0))

# [Multiple Executions]
# Number of mislabeled out of a total 60 samples : 3 (5.00%)
# Number of mislabeled out of a total 60 samples : 4 (6.67%)
# Number of mislabeled out of a total 60 samples : 0 (0.00%)
# Number of mislabeled out of a total 60 samples : 5 (8.33%)
# Number of mislabeled out of a total 60 samples : 1 (1.67%)
'''

Step:   54	Train acc: 0.28624	Test acc: 0.28992
Step:  108	Train acc: 0.38412	Test acc: 0.37395
Step:  162	Train acc: 0.66667	Test acc: 0.68627
Step:  216	Train acc: 0.79224	Test acc: 0.78711
Step:  270	Train acc: 0.85411	Test acc: 0.84034
Step:  324	Train acc: 0.88181	Test acc: 0.85854
Step:  378	Train acc: 0.91043	Test acc: 0.89776
Step:  432	Train acc: 0.92336	Test acc: 0.89916
Step:  486	Train acc: 0.93352	Test acc: 0.92017
Step:  540	Train acc: 0.94552	Test acc: 0.93277
Step:  594	Train acc: 0.94829	Test acc: 0.93137
Step:  648	Train acc: 0.93813	Test acc: 0.92017
Step:  702	Train acc: 0.96030	Test acc: 0.94818
Step:  756	Train acc: 0.96307	Test acc: 0.94678
Step:  810	Train acc: 0.95753	Test acc: 0.94118
Step:  864	Train acc: 0.97045	Test acc: 0.94818
Step:  918	Train acc: 0.97230	Test acc: 0.95098
Step:  972	Train acc: 0.96676	Test acc: 0.95378
Step: 1026	Train acc: 0.97507	Test acc: 0.96218
Step: 1080	Train acc: 0.97138	Test acc: 0.95658
Step: 1134	Train acc: 0.96861	Test acc: 

"\n# training and testing\ngnb = GaussianNB().fit(iris.data[Itr,:], iris.target[Itr])\ny_pred = gnb.predict(iris.data[Ite,:])\nnmisses = (iris.target[Ite] != y_pred).sum()\nprint('Number of mislabeled out of a total %d samples : %d (%.2f%%)'\n        % (sum(Ite), nmisses, float(nmisses)/sum(Ite)*100.0))\n\n# [Multiple Executions]\n# Number of mislabeled out of a total 60 samples : 3 (5.00%)\n# Number of mislabeled out of a total 60 samples : 4 (6.67%)\n# Number of mislabeled out of a total 60 samples : 0 (0.00%)\n# Number of mislabeled out of a total 60 samples : 5 (8.33%)\n# Number of mislabeled out of a total 60 samples : 1 (1.67%)\n"

In [6]:
from sklearn.model_selection import train_test_split

data = load_digits().data
target = load_digits().target

x_train, x_test, t_train, t_test = train_test_split(data, target, test_size=0.4, shuffle=True)

x_train = x_train / 16.
x_test = x_test / 16.

train_neuralnet_mnist(x_train, t_train, x_test, t_test, 
                     input_size=64, hidden_size=10, output_size=10, 
                     iters_num = 3000, batch_size = 20, learning_rate = 0.1)


Step:  107	Train acc: 0.10111	Test acc: 0.09736
Step:  214	Train acc: 0.25788	Test acc: 0.23088
Step:  321	Train acc: 0.65306	Test acc: 0.62587
Step:  428	Train acc: 0.77087	Test acc: 0.75939
Step:  535	Train acc: 0.81911	Test acc: 0.82058
Step:  642	Train acc: 0.91002	Test acc: 0.91099
Step:  749	Train acc: 0.92301	Test acc: 0.91655
Step:  856	Train acc: 0.92950	Test acc: 0.93046
Step:  963	Train acc: 0.94805	Test acc: 0.94159
Step: 1070	Train acc: 0.95083	Test acc: 0.94019
Step: 1177	Train acc: 0.95547	Test acc: 0.94576
Step: 1284	Train acc: 0.95547	Test acc: 0.93880
Step: 1391	Train acc: 0.96568	Test acc: 0.95132
Step: 1498	Train acc: 0.96568	Test acc: 0.94854
Step: 1605	Train acc: 0.95826	Test acc: 0.93185
Step: 1712	Train acc: 0.97495	Test acc: 0.94854
Step: 1819	Train acc: 0.98052	Test acc: 0.95410
Step: 1926	Train acc: 0.97774	Test acc: 0.95688
Step: 2033	Train acc: 0.97124	Test acc: 0.95410
Step: 2140	Train acc: 0.98237	Test acc: 0.95828
Step: 2247	Train acc: 0.98516	Test acc: 

(0.9833024118738405, 0.9513212795549374)

In [21]:
# HO4: Reproducible Random Sampling
# Random sampling by sklearn.model_selection.train_test_split
# source: https://scikit-learn.org/stable/modules/cross_validation.html

from sklearn.model_selection import train_test_split

data = load_digits().data
target = load_digits().target

x_train, x_test, t_train, t_test = train_test_split(data, target, test_size=0.4, shuffle=True, random_state=len(target))

np.random.seed(len(target))

x_train = x_train / 16.
x_test = x_test / 16.

# fix the SEED of random permutation to be the number of samples, 
# to reproduce the same random sequence at every execution
np.random.seed(len(target))

train_neuralnet_mnist(x_train, t_train, x_test, t_test,
                     input_size=64, hidden_size=30, output_size=10, 
                     iters_num = 4000, batch_size = 20, learning_rate = 0.1)


Step:   53	Train acc: 0.10111	Test acc: 0.10153
Step:  106	Train acc: 0.18275	Test acc: 0.17942
Step:  159	Train acc: 0.47032	Test acc: 0.47288
Step:  212	Train acc: 0.56865	Test acc: 0.56885
Step:  265	Train acc: 0.79685	Test acc: 0.80389
Step:  318	Train acc: 0.82931	Test acc: 0.85257
Step:  371	Train acc: 0.86364	Test acc: 0.88595
Step:  424	Train acc: 0.89703	Test acc: 0.91238
Step:  477	Train acc: 0.91558	Test acc: 0.90682
Step:  530	Train acc: 0.92115	Test acc: 0.92350
Step:  583	Train acc: 0.93599	Test acc: 0.93324
Step:  636	Train acc: 0.91558	Test acc: 0.91933
Step:  689	Train acc: 0.95269	Test acc: 0.94576
Step:  742	Train acc: 0.95640	Test acc: 0.94576
Step:  795	Train acc: 0.95640	Test acc: 0.94576
Step:  848	Train acc: 0.96475	Test acc: 0.94576
Step:  901	Train acc: 0.96475	Test acc: 0.94576
Step:  954	Train acc: 0.95918	Test acc: 0.95410
Step: 1007	Train acc: 0.95918	Test acc: 0.93880
Step: 1060	Train acc: 0.96846	Test acc: 0.95549
Step: 1113	Train acc: 0.97124	Test acc: 

(0.9972170686456401, 0.9638386648122392)

In [22]:
# HO5: Stratified Random Sampling}
from sklearn.model_selection import train_test_split
X = load_digits().data
y = load_digits().target
X = X / 16.

# per-class random sampling by passing y to variable stratify, 
Xtr,Xte,ytr,yte = train_test_split(X, y, test_size=0.4, shuffle=True, stratify=y)

# check number of samples of the individual classes
print('test: %d %d %d %d %d %d %d %d %d %d %d,  '%(sum(yte==0),%(sum(yte==1),%(sum(yte==2),%(sum(yte==3),%(sum(yte==4),%(sum(yte==5),%(sum(yte==6),%(sum(yte==7),sum(yte==8),sum(yte==9)),end='')
print('training: %d %d %d %d %d %d %d %d %d %d %d,  '%(sum(ytr==0),%(sum(ytr==1),%(sum(ytr==2),%(sum(ytr==3),%(sum(ytr==4),%(sum(ytr==5),%(sum(ytr==6),%(sum(ytr==7),sum(ytr==8),sum(ytr==9)),end='')

# due to the random initialization of the weights, the performance varies
# so we have to set the random seed for TwoLayerNet's initialization values
np.random.seed(len(y))

train_neuralnet_mnist(Xtr,ytr,Xte,yte,
                     input_size=64, hidden_size=10, output_size=10, 
                     iters_num = 1000, batch_size = 10, learning_rate = 0.1)

"""# training and testing 
y_pred = GaussianNB().fit(Xtr, ytr).predict(Xte)
nmisses = (yte != y_pred).sum()
print('Number of mislabeled out of a total %d samples : %d (%.2f%%)'
        % (len(yte), nmisses, float(nmisses)/len(yte)*100.0))

# [Multiple Executions]
# test: 20 20 20,  training: 30 30 30
# Number of mislabeled out of a total 60 samples : 6 (10.00%)
# test: 20 20 20,  training: 30 30 30
# Number of mislabeled out of a total 60 samples : 3 (5.00%)
# test: 20 20 20,  training: 30 30 30
# Number of mislabeled out of a total 60 samples : 4 (6.67%)
# test: 20 20 20,  training: 30 30 30
# Number of mislabeled out of a total 60 samples : 3 (5.00%)
"""

test: 71 73 71,  training: 107 109 106
Step:  107	Train acc: 0.19852	Test acc: 0.19611
Step:  214	Train acc: 0.29314	Test acc: 0.29624
Step:  321	Train acc: 0.67625	Test acc: 0.68150
Step:  428	Train acc: 0.83766	Test acc: 0.84284
Step:  535	Train acc: 0.89796	Test acc: 0.89013
Step:  642	Train acc: 0.92393	Test acc: 0.89291
Step:  749	Train acc: 0.92393	Test acc: 0.90542
Step:  856	Train acc: 0.93135	Test acc: 0.91377
Step:  963	Train acc: 0.93878	Test acc: 0.92907
Optimization finished!
Training accuracy: 0.96
Test accuracy: 0.92


"# training and testing \ny_pred = GaussianNB().fit(Xtr, ytr).predict(Xte)\nnmisses = (yte != y_pred).sum()\nprint('Number of mislabeled out of a total %d samples : %d (%.2f%%)'\n        % (len(yte), nmisses, float(nmisses)/len(yte)*100.0))\n\n# [Multiple Executions]\n# test: 20 20 20,  training: 30 30 30\n# Number of mislabeled out of a total 60 samples : 6 (10.00%)\n# test: 20 20 20,  training: 30 30 30\n# Number of mislabeled out of a total 60 samples : 3 (5.00%)\n# test: 20 20 20,  training: 30 30 30\n# Number of mislabeled out of a total 60 samples : 4 (6.67%)\n# test: 20 20 20,  training: 30 30 30\n# Number of mislabeled out of a total 60 samples : 3 (5.00%)\n"

In [18]:
# Repeated Random Subsampling
# Repeating stratified random sampling K times

from sklearn.model_selection import train_test_split
X = load_digits().data
y = load_digits().target
X = X / 16.

# due to the random initialization of the weights, the performance varies
# so we have to set the random seed for TwoLayerNet's initialization values
np.random.seed(len(y))

K = 20
Acc = np.zeros([K,2], dtype=float)
for k in range(K):
    # stratified random sampling
    Xtr, Xte, ytr, yte = train_test_split(X, y, test_size=0.4, shuffle=True, random_state=None, stratify=y)
    Acc[k,0], Acc[k,1] = train_neuralnet_mnist(Xtr,ytr,Xte,yte,
                                  input_size=64, hidden_size=10, output_size=10, 
                                  iters_num = 1000, batch_size = 10, learning_rate = 0.1, 
                                  verbose = False)
    print('Trial %d: accuracy %.3f %.3f' % (k, Acc[k,0], Acc[k,1]))

# 20 trials, average mislabeled out of a total 60 samples : 2.6 (4.42%)
# 20 trials, average mislabeled out of a total 60 samples : 2.8 (4.67%)
# 20 trials, average mislabeled out of a total 60 samples : 2.6 (4.33%)
# 20 trials, average mislabeled out of a total 60 samples : 2.8 (4.58%)
# 20 trials, average mislabeled out of a total 60 samples : 3.0 (5.08%)
# 20 trials, average mislabeled out of a total 60 samples : 2.7 (4.50%)

Trial 0: accuracy 0.956 0.917
Trial 1: accuracy 0.944 0.918
Trial 2: accuracy 0.931 0.922
Trial 3: accuracy 0.946 0.924
Trial 4: accuracy 0.952 0.921
Trial 5: accuracy 0.910 0.871
Trial 6: accuracy 0.939 0.911
Trial 7: accuracy 0.933 0.911
Trial 8: accuracy 0.938 0.924
Trial 9: accuracy 0.955 0.937
Trial 10: accuracy 0.942 0.922
Trial 11: accuracy 0.942 0.929
Trial 12: accuracy 0.949 0.926
Trial 13: accuracy 0.932 0.912
Trial 14: accuracy 0.946 0.898
Trial 15: accuracy 0.955 0.924
Trial 16: accuracy 0.875 0.841
Trial 17: accuracy 0.929 0.872
Trial 18: accuracy 0.943 0.918
Trial 19: accuracy 0.936 0.897
