In [11]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from dataset.mnist import load_mnist

# Load the MNIST train/test splits. `normalize` and `one_hot_label` are
# forwarded to the project's loader; with these settings the recorded output
# of this cell shows x_train as (60000, 784) and t_train as (60000, 10).
(x_train, t_train), (x_test, t_test) = \
    load_mnist(normalize=True, one_hot_label=True)

# Print the shapes explicitly so the cell really produces the output that is
# saved with it (previously nothing in the cell displayed them).
print(x_train.shape)
print(t_train.shape)

# Draw one random mini-batch. The sample size comes from the data itself
# instead of a hard-coded 60000; np.random.choice samples with replacement
# by default, so an index may appear more than once in a batch.
train_size = x_train.shape[0]
batch_size = 10
batch_mask = np.random.choice(train_size, batch_size)
x_batch = x_train[batch_mask]
t_batch = t_train[batch_mask]

(60000, 784)
(60000, 10)


In [12]:
import sys, os
sys.path.append(os.pardir)
import numpy as np
from common.layers import *
from common.gradient import numerical_gradient
from common.trainer import Trainer
from collections import OrderedDict

class ThreeLayerNet:
    """Three affine layers, each followed by batch normalization.

    Forward order of ``self.layers``:
    Affine1 -> BatchNorm1 -> Relu1 -> Affine2 -> BatchNorm2 -> Relu2
    -> Affine3 -> BatchNorm3. ``self.lastLayer`` (SoftmaxWithLoss) is applied
    on top of that stack by ``loss`` and ``gradient`` only.

    All trainable arrays live in ``self.params`` and the very same array
    objects are handed to the layers, so in-place updates of ``self.params``
    are seen by the layers.
    """

    def __init__(self, input_size, hidden_size1, hidden_size2, output_size, weight_init_std=0.01):
        """Create the parameters and wire up the layer stack.

        Args:
            input_size: number of input features.
            hidden_size1: width of the first hidden layer.
            hidden_size2: width of the second hidden layer.
            output_size: number of output units.
            weight_init_std: scale applied to the standard-normal weight draws.
        """

        def random_weights(rows, cols):
            # Scaled standard-normal initialisation for one weight matrix.
            return weight_init_std * np.random.randn(rows, cols)

        # Entries are evaluated top to bottom, so the random draws happen in
        # the order W1, W2, W3.
        self.params = {
            'W1': random_weights(input_size, hidden_size1),
            'b1': np.zeros(hidden_size1),
            'gamma1': np.ones(hidden_size1),
            'beta1': np.zeros(hidden_size1),
            'W2': random_weights(hidden_size1, hidden_size2),
            'b2': np.zeros(hidden_size2),
            'gamma2': np.ones(hidden_size2),
            'beta2': np.zeros(hidden_size2),
            'W3': random_weights(hidden_size2, output_size),
            'b3': np.zeros(output_size),
            'gamma3': np.ones(output_size),
            'beta3': np.zeros(output_size),
        }

        p = self.params
        self.layers = OrderedDict([
            ('Affine1', Affine(p['W1'], p['b1'])),
            ('BatchNorm1', BatchNormalization(p['gamma1'], p['beta1'])),
            ('Relu1', Relu()),
            ('Affine2', Affine(p['W2'], p['b2'])),
            ('BatchNorm2', BatchNormalization(p['gamma2'], p['beta2'])),
            ('Relu2', Relu()),
            ('Affine3', Affine(p['W3'], p['b3'])),
            ('BatchNorm3', BatchNormalization(p['gamma3'], p['beta3'])),
        ])

        self.lastLayer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Run ``x`` through every layer in ``self.layers`` and return the result.

        ``train_flg`` is passed on to BatchNormalization layers only; all other
        layers are called with the activations alone.
        """
        out = x
        for layer in self.layers.values():
            extra_args = (train_flg,) if isinstance(layer, BatchNormalization) else ()
            out = layer.forward(out, *extra_args)
        return out

    def loss(self, x, t, train_flg=False):
        """Return ``self.lastLayer.forward`` of the network output and targets ``t``."""
        return self.lastLayer.forward(self.predict(x, train_flg), t)

    def accuracy(self, x, t):
        """Return the fraction of rows of ``x`` whose arg-max output matches ``t``.

        ``t`` may be a 1-D label array or a 2-D array that is reduced with
        arg-max along axis 1. Prediction runs with ``train_flg=False``.
        """
        predicted = np.argmax(self.predict(x), axis=1)
        labels = t if t.ndim == 1 else np.argmax(t, axis=1)
        return np.sum(predicted == labels) / float(x.shape[0])

    def numerical_gradient(self, x, t):
        """Return a dict of gradients obtained from the imported ``numerical_gradient`` helper.

        The loss callback ignores its argument and re-evaluates the loss on
        ``(x, t)`` in training mode.
        NOTE(review): presumably the helper perturbs each parameter array in
        place, which is what makes the ignored argument work - confirm against
        ``common.gradient``.
        """

        def loss_W(W):
            return self.loss(x, t, train_flg=True)

        param_names = (
            'W1', 'b1', 'W2', 'b2', 'W3', 'b3',
            'gamma1', 'beta1', 'gamma2', 'beta2', 'gamma3', 'beta3',
        )
        return {name: numerical_gradient(loss_W, self.params[name]) for name in param_names}

    def gradient(self, x, t):
        """Return a dict of gradients computed by back-propagation.

        A training-mode forward pass is run first; then ``backward`` is called
        on ``self.lastLayer`` and on every layer in reverse order, and the
        gradients each Affine / BatchNormalization layer stored are collected.
        """
        # Forward pass; the returned loss value itself is not needed here.
        self.loss(x, t, train_flg=True)

        # Backward pass, starting from an upstream gradient of 1.
        dout = self.lastLayer.backward(1)
        for layer in reversed(list(self.layers.values())):
            dout = layer.backward(dout)

        affine1, affine2, affine3 = (
            self.layers[key] for key in ('Affine1', 'Affine2', 'Affine3')
        )
        norm1, norm2, norm3 = (
            self.layers[key] for key in ('BatchNorm1', 'BatchNorm2', 'BatchNorm3')
        )
        return {
            'W1': affine1.dW, 'b1': affine1.db,
            'W2': affine2.dW, 'b2': affine2.db,
            'W3': affine3.dW, 'b3': affine3.db,
            'gamma1': norm1.dgamma, 'beta1': norm1.dbeta,
            'gamma2': norm2.dgamma, 'beta2': norm2.dbeta,
            'gamma3': norm3.dgamma, 'beta3': norm3.dbeta,
        }

In [13]:
import numpy as np
import matplotlib.pyplot as plt
from common.trainer import Trainer

# Grid search over the SGD learning rate and a shared hidden-layer width,
# keeping the configuration with the highest test accuracy among those that
# reach MIN_ACCURACY on both the training and the test set.
#
# Cost warning: the grid below is 19 x 11 = 209 full training runs of EPOCHS
# epochs each; the output recorded with this cell ends in KeyboardInterrupt.
# Lower EPOCHS or shrink the grids for a quick pass.
# NOTE(review): the winner is selected on test accuracy, so the reported
# "best test accuracy" is optimistically biased; consider a validation split.
EPOCHS = 200
MINI_BATCH_SIZE = 50
MIN_ACCURACY = 0.95

# linspace yields exactly 19 evenly spaced rates (0.001 ... 0.019). The NumPy
# docs advise against np.arange with a non-integer step because rounding can
# change the number of elements.
learning_rates = np.linspace(0.001, 0.019, 19)
hidden_layer_sizes = np.arange(5, 16)

best_accuracy = 0
best_hyperparameters = None
best_network = None   # stays None when no configuration reaches MIN_ACCURACY
best_history = None   # (train_acc_list, test_acc_list) of the best run

for lr in learning_rates:
    for hidden_size in hidden_layer_sizes:
        print(f"Training with learning rate {lr} and hidden size {hidden_size}...")

        network = ThreeLayerNet(input_size=784, hidden_size1=hidden_size, hidden_size2=hidden_size, output_size=10)

        trainer = Trainer(network, x_train, t_train, x_test, t_test,
                          epochs=EPOCHS, mini_batch_size=MINI_BATCH_SIZE,
                          optimizer='sgd', optimizer_param={'lr': lr},
                          verbose=False)

        trainer.train()

        test_accuracy = network.accuracy(x_test, t_test)
        train_accuracy = network.accuracy(x_train, t_train)

        print(f"Train accuracy: {train_accuracy}, Test accuracy: {test_accuracy}")

        meets_threshold = test_accuracy >= MIN_ACCURACY and train_accuracy >= MIN_ACCURACY
        if meets_threshold and test_accuracy > best_accuracy:
            best_accuracy = test_accuracy
            best_hyperparameters = {'learning_rate': lr, 'hidden_size': hidden_size}
            best_network = network
            # Keep the accuracy curves of this run: `trainer` is overwritten
            # on the next iteration, so plotting it after the loop would show
            # the last grid point rather than the best one.
            best_history = (list(trainer.train_acc_list), list(trainer.test_acc_list))

print("Best hyperparameters:", best_hyperparameters)
print("Best test accuracy:", best_accuracy)

if best_history is None:
    print(f"No configuration reached {MIN_ACCURACY} on both train and test; nothing to plot.")
else:
    best_train_curve, best_test_curve = best_history
    fig, ax = plt.subplots()
    ax.plot(best_train_curve, label='Train accuracy')
    ax.plot(best_test_curve, label='Test accuracy')
    ax.set_xlabel('Epochs')
    ax.set_ylabel('Accuracy')
    ax.set_title(f"Best run: {best_hyperparameters}")
    ax.legend()
    plt.show()


Training with learning rate 0.001 and hidden size 5...


KeyboardInterrupt: 