In [1]:
# coding: utf-8
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import pickle
import numpy as np
import matplotlib.pyplot as plt
from collections import OrderedDict
from common.layers import *
from common.gradient import numerical_gradient
from common.util import shuffle_dataset
from common.trainer import Trainer
from dataset.mnist import load_mnist

class SimpleConvNet:
    """Small convolutional network.

    Layer stack (in order):
        conv - relu - pool - conv - relu - pool -
        affine - relu - dropout - affine - dropout - softmax-with-loss

    Parameters
    ----------
    input_dim : tuple
        (channels, height, width). Only the channel count (``input_dim[0]``)
        is read by the constructor.
    conv_param_1, conv_param_2 : dict
        Settings of the two convolution layers, with keys ``'filter_num'``,
        ``'filter_size'``, ``'pad'`` and ``'stride'``. The dicts are only
        read, never modified.
    hidden_size : int
        Number of units of the first affine layer.
    output_size : int
        Number of units of the last affine layer (number of classes).

    Notes
    -----
    The weight-initialisation scales and the number of rows of ``W3`` are
    hard-coded (``1*5*5``, ``20*5*5``, ``40*5*5``), so they only match the
    default ``input_dim`` / ``conv_param_*`` values. Other configurations
    need these numbers adjusted by hand.
    """

    # Positions inside ``self.layers`` of the layers that own weights
    # (conv1, conv2, affine1, affine2); they map to W1/b1 ... W4/b4.
    _PARAM_LAYER_IDXS = (0, 3, 6, 9)

    def __init__(self, input_dim=(1, 28, 28), 
                 conv_param_1={'filter_num':20, 'filter_size':5, 'pad':1, 'stride':1},
                 conv_param_2={'filter_num':40, 'filter_size':5, 'pad':1, 'stride':1},
                 hidden_size=100, output_size=10):
        # Number of inputs feeding one unit of each weighted layer; used for
        # the sqrt(2 / n) initialisation scale (hard-coded, see class notes).
        pre_node_nums = np.array([1*5*5, 20*5*5, 40*5*5, hidden_size])
        weight_init_scales = np.sqrt(2.0 / pre_node_nums)

        self.params = {}
        pre_channel_num = input_dim[0]
        for idx, conv_param in enumerate([conv_param_1, conv_param_2]):
            self.params['W' + str(idx+1)] = weight_init_scales[idx] * np.random.randn(
                conv_param['filter_num'], pre_channel_num,
                conv_param['filter_size'], conv_param['filter_size'])
            self.params['b' + str(idx+1)] = np.zeros(conv_param['filter_num'])
            # The next convolution sees one input channel per filter of this one.
            pre_channel_num = conv_param['filter_num']
        self.params['W3'] = weight_init_scales[2] * np.random.randn(40*5*5, hidden_size)
        self.params['b3'] = np.zeros(hidden_size)
        self.params['W4'] = weight_init_scales[3] * np.random.randn(hidden_size, output_size)
        self.params['b4'] = np.zeros(output_size)

        # Build the layers ===========
        # Keep this order in sync with _PARAM_LAYER_IDXS.
        self.layers = []
        self.layers.append(Convolution(self.params['W1'], self.params['b1'], 
                           conv_param_1['stride'], conv_param_1['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Convolution(self.params['W2'], self.params['b2'], 
                           conv_param_2['stride'], conv_param_2['pad']))
        self.layers.append(Relu())
        self.layers.append(Pooling(pool_h=2, pool_w=2, stride=2))
        self.layers.append(Affine(self.params['W3'], self.params['b3']))
        self.layers.append(Relu())
        self.layers.append(Dropout(0.5))
        self.layers.append(Affine(self.params['W4'], self.params['b4']))
        self.layers.append(Dropout(0.5))

        self.last_layer = SoftmaxWithLoss()

    def predict(self, x, train_flg=False):
        """Run ``x`` through every layer except the loss layer.

        ``train_flg`` is forwarded to the Dropout layers only; all other
        layers are called with ``x`` alone. Returns the last layer's output.
        """
        for layer in self.layers:
            if isinstance(layer, Dropout):
                x = layer.forward(x, train_flg)
            else:
                x = layer.forward(x)

        return x

    def loss(self, x, t):
        """Return the loss of input ``x`` against target ``t``.

        The forward pass is run with ``train_flg=True``.
        """
        y = self.predict(x, train_flg=True)
        return self.last_layer.forward(y, t)

    def accuracy(self, x, t, batch_size=100):
        """Return the fraction of samples in ``x`` classified correctly.

        ``t`` may be a 1-D array of class indices or a 2-D array that is
        reduced with ``argmax`` along axis 1. Prediction is done in chunks of
        ``batch_size`` samples; the last chunk may be smaller, so every
        sample is evaluated even when ``x.shape[0]`` is not a multiple of
        ``batch_size``.
        """
        if t.ndim != 1:
            t = np.argmax(t, axis=1)

        total = x.shape[0]
        correct = 0.0

        # Stepping by batch_size (instead of counting whole batches) keeps the
        # trailing partial batch, which is also counted in the denominator.
        for start in range(0, total, batch_size):
            tx = x[start:start + batch_size]
            tt = t[start:start + batch_size]
            y = np.argmax(self.predict(tx), axis=1)
            correct += np.sum(y == tt)

        return correct / total

    def gradient(self, x, t):
        """Compute parameter gradients by backpropagation.

        Returns a dict with keys ``'W1'``/``'b1'`` ... ``'W4'``/``'b4'``
        holding the ``dW`` / ``db`` attributes of the weighted layers after
        the backward pass.
        """
        # forward
        self.loss(x, t)

        # backward
        dout = 1
        dout = self.last_layer.backward(dout)

        for layer in reversed(self.layers):
            dout = layer.backward(dout)

        # Collect the results
        grads = {}
        for i, layer_idx in enumerate(self._PARAM_LAYER_IDXS):
            grads['W' + str(i+1)] = self.layers[layer_idx].dW
            grads['b' + str(i+1)] = self.layers[layer_idx].db

        return grads

    def save_params(self, file_name="params.pkl"):
        """Pickle a shallow copy of ``self.params`` to ``file_name``."""
        params = dict(self.params)
        with open(file_name, 'wb') as f:
            pickle.dump(params, f)

    def load_params(self, file_name="params.pkl"):
        """Load parameters pickled by ``save_params`` and bind them to the layers.

        Every entry of the loaded dict is copied into ``self.params``; the
        ``W``/``b`` attributes of the weighted layers are then re-pointed at
        the loaded arrays.
        """
        # NOTE: pickle.load can execute arbitrary code; only load files that
        # come from a trusted source.
        with open(file_name, 'rb') as f:
            params = pickle.load(f)
        for key, val in params.items():
            self.params[key] = val

        for i, layer_idx in enumerate(self._PARAM_LAYER_IDXS):
            self.layers[layer_idx].W = self.params['W' + str(i+1)]
            self.layers[layer_idx].b = self.params['b' + str(i+1)]


In [2]:
def cross_validation(x_train, t_train, N, optimization_trial=5, epochs=10):
    """Pick a learning rate by N-fold cross-validation.

    ``optimization_trial`` learning rates are drawn log-uniformly from
    [1e-3, 1e-2). For each of them a fresh ``SimpleConvNet`` is trained once
    per fold with a ``Trainer`` (Adam, mini-batch size 1000), using the fold
    as the trainer's test data. The last entry of the trainer's
    ``test_acc_list`` is taken as the fold's validation accuracy, and the
    learning rate with the highest mean validation accuracy wins. Progress
    is printed along the way.

    Parameters
    ----------
    x_train, t_train : numpy.ndarray
        Samples and labels, indexed by sample along axis 0.
    N : int
        Number of folds (at least 2). Each fold holds
        ``x_train.shape[0] // N`` samples; leftover samples at the end are
        never used for validation and always stay in the training part.
    optimization_trial : int, optional
        Number of learning rates to try.
    epochs : int, optional
        Training epochs per fold.

    Returns
    -------
    (selected_lr, max_acc) : tuple
        The best learning rate and its mean validation accuracy. Both are 0
        if no candidate reached a mean accuracy above 0.

    Raises
    ------
    ValueError
        If ``N`` is smaller than 2 or larger than the number of samples.
    """
    n_samples = x_train.shape[0]
    if N < 2:
        raise ValueError("N must be at least 2, got {}".format(N))
    # Integer division avoids float rounding errors such as
    # int(49 * (1/49)) == 0.
    v_n = n_samples // N
    if v_n == 0:
        raise ValueError(
            "N={} exceeds the number of samples ({})".format(N, n_samples))

    def _train_fold(lr, x_t, t_t, x_v, t_v):
        # Train a fresh network on one split; return (validation, train)
        # accuracy histories recorded by the trainer.
        network = SimpleConvNet()
        trainer = Trainer(network, x_t, t_t, x_v, t_v,
                          epochs=epochs, mini_batch_size=1000,
                          optimizer='Adam', optimizer_param={'lr': lr},
                          evaluate_sample_num_per_epoch=1000)
        trainer.train()
        return trainer.test_acc_list, trainer.train_acc_list

    # Hyperparameter search range ===============
    lr_list = [10 ** np.random.uniform(-3, -2)
               for _ in range(optimization_trial)]
    # ===========================================
    print("lr_list:" + str(lr_list))
    print("=====================================================")

    max_acc = 0
    selected_lr = 0
    for lr in lr_list:
        print("lr = {}".format(lr))
        val_acc = []

        for i in range(N):
            print("-----------------------------------------------------")
            print("Data {}/{}".format(i+1, N))
            print("-----------------------------------------------------")
            lo, hi = i * v_n, (i + 1) * v_n
            x_v, t_v = x_train[lo:hi], t_train[lo:hi]
            # Everything outside [lo, hi) is the training part of this fold.
            x_t = np.concatenate((x_train[:lo], x_train[hi:]), axis=0)
            t_t = np.concatenate((t_train[:lo], t_train[hi:]), axis=0)

            val_acc_list, _ = _train_fold(lr, x_t, t_t, x_v, t_v)

            print("val acc:" + str(val_acc_list[-1]))
            val_acc.append(val_acc_list[-1])

        mean_acc = np.array(val_acc).mean()

        print("-----------------------------------------------------")
        print(val_acc)
        print("lr:" + str(lr) + " | accuracy average:" + str(mean_acc))
        print("-----------------------------------------------------")
        if max_acc < mean_acc:
            max_acc = mean_acc
            selected_lr = lr
    print("=====================================================")
    print("lr:" + str(selected_lr) + " | acc:" + str(max_acc))
    print("=====================================================")

    return selected_lr, max_acc

In [3]:
# Load the MNIST training and test splits.
# NOTE(review): flatten=False presumably keeps each image as a (1, 28, 28)
# array, matching SimpleConvNet's default input_dim - confirm in dataset.mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten = False)

In [4]:
# Shuffle the training data with common.util.shuffle_dataset before a subset is taken.
# NOTE(review): no random seed is set anywhere in this notebook, so the
# subset, the sampled learning rates and the weights differ between runs.
x_train, t_train = shuffle_dataset(x_train, t_train)

In [5]:
# Keep only the first 5000 samples for the cross-validation run.
# This overwrites x_train / t_train; the full data set is reloaded in a later cell.
x_train, t_train = x_train[:5000], t_train[:5000]

In [6]:
# 5-fold cross-validation over randomly drawn learning rates; progress and the
# selected learning rate are printed.
cross_validation(x_train, t_train, 5)

lr_list:[0.0017702595551351487, 0.008182457454720574, 0.0014616910952868916, 0.0035785153684761667, 0.003802650387151003]
lr = 0.0017702595551351487
-----------------------------------------------------
Data 1/5
-----------------------------------------------------
=== epoch:1, train acc:0.131, test acc:0.103 ===
=== epoch:2, train acc:0.687, test acc:0.654 ===
=== epoch:3, train acc:0.758, test acc:0.725 ===
=== epoch:4, train acc:0.803, test acc:0.776 ===
=== epoch:5, train acc:0.843, test acc:0.838 ===
=== epoch:6, train acc:0.884, test acc:0.872 ===
=== epoch:7, train acc:0.896, test acc:0.879 ===
=== epoch:8, train acc:0.904, test acc:0.898 ===
=== epoch:9, train acc:0.927, test acc:0.905 ===
=== epoch:10, train acc:0.931, test acc:0.915 ===
test acc:0.923
val acc:0.915
-----------------------------------------------------
Data 2/5
-----------------------------------------------------
=== epoch:1, train acc:0.184, test acc:0.189 ===
=== epoch:2, train acc:0.687, test acc:0.665 ===

=== epoch:1, train acc:0.139, test acc:0.147 ===
=== epoch:2, train acc:0.629, test acc:0.597 ===
=== epoch:3, train acc:0.78, test acc:0.781 ===
=== epoch:4, train acc:0.826, test acc:0.837 ===
=== epoch:5, train acc:0.867, test acc:0.868 ===
=== epoch:6, train acc:0.881, test acc:0.87 ===
=== epoch:7, train acc:0.9, test acc:0.889 ===
=== epoch:8, train acc:0.916, test acc:0.904 ===
=== epoch:9, train acc:0.936, test acc:0.913 ===
=== epoch:10, train acc:0.936, test acc:0.919 ===
test acc:0.93
val acc:0.919
-----------------------------------------------------
Data 3/5
-----------------------------------------------------
=== epoch:1, train acc:0.318, test acc:0.303 ===
=== epoch:2, train acc:0.643, test acc:0.588 ===
=== epoch:3, train acc:0.772, test acc:0.722 ===
=== epoch:4, train acc:0.843, test acc:0.795 ===
=== epoch:5, train acc:0.867, test acc:0.817 ===
=== epoch:6, train acc:0.887, test acc:0.845 ===
=== epoch:7, train acc:0.909, test acc:0.868 ===
=== epoch:8, train acc:0.

=== epoch:5, train acc:0.908, test acc:0.883 ===
=== epoch:6, train acc:0.941, test acc:0.905 ===
=== epoch:7, train acc:0.95, test acc:0.914 ===
=== epoch:8, train acc:0.962, test acc:0.929 ===
=== epoch:9, train acc:0.966, test acc:0.937 ===
=== epoch:10, train acc:0.966, test acc:0.946 ===
test acc:0.947
val acc:0.946
-----------------------------------------------------
Data 4/5
-----------------------------------------------------
=== epoch:1, train acc:0.098, test acc:0.107 ===
=== epoch:2, train acc:0.482, test acc:0.505 ===
=== epoch:3, train acc:0.694, test acc:0.702 ===
=== epoch:4, train acc:0.806, test acc:0.794 ===
=== epoch:5, train acc:0.86, test acc:0.837 ===
=== epoch:6, train acc:0.9, test acc:0.878 ===
=== epoch:7, train acc:0.902, test acc:0.892 ===
=== epoch:8, train acc:0.93, test acc:0.908 ===
=== epoch:9, train acc:0.927, test acc:0.911 ===
=== epoch:10, train acc:0.938, test acc:0.925 ===
test acc:0.93
val acc:0.925
---------------------------------------------

In [7]:
# Reload MNIST: x_train / t_train were shuffled and cut down to 5000 samples
# in earlier cells, and the final training below uses the freshly loaded data.
(x_train, t_train), (x_test, t_test) = load_mnist(flatten = False)

In [8]:
max_epochs = 20

network = SimpleConvNet()

# The learning rate equals the second candidate printed by the
# cross-validation run in this notebook.
# NOTE(review): presumably the selected value - the saved output is cut off
# before the final selection line, so confirm.
trainer = Trainer(
    network, x_train, t_train, x_test, t_test,
    epochs=max_epochs,
    mini_batch_size=1000,
    optimizer='Adam',
    optimizer_param={'lr': 0.008182457454720574},
    evaluate_sample_num_per_epoch=1000,
)

In [9]:
# Run the final training; the per-epoch train/test accuracies and the final
# test accuracy appear in the cell output.
trainer.train()

=== epoch:1, train acc:0.119, test acc:0.1 ===
=== epoch:2, train acc:0.976, test acc:0.976 ===
=== epoch:3, train acc:0.985, test acc:0.984 ===
=== epoch:4, train acc:0.986, test acc:0.985 ===
=== epoch:5, train acc:0.989, test acc:0.987 ===
=== epoch:6, train acc:0.99, test acc:0.985 ===
=== epoch:7, train acc:0.992, test acc:0.99 ===
=== epoch:8, train acc:0.99, test acc:0.988 ===
=== epoch:9, train acc:0.991, test acc:0.988 ===
=== epoch:10, train acc:0.992, test acc:0.991 ===
=== epoch:11, train acc:0.995, test acc:0.989 ===
=== epoch:12, train acc:0.992, test acc:0.992 ===
=== epoch:13, train acc:0.995, test acc:0.989 ===
=== epoch:14, train acc:0.993, test acc:0.989 ===
=== epoch:15, train acc:0.996, test acc:0.992 ===
=== epoch:16, train acc:0.993, test acc:0.985 ===
=== epoch:17, train acc:0.995, test acc:0.987 ===
=== epoch:18, train acc:0.994, test acc:0.992 ===
=== epoch:19, train acc:0.995, test acc:0.991 ===
=== epoch:20, train acc:0.993, test acc:0.989 ===
test acc:0.991