In [1]:
import sys, os
sys.path.append(os.pardir)  # 부모 디렉터리의 파일을 가져올 수 있도록 설정
import numpy as np
import matplotlib.pyplot as plt
from dataset.mnist import load_mnist
from common.multi_layer_net import MultiLayerNet
from common.util import shuffle_dataset
from common.trainer import Trainer

In [2]:
# Load the MNIST training split and test split.
# Each split unpacks into a pair: inputs (x_*) and their correct labels (t_*).
# NOTE(review): normalize=True presumably rescales pixel values — confirm in dataset.mnist.
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True)

In [3]:
# Keep only the first 500 samples so that every trial of the search finishes quickly.
x_train, t_train = x_train[:500], t_train[:500]

In [4]:
# Hold out 20% of the (reduced) training set as validation data.
validation_rate = 0.20
validation_num = int(x_train.shape[0] * validation_rate)

# Shuffle inputs and labels together before cutting, so the split is not tied
# to the original sample order.
x_train, t_train = shuffle_dataset(x_train, t_train)

# The first `validation_num` samples become the validation set; the rest stay
# as training data. Both right-hand sides are evaluated before assignment, so
# each slice is taken from the shuffled, not-yet-trimmed arrays.
x_val, x_train = x_train[:validation_num], x_train[validation_num:]
t_val, t_train = t_train[:validation_num], t_train[validation_num:]

In [5]:
def __train(lr, weight_decay, epocs=50):
    """Train a fresh network with one hyper-parameter setting.

    Reads the notebook globals x_train, t_train, x_val and t_val; the
    validation arrays are passed to Trainer in the position of its test data.

    Args:
        lr: learning rate handed to the 'sgd' optimizer.
        weight_decay: value passed to the network as weight_decay_lambda.
        epocs: forwarded to Trainer as `epochs` (default 50).

    Returns:
        Tuple (trainer.test_acc_list, trainer.train_acc_list). Because the
        validation data is supplied as the test data, the first list holds
        the validation accuracies.
    """
    # Six hidden layers of 100 units each.
    hidden_sizes = [100] * 6
    network = MultiLayerNet(
        input_size=784,
        hidden_size_list=hidden_sizes,
        output_size=10,
        weight_decay_lambda=weight_decay,
    )

    trainer_options = {
        'epochs': epocs,
        'mini_batch_size': 100,
        'optimizer': 'sgd',
        'optimizer_param': {'lr': lr},
        'verbose': False,
    }
    trainer = Trainer(network, x_train, t_train, x_val, t_val, **trainer_options)
    trainer.train()

    return trainer.test_acc_list, trainer.train_acc_list

In [6]:
# Number of random hyper-parameter settings to try.
optimization_trial = 100

# Per-trial accuracy histories, keyed by a string describing the sampled setting:
# results_val holds validation accuracies, results_train holds training accuracies.
results_val = dict()
results_train = dict()

In [None]:
# Random search over hyper-parameters ==============================
for i in range(optimization_trial):
    # Sample both values on a log scale: draw the exponent uniformly, then
    # raise 10 to it. weight_decay is drawn before lr.
    weight_decay = 10 ** np.random.uniform(-8, -4)
    lr = 10 ** np.random.uniform(-6, -2)

    # Train with the default number of epochs; the first returned list is the
    # accuracy on the validation data, the second on the training data.
    val_acc_list, train_acc_list = __train(lr, weight_decay)

    # The sampled setting, rendered as text, is both the log suffix and the
    # dictionary key under which the accuracy histories are stored.
    key = "lr:" + str(lr) + ", weight decay:" + str(weight_decay)
    print(i, ": val acc:" + str(val_acc_list[-1]) + " | " + key)

    results_val[key] = val_acc_list
    results_train[key] = train_acc_list

0 : val acc:0.08 | lr:0.00017702822593166194, weight decay:1.5252862931737688e-08
1 : val acc:0.09 | lr:5.758057584381829e-05, weight decay:2.219185342958944e-07
2 : val acc:0.08 | lr:9.987829488407817e-06, weight decay:8.593031440200376e-08
3 : val acc:0.02 | lr:2.9067501641371875e-06, weight decay:8.946967236943471e-07
4 : val acc:0.78 | lr:0.009343059442440339, weight decay:1.905471939321358e-08
5 : val acc:0.12 | lr:1.588262470017465e-05, weight decay:2.3499546341145764e-07
6 : val acc:0.16 | lr:2.6089567764362987e-05, weight decay:2.3286360958150222e-07
7 : val acc:0.09 | lr:0.00020693756085455038, weight decay:5.110425011102715e-06
8 : val acc:0.11 | lr:0.00036628442187792244, weight decay:2.2051677861145347e-08
9 : val acc:0.08 | lr:0.00017027558688626293, weight decay:5.975787008872115e-08
10 : val acc:0.05 | lr:5.111794225558727e-06, weight decay:1.0660834814714081e-05
11 : val acc:0.09 | lr:2.9832633685187496e-06, weight decay:1.711691659984403e-08
12 : val acc:0.24 | lr:0.00

In [None]:
# Display the recorded validation-accuracy lists, one entry per sampled "lr / weight decay" key.
results_val

In [None]:
# Display the recorded training-accuracy lists, one entry per sampled "lr / weight decay" key.
results_train

In [None]:
# Plot the learning curves of the best trials ==========================
print("=========== Hyper-Parameter Optimization Result ===========")
graph_draw_num = 20  # how many of the best trials to draw
col_num = 5          # subplots per row
row_num = int(np.ceil(graph_draw_num / col_num))

# Rank trials by their final validation accuracy, best first, and keep only
# as many as will be drawn.
ranked_trials = sorted(results_val.items(), key=lambda item: item[1][-1], reverse=True)[:graph_draw_num]

for i, (key, val_acc_list) in enumerate(ranked_trials):
    print("Best-" + str(i+1) + "(val acc:" + str(val_acc_list[-1]) + ") | " + key)

    plt.subplot(row_num, col_num, i+1)
    plt.title("Best-" + str(i+1))
    plt.ylim(0.0, 1.0)
    # Only the left-most subplot of each row keeps its y ticks. The column
    # count comes from col_num so the grid layout can be changed in one place.
    if i % col_num:
        plt.yticks([])
    plt.xticks([])
    x = np.arange(len(val_acc_list))
    plt.plot(x, val_acc_list)             # validation accuracy per recorded step (solid)
    plt.plot(x, results_train[key], "--")  # training accuracy for the same trial (dashed)

# One figure: for every drawn trial, accuracy as training progresses.
plt.show()