# Optuna+KerasでCNNのハイパーパラメータを最適化

https://github.com/optuna/optuna  
https://qiita.com/ryota717/items/28e2167ea69bee7e250d

In [1]:
#CUDAバージョン確認
!nvcc -V

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2019 NVIDIA Corporation
Built on Sun_Jul_28_19:12:52_Pacific_Daylight_Time_2019
Cuda compilation tools, release 10.1, V10.1.243


In [2]:
import tensorflow as tf
print(tf.__version__)

2.1.0


In [3]:
from tensorflow.python.client import device_lib
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 14586215435764202715,
 name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 3059115622
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 13820954393504673661
 physical_device_desc: "device: 0, name: GeForce GTX 1650 with Max-Q Design, pci bus id: 0000:02:00.0, compute capability: 7.5"]

In [4]:
#ライブラリのインポート
import optuna
import keras.backend as K
from keras.datasets import fashion_mnist
from keras.layers import Convolution2D, Input, Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.utils import to_categorical

Using TensorFlow backend.


In [5]:
#学習用データの前処理
(train_x, train_y), (test_x, test_y) = fashion_mnist.load_data()
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

train_x = train_x.reshape(-1,28,28,1) / 255
test_x = test_x.reshape(-1,28,28,1) / 255
train_y = to_categorical(train_y)
test_y = to_categorical(test_y)
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape)

(60000, 28, 28) (60000,) (10000, 28, 28) (10000,)
(60000, 28, 28, 1) (60000, 10) (10000, 28, 28, 1) (10000, 10)


In [6]:
def create_model(num_layer, activation, mid_units, num_filters):
    """
    num_layer : 畳込み層の数
    activation : 活性化関数
    mid_units : FC層のユニット数
    num_filters : 各畳込み層のフィルタ数
    """
    inputs = Input((28,28,1))
    x = Convolution2D(filters=num_filters[0], kernel_size=(3,3), padding="same", activation=activation)(inputs)
    for i in range(1,num_layer):
        x = Convolution2D(filters=num_filters[i], kernel_size=(3,3), padding="same", activation=activation)(x)
        
    x = GlobalAveragePooling2D()(x)
    x = Dense(units=mid_units, activation=activation)(x)
    x = Dense(units=10, activation="softmax")(x)
    
    model = Model(inputs=inputs, outputs=x)
    return model

In [7]:
def objective(trial):
    #セッションのクリア
    K.clear_session()
    
    #最適化するパラメータの設定
    #畳込み層の数
    num_layer = trial.suggest_int("num_layer", 3, 7)
    
    #FC層のユニット数
    mid_units = int(trial.suggest_discrete_uniform("mid_units", 100, 500, 100))
    
    #各畳込み層のフィルタ数
    num_filters = [int(trial.suggest_discrete_uniform("num_filter_"+str(i), 16, 128, 16)) for i in range(num_layer)]
    
    #活性化関数
    activation = trial.suggest_categorical("activation", ["relu", "sigmoid", "tanh"])
    
    #optimizer
    optimizer = trial.suggest_categorical("optimizer", ["sgd", "adam", "rmsprop"])
                                          
    model = create_model(num_layer, activation, mid_units, num_filters)
    model.compile(optimizer=optimizer,
          loss="categorical_crossentropy",
          metrics=["accuracy"])
    
    history = model.fit(train_x, train_y, verbose=0, epochs=5, batch_size=128, validation_split=0.1)
    
    #検証用データに対する正答率が最大となるハイパーパラメータを求める
    return 1 - history.history["val_accuracy"][-1] # Tf 2.x版

In [8]:
study = optuna.create_study()

[I 2020-10-08 21:47:24,915] A new study created in memory with name: no-name-105053a5-619f-4693-850f-e358d38e754b


In [9]:
%%time
study.optimize(objective, n_trials=50) # 100から変更

[I 2020-10-08 21:48:08,579] Trial 0 finished with value: 0.20800000429153442 and parameters: {'num_layer': 4, 'mid_units': 100.0, 'num_filter_0': 32.0, 'num_filter_1': 32.0, 'num_filter_2': 16.0, 'num_filter_3': 16.0, 'activation': 'tanh', 'optimizer': 'adam'}. Best is trial 0 with value: 0.20800000429153442.
[I 2020-10-08 21:49:46,686] Trial 1 finished with value: 0.21450001001358032 and parameters: {'num_layer': 3, 'mid_units': 100.0, 'num_filter_0': 128.0, 'num_filter_1': 112.0, 'num_filter_2': 16.0, 'activation': 'relu', 'optimizer': 'rmsprop'}. Best is trial 0 with value: 0.20800000429153442.
[I 2020-10-08 21:52:25,558] Trial 2 finished with value: 0.8968333303928375 and parameters: {'num_layer': 7, 'mid_units': 500.0, 'num_filter_0': 64.0, 'num_filter_1': 32.0, 'num_filter_2': 112.0, 'num_filter_3': 16.0, 'num_filter_4': 96.0, 'num_filter_5': 16.0, 'num_filter_6': 96.0, 'activation': 'sigmoid', 'optimizer': 'adam'}. Best is trial 0 with value: 0.20800000429153442.
[I 2020-10-08 2

[I 2020-10-08 23:12:15,535] Trial 23 finished with value: 0.11733335256576538 and parameters: {'num_layer': 6, 'mid_units': 200.0, 'num_filter_0': 128.0, 'num_filter_1': 112.0, 'num_filter_2': 128.0, 'num_filter_3': 112.0, 'num_filter_4': 112.0, 'num_filter_5': 112.0, 'activation': 'relu', 'optimizer': 'adam'}. Best is trial 12 with value: 0.10199999809265137.
[I 2020-10-08 23:16:02,235] Trial 24 finished with value: 0.11983335018157959 and parameters: {'num_layer': 5, 'mid_units': 300.0, 'num_filter_0': 112.0, 'num_filter_1': 112.0, 'num_filter_2': 96.0, 'num_filter_3': 112.0, 'num_filter_4': 96.0, 'activation': 'relu', 'optimizer': 'adam'}. Best is trial 12 with value: 0.10199999809265137.
[I 2020-10-08 23:20:54,334] Trial 25 finished with value: 0.4923333525657654 and parameters: {'num_layer': 6, 'mid_units': 200.0, 'num_filter_0': 112.0, 'num_filter_1': 128.0, 'num_filter_2': 112.0, 'num_filter_3': 128.0, 'num_filter_4': 128.0, 'num_filter_5': 64.0, 'activation': 'relu', 'optimizer

[I 2020-10-09 00:58:31,686] Trial 46 finished with value: 0.3816666603088379 and parameters: {'num_layer': 7, 'mid_units': 200.0, 'num_filter_0': 64.0, 'num_filter_1': 32.0, 'num_filter_2': 96.0, 'num_filter_3': 64.0, 'num_filter_4': 32.0, 'num_filter_5': 112.0, 'num_filter_6': 96.0, 'activation': 'tanh', 'optimizer': 'sgd'}. Best is trial 12 with value: 0.10199999809265137.
[I 2020-10-09 01:06:15,844] Trial 47 finished with value: 0.12283331155776978 and parameters: {'num_layer': 6, 'mid_units': 100.0, 'num_filter_0': 64.0, 'num_filter_1': 128.0, 'num_filter_2': 112.0, 'num_filter_3': 96.0, 'num_filter_4': 64.0, 'num_filter_5': 128.0, 'activation': 'tanh', 'optimizer': 'rmsprop'}. Best is trial 12 with value: 0.10199999809265137.
[I 2020-10-09 01:13:32,947] Trial 48 finished with value: 0.1223333477973938 and parameters: {'num_layer': 5, 'mid_units': 200.0, 'num_filter_0': 48.0, 'num_filter_1': 112.0, 'num_filter_2': 128.0, 'num_filter_3': 112.0, 'num_filter_4': 96.0, 'activation': 't

Wall time: 3h 32min 52s


In [10]:
study.best_params

{'num_layer': 6,
 'mid_units': 200.0,
 'num_filter_0': 112.0,
 'num_filter_1': 128.0,
 'num_filter_2': 128.0,
 'num_filter_3': 128.0,
 'num_filter_4': 128.0,
 'num_filter_5': 128.0,
 'activation': 'relu',
 'optimizer': 'adam'}

In [11]:
study.best_value

0.10199999809265137