## 超參數優化（Fine Tune）
### 細部超參數微調

### 前置動作

先進行相同的資料處理

In [3]:
# import 套件
from hyperopt import hp, fmin, tpe, Trials, STATUS_OK
import numpy as np
import pandas as pd
from sklearn.model_selection import KFold
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input

In [4]:
# 資料處理
# 讀取資料集
train = pd.read_csv('/kaggle/input/digit-recognizer/train.csv')
test_x = pd.read_csv('/kaggle/input/digit-recognizer/test.csv')

# 提取x, y
train_x = train.drop(['label'], axis=1)
train_y = train['label']

# 分割資料集
kf = KFold(n_splits=4, shuffle=True, random_state=123)
tr_idx, va_idx = list(kf.split(train_x))[0]
tr_x, va_x = train_x.iloc[tr_idx] / 255.0, train_x.iloc[va_idx] / 255.0
tr_y, va_y = train_y.iloc[tr_idx], train_y.iloc[va_idx]

tr_x, va_x = np.array(tr_x), np.array(va_x)
tr_y, va_y = to_categorical(tr_y, 10), to_categorical(va_y, 10)

### 建立模型

撰寫建立模型用的函式

In [5]:
def Create_model(params):
    # 建立模型
    model = Sequential()
    model.add(Input(shape=(784,)))
    
    model.add(Dense(784, activation=params['act1']))
    model.add(Dropout(params['drop1']))
    
    model.add(Dense(200, activation=params['act2']))
    model.add(Dropout(params['drop2']))
    
    model.add(Dense(25, activation=params['act3']))
    model.add(Dropout(params['drop3']))
    model.add(Dense(10, activation='softmax'))

    # 編譯模型
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    # 訓練模型
    result = model.fit(tr_x, tr_y,
                       validation_data=(va_x, va_y),
                       epochs=20,
                       batch_size=int(params['batch_size']),
                       verbose=0)

    # 輸出結果
    val_acc = np.max(result.history['val_accuracy'])
    print("Best validation acc of epoch:", val_acc)

    return {'loss': -val_acc, 'status': STATUS_OK, 'model': model}

### 超參數搜尋

這次的目標是激活函數和丟棄率

In [6]:
# 定義搜尋空間
space = {'act1': hp.choice('act1', ['tanh', 'relu']),
         'drop1': hp.quniform('drop1', 0.2, 0.4, 0.05),
         'act2': hp.choice('act2', ['tanh', 'relu']),
         'drop2': hp.quniform('drop2', 0.2, 0.4, 0.05),
         'act3': hp.choice('act3', ['tanh', 'relu']),
         'drop3': hp.quniform('drop3', 0.2, 0.4, 0.05),
         'batch_size': hp.choice('batch_size', [100, 200])}

In [7]:
trials = Trials()
best = fmin(fn=Create_model,
            space=space,
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)

print("Best hyperparameters:", best)

  0%|          | 0/100 [00:00<?, ?trial/s, best loss=?]

I0000 00:00:1748576726.230555      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 13942 MB memory:  -> device: 0, name: Tesla T4, pci bus id: 0000:00:04.0, compute capability: 7.5
I0000 00:00:1748576726.231237      35 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 13942 MB memory:  -> device: 1, name: Tesla T4, pci bus id: 0000:00:05.0, compute capability: 7.5
I0000 00:00:1748576729.271800     101 service.cc:148] XLA service 0x79097009e570 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1748576729.272490     101 service.cc:156]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1748576729.272509     101 service.cc:156]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5
I0000 00:00:1748576729.490695     101 cuda_dnn.cc:529] Loaded cuDNN version 90300
I0000 00:00:1748576731.660535     101 device_compiler.h:188] Compiled clust

Best validation acc of epoch:                          
0.9705714583396912                                     
Best validation acc of epoch:                                                     
0.9727619290351868                                                                
Best validation acc of epoch:                                                     
0.9739047884941101                                                                
Best validation acc of epoch:                                                     
0.9800000190734863                                                                
Best validation acc of epoch:                                                     
0.9798095226287842                                                                
Best validation acc of epoch:                                                     
0.9748571515083313                                                                
Best validation acc of epoch:                             

### 驗證模型

用驗證集對模型進行驗證

In [8]:
# 使用驗證資料來檢驗模型。
# 取得最佳模型
bestTrialIdx = np.argmin([trial['result']['loss'] for trial in trials.trials])
bestModel = trials.trials[bestTrialIdx]['result']['model']

# 輸出準確度最好的模型。
print(bestModel.summary())

# 使用驗證集評估模型
loss, accuracy = bestModel.evaluate(va_x, va_y)
print(f"val_loss: , {loss:.4f}")
print(f"val_acc: , {accuracy:.4f}")

None
[1m329/329[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.9769 - loss: 0.1427
val_loss: , 0.1413
val_acc: , 0.9775


結果準確度從原本的0.9398進步到0.9775了!