
## ハイパーパラメータチューニング（optuna）を試す。

### 背景
- パラメータによって、`u2r`の学習率に大きく影響があると考えられるため。
### 目的
- 最適なパラメータを知ること
### 使用するデータセット
- KDD99 10%
### 手法
### 結果
### 考察

In [2]:
from utils_kdd99 import *
print_version()

python:      3.10.5
sklearn:     1.2.2
tensorflow:  2.13.0-rc0
keras:       2.13.1rc0
numpy:       1.23.5
pandas:      1.5.3


In [4]:
# データの読み込み
X, y = load_data(use_full_dataset=False, standard_scale=True, verbose=0, )
# 分類モデルによっては数値ラベルしか対応していないため、目的変数を分類クラスから数値へ変換
y = y.map(lambda x: attack_label_class[x]).map(lambda x: correspondences[x])
y.value_counts()

0    391458
1     97278
2      4107
3      1126
4        52
Name: true_label, dtype: int64

In [6]:
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=RANDOM_SEED, stratify=y)

### Dosのみを学習したオートエンコーダの作成
- 隠れ層の次元数(38->10->5->10->38)
- 活性化関数：ReLU
- 最適化関数：adam
- 損失関数：平均二乗誤差
- エポック数：5
- バッチサイズ：32

In [10]:
ae_model = keras.Sequential([
    Dense(units=10, activation='relu', input_dim=38, name='encoder1'),
    Dense(units=5, activation='relu', name='encoder2'),
    Dense(units=10, activation='relu'),
    Dense(units=38, activation='relu'),
])
ae_model.compile(optimizer='adam', loss='mean_squared_error', metrics=['accuracy'])
ae_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder1 (Dense)            (None, 10)                390       
                                                                 
 encoder2 (Dense)            (None, 5)                 55        
                                                                 
 dense_2 (Dense)             (None, 10)                60        
                                                                 
 dense_3 (Dense)             (None, 38)                418       
                                                                 
Total params: 923 (3.61 KB)
Trainable params: 923 (3.61 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Dosのみのデータセットを作成
dos_x_train = x_train[y_train == correspondences['dos']]

In [12]:
ae_model.fit(dos_x_train, dos_x_train,
            epochs=5, # データセットを使って学習する回数
            batch_size=32,
            shuffle=True,
            verbose=1,
            use_multiprocessing=True
          )

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.src.callbacks.History at 0x10a049f1630>

In [14]:
# エンコーダー部分を抜き取る
encoder = keras.Sequential([ae_model.get_layer('encoder1'),
                            ae_model.get_layer('encoder2')])
# DOSエンコーダを用いた特徴量に命名
dos_columns = list(map(lambda x: 'dos' + str(x), range(5)))
dos_columns

['dos0', 'dos1', 'dos2', 'dos3', 'dos4']

In [15]:
# 特徴量抽出し、マージする。
x_train_encoded = pd.DataFrame(data=encoder.predict(x_train), index=x_train.index, columns=dos_columns)
x_test_encoded = pd.DataFrame(data=encoder.predict(x_test), index=x_test.index, columns=dos_columns)
x_train_encoded = x_train.merge(x_train_encoded, right_index=True, left_index=True)
x_test_encoded = x_test.merge(x_test_encoded, right_index=True, left_index=True)
x_train_encoded.head()



Unnamed: 0,duration,src_bytes,dst_bytes,land,wrong_fragment,urgent,hot,num_failed_logins,logged_in,num_compromised,...,dst_host_srv_diff_host_rate,dst_host_serror_rate,dst_host_srv_serror_rate,dst_host_rerror_rate,dst_host_srv_rerror_rate,dos0,dos1,dos2,dos3,dos4
212221,-0.067792,-0.002017,-0.026287,-0.006673,-0.04772,-0.002571,-0.044136,-0.009782,-0.417192,-0.005679,...,-0.158629,-0.464418,-0.463202,-0.25204,-0.249464,0.971426,3.588529,0.0,1.988687,1.14327
30903,-0.067792,-0.002774,0.472896,-0.006673,-0.04772,-0.002571,-0.044136,-0.009782,2.39698,-0.005679,...,0.553404,-0.464418,-0.463202,-0.25204,-0.249464,1.204473,0.534857,1.009054,0.0,0.0
9739,-0.067792,-0.002017,-0.026287,-0.006673,-0.04772,-0.002571,-0.044136,-0.009782,-0.417192,-0.005679,...,-0.158629,-0.464418,-0.463202,-0.25204,-0.249464,0.970986,3.58872,0.0,1.990658,1.145948
37540,-0.067792,-0.002776,-0.01412,-0.006673,-0.04772,-0.002571,-0.044136,-0.009782,2.39698,-0.005679,...,0.790749,-0.464418,-0.463202,-0.25204,-0.249464,0.947306,0.529504,0.977704,0.0,0.0
418638,-0.067792,-0.002535,-0.026287,-0.006673,-0.04772,-0.002571,-0.044136,-0.009782,-0.417192,-0.005679,...,-0.158629,-0.464418,-0.463202,-0.25204,-0.249464,0.971012,3.588875,0.0,1.990835,1.146163


### optuna＋lightGBMを用いた学習
- 元の特徴量のみを使う。
- 

In [31]:
import optuna.integration.lightgbm as lgb
lgb_train = lgb.Dataset(x_train, y_train)
lgb_eval = lgb.Dataset(x_test, y_test, reference=lgb_train)
# LightGBM parameters
params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'multiclass',
        'num_class': 5,
        'metric': 'multi_error', # 評価指標 : 誤り率(= 1-正答率)  another multi_logloss
        'learning_rate': 0.1,
        'num_leaves': 23,
        'min_data_in_leaf': 1,
        'verbose': -1,
        'random_state': RANDOM_SEED, 
}

# モデルの学習
model = lgb.train(params, # パラメータ
                   lgb_train, # トレーニングデータの指定
                    valid_sets=[lgb_eval], # 検証データの指定
                    callbacks=[lgb.early_stopping(50)],
               )


[I 2023-08-01 06:57:04,200] A new study created in memory with name: no-name-d1de0070-58f3-492c-89bc-242f87a029a6
feature_fraction, val_score: inf:   0%|          | 0/7 [00:00<?, ?it/s]

Training until validation scores don't improve for 50 rounds


feature_fraction, val_score: 0.001055:  14%|#4        | 1/7 [00:02<00:16,  2.69s/it][I 2023-08-01 06:57:06,893] Trial 0 finished with value: 0.0010550399627055642 and parameters: {'feature_fraction': 0.4}. Best is trial 0 with value: 0.0010550399627055642.
feature_fraction, val_score: 0.001055:  14%|#4        | 1/7 [00:02<00:16,  2.69s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.00105504


feature_fraction, val_score: 0.000810:  29%|##8       | 2/7 [00:05<00:13,  2.77s/it][I 2023-08-01 06:57:09,717] Trial 1 finished with value: 0.000809681831843805 and parameters: {'feature_fraction': 0.5}. Best is trial 1 with value: 0.000809681831843805.
feature_fraction, val_score: 0.000810:  29%|##8       | 2/7 [00:05<00:13,  2.77s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000809682


feature_fraction, val_score: 0.000810:  43%|####2     | 3/7 [00:07<00:10,  2.58s/it][I 2023-08-01 06:57:12,081] Trial 2 finished with value: 0.000809681831843805 and parameters: {'feature_fraction': 0.8}. Best is trial 1 with value: 0.000809681831843805.
feature_fraction, val_score: 0.000810:  43%|####2     | 3/7 [00:07<00:10,  2.58s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000809682


feature_fraction, val_score: 0.000779:  57%|#####7    | 4/7 [00:10<00:07,  2.46s/it][I 2023-08-01 06:57:14,346] Trial 3 finished with value: 0.0007790120654860851 and parameters: {'feature_fraction': 1.0}. Best is trial 3 with value: 0.0007790120654860851.
feature_fraction, val_score: 0.000779:  57%|#####7    | 4/7 [00:10<00:07,  2.46s/it]

Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


feature_fraction, val_score: 0.000779:  71%|#######1  | 5/7 [00:12<00:04,  2.39s/it][I 2023-08-01 06:57:16,605] Trial 4 finished with value: 0.0007790120654860851 and parameters: {'feature_fraction': 0.8999999999999999}. Best is trial 3 with value: 0.0007790120654860851.
feature_fraction, val_score: 0.000779:  71%|#######1  | 5/7 [00:12<00:04,  2.39s/it]

Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


feature_fraction, val_score: 0.000779:  86%|########5 | 6/7 [00:14<00:02,  2.36s/it][I 2023-08-01 06:57:18,923] Trial 5 finished with value: 0.0007790120654860851 and parameters: {'feature_fraction': 0.7}. Best is trial 3 with value: 0.0007790120654860851.
feature_fraction, val_score: 0.000779:  86%|########5 | 6/7 [00:14<00:02,  2.36s/it]

Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


feature_fraction, val_score: 0.000779: 100%|##########| 7/7 [00:17<00:00,  2.41s/it][I 2023-08-01 06:57:21,415] Trial 6 finished with value: 0.0007790120654860851 and parameters: {'feature_fraction': 0.6}. Best is trial 3 with value: 0.0007790120654860851.
feature_fraction, val_score: 0.000779: 100%|##########| 7/7 [00:17<00:00,  2.46s/it]


Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


num_leaves, val_score: 0.000779:   5%|5         | 1/20 [00:03<01:04,  3.40s/it][I 2023-08-01 06:57:24,820] Trial 7 finished with value: 0.0007790120654860851 and parameters: {'num_leaves': 79}. Best is trial 7 with value: 0.0007790120654860851.
num_leaves, val_score: 0.000779:   5%|5         | 1/20 [00:03<01:04,  3.40s/it]

Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


num_leaves, val_score: 0.000779:  10%|#         | 2/20 [00:05<00:48,  2.68s/it][I 2023-08-01 06:57:27,000] Trial 8 finished with value: 0.0007790120654860851 and parameters: {'num_leaves': 14}. Best is trial 7 with value: 0.0007790120654860851.
num_leaves, val_score: 0.000779:  10%|#         | 2/20 [00:05<00:48,  2.68s/it]

Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


num_leaves, val_score: 0.000779:  15%|#5        | 3/20 [00:11<01:07,  3.96s/it][I 2023-08-01 06:57:32,474] Trial 9 finished with value: 0.0007790120654860851 and parameters: {'num_leaves': 219}. Best is trial 7 with value: 0.0007790120654860851.
num_leaves, val_score: 0.000779:  15%|#5        | 3/20 [00:11<01:07,  3.96s/it]

Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


num_leaves, val_score: 0.000779:  20%|##        | 4/20 [00:16<01:15,  4.69s/it][I 2023-08-01 06:57:38,283] Trial 10 finished with value: 0.0007790120654860851 and parameters: {'num_leaves': 247}. Best is trial 7 with value: 0.0007790120654860851.
num_leaves, val_score: 0.000779:  20%|##        | 4/20 [00:16<01:15,  4.69s/it]

Early stopping, best iteration is:
[7]	valid_0's multi_error: 0.000779012


num_leaves, val_score: 0.000736:  25%|##5       | 5/20 [00:21<01:08,  4.60s/it][I 2023-08-01 06:57:42,721] Trial 11 finished with value: 0.0007360743925852773 and parameters: {'num_leaves': 141}. Best is trial 11 with value: 0.0007360743925852773.
num_leaves, val_score: 0.000736:  25%|##5       | 5/20 [00:21<01:08,  4.60s/it]

Early stopping, best iteration is:
[4]	valid_0's multi_error: 0.000736074


num_leaves, val_score: 0.000736:  30%|###       | 6/20 [00:25<01:03,  4.55s/it][I 2023-08-01 06:57:47,179] Trial 12 finished with value: 0.0007360743925852773 and parameters: {'num_leaves': 146}. Best is trial 11 with value: 0.0007360743925852773.
num_leaves, val_score: 0.000736:  30%|###       | 6/20 [00:25<01:03,  4.55s/it]

Early stopping, best iteration is:
[4]	valid_0's multi_error: 0.000736074


num_leaves, val_score: 0.000687:  35%|###5      | 7/20 [00:30<00:58,  4.54s/it][I 2023-08-01 06:57:51,691] Trial 13 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 148}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  35%|###5      | 7/20 [00:30<00:58,  4.54s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  40%|####      | 8/20 [00:34<00:53,  4.47s/it][I 2023-08-01 06:57:56,030] Trial 14 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 147}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  40%|####      | 8/20 [00:34<00:53,  4.47s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  45%|####5     | 9/20 [00:39<00:50,  4.63s/it][I 2023-08-01 06:58:00,992] Trial 15 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 175}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  45%|####5     | 9/20 [00:39<00:50,  4.63s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  50%|#####     | 10/20 [00:43<00:42,  4.28s/it][I 2023-08-01 06:58:04,504] Trial 16 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 80}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  50%|#####     | 10/20 [00:43<00:42,  4.28s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  55%|#####5    | 11/20 [00:48<00:40,  4.48s/it][I 2023-08-01 06:58:09,445] Trial 17 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 191}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  55%|#####5    | 11/20 [00:48<00:40,  4.48s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  60%|######    | 12/20 [00:51<00:33,  4.25s/it][I 2023-08-01 06:58:13,146] Trial 18 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 85}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  60%|######    | 12/20 [00:51<00:33,  4.25s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  65%|######5   | 13/20 [00:56<00:30,  4.33s/it][I 2023-08-01 06:58:17,664] Trial 19 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 116}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  65%|######5   | 13/20 [00:56<00:30,  4.33s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  70%|#######   | 14/20 [01:02<00:28,  4.76s/it][I 2023-08-01 06:58:23,432] Trial 20 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 179}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  70%|#######   | 14/20 [01:02<00:28,  4.76s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  75%|#######5  | 15/20 [01:06<00:24,  4.80s/it][I 2023-08-01 06:58:28,317] Trial 21 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 174}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  75%|#######5  | 15/20 [01:06<00:24,  4.80s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  80%|########  | 16/20 [01:10<00:18,  4.58s/it][I 2023-08-01 06:58:32,393] Trial 22 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 128}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  80%|########  | 16/20 [01:10<00:18,  4.58s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  85%|########5 | 17/20 [01:15<00:13,  4.57s/it][I 2023-08-01 06:58:36,942] Trial 23 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 163}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  85%|########5 | 17/20 [01:15<00:13,  4.57s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  90%|######### | 18/20 [01:21<00:09,  4.85s/it][I 2023-08-01 06:58:42,435] Trial 24 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 208}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  90%|######### | 18/20 [01:21<00:09,  4.85s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687:  95%|#########5| 19/20 [01:24<00:04,  4.59s/it][I 2023-08-01 06:58:46,413] Trial 25 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 109}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687:  95%|#########5| 19/20 [01:24<00:04,  4.59s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


num_leaves, val_score: 0.000687: 100%|##########| 20/20 [01:29<00:00,  4.65s/it][I 2023-08-01 06:58:51,225] Trial 26 finished with value: 0.0006870027664129255 and parameters: {'num_leaves': 154}. Best is trial 13 with value: 0.0006870027664129255.
num_leaves, val_score: 0.000687: 100%|##########| 20/20 [01:29<00:00,  4.49s/it]


Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  10%|#         | 1/10 [00:05<00:49,  5.49s/it][I 2023-08-01 06:58:56,718] Trial 27 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.8912269381781016, 'bagging_freq': 5}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  10%|#         | 1/10 [00:05<00:49,  5.49s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  20%|##        | 2/10 [00:09<00:38,  4.82s/it][I 2023-08-01 06:59:01,078] Trial 28 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.40353455216217776, 'bagging_freq': 1}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  20%|##        | 2/10 [00:09<00:38,  4.82s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  30%|###       | 3/10 [00:15<00:37,  5.30s/it][I 2023-08-01 06:59:06,949] Trial 29 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.9497800988935022, 'bagging_freq': 7}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  30%|###       | 3/10 [00:15<00:37,  5.30s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  40%|####      | 4/10 [00:20<00:30,  5.03s/it][I 2023-08-01 06:59:11,557] Trial 30 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.6052745915969244, 'bagging_freq': 1}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  40%|####      | 4/10 [00:20<00:30,  5.03s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  50%|#####     | 5/10 [00:25<00:25,  5.07s/it][I 2023-08-01 06:59:16,702] Trial 31 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.7381236546090373, 'bagging_freq': 4}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  50%|#####     | 5/10 [00:25<00:25,  5.07s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  60%|######    | 6/10 [00:29<00:19,  4.84s/it][I 2023-08-01 06:59:21,089] Trial 32 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.43860156053127425, 'bagging_freq': 3}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  60%|######    | 6/10 [00:29<00:19,  4.84s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  70%|#######   | 7/10 [00:35<00:15,  5.15s/it][I 2023-08-01 06:59:26,886] Trial 33 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.9976019913201992, 'bagging_freq': 7}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  70%|#######   | 7/10 [00:35<00:15,  5.15s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  80%|########  | 8/10 [00:40<00:10,  5.18s/it][I 2023-08-01 06:59:32,131] Trial 34 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.7811807818252131, 'bagging_freq': 3}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  80%|########  | 8/10 [00:40<00:10,  5.18s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687:  90%|######### | 9/10 [00:45<00:05,  5.08s/it][I 2023-08-01 06:59:36,995] Trial 35 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.6003572734581141, 'bagging_freq': 5}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687:  90%|######### | 9/10 [00:45<00:05,  5.08s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


bagging, val_score: 0.000687: 100%|##########| 10/10 [00:51<00:00,  5.29s/it][I 2023-08-01 06:59:42,751] Trial 36 finished with value: 0.0006870027664129255 and parameters: {'bagging_fraction': 0.8523346697883871, 'bagging_freq': 2}. Best is trial 27 with value: 0.0006870027664129255.
bagging, val_score: 0.000687: 100%|##########| 10/10 [00:51<00:00,  5.15s/it]


Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


feature_fraction_stage2, val_score: 0.000687:  33%|###3      | 1/3 [00:04<00:08,  4.37s/it][I 2023-08-01 06:59:47,130] Trial 37 finished with value: 0.0006870027664129255 and parameters: {'feature_fraction': 0.92}. Best is trial 37 with value: 0.0006870027664129255.
feature_fraction_stage2, val_score: 0.000687:  33%|###3      | 1/3 [00:04<00:08,  4.37s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


feature_fraction_stage2, val_score: 0.000687:  67%|######6   | 2/3 [00:08<00:04,  4.43s/it][I 2023-08-01 06:59:51,594] Trial 38 finished with value: 0.0006870027664129255 and parameters: {'feature_fraction': 0.9520000000000001}. Best is trial 37 with value: 0.0006870027664129255.
feature_fraction_stage2, val_score: 0.000687:  67%|######6   | 2/3 [00:08<00:04,  4.43s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


feature_fraction_stage2, val_score: 0.000687: 100%|##########| 3/3 [00:13<00:00,  4.75s/it][I 2023-08-01 06:59:56,724] Trial 39 finished with value: 0.0006870027664129255 and parameters: {'feature_fraction': 0.9840000000000001}. Best is trial 37 with value: 0.0006870027664129255.
feature_fraction_stage2, val_score: 0.000687: 100%|##########| 3/3 [00:13<00:00,  4.66s/it]


Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


regularization_factors, val_score: 0.000687:   5%|5         | 1/20 [00:02<00:51,  2.73s/it][I 2023-08-01 06:59:59,455] Trial 40 finished with value: 0.0006870027664129255 and parameters: {'lambda_l1': 1.5567494941696047, 'lambda_l2': 3.230672386309174e-07}. Best is trial 40 with value: 0.0006870027664129255.
regularization_factors, val_score: 0.000687:   5%|5         | 1/20 [00:02<00:51,  2.73s/it]

Early stopping, best iteration is:
[6]	valid_0's multi_error: 0.000687003


regularization_factors, val_score: 0.000270:  10%|#         | 2/20 [00:09<01:28,  4.94s/it][I 2023-08-01 07:00:05,947] Trial 41 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.1052047345281341e-08, 'lambda_l2': 5.318307788626882}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  10%|#         | 2/20 [00:09<01:28,  4.94s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  15%|#5        | 3/20 [00:15<01:35,  5.65s/it][I 2023-08-01 07:00:12,432] Trial 42 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.3662441974883361e-08, 'lambda_l2': 6.9231520324100835}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  15%|#5        | 3/20 [00:15<01:35,  5.65s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  20%|##        | 4/20 [00:22<01:35,  5.97s/it][I 2023-08-01 07:00:18,892] Trial 43 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.558445785641092e-08, 'lambda_l2': 6.221304395721906}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  20%|##        | 4/20 [00:22<01:35,  5.97s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  25%|##5       | 5/20 [00:28<01:32,  6.19s/it][I 2023-08-01 07:00:25,477] Trial 44 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.0962488187373397e-08, 'lambda_l2': 9.24869640919352}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  25%|##5       | 5/20 [00:28<01:32,  6.19s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  30%|###       | 6/20 [00:35<01:28,  6.30s/it][I 2023-08-01 07:00:31,994] Trial 45 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.6950451919215518e-08, 'lambda_l2': 9.681727148862432}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  30%|###       | 6/20 [00:35<01:28,  6.30s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  35%|###5      | 7/20 [00:41<01:22,  6.36s/it][I 2023-08-01 07:00:38,475] Trial 46 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.3150868005702281e-08, 'lambda_l2': 9.630181297405345}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  35%|###5      | 7/20 [00:41<01:22,  6.36s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  40%|####      | 8/20 [00:48<01:17,  6.45s/it][I 2023-08-01 07:00:45,132] Trial 47 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.1764069481798252e-08, 'lambda_l2': 7.361824463895732}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  40%|####      | 8/20 [00:48<01:17,  6.45s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  45%|####5     | 9/20 [00:54<01:11,  6.46s/it][I 2023-08-01 07:00:51,604] Trial 48 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.5816358367123752e-08, 'lambda_l2': 7.167107668263824}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  45%|####5     | 9/20 [00:54<01:11,  6.46s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  50%|#####     | 10/20 [01:01<01:04,  6.46s/it][I 2023-08-01 07:00:58,052] Trial 49 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.0261742629975995e-08, 'lambda_l2': 7.306385585314022}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  50%|#####     | 10/20 [01:01<01:04,  6.46s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  55%|#####5    | 11/20 [01:07<00:58,  6.52s/it][I 2023-08-01 07:01:04,706] Trial 50 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.557727127220928e-08, 'lambda_l2': 8.335279170364611}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  55%|#####5    | 11/20 [01:07<00:58,  6.52s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  60%|######    | 12/20 [01:14<00:52,  6.53s/it][I 2023-08-01 07:01:11,263] Trial 51 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.1337406450551889e-08, 'lambda_l2': 8.056257183162769}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  60%|######    | 12/20 [01:14<00:52,  6.53s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  65%|######5   | 13/20 [01:21<00:45,  6.53s/it][I 2023-08-01 07:01:17,810] Trial 52 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 2.019281100729572e-08, 'lambda_l2': 8.188133192513936}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  65%|######5   | 13/20 [01:21<00:45,  6.53s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  70%|#######   | 14/20 [01:27<00:38,  6.40s/it][I 2023-08-01 07:01:23,890] Trial 53 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 5.886611283768599e-07, 'lambda_l2': 0.22590371032847295}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  70%|#######   | 14/20 [01:27<00:38,  6.40s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  75%|#######5  | 15/20 [01:33<00:31,  6.32s/it][I 2023-08-01 07:01:30,022] Trial 54 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 9.60052678638952e-07, 'lambda_l2': 0.2215924216373986}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  75%|#######5  | 15/20 [01:33<00:31,  6.32s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  80%|########  | 16/20 [01:39<00:24,  6.24s/it][I 2023-08-01 07:01:36,089] Trial 55 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 2.557251385675423e-07, 'lambda_l2': 0.37546011563474035}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  80%|########  | 16/20 [01:39<00:24,  6.24s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  85%|########5 | 17/20 [01:45<00:18,  6.24s/it][I 2023-08-01 07:01:42,315] Trial 56 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 1.6364273955992725e-07, 'lambda_l2': 0.46794490809275885}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  85%|########5 | 17/20 [01:45<00:18,  6.24s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  90%|######### | 18/20 [01:51<00:12,  6.28s/it][I 2023-08-01 07:01:48,699] Trial 57 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 2.006079319741042e-07, 'lambda_l2': 0.7758768589895395}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  90%|######### | 18/20 [01:51<00:12,  6.28s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270:  95%|#########5| 19/20 [02:00<00:06,  6.91s/it][I 2023-08-01 07:01:57,089] Trial 58 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 7.452076024237545e-08, 'lambda_l2': 0.0035775214336155204}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270:  95%|#########5| 19/20 [02:00<00:06,  6.91s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


regularization_factors, val_score: 0.000270: 100%|##########| 20/20 [02:06<00:00,  6.77s/it][I 2023-08-01 07:02:03,526] Trial 59 finished with value: 0.000269893943947935 and parameters: {'lambda_l1': 3.2605916771158563e-06, 'lambda_l2': 1.314762739210139}. Best is trial 41 with value: 0.000269893943947935.
regularization_factors, val_score: 0.000270: 100%|##########| 20/20 [02:06<00:00,  6.34s/it]


Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


min_data_in_leaf, val_score: 0.000270:  20%|##        | 1/5 [00:06<00:26,  6.55s/it][I 2023-08-01 07:02:10,087] Trial 60 finished with value: 0.000269893943947935 and parameters: {'min_child_samples': 10}. Best is trial 60 with value: 0.000269893943947935.
min_data_in_leaf, val_score: 0.000270:  20%|##        | 1/5 [00:06<00:26,  6.55s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


min_data_in_leaf, val_score: 0.000270:  40%|####      | 2/5 [00:13<00:19,  6.55s/it][I 2023-08-01 07:02:16,642] Trial 61 finished with value: 0.000269893943947935 and parameters: {'min_child_samples': 100}. Best is trial 60 with value: 0.000269893943947935.
min_data_in_leaf, val_score: 0.000270:  40%|####      | 2/5 [00:13<00:19,  6.55s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


min_data_in_leaf, val_score: 0.000270:  60%|######    | 3/5 [00:19<00:13,  6.54s/it][I 2023-08-01 07:02:23,175] Trial 62 finished with value: 0.000269893943947935 and parameters: {'min_child_samples': 50}. Best is trial 60 with value: 0.000269893943947935.
min_data_in_leaf, val_score: 0.000270:  60%|######    | 3/5 [00:19<00:13,  6.54s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


min_data_in_leaf, val_score: 0.000270:  80%|########  | 4/5 [00:26<00:06,  6.50s/it][I 2023-08-01 07:02:29,619] Trial 63 finished with value: 0.000269893943947935 and parameters: {'min_child_samples': 5}. Best is trial 60 with value: 0.000269893943947935.
min_data_in_leaf, val_score: 0.000270:  80%|########  | 4/5 [00:26<00:06,  6.50s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894


min_data_in_leaf, val_score: 0.000270: 100%|##########| 5/5 [00:32<00:00,  6.56s/it][I 2023-08-01 07:02:36,275] Trial 64 finished with value: 0.000269893943947935 and parameters: {'min_child_samples': 25}. Best is trial 60 with value: 0.000269893943947935.
min_data_in_leaf, val_score: 0.000270: 100%|##########| 5/5 [00:32<00:00,  6.55s/it]

Early stopping, best iteration is:
[120]	valid_0's multi_error: 0.000269894





In [41]:
y_pred_prob = params.predict(x_test)
# テストデータの予測 (予測クラス(0 or 1 or...)を返す)
y_pred = np.argmax(y_pred_prob, axis=1) # 一番大きい予測確率のクラスを予測クラスに
y_pred = pd.Series(y_pred)
y_pred.value_counts()

0    129185
1     32118
2      1346
3       362
4        16
dtype: int64

In [42]:
print(classification_report(y_test, y_pred, target_names=correspondences.keys()))

              precision    recall  f1-score   support

         dos       1.00      1.00      1.00    129181
      normal       1.00      1.00      1.00     32102
       probe       1.00      0.99      0.99      1355
         r2l       1.00      0.97      0.98       372
         u2r       0.75      0.71      0.73        17

    accuracy                           1.00    163027
   macro avg       0.95      0.93      0.94    163027
weighted avg       1.00      1.00      1.00    163027


In [46]:
# 
model.params

{'task': 'train',
 'boosting_type': 'gbdt',
 'objective': 'multiclass',
 'num_class': 5,
 'metric': 'multi_error',
 'learning_rate': 0.1,
 'num_leaves': 148,
 'min_data_in_leaf': 1,
 'verbose': -1,
 'random_state': 2018,
 'feature_pre_filter': False,
 'lambda_l1': 1.1052047345281341e-08,
 'lambda_l2': 5.318307788626882,
 'feature_fraction': 1.0,
 'bagging_fraction': 1.0,
 'bagging_freq': 0,
 'min_child_samples': 20,
 'num_iterations': 1000,
 'early_stopping_round': None}

In [47]:
for key, confusion_matrix in zip(correspondences.keys(), multilabel_confusion_matrix(y_test, y_pred)):
    print(f"{key}    TP: {confusion_matrix[1][1]}, TN: {confusion_matrix[0][0]}, FP: {confusion_matrix[0][1]}, FN: {confusion_matrix[1][0]}")

dos    TP: 129177, TN: 33838, FP: 8, FN: 4
normal    TP: 32090, TN: 130897, FP: 28, FN: 12
probe    TP: 1343, TN: 161669, FP: 3, FN: 12
r2l    TP: 361, TN: 162654, FP: 1, FN: 11
u2r    TP: 12, TN: 163006, FP: 4, FN: 5
