In [1]:
import lightgbm as lgb
from hyperopt import STATUS_OK, Trials, hp, space_eval, tpe, fmin



In [2]:
##Hyperopt

In [3]:
from sklearn.metrics import auc
from collections import Counter
def calculate_mAP(preds,label):
    ## mAP calculation
    AP = []
    num_class = 10
    predict_label_count_dict = Counter(preds)
    predict_label_count_dict = dict(sorted(predict_label_count_dict.items()))

    # For each class
    for c, freq in predict_label_count_dict.items() :
        TP = 0
        FN = 0

        temp_precision = []
        temp_recall = []

        for i in range(len(preds)):
            # Calculate TP and FN
            if label[i] == c and preds[i] == c :
                TP += 1

            elif label[i] != c and preds[i] == c :
                FN += 1

            # Calculate precision and recall
            if TP+FN != 0:
                temp_precision.append(TP/(TP+FN))
                temp_recall.append(TP/freq)

        # Save the AP value of each class to AP array
        AP.append(auc(temp_recall, temp_precision))

    # Calculate mAP
    mAP = sum(AP) / num_class

    return mAP

In [4]:
import lightgbm as lgb
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from hyperopt import STATUS_OK, Trials, hp, space_eval, tpe, fmin
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from collections import Counter

In [7]:
!pip install hyperopt



In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# 데이터 로드
train = pd.read_csv('/content/drive/MyDrive/124000_augmented.csv')
train.isnull().any().sum()
test = pd.read_csv('/content/drive/MyDrive/public_test_data.csv')

In [7]:
print(train.shape)
print(test.shape)

(124000, 785)
(10000, 785)


In [8]:
df_train = train.copy()
df_test = test.copy()

In [9]:
X_train= df_train.drop(['label'],axis = 1)
X_label = df_train['label']
y_test = df_test.drop(['label'],axis = 1)
y_label = df_test['label']

X_train = X_train.astype('float32')
y_test = y_test.astype('float32')


In [10]:
"""
public_test_label.txt -> y_label
필요한 경우에 실행
"""

with open('label.txt', 'r') as file:
    lines = file.readlines()

series_data = pd.Series([int(line.strip().split()[1]) if line.strip().split()[1].isdigit() else 0 for line in lines], name='label', dtype='int64')
y_label = series_data

In [11]:
X_label = X_label.values # change to array for mAP
y_label = y_label.values # change to array for mAP

In [14]:
!pip install optuna




In [None]:
import optuna
from optuna.integration import LightGBMPruningCallback
from optuna import Trial, create_study
from sklearn.model_selection import cross_val_score

# 로깅 설정
optuna.logging.set_verbosity(optuna.logging.INFO)

def objective(trial: Trial):
    params = {
        'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
        'max_depth': trial.suggest_int('max_depth', 1, 10),
        'num_leaves': trial.suggest_int('num_leaves', 2, 100),
        'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
        'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 7),
        'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
        'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
        'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
        'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
        'max_bin': trial.suggest_int('max_bin', 128, 512),
        'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),
        'objective': 'multiclass',
        'num_class': 10,
        'random_state': 45,
        'verbosity': -1,
        'metric': 'multi_logloss'
    }

    model = lgb.LGBMClassifier(**params)

    pca = PCA(n_components=700, random_state=45)
    lgbm_pipe = Pipeline([
        ('pca', pca),
        ('lgbm', model)
    ])

    train_x, valid_x, train_y, valid_y = train_test_split(X_train, X_label, test_size=0.3, stratify=X_label)

    lgbm_pipe.fit(train_x, train_y)

    preds = lgbm_pipe.predict(valid_x)

    mAP = calculate_mAP(preds, valid_y)

    return mAP

# Callback 함수 정의
def print_param(study, trial):
    print("Trial {} params: {}".format(trial.number, trial.params))

study = create_study(direction="maximize", pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=100, callbacks=[print_param], show_progress_bar=True)

best_params = study.best_params

print('Best trial: score {}, params {}'.format(study.best_value, best_params))


[I 2023-11-30 15:04:55,286] A new study created in memory with name: no-name-8a1677b3-bf52-439a-a3f3-3834c4e5816d


  0%|          | 0/100 [00:00<?, ?it/s]

  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:09:04,226] Trial 0 finished with value: 0.8535687068354122 and parameters: {'learning_rate': 0.18969656382927796, 'max_depth': 5, 'num_leaves': 28, 'feature_fraction': 0.7180625251224497, 'bagging_fraction': 0.6271814580704598, 'bagging_freq': 3, 'min_child_samples': 94, 'lambda_l1': 3.067253387681708e-08, 'lambda_l2': 2.9031496199429708e-06, 'min_gain_to_split': 0.18707053466105983, 'max_bin': 341, 'scale_pos_weight': 63.33787294936572}. Best is trial 0 with value: 0.8535687068354122.
Trial 0 params: {'learning_rate': 0.18969656382927796, 'max_depth': 5, 'num_leaves': 28, 'feature_fraction': 0.7180625251224497, 'bagging_fraction': 0.6271814580704598, 'bagging_freq': 3, 'min_child_samples': 94, 'lambda_l1': 3.067253387681708e-08, 'lambda_l2': 2.9031496199429708e-06, 'min_gain_to_split': 0.18707053466105983, 'max_bin': 341, 'scale_pos_weight': 63.33787294936572}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:12:04,470] Trial 1 finished with value: 0.737342209062977 and parameters: {'learning_rate': 0.10435649313896261, 'max_depth': 9, 'num_leaves': 5, 'feature_fraction': 0.7154220157223932, 'bagging_fraction': 0.7463876479427098, 'bagging_freq': 1, 'min_child_samples': 24, 'lambda_l1': 1.101092303802611e-07, 'lambda_l2': 0.0022595035589772355, 'min_gain_to_split': 0.17650014868007144, 'max_bin': 466, 'scale_pos_weight': 91.20601325777042}. Best is trial 0 with value: 0.8535687068354122.
Trial 1 params: {'learning_rate': 0.10435649313896261, 'max_depth': 9, 'num_leaves': 5, 'feature_fraction': 0.7154220157223932, 'bagging_fraction': 0.7463876479427098, 'bagging_freq': 1, 'min_child_samples': 24, 'lambda_l1': 1.101092303802611e-07, 'lambda_l2': 0.0022595035589772355, 'min_gain_to_split': 0.17650014868007144, 'max_bin': 466, 'scale_pos_weight': 91.20601325777042}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:13:24,432] Trial 2 finished with value: 0.6541544011200943 and parameters: {'learning_rate': 0.19013211717659598, 'max_depth': 1, 'num_leaves': 57, 'feature_fraction': 0.4972318681139001, 'bagging_fraction': 0.7004801300066532, 'bagging_freq': 4, 'min_child_samples': 62, 'lambda_l1': 0.0009358403001215338, 'lambda_l2': 0.0005635240903162413, 'min_gain_to_split': 0.9851298207567799, 'max_bin': 274, 'scale_pos_weight': 3.7809309020116424}. Best is trial 0 with value: 0.8535687068354122.
Trial 2 params: {'learning_rate': 0.19013211717659598, 'max_depth': 1, 'num_leaves': 57, 'feature_fraction': 0.4972318681139001, 'bagging_fraction': 0.7004801300066532, 'bagging_freq': 4, 'min_child_samples': 62, 'lambda_l1': 0.0009358403001215338, 'lambda_l2': 0.0005635240903162413, 'min_gain_to_split': 0.9851298207567799, 'max_bin': 274, 'scale_pos_weight': 3.7809309020116424}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:19:04,293] Trial 3 finished with value: 0.8647058017156384 and parameters: {'learning_rate': 0.24296823109889737, 'max_depth': 6, 'num_leaves': 73, 'feature_fraction': 0.8380755494477532, 'bagging_fraction': 0.6023088694150138, 'bagging_freq': 7, 'min_child_samples': 57, 'lambda_l1': 0.008641925818590673, 'lambda_l2': 0.014477718630188548, 'min_gain_to_split': 0.12276192858821057, 'max_bin': 374, 'scale_pos_weight': 31.22051936930558}. Best is trial 3 with value: 0.8647058017156384.
Trial 3 params: {'learning_rate': 0.24296823109889737, 'max_depth': 6, 'num_leaves': 73, 'feature_fraction': 0.8380755494477532, 'bagging_fraction': 0.6023088694150138, 'bagging_freq': 7, 'min_child_samples': 57, 'lambda_l1': 0.008641925818590673, 'lambda_l2': 0.014477718630188548, 'min_gain_to_split': 0.12276192858821057, 'max_bin': 374, 'scale_pos_weight': 31.22051936930558}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:25:21,670] Trial 4 finished with value: 0.8534872044816012 and parameters: {'learning_rate': 0.08958003828028381, 'max_depth': 9, 'num_leaves': 38, 'feature_fraction': 0.7297859843793966, 'bagging_fraction': 0.44034327225512365, 'bagging_freq': 7, 'min_child_samples': 6, 'lambda_l1': 1.7772459232795499, 'lambda_l2': 3.041832762296966, 'min_gain_to_split': 0.1568223295538266, 'max_bin': 460, 'scale_pos_weight': 97.93941521344719}. Best is trial 3 with value: 0.8647058017156384.
Trial 4 params: {'learning_rate': 0.08958003828028381, 'max_depth': 9, 'num_leaves': 38, 'feature_fraction': 0.7297859843793966, 'bagging_fraction': 0.44034327225512365, 'bagging_freq': 7, 'min_child_samples': 6, 'lambda_l1': 1.7772459232795499, 'lambda_l2': 3.041832762296966, 'min_gain_to_split': 0.1568223295538266, 'max_bin': 460, 'scale_pos_weight': 97.93941521344719}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:31:00,492] Trial 5 finished with value: 0.8532997954319729 and parameters: {'learning_rate': 0.06642945821105768, 'max_depth': 8, 'num_leaves': 47, 'feature_fraction': 0.954935175201057, 'bagging_fraction': 0.4957874544068901, 'bagging_freq': 3, 'min_child_samples': 87, 'lambda_l1': 0.4215437423975616, 'lambda_l2': 1.324680128490514e-07, 'min_gain_to_split': 0.3283660155639027, 'max_bin': 225, 'scale_pos_weight': 4.61486296719513}. Best is trial 3 with value: 0.8647058017156384.
Trial 5 params: {'learning_rate': 0.06642945821105768, 'max_depth': 8, 'num_leaves': 47, 'feature_fraction': 0.954935175201057, 'bagging_fraction': 0.4957874544068901, 'bagging_freq': 3, 'min_child_samples': 87, 'lambda_l1': 0.4215437423975616, 'lambda_l2': 1.324680128490514e-07, 'min_gain_to_split': 0.3283660155639027, 'max_bin': 225, 'scale_pos_weight': 4.61486296719513}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:35:16,750] Trial 6 finished with value: 0.8728423883553574 and parameters: {'learning_rate': 0.2413930536276879, 'max_depth': 8, 'num_leaves': 96, 'feature_fraction': 0.4923552930217907, 'bagging_fraction': 0.604740441249538, 'bagging_freq': 3, 'min_child_samples': 53, 'lambda_l1': 2.289853630871441e-06, 'lambda_l2': 1.2448746570024297e-07, 'min_gain_to_split': 0.1666665334512744, 'max_bin': 499, 'scale_pos_weight': 96.99215201644404}. Best is trial 6 with value: 0.8728423883553574.
Trial 6 params: {'learning_rate': 0.2413930536276879, 'max_depth': 8, 'num_leaves': 96, 'feature_fraction': 0.4923552930217907, 'bagging_fraction': 0.604740441249538, 'bagging_freq': 3, 'min_child_samples': 53, 'lambda_l1': 2.289853630871441e-06, 'lambda_l2': 1.2448746570024297e-07, 'min_gain_to_split': 0.1666665334512744, 'max_bin': 499, 'scale_pos_weight': 96.99215201644404}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:39:48,704] Trial 7 finished with value: 0.8194842386003224 and parameters: {'learning_rate': 0.07592189542346271, 'max_depth': 5, 'num_leaves': 78, 'feature_fraction': 0.839961702350688, 'bagging_fraction': 0.7417125100973534, 'bagging_freq': 4, 'min_child_samples': 24, 'lambda_l1': 2.239621788971206e-06, 'lambda_l2': 0.0009925165083050336, 'min_gain_to_split': 0.17991403002448741, 'max_bin': 220, 'scale_pos_weight': 57.968678420732495}. Best is trial 6 with value: 0.8728423883553574.
Trial 7 params: {'learning_rate': 0.07592189542346271, 'max_depth': 5, 'num_leaves': 78, 'feature_fraction': 0.839961702350688, 'bagging_fraction': 0.7417125100973534, 'bagging_freq': 4, 'min_child_samples': 24, 'lambda_l1': 2.239621788971206e-06, 'lambda_l2': 0.0009925165083050336, 'min_gain_to_split': 0.17991403002448741, 'max_bin': 220, 'scale_pos_weight': 57.968678420732495}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:45:46,937] Trial 8 finished with value: 0.8736730161378873 and parameters: {'learning_rate': 0.2418104149098813, 'max_depth': 8, 'num_leaves': 36, 'feature_fraction': 0.7096527384034726, 'bagging_fraction': 0.8459259918044021, 'bagging_freq': 4, 'min_child_samples': 79, 'lambda_l1': 0.0004210865957682166, 'lambda_l2': 0.02649328806344821, 'min_gain_to_split': 0.21550788670622034, 'max_bin': 464, 'scale_pos_weight': 31.824509705666713}. Best is trial 8 with value: 0.8736730161378873.
Trial 8 params: {'learning_rate': 0.2418104149098813, 'max_depth': 8, 'num_leaves': 36, 'feature_fraction': 0.7096527384034726, 'bagging_fraction': 0.8459259918044021, 'bagging_freq': 4, 'min_child_samples': 79, 'lambda_l1': 0.0004210865957682166, 'lambda_l2': 0.02649328806344821, 'min_gain_to_split': 0.21550788670622034, 'max_bin': 464, 'scale_pos_weight': 31.824509705666713}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:47:38,736] Trial 9 finished with value: 0.6827314018714004 and parameters: {'learning_rate': 0.2842768677278804, 'max_depth': 1, 'num_leaves': 97, 'feature_fraction': 0.5728181658508811, 'bagging_fraction': 0.9763375324320668, 'bagging_freq': 4, 'min_child_samples': 95, 'lambda_l1': 0.6387714659568857, 'lambda_l2': 8.594587686990977, 'min_gain_to_split': 0.7819766775466057, 'max_bin': 398, 'scale_pos_weight': 39.49373981190776}. Best is trial 8 with value: 0.8736730161378873.
Trial 9 params: {'learning_rate': 0.2842768677278804, 'max_depth': 1, 'num_leaves': 97, 'feature_fraction': 0.5728181658508811, 'bagging_fraction': 0.9763375324320668, 'bagging_freq': 4, 'min_child_samples': 95, 'lambda_l1': 0.6387714659568857, 'lambda_l2': 8.594587686990977, 'min_gain_to_split': 0.7819766775466057, 'max_bin': 398, 'scale_pos_weight': 39.49373981190776}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:50:31,678] Trial 10 finished with value: 0.6371013834755077 and parameters: {'learning_rate': 0.01902287805645525, 'max_depth': 3, 'num_leaves': 14, 'feature_fraction': 0.5965420452738068, 'bagging_fraction': 0.8990581060104739, 'bagging_freq': 6, 'min_child_samples': 74, 'lambda_l1': 7.727228725335469e-05, 'lambda_l2': 0.09036975861819559, 'min_gain_to_split': 0.30044803387229957, 'max_bin': 430, 'scale_pos_weight': 25.277489431152162}. Best is trial 8 with value: 0.8736730161378873.
Trial 10 params: {'learning_rate': 0.01902287805645525, 'max_depth': 3, 'num_leaves': 14, 'feature_fraction': 0.5965420452738068, 'bagging_fraction': 0.8990581060104739, 'bagging_freq': 6, 'min_child_samples': 74, 'lambda_l1': 7.727228725335469e-05, 'lambda_l2': 0.09036975861819559, 'min_gain_to_split': 0.30044803387229957, 'max_bin': 430, 'scale_pos_weight': 25.277489431152162}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:54:36,548] Trial 11 finished with value: 0.8721191123466557 and parameters: {'learning_rate': 0.29926551416223485, 'max_depth': 7, 'num_leaves': 93, 'feature_fraction': 0.4603777799311719, 'bagging_fraction': 0.836199384794792, 'bagging_freq': 2, 'min_child_samples': 37, 'lambda_l1': 5.4491991492925e-05, 'lambda_l2': 5.104039800580497e-06, 'min_gain_to_split': 0.11355641374171481, 'max_bin': 502, 'scale_pos_weight': 75.20139287634984}. Best is trial 8 with value: 0.8736730161378873.
Trial 11 params: {'learning_rate': 0.29926551416223485, 'max_depth': 7, 'num_leaves': 93, 'feature_fraction': 0.4603777799311719, 'bagging_fraction': 0.836199384794792, 'bagging_freq': 2, 'min_child_samples': 37, 'lambda_l1': 5.4491991492925e-05, 'lambda_l2': 5.104039800580497e-06, 'min_gain_to_split': 0.11355641374171481, 'max_bin': 502, 'scale_pos_weight': 75.20139287634984}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 15:57:07,381] Trial 12 finished with value: 0.8707582538959162 and parameters: {'learning_rate': 0.24546297074791762, 'max_depth': 10, 'num_leaves': 66, 'feature_fraction': 0.42340866544570316, 'bagging_fraction': 0.8187150437921669, 'bagging_freq': 5, 'min_child_samples': 73, 'lambda_l1': 3.6345614292006126e-06, 'lambda_l2': 6.435952620830518e-08, 'min_gain_to_split': 0.27093428337148023, 'max_bin': 140, 'scale_pos_weight': 73.00649920228685}. Best is trial 8 with value: 0.8736730161378873.
Trial 12 params: {'learning_rate': 0.24546297074791762, 'max_depth': 10, 'num_leaves': 66, 'feature_fraction': 0.42340866544570316, 'bagging_fraction': 0.8187150437921669, 'bagging_freq': 5, 'min_child_samples': 73, 'lambda_l1': 3.6345614292006126e-06, 'lambda_l2': 6.435952620830518e-08, 'min_gain_to_split': 0.27093428337148023, 'max_bin': 140, 'scale_pos_weight': 73.00649920228685}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:01:31,791] Trial 13 finished with value: 0.8661515097921324 and parameters: {'learning_rate': 0.22982580325079932, 'max_depth': 7, 'num_leaves': 29, 'feature_fraction': 0.5984932160827827, 'bagging_fraction': 0.5951485912387372, 'bagging_freq': 2, 'min_child_samples': 41, 'lambda_l1': 0.0035880774915597927, 'lambda_l2': 1.1959071636390917e-08, 'min_gain_to_split': 0.24415918593663802, 'max_bin': 479, 'scale_pos_weight': 46.93843736669042}. Best is trial 8 with value: 0.8736730161378873.
Trial 13 params: {'learning_rate': 0.22982580325079932, 'max_depth': 7, 'num_leaves': 29, 'feature_fraction': 0.5984932160827827, 'bagging_fraction': 0.5951485912387372, 'bagging_freq': 2, 'min_child_samples': 41, 'lambda_l1': 0.0035880774915597927, 'lambda_l2': 1.1959071636390917e-08, 'min_gain_to_split': 0.24415918593663802, 'max_bin': 479, 'scale_pos_weight': 46.93843736669042}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:05:34,421] Trial 14 finished with value: 0.8675555850578913 and parameters: {'learning_rate': 0.15628951977068592, 'max_depth': 10, 'num_leaves': 54, 'feature_fraction': 0.4037315762362188, 'bagging_fraction': 0.5358290747637546, 'bagging_freq': 5, 'min_child_samples': 76, 'lambda_l1': 3.5331182709173266e-06, 'lambda_l2': 5.3264132610607015e-05, 'min_gain_to_split': 0.40173400395930364, 'max_bin': 512, 'scale_pos_weight': 84.84487065697722}. Best is trial 8 with value: 0.8736730161378873.
Trial 14 params: {'learning_rate': 0.15628951977068592, 'max_depth': 10, 'num_leaves': 54, 'feature_fraction': 0.4037315762362188, 'bagging_fraction': 0.5358290747637546, 'bagging_freq': 5, 'min_child_samples': 76, 'lambda_l1': 3.5331182709173266e-06, 'lambda_l2': 5.3264132610607015e-05, 'min_gain_to_split': 0.40173400395930364, 'max_bin': 512, 'scale_pos_weight': 84.84487065697722}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:10:04,972] Trial 15 finished with value: 0.8640520989962545 and parameters: {'learning_rate': 0.2720720667344474, 'max_depth': 8, 'num_leaves': 82, 'feature_fraction': 0.5299678614030033, 'bagging_fraction': 0.6588438013521062, 'bagging_freq': 3, 'min_child_samples': 46, 'lambda_l1': 0.00017983875391696484, 'lambda_l2': 6.275590560774882e-05, 'min_gain_to_split': 0.1059323752113329, 'max_bin': 423, 'scale_pos_weight': 97.33747321369549}. Best is trial 8 with value: 0.8736730161378873.
Trial 15 params: {'learning_rate': 0.2720720667344474, 'max_depth': 8, 'num_leaves': 82, 'feature_fraction': 0.5299678614030033, 'bagging_fraction': 0.6588438013521062, 'bagging_freq': 3, 'min_child_samples': 46, 'lambda_l1': 0.00017983875391696484, 'lambda_l2': 6.275590560774882e-05, 'min_gain_to_split': 0.1059323752113329, 'max_bin': 423, 'scale_pos_weight': 97.33747321369549}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:13:29,817] Trial 16 finished with value: 0.8431685401195509 and parameters: {'learning_rate': 0.21055508673693368, 'max_depth': 6, 'num_leaves': 22, 'feature_fraction': 0.6486203183860668, 'bagging_fraction': 0.41553679006895305, 'bagging_freq': 5, 'min_child_samples': 65, 'lambda_l1': 0.025380304531691385, 'lambda_l2': 0.26323490085892715, 'min_gain_to_split': 0.22490177847494203, 'max_bin': 332, 'scale_pos_weight': 56.92074191915911}. Best is trial 8 with value: 0.8736730161378873.
Trial 16 params: {'learning_rate': 0.21055508673693368, 'max_depth': 6, 'num_leaves': 22, 'feature_fraction': 0.6486203183860668, 'bagging_fraction': 0.41553679006895305, 'bagging_freq': 5, 'min_child_samples': 65, 'lambda_l1': 0.025380304531691385, 'lambda_l2': 0.26323490085892715, 'min_gain_to_split': 0.22490177847494203, 'max_bin': 332, 'scale_pos_weight': 56.92074191915911}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:16:01,658] Trial 17 finished with value: 0.8007161058707222 and parameters: {'learning_rate': 0.26302386608804496, 'max_depth': 3, 'num_leaves': 37, 'feature_fraction': 0.4905627253125614, 'bagging_fraction': 0.98542064397748, 'bagging_freq': 1, 'min_child_samples': 83, 'lambda_l1': 1.0083125805029834e-08, 'lambda_l2': 0.008812909881874336, 'min_gain_to_split': 0.15224312293131265, 'max_bin': 387, 'scale_pos_weight': 75.3679486643717}. Best is trial 8 with value: 0.8736730161378873.
Trial 17 params: {'learning_rate': 0.26302386608804496, 'max_depth': 3, 'num_leaves': 37, 'feature_fraction': 0.4905627253125614, 'bagging_fraction': 0.98542064397748, 'bagging_freq': 1, 'min_child_samples': 83, 'lambda_l1': 1.0083125805029834e-08, 'lambda_l2': 0.008812909881874336, 'min_gain_to_split': 0.15224312293131265, 'max_bin': 387, 'scale_pos_weight': 75.3679486643717}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:22:12,661] Trial 18 finished with value: 0.8743478314636335 and parameters: {'learning_rate': 0.2171904245266381, 'max_depth': 8, 'num_leaves': 45, 'feature_fraction': 0.6466722895979564, 'bagging_fraction': 0.7789377915338855, 'bagging_freq': 2, 'min_child_samples': 52, 'lambda_l1': 2.4968051291919353e-05, 'lambda_l2': 1.4650982431317401e-06, 'min_gain_to_split': 0.13290032726743076, 'max_bin': 440, 'scale_pos_weight': 50.531187440318135}. Best is trial 18 with value: 0.8743478314636335.
Trial 18 params: {'learning_rate': 0.2171904245266381, 'max_depth': 8, 'num_leaves': 45, 'feature_fraction': 0.6466722895979564, 'bagging_fraction': 0.7789377915338855, 'bagging_freq': 2, 'min_child_samples': 52, 'lambda_l1': 2.4968051291919353e-05, 'lambda_l2': 1.4650982431317401e-06, 'min_gain_to_split': 0.13290032726743076, 'max_bin': 440, 'scale_pos_weight': 50.531187440318135}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:27:22,390] Trial 19 finished with value: 0.8768369901783185 and parameters: {'learning_rate': 0.2105063738406757, 'max_depth': 7, 'num_leaves': 44, 'feature_fraction': 0.6321950149577108, 'bagging_fraction': 0.8086977751211619, 'bagging_freq': 2, 'min_child_samples': 32, 'lambda_l1': 0.06239170104946315, 'lambda_l2': 0.0001927034824133381, 'min_gain_to_split': 0.12807985798558277, 'max_bin': 289, 'scale_pos_weight': 46.591689947898594}. Best is trial 19 with value: 0.8768369901783185.
Trial 19 params: {'learning_rate': 0.2105063738406757, 'max_depth': 7, 'num_leaves': 44, 'feature_fraction': 0.6321950149577108, 'bagging_fraction': 0.8086977751211619, 'bagging_freq': 2, 'min_child_samples': 32, 'lambda_l1': 0.06239170104946315, 'lambda_l2': 0.0001927034824133381, 'min_gain_to_split': 0.12807985798558277, 'max_bin': 289, 'scale_pos_weight': 46.591689947898594}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


[I 2023-11-30 16:30:34,635] Trial 20 finished with value: 0.8256041799828793 and parameters: {'learning_rate': 0.15104018169601616, 'max_depth': 4, 'num_leaves': 46, 'feature_fraction': 0.6199333611319688, 'bagging_fraction': 0.7558467670567142, 'bagging_freq': 2, 'min_child_samples': 27, 'lambda_l1': 0.07743997959326938, 'lambda_l2': 2.0564542050773438e-06, 'min_gain_to_split': 0.13209339810252, 'max_bin': 283, 'scale_pos_weight': 47.13488936177953}. Best is trial 19 with value: 0.8768369901783185.
Trial 20 params: {'learning_rate': 0.15104018169601616, 'max_depth': 4, 'num_leaves': 46, 'feature_fraction': 0.6199333611319688, 'bagging_fraction': 0.7558467670567142, 'bagging_freq': 2, 'min_child_samples': 27, 'lambda_l1': 0.07743997959326938, 'lambda_l2': 2.0564542050773438e-06, 'min_gain_to_split': 0.13209339810252, 'max_bin': 283, 'scale_pos_weight': 47.13488936177953}


  'learning_rate': trial.suggest_uniform('learning_rate', 0.01, 0.3),
  'feature_fraction': trial.suggest_uniform('feature_fraction', 0.4, 1.0),
  'bagging_fraction': trial.suggest_uniform('bagging_fraction', 0.4, 1.0),
  'lambda_l1': trial.suggest_loguniform('lambda_l1', 1e-8, 10.0),
  'lambda_l2': trial.suggest_loguniform('lambda_l2', 1e-8, 10.0),
  'min_gain_to_split': trial.suggest_loguniform('min_gain_to_split', 0.1, 1.0),
  'scale_pos_weight': trial.suggest_uniform('scale_pos_weight', 1.0, 100.0),


In [None]:
# 시각화
optuna.visualization.plot_optimization_history(study)

In [None]:
# 파라미터들관의 관계
optuna.visualization.plot_parallel_coordinate(study)

In [None]:
# 하이퍼파라미터 중요도
optuna.visualization.plot_param_importances(study)