In [22]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import optuna
from datetime import datetime
from preprocess.data_loader import load_data
from preprocess.preprocessing import preprocess
from preprocess.feature_selection import select_features
from models.dnn import build_dnn
from models.traditional import train_traditional_models
from evaluations.evaluation import evaluate_model
from configs import config
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from pytorch_tabnet.tab_model import TabNetClassifier
import os

In [23]:
# ---------- (1) 각 모델별 objective 함수 정의 ----------

def objective_dnn(trial, X_train, y_train):
    # 아키텍처(은닉층 구성)와 드롭아웃 비율 탐색
    layers = trial.suggest_categorical(
        'layers',
        [(512, 256), (1024, 512, 256), (1024, 512, 256, 128)]
    )
    dropout = trial.suggest_float('dropout', 0.1, 0.5)

    model = build_dnn(X_train.shape[1], layers, dropout=dropout, optimizer='Adam')
    history = model.fit(
        X_train, y_train,
        epochs=30, batch_size=32,
        validation_split=0.2, verbose=0
    )
    return max(history.history['val_accuracy'])

def objective_rf(trial, X_train, y_train):
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import cross_val_score
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 5, 30),
        'min_samples_split': trial.suggest_int('min_samples_split', 2, 10)
    }
    rf = RandomForestClassifier(**params, random_state=config.SEED)
    return cross_val_score(rf, X_train, y_train, cv=3, scoring='accuracy').mean()

def objective_xgb(trial, X_train, y_train):
    import xgboost as xgb
    from sklearn.model_selection import cross_val_score
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 400),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0)
    }
    clf = xgb.XGBClassifier(**params, random_state=config.SEED, use_label_encoder=False, eval_metric='mlogloss')
    return cross_val_score(clf, X_train, y_train, cv=3, scoring='accuracy').mean()

def objective_lgbm(trial, X_train, y_train):
    import lightgbm as lgb
    from sklearn.model_selection import cross_val_score
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 100, 400),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.7, 1.0)
    }
    clf = lgb.LGBMClassifier(**params, random_state=config.SEED)
    return cross_val_score(clf, X_train, y_train, cv=3, scoring='accuracy').mean()

def objective_svm(trial, X_train, y_train):
    from sklearn.svm import SVC
    from sklearn.model_selection import cross_val_score
    params = {
        'C': trial.suggest_float('C', 0.1, 10.0, log=True),
        'kernel': trial.suggest_categorical('kernel', ['rbf', 'linear', 'poly']),
        'gamma': trial.suggest_categorical('gamma', ['scale', 'auto'])
    }
    clf = SVC(**params, probability=True, random_state=config.SEED)
    return cross_val_score(clf, X_train, y_train, cv=3, scoring='accuracy').mean()

def objective_lr(trial, X_train, y_train):
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import cross_val_score
    params = {
        'C': trial.suggest_float('C', 0.01, 10.0, log=True),
        'solver': trial.suggest_categorical('solver', ['lbfgs', 'liblinear'])
    }
    clf = LogisticRegression(**params, max_iter=500, random_state=config.SEED)
    return cross_val_score(clf, X_train, y_train, cv=3, scoring='accuracy').mean()

def objective_tabnet(trial, X_train, y_train, X_valid, y_valid):
    params = {
        'n_d': trial.suggest_int('n_d', 8, 64),
        'n_a': trial.suggest_int('n_a', 8, 64),
        'n_steps': trial.suggest_int('n_steps', 3, 10),
        'gamma': trial.suggest_float('gamma', 1.0, 2.0),
        'lambda_sparse': trial.suggest_float('lambda_sparse', 1e-5, 1e-1, log=True)
    }
    clf = TabNetClassifier(**params, seed=config.SEED)
    clf.fit(
        X_train, y_train,
        eval_set=[(X_valid, y_valid)],
        patience=10, max_epochs=100, batch_size=1024
    )
    preds = clf.predict(X_valid)
    return evaluate_model(y_valid, preds)['accuracy']


In [24]:
# ---------- (2) 메인실행 ----------

def main(search_time_minutes=5):
    # 데이터 로딩 및 전처리
    df_original = load_data(config.DATA_PATH)
    df, numeric_cols, categorical_cols = preprocess(df_original)
    df['mh_PHQ_S_grouped'] = df['mh_PHQ_S'].apply(lambda x: 0 if x <= 4 else 1 if x <= 9 else 2)
    X = df.drop(['mh_PHQ_S', 'mh_PHQ_S_grouped'], axis=1)
    y = df['mh_PHQ_S_grouped']
    X = pd.DataFrame(SimpleImputer(strategy='mean').fit_transform(X), columns=X.columns)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=config.SEED)
    X_train_selected, selected_features, selector = select_features(X_train, y_train, config.SELECTED_FEATURES)
    X_test_selected = selector.transform(X_test)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_selected)
    X_test_scaled = scaler.transform(X_test_selected)
    
    # 탐색 시간(초) 설정
    timeout = search_time_minutes * 1

    # ------ (3) 모델 리스트에 원하는 objective 추가 ------
    model_objectives = [
        ('DNN', objective_dnn),
        ('RandomForest', objective_rf),
        ('XGBoost', objective_xgb),
        ('LightGBM', objective_lgbm),
        ('SVM', objective_svm),
        ('LogisticRegression', objective_lr),
    ]
    studies = {}
    for model_name, objective in model_objectives:
        print(f"\n[Optuna] {model_name} 하이퍼파라미터 튜닝 시작")
        study = optuna.create_study(direction='maximize')
        study.optimize(lambda trial: objective(trial, X_train_scaled, y_train), timeout=timeout)
        studies[model_name] = study
        print(f"[Optuna] {model_name} Best score: {study.best_value}, Best params: {study.best_params}")
            # TabNet만 별도로(입력 float32)
    print("\n[Optuna] TabNet 하이퍼파라미터 튜닝 시작")
    tabnet_study = optuna.create_study(direction='maximize')
    tabnet_study.optimize(
        lambda trial: objective_tabnet(
            trial,
            X_train_scaled.astype(np.float32), y_train.values,
            X_test_scaled.astype(np.float32), y_test.values
        ),
        timeout=timeout
    )
    studies['TabNet'] = tabnet_study
    print(f"[Optuna] TabNet Best score: {tabnet_study.best_value}, Best params: {tabnet_study.best_params}")
   # ------ (4) Best 파라미터로 성능 재학습/측정 ------
    model_records = []
    param_records = []

    for model_name, study in studies.items():
        best_params = study.best_params
        print(f"\n[최종 모델 학습] {model_name} | best_params: {best_params}")
        if model_name == 'DNN':
            model = build_dnn(
                X_train_scaled.shape[1],
                best_params['layers'],
                dropout=best_params['dropout']
            )
            model.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2, verbose=0)
            y_pred = model.predict(X_test_scaled).argmax(axis=1)
        elif model_name == 'RandomForest':
            from sklearn.ensemble import RandomForestClassifier
            model = RandomForestClassifier(**best_params, random_state=config.SEED)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
        elif model_name == 'XGBoost':
            import xgboost as xgb
            model = xgb.XGBClassifier(**best_params, random_state=config.SEED, use_label_encoder=False, eval_metric='mlogloss')
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
        elif model_name == 'LightGBM':
            import lightgbm as lgb
            model = lgb.LGBMClassifier(**best_params, random_state=config.SEED)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
        elif model_name == 'SVM':
            from sklearn.svm import SVC
            model = SVC(**best_params, probability=True, random_state=config.SEED)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
        elif model_name == 'LogisticRegression':
            from sklearn.linear_model import LogisticRegression
            model = LogisticRegression(**best_params, max_iter=500, random_state=config.SEED)
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
        elif model_name == 'TabNet':
            model = TabNetClassifier(**best_params, seed=config.SEED)
            model.fit(X_train_scaled.astype(np.float32), y_train.values, max_epochs=100, batch_size=1024)
            y_pred = model.predict(X_test_scaled.astype(np.float32))
        else:
            continue

        metrics = evaluate_model(y_test, y_pred)
        model_records.append({'Model': model_name, **metrics})
        param_records.append({'Model': model_name, 'Hyperparameters': best_params})

    # ------ (5) 결과 저장 ------
    now = datetime.now().strftime("%Y%m%d_%H%M%S")
    os.makedirs("reports", exist_ok=True)
    excel_filename = f"reports/{now}_detailed_model_report.xlsx"
    with pd.ExcelWriter(excel_filename, engine='xlsxwriter') as writer:
        pd.DataFrame(model_records).to_excel(writer, sheet_name='Performance', index=False)
        pd.DataFrame(param_records).to_excel(writer, sheet_name='Hyperparameters', index=False)
    print(f"\n모든 모델 결과가 '{excel_filename}'에 저장되었습니다.")

if __name__ == "__main__":
    main(search_time_minutes=5)

[I 2025-06-17 00:22:49,119] A new study created in memory with name: no-name-a544fd32-57d7-43b1-8453-8bb905760394



[Optuna] DNN 하이퍼파라미터 튜닝 시작


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
I0000 00:00:1750087369.544804   21211 gpu_device.cc:2022] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 9502 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Ti, pci bus id: 0000:01:00.0, compute capability: 8.9
I0000 00:00:1750087371.531310   22206 service.cc:148] XLA service 0x7f00c8006970 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1750087371.531497   22206 service.cc:156]   StreamExecutor device (0): NVIDIA GeForce RTX 4070 Ti, Compute Capability 8.9
2025-06-17 00:22:51.572996: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:268] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1750087371.830940   22206 cuda_dnn.cc:529] Loaded cuDNN version 90501
I0000 00:00:1750087373.560891   22206 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most on

[Optuna] DNN Best score: 0.8480446934700012, Best params: {'layers': (1024, 512, 256, 128), 'dropout': 0.45974702987733895}

[Optuna] RandomForest 하이퍼파라미터 튜닝 시작


[I 2025-06-17 00:23:05,212] Trial 0 finished with value: 0.8504024895082652 and parameters: {'n_estimators': 250, 'max_depth': 15, 'min_samples_split': 9}. Best is trial 0 with value: 0.8504024895082652.
[I 2025-06-17 00:23:07,358] Trial 1 finished with value: 0.8448123491133228 and parameters: {'n_estimators': 409, 'max_depth': 28, 'min_samples_split': 2}. Best is trial 0 with value: 0.8504024895082652.
[I 2025-06-17 00:23:08,404] Trial 2 finished with value: 0.8495080850501968 and parameters: {'n_estimators': 216, 'max_depth': 27, 'min_samples_split': 6}. Best is trial 0 with value: 0.8504024895082652.
[I 2025-06-17 00:23:10,209] Trial 3 finished with value: 0.8459304672179235 and parameters: {'n_estimators': 361, 'max_depth': 29, 'min_samples_split': 4}. Best is trial 0 with value: 0.8504024895082652.
[I 2025-06-17 00:23:10,210] A new study created in memory with name: no-name-ee1dda42-41ef-4e8c-a9f4-8509ad9de28c
Parameters: { "use_label_encoder" } are not used.



[Optuna] RandomForest Best score: 0.8504024895082652, Best params: {'n_estimators': 250, 'max_depth': 15, 'min_samples_split': 9}

[Optuna] XGBoost 하이퍼파라미터 튜닝 시작


Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

[I 2025-06-17 00:23:11,931] Trial 0 finished with value: 0.8246868234012577 and parameters: {'n_estimators': 244, 'max_depth': 12, 'learning_rate': 0.2861942009547847, 'subsample': 0.9235584447011451}. Best is trial 0 with value: 0.8246868234012577.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

[I 2025-06-17 00:23:13,078] Trial 1 finished with value: 0.8197675238605383 and parameters: {'n_estimators': 310, 'max_depth': 7, 'learning_rate': 0.2727324385246051, 'subsample': 0.9543382935305029}. Best is trial 0 with value: 0.8246868234012577.
Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

Parameters: { "use_label_encoder" } are not used.

[I 2025-06-17 00:23:14,304] Trial 2 finished with value: 0.8266993459639268 and parameters: {

[Optuna] XGBoost Best score: 0.8282647413189053, Best params: {'n_estimators': 155, 'max_depth': 8, 'learning_rate': 0.1774275966648856, 'subsample': 0.8145226031687541}

[Optuna] LightGBM 하이퍼파라미터 튜닝 시작
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000980 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000080 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 

[I 2025-06-17 00:23:15,826] Trial 0 finished with value: 0.8526381255467181 and parameters: {'n_estimators': 111, 'max_depth': 10, 'learning_rate': 0.018785241035251175, 'subsample': 0.9426541494630125}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000108 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 346
[LightGBM] [Info] Number of data points in the train set: 2982, number of used features: 11
[LightGBM] [Info] Start training from score -0.402452
[LightGBM] [Info] Start training from score -2.228908
[LightGBM] [Info] Start training from score -1.497559
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000111 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000091 seconds

[I 2025-06-17 00:23:16,341] Trial 1 finished with value: 0.8307249912600135 and parameters: {'n_estimators': 168, 'max_depth': 12, 'learning_rate': 0.13455409615938602, 'subsample': 0.7776798727405171}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 346
[LightGBM] [Info] Number of data points in the train set: 2982, number of used features: 11
[LightGBM] [Info] Start training from score -0.402452
[LightGBM] [Info] Start training from score -2.228908
[LightGBM] [Info] Start training from score -1.497559
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000081 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start trai

[I 2025-06-17 00:23:16,645] Trial 2 finished with value: 0.837209386070337 and parameters: {'n_estimators': 230, 'max_depth': 4, 'learning_rate': 0.12122238031691711, 'subsample': 0.9184702886624903}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000146 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000077 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.231693
[LightGBM] [Info] Start trai

[I 2025-06-17 00:23:17,543] Trial 3 finished with value: 0.8228986146558697 and parameters: {'n_estimators': 313, 'max_depth': 11, 'learning_rate': 0.1540939118326303, 'subsample': 0.9223472379052813}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000144 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.231693
[LightGBM] [Info] Start trai

[I 2025-06-17 00:23:17,897] Trial 4 finished with value: 0.8347498863426644 and parameters: {'n_estimators': 157, 'max_depth': 6, 'learning_rate': 0.10001674085098382, 'subsample': 0.705520330210784}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000113 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000079 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.231693
[LightGBM] [Info] Start training from score -1.497224
[LightGBM] [Info] Auto-choosing col-wi

[I 2025-06-17 00:23:18,446] Trial 5 finished with value: 0.8222291241858309 and parameters: {'n_estimators': 319, 'max_depth': 5, 'learning_rate': 0.19817791018710745, 'subsample': 0.8140520006409816}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000114 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000073 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.231693
[LightGBM] [Info] Start training from score -1.497224
[LightGBM] [Info] Auto-choosing col-wi

[I 2025-06-17 00:23:19,213] Trial 6 finished with value: 0.8173093745170501 and parameters: {'n_estimators': 302, 'max_depth': 7, 'learning_rate': 0.281611148374803, 'subsample': 0.9711302747216319}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000115 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000078 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.231693
[LightGBM] [Info] Start training from score -1.497224
[LightGBM] [Info] Auto-choosing col-wi

[I 2025-06-17 00:23:19,738] Trial 7 finished with value: 0.8372095361130242 and parameters: {'n_estimators': 244, 'max_depth': 6, 'learning_rate': 0.05558234425674237, 'subsample': 0.7852187380167105}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000074 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.231693
[LightGBM] [Info] Start trai

[I 2025-06-17 00:23:20,154] Trial 8 finished with value: 0.8307248412173264 and parameters: {'n_estimators': 248, 'max_depth': 5, 'learning_rate': 0.1260929438452848, 'subsample': 0.8419405668013356}. Best is trial 0 with value: 0.8526381255467181.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000078 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 349
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.228573
[LightGBM] [Info] Start training from score -1.498724
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000075 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 347
[LightGBM] [Info] Number of data points in the train set: 2981, number of used features: 11
[LightGBM] [Info] Start training from score -0.402116
[LightGBM] [Info] Start training from score -2.231693
[LightGBM] [Info] Start trai

[I 2025-06-17 00:23:20,516] Trial 9 finished with value: 0.8389987951572223 and parameters: {'n_estimators': 201, 'max_depth': 5, 'learning_rate': 0.08318606744738592, 'subsample': 0.7782367954809913}. Best is trial 0 with value: 0.8526381255467181.
[I 2025-06-17 00:23:20,517] A new study created in memory with name: no-name-600ded4d-577a-4e7b-8009-981188de8aef


[Optuna] LightGBM Best score: 0.8526381255467181, Best params: {'n_estimators': 111, 'max_depth': 10, 'learning_rate': 0.018785241035251175, 'subsample': 0.9426541494630125}

[Optuna] SVM 하이퍼파라미터 튜닝 시작


[I 2025-06-17 00:23:21,805] Trial 0 finished with value: 0.6688283316603574 and parameters: {'C': 0.6844619858225305, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 0 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,305] Trial 1 finished with value: 0.6688283316603574 and parameters: {'C': 8.104571307616187, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 0 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,306] A new study created in memory with name: no-name-789938b6-4081-4c8a-8ec7-bc85cceb0fb7
[I 2025-06-17 00:23:28,338] Trial 0 finished with value: 0.6686046180138249 and parameters: {'C': 1.25948281203452, 'solver': 'liblinear'}. Best is trial 0 with value: 0.6686046180138249.
[I 2025-06-17 00:23:28,356] Trial 1 finished with value: 0.6688283316603574 and parameters: {'C': 0.10587757114207608, 'solver': 'liblinear'}. Best is trial 1 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,377] Trial 2 finished with value: 0.6688283316603574 and parameters: {'C': 0.

[Optuna] SVM Best score: 0.6688283316603574, Best params: {'C': 0.6844619858225305, 'kernel': 'linear', 'gamma': 'auto'}

[Optuna] LogisticRegression 하이퍼파라미터 튜닝 시작


[I 2025-06-17 00:23:28,516] Trial 8 finished with value: 0.6688283316603574 and parameters: {'C': 0.015697018887196306, 'solver': 'liblinear'}. Best is trial 1 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,560] Trial 9 finished with value: 0.6683810544099796 and parameters: {'C': 4.149677919259978, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,582] Trial 10 finished with value: 0.6688283316603574 and parameters: {'C': 0.321958177773173, 'solver': 'liblinear'}. Best is trial 1 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,606] Trial 11 finished with value: 0.6688283316603574 and parameters: {'C': 0.06701818960580298, 'solver': 'lbfgs'}. Best is trial 1 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,629] Trial 12 finished with value: 0.6688283316603574 and parameters: {'C': 0.2129428223025101, 'solver': 'liblinear'}. Best is trial 1 with value: 0.6688283316603574.
[I 2025-06-17 00:23:28,651] Trial 13 finished with val

[Optuna] LogisticRegression Best score: 0.6688283316603574, Best params: {'C': 0.10587757114207608, 'solver': 'liblinear'}

[Optuna] TabNet 하이퍼파라미터 튜닝 시작
epoch 0  | loss: 1.41169 | val_0_accuracy: 0.67024 |  0:00:00s
epoch 1  | loss: 0.79598 | val_0_accuracy: 0.82127 |  0:00:00s
epoch 2  | loss: 0.66404 | val_0_accuracy: 0.82216 |  0:00:00s
epoch 3  | loss: 0.58917 | val_0_accuracy: 0.81412 |  0:00:00s
epoch 4  | loss: 0.54759 | val_0_accuracy: 0.82216 |  0:00:00s
epoch 5  | loss: 0.51486 | val_0_accuracy: 0.83467 |  0:00:00s
epoch 6  | loss: 0.49775 | val_0_accuracy: 0.83646 |  0:00:01s
epoch 7  | loss: 0.48735 | val_0_accuracy: 0.82484 |  0:00:01s
epoch 8  | loss: 0.47454 | val_0_accuracy: 0.83646 |  0:00:01s
epoch 9  | loss: 0.46705 | val_0_accuracy: 0.83467 |  0:00:01s
epoch 10 | loss: 0.47721 | val_0_accuracy: 0.83557 |  0:00:01s
epoch 11 | loss: 0.47551 | val_0_accuracy: 0.83646 |  0:00:01s
epoch 12 | loss: 0.46562 | val_0_accuracy: 0.83735 |  0:00:01s
epoch 13 | loss: 0.46311 | 

[I 2025-06-17 00:23:38,304] Trial 0 finished with value: 0.839142091152815 and parameters: {'n_d': 31, 'n_a': 59, 'n_steps': 3, 'gamma': 1.2808957449155602, 'lambda_sparse': 0.000319414706685981}. Best is trial 0 with value: 0.839142091152815.


epoch 32 | loss: 0.44755 | val_0_accuracy: 0.83914 |  0:00:03s

Early stopping occurred at epoch 32 with best_epoch = 22 and best_val_0_accuracy = 0.83914




epoch 0  | loss: 1.09571 | val_0_accuracy: 0.81859 |  0:00:00s
epoch 1  | loss: 0.86944 | val_0_accuracy: 0.7328  |  0:00:00s
epoch 2  | loss: 0.74953 | val_0_accuracy: 0.82663 |  0:00:00s
epoch 3  | loss: 0.6593  | val_0_accuracy: 0.81948 |  0:00:00s
epoch 4  | loss: 0.58631 | val_0_accuracy: 0.75246 |  0:00:00s
epoch 5  | loss: 0.55814 | val_0_accuracy: 0.83467 |  0:00:00s
epoch 6  | loss: 0.53907 | val_0_accuracy: 0.83557 |  0:00:01s
epoch 7  | loss: 0.53337 | val_0_accuracy: 0.83289 |  0:00:01s
epoch 8  | loss: 0.51341 | val_0_accuracy: 0.83646 |  0:00:01s
epoch 9  | loss: 0.51034 | val_0_accuracy: 0.83646 |  0:00:01s
epoch 10 | loss: 0.49909 | val_0_accuracy: 0.83646 |  0:00:01s
epoch 11 | loss: 0.48226 | val_0_accuracy: 0.83646 |  0:00:02s
epoch 12 | loss: 0.48172 | val_0_accuracy: 0.83646 |  0:00:02s
epoch 13 | loss: 0.49258 | val_0_accuracy: 0.83646 |  0:00:02s
epoch 14 | loss: 0.46662 | val_0_accuracy: 0.83735 |  0:00:02s
epoch 15 | loss: 0.47256 | val_0_accuracy: 0.83735 |  0

[I 2025-06-17 00:23:42,621] Trial 1 finished with value: 0.837354781054513 and parameters: {'n_d': 27, 'n_a': 58, 'n_steps': 5, 'gamma': 1.1557493059690649, 'lambda_sparse': 0.008455200538713134}. Best is trial 0 with value: 0.839142091152815.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[Optuna] TabNet Best score: 0.839142091152815, Best params: {'n_d': 31, 'n_a': 59, 'n_steps': 3, 'gamma': 1.2808957449155602, 'lambda_sparse': 0.000319414706685981}

[최종 모델 학습] DNN | best_params: {'layers': (1024, 512, 256, 128), 'dropout': 0.45974702987733895}
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step

[최종 모델 학습] RandomForest | best_params: {'n_estimators': 250, 'max_depth': 15, 'min_samples_split': 9}

[최종 모델 학습] XGBoost | best_params: {'n_estimators': 155, 'max_depth': 8, 'learning_rate': 0.1774275966648856, 'subsample': 0.8145226031687541}


Parameters: { "use_label_encoder" } are not used.




[최종 모델 학습] LightGBM | best_params: {'n_estimators': 111, 'max_depth': 10, 'learning_rate': 0.018785241035251175, 'subsample': 0.9426541494630125}
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000102 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 383
[LightGBM] [Info] Number of data points in the train set: 4472, number of used features: 11
[LightGBM] [Info] Start training from score -0.402228
[LightGBM] [Info] Start training from score -2.229724
[LightGBM] [Info] Start training from score -1.497836

[최종 모델 학습] SVM | best_params: {'C': 0.6844619858225305, 'kernel': 'linear', 'gamma': 'auto'}

[최종 모델 학습] LogisticRegression | best_params: {'C': 0.10587757114207608, 'solver': 'liblinear'}

[최종 모델 학습] TabNet | best_params: {'n_d': 31, 'n_a': 59, 'n_steps': 3, 'gamma': 1.2808957449155602, 'lambda_sparse': 0.000319414706685981}
epoch 0  | 



epoch 1  | loss: 0.75721 |  0:00:00s
epoch 2  | loss: 0.62052 |  0:00:00s
epoch 3  | loss: 0.58739 |  0:00:00s
epoch 4  | loss: 0.55685 |  0:00:00s
epoch 5  | loss: 0.50375 |  0:00:00s
epoch 6  | loss: 0.50461 |  0:00:00s
epoch 7  | loss: 0.4871  |  0:00:00s
epoch 8  | loss: 0.48564 |  0:00:00s
epoch 9  | loss: 0.47916 |  0:00:00s
epoch 10 | loss: 0.4714  |  0:00:01s
epoch 11 | loss: 0.47023 |  0:00:01s
epoch 12 | loss: 0.46859 |  0:00:01s
epoch 13 | loss: 0.47329 |  0:00:01s
epoch 14 | loss: 0.46771 |  0:00:01s
epoch 15 | loss: 0.46487 |  0:00:01s
epoch 16 | loss: 0.46419 |  0:00:01s
epoch 17 | loss: 0.45148 |  0:00:01s
epoch 18 | loss: 0.46222 |  0:00:01s
epoch 19 | loss: 0.45709 |  0:00:01s
epoch 20 | loss: 0.46398 |  0:00:02s
epoch 21 | loss: 0.44753 |  0:00:02s
epoch 22 | loss: 0.45005 |  0:00:02s
epoch 23 | loss: 0.45791 |  0:00:02s
epoch 24 | loss: 0.45281 |  0:00:02s
epoch 25 | loss: 0.45521 |  0:00:02s
epoch 26 | loss: 0.45792 |  0:00:02s
epoch 27 | loss: 0.45294 |  0:00:02s
e