In [1]:
import pandas as pd
import numpy as np
import copy
import shap
import seaborn as sns
import matplotlib.pyplot as plt
import optuna
from optuna.samplers import TPESampler
from optuna.trial import Trial
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.feature_selection import RFECV
from sklearn.preprocessing import MinMaxScaler, MaxAbsScaler, StandardScaler, RobustScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV, train_test_split, KFold, LeaveOneOut, StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix
from sklearn.utils import shuffle
from sklearn.base import clone
from sklearn import preprocessing

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
filename = './머신러닝데이터.xlsx'

In [4]:
# Imputation한 데이터를 불러온다
scoring_transition_1 = pd.read_excel(filename, sheet_name='scoring_result_merged', engine='openpyxl')
scoring_transition_0 = pd.read_excel(filename, sheet_name='scoring_origin', engine='openpyxl')
task_completion_time = pd.read_excel(filename, sheet_name='task_completion_time_mean_imput', engine='openpyxl')
sms_reply = pd.read_excel(filename, sheet_name='SMS_reply', engine='openpyxl')
weather_searching_site_region = pd.read_excel(filename, sheet_name='weather_searching_사이트주소', engine='openpyxl')

# 모든 Feature List
sms_reply, weather_searching_사이트주소는 feature 리스트가 같습니다

In [5]:
# scoring = ['Routine_Screen_Unlock_Pattern', 'Routine_Phone_Register', 'Routine_Phone_Receive', 'Routine_SMS_Reply', 'Routine_Camera', 
# 'Routine_Location_Searching', 'Routine_Weather_Searching', 'Routine_Transfer', 'Routine_Location_Switching', 'Routine_Weather_Switching', 
# 'Transition_Phone_Register', 'Transition_Phone_Receive', 'Transition_SMS_Reply', 'Transition_Camera', 'Transition_Location_Searching', 
# 'Transition_Weather_Searching', 'Transition_Transfer', 'Transition_Location_Switching', 'Transition_Weather_Switching', 'Result_Phone_Register', 
# 'Result_Phone_Receive', 'Result_SMS_Reply', 'Result_Camera', 'Result_Location_Searching', 'Result_Weather_Searching', 'Result_Transfer', 
# 'Result_Location_Switching', 'Result_Weather_Switching', 'routine_sum_trial', 'transition_sum_trial', 'result_sum_trial', 'all_sum_trial']

# task = ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_total_time_nt', 
# 'phone_register_screen_unlocking_time', 'phone_register_sms_start_time', 'phone_register_instruction_check_time', 'phone_register_total_time', 
# 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_receive_noti response', 'phone_receive_total_time_nt', 'phone_receive_total_time', 
# 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_noti_response_time', 'sms_reply_total_time_nt', 'sms_reply_compeltion_time', 
# 'sms_reply_total_time', 'camera_noti_response', 'camera_total_time_nt', 'camera_instruction_check_time', 'camera_total_time', 'camera_taken_time', 
# 'camera_gallery_delete_time', 'transfer_noti_response_time', 'transfer_total_time_nt', 'transfer_instruction_check_time', 'transfer_total_time', 
# 'transfer_usage_time', 'transfer_share_time', 'weather_searching_noti_response', 'weather_searching_total_time_nt', 'weather_searching_instruction_check_time', 
# 'weather_searching_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_switching_notification_response_time', 
# 'weather_switching_total_time_nt', 'weather_switching_instruction_check_time', 'weather_switching_total_time', 
# 'weather_switching_first_searching_keyword_typing_time', 'weather_switching_second_searching_keyword_typing_time', 
# 'weather_switching_information_searching_time', 'weather_switching_information_sharing_texting_time', 'location_searching_noti_response_time', 
# 'location_searching_total_time_nt', 'location_searching_instruction_check_time', 'location_searching_total_time', 'location_searching_food_typing_time', 
# 'location_searching_foodtyping_mapfinding_time', 'location_searching_mapfinding_time', 'location_searching_route_sharing_texting_time', 
# 'location_switching_notification_response', 'location_switching_total_time_nt', 'location_switching_instruction_check', 'location_switching_total_time', 
# 'location_switching_first_map_typing_time', 'location_switching_second_map_typing_time', 'location_switching_map_finding_time', 
# 'location_switching_map_route_sharing_time']

# sms_and_ws = ['average intercharacter time', 'std intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
# 'total_time (S)', 'total_count', 'IS (C+IF+INF+F)', 'backspace (F)', 'C', 'IF', 'INF', 'Shift', 'ISø (IS+S)', 'T (total_count-F)', 'WPS', 'WPM', 'AdjWPS', 
# 'AdjWPM', 'CPS', 'KSPS', 'GPS', 'KSPC', 'GPC', 'MSD', 'COER', 'UER', 'TER', 'CE', 'PC', 'UB', 'WB', 'CPC']

In [6]:
# BAC column만 가져온다(Label)
y = scoring_transition_1['BAC']

# Participant, BAC column을 제거한다
scoring_transition_1 = scoring_transition_1.drop(['Participant', 'BAC'], axis=1)
scoring_transition_0 = scoring_transition_0.drop(['Participant', 'BAC'], axis=1)
task_completion_time = task_completion_time.drop(['Participant', 'BAC'], axis=1)
sms_reply = sms_reply.drop(['Participant', 'BAC'], axis=1)
weather_searching_site_region = weather_searching_site_region.drop(['Participant', 'BAC'], axis=1)

In [7]:
# 라벨 3개용 features
# 각 데이터별로 필요한 column들만 뽑는다
scoring_transition_1_columns = []

scoring_transition_0_columns = ['Routine_Screen_Unlock_Pattern', 'Transition_Phone_Register', 'Transition_Phone_Receive', 'Transition_SMS_Reply', 
'Transition_Camera', 'Transition_Weather_Searching', 'Transition_Transfer', 'transition_sum_trial']

task_completion_time_columns = ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 
'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 
'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 
'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 
'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time']

sms_reply_columns = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'GPS', 'COER', 'UER', 'TER', 'UB', 'WB']

weather_searching_site_region_columns = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'COER', 'UB', 'WB']

In [8]:
# 라벨을 2개용 features
scoring_transition_1_columns_two_label = []

scoring_transition_0_columns_two_label = ['Routine_Screen_Unlock_Pattern', 'Transition_Phone_Register', 'Transition_Phone_Receive', 'Transition_SMS_Reply', 
'Transition_Camera', 'Transition_Weather_Searching', 'Transition_Transfer', 'transition_sum_trial']

task_completion_time_columns_two_label = ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 
'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 
'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 
'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 
'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time']

sms_reply_columns_two_label = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'GPS', 'COER', 'UER', 'TER', 'UB', 'WB']

weather_searching_site_region_columns_two_label = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'COER', 'UB', 'WB']

In [9]:
# 위에서 정한 feature들로 각각의 데이터 프레임을 생성한다
scoring_transition_1_df = scoring_transition_1[scoring_transition_1_columns]
scoring_transition_0_df = scoring_transition_0[scoring_transition_0_columns]
task_completion_time_df = task_completion_time[task_completion_time_columns]
sms_reply_df = sms_reply[sms_reply_columns]
weather_searching_site_region_df = weather_searching_site_region[weather_searching_site_region_columns]

scoring_transition_1_df_two_label = scoring_transition_1[scoring_transition_1_columns_two_label]
scoring_transition_0_df_two_label = scoring_transition_0[scoring_transition_0_columns_two_label]
task_completion_time_df_two_label = task_completion_time[task_completion_time_columns_two_label]
sms_reply_df_two_label = sms_reply[sms_reply_columns_two_label]
weather_searching_site_region_df_two_label = weather_searching_site_region[weather_searching_site_region_columns_two_label]

# feature명 수정하기
위에서 적은 feature명을 그대로 복사해서 가져온 다음 바꾸려는 feature만 수정하면 됩니다

In [10]:
# 라벨 3개용 features의 feature명을 수정한다!
scoring_transition_1_columns = []

scoring_transition_0_columns = ['Routine_Screen_Unlock_Pattern', 'Transition_Phone_Register', 'Transition_Phone_Receive', 'Transition_SMS_Reply', 
'Transition_Camera', 'Transition_Weather_Searching', 'Transition_Transfer', 'transition_sum_trial']

task_completion_time_columns = ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 
'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 
'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 
'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 
'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time']

sms_reply_columns = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'GPS', 'COER', 'UER', 'TER', 'UB', 'WB']

weather_searching_site_region_columns = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'COER', 'UB', 'WB']

scoring_transition_1_df.columns = scoring_transition_1_columns
scoring_transition_0_df.columns = scoring_transition_0_columns
task_completion_time_df.columns = task_completion_time_columns
sms_reply_df.columns = sms_reply_columns
weather_searching_site_region_df.columns = weather_searching_site_region_columns

In [11]:
# 라벨을 2개용 features의 feature명을 수정한다!
scoring_transition_1_columns_two_label = []

scoring_transition_0_columns_two_label = ['Routine_Screen_Unlock_Pattern', 'Transition_Phone_Register', 'Transition_Phone_Receive', 'Transition_SMS_Reply', 
'Transition_Camera', 'Transition_Weather_Searching', 'Transition_Transfer', 'transition_sum_trial']

task_completion_time_columns_two_label = ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 
'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 
'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 
'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 
'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time']

sms_reply_columns_two_label = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'GPS', 'COER', 'UER', 'TER', 'UB', 'WB']

weather_searching_site_region_columns_two_label = ['average intercharacter time', 'median intercharacter time', 'min intercharacter time', 'max intercharacter time', 
'CPS', 'KSPS', 'COER', 'UB', 'WB']

scoring_transition_1_df_two_label.columns = scoring_transition_1_columns_two_label
scoring_transition_0_df_two_label.columns = scoring_transition_0_columns_two_label
task_completion_time_df_two_label.columns = task_completion_time_columns_two_label
sms_reply_df_two_label.columns = sms_reply_columns_two_label
weather_searching_site_region_df_two_label.columns = weather_searching_site_region_columns_two_label

In [12]:
# sms reply와 weather searching 사이트 주소의 feature명이 똑같으므로 feature명을 수정한다
sms_reply_df_columns = ['sms_' + col_name for col_name in sms_reply_df.columns] # sms reply feature 이름 앞에 sms_를 붙인다
weather_searching_site_region_df_columns = ['ws_' + col_name for col_name in weather_searching_site_region_df.columns] # ws feature 이름 앞에 ws_를 붙인다

sms_reply_df.columns = sms_reply_df_columns # 수정된 sms reply feature명을 적용한다
weather_searching_site_region_df.columns = weather_searching_site_region_df_columns # 수정된 ws feature명을 적용한다

# 위와 동일하게 작동한다
sms_reply_df_two_label_columns = ['sms_' + col_name for col_name in sms_reply_df_two_label.columns]
weather_searching_site_region_df_two_label_columns = ['ws_' + col_name for col_name in weather_searching_site_region_df_two_label.columns]

sms_reply_df_two_label.columns = sms_reply_df_two_label_columns
weather_searching_site_region_df_two_label.columns = weather_searching_site_region_df_two_label_columns

### merged_df : Label 3개인 데이터 집합
### merged_df_two_label : Label 2개인 데이터 집합

In [13]:
# 필요한 데이터 프레임들을 합친다
merged_df_list = [scoring_transition_1_df, scoring_transition_0_df, task_completion_time_df, sms_reply_df, weather_searching_site_region_df]
merged_df_list_two_label = [scoring_transition_1_df_two_label, scoring_transition_0_df_two_label, task_completion_time_df_two_label, 
sms_reply_df_two_label, weather_searching_site_region_df_two_label]

merged_df = pd.concat(merged_df_list, axis=1)
merged_df_two_label = pd.concat(merged_df_list_two_label, axis=1)

In [14]:
# Categorical 데이터에 대해서 인코딩한다
# 어떤 데이터가 Categorical 데이터인가?

# 라벨에 대해서 인코딩한다
encoder = preprocessing.LabelEncoder()
y = encoder.fit_transform(y)

### N : 3개 라벨에 대한 feature 개수 / M : 2개 라벨에 대한 feature 개수
merged_df.shape = (360, N), y.shape = (360, 1), 라벨 수: 3 </br>
merged_df_two_label.shape = (360, M), changed_label.shape = (360, 1), 라벨 수: 2 </br>
changed_df.shape = (240, M), changed_y.shape = (240, 1), 라벨 수: 2 </br>

In [15]:
# 라벨에 대해서 인코딩한다(0 -> 0, 0.06 -> 1, 0.09 -> 2)
encoder = preprocessing.LabelEncoder()
y = encoder.fit_transform(y)

# 3개의 라벨을 2개의 라벨로 바꾼다
changed_label = copy.deepcopy(y)

for i in range(len(changed_label)):
    if changed_label[i] == 1:
        changed_label[i] = 0

for i in range(len(changed_label)):
    if changed_label[i] == 2:
        changed_label[i] = 1

# 라벨 0과 라벨 0.06의 평균을 구해서 하나의 라벨로 만든다
temp_y = pd.Series(y, name='Label')
temp_df = pd.concat([temp_y, merged_df_two_label], axis=1)

changed_df = pd.DataFrame(columns=temp_df.columns)

num = 0
for row_idx in range(0, len(temp_df), 9):
    for i in range(3):
        changed_df.loc[num] = (temp_df.loc[row_idx + i] + temp_df.loc[row_idx + i + 3])/2
        num += 1
    for i in range(3):
        changed_df.loc[num] = temp_df.loc[row_idx + 6 + i]
        num += 1

changed_y = changed_df['Label']
changed_df = changed_df.drop(['Label'], axis=1)
changed_y = encoder.fit_transform(changed_y)

# 3 Labels, 2 Labels(just change), 2 Labels(average)에 대한 데이터
label_processing = ['3 Labels', '2 Labels(just change)', '2 Labels(average)']
data_list = [(merged_df, y), (merged_df_two_label, changed_label), (changed_df, changed_y)]

In [16]:
scaler_list = {
    'minmax': MinMaxScaler(),
    'maxabs': MaxAbsScaler(),
    'standard': StandardScaler(),
    'robust': RobustScaler()
}

label_list = {
    'three_label': data_list[0],
    'two_label_changed': data_list[1],
    'two_label_average': data_list[2]
}

# scaler_name과 label_name을 인자로 받아서 해당하는 X, y를 반환한다
def getXy(scaler_name, label_name):
    if scaler_name not in scaler_list.keys() and scaler_name != 'no_scale':
        raise ValueError("scaler_name must be 'minmax' or 'maxabs' or 'standard' or 'robust' or 'no_scale!")

    if label_name not in label_list.keys():
        raise ValueError("label_name must be 'three_label' or 'two_label_changed' or 'two_label_average'!")

    X, y = label_list[label_name]

    if scaler_name != 'no_scale':
        scaler = scaler_list[scaler_name]
        df = X.copy()

        df[df.columns] = scaler.fit_transform(df[df.columns])

        X = df
        X[X.columns] = scaler.fit_transform(X[X.columns])
    
    return (X, y)

# 어떤 label의 데이터로 돌릴 것인지, 어떤 scaler를 적용할 것인지, inner loop에서 어떤 cv 방법을 적용할 것인지, Stratified K-fold의 K를 정한다

### scaler_name = 'minmax' or 'maxabs' or 'standard' or 'robust' or 'no_scale'
### label_name = 'three_label' or 'two_label_changed' or 'two_label_average'
### cv_method = 'loso' or 'k_fold'
### n_fold = 39(default)

In [17]:
scaler_name = 'no_scale'
label_name = 'two_label_changed'
cv_method = 'k_fold'
n_fold = 9

X, y = getXy(scaler_name=scaler_name, label_name=label_name)

# Optuna의 반복 수를 설정한다

In [18]:
n_trials = 1

In [59]:
def NestedCVwithOptuna(objective, clf, clf_name):
    metric_df = pd.DataFrame(columns=['model', 'accuracy', 'precision', 'recall', 'f1_score', 'roc_auc_score'])

    # 클래스가 몇 개인지 확인한다
    class_nums = len(set(y))
    print('class_nums : ', class_nums)
    n_splits = 40
    kfold = KFold(n_splits=n_splits)

    mean_accuracy = 0
    mean_precision = 0
    mean_recall = 0
    mean_f1 = 0
    mean_roc_auc = 0
    
    list_shap_values = list()
    list_test_sets = list()
    
    shap_df_list = [[], [], []]

    for num, (train_idx, val_idx) in enumerate(kfold.split(X)):
        X_train, X_val = X.iloc[train_idx, :], X.iloc[val_idx, :]
        y_train, y_val = y[train_idx], y[val_idx]
        
        classifier = clone(clf)

        # Feature Selection을 진행한다
        selector = RFECV(classifier, step=2, cv=9, min_features_to_select=20)
        selector = selector.fit(X_train, y_train)

        columns = X_train.columns
        selected_list = selector.support_

        selected_features = [col_name for i, col_name in enumerate(columns) if selected_list[i]]

        # 뽑힌 feature들로 데이터를 만든다
        X_train = X_train[selected_features]
        X_val = X_val[selected_features]

        # Bayesian Optimization을 진행한다
        sampler = TPESampler()

        study = optuna.create_study(direction='maximize', sampler=sampler, study_name=f'{clf_name} Study')
        study.optimize(objective(X_train, y_train), n_trials=n_trials)

        best_params = study.best_params
        print('Optuna Best score : ', study.best_value)
        print('Best parameters : ', best_params)
        print('Selected features Num: ', len(selected_features))
        print('Selected features : ', selected_features)

        if clf_name == 'lightgbm':
            best_clf = LGBMClassifier(**best_params, random_state=42)
        elif clf_name == 'xgboost':
            best_clf = XGBClassifier(**best_params, random_state=42)
        elif clf_name == 'randomforest':
            best_clf = RandomForestClassifier(**best_params, random_state=42)
        elif clf_name == 'gbm':
            best_clf = GradientBoostingClassifier(**best_params, random_state=42)
        
        best_clf.fit(X_train, y_train)

        y_pred = best_clf.predict(X_val)
        y_pred_prob = best_clf.predict_proba(X_val)

        # 개별 performance metric을 구한다
        accuracy = accuracy_score(y_val, y_pred)
        precision = precision_score(y_val, y_pred, average='macro')
        recall = recall_score(y_val, y_pred, average='macro')
        f1 = f1_score(y_val, y_pred, average='macro')
        if class_nums > 2:
            roc_auc = roc_auc_score(y_val, y_pred_prob, average='macro', multi_class='ovr')
        else:
            roc_auc = roc_auc_score(y_val, y_pred)
        
        # 개별 performance metric을 데이터 프레임으로 저장한다
        metric_df.loc[len(metric_df)] = [f'model_{num+1}', accuracy, precision, recall, f1, roc_auc]

        # 평균 performance metric을 구한다
        mean_accuracy += accuracy / n_splits
        mean_precision += accuracy / n_splits
        mean_recall += recall / n_splits
        mean_f1 += f1 / n_splits
        mean_roc_auc += roc_auc / n_splits

        print('test accuracy : ', accuracy)

        # explaining model
        explainer = shap.TreeExplainer(best_clf)
        shap_values = explainer.shap_values(X_val)
        
        if class_nums > 2:
            for i in range(3):
                shap_df = pd.DataFrame(shap_values[i], columns=X_train.columns)
                shap_df_list[i].append(shap_df)
        elif len(shap_values) == 2:
            for i in range(2):
                shap_df = pd.DataFrame(shap_values[i], columns=X_train.columns)
                shap_df_list[i].append(shap_df)
        else:
            shap_df = pd.DataFrame(shap_values, columns=X_train.columns)
            shap_df_list[0].append(shap_df)

    print()
    print('mean accuracy : ', mean_accuracy)
    print('mean precision : ', mean_precision)
    print('mean recall : ', mean_recall)
    print('mean f1 : ', mean_f1)
    print('mean roc_auc : ', mean_roc_auc)
    print()

    metric_df.loc[len(metric_df)] = ['mean', mean_accuracy, mean_precision, mean_recall, mean_f1, mean_roc_auc]

    return (shap_df_list, metric_df)

## Accuracy가 아닌 다른 metric을 기준으로 Optuna을 하려면 precision ~ roc_auc 주석 중 하나를 풀고 return 해주면 됩니다

In [60]:
def InnerCrossValidation(clf, X, y, cv_method, n_fold=39):
    class_nums = len(set(y))
    if cv_method == 'loso':
        kfold = KFold(n_splits=39)
        splits = kfold.split(X)
    elif cv_method == 'k_fold':
        stratified_kfold = StratifiedKFold(n_splits=n_fold)
        splits = stratified_kfold.split(X, y)
    else:
        raise ValueError("cv_method must be 'loso' or 'k_fold'!")
    
    train_X_len = len(X)
    if train_X_len % n_fold != 0:
        raise ValueError(f"{train_X_len} / {n_fold} is not Integer!")

    y_true_list, y_pred_list = [], []
    y_pred_prob_list = []
    
    for train_idx, val_idx in splits:
        X_train, X_val = X.iloc[train_idx, :], X.iloc[val_idx, :]
        y_train, y_val = y[train_idx], y[val_idx]
        
        classifier = clone(clf)

        # 모델을 학습한다
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_val)
        y_pred_prob = classifier.predict_proba(X_val)
        
        # 실제, 예측 라벨을 저장한다
        y_true_list.extend(y_val)
        y_pred_list.extend(y_pred)
        y_pred_prob_list.extend(y_pred_prob)
    
    # 여기를 바꾸면 됩니다
    accuracy = accuracy_score(y_true_list, y_pred_list)
    # precision = precision_score(y_true_list, y_pred_list, average='macro')
    # recall = recall_score(y_true_list, y_pred_list, average='macro')
    # f1 = f1_score(y_true_list, y_pred_list, average='macro')
    # if class_nums > 2:
    #     roc_auc = roc_auc_score(y_val, y_pred_prob, average='macro', multi_class='ovr')
    # else:
    #     roc_auc = roc_auc_score(y_val, y_pred)

    # 반환하는 변수를 바꿔줘야 합니다
    return accuracy

# Lightgbm Bayesian

In [61]:
class LGBMObjective(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __call__(self, trial: Trial):
        lgbm_params = {
            'n_estimators': trial.suggest_int('n_estimators', 100, 500),
            'learning_rate': trial.suggest_loguniform("learning_rate", 1e-3, 1e-1),
            'num_leaves': trial.suggest_int('num_leaves', 2, 256),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0, step=0.1),
            'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0, step=0.1), 
            'reg_alpha': trial.suggest_float('reg_alpha', 1.0, 5.0), # lambda_l1
            'reg_lambda': trial.suggest_float('reg_lambda', 1.0, 5.0), # lambda_l2
            'random_state': 42
        }

        clf = LGBMClassifier(**lgbm_params)

        mean_accuracy = InnerCrossValidation(clf, self.X, self.y, cv_method, n_fold)

        return mean_accuracy

In [62]:
clf = LGBMClassifier(random_state=42)
shap_df_list, metric_df = NestedCVwithOptuna(LGBMObjective, clf, 'lightgbm')

class_nums :  2


[32m[I 2022-08-29 00:31:24,872][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:31:27,521][0m Trial 0 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 372, 'learning_rate': 0.04039568529438273, 'num_leaves': 101, 'colsample_bytree': 0.9, 'min_child_samples': 8, 'subsample': 0.6, 'reg_alpha': 1.2089113402209128, 'reg_lambda': 3.814423241478219}. Best is trial 0 with value: 0.7777777777777778.[0m


Optuna Best score :  0.7777777777777778
Best parameters :  {'n_estimators': 372, 'learning_rate': 0.04039568529438273, 'num_leaves': 101, 'colsample_bytree': 0.9, 'min_child_samples': 8, 'subsample': 0.6, 'reg_alpha': 1.2089113402209128, 'reg_lambda': 3.814423241478219}
Selected features Num:  38
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time', 

[32m[I 2022-08-29 00:31:39,447][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:31:39,695][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 108, 'learning_rate': 0.0012774167045818592, 'num_leaves': 197, 'colsample_bytree': 0.6, 'min_child_samples': 43, 'subsample': 0.9, 'reg_alpha': 1.8118835890207468, 'reg_lambda': 4.951960913517542}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 108, 'learning_rate': 0.0012774167045818592, 'num_leaves': 197, 'colsample_bytree': 0.6, 'min_child_samples': 43, 'subsample': 0.9, 'reg_alpha': 1.8118835890207468, 'reg_lambda': 4.951960913517542}
Selected features Num:  20
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_TER', 'ws_median intercharacter time', 'ws_CPS', 'ws_KSPS', 'ws_COER']
test accuracy :  0.6666666666666666


[32m[I 2022-08-29 00:31:50,519][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:31:52,215][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 340, 'learning_rate': 0.010845483603503979, 'num_leaves': 253, 'colsample_bytree': 1.0, 'min_child_samples': 18, 'subsample': 0.8, 'reg_alpha': 2.8157966634241167, 'reg_lambda': 2.127100135141429}. Best is trial 0 with value: 0.7806267806267806.[0m


Optuna Best score :  0.7806267806267806
Best parameters :  {'n_estimators': 340, 'learning_rate': 0.010845483603503979, 'num_leaves': 253, 'colsample_bytree': 1.0, 'min_child_samples': 18, 'subsample': 0.8, 'reg_alpha': 2.8157966634241167, 'reg_lambda': 2.127100135141429}
Selected features Num:  30
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS', 'sms_TER', 'sms_UB', 'ws

[32m[I 2022-08-29 00:32:02,216][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:32:03,701][0m Trial 0 finished with value: 0.7891737891737892 and parameters: {'n_estimators': 214, 'learning_rate': 0.03148750449853724, 'num_leaves': 13, 'colsample_bytree': 0.8, 'min_child_samples': 6, 'subsample': 0.7, 'reg_alpha': 3.41868698575857, 'reg_lambda': 3.07598876330211}. Best is trial 0 with value: 0.7891737891737892.[0m


Optuna Best score :  0.7891737891737892
Best parameters :  {'n_estimators': 214, 'learning_rate': 0.03148750449853724, 'num_leaves': 13, 'colsample_bytree': 0.8, 'min_child_samples': 6, 'subsample': 0.7, 'reg_alpha': 3.41868698575857, 'reg_lambda': 3.07598876330211}
Selected features Num:  36
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time', 'sms_median intercharacter 

[32m[I 2022-08-29 00:32:13,750][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:32:14,281][0m Trial 0 finished with value: 0.7663817663817664 and parameters: {'n_estimators': 271, 'learning_rate': 0.03449283962153545, 'num_leaves': 69, 'colsample_bytree': 0.9, 'min_child_samples': 70, 'subsample': 0.9, 'reg_alpha': 2.5790333650836663, 'reg_lambda': 2.621442561024316}. Best is trial 0 with value: 0.7663817663817664.[0m


Optuna Best score :  0.7663817663817664
Best parameters :  {'n_estimators': 271, 'learning_rate': 0.03449283962153545, 'num_leaves': 69, 'colsample_bytree': 0.9, 'min_child_samples': 70, 'subsample': 0.9, 'reg_alpha': 2.5790333650836663, 'reg_lambda': 2.621442561024316}
Selected features Num:  32
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS

[32m[I 2022-08-29 00:32:23,647][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:32:25,040][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 401, 'learning_rate': 0.004386504604660451, 'num_leaves': 180, 'colsample_bytree': 0.8, 'min_child_samples': 40, 'subsample': 0.9, 'reg_alpha': 1.3924564864169011, 'reg_lambda': 4.159846567932638}. Best is trial 0 with value: 0.7692307692307693.[0m


Optuna Best score :  0.7692307692307693
Best parameters :  {'n_estimators': 401, 'learning_rate': 0.004386504604660451, 'num_leaves': 180, 'colsample_bytree': 0.8, 'min_child_samples': 40, 'subsample': 0.9, 'reg_alpha': 1.3924564864169011, 'reg_lambda': 4.159846567932638}
Selected features Num:  42
Selected features :  ['transition_sum_trial', 'average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_aver

[32m[I 2022-08-29 00:32:35,758][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:32:36,122][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 233, 'learning_rate': 0.005742436692923547, 'num_leaves': 131, 'colsample_bytree': 1.0, 'min_child_samples': 97, 'subsample': 0.6, 'reg_alpha': 3.670183980083784, 'reg_lambda': 3.2820491151626205}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 233, 'learning_rate': 0.005742436692923547, 'num_leaves': 131, 'colsample_bytree': 1.0, 'min_child_samples': 97, 'subsample': 0.6, 'reg_alpha': 3.670183980083784, 'reg_lambda': 3.2820491151626205}
Selected features Num:  32
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time

[32m[I 2022-08-29 00:32:45,702][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:32:46,769][0m Trial 0 finished with value: 0.7635327635327636 and parameters: {'n_estimators': 129, 'learning_rate': 0.011320922759871685, 'num_leaves': 229, 'colsample_bytree': 0.6, 'min_child_samples': 5, 'subsample': 0.9, 'reg_alpha': 2.776336273567712, 'reg_lambda': 2.402801683035644}. Best is trial 0 with value: 0.7635327635327636.[0m


Optuna Best score :  0.7635327635327636
Best parameters :  {'n_estimators': 129, 'learning_rate': 0.011320922759871685, 'num_leaves': 229, 'colsample_bytree': 0.6, 'min_child_samples': 5, 'subsample': 0.9, 'reg_alpha': 2.776336273567712, 'reg_lambda': 2.402801683035644}
Selected features Num:  40
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time', 

[32m[I 2022-08-29 00:32:57,231][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:32:57,861][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 333, 'learning_rate': 0.08503896776232589, 'num_leaves': 126, 'colsample_bytree': 0.9, 'min_child_samples': 61, 'subsample': 0.9, 'reg_alpha': 2.367770313739447, 'reg_lambda': 3.0895181373051606}. Best is trial 0 with value: 0.7692307692307693.[0m


Optuna Best score :  0.7692307692307693
Best parameters :  {'n_estimators': 333, 'learning_rate': 0.08503896776232589, 'num_leaves': 126, 'colsample_bytree': 0.9, 'min_child_samples': 61, 'subsample': 0.9, 'reg_alpha': 2.367770313739447, 'reg_lambda': 3.0895181373051606}
Selected features Num:  26
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS', 'sms_TER', 'ws_median intercharacter time', 'ws_min intercharacter time', 'ws_CPS', 'ws_KSPS', 'w

[32m[I 2022-08-29 00:33:08,885][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:33:09,366][0m Trial 0 finished with value: 0.7407407407407407 and parameters: {'n_estimators': 280, 'learning_rate': 0.007203694359000519, 'num_leaves': 172, 'colsample_bytree': 0.9, 'min_child_samples': 72, 'subsample': 0.6, 'reg_alpha': 2.603878282291447, 'reg_lambda': 3.62504842991095}. Best is trial 0 with value: 0.7407407407407407.[0m


Optuna Best score :  0.7407407407407407
Best parameters :  {'n_estimators': 280, 'learning_rate': 0.007203694359000519, 'num_leaves': 172, 'colsample_bytree': 0.9, 'min_child_samples': 72, 'subsample': 0.6, 'reg_alpha': 2.603878282291447, 'reg_lambda': 3.62504842991095}
Selected features Num:  36
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', '

[32m[I 2022-08-29 00:33:19,904][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:33:20,240][0m Trial 0 finished with value: 0.7635327635327636 and parameters: {'n_estimators': 166, 'learning_rate': 0.021469402952087647, 'num_leaves': 59, 'colsample_bytree': 0.6, 'min_child_samples': 67, 'subsample': 0.9, 'reg_alpha': 4.510543513998328, 'reg_lambda': 2.664424135355453}. Best is trial 0 with value: 0.7635327635327636.[0m


Optuna Best score :  0.7635327635327636
Best parameters :  {'n_estimators': 166, 'learning_rate': 0.021469402952087647, 'num_leaves': 59, 'colsample_bytree': 0.6, 'min_child_samples': 67, 'subsample': 0.9, 'reg_alpha': 4.510543513998328, 'reg_lambda': 2.664424135355453}
Selected features Num:  32
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time',

[32m[I 2022-08-29 00:33:30,429][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:33:30,732][0m Trial 0 finished with value: 0.7521367521367521 and parameters: {'n_estimators': 179, 'learning_rate': 0.016798962096860177, 'num_leaves': 75, 'colsample_bytree': 1.0, 'min_child_samples': 92, 'subsample': 0.6, 'reg_alpha': 2.66089861153102, 'reg_lambda': 3.662271630488631}. Best is trial 0 with value: 0.7521367521367521.[0m


Optuna Best score :  0.7521367521367521
Best parameters :  {'n_estimators': 179, 'learning_rate': 0.016798962096860177, 'num_leaves': 75, 'colsample_bytree': 1.0, 'min_child_samples': 92, 'subsample': 0.6, 'reg_alpha': 2.66089861153102, 'reg_lambda': 3.662271630488631}
Selected features Num:  38
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time', '

[32m[I 2022-08-29 00:33:40,643][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:33:41,617][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 300, 'learning_rate': 0.010155075004339306, 'num_leaves': 25, 'colsample_bytree': 0.8, 'min_child_samples': 46, 'subsample': 0.6, 'reg_alpha': 2.4339179691410253, 'reg_lambda': 3.8180643582356435}. Best is trial 0 with value: 0.7977207977207977.[0m


Optuna Best score :  0.7977207977207977
Best parameters :  {'n_estimators': 300, 'learning_rate': 0.010155075004339306, 'num_leaves': 25, 'colsample_bytree': 0.8, 'min_child_samples': 46, 'subsample': 0.6, 'reg_alpha': 2.4339179691410253, 'reg_lambda': 3.8180643582356435}
Selected features Num:  42
Selected features :  ['Routine_Screen_Unlock_Pattern', 'average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 

[32m[I 2022-08-29 00:33:52,652][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:33:53,343][0m Trial 0 finished with value: 0.7891737891737892 and parameters: {'n_estimators': 393, 'learning_rate': 0.05538363009498413, 'num_leaves': 239, 'colsample_bytree': 1.0, 'min_child_samples': 62, 'subsample': 0.9, 'reg_alpha': 3.7541495706022125, 'reg_lambda': 3.389404709960322}. Best is trial 0 with value: 0.7891737891737892.[0m


Optuna Best score :  0.7891737891737892
Best parameters :  {'n_estimators': 393, 'learning_rate': 0.05538363009498413, 'num_leaves': 239, 'colsample_bytree': 1.0, 'min_child_samples': 62, 'subsample': 0.9, 'reg_alpha': 3.7541495706022125, 'reg_lambda': 3.389404709960322}
Selected features Num:  32
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter tim

[32m[I 2022-08-29 00:34:03,102][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:34:03,827][0m Trial 0 finished with value: 0.7435897435897436 and parameters: {'n_estimators': 350, 'learning_rate': 0.00430714939124474, 'num_leaves': 89, 'colsample_bytree': 0.8, 'min_child_samples': 61, 'subsample': 1.0, 'reg_alpha': 1.1256722784558106, 'reg_lambda': 4.977409966109782}. Best is trial 0 with value: 0.7435897435897436.[0m


Optuna Best score :  0.7435897435897436
Best parameters :  {'n_estimators': 350, 'learning_rate': 0.00430714939124474, 'num_leaves': 89, 'colsample_bytree': 0.8, 'min_child_samples': 61, 'subsample': 1.0, 'reg_alpha': 1.1256722784558106, 'reg_lambda': 4.977409966109782}
Selected features Num:  40
Selected features :  ['transition_sum_trial', 'average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_averag

[32m[I 2022-08-29 00:34:15,279][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:34:18,510][0m Trial 0 finished with value: 0.7863247863247863 and parameters: {'n_estimators': 467, 'learning_rate': 0.011623458941844547, 'num_leaves': 118, 'colsample_bytree': 0.8, 'min_child_samples': 7, 'subsample': 0.9, 'reg_alpha': 3.492315681006897, 'reg_lambda': 3.6800957867755844}. Best is trial 0 with value: 0.7863247863247863.[0m


Optuna Best score :  0.7863247863247863
Best parameters :  {'n_estimators': 467, 'learning_rate': 0.011623458941844547, 'num_leaves': 118, 'colsample_bytree': 0.8, 'min_child_samples': 7, 'subsample': 0.9, 'reg_alpha': 3.492315681006897, 'reg_lambda': 3.6800957867755844}
Selected features Num:  32
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS', 'sms_

[32m[I 2022-08-29 00:34:29,465][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:34:30,697][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 113, 'learning_rate': 0.04900223193015766, 'num_leaves': 136, 'colsample_bytree': 1.0, 'min_child_samples': 5, 'subsample': 0.9, 'reg_alpha': 1.5594275493645928, 'reg_lambda': 4.220406650831182}. Best is trial 0 with value: 0.7806267806267806.[0m


Optuna Best score :  0.7806267806267806
Best parameters :  {'n_estimators': 113, 'learning_rate': 0.04900223193015766, 'num_leaves': 136, 'colsample_bytree': 1.0, 'min_child_samples': 5, 'subsample': 0.9, 'reg_alpha': 1.5594275493645928, 'reg_lambda': 4.220406650831182}
Selected features Num:  22
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_sharing_time', 'sms_median intercharacter time', 'sms_max intercharacter time', 'sms_KSPS', 'sms_TER', 'sms_UB', 'ws_median intercharacter time', 'ws_min intercharacter time', 'ws_CPS', 'ws_KSPS', 'ws_COER']
test accuracy :  0.5555555555555556


[32m[I 2022-08-29 00:34:40,265][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:34:41,227][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 316, 'learning_rate': 0.0013876052566088684, 'num_leaves': 62, 'colsample_bytree': 0.8, 'min_child_samples': 22, 'subsample': 0.9, 'reg_alpha': 4.516996120244235, 'reg_lambda': 2.922514915780542}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 316, 'learning_rate': 0.0013876052566088684, 'num_leaves': 62, 'colsample_bytree': 0.8, 'min_child_samples': 22, 'subsample': 0.9, 'reg_alpha': 4.516996120244235, 'reg_lambda': 2.922514915780542}
Selected features Num:  30
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS', 'sms_COER', 'sms_TER', 'ws_average intercharacter 

[32m[I 2022-08-29 00:34:51,176][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:34:51,782][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 353, 'learning_rate': 0.0808152680590488, 'num_leaves': 253, 'colsample_bytree': 1.0, 'min_child_samples': 30, 'subsample': 0.6, 'reg_alpha': 4.664507839581347, 'reg_lambda': 4.953830867086028}. Best is trial 0 with value: 0.7977207977207977.[0m


Optuna Best score :  0.7977207977207977
Best parameters :  {'n_estimators': 353, 'learning_rate': 0.0808152680590488, 'num_leaves': 253, 'colsample_bytree': 1.0, 'min_child_samples': 30, 'subsample': 0.6, 'reg_alpha': 4.664507839581347, 'reg_lambda': 4.953830867086028}
Selected features Num:  26
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS', 'sms_TER', 'ws_median intercharacter time', 'ws_min intercharacter time', 'ws_max intercharacter time', 'ws_C

[32m[I 2022-08-29 00:35:00,965][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:01,398][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 226, 'learning_rate': 0.0011989970451306733, 'num_leaves': 178, 'colsample_bytree': 0.8, 'min_child_samples': 64, 'subsample': 1.0, 'reg_alpha': 2.8899608811064783, 'reg_lambda': 2.6171395432378137}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 226, 'learning_rate': 0.0011989970451306733, 'num_leaves': 178, 'colsample_bytree': 0.8, 'min_child_samples': 64, 'subsample': 1.0, 'reg_alpha': 2.8899608811064783, 'reg_lambda': 2.6171395432378137}
Selected features Num:  38
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter tim

[32m[I 2022-08-29 00:35:10,746][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:11,045][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 266, 'learning_rate': 0.05430944518506282, 'num_leaves': 168, 'colsample_bytree': 0.8, 'min_child_samples': 98, 'subsample': 1.0, 'reg_alpha': 4.266222938759796, 'reg_lambda': 1.1895462763060674}. Best is trial 0 with value: 0.792022792022792.[0m


Optuna Best score :  0.792022792022792
Best parameters :  {'n_estimators': 266, 'learning_rate': 0.05430944518506282, 'num_leaves': 168, 'colsample_bytree': 0.8, 'min_child_samples': 98, 'subsample': 1.0, 'reg_alpha': 4.266222938759796, 'reg_lambda': 1.1895462763060674}
Selected features Num:  22
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'transfer_total_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_KSPS', 'sms_UB', 'ws_average intercharacter time', 'ws_median intercharacter time', 'ws_CPS', 'ws_COER']
test accuracy :  0.5555555555555556


[32m[I 2022-08-29 00:35:18,906][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:19,342][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 290, 'learning_rate': 0.0011251830129970432, 'num_leaves': 214, 'colsample_bytree': 0.7, 'min_child_samples': 69, 'subsample': 0.9, 'reg_alpha': 1.5447360190374781, 'reg_lambda': 4.9963330150053205}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 290, 'learning_rate': 0.0011251830129970432, 'num_leaves': 214, 'colsample_bytree': 0.7, 'min_child_samples': 69, 'subsample': 0.9, 'reg_alpha': 1.5447360190374781, 'reg_lambda': 4.9963330150053205}
Selected features Num:  38
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter tim

[32m[I 2022-08-29 00:35:26,985][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:27,280][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 174, 'learning_rate': 0.0030630669498707803, 'num_leaves': 71, 'colsample_bytree': 0.8, 'min_child_samples': 81, 'subsample': 0.8, 'reg_alpha': 1.0743027182760976, 'reg_lambda': 3.2773604698536616}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 174, 'learning_rate': 0.0030630669498707803, 'num_leaves': 71, 'colsample_bytree': 0.8, 'min_child_samples': 81, 'subsample': 0.8, 'reg_alpha': 1.0743027182760976, 'reg_lambda': 3.2773604698536616}
Selected features Num:  40
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time

[32m[I 2022-08-29 00:35:33,712][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:34,311][0m Trial 0 finished with value: 0.7635327635327636 and parameters: {'n_estimators': 490, 'learning_rate': 0.005420555731070484, 'num_leaves': 28, 'colsample_bytree': 0.9, 'min_child_samples': 71, 'subsample': 0.8, 'reg_alpha': 2.42821658303504, 'reg_lambda': 2.1169478028337188}. Best is trial 0 with value: 0.7635327635327636.[0m


Optuna Best score :  0.7635327635327636
Best parameters :  {'n_estimators': 490, 'learning_rate': 0.005420555731070484, 'num_leaves': 28, 'colsample_bytree': 0.9, 'min_child_samples': 71, 'subsample': 0.8, 'reg_alpha': 2.42821658303504, 'reg_lambda': 2.1169478028337188}
Selected features Num:  32
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_taken_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS', 'sms_COER', 'sms_TER'

[32m[I 2022-08-29 00:35:40,559][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:40,838][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 125, 'learning_rate': 0.002094972557873669, 'num_leaves': 147, 'colsample_bytree': 0.7, 'min_child_samples': 36, 'subsample': 0.7, 'reg_alpha': 4.552920086798473, 'reg_lambda': 4.315430510802482}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 125, 'learning_rate': 0.002094972557873669, 'num_leaves': 147, 'colsample_bytree': 0.7, 'min_child_samples': 36, 'subsample': 0.7, 'reg_alpha': 4.552920086798473, 'reg_lambda': 4.315430510802482}
Selected features Num:  36
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharact

[32m[I 2022-08-29 00:35:46,927][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:47,501][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 452, 'learning_rate': 0.033127528685445044, 'num_leaves': 164, 'colsample_bytree': 1.0, 'min_child_samples': 73, 'subsample': 1.0, 'reg_alpha': 4.47677250092303, 'reg_lambda': 2.602728916959454}. Best is trial 0 with value: 0.7806267806267806.[0m


Optuna Best score :  0.7806267806267806
Best parameters :  {'n_estimators': 452, 'learning_rate': 0.033127528685445044, 'num_leaves': 164, 'colsample_bytree': 1.0, 'min_child_samples': 73, 'subsample': 1.0, 'reg_alpha': 4.47677250092303, 'reg_lambda': 2.602728916959454}
Selected features Num:  44
Selected features :  ['Routine_Screen_Unlock_Pattern', 'Transition_Transfer', 'transition_sum_trial', 'average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sh

[32m[I 2022-08-29 00:35:53,803][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:35:54,236][0m Trial 0 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 191, 'learning_rate': 0.03182110086259778, 'num_leaves': 244, 'colsample_bytree': 0.6, 'min_child_samples': 45, 'subsample': 0.7, 'reg_alpha': 1.2679885959430086, 'reg_lambda': 2.120083198853613}. Best is trial 0 with value: 0.7777777777777778.[0m


Optuna Best score :  0.7777777777777778
Best parameters :  {'n_estimators': 191, 'learning_rate': 0.03182110086259778, 'num_leaves': 244, 'colsample_bytree': 0.6, 'min_child_samples': 45, 'subsample': 0.7, 'reg_alpha': 1.2679885959430086, 'reg_lambda': 2.120083198853613}
Selected features Num:  36
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 

[32m[I 2022-08-29 00:36:00,101][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:36:02,036][0m Trial 0 finished with value: 0.7293447293447294 and parameters: {'n_estimators': 460, 'learning_rate': 0.0021505849718623056, 'num_leaves': 200, 'colsample_bytree': 0.6, 'min_child_samples': 18, 'subsample': 1.0, 'reg_alpha': 4.6226737617417974, 'reg_lambda': 3.815410235831862}. Best is trial 0 with value: 0.7293447293447294.[0m


Optuna Best score :  0.7293447293447294
Best parameters :  {'n_estimators': 460, 'learning_rate': 0.0021505849718623056, 'num_leaves': 200, 'colsample_bytree': 0.6, 'min_child_samples': 18, 'subsample': 1.0, 'reg_alpha': 4.6226737617417974, 'reg_lambda': 3.815410235831862}
Selected features Num:  42
Selected features :  ['transition_sum_trial', 'average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_ave

[32m[I 2022-08-29 00:36:09,688][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:36:10,128][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 397, 'learning_rate': 0.001295094303089626, 'num_leaves': 3, 'colsample_bytree': 0.7, 'min_child_samples': 51, 'subsample': 0.6, 'reg_alpha': 4.217034629714612, 'reg_lambda': 3.0747925777228873}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 397, 'learning_rate': 0.001295094303089626, 'num_leaves': 3, 'colsample_bytree': 0.7, 'min_child_samples': 51, 'subsample': 0.6, 'reg_alpha': 4.217034629714612, 'reg_lambda': 3.0747925777228873}
Selected features Num:  38
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time', 

[32m[I 2022-08-29 00:36:17,269][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:36:19,802][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 270, 'learning_rate': 0.01284478441744208, 'num_leaves': 148, 'colsample_bytree': 0.6, 'min_child_samples': 5, 'subsample': 0.9, 'reg_alpha': 2.185266803819865, 'reg_lambda': 4.11845859248351}. Best is trial 0 with value: 0.7977207977207977.[0m


Optuna Best score :  0.7977207977207977
Best parameters :  {'n_estimators': 270, 'learning_rate': 0.01284478441744208, 'num_leaves': 148, 'colsample_bytree': 0.6, 'min_child_samples': 5, 'subsample': 0.9, 'reg_alpha': 2.185266803819865, 'reg_lambda': 4.11845859248351}
Selected features Num:  38
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time', 's

[32m[I 2022-08-29 00:36:27,811][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:36:28,402][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 323, 'learning_rate': 0.09417347224240731, 'num_leaves': 199, 'colsample_bytree': 0.8, 'min_child_samples': 52, 'subsample': 0.8, 'reg_alpha': 1.193223030844381, 'reg_lambda': 1.4511733199249717}. Best is trial 0 with value: 0.792022792022792.[0m


Optuna Best score :  0.792022792022792
Best parameters :  {'n_estimators': 323, 'learning_rate': 0.09417347224240731, 'num_leaves': 199, 'colsample_bytree': 0.8, 'min_child_samples': 52, 'subsample': 0.8, 'reg_alpha': 1.193223030844381, 'reg_lambda': 1.4511733199249717}
Selected features Num:  34
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_ma

[32m[I 2022-08-29 00:36:36,405][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:36:37,078][0m Trial 0 finished with value: 0.6809116809116809 and parameters: {'n_estimators': 381, 'learning_rate': 0.0030651739050323204, 'num_leaves': 171, 'colsample_bytree': 0.7, 'min_child_samples': 96, 'subsample': 1.0, 'reg_alpha': 1.466836765963098, 'reg_lambda': 4.435714506700927}. Best is trial 0 with value: 0.6809116809116809.[0m


Optuna Best score :  0.6809116809116809
Best parameters :  {'n_estimators': 381, 'learning_rate': 0.0030651739050323204, 'num_leaves': 171, 'colsample_bytree': 0.7, 'min_child_samples': 96, 'subsample': 1.0, 'reg_alpha': 1.466836765963098, 'reg_lambda': 4.435714506700927}
Selected features Num:  40
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time'

[32m[I 2022-08-29 00:36:44,811][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:36:45,512][0m Trial 0 finished with value: 0.7407407407407407 and parameters: {'n_estimators': 415, 'learning_rate': 0.002574949616883585, 'num_leaves': 36, 'colsample_bytree': 1.0, 'min_child_samples': 51, 'subsample': 0.9, 'reg_alpha': 2.4145272083964557, 'reg_lambda': 2.519096502849755}. Best is trial 0 with value: 0.7407407407407407.[0m


Optuna Best score :  0.7407407407407407
Best parameters :  {'n_estimators': 415, 'learning_rate': 0.002574949616883585, 'num_leaves': 36, 'colsample_bytree': 1.0, 'min_child_samples': 51, 'subsample': 0.9, 'reg_alpha': 2.4145272083964557, 'reg_lambda': 2.519096502849755}
Selected features Num:  20
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_call_time', 'phone_receive_total_time', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'sms_max intercharacter time', 'sms_KSPS', 'sms_TER', 'ws_average intercharacter time', 'ws_median intercharacter time', 'ws_CPS', 'ws_KSPS', 'ws_COER']
test accuracy :  0.7777777777777778


[32m[I 2022-08-29 00:36:53,140][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:36:54,456][0m Trial 0 finished with value: 0.7663817663817664 and parameters: {'n_estimators': 490, 'learning_rate': 0.0026292073947639673, 'num_leaves': 49, 'colsample_bytree': 0.6, 'min_child_samples': 13, 'subsample': 1.0, 'reg_alpha': 1.4574305799282188, 'reg_lambda': 2.4694706633954246}. Best is trial 0 with value: 0.7663817663817664.[0m


Optuna Best score :  0.7663817663817664
Best parameters :  {'n_estimators': 490, 'learning_rate': 0.0026292073947639673, 'num_leaves': 49, 'colsample_bytree': 0.6, 'min_child_samples': 13, 'subsample': 1.0, 'reg_alpha': 1.4574305799282188, 'reg_lambda': 2.4694706633954246}
Selected features Num:  36
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time

[32m[I 2022-08-29 00:37:01,374][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:37:01,761][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 315, 'learning_rate': 0.04354821898814804, 'num_leaves': 79, 'colsample_bytree': 1.0, 'min_child_samples': 91, 'subsample': 0.6, 'reg_alpha': 3.69474661484168, 'reg_lambda': 1.0498828653499461}. Best is trial 0 with value: 0.7806267806267806.[0m


Optuna Best score :  0.7806267806267806
Best parameters :  {'n_estimators': 315, 'learning_rate': 0.04354821898814804, 'num_leaves': 79, 'colsample_bytree': 1.0, 'min_child_samples': 91, 'subsample': 0.6, 'reg_alpha': 3.69474661484168, 'reg_lambda': 1.0498828653499461}
Selected features Num:  30
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'camera_gallery_delete_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_KSPS', 'sms_COER', 'sms_TER', 'sms_UB', 'ws_median

[32m[I 2022-08-29 00:37:08,704][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:37:09,140][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 180, 'learning_rate': 0.0014434716215996513, 'num_leaves': 188, 'colsample_bytree': 0.8, 'min_child_samples': 29, 'subsample': 0.6, 'reg_alpha': 1.158169363365916, 'reg_lambda': 1.8555539949073876}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 180, 'learning_rate': 0.0014434716215996513, 'num_leaves': 188, 'colsample_bytree': 0.8, 'min_child_samples': 29, 'subsample': 0.6, 'reg_alpha': 1.158169363365916, 'reg_lambda': 1.8555539949073876}
Selected features Num:  36
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_average intercharacter time', 'sms_median intercha

[32m[I 2022-08-29 00:37:15,980][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:37:16,439][0m Trial 0 finished with value: 0.6666666666666666 and parameters: {'n_estimators': 133, 'learning_rate': 0.0031396163205134435, 'num_leaves': 40, 'colsample_bytree': 0.9, 'min_child_samples': 7, 'subsample': 0.9, 'reg_alpha': 4.999117601535151, 'reg_lambda': 4.4053834121981525}. Best is trial 0 with value: 0.6666666666666666.[0m


Optuna Best score :  0.6666666666666666
Best parameters :  {'n_estimators': 133, 'learning_rate': 0.0031396163205134435, 'num_leaves': 40, 'colsample_bytree': 0.9, 'min_child_samples': 7, 'subsample': 0.9, 'reg_alpha': 4.999117601535151, 'reg_lambda': 4.4053834121981525}
Selected features Num:  30
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time', 'sms_CPS', 'sms_COE

[32m[I 2022-08-29 00:37:23,122][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:37:23,515][0m Trial 0 finished with value: 0.7492877492877493 and parameters: {'n_estimators': 204, 'learning_rate': 0.004774595235652467, 'num_leaves': 12, 'colsample_bytree': 0.6, 'min_child_samples': 53, 'subsample': 1.0, 'reg_alpha': 1.849662025337539, 'reg_lambda': 1.8262010537552507}. Best is trial 0 with value: 0.7492877492877493.[0m


Optuna Best score :  0.7492877492877493
Best parameters :  {'n_estimators': 204, 'learning_rate': 0.004774595235652467, 'num_leaves': 12, 'colsample_bytree': 0.6, 'min_child_samples': 53, 'subsample': 1.0, 'reg_alpha': 1.849662025337539, 'reg_lambda': 1.8262010537552507}
Selected features Num:  42
Selected features :  ['transition_sum_trial', 'average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_avera

[32m[I 2022-08-29 00:37:29,996][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:37:30,458][0m Trial 0 finished with value: 0.7578347578347578 and parameters: {'n_estimators': 242, 'learning_rate': 0.011808987259591303, 'num_leaves': 109, 'colsample_bytree': 0.7, 'min_child_samples': 32, 'subsample': 0.7, 'reg_alpha': 3.92223714099638, 'reg_lambda': 4.902649662666024}. Best is trial 0 with value: 0.7578347578347578.[0m


Optuna Best score :  0.7578347578347578
Best parameters :  {'n_estimators': 242, 'learning_rate': 0.011808987259591303, 'num_leaves': 109, 'colsample_bytree': 0.7, 'min_child_samples': 32, 'subsample': 0.7, 'reg_alpha': 3.92223714099638, 'reg_lambda': 4.902649662666024}
Selected features Num:  44
Selected features :  ['Transition_SMS_Reply', 'Transition_Camera', 'transition_sum_trial', 'average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_1', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_taken_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time'

[32m[I 2022-08-29 00:37:37,238][0m A new study created in memory with name: lightgbm Study[0m
[32m[I 2022-08-29 00:37:37,567][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 284, 'learning_rate': 0.026730585845044393, 'num_leaves': 3, 'colsample_bytree': 1.0, 'min_child_samples': 23, 'subsample': 1.0, 'reg_alpha': 1.7205383605329772, 'reg_lambda': 4.91434700601009}. Best is trial 0 with value: 0.7977207977207977.[0m


Optuna Best score :  0.7977207977207977
Best parameters :  {'n_estimators': 284, 'learning_rate': 0.026730585845044393, 'num_leaves': 3, 'colsample_bytree': 1.0, 'min_child_samples': 23, 'subsample': 1.0, 'reg_alpha': 1.7205383605329772, 'reg_lambda': 4.91434700601009}
Selected features Num:  32
Selected features :  ['average_noti_response_app_start', 'median_noti_response_app_start', 'phone_register_noti_response', 'phone_register_screen_unlocking_time', 'phone_register_phone_number_register_time', 'phone_register_call_time', 'phone_register_total_time', 'phone_receive_total_time', 'phone_receive_missing_call_time_2', 'sms_reply_compeltion_time', 'sms_reply_total_time', 'camera_gallery_delete_time', 'camera_total_time', 'transfer_usage_time', 'transfer_share_time', 'transfer_total_time', 'weather_information_searching_time', 'weather_information_sharing_time', 'weather_searching_total_time', 'sms_median intercharacter time', 'sms_min intercharacter time', 'sms_max intercharacter time'

## 40개의 모델의 개별 performance metric + 평균 performnace metric을 저장한다
## 각 모델별 test set의 feature에 따른 shap value를 저장한다

In [71]:
if label_name == 'three_label':
    shap_values_df_0 = pd.concat(shap_df_list[0])
    shap_values_df_1 = pd.concat(shap_df_list[1])
    shap_values_df_2 = pd.concat(shap_df_list[2])

    with pd.ExcelWriter(f"./lgbm_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df_0.to_excel(writer, sheet_name="shap_list_0", index=False)
        shap_values_df_1.to_excel(writer, sheet_name="shap_list_1", index=False)
        shap_values_df_2.to_excel(writer, sheet_name="shap_list_2", index=False)

if label_name != 'three_label':
    shap_values_df = pd.concat(shap_df_list[0])

    with pd.ExcelWriter(f"./lgbm_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df.to_excel(writer, sheet_name="shap_list", index=False)

# Xgboost Bayesian

In [31]:
class XBGObjective(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y

    def __call__(self, trial: Trial):    
        xgb_params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 500),
            'learning_rate': trial.suggest_loguniform("learning_rate", 1e-4, 1e-1),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'min_child_weight': trial.suggest_int('min_child_weight', 1, 9),
            'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0, step=0.1),
            'subsample': trial.suggest_float('subsample', 0.5, 1.0, step=0.1),
            'gamma': trial.suggest_float('gamma', 0.0, 5.0, step=0.1),
            'reg_alpha': trial.suggest_float('reg_alpha', 1.0, 5.0), # lambda_l1
            'reg_lambda': trial.suggest_float('reg_lambda', 1.0, 5.0), # lambda_l2
            'random_state': 42
        }

        clf = XGBClassifier(**xgb_params)

        mean_accuracy = InnerCrossValidation(clf, self.X, self.y, cv_method, n_fold)

        return mean_accuracy

In [32]:
clf = XGBClassifier(random_state=42)
shap_df_list, metric_df = NestedCVwithOptuna(XBGObjective, clf, 'xgboost')

[32m[I 2022-08-28 22:22:07,306][0m A new study created in memory with name: xgboost Study[0m


class_nums :  2


[32m[I 2022-08-28 22:22:10,122][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 439, 'learning_rate': 0.0002472873616023207, 'max_depth': 8, 'min_child_weight': 2, 'colsample_bytree': 0.7, 'subsample': 0.9, 'gamma': 0.2, 'reg_alpha': 1.779755163462574, 'reg_lambda': 1.8620792710380027}. Best is trial 0 with value: 0.7692307692307693.[0m


Optuna Best score :  0.7692307692307693


[32m[I 2022-08-28 22:22:10,569][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  1.0


[32m[I 2022-08-28 22:22:12,646][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 415, 'learning_rate': 0.007132260495815669, 'max_depth': 4, 'min_child_weight': 2, 'colsample_bytree': 0.7, 'subsample': 0.7, 'gamma': 4.1000000000000005, 'reg_alpha': 4.150335636616924, 'reg_lambda': 3.0634693174330714}. Best is trial 0 with value: 0.7692307692307693.[0m


Optuna Best score :  0.7692307692307693


[32m[I 2022-08-28 22:22:12,966][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  1.0


[32m[I 2022-08-28 22:22:15,356][0m Trial 0 finished with value: 0.7435897435897436 and parameters: {'n_estimators': 417, 'learning_rate': 0.0002570443509561165, 'max_depth': 7, 'min_child_weight': 4, 'colsample_bytree': 0.6, 'subsample': 0.9, 'gamma': 0.7000000000000001, 'reg_alpha': 4.94986912699046, 'reg_lambda': 4.991730690571341}. Best is trial 0 with value: 0.7435897435897436.[0m


Optuna Best score :  0.7435897435897436


[32m[I 2022-08-28 22:22:15,700][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  1.0


[32m[I 2022-08-28 22:22:17,148][0m Trial 0 finished with value: 0.7492877492877493 and parameters: {'n_estimators': 346, 'learning_rate': 0.0001573611932151284, 'max_depth': 3, 'min_child_weight': 7, 'colsample_bytree': 0.5, 'subsample': 0.7, 'gamma': 2.8000000000000003, 'reg_alpha': 3.283777247927489, 'reg_lambda': 1.572738845633281}. Best is trial 0 with value: 0.7492877492877493.[0m


Optuna Best score :  0.7492877492877493
test accuracy :  1.0


[32m[I 2022-08-28 22:22:17,355][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:18,380][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 168, 'learning_rate': 0.025089503356483633, 'max_depth': 6, 'min_child_weight': 2, 'colsample_bytree': 0.9, 'subsample': 0.8, 'gamma': 3.0, 'reg_alpha': 1.5952222536120066, 'reg_lambda': 4.283981902958346}. Best is trial 0 with value: 0.792022792022792.[0m


Optuna Best score :  0.792022792022792
test accuracy :  1.0


[32m[I 2022-08-28 22:22:18,590][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:19,565][0m Trial 0 finished with value: 0.7321937321937322 and parameters: {'n_estimators': 244, 'learning_rate': 0.0007769840413821829, 'max_depth': 7, 'min_child_weight': 9, 'colsample_bytree': 0.6, 'subsample': 0.7, 'gamma': 0.7000000000000001, 'reg_alpha': 3.356698722631553, 'reg_lambda': 1.0990767541893942}. Best is trial 0 with value: 0.7321937321937322.[0m
[32m[I 2022-08-28 22:22:19,719][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7321937321937322
test accuracy :  1.0


[32m[I 2022-08-28 22:22:20,928][0m Trial 0 finished with value: 0.7663817663817664 and parameters: {'n_estimators': 307, 'learning_rate': 0.09095157633421118, 'max_depth': 3, 'min_child_weight': 1, 'colsample_bytree': 0.9, 'subsample': 0.8, 'gamma': 0.6000000000000001, 'reg_alpha': 4.350634051157123, 'reg_lambda': 1.760735260969088}. Best is trial 0 with value: 0.7663817663817664.[0m
[32m[I 2022-08-28 22:22:21,127][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7663817663817664
test accuracy :  1.0


[32m[I 2022-08-28 22:22:22,308][0m Trial 0 finished with value: 0.7549857549857549 and parameters: {'n_estimators': 246, 'learning_rate': 0.00024992198130392463, 'max_depth': 4, 'min_child_weight': 8, 'colsample_bytree': 0.8, 'subsample': 1.0, 'gamma': 0.30000000000000004, 'reg_alpha': 2.051589433203005, 'reg_lambda': 3.0637623989450273}. Best is trial 0 with value: 0.7549857549857549.[0m
[32m[I 2022-08-28 22:22:22,488][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7549857549857549
test accuracy :  1.0


[32m[I 2022-08-28 22:22:24,181][0m Trial 0 finished with value: 0.7207977207977208 and parameters: {'n_estimators': 447, 'learning_rate': 0.0008726794771117431, 'max_depth': 9, 'min_child_weight': 9, 'colsample_bytree': 0.7, 'subsample': 0.5, 'gamma': 4.2, 'reg_alpha': 1.915290709386512, 'reg_lambda': 2.4510578273217}. Best is trial 0 with value: 0.7207977207977208.[0m


Optuna Best score :  0.7207977207977208
test accuracy :  1.0


[32m[I 2022-08-28 22:22:24,411][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:26,016][0m Trial 0 finished with value: 0.7521367521367521 and parameters: {'n_estimators': 394, 'learning_rate': 0.00161859702010901, 'max_depth': 7, 'min_child_weight': 7, 'colsample_bytree': 1.0, 'subsample': 0.6, 'gamma': 0.7000000000000001, 'reg_alpha': 4.367850190240267, 'reg_lambda': 2.0140485069666036}. Best is trial 0 with value: 0.7521367521367521.[0m


Optuna Best score :  0.7521367521367521
test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:22:26,238][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:27,233][0m Trial 0 finished with value: 0.7606837606837606 and parameters: {'n_estimators': 209, 'learning_rate': 0.0008953223623360992, 'max_depth': 8, 'min_child_weight': 3, 'colsample_bytree': 1.0, 'subsample': 0.6, 'gamma': 1.8, 'reg_alpha': 2.280002930115382, 'reg_lambda': 4.044638042927836}. Best is trial 0 with value: 0.7606837606837606.[0m
[32m[I 2022-08-28 22:22:27,397][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7606837606837606
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:28,146][0m Trial 0 finished with value: 0.7635327635327636 and parameters: {'n_estimators': 166, 'learning_rate': 0.00040130162236227566, 'max_depth': 7, 'min_child_weight': 8, 'colsample_bytree': 0.6, 'subsample': 0.7, 'gamma': 0.2, 'reg_alpha': 1.8819207012063157, 'reg_lambda': 2.3677426958828907}. Best is trial 0 with value: 0.7635327635327636.[0m
[32m[I 2022-08-28 22:22:28,249][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7635327635327636
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:31,018][0m Trial 0 finished with value: 0.811965811965812 and parameters: {'n_estimators': 444, 'learning_rate': 0.004378699808908474, 'max_depth': 10, 'min_child_weight': 2, 'colsample_bytree': 0.7, 'subsample': 1.0, 'gamma': 0.6000000000000001, 'reg_alpha': 3.3145528850981107, 'reg_lambda': 1.9343786183710492}. Best is trial 0 with value: 0.811965811965812.[0m


Optuna Best score :  0.811965811965812


[32m[I 2022-08-28 22:22:31,512][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:32,650][0m Trial 0 finished with value: 0.7207977207977208 and parameters: {'n_estimators': 341, 'learning_rate': 0.00016339633720776007, 'max_depth': 9, 'min_child_weight': 9, 'colsample_bytree': 0.8, 'subsample': 0.5, 'gamma': 4.9, 'reg_alpha': 1.7994311580937428, 'reg_lambda': 4.995017967395645}. Best is trial 0 with value: 0.7207977207977208.[0m


Optuna Best score :  0.7207977207977208
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:22:32,853][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:34,484][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 391, 'learning_rate': 0.00016026547273547122, 'max_depth': 4, 'min_child_weight': 7, 'colsample_bytree': 1.0, 'subsample': 0.8, 'gamma': 3.2, 'reg_alpha': 1.761623104921235, 'reg_lambda': 3.7139157764696806}. Best is trial 0 with value: 0.7692307692307693.[0m


Optuna Best score :  0.7692307692307693


[32m[I 2022-08-28 22:22:34,740][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:22:35,588][0m Trial 0 finished with value: 0.8034188034188035 and parameters: {'n_estimators': 112, 'learning_rate': 0.01710329921442241, 'max_depth': 7, 'min_child_weight': 1, 'colsample_bytree': 1.0, 'subsample': 0.7, 'gamma': 0.7000000000000001, 'reg_alpha': 3.393052380189377, 'reg_lambda': 3.547883247208093}. Best is trial 0 with value: 0.8034188034188035.[0m
[32m[I 2022-08-28 22:22:35,720][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.8034188034188035
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:36,017][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 53, 'learning_rate': 0.033172906398642045, 'max_depth': 9, 'min_child_weight': 9, 'colsample_bytree': 1.0, 'subsample': 1.0, 'gamma': 4.0, 'reg_alpha': 1.5614012217235347, 'reg_lambda': 1.9987292840603494}. Best is trial 0 with value: 0.792022792022792.[0m
[32m[I 2022-08-28 22:22:36,066][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.792022792022792
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:22:36,496][0m Trial 0 finished with value: 0.8034188034188035 and parameters: {'n_estimators': 73, 'learning_rate': 0.03227378099468157, 'max_depth': 6, 'min_child_weight': 3, 'colsample_bytree': 0.8, 'subsample': 0.8, 'gamma': 0.7000000000000001, 'reg_alpha': 3.3129076697197464, 'reg_lambda': 2.2597201480697566}. Best is trial 0 with value: 0.8034188034188035.[0m
[32m[I 2022-08-28 22:22:36,572][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.8034188034188035
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:22:37,735][0m Trial 0 finished with value: 0.7635327635327636 and parameters: {'n_estimators': 302, 'learning_rate': 0.004119735069549154, 'max_depth': 7, 'min_child_weight': 9, 'colsample_bytree': 1.0, 'subsample': 0.8, 'gamma': 3.1, 'reg_alpha': 4.663193467446092, 'reg_lambda': 2.73695559996948}. Best is trial 0 with value: 0.7635327635327636.[0m


Optuna Best score :  0.7635327635327636
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:37,956][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:38,622][0m Trial 0 finished with value: 0.8091168091168092 and parameters: {'n_estimators': 116, 'learning_rate': 0.05626625117181885, 'max_depth': 6, 'min_child_weight': 3, 'colsample_bytree': 0.9, 'subsample': 0.8, 'gamma': 1.4000000000000001, 'reg_alpha': 4.173592042364968, 'reg_lambda': 4.163138993672922}. Best is trial 0 with value: 0.8091168091168092.[0m
[32m[I 2022-08-28 22:22:38,737][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.8091168091168092
test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:22:41,146][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 421, 'learning_rate': 0.006143330167985399, 'max_depth': 5, 'min_child_weight': 1, 'colsample_bytree': 0.7, 'subsample': 0.7, 'gamma': 0.9, 'reg_alpha': 2.3253818557510124, 'reg_lambda': 4.891949713723024}. Best is trial 0 with value: 0.7977207977207977.[0m


Optuna Best score :  0.7977207977207977


[32m[I 2022-08-28 22:22:41,547][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:22:42,031][0m Trial 0 finished with value: 0.7606837606837606 and parameters: {'n_estimators': 71, 'learning_rate': 0.01116865379534018, 'max_depth': 9, 'min_child_weight': 4, 'colsample_bytree': 0.9, 'subsample': 1.0, 'gamma': 2.5, 'reg_alpha': 1.0548660234217122, 'reg_lambda': 4.277259114861744}. Best is trial 0 with value: 0.7606837606837606.[0m
[32m[I 2022-08-28 22:22:42,105][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7606837606837606
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:22:43,126][0m Trial 0 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 241, 'learning_rate': 0.007499792891369625, 'max_depth': 6, 'min_child_weight': 9, 'colsample_bytree': 1.0, 'subsample': 1.0, 'gamma': 0.4, 'reg_alpha': 3.0076557128557417, 'reg_lambda': 1.8582469201558158}. Best is trial 0 with value: 0.7777777777777778.[0m
[32m[I 2022-08-28 22:22:43,293][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7777777777777778
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:43,958][0m Trial 0 finished with value: 0.7521367521367521 and parameters: {'n_estimators': 154, 'learning_rate': 0.0006847696586085113, 'max_depth': 8, 'min_child_weight': 8, 'colsample_bytree': 0.8, 'subsample': 0.9, 'gamma': 0.2, 'reg_alpha': 3.349715160290812, 'reg_lambda': 4.940499441870771}. Best is trial 0 with value: 0.7521367521367521.[0m
[32m[I 2022-08-28 22:22:44,067][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7521367521367521
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:22:44,699][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 133, 'learning_rate': 0.012598290593487547, 'max_depth': 4, 'min_child_weight': 3, 'colsample_bytree': 0.8, 'subsample': 0.6, 'gamma': 0.30000000000000004, 'reg_alpha': 3.2355134603118816, 'reg_lambda': 3.7431853866748828}. Best is trial 0 with value: 0.7806267806267806.[0m
[32m[I 2022-08-28 22:22:44,803][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7806267806267806
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:22:46,080][0m Trial 0 finished with value: 0.7720797720797721 and parameters: {'n_estimators': 275, 'learning_rate': 0.010286677422048212, 'max_depth': 9, 'min_child_weight': 8, 'colsample_bytree': 1.0, 'subsample': 1.0, 'gamma': 2.8000000000000003, 'reg_alpha': 3.700403454423865, 'reg_lambda': 4.564813216709039}. Best is trial 0 with value: 0.7720797720797721.[0m


Optuna Best score :  0.7720797720797721


[32m[I 2022-08-28 22:22:46,350][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:22:47,890][0m Trial 0 finished with value: 0.7606837606837606 and parameters: {'n_estimators': 469, 'learning_rate': 0.0002540971537179174, 'max_depth': 8, 'min_child_weight': 8, 'colsample_bytree': 1.0, 'subsample': 0.5, 'gamma': 0.0, 'reg_alpha': 3.963776706566543, 'reg_lambda': 1.8202682753781558}. Best is trial 0 with value: 0.7606837606837606.[0m


Optuna Best score :  0.7606837606837606
test accuracy :  0.4444444444444444


[32m[I 2022-08-28 22:22:48,122][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:49,750][0m Trial 0 finished with value: 0.7948717948717948 and parameters: {'n_estimators': 410, 'learning_rate': 0.025120095667334915, 'max_depth': 3, 'min_child_weight': 5, 'colsample_bytree': 0.5, 'subsample': 0.5, 'gamma': 1.4000000000000001, 'reg_alpha': 4.717131717540265, 'reg_lambda': 3.115180967165032}. Best is trial 0 with value: 0.7948717948717948.[0m


Optuna Best score :  0.7948717948717948


[32m[I 2022-08-28 22:22:49,994][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:51,119][0m Trial 0 finished with value: 0.7891737891737892 and parameters: {'n_estimators': 322, 'learning_rate': 0.026264286023403954, 'max_depth': 8, 'min_child_weight': 7, 'colsample_bytree': 0.8, 'subsample': 0.6, 'gamma': 3.0, 'reg_alpha': 3.474122818221138, 'reg_lambda': 4.574992509269601}. Best is trial 0 with value: 0.7891737891737892.[0m
[32m[I 2022-08-28 22:22:51,291][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7891737891737892
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:22:52,419][0m Trial 0 finished with value: 0.7578347578347578 and parameters: {'n_estimators': 312, 'learning_rate': 0.08674427249976438, 'max_depth': 4, 'min_child_weight': 8, 'colsample_bytree': 0.5, 'subsample': 0.8, 'gamma': 0.0, 'reg_alpha': 2.4573375535396864, 'reg_lambda': 2.031313436920013}. Best is trial 0 with value: 0.7578347578347578.[0m


Optuna Best score :  0.7578347578347578
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:22:52,629][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:22:53,572][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 249, 'learning_rate': 0.005595832742110544, 'max_depth': 5, 'min_child_weight': 5, 'colsample_bytree': 0.7, 'subsample': 0.6, 'gamma': 2.2, 'reg_alpha': 4.058887170320906, 'reg_lambda': 1.4265711221073665}. Best is trial 0 with value: 0.7692307692307693.[0m
[32m[I 2022-08-28 22:22:53,739][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7692307692307693
test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:22:54,073][0m Trial 0 finished with value: 0.7578347578347578 and parameters: {'n_estimators': 57, 'learning_rate': 0.01311677735207692, 'max_depth': 5, 'min_child_weight': 6, 'colsample_bytree': 1.0, 'subsample': 1.0, 'gamma': 3.0, 'reg_alpha': 2.531556435648844, 'reg_lambda': 2.6928701380125526}. Best is trial 0 with value: 0.7578347578347578.[0m
[32m[I 2022-08-28 22:22:54,129][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7578347578347578
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:22:55,531][0m Trial 0 finished with value: 0.7663817663817664 and parameters: {'n_estimators': 275, 'learning_rate': 0.0056555042071181946, 'max_depth': 4, 'min_child_weight': 6, 'colsample_bytree': 1.0, 'subsample': 1.0, 'gamma': 1.3, 'reg_alpha': 1.9434198942670333, 'reg_lambda': 2.627413084505556}. Best is trial 0 with value: 0.7663817663817664.[0m


Optuna Best score :  0.7663817663817664


[32m[I 2022-08-28 22:22:55,794][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:22:56,782][0m Trial 0 finished with value: 0.7606837606837606 and parameters: {'n_estimators': 173, 'learning_rate': 0.00083792098475635, 'max_depth': 8, 'min_child_weight': 3, 'colsample_bytree': 1.0, 'subsample': 0.8, 'gamma': 1.1, 'reg_alpha': 3.189021299073327, 'reg_lambda': 2.6220180154201724}. Best is trial 0 with value: 0.7606837606837606.[0m
[32m[I 2022-08-28 22:22:56,942][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7606837606837606
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:22:57,520][0m Trial 0 finished with value: 0.7663817663817664 and parameters: {'n_estimators': 122, 'learning_rate': 0.00019814885725945368, 'max_depth': 7, 'min_child_weight': 6, 'colsample_bytree': 1.0, 'subsample': 0.8, 'gamma': 2.1, 'reg_alpha': 3.400968164684875, 'reg_lambda': 2.4733384171881783}. Best is trial 0 with value: 0.7663817663817664.[0m
[32m[I 2022-08-28 22:22:57,619][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7663817663817664
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:00,089][0m Trial 0 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 491, 'learning_rate': 0.04004540663984456, 'max_depth': 9, 'min_child_weight': 3, 'colsample_bytree': 0.6, 'subsample': 0.9, 'gamma': 2.7, 'reg_alpha': 4.814720255674183, 'reg_lambda': 3.3191595949105195}. Best is trial 0 with value: 0.7777777777777778.[0m


Optuna Best score :  0.7777777777777778


[32m[I 2022-08-28 22:23:00,544][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:01,999][0m Trial 0 finished with value: 0.7521367521367521 and parameters: {'n_estimators': 225, 'learning_rate': 0.0001903455723320209, 'max_depth': 7, 'min_child_weight': 3, 'colsample_bytree': 1.0, 'subsample': 1.0, 'gamma': 1.9000000000000001, 'reg_alpha': 2.125473877961378, 'reg_lambda': 3.4492998418119205}. Best is trial 0 with value: 0.7521367521367521.[0m


Optuna Best score :  0.7521367521367521
test accuracy :  0.3333333333333333


[32m[I 2022-08-28 22:23:02,247][0m A new study created in memory with name: xgboost Study[0m
[32m[I 2022-08-28 22:23:03,970][0m Trial 0 finished with value: 0.7549857549857549 and parameters: {'n_estimators': 449, 'learning_rate': 0.00024268954115243773, 'max_depth': 3, 'min_child_weight': 6, 'colsample_bytree': 0.6, 'subsample': 0.9, 'gamma': 0.4, 'reg_alpha': 3.577671621935351, 'reg_lambda': 4.511967861123894}. Best is trial 0 with value: 0.7549857549857549.[0m


Optuna Best score :  0.7549857549857549


[32m[I 2022-08-28 22:23:04,237][0m A new study created in memory with name: xgboost Study[0m


test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:23:04,572][0m Trial 0 finished with value: 0.7264957264957265 and parameters: {'n_estimators': 73, 'learning_rate': 0.00016832020808169008, 'max_depth': 7, 'min_child_weight': 8, 'colsample_bytree': 1.0, 'subsample': 0.5, 'gamma': 3.6, 'reg_alpha': 3.38313210881419, 'reg_lambda': 4.817883719821394}. Best is trial 0 with value: 0.7264957264957265.[0m
[32m[I 2022-08-28 22:23:04,622][0m A new study created in memory with name: xgboost Study[0m


Optuna Best score :  0.7264957264957265
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:23:05,379][0m Trial 0 finished with value: 0.7350427350427351 and parameters: {'n_estimators': 231, 'learning_rate': 0.0002492853177412892, 'max_depth': 9, 'min_child_weight': 8, 'colsample_bytree': 0.6, 'subsample': 0.6, 'gamma': 4.800000000000001, 'reg_alpha': 4.293663243831675, 'reg_lambda': 1.9381776651413425}. Best is trial 0 with value: 0.7350427350427351.[0m


Optuna Best score :  0.7350427350427351
test accuracy :  0.6666666666666666

mean accuracy :  0.7611111111111113
mean precision :  0.7611111111111113
mean recall :  0.7020833333333334
mean f1 :  0.6698419635919639
mean roc_auc :  0.7020833333333334



## 40개의 모델의 개별 performance metric + 평균 performnace metric을 저장한다
## 각 모델별 test set의 feature에 따른 shap value를 저장한다

In [33]:
if label_name == 'three_label':
    shap_values_df_0 = pd.concat(shap_df_list[0])
    shap_values_df_1 = pd.concat(shap_df_list[1])
    shap_values_df_2 = pd.concat(shap_df_list[2])

    with pd.ExcelWriter(f"./xgb_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df_0.to_excel(writer, sheet_name="shap_list_0", index=False)
        shap_values_df_1.to_excel(writer, sheet_name="shap_list_1", index=False)
        shap_values_df_2.to_excel(writer, sheet_name="shap_list_2", index=False)

if label_name != 'three_label':
    shap_values_df = pd.concat(shap_df_list[0])

    with pd.ExcelWriter(f"./xgb_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df.to_excel(writer, sheet_name="shap_list", index=False)

# Random Forest Bayesian

In [40]:
class RFObjective(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __call__(self, trial: Trial):
        rf_params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 500),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 2, 10),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
            'random_state': 42
        }

        clf = RandomForestClassifier(**rf_params)

        mean_accuracy = InnerCrossValidation(clf, self.X, self.y, cv_method, n_fold)

        return mean_accuracy

In [41]:
clf = RandomForestClassifier(random_state=42)
shap_df_list, metric_df = NestedCVwithOptuna(RFObjective, clf, 'randomforest')

[32m[I 2022-08-28 22:23:06,299][0m A new study created in memory with name: randomforest Study[0m


class_nums :  2


[32m[I 2022-08-28 22:23:07,191][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 134, 'max_depth': 8, 'min_samples_leaf': 7, 'min_samples_split': 4}. Best is trial 0 with value: 0.7806267806267806.[0m
[32m[I 2022-08-28 22:23:07,303][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7806267806267806
test accuracy :  1.0


[32m[I 2022-08-28 22:23:07,972][0m Trial 0 finished with value: 0.8005698005698005 and parameters: {'n_estimators': 101, 'max_depth': 8, 'min_samples_leaf': 7, 'min_samples_split': 5}. Best is trial 0 with value: 0.8005698005698005.[0m
[32m[I 2022-08-28 22:23:08,057][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.8005698005698005
test accuracy :  1.0


[32m[I 2022-08-28 22:23:10,049][0m Trial 0 finished with value: 0.7891737891737892 and parameters: {'n_estimators': 293, 'max_depth': 6, 'min_samples_leaf': 2, 'min_samples_split': 5}. Best is trial 0 with value: 0.7891737891737892.[0m


Optuna Best score :  0.7891737891737892
test accuracy :  1.0


[32m[I 2022-08-28 22:23:10,294][0m A new study created in memory with name: randomforest Study[0m
[32m[I 2022-08-28 22:23:11,360][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'n_estimators': 170, 'max_depth': 6, 'min_samples_leaf': 9, 'min_samples_split': 10}. Best is trial 0 with value: 0.7692307692307693.[0m
[32m[I 2022-08-28 22:23:11,497][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7692307692307693
test accuracy :  1.0


[32m[I 2022-08-28 22:23:14,054][0m Trial 0 finished with value: 0.7834757834757835 and parameters: {'n_estimators': 424, 'max_depth': 4, 'min_samples_leaf': 2, 'min_samples_split': 10}. Best is trial 0 with value: 0.7834757834757835.[0m


Optuna Best score :  0.7834757834757835


[32m[I 2022-08-28 22:23:14,363][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  1.0


[32m[I 2022-08-28 22:23:15,162][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 117, 'max_depth': 6, 'min_samples_leaf': 3, 'min_samples_split': 2}. Best is trial 0 with value: 0.792022792022792.[0m
[32m[I 2022-08-28 22:23:15,263][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.792022792022792
test accuracy :  1.0


[32m[I 2022-08-28 22:23:16,308][0m Trial 0 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 170, 'max_depth': 4, 'min_samples_leaf': 2, 'min_samples_split': 5}. Best is trial 0 with value: 0.7777777777777778.[0m
[32m[I 2022-08-28 22:23:16,435][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7777777777777778
test accuracy :  1.0


[32m[I 2022-08-28 22:23:18,153][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 257, 'max_depth': 7, 'min_samples_leaf': 4, 'min_samples_split': 8}. Best is trial 0 with value: 0.7977207977207977.[0m


Optuna Best score :  0.7977207977207977


[32m[I 2022-08-28 22:23:18,382][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  1.0


[32m[I 2022-08-28 22:23:20,149][0m Trial 0 finished with value: 0.7891737891737892 and parameters: {'n_estimators': 261, 'max_depth': 7, 'min_samples_leaf': 3, 'min_samples_split': 3}. Best is trial 0 with value: 0.7891737891737892.[0m


Optuna Best score :  0.7891737891737892


[32m[I 2022-08-28 22:23:20,374][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  1.0


[32m[I 2022-08-28 22:23:21,287][0m Trial 0 finished with value: 0.8062678062678063 and parameters: {'n_estimators': 132, 'max_depth': 10, 'min_samples_leaf': 4, 'min_samples_split': 10}. Best is trial 0 with value: 0.8062678062678063.[0m
[32m[I 2022-08-28 22:23:21,400][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.8062678062678063
test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:23:21,832][0m Trial 0 finished with value: 0.7948717948717948 and parameters: {'n_estimators': 60, 'max_depth': 8, 'min_samples_leaf': 4, 'min_samples_split': 5}. Best is trial 0 with value: 0.7948717948717948.[0m
[32m[I 2022-08-28 22:23:21,888][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7948717948717948
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:22,893][0m Trial 0 finished with value: 0.7863247863247863 and parameters: {'n_estimators': 158, 'max_depth': 6, 'min_samples_leaf': 9, 'min_samples_split': 6}. Best is trial 0 with value: 0.7863247863247863.[0m
[32m[I 2022-08-28 22:23:23,018][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7863247863247863
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:25,341][0m Trial 0 finished with value: 0.8034188034188035 and parameters: {'n_estimators': 379, 'max_depth': 4, 'min_samples_leaf': 2, 'min_samples_split': 8}. Best is trial 0 with value: 0.8034188034188035.[0m


Optuna Best score :  0.8034188034188035


[32m[I 2022-08-28 22:23:25,620][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:28,465][0m Trial 0 finished with value: 0.7891737891737892 and parameters: {'n_estimators': 443, 'max_depth': 8, 'min_samples_leaf': 7, 'min_samples_split': 2}. Best is trial 0 with value: 0.7891737891737892.[0m


Optuna Best score :  0.7891737891737892


[32m[I 2022-08-28 22:23:28,818][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:23:31,832][0m Trial 0 finished with value: 0.7948717948717948 and parameters: {'n_estimators': 493, 'max_depth': 4, 'min_samples_leaf': 2, 'min_samples_split': 6}. Best is trial 0 with value: 0.7948717948717948.[0m


Optuna Best score :  0.7948717948717948


[32m[I 2022-08-28 22:23:32,196][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:23:34,047][0m Trial 0 finished with value: 0.7863247863247863 and parameters: {'n_estimators': 329, 'max_depth': 3, 'min_samples_leaf': 4, 'min_samples_split': 10}. Best is trial 0 with value: 0.7863247863247863.[0m
[32m[I 2022-08-28 22:23:34,270][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7863247863247863
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:36,160][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 305, 'max_depth': 10, 'min_samples_leaf': 9, 'min_samples_split': 6}. Best is trial 0 with value: 0.7977207977207977.[0m
[32m[I 2022-08-28 22:23:36,394][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7977207977207977
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:23:38,046][0m Trial 0 finished with value: 0.8034188034188035 and parameters: {'n_estimators': 258, 'max_depth': 6, 'min_samples_leaf': 6, 'min_samples_split': 4}. Best is trial 0 with value: 0.8034188034188035.[0m
[32m[I 2022-08-28 22:23:38,255][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.8034188034188035
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:23:41,019][0m Trial 0 finished with value: 0.7863247863247863 and parameters: {'n_estimators': 435, 'max_depth': 8, 'min_samples_leaf': 7, 'min_samples_split': 3}. Best is trial 0 with value: 0.7863247863247863.[0m


Optuna Best score :  0.7863247863247863


[32m[I 2022-08-28 22:23:41,373][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  1.0


[32m[I 2022-08-28 22:23:44,111][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 484, 'max_depth': 3, 'min_samples_leaf': 7, 'min_samples_split': 9}. Best is trial 0 with value: 0.7806267806267806.[0m


Optuna Best score :  0.7806267806267806


[32m[I 2022-08-28 22:23:44,435][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:23:46,961][0m Trial 0 finished with value: 0.8005698005698005 and parameters: {'n_estimators': 380, 'max_depth': 8, 'min_samples_leaf': 5, 'min_samples_split': 7}. Best is trial 0 with value: 0.8005698005698005.[0m


Optuna Best score :  0.8005698005698005


[32m[I 2022-08-28 22:23:47,280][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:49,658][0m Trial 0 finished with value: 0.7891737891737892 and parameters: {'n_estimators': 371, 'max_depth': 7, 'min_samples_leaf': 6, 'min_samples_split': 2}. Best is trial 0 with value: 0.7891737891737892.[0m


Optuna Best score :  0.7891737891737892


[32m[I 2022-08-28 22:23:50,015][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:52,067][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 333, 'max_depth': 5, 'min_samples_leaf': 7, 'min_samples_split': 4}. Best is trial 0 with value: 0.7977207977207977.[0m


Optuna Best score :  0.7977207977207977


[32m[I 2022-08-28 22:23:52,320][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:23:53,675][0m Trial 0 finished with value: 0.7863247863247863 and parameters: {'n_estimators': 222, 'max_depth': 4, 'min_samples_leaf': 3, 'min_samples_split': 7}. Best is trial 0 with value: 0.7863247863247863.[0m
[32m[I 2022-08-28 22:23:53,839][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7863247863247863
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:23:55,525][0m Trial 0 finished with value: 0.8062678062678063 and parameters: {'n_estimators': 240, 'max_depth': 9, 'min_samples_leaf': 2, 'min_samples_split': 4}. Best is trial 0 with value: 0.8062678062678063.[0m
[32m[I 2022-08-28 22:23:55,738][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.8062678062678063
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:23:57,317][0m Trial 0 finished with value: 0.8034188034188035 and parameters: {'n_estimators': 243, 'max_depth': 9, 'min_samples_leaf': 6, 'min_samples_split': 7}. Best is trial 0 with value: 0.8034188034188035.[0m
[32m[I 2022-08-28 22:23:57,523][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.8034188034188035
test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:23:58,911][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 214, 'max_depth': 8, 'min_samples_leaf': 6, 'min_samples_split': 3}. Best is trial 0 with value: 0.792022792022792.[0m
[32m[I 2022-08-28 22:23:59,089][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.792022792022792
test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:23:59,983][0m Trial 0 finished with value: 0.7977207977207977 and parameters: {'n_estimators': 147, 'max_depth': 4, 'min_samples_leaf': 6, 'min_samples_split': 7}. Best is trial 0 with value: 0.7977207977207977.[0m
[32m[I 2022-08-28 22:24:00,093][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7977207977207977
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:24:01,174][0m Trial 0 finished with value: 0.7948717948717948 and parameters: {'n_estimators': 169, 'max_depth': 8, 'min_samples_leaf': 7, 'min_samples_split': 3}. Best is trial 0 with value: 0.7948717948717948.[0m
[32m[I 2022-08-28 22:24:01,311][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7948717948717948
test accuracy :  0.5555555555555556


[32m[I 2022-08-28 22:24:02,110][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 134, 'max_depth': 4, 'min_samples_leaf': 10, 'min_samples_split': 9}. Best is trial 0 with value: 0.792022792022792.[0m
[32m[I 2022-08-28 22:24:02,215][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.792022792022792
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:24:05,466][0m Trial 0 finished with value: 0.7948717948717948 and parameters: {'n_estimators': 469, 'max_depth': 9, 'min_samples_leaf': 2, 'min_samples_split': 6}. Best is trial 0 with value: 0.7948717948717948.[0m


Optuna Best score :  0.7948717948717948


[32m[I 2022-08-28 22:24:05,886][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:24:08,589][0m Trial 0 finished with value: 0.7948717948717948 and parameters: {'n_estimators': 415, 'max_depth': 9, 'min_samples_leaf': 6, 'min_samples_split': 4}. Best is trial 0 with value: 0.7948717948717948.[0m


Optuna Best score :  0.7948717948717948


[32m[I 2022-08-28 22:24:08,927][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:24:09,614][0m Trial 0 finished with value: 0.8034188034188035 and parameters: {'n_estimators': 104, 'max_depth': 9, 'min_samples_leaf': 7, 'min_samples_split': 2}. Best is trial 0 with value: 0.8034188034188035.[0m
[32m[I 2022-08-28 22:24:09,705][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.8034188034188035
test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:24:12,729][0m Trial 0 finished with value: 0.792022792022792 and parameters: {'n_estimators': 492, 'max_depth': 4, 'min_samples_leaf': 2, 'min_samples_split': 2}. Best is trial 0 with value: 0.792022792022792.[0m


Optuna Best score :  0.792022792022792


[32m[I 2022-08-28 22:24:13,095][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:24:15,818][0m Trial 0 finished with value: 0.7777777777777778 and parameters: {'n_estimators': 489, 'max_depth': 3, 'min_samples_leaf': 7, 'min_samples_split': 10}. Best is trial 0 with value: 0.7777777777777778.[0m


Optuna Best score :  0.7777777777777778


[32m[I 2022-08-28 22:24:16,157][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:24:19,116][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 472, 'max_depth': 10, 'min_samples_leaf': 8, 'min_samples_split': 8}. Best is trial 0 with value: 0.7806267806267806.[0m


Optuna Best score :  0.7806267806267806


[32m[I 2022-08-28 22:24:19,484][0m A new study created in memory with name: randomforest Study[0m


test accuracy :  0.6666666666666666


[32m[I 2022-08-28 22:24:20,665][0m Trial 0 finished with value: 0.7948717948717948 and parameters: {'n_estimators': 185, 'max_depth': 5, 'min_samples_leaf': 6, 'min_samples_split': 2}. Best is trial 0 with value: 0.7948717948717948.[0m
[32m[I 2022-08-28 22:24:20,814][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7948717948717948
test accuracy :  0.3333333333333333


[32m[I 2022-08-28 22:24:21,340][0m Trial 0 finished with value: 0.7863247863247863 and parameters: {'n_estimators': 81, 'max_depth': 10, 'min_samples_leaf': 9, 'min_samples_split': 8}. Best is trial 0 with value: 0.7863247863247863.[0m
[32m[I 2022-08-28 22:24:21,409][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7863247863247863
test accuracy :  0.7777777777777778


[32m[I 2022-08-28 22:24:21,816][0m Trial 0 finished with value: 0.7806267806267806 and parameters: {'n_estimators': 60, 'max_depth': 7, 'min_samples_leaf': 7, 'min_samples_split': 9}. Best is trial 0 with value: 0.7806267806267806.[0m
[32m[I 2022-08-28 22:24:21,869][0m A new study created in memory with name: randomforest Study[0m


Optuna Best score :  0.7806267806267806
test accuracy :  0.8888888888888888


[32m[I 2022-08-28 22:24:22,264][0m Trial 0 finished with value: 0.7834757834757835 and parameters: {'n_estimators': 65, 'max_depth': 3, 'min_samples_leaf': 6, 'min_samples_split': 7}. Best is trial 0 with value: 0.7834757834757835.[0m


Optuna Best score :  0.7834757834757835
test accuracy :  0.7777777777777778

mean accuracy :  0.7944444444444447
mean precision :  0.7944444444444447
mean recall :  0.7375000000000002
mean f1 :  0.7188267982017983
mean roc_auc :  0.7375000000000002



## 40개의 모델의 개별 performance metric + 평균 performnace metric을 저장한다
## 각 모델별 test set의 feature에 따른 shap value를 저장한다

In [42]:
if label_name == 'three_label':
    shap_values_df_0 = pd.concat(shap_df_list[0])
    shap_values_df_1 = pd.concat(shap_df_list[1])
    shap_values_df_2 = pd.concat(shap_df_list[2])

    with pd.ExcelWriter(f"./rf_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df_0.to_excel(writer, sheet_name="shap_list_0", index=False)
        shap_values_df_1.to_excel(writer, sheet_name="shap_list_1", index=False)
        shap_values_df_2.to_excel(writer, sheet_name="shap_list_2", index=False)

if label_name != 'three_label':
    shap_values_df = pd.concat(shap_df_list[0])

    with pd.ExcelWriter(f"./rf_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df.to_excel(writer, sheet_name="shap_list", index=False)

# GBM Bayesian

In [50]:
class GBMObjective(object):
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def __call__(self, trial: Trial):
        gbm_params = {
            'n_estimators': trial.suggest_int('n_estimators', 50, 500),
            'learning_rate': trial.suggest_loguniform("learning_rate", 1e-3, 1e-1),
            'max_depth': trial.suggest_int('max_depth', 3, 10),
            'max_features': trial.suggest_categorical('max_features', [None, 'sqrt']),
            'min_samples_leaf': trial.suggest_int('min_samples_leaf', 2, 10),
            'min_samples_split': trial.suggest_int('min_samples_split', 2, 10),
            'subsample': trial.suggest_float('subsample', 0.6, 1.0, step=0.1), 
            'random_state': 42
        }

        clf = GradientBoostingClassifier(**gbm_params)

        mean_accuracy = InnerCrossValidation(clf, self.X, self.y, cv_method, n_fold)

        return mean_accuracy

In [51]:
clf = GradientBoostingClassifier(random_state=42)
shap_df_list, metric_df = NestedCVwithOptuna(GBMObjective, clf, 'gbm')

[32m[I 2022-08-28 22:24:23,418][0m A new study created in memory with name: gbm Study[0m


class_nums :  2


[32m[I 2022-08-28 22:24:24,090][0m Trial 0 finished with value: 0.8005698005698005 and parameters: {'n_estimators': 139, 'learning_rate': 0.031178084281101547, 'max_depth': 4, 'max_features': 'sqrt', 'min_samples_leaf': 2, 'min_samples_split': 7, 'subsample': 1.0}. Best is trial 0 with value: 0.8005698005698005.[0m
[32m[I 2022-08-28 22:24:24,177][0m A new study created in memory with name: gbm Study[0m


Optuna Best score :  0.8005698005698005
test accuracy :  1.0


KeyboardInterrupt: 

## 40개의 모델의 개별 performance metric + 평균 performnace metric을 저장한다
## 각 모델별 test set의 feature에 따른 shap value를 저장한다

In [None]:
if label_name == 'three_label':
    shap_values_df_0 = pd.concat(shap_df_list[0])
    shap_values_df_1 = pd.concat(shap_df_list[1])
    shap_values_df_2 = pd.concat(shap_df_list[2])

    with pd.ExcelWriter(f"./gbm_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df_0.to_excel(writer, sheet_name="shap_list_0", index=False)
        shap_values_df_1.to_excel(writer, sheet_name="shap_list_1", index=False)
        shap_values_df_2.to_excel(writer, sheet_name="shap_list_2", index=False)

if label_name != 'three_label':
    shap_values_df = pd.concat(shap_df_list[0])

    with pd.ExcelWriter(f"./gbm_result.xlsx") as writer:
        metric_df.to_excel(writer, sheet_name="performance metric", index=False)
        shap_values_df.to_excel(writer, sheet_name="shap_list", index=False)