# 0. Default start

In [1]:
import pandas as pd
import numpy as np

In [2]:
pd.set_option('display.max_rows', 100)
pd.set_option('display.max_columns', None)

In [3]:
np.random.seed(42)

In [4]:
df = pd.read_csv('train_dataset_Самолет.csv')

  df = pd.read_csv('train_dataset_Самолет.csv')


In [5]:
def device(data):
    columns = [f'col{n}' for n in range(529, 544)]
    new_feature_name = 'device'
    most_common_value = data[columns].mode(axis=1).iloc[:, 0]
    data[new_feature_name] = most_common_value
    return data

In [6]:
def info_redirect(data):
    columns = [f'col{n}' for n in range(561, 576)]
    mail_info = 'info_mail'
    most_common_value = any(['sms' in data[columns], 'email' in data[columns]])
    data[mail_info] = int(most_common_value)
    
    invite_count = 'sale_invite_count'
    most_common_value = (data[columns].notnull()).sum(axis=1)
    data[invite_count] = most_common_value
    return data

In [7]:
def generate_new_features(data):
    """
    Генерирует новые признаки на основе заданных колонок.
    :param data: pandas.DataFrame, содержащий данные.
    :param columns: список, содержащий названия колонок для генерации новых признаков.
    :return: pandas.DataFrame с добавленными новыми признаками.
    """
    columns = [f'col{n}' for n in range(465, 473)]
    new_feature_name = 'filter' + '_most_common'
    most_common_value = data[columns].mode(axis=1).iloc[:, 0]
    data[new_feature_name] = most_common_value

    count_feature_name = 'filter' + '_count'
    count_of_most_common = (data[columns] == most_common_value[..., None]).sum(axis=1)
    data[count_feature_name] = count_of_most_common

    non_zero_feature_name = 'filter' + '_non_zero_count'
    non_zero_count = (data[columns] != 0).sum(axis=1)
    data[non_zero_feature_name] = non_zero_count

    return data

In [8]:
def calculate_quantitative_metrics(data):
    """
    Вычисляет количественные метрики на основе заданных колонок для каждой строки в датафрейме.
    :param data: pandas.DataFrame, содержащий данные.
    :param columns: список, содержащий названия колонок для вычисления метрик.
    :return: pandas.DataFrame с добавленными количественными метриками.
    """
    columns = [f'col{n}' for n in range(457,465)]
    # Количество уникальных страниц
    unique_value = data[columns].nunique(axis=1)
    data['unique_pages_count'] = unique_value

    # Количество общих страниц
    count_feature_name = 'visit_site' + '_count'
    count_of_most_common = (data[columns].notnull()).sum(axis=1)
    data[count_feature_name] = count_of_most_common

    return data

In [9]:
df = device(df)
df = info_redirect(df)
df = generate_new_features(df)
df = calculate_quantitative_metrics(df)

  count_of_most_common = (data[columns] == most_common_value[..., None]).sum(axis=1)


# 1. Data analyze

In [10]:
def count_missing_values(df):
    """
    Функция для подсчета пропусков в указанном столбце DataFrame. 
    Параметры:
    df (pd.DataFrame): Исходный DataFrame. 
    Возвращает:
    float: Процент пропусков в столбце."""
    total_rows = df.shape[0]  # Общее количество строк в DataFrame
    missing_values = df.isnull().sum()  # Подсчет пропусков в столбце

    # Вычисление процента пропусков
    missing_percentage = (missing_values / total_rows) * 100

    return missing_percentage

In [11]:
df = df.replace({'nan': None})

In [12]:
df.shape

(14456, 2674)

In [13]:
df = df.T.drop_duplicates().T
df.shape

(14456, 2221)

In [14]:
miss_pos_col = count_missing_values(df[df['target'] == 1])
threshold = 75 # percent value in col for drop
to_drop = miss_pos_col[miss_pos_col >= threshold].index.tolist()
len(to_drop)

1878

In [15]:
df = df.drop(to_drop, axis=1)

In [16]:
df.shape

(14456, 343)

In [17]:
def intersection_col(df, list_cols):
    if isinstance(list_cols, list):
        return list(set(df.columns.tolist()) & set(list_cols))
    else:
        return list(set(df.columns.tolist()) & set(list_cols.columns.tolist()))

In [18]:
cat_all_columns = ['col520', 'col528', 'col536',
 'col544', 'col552', 'col592', 'col600', 'col608', 'col1454',
                  'device', 'info_mail', 'filter_most_common']
to_drop_cat = ['col552', 'col1454']

In [19]:
cat_cols = intersection_col(df, cat_all_columns)
to_drop = list(set(to_drop_cat) & set(cat_cols))
cat_cols = list(set(cat_cols) - set(to_drop))
len(cat_cols)

1

In [20]:
df = df.drop(to_drop, axis=1)

In [21]:
df.shape

(14456, 342)

In [22]:
cols = df.columns.tolist()
cols.remove('client_id'), cols.remove('target'), cols.remove('report_date')
df[cols] = df[cols].astype(np.float64)

category_cols = []

for col in df.columns.tolist():
    if col == 'report_date':
        continue
    if df[col].max() <= 40:
        category_cols.append(col)
category_cols.remove('col2663'), category_cols.remove('target')

(None, None)

In [23]:
# df[category_cols] = df[category_cols].astype(str)

# 2. Model builder

In [24]:
from sklearn.metrics import classification_report
from eli5.sklearn import PermutationImportance
from lightgbm import LGBMClassifier
from category_encoders import CatBoostEncoder

In [25]:
from sklearn.metrics import (
    roc_auc_score,
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
    confusion_matrix,
    average_precision_score,
    log_loss,
    )
def evaluate_metrics(y_true, y_pred):
    roc_auc = roc_auc_score(y_true, y_pred) 
    accuracy = accuracy_score(y_true, (y_pred > 0.5).astype(int)) 
    precision = precision_score(y_true, (y_pred > 0.5).astype(int)) 
    recall = recall_score(y_true, (y_pred > 0.5).astype(int))
    f1 = f1_score(y_true, (y_pred > 0.5).astype(int)) 
    # confusion = confusion_matrix(y_true, (y_pred > 0.5).astype(int))
    pr_auc = average_precision_score(y_true, y_pred) 
    logloss = log_loss(y_true, y_pred)

    print(f"ROC AUC: {roc_auc}", end='; ')
    print(f"Accuracy: {accuracy}", end='; ')
    print(f"Precision: {precision}", end='; ')
    print(f"Recall: {recall}", end='; ')
    print(f"F1 Score: {f1}", end='; ')
    print(f"PR AUC: {pr_auc}", end='; ')
    print(f"Log Loss: {logloss}")

In [26]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, ClassifierMixin

from eli5.sklearn import PermutationImportance
from imblearn.over_sampling import SMOTE
import optuna
from optuna.integration import LightGBMPruningCallback
import lightgbm as lgb
from copy import deepcopy

    
class FeatureSelector(BaseEstimator, ClassifierMixin):
    def __init__(self, features):
        """
        Класс для отбора заданных признаков.
        
        Параметры:
        features (list): Список заданных признаков.
        """
        self.features = features
    
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        """
        Метод для отбора заданных признаков из датасета.
        
        Параметры:
        df (pd.DataFrame): Входной датасет.
        
        Возвращает:
        pd.DataFrame: Датасет, содержащий только отобранные признаки.
        
        Исключения:
        ValueError: Генерируется, если какие-то из заданных признаков отсутствуют во входном датасете.
        """
        # Проверяем наличие всех заданных признаков во входном датасете
        missing_features = set(self.features) - set(df.columns)
        if missing_features:
            raise ValueError(f"Признаки {missing_features} отсутствуют во входном датасете.")
        
        # Отбираем заданные признаки из датасета
        selected_df = X[self.features]
        
        return selected_df

def objective(trial, trf, df_train, df_val, target_col='target'):
    params = {
        'objective': 'binary',
        'metric': 'auc',
        'boosting_type': 'gbdt',
        'num_leaves': trial.suggest_int('num_leaves', 2, 256),
        'learning_rate': trial.suggest_float('learning_rate', 0.001, 0.1, log=True),
        'feature_fraction': trial.suggest_float('feature_fraction', 0.1, 1.0),
        'bagging_fraction': trial.suggest_float('bagging_fraction', 0.1, 1.0),
        'bagging_freq': trial.suggest_int('bagging_freq', 1, 10),
        'max_depth': trial.suggest_int('max_depth', 3, 12),
        'min_child_samples': trial.suggest_int('min_child_samples', 1, 100),
        'min_sum_hessian_in_leaf ': trial.suggest_float('min_sum_hessian_in_leaf', 0.0, 10.0),
        'is_unbalance': trial.suggest_categorical('is_unbalance', [True]),
        'random_state': trial.suggest_categorical('random_state', [42])
    }
    pruning_callback = LightGBMPruningCallback(trial, "auc")
    trf = trf.fit(df_train, df_train[target_col])

    model = lgb.train(params, lgb.Dataset(trf.transform(df_train), label=df_train[target_col]), num_boost_round=1000,
                    valid_sets=[lgb.Dataset(trf.transform(df_val), label=df_val[target_col])], 
                    early_stopping_rounds=100, verbose_eval=False, callbacks=[pruning_callback])
   
    y_pred = model.predict(trf.transform(df_val))

    evaluate_metrics(df_val[target_col], y_pred)
    roc_auc = roc_auc_score(df_val[target_col], y_pred) 

    return roc_auc

def split_dataset_by_target(data, target, n):
    """
    Разделить датасет на n групп, с одинаковым положительным таргетом и разными негативными.
    :param data: pandas.DataFrame, содержащий данные для разделения.
    :param target: str, описывающий положительный таргет.
    :param n: количество групп для разделения (и, соответственно, количество моделей).
    :return: список, содержащий n кортежей из данных для обучения.
    """
    sorted_data = data.sort_values(target, kind='mergesort')
    positive_set_index = sorted_data.index[sorted_data[target] == 1]
    positive_set_size = len(positive_set_index)
    negative_set_index = sorted_data.index[sorted_data[target] == 0]
    negative_set_size = len(negative_set_index)
    negative_sets = []
    for i in range(n-1):
        negative_sets.append(negative_set_index[i*(negative_set_size//n):(i+1)*(negative_set_size//n)])
    negative_sets.append(negative_set_index[(n-1)*(negative_set_size//n):])
    splitted_data = []
    for i in range(n):
        negative_index = negative_sets[i]
        train_data_index = positive_set_index.append(negative_index)
        # test_data_index = negative_set_index.difference(negative_index)
        train_data = data.loc[train_data_index]
        # test_data = data.loc[test_data_index]
        splitted_data.append(train_data)
    return splitted_data
    
class ModelBuilder(object):
    def __init__(self, cat_cols, features_name, ml_cfg, target_col='target', params=None):
        
        self.features = features_name
        self.cat_cols = cat_cols
        self.target_col = target_col
        self.cfg = ml_cfg
        selector = FeatureSelector(features=features_name)
        enc = CatBoostEncoder(cols=cat_cols)
        lgbm = LGBMClassifier()
        self.model = Pipeline([('sel', selector), ('enc', enc), ('lgbm', lgbm)])      
        self.model.set_params(**params) 
        
        
    def train(self, df_train):
        model = self.model.fit(df_train, df_train[self.target_col])
        return model
        
    def select_features(self, df_train, df_val):

        model = self.train(df_train)
        transformer = Pipeline(model.steps[:-1])
        model = model.steps[-1][-1]
        transformed_val_data = transformer.transform(df_val)

        print('start feature select...')
        perm = PermutationImportance(model, random_state=42) \
                                    .fit(transformed_val_data, df_val[self.target_col])
        print('end feature select...')

        feature_importances = list(zip(transformed_val_data.columns, perm.feature_importances_))
        feature_importances.sort(key=lambda x: x[1], reverse=True)
        selected_featurs = [name_col for name_col, value in feature_importances if value > 0]
        print(f'selected {len(selected_featurs)} out of {len(df_train.columns)}')
        
        cat_cols = list(set(self.cat_cols) & set(selected_featurs))
        selected_features = {'sel__features': selected_featurs,'enc__cols': cat_cols}
        return selected_features
    
    def params_tuning(self, df_train, df_val, model):
        print('start tuning...')
        study = optuna.create_study(direction='maximize')
        transformer = Pipeline(model.steps[:-1])
        study.optimize(lambda trial: objective(trial, transformer, df_train, df_val),
                       n_jobs=1, n_trials=self.cfg['n_trails'])
        print('end tuning...')

        best_params = study.best_params
        best_score = study.best_value
        best_params = {f'lgbm__{k}':v for k,v in best_params.items()}
        # self.model.set_params(**best_params)
        print(f'best params: {best_params}')
        print(f'best score: {best_score}')
        return best_params
    
    def build(self, df):
        print('start build model...')
        df_train, df_test = train_test_split(df, random_state=42, test_size=self.cfg['test_size'],
                                            shuffle=True, stratify=df[self.target_col])
        
        test = split_dataset_by_target(df_train, self.target_col, self.cfg['n_split_df'])
        
        models = []
        
        for df in test:
            df_train, df_val = train_test_split(df, random_state=42, test_size=self.cfg['test_size'],
                                            shuffle=True, stratify=df[self.target_col])
            
            features = self.select_features(df_train, df_val)
            model = deepcopy(self.model)
            model.set_params(**features)
            best_params = self.params_tuning(df_train, df_val, model)
            model.set_params(**best_params)

            model.fit(df_train, df_train[self.target_col])

            y_pred = model.predict(df_val)
            metrics = roc_auc_score(df_val[self.target_col], y_pred)
            print(f'roc_auc on validate data = {metrics}')
            models.append(model)
        preds = []
        for m in models:
            preds.append(m.predict_proba(df_test)[:, 1])
        result = pd.DataFrame(preds).mean()
        print('metrics on test data:')
        evaluate_metrics(df_test['target'], result)
        return models

# 3. Create and get trainable model

In [27]:
exclude_cols = ['target', 'client_id', 'report_date']
features = df.columns.tolist()
features = list(set(features) - set(exclude_cols))
category_cols = list(set(features) & set(category_cols))
default_params = {'lgbm__n_estimators': 100, 'lgbm__max_depth': 10, 
                  'lgbm__random_state': 42, 'lgbm__n_jobs': -1}
ml_cfg =  {'n_trails': 100, 'test_size': 0.3, 'n_split_df': 3}

In [28]:
df['target'] = df['target'].astype(int)

In [29]:
builder = ModelBuilder(cat_cols = category_cols, features_name=features, ml_cfg=ml_cfg, params=default_params)

In [30]:
%%time
models = builder.build(df)

start build model...
start feature select...


[I 2023-09-10 01:02:22,024] A new study created in memory with name: no-name-1841985a-57a0-4230-bb48-c93d956dbd6c


end feature select...
selected 85 out of 342
Finished loading model, total used 100 iterations
start tuning...


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:02:22,472] Trial 0 finished with value: 0.807090147586757 and parameters: {'num_leaves': 177, 'learning_rate': 0.004480001263134817, 'feature_fraction': 0.3102741342546008, 'bagging_fraction': 0.9701755396059921, 'bagging_freq': 4, 'max_depth': 3, 'min_child_samples': 53, 'min_sum_hessian_in_leaf': 9.07594152588438, 'is_unbalance': True, 'random_state': 42}. Best is trial 0 with value: 0.807090147586757.


ROC AUC: 0.807090147586757; Accuracy: 0.9142590866728798; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.29901968657359324; Log Loss: 0.2813556007874963


[I 2023-09-10 01:02:23,842] Trial 1 finished with value: 0.8710610291184683 and parameters: {'num_leaves': 20, 'learning_rate': 0.0032981314233934797, 'feature_fraction': 0.516724932405449, 'bagging_fraction': 0.7171840706930268, 'bagging_freq': 9, 'max_depth': 8, 'min_child_samples': 42, 'min_sum_hessian_in_leaf': 0.7564660891033004, 'is_unbalance': True, 'random_state': 42}. Best is trial 1 with value: 0.8710610291184683.


ROC AUC: 0.8710610291184683; Accuracy: 0.9030754892823858; Precision: 0.45714285714285713; Recall: 0.6956521739130435; F1 Score: 0.5517241379310345; PR AUC: 0.5670505557245006; Log Loss: 0.24873178833484597


[I 2023-09-10 01:02:24,434] Trial 2 finished with value: 0.8393498205025928 and parameters: {'num_leaves': 108, 'learning_rate': 0.004400321284307427, 'feature_fraction': 0.6844226392131085, 'bagging_fraction': 0.38677162325230263, 'bagging_freq': 5, 'max_depth': 10, 'min_child_samples': 94, 'min_sum_hessian_in_leaf': 5.261949324037611, 'is_unbalance': True, 'random_state': 42}. Best is trial 1 with value: 0.8710610291184683.


ROC AUC: 0.8393498205025928; Accuracy: 0.8769804287045666; Precision: 0.34615384615384615; Recall: 0.4891304347826087; F1 Score: 0.4054054054054054; PR AUC: 0.40266933427067125; Log Loss: 0.2927073529671477


[I 2023-09-10 01:02:25,045] Trial 3 finished with value: 0.8949053760581483 and parameters: {'num_leaves': 207, 'learning_rate': 0.0363080134844106, 'feature_fraction': 0.6711617339684846, 'bagging_fraction': 0.41386597859997676, 'bagging_freq': 5, 'max_depth': 4, 'min_child_samples': 63, 'min_sum_hessian_in_leaf': 1.1647359223234366, 'is_unbalance': True, 'random_state': 42}. Best is trial 3 with value: 0.8949053760581483.


ROC AUC: 0.8949053760581483; Accuracy: 0.9058713886300093; Precision: 0.46616541353383456; Recall: 0.6739130434782609; F1 Score: 0.551111111111111; PR AUC: 0.5672191784927723; Log Loss: 0.2396001660141196


[I 2023-09-10 01:02:25,673] Trial 4 finished with value: 0.8808115055622036 and parameters: {'num_leaves': 187, 'learning_rate': 0.005890415656544541, 'feature_fraction': 0.1066211127819446, 'bagging_fraction': 0.5408781681864472, 'bagging_freq': 5, 'max_depth': 9, 'min_child_samples': 61, 'min_sum_hessian_in_leaf': 8.851349120570172, 'is_unbalance': True, 'random_state': 42}. Best is trial 3 with value: 0.8949053760581483.


ROC AUC: 0.8808115055622036; Accuracy: 0.8974836905871388; Precision: 0.4296875; Recall: 0.5978260869565217; F1 Score: 0.5; PR AUC: 0.5109348814727938; Log Loss: 0.2904358553563991


[I 2023-09-10 01:02:25,982] Trial 5 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:26,290] Trial 6 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:26,603] Trial 7 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:26,916] Trial 8 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:02:27,394] Trial 9 finished with value: 0.8563577538447902 and parameters: {'num_leaves': 205, 'learning_rate': 0.0017088595711975364, 'feature_fraction': 0.13430631417113054, 'bagging_fraction': 0.8751888564052122, 'bagging_freq': 4, 'max_depth': 4, 'min_child_samples': 40, 'min_sum_hessian_in_leaf': 1.577359754824288, 'is_unbalance': True, 'random_state': 42}. Best is trial 3 with value: 0.8949053760581483.


ROC AUC: 0.8563577538447902; Accuracy: 0.9142590866728798; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.4442631956201843; Log Loss: 0.28352876513173775


[I 2023-09-10 01:02:27,736] Trial 10 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:28,140] Trial 11 pruned. Trial was pruned at iteration 73.
[I 2023-09-10 01:02:28,486] Trial 12 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:28,831] Trial 13 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:29,172] Trial 14 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:29,528] Trial 15 pruned. Trial was pruned at iteration 4.
[I 2023-09-10 01:02:29,873] Trial 16 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:30,220] Trial 17 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:30,563] Trial 18 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:30,905] Trial 19 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:31,309] Trial 20 pruned. Trial was pruned at iteration 64.
[I 2023-09-10 01:02:31,658] Trial 21 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:32,000] Trial 22 pruned. Trial was pruned 

ROC AUC: 0.8920023932987635; Accuracy: 0.9440820130475303; Precision: 0.7105263157894737; Recall: 0.5869565217391305; F1 Score: 0.6428571428571428; PR AUC: 0.6546644957753341; Log Loss: 0.19928019488126472


[I 2023-09-10 01:02:34,983] Trial 28 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:35,330] Trial 29 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:35,675] Trial 30 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:36,196] Trial 31 pruned. Trial was pruned at iteration 79.
[I 2023-09-10 01:02:36,682] Trial 32 pruned. Trial was pruned at iteration 122.
[I 2023-09-10 01:02:37,024] Trial 33 pruned. Trial was pruned at iteration 3.
[I 2023-09-10 01:02:37,359] Trial 34 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:37,695] Trial 35 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:38,661] Trial 36 finished with value: 0.9087887249036032 and parameters: {'num_leaves': 28, 'learning_rate': 0.0553483565856453, 'feature_fraction': 0.27584668570275706, 'bagging_fraction': 0.6280596056594672, 'bagging_freq': 4, 'max_depth': 10, 'min_child_samples': 77, 'min_sum_hessian_in_leaf': 4.8898464267943, 'is_unbalance': True, 'random_state': 

ROC AUC: 0.9087887249036032; Accuracy: 0.9459459459459459; Precision: 0.6808510638297872; Recall: 0.6956521739130435; F1 Score: 0.6881720430107526; PR AUC: 0.6834178007908217; Log Loss: 0.23475202594594063


[I 2023-09-10 01:02:38,994] Trial 37 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:39,329] Trial 38 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:39,669] Trial 39 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:40,359] Trial 40 finished with value: 0.8936533262420776 and parameters: {'num_leaves': 215, 'learning_rate': 0.03027208805184591, 'feature_fraction': 0.1421890925944396, 'bagging_fraction': 0.9991370618127189, 'bagging_freq': 3, 'max_depth': 11, 'min_child_samples': 88, 'min_sum_hessian_in_leaf': 5.118408736437082, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.8936533262420776; Accuracy: 0.8956197576887233; Precision: 0.4382716049382716; Recall: 0.7717391304347826; F1 Score: 0.5590551181102362; PR AUC: 0.5804671175300034; Log Loss: 0.2604494193711453


[I 2023-09-10 01:02:41,436] Trial 41 finished with value: 0.8974981163852326 and parameters: {'num_leaves': 206, 'learning_rate': 0.03088058084391281, 'feature_fraction': 0.13632023836330776, 'bagging_fraction': 0.9598472468223164, 'bagging_freq': 3, 'max_depth': 11, 'min_child_samples': 92, 'min_sum_hessian_in_leaf': 5.026309583905153, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.8974981163852326; Accuracy: 0.9440820130475303; Precision: 0.6632653061224489; Recall: 0.7065217391304348; F1 Score: 0.6842105263157895; PR AUC: 0.6595263013056347; Log Loss: 0.22959066365242511


[I 2023-09-10 01:02:42,119] Trial 42 pruned. Trial was pruned at iteration 426.
[I 2023-09-10 01:02:43,102] Trial 43 finished with value: 0.9017750299162346 and parameters: {'num_leaves': 208, 'learning_rate': 0.04815142972618713, 'feature_fraction': 0.19577900659473385, 'bagging_fraction': 0.9354481418834566, 'bagging_freq': 3, 'max_depth': 12, 'min_child_samples': 90, 'min_sum_hessian_in_leaf': 3.8280646934799685, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9017750299162346; Accuracy: 0.9496738117427772; Precision: 0.7021276595744681; Recall: 0.717391304347826; F1 Score: 0.7096774193548387; PR AUC: 0.6740928212486414; Log Loss: 0.24194440801435158


[I 2023-09-10 01:02:43,441] Trial 44 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:43,835] Trial 45 pruned. Trial was pruned at iteration 56.
[I 2023-09-10 01:02:44,274] Trial 46 pruned. Trial was pruned at iteration 122.
[I 2023-09-10 01:02:45,083] Trial 47 finished with value: 0.9025063156495144 and parameters: {'num_leaves': 182, 'learning_rate': 0.07308754982244522, 'feature_fraction': 0.22474849693205845, 'bagging_fraction': 0.9221960016447202, 'bagging_freq': 2, 'max_depth': 11, 'min_child_samples': 100, 'min_sum_hessian_in_leaf': 5.3520662028157515, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9025063156495144; Accuracy: 0.9487418452935694; Precision: 0.6907216494845361; Recall: 0.7282608695652174; F1 Score: 0.7089947089947088; PR AUC: 0.6761833246229985; Log Loss: 0.23926053925359997


[I 2023-09-10 01:02:45,875] Trial 48 finished with value: 0.8997473740194124 and parameters: {'num_leaves': 181, 'learning_rate': 0.08762631903544373, 'feature_fraction': 0.2147636361931752, 'bagging_fraction': 0.9010749375865479, 'bagging_freq': 1, 'max_depth': 12, 'min_child_samples': 92, 'min_sum_hessian_in_leaf': 3.2582455975361198, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.8997473740194124; Accuracy: 0.9478098788443616; Precision: 0.6956521739130435; Recall: 0.6956521739130435; F1 Score: 0.6956521739130435; PR AUC: 0.6771246013651272; Log Loss: 0.25512163603479343


[I 2023-09-10 01:02:46,336] Trial 49 pruned. Trial was pruned at iteration 154.
[I 2023-09-10 01:02:47,315] Trial 50 finished with value: 0.9044010105039223 and parameters: {'num_leaves': 156, 'learning_rate': 0.07535168176327205, 'feature_fraction': 0.26011581559040836, 'bagging_fraction': 0.8663250807922178, 'bagging_freq': 2, 'max_depth': 12, 'min_child_samples': 99, 'min_sum_hessian_in_leaf': 4.660428508036362, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9044010105039223; Accuracy: 0.9468779123951537; Precision: 0.6923076923076923; Recall: 0.6847826086956522; F1 Score: 0.6885245901639345; PR AUC: 0.6929397149050857; Log Loss: 0.2875529463259709


[I 2023-09-10 01:02:48,213] Trial 51 finished with value: 0.9035146035544918 and parameters: {'num_leaves': 186, 'learning_rate': 0.07807106643001904, 'feature_fraction': 0.24119940240944382, 'bagging_fraction': 0.8707902457761517, 'bagging_freq': 2, 'max_depth': 12, 'min_child_samples': 100, 'min_sum_hessian_in_leaf': 4.745353210431722, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9035146035544918; Accuracy: 0.9468779123951537; Precision: 0.6923076923076923; Recall: 0.6847826086956522; F1 Score: 0.6885245901639345; PR AUC: 0.67390781902175; Log Loss: 0.27598772080020706


[I 2023-09-10 01:02:48,572] Trial 52 pruned. Trial was pruned at iteration 18.
[I 2023-09-10 01:02:48,919] Trial 53 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:49,272] Trial 54 pruned. Trial was pruned at iteration 12.
[I 2023-09-10 01:02:49,612] Trial 55 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:49,963] Trial 56 pruned. Trial was pruned at iteration 12.
[I 2023-09-10 01:02:50,878] Trial 57 finished with value: 0.9070713114390817 and parameters: {'num_leaves': 172, 'learning_rate': 0.09897101284869199, 'feature_fraction': 0.33074089652815136, 'bagging_fraction': 0.9163738014435971, 'bagging_freq': 1, 'max_depth': 11, 'min_child_samples': 88, 'min_sum_hessian_in_leaf': 3.2161284200060063, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9070713114390817; Accuracy: 0.9450139794967382; Precision: 0.6774193548387096; Recall: 0.6847826086956522; F1 Score: 0.6810810810810811; PR AUC: 0.7072867373970808; Log Loss: 0.28095588783510894


[I 2023-09-10 01:02:51,261] Trial 58 pruned. Trial was pruned at iteration 35.
[I 2023-09-10 01:02:52,029] Trial 59 finished with value: 0.90629570535833 and parameters: {'num_leaves': 153, 'learning_rate': 0.09998541378306137, 'feature_fraction': 0.3155769611389112, 'bagging_fraction': 0.8217383912051996, 'bagging_freq': 2, 'max_depth': 10, 'min_child_samples': 84, 'min_sum_hessian_in_leaf': 4.743909516054399, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.90629570535833; Accuracy: 0.9468779123951537; Precision: 0.6881720430107527; Recall: 0.6956521739130435; F1 Score: 0.6918918918918919; PR AUC: 0.6748001145610655; Log Loss: 0.2402340329675744


[I 2023-09-10 01:02:52,377] Trial 60 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:52,717] Trial 61 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:53,058] Trial 62 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:53,401] Trial 63 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:53,755] Trial 64 pruned. Trial was pruned at iteration 15.
[I 2023-09-10 01:02:54,095] Trial 65 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:54,435] Trial 66 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:54,774] Trial 67 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:55,115] Trial 68 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:02:55,468] Trial 69 pruned. Trial was pruned at iteration 14.
[I 2023-09-10 01:02:55,823] Trial 70 pruned. Trial was pruned at iteration 15.
[I 2023-09-10 01:02:56,530] Trial 71 finished with value: 0.9011767052253691 and parameters: {'num_leaves': 183, 'learning_rate': 0.0883177

ROC AUC: 0.9011767052253691; Accuracy: 0.9468779123951537; Precision: 0.6804123711340206; Recall: 0.717391304347826; F1 Score: 0.6984126984126985; PR AUC: 0.6581709695351687; Log Loss: 0.22765050447155166


[I 2023-09-10 01:02:56,887] Trial 72 pruned. Trial was pruned at iteration 26.
[I 2023-09-10 01:02:57,230] Trial 73 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:57,571] Trial 74 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:58,491] Trial 75 finished with value: 0.9033594823383415 and parameters: {'num_leaves': 173, 'learning_rate': 0.06371093951907654, 'feature_fraction': 0.16225437754069533, 'bagging_fraction': 0.8678304256133172, 'bagging_freq': 1, 'max_depth': 12, 'min_child_samples': 87, 'min_sum_hessian_in_leaf': 3.636198238679735, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9033594823383415; Accuracy: 0.9515377446411929; Precision: 0.717391304347826; Recall: 0.717391304347826; F1 Score: 0.717391304347826; PR AUC: 0.6800341295400321; Log Loss: 0.25342459759629454


[I 2023-09-10 01:02:58,855] Trial 76 pruned. Trial was pruned at iteration 32.
[I 2023-09-10 01:02:59,196] Trial 77 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:02:59,633] Trial 78 pruned. Trial was pruned at iteration 126.
[I 2023-09-10 01:02:59,978] Trial 79 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:03:00,323] Trial 80 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:01,092] Trial 81 finished with value: 0.9011656251385012 and parameters: {'num_leaves': 191, 'learning_rate': 0.09078140260118775, 'feature_fraction': 0.21797470822514442, 'bagging_fraction': 0.8844746863807047, 'bagging_freq': 1, 'max_depth': 12, 'min_child_samples': 89, 'min_sum_hessian_in_leaf': 3.625290706308305, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9011656251385012; Accuracy: 0.9487418452935694; Precision: 0.6947368421052632; Recall: 0.717391304347826; F1 Score: 0.7058823529411765; PR AUC: 0.673200103358496; Log Loss: 0.24856462137020688


[I 2023-09-10 01:03:01,430] Trial 82 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:01,779] Trial 83 pruned. Trial was pruned at iteration 5.
[I 2023-09-10 01:03:02,515] Trial 84 finished with value: 0.9003456987102778 and parameters: {'num_leaves': 185, 'learning_rate': 0.098949785275646, 'feature_fraction': 0.20265743456775842, 'bagging_fraction': 0.9782269474468119, 'bagging_freq': 2, 'max_depth': 12, 'min_child_samples': 86, 'min_sum_hessian_in_leaf': 3.943544862460466, 'is_unbalance': True, 'random_state': 42}. Best is trial 36 with value: 0.9087887249036032.


ROC AUC: 0.9003456987102778; Accuracy: 0.9478098788443616; Precision: 0.6875; Recall: 0.717391304347826; F1 Score: 0.702127659574468; PR AUC: 0.6879674858976628; Log Loss: 0.24756249556966092


[I 2023-09-10 01:03:02,853] Trial 85 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:03,199] Trial 86 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:03:03,548] Trial 87 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:03,894] Trial 88 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:04,238] Trial 89 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:04,592] Trial 90 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:04,945] Trial 91 pruned. Trial was pruned at iteration 11.
[I 2023-09-10 01:03:05,292] Trial 92 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:05,637] Trial 93 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:05,982] Trial 94 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:06,330] Trial 95 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:06,679] Trial 96 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:07,102] Trial 97 pruned. Trial was pruned a

end tuning...
best params: {'lgbm__num_leaves': 28, 'lgbm__learning_rate': 0.0553483565856453, 'lgbm__feature_fraction': 0.27584668570275706, 'lgbm__bagging_fraction': 0.6280596056594672, 'lgbm__bagging_freq': 4, 'lgbm__max_depth': 10, 'lgbm__min_child_samples': 77, 'lgbm__min_sum_hessian_in_leaf': 4.8898464267943, 'lgbm__is_unbalance': True, 'lgbm__random_state': 42}
best score: 0.9087887249036032
roc_auc on validate data = 0.8299760670123654
start feature select...


[I 2023-09-10 01:03:20,129] A new study created in memory with name: no-name-2e110e52-1a33-4977-ab37-752ea314c5e0


end feature select...
selected 94 out of 342
Finished loading model, total used 100 iterations
start tuning...


[I 2023-09-10 01:03:21,238] Trial 0 finished with value: 0.8777423214998006 and parameters: {'num_leaves': 163, 'learning_rate': 0.022668936702247313, 'feature_fraction': 0.3888024660145337, 'bagging_fraction': 0.18078733949222492, 'bagging_freq': 1, 'max_depth': 7, 'min_child_samples': 4, 'min_sum_hessian_in_leaf': 6.723744581602345, 'is_unbalance': True, 'random_state': 42}. Best is trial 0 with value: 0.8777423214998006.


ROC AUC: 0.8777423214998006; Accuracy: 0.9338303821062441; Precision: 0.6521739130434783; Recall: 0.4891304347826087; F1 Score: 0.5590062111801243; PR AUC: 0.5447398091948162; Log Loss: 0.2272352399829318


[I 2023-09-10 01:03:21,892] Trial 1 finished with value: 0.8534104507379338 and parameters: {'num_leaves': 217, 'learning_rate': 0.004960191404585977, 'feature_fraction': 0.3241422585932826, 'bagging_fraction': 0.7180450112233655, 'bagging_freq': 8, 'max_depth': 7, 'min_child_samples': 24, 'min_sum_hessian_in_leaf': 2.6349872288750085, 'is_unbalance': True, 'random_state': 42}. Best is trial 0 with value: 0.8777423214998006.


ROC AUC: 0.8534104507379338; Accuracy: 0.9161230195712954; Precision: 1.0; Recall: 0.021739130434782608; F1 Score: 0.042553191489361694; PR AUC: 0.4828759117841636; Log Loss: 0.2463170359545602


[I 2023-09-10 01:03:22,546] Trial 2 finished with value: 0.881708992598502 and parameters: {'num_leaves': 175, 'learning_rate': 0.07466847275419876, 'feature_fraction': 0.3026301106086306, 'bagging_fraction': 0.464610270581043, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 74, 'min_sum_hessian_in_leaf': 7.503144853615815, 'is_unbalance': True, 'random_state': 42}. Best is trial 2 with value: 0.881708992598502.


ROC AUC: 0.881708992598502; Accuracy: 0.9179869524697111; Precision: 0.5166666666666667; Recall: 0.6739130434782609; F1 Score: 0.5849056603773585; PR AUC: 0.603782181348815; Log Loss: 0.2675248313378824


[I 2023-09-10 01:03:23,520] Trial 3 finished with value: 0.8881797633293446 and parameters: {'num_leaves': 133, 'learning_rate': 0.06396847997688691, 'feature_fraction': 0.47678883928903515, 'bagging_fraction': 0.8031032593749604, 'bagging_freq': 3, 'max_depth': 9, 'min_child_samples': 94, 'min_sum_hessian_in_leaf': 2.4439294434613066, 'is_unbalance': True, 'random_state': 42}. Best is trial 3 with value: 0.8881797633293446.


ROC AUC: 0.8881797633293446; Accuracy: 0.9394221808014911; Precision: 0.6421052631578947; Recall: 0.6630434782608695; F1 Score: 0.6524064171122995; PR AUC: 0.6642630323000944; Log Loss: 0.3166579745564137


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:03:24,075] Trial 4 finished with value: 0.8485684527766698 and parameters: {'num_leaves': 26, 'learning_rate': 0.003801148017432898, 'feature_fraction': 0.7111008340132895, 'bagging_fraction': 0.9729612686774398, 'bagging_freq': 8, 'max_depth': 9, 'min_child_samples': 80, 'min_sum_hessian_in_leaf': 6.5321621338960645, 'is_unbalance': True, 'random_state': 42}. Best is trial 3 with value: 0.8881797633293446.


ROC AUC: 0.8485684527766698; Accuracy: 0.9142590866728798; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.321137347857257; Log Loss: 0.28711995609610197


[I 2023-09-10 01:03:24,410] Trial 5 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:24,738] Trial 6 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:25,060] Trial 7 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:25,385] Trial 8 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:25,712] Trial 9 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:03:26,290] Trial 10 finished with value: 0.87685591455037 and parameters: {'num_leaves': 255, 'learning_rate': 0.0016266001281591608, 'feature_fraction': 0.1631094403538771, 'bagging_fraction': 0.6640673980737888, 'bagging_freq': 4, 'max_depth': 12, 'min_child_samples': 44, 'min_sum_hessian_in_leaf': 3.923454069614818, 'is_unbalance': True, 'random_state': 42}. Best is trial 3 with value: 0.8881797633293446.


ROC AUC: 0.87685591455037; Accuracy: 0.9142590866728798; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.45763509292524573; Log Loss: 0.278255985568038


[I 2023-09-10 01:03:26,639] Trial 11 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:28,007] Trial 12 finished with value: 0.8938084474582281 and parameters: {'num_leaves': 92, 'learning_rate': 0.0975741467476048, 'feature_fraction': 0.5879753020209024, 'bagging_fraction': 0.8617818100345407, 'bagging_freq': 4, 'max_depth': 11, 'min_child_samples': 65, 'min_sum_hessian_in_leaf': 9.746167444244099, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8938084474582281; Accuracy: 0.9496738117427772; Precision: 0.7261904761904762; Recall: 0.6630434782608695; F1 Score: 0.6931818181818181; PR AUC: 0.6923185130256027; Log Loss: 0.5024191151194995


[I 2023-09-10 01:03:28,364] Trial 13 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:28,718] Trial 14 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:29,073] Trial 15 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:29,453] Trial 16 pruned. Trial was pruned at iteration 20.
[I 2023-09-10 01:03:29,808] Trial 17 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:30,171] Trial 18 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:30,527] Trial 19 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:30,881] Trial 20 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:31,241] Trial 21 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:31,604] Trial 22 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:03:31,967] Trial 23 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:32,330] Trial 24 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:32,691] Trial 25 pruned. Trial was pruned a

ROC AUC: 0.8746343571333599; Accuracy: 0.8909599254426841; Precision: 0.42424242424242425; Recall: 0.7608695652173914; F1 Score: 0.5447470817120622; PR AUC: 0.5170902773726646; Log Loss: 0.28391867916111896


[I 2023-09-10 01:03:35,156] Trial 31 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:35,515] Trial 32 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:35,879] Trial 33 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:36,239] Trial 34 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:36,604] Trial 35 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:36,965] Trial 36 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:37,325] Trial 37 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:37,687] Trial 38 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:38,049] Trial 39 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:38,413] Trial 40 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:38,983] Trial 41 pruned. Trial was pruned at iteration 162.
[I 2023-09-10 01:03:39,354] Trial 42 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:39,731] Trial 43 pruned. Trial was pruned 

ROC AUC: 0.875193901520188; Accuracy: 0.875116495806151; Precision: 0.38333333333333336; Recall: 0.75; F1 Score: 0.5073529411764707; PR AUC: 0.4999103909401965; Log Loss: 0.28758515266129614


[I 2023-09-10 01:03:43,406] Trial 52 finished with value: 0.8866507113415768 and parameters: {'num_leaves': 37, 'learning_rate': 0.08597484799861739, 'feature_fraction': 0.11530561371527791, 'bagging_fraction': 0.8674948696287034, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 86, 'min_sum_hessian_in_leaf': 5.724217485529416, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8866507113415768; Accuracy: 0.8685927306616962; Precision: 0.366120218579235; Recall: 0.7282608695652174; F1 Score: 0.4872727272727273; PR AUC: 0.5213581790908199; Log Loss: 0.29368825904299795


[I 2023-09-10 01:03:43,962] Trial 53 finished with value: 0.8826618800691398 and parameters: {'num_leaves': 35, 'learning_rate': 0.058212687128315406, 'feature_fraction': 0.1429984616214094, 'bagging_fraction': 0.9920765820784617, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 84, 'min_sum_hessian_in_leaf': 5.095229309867349, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8826618800691398; Accuracy: 0.8872320596458527; Precision: 0.4088050314465409; Recall: 0.7065217391304348; F1 Score: 0.5179282868525897; PR AUC: 0.49970335836631535; Log Loss: 0.2926070752799023


[I 2023-09-10 01:03:44,517] Trial 54 finished with value: 0.8936090058946062 and parameters: {'num_leaves': 32, 'learning_rate': 0.06368636010554084, 'feature_fraction': 0.11093445080861496, 'bagging_fraction': 0.9639178087075685, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 86, 'min_sum_hessian_in_leaf': 5.8785147455802935, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8936090058946062; Accuracy: 0.869524697110904; Precision: 0.3723404255319149; Recall: 0.7608695652173914; F1 Score: 0.5; PR AUC: 0.5272009999145237; Log Loss: 0.30022121517667794


[I 2023-09-10 01:03:44,878] Trial 55 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:45,501] Trial 56 finished with value: 0.879382174356247 and parameters: {'num_leaves': 44, 'learning_rate': 0.08739138499665448, 'feature_fraction': 0.14997338875410354, 'bagging_fraction': 0.9967049062278759, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 86, 'min_sum_hessian_in_leaf': 5.909191639494556, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.879382174356247; Accuracy: 0.8732525629077353; Precision: 0.37640449438202245; Recall: 0.7282608695652174; F1 Score: 0.4962962962962962; PR AUC: 0.49774808736892634; Log Loss: 0.2944400483461799


[I 2023-09-10 01:03:45,863] Trial 57 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:46,437] Trial 58 finished with value: 0.892512077294686 and parameters: {'num_leaves': 29, 'learning_rate': 0.06606674754011473, 'feature_fraction': 0.1066407021380131, 'bagging_fraction': 0.9446595392555547, 'bagging_freq': 2, 'max_depth': 9, 'min_child_samples': 90, 'min_sum_hessian_in_leaf': 5.68649571839345, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.892512077294686; Accuracy: 0.8825722273998136; Precision: 0.4034090909090909; Recall: 0.7717391304347826; F1 Score: 0.5298507462686567; PR AUC: 0.526660149892896; Log Loss: 0.28462405501124194


[I 2023-09-10 01:03:46,800] Trial 59 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:47,173] Trial 60 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:47,726] Trial 61 finished with value: 0.8822851571156317 and parameters: {'num_leaves': 18, 'learning_rate': 0.06692391466198415, 'feature_fraction': 0.1714723864162752, 'bagging_fraction': 0.9966065144060331, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 73, 'min_sum_hessian_in_leaf': 5.534697771282178, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8822851571156317; Accuracy: 0.896551724137931; Precision: 0.43448275862068964; Recall: 0.6847826086956522; F1 Score: 0.5316455696202532; PR AUC: 0.5119383445405041; Log Loss: 0.2787258636684483


[I 2023-09-10 01:03:48,086] Trial 62 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:48,458] Trial 63 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:49,017] Trial 64 finished with value: 0.8772492576341799 and parameters: {'num_leaves': 29, 'learning_rate': 0.06456598479952731, 'feature_fraction': 0.20744835132696857, 'bagging_fraction': 0.9554463297411053, 'bagging_freq': 2, 'max_depth': 8, 'min_child_samples': 78, 'min_sum_hessian_in_leaf': 6.28601712848888, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8772492576341799; Accuracy: 0.875116495806151; Precision: 0.3793103448275862; Recall: 0.717391304347826; F1 Score: 0.49624060150375937; PR AUC: 0.48235459229576594; Log Loss: 0.28628479549128855


[I 2023-09-10 01:03:49,388] Trial 65 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:49,756] Trial 66 pruned. Trial was pruned at iteration 3.
[I 2023-09-10 01:03:50,142] Trial 67 pruned. Trial was pruned at iteration 27.
[I 2023-09-10 01:03:50,506] Trial 68 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:50,871] Trial 69 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:51,232] Trial 70 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:51,993] Trial 71 finished with value: 0.8849000576164517 and parameters: {'num_leaves': 24, 'learning_rate': 0.09837014679511762, 'feature_fraction': 0.2163378874812545, 'bagging_fraction': 0.9187667659090523, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 75, 'min_sum_hessian_in_leaf': 5.5671252833861296, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8849000576164517; Accuracy: 0.9366262814538676; Precision: 0.62; Recall: 0.6739130434782609; F1 Score: 0.6458333333333334; PR AUC: 0.6537752594882736; Log Loss: 0.2748399141728172


[I 2023-09-10 01:03:52,356] Trial 72 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:52,719] Trial 73 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:53,083] Trial 74 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:53,481] Trial 75 pruned. Trial was pruned at iteration 30.
[I 2023-09-10 01:03:53,847] Trial 76 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:54,209] Trial 77 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:54,584] Trial 78 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:54,958] Trial 79 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:55,334] Trial 80 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:55,943] Trial 81 finished with value: 0.8814763107742765 and parameters: {'num_leaves': 57, 'learning_rate': 0.07279281048702957, 'feature_fraction': 0.20102719938700148, 'bagging_fraction': 0.9773567968352399, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 74, 'min_s

ROC AUC: 0.8814763107742765; Accuracy: 0.875116495806151; Precision: 0.38461538461538464; Recall: 0.7608695652173914; F1 Score: 0.5109489051094891; PR AUC: 0.5142999012560894; Log Loss: 0.2827450935642144


[I 2023-09-10 01:03:56,316] Trial 82 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:56,683] Trial 83 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:57,055] Trial 84 pruned. Trial was pruned at iteration 5.
[I 2023-09-10 01:03:57,430] Trial 85 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:57,796] Trial 86 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:58,164] Trial 87 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:58,531] Trial 88 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:03:58,894] Trial 89 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:03:59,259] Trial 90 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:03:59,808] Trial 91 finished with value: 0.883476266453929 and parameters: {'num_leaves': 57, 'learning_rate': 0.07129750351739647, 'feature_fraction': 0.20041032935476183, 'bagging_fraction': 0.9765951503882259, 'bagging_freq

ROC AUC: 0.883476266453929; Accuracy: 0.9142590866728798; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.4434510229097922; Log Loss: 0.25934321767532814


[I 2023-09-10 01:04:00,369] Trial 92 finished with value: 0.8820247750742365 and parameters: {'num_leaves': 33, 'learning_rate': 0.06552780121392467, 'feature_fraction': 0.20023675614010783, 'bagging_fraction': 0.9795519940385566, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 76, 'min_sum_hessian_in_leaf': 5.7438500867551765, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8820247750742365; Accuracy: 0.907735321528425; Precision: 0.4690265486725664; Recall: 0.5760869565217391; F1 Score: 0.5170731707317073; PR AUC: 0.48830489608854866; Log Loss: 0.27092712650669415


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:00,919] Trial 93 finished with value: 0.8842574125781146 and parameters: {'num_leaves': 31, 'learning_rate': 0.06447883269021264, 'feature_fraction': 0.20270624530462789, 'bagging_fraction': 0.9993333610802974, 'bagging_freq': 1, 'max_depth': 9, 'min_child_samples': 76, 'min_sum_hessian_in_leaf': 5.581289321824299, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8842574125781146; Accuracy: 0.9142590866728798; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.4604522549488985; Log Loss: 0.25968791454204737


[I 2023-09-10 01:04:01,281] Trial 94 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:01,647] Trial 95 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:02,024] Trial 96 pruned. Trial was pruned at iteration 3.
[I 2023-09-10 01:04:02,390] Trial 97 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:02,975] Trial 98 finished with value: 0.8851271993972433 and parameters: {'num_leaves': 37, 'learning_rate': 0.09909270835343736, 'feature_fraction': 0.18921614083111662, 'bagging_fraction': 0.9225364312717471, 'bagging_freq': 2, 'max_depth': 10, 'min_child_samples': 64, 'min_sum_hessian_in_leaf': 5.9191463364634505, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8851271993972433; Accuracy: 0.8956197576887233; Precision: 0.4358974358974359; Recall: 0.7391304347826086; F1 Score: 0.5483870967741935; PR AUC: 0.5585660046976865; Log Loss: 0.25928960445253


[I 2023-09-10 01:04:03,559] Trial 99 finished with value: 0.8917420112573682 and parameters: {'num_leaves': 60, 'learning_rate': 0.09176362026186077, 'feature_fraction': 0.14533273791480594, 'bagging_fraction': 0.9297053019483005, 'bagging_freq': 2, 'max_depth': 11, 'min_child_samples': 55, 'min_sum_hessian_in_leaf': 6.488672124114261, 'is_unbalance': True, 'random_state': 42}. Best is trial 12 with value: 0.8938084474582281.


ROC AUC: 0.8917420112573682; Accuracy: 0.8937558247903076; Precision: 0.4266666666666667; Recall: 0.6956521739130435; F1 Score: 0.5289256198347106; PR AUC: 0.5451418990205136; Log Loss: 0.26149952653769337
end tuning...
best params: {'lgbm__num_leaves': 92, 'lgbm__learning_rate': 0.0975741467476048, 'lgbm__feature_fraction': 0.5879753020209024, 'lgbm__bagging_fraction': 0.8617818100345407, 'lgbm__bagging_freq': 4, 'lgbm__max_depth': 11, 'lgbm__min_child_samples': 65, 'lgbm__min_sum_hessian_in_leaf': 9.746167444244099, 'lgbm__is_unbalance': True, 'lgbm__random_state': 42}
best score: 0.8938084474582281
roc_auc on validate data = 0.7547478172228871
start feature select...


[I 2023-09-10 01:04:16,310] A new study created in memory with name: no-name-508d16b5-a2e4-494f-8f2a-fc4fe81a970e


end feature select...
selected 132 out of 342
Finished loading model, total used 100 iterations
start tuning...


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:16,992] Trial 0 finished with value: 0.869908350305499 and parameters: {'num_leaves': 160, 'learning_rate': 0.002758052438530389, 'feature_fraction': 0.2694299730883139, 'bagging_fraction': 0.782792570848964, 'bagging_freq': 10, 'max_depth': 10, 'min_child_samples': 80, 'min_sum_hessian_in_leaf': 1.8539325225277603, 'is_unbalance': True, 'random_state': 42}. Best is trial 0 with value: 0.869908350305499.


ROC AUC: 0.869908350305499; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.40229050005869077; Log Loss: 0.27751362169309657


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:17,685] Trial 1 finished with value: 0.8120129726379173 and parameters: {'num_leaves': 109, 'learning_rate': 0.0010014981881722115, 'feature_fraction': 0.47259046284818607, 'bagging_fraction': 0.6631192255456909, 'bagging_freq': 9, 'max_depth': 3, 'min_child_samples': 6, 'min_sum_hessian_in_leaf': 7.935636911905123, 'is_unbalance': True, 'random_state': 42}. Best is trial 0 with value: 0.869908350305499.


ROC AUC: 0.8120129726379173; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.3332197123412172; Log Loss: 0.2734131610447355


[I 2023-09-10 01:04:18,874] Trial 2 finished with value: 0.8782763658903746 and parameters: {'num_leaves': 154, 'learning_rate': 0.020709344161264534, 'feature_fraction': 0.6373578826982531, 'bagging_fraction': 0.87511822342118, 'bagging_freq': 5, 'max_depth': 4, 'min_child_samples': 58, 'min_sum_hessian_in_leaf': 9.658258761730803, 'is_unbalance': True, 'random_state': 42}. Best is trial 2 with value: 0.8782763658903746.


ROC AUC: 0.8782763658903746; Accuracy: 0.9227188081936686; Precision: 0.5371900826446281; Recall: 0.7065217391304348; F1 Score: 0.6103286384976525; PR AUC: 0.6250622328006609; Log Loss: 0.24823746408231495


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:19,548] Trial 3 finished with value: 0.8687184539095014 and parameters: {'num_leaves': 73, 'learning_rate': 0.0071751832609288375, 'feature_fraction': 0.20395030379793067, 'bagging_fraction': 0.5760240315442355, 'bagging_freq': 2, 'max_depth': 10, 'min_child_samples': 74, 'min_sum_hessian_in_leaf': 7.604314407518383, 'is_unbalance': True, 'random_state': 42}. Best is trial 2 with value: 0.8782763658903746.


ROC AUC: 0.8687184539095014; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.45464865099678997; Log Loss: 0.25540820945679654


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:20,295] Trial 4 finished with value: 0.8747454175152749 and parameters: {'num_leaves': 170, 'learning_rate': 0.006047550729785, 'feature_fraction': 0.28010738296894133, 'bagging_fraction': 0.7521595028878314, 'bagging_freq': 5, 'max_depth': 7, 'min_child_samples': 25, 'min_sum_hessian_in_leaf': 5.6734578114765055, 'is_unbalance': True, 'random_state': 42}. Best is trial 2 with value: 0.8782763658903746.


ROC AUC: 0.8747454175152749; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.5149605992271745; Log Loss: 0.2648658518740617


[I 2023-09-10 01:04:20,760] Trial 5 pruned. Trial was pruned at iteration 8.
[I 2023-09-10 01:04:21,291] Trial 6 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:21,757] Trial 7 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:22,218] Trial 8 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:22,682] Trial 9 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:23,174] Trial 10 pruned. Trial was pruned at iteration 1.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:23,952] Trial 11 finished with value: 0.9130988222792881 and parameters: {'num_leaves': 156, 'learning_rate': 0.01387981031756079, 'feature_fraction': 0.10012284357020612, 'bagging_fraction': 0.9993840904900294, 'bagging_freq': 5, 'max_depth': 12, 'min_child_samples': 33, 'min_sum_hessian_in_leaf': 4.35516166867488, 'is_unbalance': True, 'random_state': 42}. Best is trial 11 with value: 0.9130988222792881.


ROC AUC: 0.9130988222792881; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.61622853703879; Log Loss: 0.2591537581610899


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:24,733] Trial 12 finished with value: 0.9079130877534756 and parameters: {'num_leaves': 139, 'learning_rate': 0.017720502511548036, 'feature_fraction': 0.14964774816041773, 'bagging_fraction': 0.9884950834768432, 'bagging_freq': 3, 'max_depth': 12, 'min_child_samples': 32, 'min_sum_hessian_in_leaf': 4.089623507861096, 'is_unbalance': True, 'random_state': 42}. Best is trial 11 with value: 0.9130988222792881.


ROC AUC: 0.9079130877534756; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.5610409031399128; Log Loss: 0.2615984750800105


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:25,513] Trial 13 finished with value: 0.9085384751616046 and parameters: {'num_leaves': 123, 'learning_rate': 0.015008442957293483, 'feature_fraction': 0.11744912816326186, 'bagging_fraction': 0.9992220880571453, 'bagging_freq': 2, 'max_depth': 12, 'min_child_samples': 31, 'min_sum_hessian_in_leaf': 3.5676272375677103, 'is_unbalance': True, 'random_state': 42}. Best is trial 11 with value: 0.9130988222792881.


ROC AUC: 0.9085384751616046; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.585014086646785; Log Loss: 0.26475969858910997


[I 2023-09-10 01:04:26,001] Trial 14 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:26,854] Trial 15 finished with value: 0.9037180111573542 and parameters: {'num_leaves': 191, 'learning_rate': 0.03144101718997864, 'feature_fraction': 0.1247391516063645, 'bagging_fraction': 0.8792522799449574, 'bagging_freq': 6, 'max_depth': 10, 'min_child_samples': 19, 'min_sum_hessian_in_leaf': 2.2532158259863095, 'is_unbalance': True, 'random_state': 42}. Best is trial 11 with value: 0.9130988222792881.


ROC AUC: 0.9037180111573542; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.6510882304713491; Log Loss: 0.22761359264375478


[I 2023-09-10 01:04:27,363] Trial 16 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:27,868] Trial 17 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:28,370] Trial 18 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:28,870] Trial 19 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:29,367] Trial 20 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:30,172] Trial 21 finished with value: 0.9106636854688744 and parameters: {'num_leaves': 139, 'learning_rate': 0.01717549908582081, 'feature_fraction': 0.18446764311468294, 'bagging_fraction': 0.9788980652436002, 'bagging_freq': 3, 'max_depth': 12, 'min_child_samples': 29, 'min_sum_hessian_in_leaf': 3.88773581039711, 'is_unbalance': True, 'random_state': 42}. Best is trial 11 with value: 0.9130988222792881.


ROC AUC: 0.9106636854688744; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.5419195201377168; Log Loss: 0.2639914884747421


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:30,999] Trial 22 finished with value: 0.8919186664305323 and parameters: {'num_leaves': 172, 'learning_rate': 0.016048971852232547, 'feature_fraction': 0.20656851374178803, 'bagging_fraction': 0.9463046320564037, 'bagging_freq': 2, 'max_depth': 11, 'min_child_samples': 27, 'min_sum_hessian_in_leaf': 3.341329846070707, 'is_unbalance': True, 'random_state': 42}. Best is trial 11 with value: 0.9130988222792881.


ROC AUC: 0.8919186664305323; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.5966774797631362; Log Loss: 0.25623935985037755


[I 2023-09-10 01:04:31,487] Trial 23 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:31,988] Trial 24 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:32,489] Trial 25 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:32,986] Trial 26 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:33,487] Trial 27 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:33,978] Trial 28 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:34,478] Trial 29 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:34,974] Trial 30 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:35,471] Trial 31 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:36,264] Trial 32 finished with value: 0.905776808642522 and parameters: {'num_leaves': 121, 'learning_rate': 0.020034539875915466, 'feature_fraction': 0.15528207538306765, 'bagging_fraction': 0.9822907916428926, 'bagging_fr

ROC AUC: 0.905776808642522; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.5584122894648308; Log Loss: 0.25799935395352974


[I 2023-09-10 01:04:36,758] Trial 33 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:37,259] Trial 34 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:37,758] Trial 35 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:38,252] Trial 36 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:38,745] Trial 37 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:39,241] Trial 38 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:39,803] Trial 39 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:40,327] Trial 40 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:04:41,137] Trial 41 finished with value: 0.905372797308067 and parameters: {'num_leaves': 122, 'learning_rate': 0.017720777260214045, 'feature_fraction': 0.16341127895850344, 'bagging_fraction': 0.9552167005217416, 'bagging_freq': 3, 'max_depth': 12, 'min_child_samples': 30, 'min_sum_hessian_in_leaf': 4

ROC AUC: 0.905372797308067; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.601605694850979; Log Loss: 0.2488188541374099


[I 2023-09-10 01:04:41,635] Trial 42 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:42,137] Trial 43 pruned. Trial was pruned at iteration 3.
[I 2023-09-10 01:04:42,637] Trial 44 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:43,149] Trial 45 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:43,659] Trial 46 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:44,170] Trial 47 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:44,674] Trial 48 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:45,172] Trial 49 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:45,673] Trial 50 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:46,186] Trial 51 pruned. Trial was pruned at iteration 7.
[I 2023-09-10 01:04:46,698] Trial 52 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:47,199] Trial 53 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:47,698] Trial 54 pruned. Trial was pruned at

ROC AUC: 0.9171942796422562; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.716361016378214; Log Loss: 0.21843503814057696


[I 2023-09-10 01:04:53,069] Trial 64 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:53,575] Trial 65 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:54,075] Trial 66 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:54,577] Trial 67 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:55,087] Trial 68 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:04:55,590] Trial 69 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:56,097] Trial 70 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:56,663] Trial 71 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:57,167] Trial 72 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:57,674] Trial 73 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:58,183] Trial 74 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:04:58,696] Trial 75 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:04:59,206] Trial 76 pruned. Trial was pruned at

ROC AUC: 0.9100438324625875; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.6214275546865221; Log Loss: 0.2678197956079322


[I 2023-09-10 01:05:03,148] Trial 83 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:03,653] Trial 84 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:04,155] Trial 85 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:04,661] Trial 86 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:05,166] Trial 87 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:05,678] Trial 88 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:06,184] Trial 89 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:05:06,683] Trial 90 pruned. Trial was pruned at iteration 0.
  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:05:07,492] Trial 91 finished with value: 0.9079462941645267 and parameters: {'num_leaves': 166, 'learning_rate': 0.014558554876082834, 'feature_fraction': 0.14450963627934071, 'bagging_fraction': 0.9830247000763846, 'bagging_freq': 2, 'max_depth': 12, 'min_child_samples': 27, 'min_sum_hessian_in_leaf': 

ROC AUC: 0.9079462941645267; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.6336849630280837; Log Loss: 0.2641986105264968


  _warn_prf(average, modifier, msg_start, len(result))
[I 2023-09-10 01:05:08,294] Trial 92 finished with value: 0.9180299743203755 and parameters: {'num_leaves': 187, 'learning_rate': 0.014392528833066335, 'feature_fraction': 0.13496648152122087, 'bagging_fraction': 0.9876052174735012, 'bagging_freq': 2, 'max_depth': 12, 'min_child_samples': 28, 'min_sum_hessian_in_leaf': 2.752827497441662, 'is_unbalance': True, 'random_state': 42}. Best is trial 92 with value: 0.9180299743203755.


ROC AUC: 0.9180299743203755; Accuracy: 0.9143389199255121; Precision: 0.0; Recall: 0.0; F1 Score: 0.0; PR AUC: 0.6323204747095775; Log Loss: 0.26071392970135004


[I 2023-09-10 01:05:08,791] Trial 93 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:09,294] Trial 94 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:09,796] Trial 95 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:10,304] Trial 96 pruned. Trial was pruned at iteration 2.
[I 2023-09-10 01:05:10,814] Trial 97 pruned. Trial was pruned at iteration 1.
[I 2023-09-10 01:05:11,318] Trial 98 pruned. Trial was pruned at iteration 0.
[I 2023-09-10 01:05:11,824] Trial 99 pruned. Trial was pruned at iteration 0.


end tuning...
best params: {'lgbm__num_leaves': 187, 'lgbm__learning_rate': 0.014392528833066335, 'lgbm__feature_fraction': 0.13496648152122087, 'lgbm__bagging_fraction': 0.9876052174735012, 'lgbm__bagging_freq': 2, 'lgbm__max_depth': 12, 'lgbm__min_child_samples': 28, 'lgbm__min_sum_hessian_in_leaf': 2.752827497441662, 'lgbm__is_unbalance': True, 'lgbm__random_state': 42}
best score: 0.9180299743203755
roc_auc on validate data = 0.7214756929071107
metrics on test data:
ROC AUC: 0.8865325071780408; Accuracy: 0.9439704865114135; Precision: 0.2565217391304348; Recall: 0.45038167938931295; F1 Score: 0.3268698060941828; PR AUC: 0.23645772891042358; Log Loss: 0.19516081567686666
CPU times: total: 25min 12s
Wall time: 3min 3s


In [31]:
import pickle

In [38]:
with open(f'model.pickle', 'wb') as f:
    pickle.dump(models, f)

In [40]:
# with open(f'model.pickle', 'rb') as f:
#     md = pickle.load(f)

# 4. Submission file

In [229]:
def inference_model(df, models, prob_col='score'):
    preds = []
    for m in models:
        preds.append(m.predict_proba(df_test)[:, 1])
    df[prob_col] = pd.DataFrame(preds).mean()
    return df

In [None]:
test = pd.read_csv(...)

In [None]:
test = inference_model(test, models)

In [None]:
sample_submission = test.loc[:, ['report_date', 'client_id', 'score']]

In [None]:
sample_submission.to_csv('result_novichki.csv')