# Baseline

In [70]:
import numpy as np
import pandas as pd
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split, cross_validate, StratifiedGroupKFold, KFold

## Загрузка данных

In [71]:
# Read the train and test tables from parquet files in the working directory.
train_path, test_path = "train_data.pqt", "test_data.pqt"
train_df = pd.read_parquet(train_path)
test_df = pd.read_parquet(test_path)

In [72]:

# Null masks for the three location columns, computed once and reused below.
city_is_na = train_df['city'].isna()
city_type_is_na = train_df['city_type'].isna()
city_code_is_na = train_df['index_city_code'].isna()

# Rows where one column of a pair is filled while the other one is missing.
missing_city_when_city_type = train_df[~city_type_is_na & city_is_na]
missing_city_type_when_city = train_df[~city_is_na & city_type_is_na]

# Same check for the city / index_city_code pair.
# Note: despite its name, `missing_city_type_when_city2` holds rows where
# `index_city_code` (not `city_type`) is missing while `city` is present.
missing_city_when_city_type2 = train_df[~city_code_is_na & city_is_na]
missing_city_type_when_city2 = train_df[~city_is_na & city_code_is_na]

print(f"Количество пропусков в 'city', когда 'city_type' имеет данные: {len(missing_city_when_city_type)}")
print(f"Количество пропусков в 'city_type', когда 'city' имеет данные: {len(missing_city_type_when_city)}")

print(f"Количество пропусков в 'city', когда 'index_city_code' имеет данные: {len(missing_city_when_city_type2)}")
print(f"Количество пропусков в 'index_city_code', когда 'city' имеет данные: {len(missing_city_type_when_city2)}")


Количество пропусков в 'city', когда 'city_type' имеет данные: 0
Количество пропусков в 'city_type', когда 'city' имеет данные: 0
Количество пропусков в 'city', когда 'index_city_code' имеет данные: 32836
Количество пропусков в 'index_city_code', когда 'city' имеет данные: 279716


In [73]:
# # Заполняем пропущенные значения в 'city' значениями из 'index_city_code', если 'city' пустой
# train_df.loc[train_df['city'].isna(), 'city'] = train_df['index_city_code']


In [74]:
# missing_city_when_city_type2 = train_df[train_df['index_city_code'].notna() & train_df['city'].isna()]
# missing_city_type_when_city2 = train_df[train_df['city'].notna() & train_df['index_city_code'].isna()]
#
#
# print(f"Количество пропусков в 'city', когда 'index_city_code' имеет данные: {missing_city_when_city_type2.shape[0]}")
# print(f"Количество пропусков в 'index_city_code', когда 'city' имеет данные: {missing_city_type_when_city2.shape[0]}")

In [75]:
# Restore missing `city` values using `index_city_code`.

# Step 1: build a lookup index_city_code -> city from the rows where both
# values are present. When a code occurs with several cities, the city of the
# first such row is used (drop_duplicates keeps the first occurrence).
city_to_index_mapping = (
    train_df
    .dropna(subset=['city', 'index_city_code'])
    .drop_duplicates('index_city_code')
    .set_index('index_city_code')['city']
    .to_dict()
)

# Step 2: fill the gaps in one vectorized pass instead of iterating over every
# row with iterrows(), which is very slow on a frame of this size.
#   * code present in the lookup  -> mapped city
#   * code present but not mapped -> the code itself is used as the city value
#   * code missing                -> city stays NaN
city_from_code = (
    train_df['index_city_code']
    .map(city_to_index_mapping)
    .fillna(train_df['index_city_code'])
)
train_df['city'] = train_df['city'].fillna(city_from_code)


In [76]:
train_df

Unnamed: 0,id,date,balance_amt_avg,balance_amt_max,balance_amt_min,balance_amt_day_avg,channel_code,city,city_type,index_city_code,...,cnt_cred_g_oper_3m,cnt_days_cred_g_oper_3m,sum_deb_h_oper_3m,cnt_deb_h_oper_3m,cnt_days_deb_h_oper_3m,sum_cred_h_oper_3m,cnt_cred_h_oper_3m,cnt_days_cred_h_oper_3m,start_cluster,end_cluster
0,0,month_1,0.744845,0.705492,1.287207,0.748101,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.951166,0.568681,0.897565,0.553624,0.774354,0.936506,0.295984,0.967947,"{α, γ}",{other}
1,0,month_2,1.049605,0.831916,2.458609,1.053805,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.948812,0.499716,0.785029,0.551904,0.696576,0.990157,0.298873,0.945969,"{α, γ}",{other}
2,0,month_3,0.692653,0.740253,0.430042,0.695747,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.946458,0.442244,0.877050,0.551044,0.663243,0.810065,0.294829,0.956958,"{α, γ}",{other}
3,1,month_1,-0.081586,-0.091860,-0.114040,-0.080890,channel_code_2,city_14,city_type_0,,...,0.945281,0.407762,0.369318,0.567093,0.785465,-0.184002,0.253523,0.462452,{other},{other}
4,1,month_2,-0.094962,-0.100504,-0.119302,-0.094307,channel_code_2,city_14,city_type_0,,...,0.946066,0.430750,0.067275,0.559928,0.696576,-0.183854,0.255545,0.495419,{other},{other}
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599995,199998,month_2,-0.156775,-0.204960,-0.125987,-0.156311,channel_code_9,city_14,city_type_0,,...,,,-0.165588,,,-0.201123,,,{},{}
599996,199998,month_3,-0.156776,-0.204960,-0.125995,-0.156312,channel_code_9,city_14,city_type_0,,...,0.944497,0.384773,-0.165588,0.546889,0.407687,-0.201123,0.250924,0.374540,{α},{}
599997,199999,month_1,-0.156712,-0.204913,-0.125831,-0.156248,channel_code_14,city_1876,city_type_0,index_city_code_195,...,,,-0.165588,,,-0.201123,,,{},{}
599998,199999,month_2,-0.156712,-0.204913,-0.125831,-0.156248,channel_code_14,city_1876,city_type_0,index_city_code_195,...,,,-0.165588,,,-0.201123,,,{},{}


In [77]:
train_df.head(30)

Unnamed: 0,id,date,balance_amt_avg,balance_amt_max,balance_amt_min,balance_amt_day_avg,channel_code,city,city_type,index_city_code,...,cnt_cred_g_oper_3m,cnt_days_cred_g_oper_3m,sum_deb_h_oper_3m,cnt_deb_h_oper_3m,cnt_days_deb_h_oper_3m,sum_cred_h_oper_3m,cnt_cred_h_oper_3m,cnt_days_cred_h_oper_3m,start_cluster,end_cluster
0,0,month_1,0.744845,0.705492,1.287207,0.748101,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.951166,0.568681,0.897565,0.553624,0.774354,0.936506,0.295984,0.967947,"{α, γ}",{other}
1,0,month_2,1.049605,0.831916,2.458609,1.053805,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.948812,0.499716,0.785029,0.551904,0.696576,0.990157,0.298873,0.945969,"{α, γ}",{other}
2,0,month_3,0.692653,0.740253,0.430042,0.695747,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.946458,0.442244,0.87705,0.551044,0.663243,0.810065,0.294829,0.956958,"{α, γ}",{other}
3,1,month_1,-0.081586,-0.09186,-0.11404,-0.08089,channel_code_2,city_14,city_type_0,,...,0.945281,0.407762,0.369318,0.567093,0.785465,-0.184002,0.253523,0.462452,{other},{other}
4,1,month_2,-0.094962,-0.100504,-0.119302,-0.094307,channel_code_2,city_14,city_type_0,,...,0.946066,0.43075,0.067275,0.559928,0.696576,-0.183854,0.255545,0.495419,{other},{other}
5,1,month_3,-0.090605,-0.114275,-0.114119,-0.089937,channel_code_2,city_14,city_type_0,,...,0.948027,0.488221,0.043221,0.560788,0.707687,-0.167905,0.259011,0.605309,{other},{other}
6,2,month_1,-0.154685,-0.186795,-0.122805,-0.154215,channel_code_12,city_613,city_type_306,,...,0.944497,0.384773,-0.165588,0.546889,0.407687,-0.178674,0.252657,0.440474,{α},{α}
7,2,month_2,-0.152784,-0.193686,-0.122805,-0.152308,channel_code_12,city_613,city_type_306,,...,0.944497,0.384773,-0.165588,0.546889,0.407687,-0.177854,0.252657,0.440474,{α},{α}
8,2,month_3,-0.148737,-0.187003,-0.112416,-0.148249,channel_code_12,city_613,city_type_306,,...,0.944497,0.384773,-0.165588,0.546889,0.407687,-0.176302,0.252368,0.429485,{α},{α}
9,3,month_1,-0.156643,-0.204861,-0.12566,-0.156179,channel_code_14,city_21,city_type_0,index_city_code_46,...,0.944497,0.384773,-0.165588,0.546889,0.407687,-0.201123,0.250924,0.37454,{α},{α}


In [78]:

# 1-based position of every row inside its `id` group, in current row order.
# NOTE(review): this equals the calendar month only if rows of each id are
# already ordered by `date` — confirm against the data source.
position_in_group = train_df.groupby('id').cumcount()
train_df['month_number'] = position_in_group.add(1)

train_df

Unnamed: 0,id,date,balance_amt_avg,balance_amt_max,balance_amt_min,balance_amt_day_avg,channel_code,city,city_type,index_city_code,...,cnt_days_cred_g_oper_3m,sum_deb_h_oper_3m,cnt_deb_h_oper_3m,cnt_days_deb_h_oper_3m,sum_cred_h_oper_3m,cnt_cred_h_oper_3m,cnt_days_cred_h_oper_3m,start_cluster,end_cluster,month_number
0,0,month_1,0.744845,0.705492,1.287207,0.748101,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.568681,0.897565,0.553624,0.774354,0.936506,0.295984,0.967947,"{α, γ}",{other},1
1,0,month_2,1.049605,0.831916,2.458609,1.053805,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.499716,0.785029,0.551904,0.696576,0.990157,0.298873,0.945969,"{α, γ}",{other},2
2,0,month_3,0.692653,0.740253,0.430042,0.695747,channel_code_5,city_23,city_type_0,index_city_code_39,...,0.442244,0.877050,0.551044,0.663243,0.810065,0.294829,0.956958,"{α, γ}",{other},3
3,1,month_1,-0.081586,-0.091860,-0.114040,-0.080890,channel_code_2,city_14,city_type_0,,...,0.407762,0.369318,0.567093,0.785465,-0.184002,0.253523,0.462452,{other},{other},1
4,1,month_2,-0.094962,-0.100504,-0.119302,-0.094307,channel_code_2,city_14,city_type_0,,...,0.430750,0.067275,0.559928,0.696576,-0.183854,0.255545,0.495419,{other},{other},2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
599995,199998,month_2,-0.156775,-0.204960,-0.125987,-0.156311,channel_code_9,city_14,city_type_0,,...,,-0.165588,,,-0.201123,,,{},{},2
599996,199998,month_3,-0.156776,-0.204960,-0.125995,-0.156312,channel_code_9,city_14,city_type_0,,...,0.384773,-0.165588,0.546889,0.407687,-0.201123,0.250924,0.374540,{α},{},3
599997,199999,month_1,-0.156712,-0.204913,-0.125831,-0.156248,channel_code_14,city_1876,city_type_0,index_city_code_195,...,,-0.165588,,,-0.201123,,,{},{},1
599998,199999,month_2,-0.156712,-0.204913,-0.125831,-0.156248,channel_code_14,city_1876,city_type_0,index_city_code_195,...,,-0.165588,,,-0.201123,,,{},{},2


In [79]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600000 entries, 0 to 599999
Data columns (total 94 columns):
 #   Column                   Non-Null Count   Dtype  
---  ------                   --------------   -----  
 0   id                       600000 non-null  int64  
 1   date                     600000 non-null  object 
 2   balance_amt_avg          534423 non-null  float64
 3   balance_amt_max          534423 non-null  float64
 4   balance_amt_min          534423 non-null  float64
 5   balance_amt_day_avg      534423 non-null  float64
 6   channel_code             557150 non-null  object 
 7   city                     554492 non-null  object 
 8   city_type                521656 non-null  object 
 9   index_city_code          274776 non-null  object 
 10  ogrn_days_end_month      568527 non-null  float64
 11  ogrn_days_end_quarter    568527 non-null  float64
 12  ogrn_month               568527 non-null  object 
 13  ogrn_year                568527 non-null  object 
 14  ft_r

In [80]:
# Every column except the row keys becomes a value column in the pivot.
key_columns = ('id', 'date', 'month_number')
pivot_columns = [name for name in train_df.columns if name not in key_columns]

# One row per id; each feature is spread over the month_number values.
df_pivoted = pd.pivot_table(
    train_df,
    index='id',
    columns='month_number',
    values=pivot_columns,
    aggfunc='first',
)

# Flatten the (feature, month_number) MultiIndex into "<feature>_<n>month" names.
df_pivoted.columns = ['{}_{}month'.format(feature, month) for feature, month in df_pivoted.columns]

# Turn the `id` index back into a regular column.
df_pivoted = df_pivoted.reset_index()

df_pivoted.head()



Unnamed: 0,id,balance_amt_avg_1month,balance_amt_avg_2month,balance_amt_avg_3month,balance_amt_day_avg_1month,balance_amt_day_avg_2month,balance_amt_day_avg_3month,balance_amt_max_1month,balance_amt_max_2month,balance_amt_max_3month,...,sum_deb_h_oper_3m_3month,sum_of_paym_1y_1month,sum_of_paym_1y_2month,sum_of_paym_1y_3month,sum_of_paym_2m_1month,sum_of_paym_2m_2month,sum_of_paym_2m_3month,sum_of_paym_6m_1month,sum_of_paym_6m_2month,sum_of_paym_6m_3month
0,0,0.744845,1.049605,0.692653,0.748101,1.053805,0.695747,0.705492,0.831916,0.740253,...,0.87705,0.51149,0.486425,0.480547,0.942275,0.645704,0.403604,0.536013,0.536378,0.613167
1,1,-0.081586,-0.094962,-0.090605,-0.08089,-0.094307,-0.089937,-0.09186,-0.100504,-0.114275,...,0.043221,0.052041,0.033554,0.039472,0.014051,-0.057593,-0.092059,0.0438,0.035027,0.025233
2,2,-0.154685,-0.152784,-0.148737,-0.154215,-0.152308,-0.148249,-0.186795,-0.193686,-0.187003,...,-0.165588,-0.291924,-0.290712,-0.288318,-0.255837,-0.267913,-0.255946,-0.287121,-0.284955,-0.280676
3,3,-0.156643,-0.156666,-0.156522,-0.156179,-0.156202,-0.156058,-0.204861,-0.20479,-0.204718,...,-0.165588,-0.242793,-0.262878,-0.273303,-0.273969,-0.273969,-0.273969,-0.268832,-0.294398,-0.294447
4,4,-0.138847,-0.129233,-0.141798,-0.138328,-0.128684,-0.141289,-0.182486,-0.162965,-0.170262,...,-0.078297,-0.124641,-0.121939,-0.128903,-0.103807,-0.134192,-0.16674,-0.130025,-0.134049,-0.142831


In [81]:
df_pivoted.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000 entries, 0 to 199999
Columns: 274 entries, id to sum_of_paym_6m_3month
dtypes: float64(243), int64(1), object(30)
memory usage: 418.1+ MB


In [82]:
# Base names of the categorical features; each one exists once per month
# after the pivot, with a "_<n>month" suffix.
_cat_base_names = [
    "channel_code", "city", "city_type",
    "okved", "segment", "start_cluster",
    "index_city_code", "ogrn_month", "ogrn_year",
]

# Month order (2, 1, 3) is kept exactly as in the original hand-written list.
cat_cols = [
    f"{base}_{month}month"
    for month in (2, 1, 3)
    for base in _cat_base_names
]

In [83]:
# Standard deviation of every float feature, smallest first, to spot
# near-constant columns.
float_cols = df_pivoted.select_dtypes(include=['float64', 'float32'])
std_devs = float_cols.std()
srted = std_devs.sort_values()

# Show the 20 columns with the lowest spread.
srted.iloc[:20]


cnt_cred_f_oper_3m_1month    0.000886
cnt_cred_e_oper_3m_1month    0.001131
cnt_cred_f_oper_1m_1month    0.001246
cnt_deb_f_oper_3m_1month     0.001289
cnt_cred_e_oper_1m_1month    0.001361
cnt_cred_f_oper_1m_2month    0.001488
cnt_deb_f_oper_1m_1month     0.001514
cnt_cred_f_oper_3m_2month    0.001546
cnt_cred_e_oper_1m_2month    0.001563
cnt_b_oper_3m_3month         0.001655
cnt_cred_e_oper_3m_2month    0.001695
cnt_deb_f_oper_1m_2month     0.001834
cnt_deb_f_oper_3m_2month     0.001894
cnt_deb_e_oper_1m_1month     0.002139
cnt_deb_e_oper_3m_1month     0.002175
cnt_b_oper_3m_2month         0.002196
cnt_deb_e_oper_1m_2month     0.002359
cnt_cred_f_oper_3m_3month    0.002564
cnt_deb_f_oper_3m_3month     0.002612
cnt_deb_e_oper_3m_2month     0.002639
dtype: float64

In [84]:
float_cols = df_pivoted.select_dtypes(include=['float64', 'float32'])

means = float_cols.mean()
std_devs = float_cols.std()

# Coefficient of variation in percent: standard deviation relative to |mean|.
cv_percent = (std_devs / abs(means)) * 100

sorted_cv_percent = cv_percent.sort_values()

# Print the 100 lowest values in two chunks of 50.
print(sorted_cv_percent.iloc[0:50])
# The second slice used to be iloc[50:20], which is always empty (stop < start),
# so nothing was printed for it.
print(sorted_cv_percent.iloc[50:100])


cnt_cred_e_oper_3m_1month    0.130930
cnt_cred_e_oper_1m_1month    0.155773
cnt_cred_e_oper_1m_2month    0.178981
cnt_cred_e_oper_3m_2month    0.196164
cnt_deb_f_oper_1m_1month     0.196272
cnt_deb_f_oper_1m_2month     0.237819
cnt_deb_e_oper_1m_1month     0.239462
cnt_deb_e_oper_1m_2month     0.264140
cnt_cred_e_oper_3m_3month    0.307015
cnt_cred_e_oper_1m_3month    0.310253
cnt_b_oper_3m_3month         0.319841
cnt_deb_e_oper_1m_3month     0.345459
cnt_deb_f_oper_1m_3month     0.348912
cnt_cred_f_oper_3m_1month    0.356484
cnt_c_oper_3m_2month         0.358865
cnt_c_oper_3m_1month         0.359786
cnt_c_oper_3m_3month         0.362152
cnt_b_oper_3m_2month         0.424392
cnt_deb_f_oper_3m_1month     0.450445
cnt_deb_e_oper_3m_1month     0.510480
cnt_deb_d_oper_1m_2month     0.588607
cnt_c_oper_1m_1month         0.593846
cnt_b_oper_3m_1month         0.614297
cnt_deb_e_oper_3m_2month     0.619415
cnt_cred_f_oper_3m_2month    0.621906
cnt_deb_d_oper_3m_3month     0.624855
cnt_deb_d_op

In [85]:
# Downcast every float64 column to float32 to reduce the frame's memory use.
float_columns = df_pivoted.select_dtypes(include=['float64']).columns
df_pivoted = df_pivoted.astype(dict.fromkeys(float_columns, 'float32'))

In [86]:
float_columns

Index(['balance_amt_avg_1month', 'balance_amt_avg_2month',
       'balance_amt_avg_3month', 'balance_amt_day_avg_1month',
       'balance_amt_day_avg_2month', 'balance_amt_day_avg_3month',
       'balance_amt_max_1month', 'balance_amt_max_2month',
       'balance_amt_max_3month', 'balance_amt_min_1month',
       ...
       'sum_deb_h_oper_3m_3month', 'sum_of_paym_1y_1month',
       'sum_of_paym_1y_2month', 'sum_of_paym_1y_3month',
       'sum_of_paym_2m_1month', 'sum_of_paym_2m_2month',
       'sum_of_paym_2m_3month', 'sum_of_paym_6m_1month',
       'sum_of_paym_6m_2month', 'sum_of_paym_6m_3month'],
      dtype='object', length=243)

Обозначение категориальных признаков

In [87]:
# ["cnt_days_cred_f_oper_3m_1month", "sum_cred_f_oper_1m_1month", "cnt_days_cred_f_oper_1m_1month", "cnt_cred_f_oper_1m_1month",
#  "cnt_cred_d_oper_1m_1month", "cnt_cred_d_oper_3m_1month", "cnt_cred_f_oper_1m_1month", "cnt_cred_g_oper_1m_1month",
#  "cnt_days_cred_f_oper_3m_2month", "sum_cred_f_oper_1m_2month", "cnt_days_cred_f_oper_1m_2month", "cnt_cred_f_oper_1m_2month",
#  "cnt_cred_d_oper_1m_2month", "cnt_cred_d_oper_3m_2month", "cnt_cred_f_oper_1m_2month", "cnt_cred_g_oper_1m_2month",
#  "cnt_days_cred_f_oper_3m_3month", "sum_cred_f_oper_1m_3month", "cnt_days_cred_f_oper_1m_3month", "cnt_cred_f_oper_1m_3month",
#  "cnt_cred_d_oper_1m_3month", "cnt_cred_d_oper_3m_3month", "cnt_cred_f_oper_1m_3month", "cnt_cred_g_oper_1m_3month",
# "sum_cred_d_oper_1m_3month", "cnt_days_deb_f_oper_1m_3month", "sum_cred_g_oper_1m_3month", "cnt_days_cred_g_oper_1m_3month", "sum_a_oper_1m_3month", "sum_b_oper_1m_3month", "cnt_b_oper_1m_3month",
# "sum_cred_d_oper_1m_2month", "cnt_days_deb_f_oper_1m_2month", "sum_cred_g_oper_1m_2month", "cnt_days_cred_g_oper_1m_2month", "sum_a_oper_1m_2month", "sum_b_oper_1m_2month", "cnt_b_oper_1m_2month",
# "sum_cred_d_oper_1m_1month", "cnt_days_deb_f_oper_1m_1month", "sum_cred_g_oper_1m_1month", "cnt_days_cred_g_oper_1m_1month", "sum_a_oper_1m_1month", "sum_b_oper_1m_1month", "cnt_b_oper_1m_1month",

# "cnt_days_cred_h_oper_1m_1month",  "cnt_cred_h_oper_1m_1month","cnt_days_cred_h_oper_1m_2month",  "cnt_cred_h_oper_1m_2month","cnt_days_cred_h_oper_1m_3month",  "cnt_cred_h_oper_1m_3month",

Создаем выборки для валидации и обучения

In [88]:
#"cnt_cred_e_oper_3m_1month",
#"cnt_cred_e_oper_1m_1month",
#"cnt_cred_e_oper_1m_2month",
#"cnt_cred_e_oper_3m_2month",
#"cnt_deb_f_oper_1m_1month",
#"cnt_deb_f_oper_1m_2month",
#"cnt_deb_e_oper_1m_1month",
#"cnt_deb_e_oper_1m_2month",
#"cnt_cred_e_oper_3m_3month",
#"cnt_cred_e_oper_1m_3month",
#"cnt_b_oper_3m_3month",
#"cnt_deb_e_oper_1m_3month",
#"cnt_deb_f_oper_1m_3month",
#"cnt_cred_f_oper_3m_1month",
#"cnt_c_oper_3m_2month",
#"cnt_c_oper_3m_1month",
#"cnt_c_oper_3m_3month",
#"cnt_b_oper_3m_2month",
#"cnt_deb_f_oper_3m_1month",
#"cnt_deb_e_oper_3m_1month",
#"cnt_deb_d_oper_1m_2month",
#"cnt_c_oper_1m_1month",
#"cnt_b_oper_3m_1month",
#"cnt_deb_e_oper_3m_2month",
#"cnt_cred_f_oper_3m_2month",
#"cnt_deb_d_oper_3m_3month",
#"cnt_deb_d_oper_3m_2month",
#"cnt_c_oper_1m_3month",
#"cnt_deb_f_oper_3m_2month",
#"cnt_c_oper_1m_2month",
#"cnt_deb_d_oper_3m_1month",
#"cnt_deb_d_oper_1m_3month",
#"cnt_deb_e_oper_3m_3month",
#"cnt_cred_f_oper_1m_1month",
#"cnt_deb_d_oper_1m_1month",
#
# "cnt_deb_f_oper_3m_3month",
# "cnt_cred_f_oper_1m_2month",
# "cnt_cred_f_oper_3m_3month",
# "cnt_cred_g_oper_3m_3month",
# "cnt_cred_g_oper_3m_2month",
# "cnt_deb_h_oper_1m_2month",
# "cnt_cred_g_oper_3m_1month",
# "cnt_deb_g_oper_1m_2month",
# "cnt_deb_h_oper_1m_3month",
# "cnt_cred_f_oper_1m_3month",
# "cnt_deb_g_oper_1m_3month",
# "cnt_deb_h_oper_1m_1month",
# "cnt_deb_g_oper_3m_3month",
# "cnt_deb_g_oper_3m_2month"
# "cnt_deb_g_oper_3m_1month"

#
# "ogrn_days_end_quarter_2month", "sum_cred_f_oper_3m_3month", "sum_deb_g_oper_3m_1month", "sum_cred_e_oper_3m_3month", "sum_cred_g_oper_3m_1month", "sum_cred_g_oper_1m_1month", "sum_a_oper_3m_3month", "sum_b_oper_3m_2month", "sum_of_paym_2m_2month", "sum_cred_g_oper_3m_2month", "sum_a_oper_1m_3month", "sum_c_oper_1m_3month", "sum_c_oper_3m_2month", "sum_c_oper_3m_1month", "sum_deb_g_oper_3m_3month", "sum_cred_g_oper_3m_3month", "sum_b_oper_1m_1month", "sum_cred_d_oper_3m_3month", "balance_amt_avg_2month", "sum_of_paym_1y_2month", "sum_cred_h_oper_1m_3month", "sum_deb_g_oper_3m_2month", "balance_amt_day_avg_2month", "sum_deb_e_oper_3m_3month", "sum_cred_f_oper_3m_1month", "max_founderpres_1month", "ogrn_exist_months_2month", "sum_deb_d_oper_3m_1month", "sum_cred_f_oper_3m_2month", "ogrn_days_end_month_3month", "sum_c_oper_3m_3month", "sum_deb_g_oper_1m_3month", "sum_deb_d_oper_3m_3month", "sum_cred_d_oper_3m_1month", "max_founderpres_2month", "ft_registration_date_2month", "sum_cred_f_oper_1m_1month", "sum_b_oper_3m_1month", "sum_of_paym_1y_1month", "sum_cred_f_oper_1m_3month", "sum_cred_d_oper_1m_3month", "min_founderpres_2month", "sum_deb_f_oper_3m_3month", "sum_of_paym_6m_2month", "sum_b_oper_3m_3month", "max_founderpres_3month", "sum_cred_d_oper_3m_2month", "sum_b_oper_1m_2month", "sum_deb_e_oper_3m_1month", "balance_amt_avg_1month",


# Operation groups for which `cnt_<group>_oper_<window>` counters exist.
_oper_groups = (
    "a", "b", "c",
    "deb_d", "cred_d",
    "deb_e", "cred_e",
    "deb_f", "cred_f",
    "deb_g", "cred_g",
    "deb_h", "cred_h",
)
_month_numbers = (1, 2, 3)

# Monthly copies of all 1m / 3m operation counters.
cnt_oper_columns = [
    f"cnt_{group}_oper_{window}_{month}month"
    for window in ("1m", "3m")
    for group in _oper_groups
    for month in _month_numbers
]

# Monthly copies of the location descriptors other than `city` itself.
location_columns = [
    f"{prefix}_{month}month"
    for prefix in ("city_type", "index_city_code")
    for month in _month_numbers
]

# The identifier, the start cluster of the third month, and all end_cluster
# columns (end_cluster_3month is taken as the target `y` right after this).
service_columns = [
    "start_cluster_3month",
    "id", "end_cluster_3month", "end_cluster_2month", "end_cluster_1month",
]

X = df_pivoted.drop(columns=cnt_oper_columns + location_columns + service_columns)
# X = df_pivoted.drop([
#     "cnt_days_cred_h_oper_1m_1month",  "cnt_cred_h_oper_1m_1month","cnt_days_cred_h_oper_1m_2month",  "cnt_cred_h_oper_1m_2month","cnt_days_cred_h_oper_1m_3month",  "cnt_cred_h_oper_1m_3month",
#  "cnt_cred_d_oper_1m_3month", "cnt_cred_d_oper_3m_3month", "cnt_cred_f_oper_1m_3month", "cnt_cred_g_oper_1m_3month",
# "sum_cred_d_oper_1m_3month", "cnt_days_deb_f_oper_1m_3month", "sum_cred_g_oper_1m_3month", "cnt_days_cred_g_oper_1m_3month", "sum_a_oper_1m_3month", "sum_b_oper_1m_3month", "cnt_b_oper_1m_3month",
# "sum_cred_d_oper_1m_2month", "cnt_days_deb_f_oper_1m_2month", "sum_cred_g_oper_1m_2month", "cnt_days_cred_g_oper_1m_2month", "sum_a_oper_1m_2month", "sum_b_oper_1m_2month", "cnt_b_oper_1m_2month",
# "sum_cred_d_oper_1m_1month", "cnt_days_deb_f_oper_1m_1month", "sum_cred_g_oper_1m_1month", "cnt_days_cred_g_oper_1m_1month", "sum_a_oper_1m_1month", "sum_b_oper_1m_1month", "cnt_b_oper_1m_1month","cnt_days_cred_f_oper_3m_1month", "sum_cred_f_oper_1m_1month", "cnt_days_cred_f_oper_1m_1month", "cnt_cred_f_oper_1m_1month",
#  "cnt_cred_d_oper_1m_1month", "cnt_cred_d_oper_3m_1month", "cnt_cred_f_oper_1m_1month", "cnt_cred_g_oper_1m_1month",
#  "cnt_days_cred_f_oper_3m_2month", "sum_cred_f_oper_1m_2month", "cnt_days_cred_f_oper_1m_2month", "cnt_cred_f_oper_1m_2month",
#  "cnt_cred_d_oper_1m_2month", "cnt_cred_d_oper_3m_2month", "cnt_cred_f_oper_1m_2month", "cnt_cred_g_oper_1m_2month",
#  "cnt_days_cred_f_oper_3m_3month", "sum_cred_f_oper_1m_3month", "cnt_days_cred_f_oper_1m_3month", "cnt_cred_f_oper_1m_3month",
#  "cnt_cred_d_oper_1m_3month", "cnt_cred_d_oper_3m_3month", "cnt_cred_f_oper_1m_3month", "cnt_cred_g_oper_1m_3month", "id","end_cluster_3month", "end_cluster_2month", "end_cluster_1month"
# ], axis=1)
# Target: the end cluster observed in the third month.
y = df_pivoted.loc[:, "end_cluster_3month"]

# 80/20 hold-out split with a fixed seed.
holdout_parts = train_test_split(X, y, test_size=0.2, random_state=42)
x_train, x_val, y_train, y_val = holdout_parts

В качестве базовой модели возьмем CatBoost и обучим его на отобранных выше признаках

In [89]:
# Keyword arguments passed to CatBoostClassifier.
params_cat = dict(
    iterations=1000,
    depth=12,
    learning_rate=0.1,
    eval_metric='AUC',
    random_seed=1,
    bootstrap_type='Bayesian',
    bagging_temperature=1,
    od_type='Iter',
    od_wait=100,
    task_type='GPU',
)

In [90]:
from catboost import CatBoostClassifier
from sklearn.model_selection import LeavePOut, RepeatedStratifiedKFold, StratifiedKFold

# Classifier configured from the `params_cat` dictionary.
cat_model = CatBoostClassifier(**params_cat)

# Splitter: 5 stratified folds repeated 3 times with a fixed seed.
# Alternatives kept for reference:
#   KFold(n_splits=7)
#   StratifiedKFold(n_splits=6, shuffle=True, random_state=42)
cv = RepeatedStratifiedKFold(n_repeats=3, n_splits=5, random_state=12)

# Collects the weighted ROC AUC of each split.
auc_scores = list()


In [91]:
def weighted_roc_auc(y_true, y_pred, labels, weights_dict):
    """Return the weighted sum of per-class one-vs-rest ROC AUC scores.

    Args:
        y_true: true class labels.
        y_pred: predicted scores, passed to ``roc_auc_score`` unchanged.
        labels: class labels, forwarded to ``roc_auc_score`` and used to look
            up the weights.
        weights_dict: mapping label -> unnormalized weight; must contain every
            entry of ``labels`` (a missing one raises ``KeyError``).

    Returns:
        Sum over classes of ``weight * auc``, with weights rescaled to sum to 1.
    """
    raw_weights = np.array([weights_dict[name] for name in labels])
    normalized = raw_weights / raw_weights.sum()
    # average=None makes roc_auc_score return one score per class.
    per_class_auc = roc_auc_score(
        y_true,
        y_pred,
        labels=labels,
        multi_class="ovr",
        average=None,
    )
    return sum(normalized * per_class_auc)

In [92]:
# Read the per-cluster weights and index the table by cluster name.
cluster_weights = pd.read_excel("cluster_weights.xlsx")
cluster_weights = cluster_weights.set_index("cluster")

# cluster -> unnormalized weight lookup.
unnorm_weight_column = cluster_weights["unnorm_weight"]
weights_dict = unnorm_weight_column.to_dict()

In [93]:
# weights_dict

In [94]:
# Columns stored as object or category are treated as categorical features.
cat_features = [
    name for name, dtype in X.dtypes.items()
    if dtype.name in ('category', 'object')
]
# Store each of them with the pandas `category` dtype.
for col in cat_features:
    X[col] = X[col].astype('category')
    #test_df[col] = test_df[col].astype('category')



In [95]:
# Store the target with the pandas `category` dtype.
y = y.astype('category')
# y = y.cat.add_categories('Missing').fillna('Missing')

In [96]:
# Replace NaN in every categorical feature with an explicit 'Missing' level.
cat_features = [col for col in X.columns if X[col].dtype.name == 'category' or X[col].dtype.name == 'object']
for col in cat_features:
    # Cast here as well, so the cell works even when a column is still `object`.
    col_values = X[col].astype('category')
    # add_categories raises if the level already exists; the guard makes the
    # cell safe to run more than once.
    if 'Missing' not in col_values.cat.categories:
        col_values = col_values.cat.add_categories('Missing')
    X[col] = col_values.fillna('Missing')
cat_features

['channel_code_1month',
 'channel_code_2month',
 'channel_code_3month',
 'city_1month',
 'city_2month',
 'city_3month',
 'ogrn_month_1month',
 'ogrn_month_2month',
 'ogrn_month_3month',
 'ogrn_year_1month',
 'ogrn_year_2month',
 'ogrn_year_3month',
 'okved_1month',
 'okved_2month',
 'okved_3month',
 'segment_1month',
 'segment_2month',
 'segment_3month',
 'start_cluster_1month',
 'start_cluster_2month']

In [97]:
# Re-list the categorical feature names after the transformations above.
cat_features = [
    name for name, dtype in X.dtypes.items()
    if dtype.name in ('category', 'object')
]
cat_features

['channel_code_1month',
 'channel_code_2month',
 'channel_code_3month',
 'city_1month',
 'city_2month',
 'city_3month',
 'ogrn_month_1month',
 'ogrn_month_2month',
 'ogrn_month_3month',
 'ogrn_year_1month',
 'ogrn_year_2month',
 'ogrn_year_3month',
 'okved_1month',
 'okved_2month',
 'okved_3month',
 'segment_1month',
 'segment_2month',
 'segment_3month',
 'start_cluster_1month',
 'start_cluster_2month']

In [98]:
X

Unnamed: 0,balance_amt_avg_1month,balance_amt_avg_2month,balance_amt_avg_3month,balance_amt_day_avg_1month,balance_amt_day_avg_2month,balance_amt_day_avg_3month,balance_amt_max_1month,balance_amt_max_2month,balance_amt_max_3month,balance_amt_min_1month,...,sum_deb_h_oper_3m_3month,sum_of_paym_1y_1month,sum_of_paym_1y_2month,sum_of_paym_1y_3month,sum_of_paym_2m_1month,sum_of_paym_2m_2month,sum_of_paym_2m_3month,sum_of_paym_6m_1month,sum_of_paym_6m_2month,sum_of_paym_6m_3month
0,0.744845,1.049605,0.692653,0.748101,1.053805,0.695747,0.705492,0.831916,0.740253,1.287207,...,0.877050,0.511490,0.486425,0.480547,0.942275,0.645704,0.403604,0.536013,0.536378,0.613167
1,-0.081586,-0.094962,-0.090605,-0.080890,-0.094307,-0.089937,-0.091860,-0.100504,-0.114275,-0.114040,...,0.043221,0.052041,0.033554,0.039472,0.014051,-0.057593,-0.092059,0.043800,0.035027,0.025233
2,-0.154685,-0.152784,-0.148737,-0.154215,-0.152308,-0.148249,-0.186795,-0.193686,-0.187003,-0.122805,...,-0.165588,-0.291924,-0.290712,-0.288318,-0.255837,-0.267913,-0.255946,-0.287121,-0.284955,-0.280676
3,-0.156643,-0.156666,-0.156522,-0.156179,-0.156202,-0.156058,-0.204861,-0.204790,-0.204718,-0.125660,...,-0.165588,-0.242793,-0.262878,-0.273303,-0.273969,-0.273969,-0.273969,-0.268832,-0.294398,-0.294447
4,-0.138847,-0.129233,-0.141798,-0.138328,-0.128684,-0.141289,-0.182486,-0.162965,-0.170262,-0.125630,...,-0.078297,-0.124641,-0.121939,-0.128903,-0.103807,-0.134192,-0.166740,-0.130025,-0.134049,-0.142831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
199995,-0.067129,-0.004851,-0.039281,-0.066388,-0.003917,-0.038454,-0.108082,-0.053838,-0.053694,-0.031401,...,-0.078382,-0.295238,-0.290994,-0.283410,-0.269524,-0.248313,-0.214854,-0.293044,-0.285459,-0.271905
199996,0.543173,0.413435,0.293117,0.545804,0.415664,0.294974,0.382148,0.281704,0.189316,1.484547,...,-0.153030,-0.125855,-0.126250,-0.126344,-0.221403,-0.222278,-0.225526,-0.136274,-0.178142,-0.194693
199997,-0.082159,-0.111169,0.032941,-0.081465,-0.110564,0.033992,-0.028263,-0.041417,0.140726,-0.120011,...,0.680322,1.732060,1.662368,1.694414,0.661477,0.127654,0.751383,1.658004,1.251180,1.126610
199998,-0.156775,-0.156775,-0.156776,-0.156311,-0.156311,-0.156312,-0.204960,-0.204960,-0.204960,-0.125987,...,-0.165588,,,,,,,,,


In [99]:
y_val.info()

<class 'pandas.core.series.Series'>
Index: 40000 entries, 119737 to 6584
Series name: end_cluster_3month
Non-Null Count  Dtype 
--------------  ----- 
40000 non-null  object
dtypes: object(1)
memory usage: 625.0+ KB


In [100]:
import os

# Folder that receives one serialized model per split; created up front so
# that save_model does not fail when the folder is missing.
models_dir = 'my_meth4'
os.makedirs(models_dir, exist_ok=True)

# Start from an empty list on every run of this cell; otherwise a re-run would
# append to the scores of a previous run and distort the mean / std below.
auc_scores = []

i = 0
for train_idx, val_idx in cv.split(X, y):
    print(f"\n\ni: {i} ")
    x_train, x_val = X.iloc[train_idx], X.iloc[val_idx]
    y_train, y_val = y.iloc[train_idx], y.iloc[val_idx]

    # Fit on the training part of the split; the validation part is the eval set.
    cat_model.fit(
        x_train, y_train,
        eval_set=(x_val, y_val),
        cat_features=cat_features,
        use_best_model=True,
        verbose=True
    )

    y_pred_proba = cat_model.predict_proba(x_val)
    cat_model.save_model(f'{models_dir}/model_{i}.cbm')
    i += 1

    # Score the split with the competition metric, using the model's class
    # order for the probability columns.
    auc_score = weighted_roc_auc(y_val, y_pred_proba, cat_model.classes_, weights_dict)
    auc_scores.append(auc_score)

mean_auc = np.mean(auc_scores)
std_auc = np.std(auc_scores)
print(f"Средний взвешенный ROC AUC: {mean_auc:.3f} ± {std_auc:.3f}")




i: 0 


Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8422939	best: 0.8422939 (0)	total: 655ms	remaining: 10m 54s
1:	total: 1.16s	remaining: 9m 37s
2:	total: 1.77s	remaining: 9m 46s
3:	total: 2.24s	remaining: 9m 18s
4:	total: 2.92s	remaining: 9m 42s
5:	test: 0.8640777	best: 0.8640777 (5)	total: 3.41s	remaining: 9m 24s
6:	total: 3.86s	remaining: 9m 8s
7:	total: 4.36s	remaining: 9m
8:	total: 4.85s	remaining: 8m 54s
9:	total: 5.35s	remaining: 8m 49s
10:	test: 0.8753813	best: 0.8753813 (10)	total: 5.86s	remaining: 8m 47s
11:	total: 6.37s	remaining: 8m 44s
12:	total: 6.87s	remaining: 8m 41s
13:	total: 7.34s	remaining: 8m 37s
14:	total: 7.85s	remaining: 8m 35s
15:	test: 0.8841874	best: 0.8841874 (15)	total: 8.46s	remaining: 8m 40s
16:	total: 9.04s	remaining: 8m 42s
17:	total: 9.57s	remaining: 8m 42s
18:	total: 10.1s	remaining: 8m 42s
19:	total: 10.7s	remaining: 8m 45s
20:	test: 0.8894428	best: 0.8894428 (20)	total: 11.3s	remaining: 8m 47s
21:	total: 11.8s	remaining: 8m 46s
22:	total: 12.4s	remaining: 8m 45s
23:	total: 12.9s	remaining

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8388023	best: 0.8388023 (0)	total: 467ms	remaining: 7m 46s
1:	total: 948ms	remaining: 7m 53s
2:	total: 1.41s	remaining: 7m 49s
3:	total: 1.92s	remaining: 7m 56s
4:	total: 2.39s	remaining: 7m 55s
5:	test: 0.8617683	best: 0.8617683 (5)	total: 2.84s	remaining: 7m 51s
6:	total: 3.33s	remaining: 7m 52s
7:	total: 3.8s	remaining: 7m 50s
8:	total: 4.29s	remaining: 7m 52s
9:	total: 4.77s	remaining: 7m 52s
10:	test: 0.8645783	best: 0.8645783 (10)	total: 5.26s	remaining: 7m 53s
11:	total: 5.76s	remaining: 7m 53s
12:	total: 6.35s	remaining: 8m 1s
13:	total: 7.15s	remaining: 8m 23s
14:	total: 7.68s	remaining: 8m 24s
15:	test: 0.8748642	best: 0.8748642 (15)	total: 8.33s	remaining: 8m 32s
16:	total: 9.03s	remaining: 8m 42s
17:	total: 9.55s	remaining: 8m 41s
18:	total: 10.6s	remaining: 9m 9s
19:	total: 11.6s	remaining: 9m 30s
20:	test: 0.8854613	best: 0.8854613 (20)	total: 12.1s	remaining: 9m 25s
21:	total: 13.1s	remaining: 9m 43s
22:	total: 13.8s	remaining: 9m 45s
23:	total: 14.4s	remainin

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8020871	best: 0.8020871 (0)	total: 461ms	remaining: 7m 40s
1:	total: 913ms	remaining: 7m 35s
2:	total: 1.37s	remaining: 7m 36s
3:	total: 1.83s	remaining: 7m 34s
4:	total: 2.3s	remaining: 7m 37s
5:	test: 0.8414566	best: 0.8414566 (5)	total: 2.79s	remaining: 7m 41s
6:	total: 3.25s	remaining: 7m 40s
7:	total: 3.73s	remaining: 7m 41s
8:	total: 4.3s	remaining: 7m 53s
9:	total: 4.81s	remaining: 7m 56s
10:	test: 0.8523180	best: 0.8523180 (10)	total: 5.33s	remaining: 7m 59s
11:	total: 5.82s	remaining: 7m 59s
12:	total: 6.31s	remaining: 7m 59s
13:	total: 6.83s	remaining: 8m 1s
14:	total: 7.34s	remaining: 8m 2s
15:	test: 0.8685991	best: 0.8685991 (15)	total: 7.9s	remaining: 8m 5s
16:	total: 8.43s	remaining: 8m 7s
17:	total: 9.01s	remaining: 8m 11s
18:	total: 9.57s	remaining: 8m 14s
19:	total: 10.1s	remaining: 8m 17s
20:	test: 0.8741253	best: 0.8741253 (20)	total: 10.7s	remaining: 8m 18s
21:	total: 11.2s	remaining: 8m 17s
22:	total: 11.7s	remaining: 8m 18s
23:	total: 12.3s	remaining: 8

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8471773	best: 0.8471773 (0)	total: 464ms	remaining: 7m 43s
1:	total: 916ms	remaining: 7m 36s
2:	total: 1.4s	remaining: 7m 46s
3:	total: 1.9s	remaining: 7m 52s
4:	total: 2.38s	remaining: 7m 53s
5:	test: 0.8607184	best: 0.8632722 (4)	total: 2.87s	remaining: 7m 54s
6:	total: 3.35s	remaining: 7m 55s
7:	total: 3.89s	remaining: 8m 2s
8:	total: 4.36s	remaining: 7m 59s
9:	total: 4.88s	remaining: 8m 3s
10:	test: 0.8684113	best: 0.8684113 (10)	total: 5.4s	remaining: 8m 5s
11:	total: 5.88s	remaining: 8m 4s
12:	total: 6.36s	remaining: 8m 3s
13:	total: 6.9s	remaining: 8m 6s
14:	total: 7.4s	remaining: 8m 5s
15:	test: 0.8803859	best: 0.8803859 (15)	total: 7.88s	remaining: 8m 4s
16:	total: 8.35s	remaining: 8m 3s
17:	total: 9.01s	remaining: 8m 11s
18:	total: 9.56s	remaining: 8m 13s
19:	total: 10.1s	remaining: 8m 13s
20:	test: 0.8880806	best: 0.8885551 (19)	total: 10.6s	remaining: 8m 15s
21:	total: 11.1s	remaining: 8m 13s
22:	total: 11.7s	remaining: 8m 17s
23:	total: 12.2s	remaining: 8m 16s
2

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8384910	best: 0.8384910 (0)	total: 464ms	remaining: 7m 43s
1:	total: 911ms	remaining: 7m 34s
2:	total: 1.39s	remaining: 7m 41s
3:	total: 1.84s	remaining: 7m 37s
4:	total: 2.31s	remaining: 7m 40s
5:	test: 0.8616472	best: 0.8616472 (5)	total: 2.8s	remaining: 7m 43s
6:	total: 3.31s	remaining: 7m 49s
7:	total: 3.8s	remaining: 7m 51s
8:	total: 4.3s	remaining: 7m 53s
9:	total: 4.78s	remaining: 7m 52s
10:	test: 0.8658267	best: 0.8658267 (10)	total: 5.29s	remaining: 7m 55s
11:	total: 5.8s	remaining: 7m 57s
12:	total: 6.3s	remaining: 7m 58s
13:	total: 6.83s	remaining: 8m 1s
14:	total: 7.34s	remaining: 8m 1s
15:	test: 0.8788440	best: 0.8788440 (15)	total: 7.89s	remaining: 8m 5s
16:	total: 8.45s	remaining: 8m 8s
17:	total: 8.94s	remaining: 8m 7s
18:	total: 9.42s	remaining: 8m 6s
19:	total: 9.96s	remaining: 8m 8s
20:	test: 0.8839889	best: 0.8839889 (20)	total: 10.5s	remaining: 8m 10s
21:	total: 11s	remaining: 8m 9s
22:	total: 11.6s	remaining: 8m 10s
23:	total: 12.1s	remaining: 8m 10s
24

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8375323	best: 0.8375323 (0)	total: 459ms	remaining: 7m 38s
1:	total: 935ms	remaining: 7m 46s
2:	total: 1.41s	remaining: 7m 48s
3:	total: 1.88s	remaining: 7m 46s
4:	total: 2.36s	remaining: 7m 49s
5:	test: 0.8637322	best: 0.8637322 (5)	total: 2.84s	remaining: 7m 51s
6:	total: 3.31s	remaining: 7m 49s
7:	total: 3.81s	remaining: 7m 52s
8:	total: 4.27s	remaining: 7m 50s
9:	total: 4.77s	remaining: 7m 51s
10:	test: 0.8743946	best: 0.8743946 (10)	total: 5.33s	remaining: 7m 59s
11:	total: 5.83s	remaining: 8m
12:	total: 6.34s	remaining: 8m 1s
13:	total: 6.84s	remaining: 8m 2s
14:	total: 7.35s	remaining: 8m 2s
15:	test: 0.8841383	best: 0.8841383 (15)	total: 7.83s	remaining: 8m 1s
16:	total: 8.38s	remaining: 8m 4s
17:	total: 8.89s	remaining: 8m 5s
18:	total: 9.5s	remaining: 8m 10s
19:	total: 9.99s	remaining: 8m 9s
20:	test: 0.8877192	best: 0.8879857 (19)	total: 10.6s	remaining: 8m 12s
21:	total: 11.2s	remaining: 8m 16s
22:	total: 11.7s	remaining: 8m 18s
23:	total: 12.2s	remaining: 8m 17s

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8214443	best: 0.8214443 (0)	total: 456ms	remaining: 7m 35s
1:	total: 915ms	remaining: 7m 36s
2:	total: 1.38s	remaining: 7m 39s
3:	total: 1.86s	remaining: 7m 43s
4:	total: 2.34s	remaining: 7m 44s
5:	test: 0.8547219	best: 0.8562877 (4)	total: 2.8s	remaining: 7m 44s
6:	total: 3.27s	remaining: 7m 44s
7:	total: 3.73s	remaining: 7m 42s
8:	total: 4.19s	remaining: 7m 41s
9:	total: 4.67s	remaining: 7m 42s
10:	test: 0.8694823	best: 0.8694823 (10)	total: 5.21s	remaining: 7m 48s
11:	total: 5.71s	remaining: 7m 50s
12:	total: 6.24s	remaining: 7m 53s
13:	total: 6.79s	remaining: 7m 58s
14:	total: 7.3s	remaining: 7m 59s
15:	test: 0.8775483	best: 0.8775483 (15)	total: 7.81s	remaining: 8m
16:	total: 8.39s	remaining: 8m 5s
17:	total: 8.92s	remaining: 8m 6s
18:	total: 9.51s	remaining: 8m 11s
19:	total: 9.99s	remaining: 8m 9s
20:	test: 0.8854148	best: 0.8854148 (20)	total: 10.5s	remaining: 8m 9s
21:	total: 11s	remaining: 8m 9s
22:	total: 11.5s	remaining: 8m 9s
23:	total: 12.1s	remaining: 8m 10s
2

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8371747	best: 0.8371747 (0)	total: 461ms	remaining: 7m 40s
1:	total: 930ms	remaining: 7m 43s
2:	total: 1.39s	remaining: 7m 40s
3:	total: 1.86s	remaining: 7m 44s
4:	total: 2.35s	remaining: 7m 47s
5:	test: 0.8704555	best: 0.8704555 (5)	total: 2.81s	remaining: 7m 45s
6:	total: 3.28s	remaining: 7m 45s
7:	total: 3.79s	remaining: 7m 49s
8:	total: 4.28s	remaining: 7m 51s
9:	total: 4.8s	remaining: 7m 55s
10:	test: 0.8715603	best: 0.8718180 (9)	total: 5.35s	remaining: 8m 1s
11:	total: 5.92s	remaining: 8m 7s
12:	total: 6.42s	remaining: 8m 7s
13:	total: 6.92s	remaining: 8m 7s
14:	total: 7.41s	remaining: 8m 6s
15:	test: 0.8773481	best: 0.8773481 (15)	total: 7.88s	remaining: 8m 4s
16:	total: 8.43s	remaining: 8m 7s
17:	total: 9.03s	remaining: 8m 12s
18:	total: 9.61s	remaining: 8m 16s
19:	total: 10.2s	remaining: 8m 19s
20:	test: 0.8889399	best: 0.8889399 (20)	total: 10.8s	remaining: 8m 23s
21:	total: 11.3s	remaining: 8m 23s
22:	total: 11.9s	remaining: 8m 24s
23:	total: 12.4s	remaining: 8m 

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8220082	best: 0.8220082 (0)	total: 468ms	remaining: 7m 47s
1:	total: 948ms	remaining: 7m 52s
2:	total: 1.42s	remaining: 7m 51s
3:	total: 1.89s	remaining: 7m 49s
4:	total: 2.35s	remaining: 7m 47s
5:	test: 0.8468294	best: 0.8468294 (5)	total: 2.81s	remaining: 7m 44s
6:	total: 3.27s	remaining: 7m 44s
7:	total: 3.76s	remaining: 7m 46s
8:	total: 4.31s	remaining: 7m 55s
9:	total: 4.81s	remaining: 7m 56s
10:	test: 0.8557224	best: 0.8557224 (10)	total: 5.32s	remaining: 7m 58s
11:	total: 5.81s	remaining: 7m 58s
12:	total: 6.3s	remaining: 7m 58s
13:	total: 6.84s	remaining: 8m 1s
14:	total: 7.34s	remaining: 8m 2s
15:	test: 0.8668723	best: 0.8668723 (15)	total: 7.88s	remaining: 8m 4s
16:	total: 8.37s	remaining: 8m 4s
17:	total: 8.89s	remaining: 8m 5s
18:	total: 9.71s	remaining: 8m 21s
19:	total: 10.4s	remaining: 8m 28s
20:	test: 0.8714972	best: 0.8715078 (19)	total: 10.9s	remaining: 8m 29s
21:	total: 11.6s	remaining: 8m 34s
22:	total: 12.1s	remaining: 8m 34s
23:	total: 12.8s	remaining: 

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8405941	best: 0.8405941 (0)	total: 466ms	remaining: 7m 45s
1:	total: 912ms	remaining: 7m 35s
2:	total: 1.37s	remaining: 7m 34s
3:	total: 1.85s	remaining: 7m 40s
4:	total: 2.35s	remaining: 7m 47s
5:	test: 0.8668378	best: 0.8679593 (4)	total: 2.83s	remaining: 7m 48s
6:	total: 3.27s	remaining: 7m 44s
7:	total: 3.75s	remaining: 7m 45s
8:	total: 4.27s	remaining: 7m 49s
9:	total: 4.8s	remaining: 7m 55s
10:	test: 0.8722056	best: 0.8722056 (10)	total: 5.28s	remaining: 7m 54s
11:	total: 5.77s	remaining: 7m 54s
12:	total: 6.3s	remaining: 7m 58s
13:	total: 6.8s	remaining: 7m 58s
14:	total: 7.59s	remaining: 8m 18s
15:	test: 0.8804551	best: 0.8804551 (15)	total: 8.15s	remaining: 8m 21s
16:	total: 8.85s	remaining: 8m 31s
17:	total: 9.46s	remaining: 8m 35s
18:	total: 10.3s	remaining: 8m 49s
19:	total: 10.9s	remaining: 8m 54s
20:	test: 0.8839493	best: 0.8839493 (20)	total: 11.5s	remaining: 8m 58s
21:	total: 12.1s	remaining: 8m 56s
22:	total: 12.9s	remaining: 9m 7s
23:	total: 13.7s	remaining

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8484713	best: 0.8484713 (0)	total: 1.11s	remaining: 18m 32s
1:	total: 1.58s	remaining: 13m 7s
2:	total: 2.07s	remaining: 11m 27s
3:	total: 2.6s	remaining: 10m 46s
4:	total: 3.07s	remaining: 10m 11s
5:	test: 0.8716449	best: 0.8716449 (5)	total: 3.77s	remaining: 10m 24s
6:	total: 4.26s	remaining: 10m 4s
7:	total: 4.75s	remaining: 9m 49s
8:	total: 5.29s	remaining: 9m 42s
9:	total: 5.8s	remaining: 9m 33s
10:	test: 0.8808444	best: 0.8808444 (10)	total: 6.37s	remaining: 9m 32s
11:	total: 6.98s	remaining: 9m 34s
12:	total: 7.51s	remaining: 9m 30s
13:	total: 8.04s	remaining: 9m 26s
14:	total: 8.63s	remaining: 9m 26s
15:	test: 0.8902832	best: 0.8902832 (15)	total: 9.39s	remaining: 9m 37s
16:	total: 10s	remaining: 9m 39s
17:	total: 10.5s	remaining: 9m 34s
18:	total: 11.5s	remaining: 9m 52s
19:	total: 12s	remaining: 9m 47s
20:	test: 0.8942747	best: 0.8942747 (20)	total: 12.8s	remaining: 9m 57s
21:	total: 13.6s	remaining: 10m 6s
22:	total: 14.6s	remaining: 10m 19s
23:	total: 15.5s	remai

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8145600	best: 0.8145600 (0)	total: 465ms	remaining: 7m 44s
1:	total: 937ms	remaining: 7m 47s
2:	total: 1.39s	remaining: 7m 41s
3:	total: 1.85s	remaining: 7m 40s
4:	total: 2.33s	remaining: 7m 43s
5:	test: 0.8511668	best: 0.8511668 (5)	total: 2.83s	remaining: 7m 48s
6:	total: 3.3s	remaining: 7m 48s
7:	total: 3.79s	remaining: 7m 50s
8:	total: 4.25s	remaining: 7m 47s
9:	total: 4.74s	remaining: 7m 48s
10:	test: 0.8666260	best: 0.8666260 (10)	total: 5.24s	remaining: 7m 51s
11:	total: 5.82s	remaining: 7m 59s
12:	total: 6.29s	remaining: 7m 57s
13:	total: 6.85s	remaining: 8m 2s
14:	total: 7.39s	remaining: 8m 5s
15:	test: 0.8808700	best: 0.8808700 (15)	total: 7.89s	remaining: 8m 5s
16:	total: 8.43s	remaining: 8m 7s
17:	total: 8.98s	remaining: 8m 10s
18:	total: 9.52s	remaining: 8m 11s
19:	total: 10s	remaining: 8m 12s
20:	test: 0.8896095	best: 0.8896095 (20)	total: 10.6s	remaining: 8m 12s
21:	total: 11.1s	remaining: 8m 14s
22:	total: 11.7s	remaining: 8m 15s
23:	total: 12.2s	remaining: 8

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8299584	best: 0.8299584 (0)	total: 475ms	remaining: 7m 54s
1:	total: 929ms	remaining: 7m 43s
2:	total: 1.41s	remaining: 7m 47s
3:	total: 1.87s	remaining: 7m 46s
4:	total: 2.36s	remaining: 7m 49s
5:	test: 0.8634498	best: 0.8634498 (5)	total: 2.84s	remaining: 7m 50s
6:	total: 3.36s	remaining: 7m 57s
7:	total: 3.83s	remaining: 7m 54s
8:	total: 4.36s	remaining: 8m
9:	total: 4.84s	remaining: 7m 59s
10:	test: 0.8714429	best: 0.8714429 (10)	total: 5.35s	remaining: 8m 1s
11:	total: 5.87s	remaining: 8m 3s
12:	total: 6.39s	remaining: 8m 5s
13:	total: 6.94s	remaining: 8m 8s
14:	total: 7.42s	remaining: 8m 6s
15:	test: 0.8765998	best: 0.8765998 (15)	total: 7.98s	remaining: 8m 10s
16:	total: 8.51s	remaining: 8m 11s
17:	total: 9.03s	remaining: 8m 12s
18:	total: 9.59s	remaining: 8m 15s
19:	total: 10.1s	remaining: 8m 17s
20:	test: 0.8845085	best: 0.8845085 (20)	total: 10.7s	remaining: 8m 17s
21:	total: 11.3s	remaining: 8m 21s
22:	total: 11.9s	remaining: 8m 23s
23:	total: 12.4s	remaining: 8m 

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.8482158	best: 0.8482158 (0)	total: 473ms	remaining: 7m 52s
1:	total: 929ms	remaining: 7m 43s
2:	total: 1.38s	remaining: 7m 38s
3:	total: 1.86s	remaining: 7m 42s
4:	total: 2.33s	remaining: 7m 43s
5:	test: 0.8611939	best: 0.8611939 (5)	total: 2.83s	remaining: 7m 48s
6:	total: 3.29s	remaining: 7m 46s
7:	total: 3.78s	remaining: 7m 48s
8:	total: 4.24s	remaining: 7m 46s
9:	total: 4.74s	remaining: 7m 48s
10:	test: 0.8722012	best: 0.8722012 (10)	total: 5.24s	remaining: 7m 51s
11:	total: 5.8s	remaining: 7m 57s
12:	total: 6.37s	remaining: 8m 3s
13:	total: 6.87s	remaining: 8m 3s
14:	total: 7.35s	remaining: 8m 2s
15:	test: 0.8795741	best: 0.8795741 (15)	total: 7.87s	remaining: 8m 3s
16:	total: 8.45s	remaining: 8m 8s
17:	total: 8.99s	remaining: 8m 10s
18:	total: 9.52s	remaining: 8m 11s
19:	total: 10s	remaining: 8m 11s
20:	test: 0.8825641	best: 0.8825641 (20)	total: 10.6s	remaining: 8m 14s
21:	total: 11.2s	remaining: 8m 17s
22:	total: 11.7s	remaining: 8m 18s
23:	total: 12.3s	remaining: 8m

Default metric period is 5 because AUC is/are not implemented for GPU
AUC is not implemented on GPU. Will use CPU for metric computation, this could significantly affect learning time


0:	test: 0.7986227	best: 0.7986227 (0)	total: 458ms	remaining: 7m 37s
1:	total: 930ms	remaining: 7m 43s
2:	total: 1.39s	remaining: 7m 43s
3:	total: 1.88s	remaining: 7m 47s
4:	total: 2.36s	remaining: 7m 49s
5:	test: 0.8421876	best: 0.8421876 (5)	total: 2.82s	remaining: 7m 46s
6:	total: 3.31s	remaining: 7m 49s
7:	total: 3.79s	remaining: 7m 49s
8:	total: 4.27s	remaining: 7m 50s
9:	total: 4.75s	remaining: 7m 50s
10:	test: 0.8534198	best: 0.8534198 (10)	total: 5.24s	remaining: 7m 51s
11:	total: 5.82s	remaining: 7m 59s
12:	total: 6.35s	remaining: 8m 2s
13:	total: 6.93s	remaining: 8m 7s
14:	total: 7.48s	remaining: 8m 11s
15:	test: 0.8638183	best: 0.8638183 (15)	total: 7.99s	remaining: 8m 11s
16:	total: 8.5s	remaining: 8m 11s
17:	total: 9.09s	remaining: 8m 16s
18:	total: 9.71s	remaining: 8m 21s
19:	total: 10.2s	remaining: 8m 20s
20:	test: 0.8673553	best: 0.8673553 (20)	total: 10.7s	remaining: 8m 20s
21:	total: 11.3s	remaining: 8m 23s
22:	total: 11.9s	remaining: 8m 23s
23:	total: 12.3s	remainin