# 0. Импорт сторонних библиотек

In [1]:
import pandas as pd

import numpy as np
import itertools

import seaborn as sb

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import OneHotEncoder

from sklearn.linear_model import LogisticRegressionCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import matplotlib.pyplot as plt
from sklearn.svm import SVC

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.feature_selection import SequentialFeatureSelector
from catboost import CatBoostClassifier
import lightgbm as lgb
from sklearn.utils import class_weight

from sklearn.metrics import roc_auc_score
from sklearn.model_selection import cross_val_score
from sklearn.metrics import RocCurveDisplay, roc_curve, auc, classification_report, confusion_matrix

import gc

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides deprecation warnings from pandas/scikit-learn,
# which makes API drift in the cells below harder to notice.
warnings.filterwarnings('ignore')

## 0.1 Настройки Notebook и определение глобальных переменных

In [2]:
# Raise pandas' display limits so long and wide frames are printed in full.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# Notebook-wide constants (their consumers are outside this part of the file).
RANDOM_SEED = 42
ROC_AUC_Treshold = 0.75

# 1. Чтение датасета с признаками

In [3]:
# Load the raw feature table; the path is relative to the notebook's working directory.
df_features = pd.read_csv('../train_data/train_data.csv')
# Shown as the cell output: (rows, columns).
df_features.shape

(23878461, 61)

# 2. Удаление дубликатов

In [4]:
# Remove rows that are identical across all columns; this returns a new frame.
df_features_witout_dub = df_features.drop_duplicates()
# Unbind the original frame and force a collection so only one copy stays in memory.
del df_features
gc.collect()

0

In [5]:
# Shape after de-duplication, for comparison with the shape printed right after loading.
df_features_witout_dub.shape

(23878461, 61)

# 3. Определение признаков для исключения и кодирования

In [6]:
# Columns that are one-hot encoded further down in the notebook.
cat_cols = [
    'pre_since_opened',
    'pre_since_confirmed',
    'pre_pterm',
    'pre_fterm',
    'pre_till_pclose',
    'pre_till_fclose',
    'pre_loans_credit_limit',
    'pre_loans_next_pay_summ',
    'pre_loans_outstanding',
    'pre_loans_max_overdue_sum',
    'pre_loans_credit_cost_rate',
    'pre_loans5',
    'pre_loans530',
    'pre_loans3060',
    'pre_loans6090',
    'pre_loans90',
    'pre_util',
    'pre_over2limit',
    'pre_maxover2limit',
    # enc_paym_0 ... enc_paym_24, in ascending order.
    *(f'enc_paym_{idx}' for idx in range(25)),
    'enc_loans_account_holder_type',
    'enc_loans_credit_status',
    'enc_loans_credit_type',
    'enc_loans_account_cur',
]
# Columns removed from the feature table before encoding.
cols_for_del = ['rn', 'pre_loans_total_overdue']

In [7]:
# Remove the excluded columns from the frame in place.
df_features_witout_dub.drop(columns=cols_for_del, inplace=True)

In [8]:
# Shape after dropping the excluded columns.
df_features_witout_dub.shape

(23878461, 59)

# 4. Кодирование категориальных переменных

In [9]:
# Accumulator for the one-hot encoded frames; category_encoding() appends one frame per column.
cat_dfs = []

In [10]:
# Shared encoder, re-fitted for every categorical column by category_encoding().
# - Dense output is required because the result is wrapped in a DataFrame.
# - drop='first' omits the first category of each feature.
# - dtype=np.int8 makes the encoder emit 0/1 as int8 straight away instead of
#   float64, so no large float intermediate is allocated.
# The dense-output keyword is `sparse_output` in current scikit-learn; older
# releases only know `sparse`, so fall back to it when the new name is rejected.
try:
    ohe = OneHotEncoder(sparse_output=False, drop='first',
                        handle_unknown='ignore', dtype=np.int8)
except TypeError:
    ohe = OneHotEncoder(sparse=False, drop='first',
                        handle_unknown='ignore', dtype=np.int8)

In [11]:
def category_encoding(column_name):
    """One-hot encode one column of ``df_features_witout_dub``.

    Works entirely on module-level state: fits the shared ``ohe`` encoder on
    the column, appends the encoded columns (as an ``int8`` DataFrame with a
    default integer index) to ``cat_dfs`` and drops the source column from
    ``df_features_witout_dub`` in place.

    Args:
        column_name: Name of the column to encode.

    Returns:
        None; the result is stored in ``cat_dfs``.
    """
    # Double brackets keep a 2-D, single-column frame, which the encoder expects.
    encoded = ohe.fit_transform(df_features_witout_dub[[column_name]])
    # Cast the whole array once before wrapping it, instead of building a wider
    # frame and then re-casting it column by column. copy=False skips the copy
    # when the encoder already produced int8.
    df_ft = pd.DataFrame(
        encoded.astype('int8', copy=False),
        columns=ohe.get_feature_names_out(),
    )
    cat_dfs.append(df_ft)
    # The raw column is no longer needed once its encoded form is stored.
    df_features_witout_dub.drop(column_name, inplace=True, axis=1)

In [12]:
# Encode every categorical column in turn; each call appends one frame to cat_dfs
# and removes the source column from df_features_witout_dub.
for col in cat_cols:
    category_encoding(col)

In [13]:
# Whatever columns are left after the encoding loop continue under a new name.
# NOTE(review): the copy is deleted from its source on the next line, so a plain
# rebind (no .copy()) would give the same frame without a temporary duplicate —
# confirm nothing relies on the copy before changing it.
df_features_category = df_features_witout_dub.copy()
del df_features_witout_dub
gc.collect()
df_features_category.shape

(23878461, 11)

In [14]:
# Same shape check as at the end of the previous cell.
df_features_category.shape

(23878461, 11)

In [15]:
# Downcast every column except 'id' to int8 in one astype call, then print
# the resulting dtypes and memory footprint.
df_features_category = df_features_category.astype(
    dict.fromkeys(df_features_category.columns.drop('id'), 'int8')
)
df_features_category.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23878461 entries, 0 to 23878460
Data columns (total 11 columns):
 #   Column                 Dtype
---  ------                 -----
 0   id                     int64
 1   is_zero_loans5         int8 
 2   is_zero_loans530       int8 
 3   is_zero_loans3060      int8 
 4   is_zero_loans6090      int8 
 5   is_zero_loans90        int8 
 6   is_zero_util           int8 
 7   is_zero_over2limit     int8 
 8   is_zero_maxover2limit  int8 
 9   pclose_flag            int8 
 10  fclose_flag            int8 
dtypes: int64(1), int8(10)
memory usage: 409.9 MB


In [16]:
def df_concatination(df):
    """Return the global ``df_features_category`` with ``df``'s columns appended.

    Both frames get their index replaced in place by a fresh 0..n-1 index
    first, so the column-wise concat lines rows up by position.

    Args:
        df: Frame whose columns are added to the right of ``df_features_category``.

    Returns:
        A new DataFrame with the columns of both frames side by side.
    """
    for frame in (df_features_category, df):
        frame.reset_index(drop=True, inplace=True)
    return pd.concat((df_features_category, df), axis=1)

In [17]:
# Attach all one-hot frames to the base frame with ONE column-wise concat.
# The earlier per-frame loop copied the ever-growing frame on every pass, and
# its `del df` only unbound the loop variable: the frames stay referenced by
# `cat_dfs`, so the per-iteration gc.collect() could not free them.
# The base frame gets a fresh 0..n-1 index; the frames in `cat_dfs` are built
# from bare arrays in category_encoding() and therefore already have one, so
# rows line up by position. Column order is unchanged: base columns first,
# then each encoded frame in list order.
df_features_category.reset_index(drop=True, inplace=True)
df_features_category = pd.concat([df_features_category, *cat_dfs], axis=1)

In [18]:
# The encoded frames are now part of df_features_category; drop the list that kept them alive.
del cat_dfs

# 5. Агрегация признаков методом суммирования по id

Процедура объединения информации о характеристиках клиента по отношению ко всем его кредитным продуктам методом суммирования позволит выяснить вес того или иного параметра.

In [19]:
# Collapse all rows that share an id into one row by summing every column.
# GroupBy.sum() is what `.agg(np.sum)` resolved to; passing the NumPy callable
# is deprecated in recent pandas, so the method is called directly.
df_features = df_features_category.groupby('id').sum()
# Shown as the cell output: (unique ids, feature columns).
df_features.shape

(2750000, 357)

In [20]:
# Preview the first rows of the aggregated frame (indexed by id after the groupby).
df_features.head()

Unnamed: 0_level_0,is_zero_loans5,is_zero_loans530,is_zero_loans3060,is_zero_loans6090,is_zero_loans90,is_zero_util,is_zero_over2limit,is_zero_maxover2limit,pclose_flag,fclose_flag,pre_since_opened_1,pre_since_opened_2,pre_since_opened_3,pre_since_opened_4,pre_since_opened_5,pre_since_opened_6,pre_since_opened_7,pre_since_opened_8,pre_since_opened_9,pre_since_opened_10,pre_since_opened_11,pre_since_opened_12,pre_since_opened_13,pre_since_opened_14,pre_since_opened_15,pre_since_opened_16,pre_since_opened_17,pre_since_opened_18,pre_since_opened_19,pre_since_confirmed_1,pre_since_confirmed_2,pre_since_confirmed_3,pre_since_confirmed_4,pre_since_confirmed_5,pre_since_confirmed_6,pre_since_confirmed_7,pre_since_confirmed_8,pre_since_confirmed_9,pre_since_confirmed_10,pre_since_confirmed_11,pre_since_confirmed_12,pre_since_confirmed_13,pre_since_confirmed_14,pre_since_confirmed_15,pre_since_confirmed_16,pre_since_confirmed_17,pre_pterm_1,pre_pterm_2,pre_pterm_3,pre_pterm_4,pre_pterm_5,pre_pterm_6,pre_pterm_7,pre_pterm_8,pre_pterm_9,pre_pterm_10,pre_pterm_11,pre_pterm_12,pre_pterm_13,pre_pterm_14,pre_pterm_15,pre_pterm_16,pre_pterm_17,pre_fterm_1,pre_fterm_2,pre_fterm_3,pre_fterm_4,pre_fterm_5,pre_fterm_6,pre_fterm_7,pre_fterm_8,pre_fterm_9,pre_fterm_10,pre_fterm_11,pre_fterm_12,pre_fterm_13,pre_fterm_14,pre_fterm_15,pre_fterm_16,pre_till_pclose_1,pre_till_pclose_2,pre_till_pclose_3,pre_till_pclose_4,pre_till_pclose_5,pre_till_pclose_6,pre_till_pclose_7,pre_till_pclose_8,pre_till_pclose_9,pre_till_pclose_10,pre_till_pclose_11,pre_till_pclose_12,pre_till_pclose_13,pre_till_pclose_14,pre_till_pclose_15,pre_till_pclose_16,pre_till_fclose_1,pre_till_fclose_2,pre_till_fclose_3,pre_till_fclose_4,pre_till_fclose_5,pre_till_fclose_6,pre_till_fclose_7,pre_till_fclose_8,pre_till_fclose_9,pre_till_fclose_10,pre_till_fclose_11,pre_till_fclose_12,pre_till_fclose_13,pre_till_fclose_14,pre_till_fclose_15,pre_loans_credit_limit_1,pre_loans_credit_limit_2,pre_loans_credit_limit_3,pre_loans
_credit_limit_4,pre_loans_credit_limit_5,pre_loans_credit_limit_6,pre_loans_credit_limit_7,pre_loans_credit_limit_8,pre_loans_credit_limit_9,pre_loans_credit_limit_10,pre_loans_credit_limit_11,pre_loans_credit_limit_12,pre_loans_credit_limit_13,pre_loans_credit_limit_14,pre_loans_credit_limit_15,pre_loans_credit_limit_16,pre_loans_credit_limit_17,pre_loans_credit_limit_18,pre_loans_credit_limit_19,pre_loans_next_pay_summ_1,pre_loans_next_pay_summ_2,pre_loans_next_pay_summ_3,pre_loans_next_pay_summ_4,pre_loans_next_pay_summ_5,pre_loans_next_pay_summ_6,pre_loans_outstanding_2,pre_loans_outstanding_3,pre_loans_outstanding_4,pre_loans_outstanding_5,pre_loans_max_overdue_sum_1,pre_loans_max_overdue_sum_2,pre_loans_max_overdue_sum_3,pre_loans_credit_cost_rate_1,pre_loans_credit_cost_rate_2,pre_loans_credit_cost_rate_3,pre_loans_credit_cost_rate_4,pre_loans_credit_cost_rate_5,pre_loans_credit_cost_rate_6,pre_loans_credit_cost_rate_7,pre_loans_credit_cost_rate_8,pre_loans_credit_cost_rate_9,pre_loans_credit_cost_rate_10,pre_loans_credit_cost_rate_11,pre_loans_credit_cost_rate_12,pre_loans_credit_cost_rate_13,pre_loans5_1,pre_loans5_2,pre_loans5_3,pre_loans5_5,pre_loans5_6,pre_loans5_7,pre_loans5_8,pre_loans5_9,pre_loans5_11,pre_loans5_13,pre_loans5_16,pre_loans530_1,pre_loans530_2,pre_loans530_3,pre_loans530_4,pre_loans530_5,pre_loans530_6,pre_loans530_7,pre_loans530_8,pre_loans530_9,pre_loans530_10,pre_loans530_11,pre_loans530_12,pre_loans530_13,pre_loans530_14,pre_loans530_15,pre_loans530_16,pre_loans530_18,pre_loans530_19,pre_loans3060_1,pre_loans3060_2,pre_loans3060_3,pre_loans3060_4,pre_loans3060_5,pre_loans3060_6,pre_loans3060_7,pre_loans3060_8,pre_loans3060_9,pre_loans6090_1,pre_loans6090_2,pre_loans6090_3,pre_loans6090_4,pre_loans90_3,pre_loans90_8,pre_loans90_10,pre_loans90_13,pre_loans90_14,pre_loans90_19,pre_util_1,pre_util_2,pre_util_3,pre_util_4,pre_util_5,pre_util_6,pre_util_7,pre_util_8,pre_util_9,pre_util_10,pre_util_11,pre_util_12,pre_util_13,pre_util_14,pr
e_util_15,pre_util_16,pre_util_17,pre_util_18,pre_util_19,pre_over2limit_1,pre_over2limit_2,pre_over2limit_3,pre_over2limit_4,pre_over2limit_5,pre_over2limit_6,pre_over2limit_7,pre_over2limit_8,pre_over2limit_9,pre_over2limit_10,pre_over2limit_11,pre_over2limit_12,pre_over2limit_13,pre_over2limit_14,pre_over2limit_15,pre_over2limit_16,pre_over2limit_17,pre_over2limit_18,pre_over2limit_19,pre_maxover2limit_1,pre_maxover2limit_2,pre_maxover2limit_3,pre_maxover2limit_4,pre_maxover2limit_5,pre_maxover2limit_6,pre_maxover2limit_7,pre_maxover2limit_8,pre_maxover2limit_9,pre_maxover2limit_10,pre_maxover2limit_11,pre_maxover2limit_12,pre_maxover2limit_13,pre_maxover2limit_14,pre_maxover2limit_15,pre_maxover2limit_16,pre_maxover2limit_17,pre_maxover2limit_18,pre_maxover2limit_19,enc_paym_0_1,enc_paym_0_2,enc_paym_0_3,enc_paym_1_1,enc_paym_1_2,enc_paym_1_3,enc_paym_2_1,enc_paym_2_2,enc_paym_2_3,enc_paym_3_1,enc_paym_3_2,enc_paym_3_3,enc_paym_4_1,enc_paym_4_2,enc_paym_4_3,enc_paym_5_1,enc_paym_5_2,enc_paym_5_3,enc_paym_6_1,enc_paym_6_2,enc_paym_6_3,enc_paym_7_1,enc_paym_7_2,enc_paym_7_3,enc_paym_8_1,enc_paym_8_2,enc_paym_8_3,enc_paym_9_1,enc_paym_9_2,enc_paym_9_3,enc_paym_10_1,enc_paym_10_2,enc_paym_10_3,enc_paym_11_2,enc_paym_11_3,enc_paym_11_4,enc_paym_12_1,enc_paym_12_2,enc_paym_12_3,enc_paym_13_1,enc_paym_13_2,enc_paym_13_3,enc_paym_14_1,enc_paym_14_2,enc_paym_14_3,enc_paym_15_1,enc_paym_15_2,enc_paym_15_3,enc_paym_16_1,enc_paym_16_2,enc_paym_16_3,enc_paym_17_1,enc_paym_17_2,enc_paym_17_3,enc_paym_18_1,enc_paym_18_2,enc_paym_18_3,enc_paym_19_1,enc_paym_19_2,enc_paym_19_3,enc_paym_20_2,enc_paym_20_3,enc_paym_20_4,enc_paym_21_1,enc_paym_21_2,enc_paym_21_3,enc_paym_22_1,enc_paym_22_2,enc_paym_22_3,enc_paym_23_1,enc_paym_23_2,enc_paym_23_3,enc_paym_24_2,enc_paym_24_3,enc_paym_24_4,enc_loans_account_holder_type_1,enc_loans_account_holder_type_2,enc_loans_account_holder_type_3,enc_loans_account_holder_type_4,enc_loans_account_holder_type_5,enc_loans_account_holder_type_6,enc_loa
ns_credit_status_1,enc_loans_credit_status_2,enc_loans_credit_status_3,enc_loans_credit_status_4,enc_loans_credit_status_5,enc_loans_credit_status_6,enc_loans_credit_type_1,enc_loans_credit_type_2,enc_loans_credit_type_3,enc_loans_credit_type_4,enc_loans_credit_type_5,enc_loans_credit_type_6,enc_loans_credit_type_7,enc_loans_account_cur_1,enc_loans_account_cur_2,enc_loans_account_cur_3
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 
195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1,Unnamed: 266_level_1,Unnamed: 267_level_1,Unnamed: 268_level_1,Unnamed: 269_level_1,Unnamed: 270_level_1,Unnamed: 271_level_1,Unnamed: 272_level_1,Unnamed: 273_level_1,Unnamed: 274_level_1,Unnamed: 275_level_1,Unnamed: 276_level_1,Unnamed: 277_level_1,Unnamed: 278_level_1,Unnamed: 279_level_1,Unnamed: 280_level_1,Unnamed: 281_level_1,Unnamed: 282_level_1,Unnamed: 283_level_1,Unnamed: 284_level_1,Unnamed: 285_level_1,Unnamed: 286_level_1,Unnamed: 287_level_1,Unnamed: 288_level_1,Unnamed: 289_level_1,Unnamed: 
290_level_1,Unnamed: 291_level_1,Unnamed: 292_level_1,Unnamed: 293_level_1,Unnamed: 294_level_1,Unnamed: 295_level_1,Unnamed: 296_level_1,Unnamed: 297_level_1,Unnamed: 298_level_1,Unnamed: 299_level_1,Unnamed: 300_level_1,Unnamed: 301_level_1,Unnamed: 302_level_1,Unnamed: 303_level_1,Unnamed: 304_level_1,Unnamed: 305_level_1,Unnamed: 306_level_1,Unnamed: 307_level_1,Unnamed: 308_level_1,Unnamed: 309_level_1,Unnamed: 310_level_1,Unnamed: 311_level_1,Unnamed: 312_level_1,Unnamed: 313_level_1,Unnamed: 314_level_1,Unnamed: 315_level_1,Unnamed: 316_level_1,Unnamed: 317_level_1,Unnamed: 318_level_1,Unnamed: 319_level_1,Unnamed: 320_level_1,Unnamed: 321_level_1,Unnamed: 322_level_1,Unnamed: 323_level_1,Unnamed: 324_level_1,Unnamed: 325_level_1,Unnamed: 326_level_1,Unnamed: 327_level_1,Unnamed: 328_level_1,Unnamed: 329_level_1,Unnamed: 330_level_1,Unnamed: 331_level_1,Unnamed: 332_level_1,Unnamed: 333_level_1,Unnamed: 334_level_1,Unnamed: 335_level_1,Unnamed: 336_level_1,Unnamed: 337_level_1,Unnamed: 338_level_1,Unnamed: 339_level_1,Unnamed: 340_level_1,Unnamed: 341_level_1,Unnamed: 342_level_1,Unnamed: 343_level_1,Unnamed: 344_level_1,Unnamed: 345_level_1,Unnamed: 346_level_1,Unnamed: 347_level_1,Unnamed: 348_level_1,Unnamed: 349_level_1,Unnamed: 350_level_1,Unnamed: 351_level_1,Unnamed: 352_level_1,Unnamed: 353_level_1,Unnamed: 354_level_1,Unnamed: 355_level_1,Unnamed: 356_level_1,Unnamed: 357_level_1
0,9,10,10,10,10,6,9,9,1,2,1,1,1,1,2,0,1,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,0,0,7,0,0,1,0,0,0,0,0,1,3,0,1,0,0,0,0,1,0,2,0,0,1,1,0,0,0,2,2,0,0,0,0,2,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,4,0,1,0,2,0,0,0,0,0,0,1,2,0,1,2,2,0,2,0,0,1,0,1,0,0,0,0,0,1,2,1,0,0,2,1,0,0,0,0,3,2,1,3,0,1,7,0,1,0,10,0,0,0,1,3,0,0,0,1,0,0,4,0,1,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,10,0,0,0,0,0,0,0,10,0,10,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,2,6,0,0,0,0,9,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,1,0,0,0,0,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,3,0,0,4,0,0,5,0,0,5,0,0,5,0,0,5,0,0,6,0,0,6,0,0,7,0,0,7,0,0,7,0,0,7,0,0,7,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,10,10,0,0,0,0,0,0,4,6,0,0,0,1,0,2,7,0,0,0,10,0,0
1,12,10,12,12,11,10,12,11,1,2,0,1,0,0,0,0,1,2,0,2,0,1,1,3,2,0,0,1,0,0,0,2,1,1,2,3,1,1,0,0,0,0,3,0,0,0,3,1,0,1,0,0,3,1,0,0,0,1,1,0,2,0,0,1,1,0,0,0,3,0,2,2,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,5,0,0,0,3,2,1,0,5,0,1,0,1,0,0,1,0,0,2,0,2,1,1,2,0,1,0,0,4,0,0,0,1,1,0,0,1,1,1,0,0,1,1,10,0,1,1,0,2,11,1,0,2,11,1,0,0,0,10,1,0,1,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,14,0,0,0,0,0,0,0,14,0,14,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,10,0,0,0,0,12,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,12,0,0,1,0,1,2,0,3,1,0,3,0,0,3,0,0,3,0,0,6,1,0,7,0,1,7,0,0,7,0,1,6,0,0,6,0,0,7,0,0,8,0,0,9,0,0,10,1,0,10,0,0,10,0,0,10,0,0,10,0,0,10,0,0,10,0,0,10,1,0,11,0,0,11,0,0,11,14,0,0,0,0,0,0,4,10,0,0,0,3,0,3,8,0,0,0,14,0,0
2,3,2,2,2,3,1,3,2,2,2,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,1,1,1,0,1,2,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,2,1,0,2,1,0,2,1,0,2,1,0,2,0,0,3,0,1,2,1,0,2,0,1,2,1,0,2,1,0,2,1,0,2,1,0,2,1,0,2,0,0,3,3,0,0,0,0,0,0,2,1,0,0,0,0,0,2,1,0,0,0,3,0,0
3,15,15,15,15,15,8,14,14,5,6,3,1,0,2,1,3,0,0,0,0,0,1,3,0,0,0,0,1,0,1,1,1,0,1,1,0,0,7,0,0,0,0,1,0,1,0,1,1,0,5,0,0,0,1,1,0,0,1,0,1,0,3,0,0,1,0,0,0,0,1,6,1,1,1,0,0,1,0,1,5,2,1,0,0,0,3,2,0,0,0,0,1,0,0,1,0,2,0,0,2,2,0,1,0,1,6,1,0,0,0,2,0,1,1,1,0,0,0,0,1,3,0,2,1,0,1,1,1,0,1,9,2,2,0,1,1,11,1,2,0,15,0,1,0,4,4,1,0,1,1,2,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,15,0,0,0,0,0,0,0,15,0,15,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,8,0,1,0,0,14,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,2,0,0,2,0,0,2,0,0,3,0,0,6,0,0,6,0,0,6,0,0,7,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,10,15,0,0,0,0,0,0,7,8,0,0,0,1,0,4,9,1,0,0,15,0,0
4,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0


In [21]:
# The row-level frame is no longer needed once the per-id aggregate exists.
del df_features_category
gc.collect()

0

# 6. Чтение датасета с целевой переменной

In [22]:
# Load the target table and rename its 'flag' column to 'target' in one chain.
df_target = (
    pd.read_csv('../train_data/train_target.csv')
    .rename(columns={'flag': 'target'})
)
df_target.shape

(3000000, 2)

# 7. Объединение датасетов и удаление поля id

In [23]:
# Keep only ids present in both tables, then make sure each id appears once
# (first occurrence wins).
df_final = df_features.merge(df_target, how='inner', on='id')
df_final = df_final.drop_duplicates(subset=['id'], keep='first')
# Both inputs are fully represented in df_final now.
del df_features, df_target
gc.collect()

0

In [24]:
# Shape of the merged features-plus-target frame.
df_final.shape

(2750000, 359)

In [25]:
# Preview the merged frame; 'id' is still a column at this point.
df_final.head()

Unnamed: 0,id,is_zero_loans5,is_zero_loans530,is_zero_loans3060,is_zero_loans6090,is_zero_loans90,is_zero_util,is_zero_over2limit,is_zero_maxover2limit,pclose_flag,fclose_flag,pre_since_opened_1,pre_since_opened_2,pre_since_opened_3,pre_since_opened_4,pre_since_opened_5,pre_since_opened_6,pre_since_opened_7,pre_since_opened_8,pre_since_opened_9,pre_since_opened_10,pre_since_opened_11,pre_since_opened_12,pre_since_opened_13,pre_since_opened_14,pre_since_opened_15,pre_since_opened_16,pre_since_opened_17,pre_since_opened_18,pre_since_opened_19,pre_since_confirmed_1,pre_since_confirmed_2,pre_since_confirmed_3,pre_since_confirmed_4,pre_since_confirmed_5,pre_since_confirmed_6,pre_since_confirmed_7,pre_since_confirmed_8,pre_since_confirmed_9,pre_since_confirmed_10,pre_since_confirmed_11,pre_since_confirmed_12,pre_since_confirmed_13,pre_since_confirmed_14,pre_since_confirmed_15,pre_since_confirmed_16,pre_since_confirmed_17,pre_pterm_1,pre_pterm_2,pre_pterm_3,pre_pterm_4,pre_pterm_5,pre_pterm_6,pre_pterm_7,pre_pterm_8,pre_pterm_9,pre_pterm_10,pre_pterm_11,pre_pterm_12,pre_pterm_13,pre_pterm_14,pre_pterm_15,pre_pterm_16,pre_pterm_17,pre_fterm_1,pre_fterm_2,pre_fterm_3,pre_fterm_4,pre_fterm_5,pre_fterm_6,pre_fterm_7,pre_fterm_8,pre_fterm_9,pre_fterm_10,pre_fterm_11,pre_fterm_12,pre_fterm_13,pre_fterm_14,pre_fterm_15,pre_fterm_16,pre_till_pclose_1,pre_till_pclose_2,pre_till_pclose_3,pre_till_pclose_4,pre_till_pclose_5,pre_till_pclose_6,pre_till_pclose_7,pre_till_pclose_8,pre_till_pclose_9,pre_till_pclose_10,pre_till_pclose_11,pre_till_pclose_12,pre_till_pclose_13,pre_till_pclose_14,pre_till_pclose_15,pre_till_pclose_16,pre_till_fclose_1,pre_till_fclose_2,pre_till_fclose_3,pre_till_fclose_4,pre_till_fclose_5,pre_till_fclose_6,pre_till_fclose_7,pre_till_fclose_8,pre_till_fclose_9,pre_till_fclose_10,pre_till_fclose_11,pre_till_fclose_12,pre_till_fclose_13,pre_till_fclose_14,pre_till_fclose_15,pre_loans_credit_limit_1,pre_loans_credit_limit_2,pre_loans_credit_limit_3,pre_loans_cred
it_limit_4,pre_loans_credit_limit_5,pre_loans_credit_limit_6,pre_loans_credit_limit_7,pre_loans_credit_limit_8,pre_loans_credit_limit_9,pre_loans_credit_limit_10,pre_loans_credit_limit_11,pre_loans_credit_limit_12,pre_loans_credit_limit_13,pre_loans_credit_limit_14,pre_loans_credit_limit_15,pre_loans_credit_limit_16,pre_loans_credit_limit_17,pre_loans_credit_limit_18,pre_loans_credit_limit_19,pre_loans_next_pay_summ_1,pre_loans_next_pay_summ_2,pre_loans_next_pay_summ_3,pre_loans_next_pay_summ_4,pre_loans_next_pay_summ_5,pre_loans_next_pay_summ_6,pre_loans_outstanding_2,pre_loans_outstanding_3,pre_loans_outstanding_4,pre_loans_outstanding_5,pre_loans_max_overdue_sum_1,pre_loans_max_overdue_sum_2,pre_loans_max_overdue_sum_3,pre_loans_credit_cost_rate_1,pre_loans_credit_cost_rate_2,pre_loans_credit_cost_rate_3,pre_loans_credit_cost_rate_4,pre_loans_credit_cost_rate_5,pre_loans_credit_cost_rate_6,pre_loans_credit_cost_rate_7,pre_loans_credit_cost_rate_8,pre_loans_credit_cost_rate_9,pre_loans_credit_cost_rate_10,pre_loans_credit_cost_rate_11,pre_loans_credit_cost_rate_12,pre_loans_credit_cost_rate_13,pre_loans5_1,pre_loans5_2,pre_loans5_3,pre_loans5_5,pre_loans5_6,pre_loans5_7,pre_loans5_8,pre_loans5_9,pre_loans5_11,pre_loans5_13,pre_loans5_16,pre_loans530_1,pre_loans530_2,pre_loans530_3,pre_loans530_4,pre_loans530_5,pre_loans530_6,pre_loans530_7,pre_loans530_8,pre_loans530_9,pre_loans530_10,pre_loans530_11,pre_loans530_12,pre_loans530_13,pre_loans530_14,pre_loans530_15,pre_loans530_16,pre_loans530_18,pre_loans530_19,pre_loans3060_1,pre_loans3060_2,pre_loans3060_3,pre_loans3060_4,pre_loans3060_5,pre_loans3060_6,pre_loans3060_7,pre_loans3060_8,pre_loans3060_9,pre_loans6090_1,pre_loans6090_2,pre_loans6090_3,pre_loans6090_4,pre_loans90_3,pre_loans90_8,pre_loans90_10,pre_loans90_13,pre_loans90_14,pre_loans90_19,pre_util_1,pre_util_2,pre_util_3,pre_util_4,pre_util_5,pre_util_6,pre_util_7,pre_util_8,pre_util_9,pre_util_10,pre_util_11,pre_util_12,pre_util_13,pre_util_14,pre_uti
l_15,pre_util_16,pre_util_17,pre_util_18,pre_util_19,pre_over2limit_1,pre_over2limit_2,pre_over2limit_3,pre_over2limit_4,pre_over2limit_5,pre_over2limit_6,pre_over2limit_7,pre_over2limit_8,pre_over2limit_9,pre_over2limit_10,pre_over2limit_11,pre_over2limit_12,pre_over2limit_13,pre_over2limit_14,pre_over2limit_15,pre_over2limit_16,pre_over2limit_17,pre_over2limit_18,pre_over2limit_19,pre_maxover2limit_1,pre_maxover2limit_2,pre_maxover2limit_3,pre_maxover2limit_4,pre_maxover2limit_5,pre_maxover2limit_6,pre_maxover2limit_7,pre_maxover2limit_8,pre_maxover2limit_9,pre_maxover2limit_10,pre_maxover2limit_11,pre_maxover2limit_12,pre_maxover2limit_13,pre_maxover2limit_14,pre_maxover2limit_15,pre_maxover2limit_16,pre_maxover2limit_17,pre_maxover2limit_18,pre_maxover2limit_19,enc_paym_0_1,enc_paym_0_2,enc_paym_0_3,enc_paym_1_1,enc_paym_1_2,enc_paym_1_3,enc_paym_2_1,enc_paym_2_2,enc_paym_2_3,enc_paym_3_1,enc_paym_3_2,enc_paym_3_3,enc_paym_4_1,enc_paym_4_2,enc_paym_4_3,enc_paym_5_1,enc_paym_5_2,enc_paym_5_3,enc_paym_6_1,enc_paym_6_2,enc_paym_6_3,enc_paym_7_1,enc_paym_7_2,enc_paym_7_3,enc_paym_8_1,enc_paym_8_2,enc_paym_8_3,enc_paym_9_1,enc_paym_9_2,enc_paym_9_3,enc_paym_10_1,enc_paym_10_2,enc_paym_10_3,enc_paym_11_2,enc_paym_11_3,enc_paym_11_4,enc_paym_12_1,enc_paym_12_2,enc_paym_12_3,enc_paym_13_1,enc_paym_13_2,enc_paym_13_3,enc_paym_14_1,enc_paym_14_2,enc_paym_14_3,enc_paym_15_1,enc_paym_15_2,enc_paym_15_3,enc_paym_16_1,enc_paym_16_2,enc_paym_16_3,enc_paym_17_1,enc_paym_17_2,enc_paym_17_3,enc_paym_18_1,enc_paym_18_2,enc_paym_18_3,enc_paym_19_1,enc_paym_19_2,enc_paym_19_3,enc_paym_20_2,enc_paym_20_3,enc_paym_20_4,enc_paym_21_1,enc_paym_21_2,enc_paym_21_3,enc_paym_22_1,enc_paym_22_2,enc_paym_22_3,enc_paym_23_1,enc_paym_23_2,enc_paym_23_3,enc_paym_24_2,enc_paym_24_3,enc_paym_24_4,enc_loans_account_holder_type_1,enc_loans_account_holder_type_2,enc_loans_account_holder_type_3,enc_loans_account_holder_type_4,enc_loans_account_holder_type_5,enc_loans_account_holder_type_6,enc_loans_cr
edit_status_1,enc_loans_credit_status_2,enc_loans_credit_status_3,enc_loans_credit_status_4,enc_loans_credit_status_5,enc_loans_credit_status_6,enc_loans_credit_type_1,enc_loans_credit_type_2,enc_loans_credit_type_3,enc_loans_credit_type_4,enc_loans_credit_type_5,enc_loans_credit_type_6,enc_loans_credit_type_7,enc_loans_account_cur_1,enc_loans_account_cur_2,enc_loans_account_cur_3,target
0,0,9,10,10,10,10,6,9,9,1,2,1,1,1,1,2,0,1,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,0,0,7,0,0,1,0,0,0,0,0,1,3,0,1,0,0,0,0,1,0,2,0,0,1,1,0,0,0,2,2,0,0,0,0,2,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,4,0,1,0,2,0,0,0,0,0,0,1,2,0,1,2,2,0,2,0,0,1,0,1,0,0,0,0,0,1,2,1,0,0,2,1,0,0,0,0,3,2,1,3,0,1,7,0,1,0,10,0,0,0,1,3,0,0,0,1,0,0,4,0,1,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,10,0,0,0,0,0,0,0,10,0,10,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,2,6,0,0,0,0,9,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,1,0,0,0,0,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,3,0,0,4,0,0,5,0,0,5,0,0,5,0,0,5,0,0,6,0,0,6,0,0,7,0,0,7,0,0,7,0,0,7,0,0,7,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,10,10,0,0,0,0,0,0,4,6,0,0,0,1,0,2,7,0,0,0,10,0,0,0
1,1,12,10,12,12,11,10,12,11,1,2,0,1,0,0,0,0,1,2,0,2,0,1,1,3,2,0,0,1,0,0,0,2,1,1,2,3,1,1,0,0,0,0,3,0,0,0,3,1,0,1,0,0,3,1,0,0,0,1,1,0,2,0,0,1,1,0,0,0,3,0,2,2,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,5,0,0,0,3,2,1,0,5,0,1,0,1,0,0,1,0,0,2,0,2,1,1,2,0,1,0,0,4,0,0,0,1,1,0,0,1,1,1,0,0,1,1,10,0,1,1,0,2,11,1,0,2,11,1,0,0,0,10,1,0,1,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,14,0,0,0,0,0,0,0,14,0,14,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,10,0,0,0,0,12,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,12,0,0,1,0,1,2,0,3,1,0,3,0,0,3,0,0,3,0,0,6,1,0,7,0,1,7,0,0,7,0,1,6,0,0,6,0,0,7,0,0,8,0,0,9,0,0,10,1,0,10,0,0,10,0,0,10,0,0,10,0,0,10,0,0,10,0,0,10,1,0,11,0,0,11,0,0,11,14,0,0,0,0,0,0,4,10,0,0,0,3,0,3,8,0,0,0,14,0,0,0
2,2,3,2,2,2,3,1,3,2,2,2,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,1,1,1,0,1,2,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,2,1,0,2,1,0,2,1,0,2,1,0,2,0,0,3,0,1,2,1,0,2,0,1,2,1,0,2,1,0,2,1,0,2,1,0,2,1,0,2,0,0,3,3,0,0,0,0,0,0,2,1,0,0,0,0,0,2,1,0,0,0,3,0,0,0
3,3,15,15,15,15,15,8,14,14,5,6,3,1,0,2,1,3,0,0,0,0,0,1,3,0,0,0,0,1,0,1,1,1,0,1,1,0,0,7,0,0,0,0,1,0,1,0,1,1,0,5,0,0,0,1,1,0,0,1,0,1,0,3,0,0,1,0,0,0,0,1,6,1,1,1,0,0,1,0,1,5,2,1,0,0,0,3,2,0,0,0,0,1,0,0,1,0,2,0,0,2,2,0,1,0,1,6,1,0,0,0,2,0,1,1,1,0,0,0,0,1,3,0,2,1,0,1,1,1,0,1,9,2,2,0,1,1,11,1,2,0,15,0,1,0,4,4,1,0,1,1,2,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,15,0,0,0,0,0,0,0,15,0,15,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,8,0,1,0,0,14,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,2,0,0,2,0,0,2,0,0,3,0,0,6,0,0,6,0,0,6,0,0,7,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,10,15,0,0,0,0,0,0,7,8,0,0,0,1,0,4,9,1,0,0,15,0,0,0
4,4,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0


In [28]:
# Remove the 'id' column from df_final in place, then preview the first rows.
df_final.drop(columns='id', inplace=True)
df_final.head()

Unnamed: 0,is_zero_loans5,is_zero_loans530,is_zero_loans3060,is_zero_loans6090,is_zero_loans90,is_zero_util,is_zero_over2limit,is_zero_maxover2limit,pclose_flag,fclose_flag,pre_since_opened_1,pre_since_opened_2,pre_since_opened_3,pre_since_opened_4,pre_since_opened_5,pre_since_opened_6,pre_since_opened_7,pre_since_opened_8,pre_since_opened_9,pre_since_opened_10,pre_since_opened_11,pre_since_opened_12,pre_since_opened_13,pre_since_opened_14,pre_since_opened_15,pre_since_opened_16,pre_since_opened_17,pre_since_opened_18,pre_since_opened_19,pre_since_confirmed_1,pre_since_confirmed_2,pre_since_confirmed_3,pre_since_confirmed_4,pre_since_confirmed_5,pre_since_confirmed_6,pre_since_confirmed_7,pre_since_confirmed_8,pre_since_confirmed_9,pre_since_confirmed_10,pre_since_confirmed_11,pre_since_confirmed_12,pre_since_confirmed_13,pre_since_confirmed_14,pre_since_confirmed_15,pre_since_confirmed_16,pre_since_confirmed_17,pre_pterm_1,pre_pterm_2,pre_pterm_3,pre_pterm_4,pre_pterm_5,pre_pterm_6,pre_pterm_7,pre_pterm_8,pre_pterm_9,pre_pterm_10,pre_pterm_11,pre_pterm_12,pre_pterm_13,pre_pterm_14,pre_pterm_15,pre_pterm_16,pre_pterm_17,pre_fterm_1,pre_fterm_2,pre_fterm_3,pre_fterm_4,pre_fterm_5,pre_fterm_6,pre_fterm_7,pre_fterm_8,pre_fterm_9,pre_fterm_10,pre_fterm_11,pre_fterm_12,pre_fterm_13,pre_fterm_14,pre_fterm_15,pre_fterm_16,pre_till_pclose_1,pre_till_pclose_2,pre_till_pclose_3,pre_till_pclose_4,pre_till_pclose_5,pre_till_pclose_6,pre_till_pclose_7,pre_till_pclose_8,pre_till_pclose_9,pre_till_pclose_10,pre_till_pclose_11,pre_till_pclose_12,pre_till_pclose_13,pre_till_pclose_14,pre_till_pclose_15,pre_till_pclose_16,pre_till_fclose_1,pre_till_fclose_2,pre_till_fclose_3,pre_till_fclose_4,pre_till_fclose_5,pre_till_fclose_6,pre_till_fclose_7,pre_till_fclose_8,pre_till_fclose_9,pre_till_fclose_10,pre_till_fclose_11,pre_till_fclose_12,pre_till_fclose_13,pre_till_fclose_14,pre_till_fclose_15,pre_loans_credit_limit_1,pre_loans_credit_limit_2,pre_loans_credit_limit_3,pre_loans_credit_
limit_4,pre_loans_credit_limit_5,pre_loans_credit_limit_6,pre_loans_credit_limit_7,pre_loans_credit_limit_8,pre_loans_credit_limit_9,pre_loans_credit_limit_10,pre_loans_credit_limit_11,pre_loans_credit_limit_12,pre_loans_credit_limit_13,pre_loans_credit_limit_14,pre_loans_credit_limit_15,pre_loans_credit_limit_16,pre_loans_credit_limit_17,pre_loans_credit_limit_18,pre_loans_credit_limit_19,pre_loans_next_pay_summ_1,pre_loans_next_pay_summ_2,pre_loans_next_pay_summ_3,pre_loans_next_pay_summ_4,pre_loans_next_pay_summ_5,pre_loans_next_pay_summ_6,pre_loans_outstanding_2,pre_loans_outstanding_3,pre_loans_outstanding_4,pre_loans_outstanding_5,pre_loans_max_overdue_sum_1,pre_loans_max_overdue_sum_2,pre_loans_max_overdue_sum_3,pre_loans_credit_cost_rate_1,pre_loans_credit_cost_rate_2,pre_loans_credit_cost_rate_3,pre_loans_credit_cost_rate_4,pre_loans_credit_cost_rate_5,pre_loans_credit_cost_rate_6,pre_loans_credit_cost_rate_7,pre_loans_credit_cost_rate_8,pre_loans_credit_cost_rate_9,pre_loans_credit_cost_rate_10,pre_loans_credit_cost_rate_11,pre_loans_credit_cost_rate_12,pre_loans_credit_cost_rate_13,pre_loans5_1,pre_loans5_2,pre_loans5_3,pre_loans5_5,pre_loans5_6,pre_loans5_7,pre_loans5_8,pre_loans5_9,pre_loans5_11,pre_loans5_13,pre_loans5_16,pre_loans530_1,pre_loans530_2,pre_loans530_3,pre_loans530_4,pre_loans530_5,pre_loans530_6,pre_loans530_7,pre_loans530_8,pre_loans530_9,pre_loans530_10,pre_loans530_11,pre_loans530_12,pre_loans530_13,pre_loans530_14,pre_loans530_15,pre_loans530_16,pre_loans530_18,pre_loans530_19,pre_loans3060_1,pre_loans3060_2,pre_loans3060_3,pre_loans3060_4,pre_loans3060_5,pre_loans3060_6,pre_loans3060_7,pre_loans3060_8,pre_loans3060_9,pre_loans6090_1,pre_loans6090_2,pre_loans6090_3,pre_loans6090_4,pre_loans90_3,pre_loans90_8,pre_loans90_10,pre_loans90_13,pre_loans90_14,pre_loans90_19,pre_util_1,pre_util_2,pre_util_3,pre_util_4,pre_util_5,pre_util_6,pre_util_7,pre_util_8,pre_util_9,pre_util_10,pre_util_11,pre_util_12,pre_util_13,pre_util_14,pre_util_1
5,pre_util_16,pre_util_17,pre_util_18,pre_util_19,pre_over2limit_1,pre_over2limit_2,pre_over2limit_3,pre_over2limit_4,pre_over2limit_5,pre_over2limit_6,pre_over2limit_7,pre_over2limit_8,pre_over2limit_9,pre_over2limit_10,pre_over2limit_11,pre_over2limit_12,pre_over2limit_13,pre_over2limit_14,pre_over2limit_15,pre_over2limit_16,pre_over2limit_17,pre_over2limit_18,pre_over2limit_19,pre_maxover2limit_1,pre_maxover2limit_2,pre_maxover2limit_3,pre_maxover2limit_4,pre_maxover2limit_5,pre_maxover2limit_6,pre_maxover2limit_7,pre_maxover2limit_8,pre_maxover2limit_9,pre_maxover2limit_10,pre_maxover2limit_11,pre_maxover2limit_12,pre_maxover2limit_13,pre_maxover2limit_14,pre_maxover2limit_15,pre_maxover2limit_16,pre_maxover2limit_17,pre_maxover2limit_18,pre_maxover2limit_19,enc_paym_0_1,enc_paym_0_2,enc_paym_0_3,enc_paym_1_1,enc_paym_1_2,enc_paym_1_3,enc_paym_2_1,enc_paym_2_2,enc_paym_2_3,enc_paym_3_1,enc_paym_3_2,enc_paym_3_3,enc_paym_4_1,enc_paym_4_2,enc_paym_4_3,enc_paym_5_1,enc_paym_5_2,enc_paym_5_3,enc_paym_6_1,enc_paym_6_2,enc_paym_6_3,enc_paym_7_1,enc_paym_7_2,enc_paym_7_3,enc_paym_8_1,enc_paym_8_2,enc_paym_8_3,enc_paym_9_1,enc_paym_9_2,enc_paym_9_3,enc_paym_10_1,enc_paym_10_2,enc_paym_10_3,enc_paym_11_2,enc_paym_11_3,enc_paym_11_4,enc_paym_12_1,enc_paym_12_2,enc_paym_12_3,enc_paym_13_1,enc_paym_13_2,enc_paym_13_3,enc_paym_14_1,enc_paym_14_2,enc_paym_14_3,enc_paym_15_1,enc_paym_15_2,enc_paym_15_3,enc_paym_16_1,enc_paym_16_2,enc_paym_16_3,enc_paym_17_1,enc_paym_17_2,enc_paym_17_3,enc_paym_18_1,enc_paym_18_2,enc_paym_18_3,enc_paym_19_1,enc_paym_19_2,enc_paym_19_3,enc_paym_20_2,enc_paym_20_3,enc_paym_20_4,enc_paym_21_1,enc_paym_21_2,enc_paym_21_3,enc_paym_22_1,enc_paym_22_2,enc_paym_22_3,enc_paym_23_1,enc_paym_23_2,enc_paym_23_3,enc_paym_24_2,enc_paym_24_3,enc_paym_24_4,enc_loans_account_holder_type_1,enc_loans_account_holder_type_2,enc_loans_account_holder_type_3,enc_loans_account_holder_type_4,enc_loans_account_holder_type_5,enc_loans_account_holder_type_6,enc_loans_credi
t_status_1,enc_loans_credit_status_2,enc_loans_credit_status_3,enc_loans_credit_status_4,enc_loans_credit_status_5,enc_loans_credit_status_6,enc_loans_credit_type_1,enc_loans_credit_type_2,enc_loans_credit_type_3,enc_loans_credit_type_4,enc_loans_credit_type_5,enc_loans_credit_type_6,enc_loans_credit_type_7,enc_loans_account_cur_1,enc_loans_account_cur_2,enc_loans_account_cur_3,target
0,9,10,10,10,10,6,9,9,1,2,1,1,1,1,2,0,1,0,0,0,0,0,0,0,0,0,0,3,0,1,0,0,0,0,0,0,0,7,0,0,1,0,0,0,0,0,1,3,0,1,0,0,0,0,1,0,2,0,0,1,1,0,0,0,2,2,0,0,0,0,2,0,1,0,1,1,1,0,0,1,0,0,0,0,0,0,1,0,0,1,4,0,1,0,2,0,0,0,0,0,0,1,2,0,1,2,2,0,2,0,0,1,0,1,0,0,0,0,0,1,2,1,0,0,2,1,0,0,0,0,3,2,1,3,0,1,7,0,1,0,10,0,0,0,1,3,0,0,0,1,0,0,4,0,1,0,0,0,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,10,0,0,0,0,0,0,10,0,0,0,0,0,0,0,10,0,10,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,2,6,0,0,0,0,9,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,9,0,0,1,0,0,0,0,0,0,0,2,0,0,2,0,0,2,0,0,2,0,0,3,0,0,4,0,0,5,0,0,5,0,0,5,0,0,5,0,0,6,0,0,6,0,0,7,0,0,7,0,0,7,0,0,7,0,0,7,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,10,10,0,0,0,0,0,0,4,6,0,0,0,1,0,2,7,0,0,0,10,0,0,0
1,12,10,12,12,11,10,12,11,1,2,0,1,0,0,0,0,1,2,0,2,0,1,1,3,2,0,0,1,0,0,0,2,1,1,2,3,1,1,0,0,0,0,3,0,0,0,3,1,0,1,0,0,3,1,0,0,0,1,1,0,2,0,0,1,1,0,0,0,3,0,2,2,0,1,0,0,1,1,1,1,0,0,0,0,1,1,0,5,0,0,0,3,2,1,0,5,0,1,0,1,0,0,1,0,0,2,0,2,1,1,2,0,1,0,0,4,0,0,0,1,1,0,0,1,1,1,0,0,1,1,10,0,1,1,0,2,11,1,0,2,11,1,0,0,0,10,1,0,1,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,14,0,0,0,0,0,0,0,14,0,14,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0,0,0,0,0,10,0,0,0,0,12,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,12,0,0,1,0,1,2,0,3,1,0,3,0,0,3,0,0,3,0,0,6,1,0,7,0,1,7,0,0,7,0,1,6,0,0,6,0,0,7,0,0,8,0,0,9,0,0,10,1,0,10,0,0,10,0,0,10,0,0,10,0,0,10,0,0,10,0,0,10,1,0,11,0,0,11,0,0,11,14,0,0,0,0,0,0,4,10,0,0,0,3,0,3,8,0,0,0,14,0,0,0
2,3,2,2,2,3,1,3,2,2,2,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,1,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,2,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,1,0,0,0,0,1,1,1,0,1,2,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,3,0,0,0,0,0,0,0,3,0,3,0,0,0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,1,0,0,0,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,2,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,2,1,0,2,1,0,2,1,0,2,1,0,2,0,0,3,0,1,2,1,0,2,0,1,2,1,0,2,1,0,2,1,0,2,1,0,2,1,0,2,0,0,3,3,0,0,0,0,0,0,2,1,0,0,0,0,0,2,1,0,0,0,3,0,0,0
3,15,15,15,15,15,8,14,14,5,6,3,1,0,2,1,3,0,0,0,0,0,1,3,0,0,0,0,1,0,1,1,1,0,1,1,0,0,7,0,0,0,0,1,0,1,0,1,1,0,5,0,0,0,1,1,0,0,1,0,1,0,3,0,0,1,0,0,0,0,1,6,1,1,1,0,0,1,0,1,5,2,1,0,0,0,3,2,0,0,0,0,1,0,0,1,0,2,0,0,2,2,0,1,0,1,6,1,0,0,0,2,0,1,1,1,0,0,0,0,1,3,0,2,1,0,1,1,1,0,1,9,2,2,0,1,1,11,1,2,0,15,0,1,0,4,4,1,0,1,1,2,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,0,0,0,0,0,0,15,0,0,0,0,0,0,0,15,0,15,0,0,0,0,0,1,1,1,0,1,0,0,1,0,0,0,1,0,0,8,0,1,0,0,14,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,14,0,0,0,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,2,0,0,2,0,0,2,0,0,3,0,0,6,0,0,6,0,0,6,0,0,7,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,8,0,0,10,15,0,0,0,0,0,0,7,8,0,0,0,1,0,4,9,1,0,0,15,0,0,0
4,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,0,0,1,1,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0,0


# 8. Сохранение финального датасета в файл

In [29]:
# Write the final dataset to disk without the index column, then release the
# in-memory reference to it.
df_final.to_csv(path_or_buf='../train_data/df_final_1.csv', index=False)
del df_final

# 9. Пайплайн по подготовке датасета

In [None]:
# Reload the dataset stored in '../train_data/df_final_1.csv' and preview it.
df_final = pd.read_csv(filepath_or_buffer='../train_data/df_final_1.csv')
df_final.head()

In [None]:
# NOTE(review): with a single input array train_test_split returns
# (train, test), so X holds the 80% training rows and y the 20% held-out rows;
# both still contain the 'target' column — they are not features/labels.
X, y = train_test_split(
    df_final,
    test_size=0.2,
    random_state=RANDOM_SEED,
    stratify=df_final['target'],
)

In [None]:
def columnsDroping(X):
    """Return a new frame equal to ``X`` minus the columns in ``cols_for_del``.

    ``cols_for_del`` is read from the enclosing (notebook) scope; it is not
    defined inside this function.
    """
    reduced = X.drop(columns=cols_for_del)
    return reduced

def dublicatesDroping(X):
    """Return ``X`` without fully duplicated rows, keeping the first occurrence."""
    unique_rows = X.drop_duplicates(keep='first')
    return unique_rows

def featuresEncoding(X):
    """One-hot encode every column named in ``cat_cols``.

    For each categorical column the encoder ``ohe`` is re-fitted on that
    column and the resulting indicator columns are cast to ``int8``. In the
    returned frame the categorical columns are replaced by their indicator
    columns; the frame passed in is not modified. Encoded frames are collected
    in a local list and joined once at the end.

    Relies on ``cat_cols`` and ``ohe`` from the enclosing (notebook) scope.

    NOTE(review): assumes ``ohe`` yields dense output (i.e. was created with
    sparse output disabled) — confirm where ``ohe`` is defined.
    NOTE(review): the encoder is re-fitted on whatever frame is passed in, so
    two different frames can produce different indicator columns.
    """
    encoded_parts = []
    for catcol in cat_cols:
        ft = ohe.fit_transform(X[[catcol]])
        # Reuse X's index so indicator rows line up with X when joined; a
        # default RangeIndex would misalign once rows were split or dropped.
        df_ft = pd.DataFrame(
            ft, columns=ohe.get_feature_names_out(), index=X.index
        )
        encoded_parts.append(df_ft.astype('int8'))

    return pd.concat([X.drop(columns=cat_cols)] + encoded_parts, axis=1)

def rowAgrigation(X):
    """Collapse ``X`` to one row per ``id`` by summing every other column.

    The returned frame is indexed by ``id``; ``id`` is no longer a column.
    """
    # Use the built-in groupby sum directly: passing ``np.sum`` to ``agg`` is
    # deprecated in recent pandas and only resolves to this same reduction.
    return X.groupby('id').sum()

def typeConversion(X):
    """Cast every column of ``X`` except ``id`` to ``int8``, in place.

    ``id`` is skipped when it is a column. When it is absent from the columns
    (for example because it became the index after a group-by) every column
    is cast.

    NOTE(review): int8 only holds -128..127; confirm the values being cast
    (e.g. per-id sums) stay inside that range.

    Returns the same frame object that was passed in.
    """
    # errors='ignore' keeps this working when 'id' is the index rather than a
    # column, where a plain Index.drop('id') would raise KeyError.
    for col in X.columns.drop('id', errors='ignore'):
        X[col] = X[col].astype('int8')

    return X
        
        

In [None]:
# Imported in this cell because the notebook's import cell does not bring
# these two names into scope.
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

# Each step wraps one preparation function; steps run in the listed order.
steps = [
    ('columnsDroping', FunctionTransformer(columnsDroping)),
    ('dublicatesDroping', FunctionTransformer(dublicatesDroping)),
    ('featuresEncoding', FunctionTransformer(featuresEncoding)),
    ('rowAgrigation', FunctionTransformer(rowAgrigation)),
    ('typeConversion', FunctionTransformer(typeConversion)),
]

pipeline = Pipeline(steps=steps)

# NOTE(review): X here is whatever the split cell produced from df_final;
# confirm it still has the 'id' and raw categorical columns these steps expect.
X = pipeline.fit_transform(X)