In [173]:
# 0. Load libraries #

import numpy as np
import pandas as pd
import os, time, warnings, random, gc, pickle
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.svm import SVC
from sklearn.preprocessing import LabelBinarizer, LabelEncoder, OrdinalEncoder, OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, cross_val_predict, GridSearchCV, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier, VotingClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.inspection import permutation_importance
from xgboost import XGBClassifier

pd.set_option('display.max_columns', 300)
pd.set_option('display.max_rows', 300)
pd.set_option('mode.chained_assignment', None)
pd.set_option('display.expand_frame_repr', False)
warnings.filterwarnings('ignore')

# Load custom pre-processing functions:

def draw_histograms(df, variables, n_rows, n_cols):
    # stolen from https://stackoverflow.com/questions/29530355/plotting-multiple-histograms-in-grid
    fig=plt.figure()
    for i, var_name in enumerate(variables):
        ax=fig.add_subplot(n_rows,n_cols,i+1)
        df[var_name].hist(bins=10,ax=ax)
        ax.set_title(var_name+" Distribution")
    fig.tight_layout()  
    plt.show()


def fillna_mp_i1(df_train, df_test, df_pred, num_features, cat_features, num_fill='median', cat_fill='mode'):
    """This function speeds up filling missing values for 3 main datasets using different imputation methods.
    Later may replace it with some subclass.
    Example: fillna_mp_i1(X_train, X_test, X_pred, num_cols, cat_cols)"""
    # set df_pred to None if it does not exist
    if not ((cat_fill=='mode') and (num_fill=='median')):
        print ('Imputation method not Implemented yet!')
        return None
    
    df_train[num_features] = df_train[num_features].fillna(value=df_train[num_features].median())
    df_test[num_features] = df_test[num_features].fillna(value=df_train[num_features].median())
    df_train[cat_features] = df_train[cat_features].fillna(value=df_train[cat_features].mode().iloc[0])
    df_test[cat_features] = df_test[cat_features].fillna(value=df_train[cat_features].mode().iloc[0])
    if (df_pred is not None):
        df_pred[num_features] = df_pred[num_features].fillna(value=df_train[num_features].median())
        df_pred[cat_features] = df_pred[cat_features].fillna(value=df_train[cat_features].mode().iloc[0])
    df_train[num_features+cat_features].count
    
    all_good = (
    (np.prod(df_train[num_features+cat_features].shape)==df_train[num_features+cat_features].count().sum()) and 
    (np.prod(df_test[num_features+cat_features].shape) == df_test[num_features+cat_features].count().sum()) and 
    (np.prod(df_pred[num_features+cat_features].shape) == df_pred[num_features+cat_features].count().sum()))
    if (all_good):
        print('Missing values imputed successfully')
    else:
        print('There are still some missing values...')
    
def add_misDummy_mp_i1(df_train, df_test, df_pred, features):
    """This function creates new dummy columns for missing features.
    Example: add_misDummy_mp_i1(X_train, X_test, X_pred, ['Age'])"""
    # set df_pred to None if it does not exist
    for feature_name in features:
        misColName = 'mis'+feature_name
        df_train.loc[df_train[feature_name].isnull(), misColName]=1
        df_train.loc[df_train[feature_name].notnull(), misColName]=0
        df_test.loc[df_test[feature_name].isnull(), misColName]=1
        df_test.loc[df_test[feature_name].notnull(), misColName]=0
        if (df_pred is not None):
            df_pred.loc[df_pred[feature_name].isnull(), misColName]=1
            df_pred.loc[df_pred[feature_name].notnull(), misColName]=0
   

def discretize_mp_i1(df_train, df_test, df_pred, feature, ntiles, delete_feature=False):
    """This function divides a continuous feature into quantile groups.
    Example: discretize_mp_i1(X_train, X_test, X_pred, 'Age', 15)"""
    # set df_pred to None if it does not exist
    _,bin = pd.qcut(df_train[feature], ntiles, retbins = True, labels = False, duplicates = 'drop')
    df_train[feature+'Ntile'] = pd.cut(df_train[feature], labels=False, duplicates = 'drop', bins = bin ,include_lowest = True)
    df_test[feature+'Ntile'] = pd.cut(df_test[feature], labels=False, duplicates = 'drop', bins = bin ,include_lowest = True)
    if (df_pred is not None):
        df_pred[feature+'Ntile'] = pd.cut(df_pred[feature], labels=False, duplicates = 'drop', bins = bin ,include_lowest = True)
    if (delete_feature==True):
        df_train.drop(columns=[feature], inplace=True)
        df_test.drop(columns=[feature], inplace=True)
        df_pred.drop(columns=[feature], inplace=True)
    print('Discretized ',feature, ' into ', len(bin)-1, ' bins')


def log_transformer_mp_i1(df_train, df_test, df_pred, feature_subset=False, min_skew=3):
    """This function divides a continuous feature into quantile groups.
    Example: log_transformer_mp_i1(X_train, X_test, X_pred, feature_subset=num_cols)"""
    # set df_pred to None if it does not exist
    if (feature_subset==False):
        features_totransform = df_train.columns
    else:
        features_totransform = feature_subset.copy()
    skewed_vars = list(df_train.skew()[abs(df_train.skew())>min_skew].index)
    for col in list(set(skewed_vars)&set(features_totransform)):
        df_train[col] = np.log1p(df_train[col])
        df_test[col] = np.log1p(df_test[col])
        if (df_pred is not None):
            df_pred[col] = np.log1p(df_pred[col])
    print('Skewed columns log-transformed: ', list(set(skewed_vars)&set(features_totransform)))
    
    


In [174]:
with open('../input/amex-default-downsampled-01/amex_default_0.1sample.pickle', 'rb') as pickled_one:
    df = pickle.load(pickled_one)
display(df.head(),df.shape)

Unnamed: 0,customer_ID,S_2,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,D_42,D_43,D_44,B_4,D_45,B_5,R_2,D_46,D_47,D_48,D_49,B_6,B_7,B_8,D_50,D_51,B_9,R_3,D_52,P_3,B_10,D_53,S_5,B_11,S_6,D_54,R_4,S_7,B_12,S_8,D_55,D_56,B_13,R_5,D_58,S_9,B_14,D_59,D_60,D_61,B_15,S_11,D_62,D_63,D_64,D_65,B_16,B_17,B_18,B_19,D_66,B_20,D_68,S_12,R_6,S_13,B_21,D_69,B_22,D_70,D_71,D_72,S_15,B_23,D_73,P_4,D_74,D_75,D_76,B_24,R_7,D_77,B_25,B_26,D_78,D_79,R_8,R_9,S_16,D_80,R_10,R_11,B_27,D_81,D_82,S_17,R_12,B_28,R_13,D_83,R_14,R_15,D_84,R_16,B_29,B_30,S_18,D_86,D_87,R_17,R_18,D_88,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,D_89,R_22,R_23,D_91,D_92,D_93,D_94,R_24,R_25,D_96,S_22,S_23,S_24,S_25,S_26,D_102,D_103,D_104,D_105,D_106,D_107,B_36,B_37,R_26,R_27,B_38,D_108,D_109,D_110,D_111,B_39,D_112,B_40,S_27,D_113,D_114,D_115,D_116,D_117,D_118,D_119,D_120,D_121,D_122,D_123,D_124,D_125,D_126,D_127,D_128,D_129,B_41,B_42,D_130,D_131,D_132,D_133,R_28,D_134,D_135,D_136,D_137,D_138,D_139,D_140,D_141,D_142,D_143,D_144,D_145
0,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,2017-05-30,0.937438,0.003936,0.003352,0.814304,0.002594,,0.000545,0.003142,,0.051788,0.006299,0.056203,0.294726,0.006012,0.004684,0.516281,0.463092,0.263574,,0.048533,0.042849,0.006816,0.076369,0.004431,0.006775,0.005824,0.192826,0.577143,0.044728,,0.00328,0.007864,1.000973,1.008453,0.000671,,0.13955,0.003945,0.177336,0.169218,0.008859,0.009552,0.331046,,0.005952,0.292875,0.002996,0.174586,0.005551,0.288992,0.081457,CO,O,0.009775,0.008577,,0.645052,0.001056,1.0,0.001964,6.0,0.19399,0.000306,0.001171,0.000274,0.006808,0.005755,0.259391,0.015849,0.00157,0.507748,0.025547,,0.00137,0.150138,0.137811,,0.009684,0.004231,,0.005078,0.00355,0.003895,0.000551,0.008775,,0.003207,0.004396,0.004527,0.008564,0.006419,0.007386,,0.000966,1.009854,0.058119,0.003344,0.007251,0.003969,0.008143,0.005722,0.009559,,0.0,0.004709,0.001287,,0.006435,0.008478,,1,0.00574,0.005207,0.005587,0.000917,0.002636,0.001219,1.008942,0.000116,0.0026,0.001327,0.005468,0.006224,2.9e-05,0.003084,5.4e-05,0.005109,0.002801,0.301518,0.140075,0.082386,0.977059,0.001901,0.413177,1.000086,0.958687,0.661508,,1.003331,0.006081,0.009483,,1.005789,2.0,,0.000268,,,,1.00941,0.165166,,0.001087,1.0,0.425165,0.0,4.0,0.418822,0.419459,0.0,0.547555,0.43812,0.003239,0.187171,0.006194,1.0,0.009093,1.005538,1.000016,0.00345,,1.005729,0.001219,,0.002394,0.004556,,,,,,0.008103,0.006132,0.003267,,0.008814,0.001211,0.001624
1,00013181a0c5fc8f1ea38cd2b90fe8ad2fa8cad9d9f13e...,2017-06-16,0.471242,0.001547,0.000233,0.816437,0.003437,,0.005852,0.005877,,0.20844,0.382538,0.313276,0.054264,0.002035,0.002264,0.95178,0.32547,0.740619,,0.00779,1.047744,1.005298,0.076422,0.000206,0.005905,0.009041,0.148017,0.493458,0.039864,,0.006881,0.003204,1.005171,1.001148,0.007656,,0.010886,0.00733,0.448186,0.126476,0.00481,0.003998,0.619877,,0.003674,0.375356,0.001085,0.599261,0.004723,0.285493,0.017829,CO,O,0.001949,0.005428,,0.532836,0.001264,,0.002074,5.0,0.184956,0.004232,0.004607,0.000368,0.00262,0.003322,1.008265,0.034615,0.001517,0.500245,0.995367,,0.001463,0.365847,0.338917,,0.007694,0.004741,,0.004701,0.009776,0.004254,0.001066,0.003238,0.171472,0.003671,0.409818,0.00182,0.001104,0.008046,0.001091,,0.001583,1.004534,0.211125,0.002501,0.007636,0.009532,0.004129,0.001284,0.008175,,0.0,0.003752,0.004509,,0.001339,0.008494,,1,0.005238,0.008327,0.002736,0.007807,0.005138,0.009324,1.006314,0.005657,0.006198,0.006972,0.007,0.001464,0.001995,0.004677,0.009206,0.008958,0.008415,0.302657,0.134319,0.086714,0.972725,8e-05,0.00358,0.008142,0.003662,,,0.008267,0.00091,0.002263,0.08133,0.015774,1.0,,0.005904,,,,1.002032,1.113697,,0.004361,1.0,0.044666,0.0,4.0,0.045775,0.044385,0.0,0.755079,0.291698,0.00917,0.455286,0.006972,1.0,0.008171,1.004957,1.009854,0.000533,,1.002896,0.006603,,0.007623,0.006064,,,,,,0.009825,0.005849,0.00335,,0.006323,0.007118,0.00742
2,00013181a0c5fc8f1ea38cd2b90fe8ad2fa8cad9d9f13e...,2017-08-17,0.467701,0.006205,0.005869,0.819957,0.000378,,0.008037,0.008558,,0.148911,0.627657,0.48063,0.063674,2.8e-05,0.005044,0.935256,0.327515,0.901241,,0.0101,1.164417,1.001536,0.081456,0.003159,0.006622,0.108383,0.149677,0.50362,0.018164,,0.003259,0.006402,1.007938,1.002968,0.000769,,0.007096,0.008619,0.62274,0.12972,0.00829,0.001826,0.2936,,0.006495,0.425143,0.002992,0.815612,0.002106,0.289433,0.035034,CO,O,0.001823,0.001336,,0.479464,0.006855,,0.008589,5.0,0.189972,0.006763,0.009873,0.001513,0.00273,0.000633,0.257676,0.013854,0.006144,0.50629,1.15084,,0.006235,0.362119,0.340423,,0.006347,0.008762,,0.006534,0.000402,0.009662,0.008106,0.003236,0.173019,0.007788,0.604919,0.007499,0.004737,0.007295,0.004215,,6.6e-05,1.006996,0.317591,0.008555,0.006911,0.003373,0.009086,0.001949,0.006862,,0.0,0.007397,0.000639,,0.008764,0.009613,,1,0.00588,0.008371,0.007068,0.004779,0.009163,0.006826,1.00943,0.001587,0.006292,0.002701,0.007758,0.009594,0.00971,0.007728,0.007752,0.00543,0.005929,0.301553,0.137537,0.078743,0.970239,0.007011,0.004957,0.009385,0.003309,,,0.001168,0.007991,0.007811,0.080626,0.020086,1.0,,0.008882,,,,1.004104,1.147685,,0.003374,1.0,0.046637,0.0,4.0,0.041137,0.045305,0.0,0.760963,0.286112,0.001132,0.460282,0.009217,1.0,0.004836,1.006152,1.007872,0.002559,,1.002708,0.007124,,0.004873,0.006783,,,,,,0.005949,0.008454,0.004268,,0.004756,0.009932,0.004188
3,0002d381bdd8048d76719042cf1eb63caf53b636f8aacd...,2017-11-21,1.004737,0.007491,0.011268,0.815235,0.004707,0.167983,0.007022,0.005782,,,0.004346,0.000623,0.204098,0.006022,0.008137,,0.419005,0.016205,,0.16476,0.030058,0.001657,0.071683,0.340903,0.009491,0.008493,0.123615,0.669208,0.293235,,0.002898,0.009279,1.007817,1.006192,0.003584,0.116267,0.012557,0.330734,0.023306,0.142516,0.007849,0.003358,0.002332,,0.002678,0.336137,0.046614,0.018364,0.009088,0.444328,0.284889,CO,O,0.002262,0.090021,,1.009554,0.004585,,0.00054,6.0,0.188458,0.00402,0.008547,0.003696,0.006651,0.007346,0.001627,0.00959,0.000481,0.2044,0.008539,,0.001955,0.00115,0.00968,,0.007706,0.004342,0.263201,0.008383,0.000833,0.00147,0.005853,0.000318,,0.004364,0.004569,0.000893,0.000827,0.007878,0.009827,,0.002171,1.006954,0.006199,0.007014,0.004212,0.008848,0.009341,0.002519,0.000597,,0.0,0.003691,0.007256,,0.00392,0.002669,,1,0.002969,0.009031,0.008911,0.009604,0.001946,0.005293,1.009686,0.006844,0.00904,0.005778,0.005616,0.005936,0.00559,0.009827,0.009982,0.007121,0.0026,0.783929,0.134354,0.804484,0.969548,0.009972,0.545109,1.00123,0.976875,0.423101,,0.339787,0.004525,0.005884,,1.004259,1.0,,0.00166,,,,1.002119,0.064525,,0.005334,1.0,0.354861,0.0,4.0,0.348603,0.345898,0.0,0.718976,0.723254,0.001202,0.554524,0.000523,1.0,0.003754,1.00552,1.009376,0.002034,,0.007847,0.007497,,0.004805,0.003351,,,,,,0.004527,0.009969,0.007597,,0.000389,0.002958,0.006377
4,000473eb907b57c8c23f652bba40f87fe7261273dda470...,2017-08-08,0.600104,0.031315,0.69896,0.026414,0.003807,0.15198,0.005612,0.982836,,0.205391,0.252669,0.200987,0.048019,0.019929,0.002747,0.459173,0.442493,0.759969,,0.000829,0.787099,1.002522,,0.001657,0.636616,0.201164,0.051407,0.827741,0.006361,,0.019792,0.64646,0.008648,1.002048,0.002824,0.113118,0.007629,0.175061,0.689852,,0.014891,0.004405,0.597304,,0.160664,0.357412,0.332458,0.899766,0.007412,0.44081,0.022751,CO,U,0.006996,1.009787,0.994517,0.148648,1.004696,1.0,1.0082,3.0,0.185788,0.004269,0.282491,0.005111,0.001428,0.500662,0.505127,0.006231,0.006744,0.505679,0.74539,,0.006123,0.428581,0.471355,,0.005054,0.00415,,0.283092,0.004833,0.00251,0.006491,0.005071,,0.003844,0.003226,0.008886,0.008856,0.004379,0.006307,,0.002352,1.006771,0.214892,0.004464,0.00552,0.001858,0.003523,0.004248,0.000714,0.00195,1.0,0.005569,0.008498,,0.002063,0.007088,,1,0.00318,0.006332,0.001579,0.008772,0.008577,0.000815,0.008622,0.007164,0.002716,0.0039,0.00158,0.001656,0.003019,1.9e-05,0.003675,0.000234,0.000346,0.953654,1.141669,0.954401,0.19045,0.006645,0.453317,1.001948,0.986592,0.523158,,0.336799,0.000254,0.703397,,1.008359,6.0,,0.00786,,,,1.009013,0.588145,0.00018,0.204834,0.0,0.143651,0.0,-1.0,0.062348,0.064641,0.0,0.291003,0.292169,0.008677,0.280909,0.004211,1.0,0.005739,0.000269,0.008488,0.009117,,0.005012,0.004104,,0.009369,0.003991,,,,,,1.005589,0.00654,0.873327,0.043301,1.00659,0.009968,0.100346


(54849, 190)

In [175]:
df_labels = pd.read_csv('../input/amex-default-prediction/train_labels.csv')
display(len(df_labels))
df = pd.merge(df, df_labels, on='customer_ID', how='left')
display(df.shape, df.head())
df.drop(columns=['customer_ID', 'S_2'], inplace=True)

458913

(54849, 191)

Unnamed: 0,customer_ID,S_2,P_2,D_39,B_1,B_2,R_1,S_3,D_41,B_3,D_42,D_43,D_44,B_4,D_45,B_5,R_2,D_46,D_47,D_48,D_49,B_6,B_7,B_8,D_50,D_51,B_9,R_3,D_52,P_3,B_10,D_53,S_5,B_11,S_6,D_54,R_4,S_7,B_12,S_8,D_55,D_56,B_13,R_5,D_58,S_9,B_14,D_59,D_60,D_61,B_15,S_11,D_62,D_63,D_64,D_65,B_16,B_17,B_18,B_19,D_66,B_20,D_68,S_12,R_6,S_13,B_21,D_69,B_22,D_70,D_71,D_72,S_15,B_23,D_73,P_4,D_74,D_75,D_76,B_24,R_7,D_77,B_25,B_26,D_78,D_79,R_8,R_9,S_16,D_80,R_10,R_11,B_27,D_81,D_82,S_17,R_12,B_28,R_13,D_83,R_14,R_15,D_84,R_16,B_29,B_30,S_18,D_86,D_87,R_17,R_18,D_88,B_31,S_19,R_19,B_32,S_20,R_20,R_21,B_33,D_89,R_22,R_23,D_91,D_92,D_93,D_94,R_24,R_25,D_96,S_22,S_23,S_24,S_25,S_26,D_102,D_103,D_104,D_105,D_106,D_107,B_36,B_37,R_26,R_27,B_38,D_108,D_109,D_110,D_111,B_39,D_112,B_40,S_27,D_113,D_114,D_115,D_116,D_117,D_118,D_119,D_120,D_121,D_122,D_123,D_124,D_125,D_126,D_127,D_128,D_129,B_41,B_42,D_130,D_131,D_132,D_133,R_28,D_134,D_135,D_136,D_137,D_138,D_139,D_140,D_141,D_142,D_143,D_144,D_145,target
0,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,2017-05-30,0.937438,0.003936,0.003352,0.814304,0.002594,,0.000545,0.003142,,0.051788,0.006299,0.056203,0.294726,0.006012,0.004684,0.516281,0.463092,0.263574,,0.048533,0.042849,0.006816,0.076369,0.004431,0.006775,0.005824,0.192826,0.577143,0.044728,,0.00328,0.007864,1.000973,1.008453,0.000671,,0.13955,0.003945,0.177336,0.169218,0.008859,0.009552,0.331046,,0.005952,0.292875,0.002996,0.174586,0.005551,0.288992,0.081457,CO,O,0.009775,0.008577,,0.645052,0.001056,1.0,0.001964,6.0,0.19399,0.000306,0.001171,0.000274,0.006808,0.005755,0.259391,0.015849,0.00157,0.507748,0.025547,,0.00137,0.150138,0.137811,,0.009684,0.004231,,0.005078,0.00355,0.003895,0.000551,0.008775,,0.003207,0.004396,0.004527,0.008564,0.006419,0.007386,,0.000966,1.009854,0.058119,0.003344,0.007251,0.003969,0.008143,0.005722,0.009559,,0.0,0.004709,0.001287,,0.006435,0.008478,,1,0.00574,0.005207,0.005587,0.000917,0.002636,0.001219,1.008942,0.000116,0.0026,0.001327,0.005468,0.006224,2.9e-05,0.003084,5.4e-05,0.005109,0.002801,0.301518,0.140075,0.082386,0.977059,0.001901,0.413177,1.000086,0.958687,0.661508,,1.003331,0.006081,0.009483,,1.005789,2.0,,0.000268,,,,1.00941,0.165166,,0.001087,1.0,0.425165,0.0,4.0,0.418822,0.419459,0.0,0.547555,0.43812,0.003239,0.187171,0.006194,1.0,0.009093,1.005538,1.000016,0.00345,,1.005729,0.001219,,0.002394,0.004556,,,,,,0.008103,0.006132,0.003267,,0.008814,0.001211,0.001624,0
1,00013181a0c5fc8f1ea38cd2b90fe8ad2fa8cad9d9f13e...,2017-06-16,0.471242,0.001547,0.000233,0.816437,0.003437,,0.005852,0.005877,,0.20844,0.382538,0.313276,0.054264,0.002035,0.002264,0.95178,0.32547,0.740619,,0.00779,1.047744,1.005298,0.076422,0.000206,0.005905,0.009041,0.148017,0.493458,0.039864,,0.006881,0.003204,1.005171,1.001148,0.007656,,0.010886,0.00733,0.448186,0.126476,0.00481,0.003998,0.619877,,0.003674,0.375356,0.001085,0.599261,0.004723,0.285493,0.017829,CO,O,0.001949,0.005428,,0.532836,0.001264,,0.002074,5.0,0.184956,0.004232,0.004607,0.000368,0.00262,0.003322,1.008265,0.034615,0.001517,0.500245,0.995367,,0.001463,0.365847,0.338917,,0.007694,0.004741,,0.004701,0.009776,0.004254,0.001066,0.003238,0.171472,0.003671,0.409818,0.00182,0.001104,0.008046,0.001091,,0.001583,1.004534,0.211125,0.002501,0.007636,0.009532,0.004129,0.001284,0.008175,,0.0,0.003752,0.004509,,0.001339,0.008494,,1,0.005238,0.008327,0.002736,0.007807,0.005138,0.009324,1.006314,0.005657,0.006198,0.006972,0.007,0.001464,0.001995,0.004677,0.009206,0.008958,0.008415,0.302657,0.134319,0.086714,0.972725,8e-05,0.00358,0.008142,0.003662,,,0.008267,0.00091,0.002263,0.08133,0.015774,1.0,,0.005904,,,,1.002032,1.113697,,0.004361,1.0,0.044666,0.0,4.0,0.045775,0.044385,0.0,0.755079,0.291698,0.00917,0.455286,0.006972,1.0,0.008171,1.004957,1.009854,0.000533,,1.002896,0.006603,,0.007623,0.006064,,,,,,0.009825,0.005849,0.00335,,0.006323,0.007118,0.00742,1
2,00013181a0c5fc8f1ea38cd2b90fe8ad2fa8cad9d9f13e...,2017-08-17,0.467701,0.006205,0.005869,0.819957,0.000378,,0.008037,0.008558,,0.148911,0.627657,0.48063,0.063674,2.8e-05,0.005044,0.935256,0.327515,0.901241,,0.0101,1.164417,1.001536,0.081456,0.003159,0.006622,0.108383,0.149677,0.50362,0.018164,,0.003259,0.006402,1.007938,1.002968,0.000769,,0.007096,0.008619,0.62274,0.12972,0.00829,0.001826,0.2936,,0.006495,0.425143,0.002992,0.815612,0.002106,0.289433,0.035034,CO,O,0.001823,0.001336,,0.479464,0.006855,,0.008589,5.0,0.189972,0.006763,0.009873,0.001513,0.00273,0.000633,0.257676,0.013854,0.006144,0.50629,1.15084,,0.006235,0.362119,0.340423,,0.006347,0.008762,,0.006534,0.000402,0.009662,0.008106,0.003236,0.173019,0.007788,0.604919,0.007499,0.004737,0.007295,0.004215,,6.6e-05,1.006996,0.317591,0.008555,0.006911,0.003373,0.009086,0.001949,0.006862,,0.0,0.007397,0.000639,,0.008764,0.009613,,1,0.00588,0.008371,0.007068,0.004779,0.009163,0.006826,1.00943,0.001587,0.006292,0.002701,0.007758,0.009594,0.00971,0.007728,0.007752,0.00543,0.005929,0.301553,0.137537,0.078743,0.970239,0.007011,0.004957,0.009385,0.003309,,,0.001168,0.007991,0.007811,0.080626,0.020086,1.0,,0.008882,,,,1.004104,1.147685,,0.003374,1.0,0.046637,0.0,4.0,0.041137,0.045305,0.0,0.760963,0.286112,0.001132,0.460282,0.009217,1.0,0.004836,1.006152,1.007872,0.002559,,1.002708,0.007124,,0.004873,0.006783,,,,,,0.005949,0.008454,0.004268,,0.004756,0.009932,0.004188,1
3,0002d381bdd8048d76719042cf1eb63caf53b636f8aacd...,2017-11-21,1.004737,0.007491,0.011268,0.815235,0.004707,0.167983,0.007022,0.005782,,,0.004346,0.000623,0.204098,0.006022,0.008137,,0.419005,0.016205,,0.16476,0.030058,0.001657,0.071683,0.340903,0.009491,0.008493,0.123615,0.669208,0.293235,,0.002898,0.009279,1.007817,1.006192,0.003584,0.116267,0.012557,0.330734,0.023306,0.142516,0.007849,0.003358,0.002332,,0.002678,0.336137,0.046614,0.018364,0.009088,0.444328,0.284889,CO,O,0.002262,0.090021,,1.009554,0.004585,,0.00054,6.0,0.188458,0.00402,0.008547,0.003696,0.006651,0.007346,0.001627,0.00959,0.000481,0.2044,0.008539,,0.001955,0.00115,0.00968,,0.007706,0.004342,0.263201,0.008383,0.000833,0.00147,0.005853,0.000318,,0.004364,0.004569,0.000893,0.000827,0.007878,0.009827,,0.002171,1.006954,0.006199,0.007014,0.004212,0.008848,0.009341,0.002519,0.000597,,0.0,0.003691,0.007256,,0.00392,0.002669,,1,0.002969,0.009031,0.008911,0.009604,0.001946,0.005293,1.009686,0.006844,0.00904,0.005778,0.005616,0.005936,0.00559,0.009827,0.009982,0.007121,0.0026,0.783929,0.134354,0.804484,0.969548,0.009972,0.545109,1.00123,0.976875,0.423101,,0.339787,0.004525,0.005884,,1.004259,1.0,,0.00166,,,,1.002119,0.064525,,0.005334,1.0,0.354861,0.0,4.0,0.348603,0.345898,0.0,0.718976,0.723254,0.001202,0.554524,0.000523,1.0,0.003754,1.00552,1.009376,0.002034,,0.007847,0.007497,,0.004805,0.003351,,,,,,0.004527,0.009969,0.007597,,0.000389,0.002958,0.006377,0
4,000473eb907b57c8c23f652bba40f87fe7261273dda470...,2017-08-08,0.600104,0.031315,0.69896,0.026414,0.003807,0.15198,0.005612,0.982836,,0.205391,0.252669,0.200987,0.048019,0.019929,0.002747,0.459173,0.442493,0.759969,,0.000829,0.787099,1.002522,,0.001657,0.636616,0.201164,0.051407,0.827741,0.006361,,0.019792,0.64646,0.008648,1.002048,0.002824,0.113118,0.007629,0.175061,0.689852,,0.014891,0.004405,0.597304,,0.160664,0.357412,0.332458,0.899766,0.007412,0.44081,0.022751,CO,U,0.006996,1.009787,0.994517,0.148648,1.004696,1.0,1.0082,3.0,0.185788,0.004269,0.282491,0.005111,0.001428,0.500662,0.505127,0.006231,0.006744,0.505679,0.74539,,0.006123,0.428581,0.471355,,0.005054,0.00415,,0.283092,0.004833,0.00251,0.006491,0.005071,,0.003844,0.003226,0.008886,0.008856,0.004379,0.006307,,0.002352,1.006771,0.214892,0.004464,0.00552,0.001858,0.003523,0.004248,0.000714,0.00195,1.0,0.005569,0.008498,,0.002063,0.007088,,1,0.00318,0.006332,0.001579,0.008772,0.008577,0.000815,0.008622,0.007164,0.002716,0.0039,0.00158,0.001656,0.003019,1.9e-05,0.003675,0.000234,0.000346,0.953654,1.141669,0.954401,0.19045,0.006645,0.453317,1.001948,0.986592,0.523158,,0.336799,0.000254,0.703397,,1.008359,6.0,,0.00786,,,,1.009013,0.588145,0.00018,0.204834,0.0,0.143651,0.0,-1.0,0.062348,0.064641,0.0,0.291003,0.292169,0.008677,0.280909,0.004211,1.0,0.005739,0.000269,0.008488,0.009117,,0.005012,0.004104,,0.009369,0.003991,,,,,,1.005589,0.00654,0.873327,0.043301,1.00659,0.009968,0.100346,1


In [176]:
miss_c = df.count()
miss_feat = miss_c[miss_c<40000]
df = df[list(set(df.columns)-set(miss_feat.index))]
display(df.shape)
df[['D_114','D_120','D_68','B_30','D_117','D_116','B_38']] = \
df[['D_114','D_120','D_68','B_30','D_117','D_116','B_38']].apply(pd.to_numeric)
df['D_64'] = df['D_64'].astype('string')
display(df.dtypes)

(54849, 157)

D_47      float64
B_5       float64
R_24      float64
D_65      float64
B_19      float64
D_119     float64
S_5       float64
B_14      float64
S_26      float64
D_91      float64
B_9       float64
R_16      float64
D_81      float64
D_133     float64
P_4       float64
B_37      float64
S_12      float64
D_89      float64
D_61      float64
D_122     float64
S_25      float64
R_8       float64
R_20      float64
D_92      float64
B_20      float64
D_144     float64
D_70      float64
B_27      float64
D_48      float64
target      int64
B_40      float64
B_22      float64
S_13      float64
B_25      float64
D_114     float64
B_33      float64
B_4       float64
S_15      float64
D_74      float64
S_23      float64
S_11      float64
B_7       float64
D_143     float64
S_6       float64
D_93      float64
D_131     float64
D_121     float64
R_25      float64
B_3       float64
D_120     float64
D_39      float64
R_14      float64
D_113     float64
D_115     float64
D_140     float64
R_15      

In [177]:
### sample split ###

test_size = 0.1
df.reset_index(inplace=True, drop=True)
test_index = random.sample(list(df.index), int(test_size*df.shape[0]))
train = df.iloc[list(set(df.index)-set(test_index))]
test = df.iloc[test_index]
display(train.shape, test.shape, train.head(3), test.head(3))
display(train.dtypes, test.dtypes)


(49365, 157)

(5484, 157)

Unnamed: 0,D_47,B_5,R_24,D_65,B_19,D_119,S_5,B_14,S_26,D_91,B_9,R_16,D_81,D_133,P_4,B_37,S_12,D_89,D_61,D_122,S_25,R_8,R_20,D_92,B_20,D_144,D_70,B_27,D_48,target,B_40,B_22,S_13,B_25,D_114,B_33,B_4,S_15,D_74,S_23,S_11,B_7,D_143,S_6,D_93,D_131,D_121,R_25,B_3,D_120,D_39,R_14,D_113,D_115,D_140,R_15,D_55,D_118,D_69,B_12,P_2,D_44,S_24,R_28,S_27,B_24,D_80,D_68,B_6,B_11,B_30,R_1,S_8,R_22,D_124,S_20,B_26,D_129,D_125,D_127,D_86,B_8,R_13,D_128,R_4,D_62,D_59,S_16,D_103,D_83,D_60,D_71,S_18,B_16,B_32,D_54,B_10,R_6,D_84,B_31,B_21,R_5,R_2,R_12,R_23,R_19,D_63,B_23,R_10,R_17,R_21,D_102,B_13,B_28,B_2,D_45,D_130,D_139,R_27,D_141,B_18,D_117,S_22,D_107,S_3,D_51,D_52,B_36,D_41,B_41,D_145,D_116,S_19,D_72,R_3,D_79,D_109,D_78,D_112,R_7,D_96,D_123,D_75,B_38,D_126,D_46,S_7,P_3,R_11,D_58,D_64,D_94,S_17,D_104,B_1,B_15,R_18
0,0.463092,0.006012,5.4e-05,0.009775,0.001056,0.419459,0.00328,0.005952,0.001901,0.005468,0.006775,0.009559,0.007386,0.002394,0.00137,0.009483,0.19399,0.000116,0.174586,0.43812,0.977059,0.008775,0.002636,0.006224,0.001964,0.001211,0.259391,0.006419,0.263574,0,0.165166,0.005755,0.001171,0.005078,1.0,1.008942,0.056203,0.507748,0.150138,0.140075,0.288992,0.042849,0.008814,1.000973,2.9e-05,0.001219,0.547555,0.005109,0.003142,0.0,0.003936,0.003969,0.001087,0.425165,0.006132,0.008143,0.177336,0.418822,0.006808,0.13955,0.937438,0.006299,0.082386,0.004556,,0.009684,0.004396,6.0,0.048533,0.007864,0.0,0.002594,0.003945,0.0026,0.187171,0.000917,0.00355,1.000016,0.006194,0.009093,0.001287,0.006816,0.003344,1.005538,0.000671,0.081457,0.292875,0.003207,1.000086,0.007251,0.002996,0.015849,0.004709,0.008577,0.005587,1.008453,0.044728,0.000306,0.005722,1,0.000274,0.009552,0.004684,1.009854,0.001327,0.005207,CO,0.025547,0.004527,0.006435,0.001219,0.413177,0.008859,0.058119,0.814304,0.294726,1.005729,0.008103,1.005789,0.003267,0.645052,4.0,0.301518,1.003331,,0.004431,0.192826,0.006081,0.000545,0.00345,0.001624,0.0,0.00574,0.00157,0.005824,0.000551,0.000268,0.003895,1.00941,0.004231,0.002801,0.003239,0.137811,2.0,1.0,0.516281,,0.577143,0.008564,0.331046,O,0.003084,0.000966,0.958687,0.003352,0.005551,0.008478
1,0.32547,0.002035,0.009206,0.001949,0.001264,0.044385,0.006881,0.003674,8e-05,0.007,0.005905,0.008175,0.001091,0.007623,0.001463,0.002263,0.184956,0.005657,0.599261,0.291698,0.972725,0.003238,0.005138,0.001464,0.002074,0.007118,1.008265,0.008046,0.740619,1,1.113697,0.003322,0.004607,0.004701,1.0,1.006314,0.313276,0.500245,0.365847,0.134319,0.285493,1.047744,0.006323,1.005171,0.001995,0.006603,0.755079,0.008958,0.005877,0.0,0.001547,0.009532,0.004361,0.044666,0.005849,0.004129,0.448186,0.045775,0.00262,0.010886,0.471242,0.382538,0.086714,0.006064,,0.007694,0.409818,5.0,0.00779,0.003204,0.0,0.003437,0.00733,0.006198,0.455286,0.007807,0.009776,1.009854,0.006972,0.008171,0.004509,1.005298,0.002501,1.004957,0.007656,0.017829,0.375356,0.003671,0.008142,0.007636,0.001085,0.034615,0.003752,0.005428,0.002736,1.001148,0.039864,0.004232,0.001284,1,0.000368,0.003998,0.002264,1.004534,0.006972,0.008327,CO,0.995367,0.00182,0.001339,0.009324,0.00358,0.00481,0.211125,0.816437,0.054264,1.002896,0.009825,0.015774,0.00335,0.532836,4.0,0.302657,0.008267,,0.000206,0.148017,0.00091,0.005852,0.000533,0.00742,0.0,0.005238,0.001517,0.009041,0.001066,0.005904,0.004254,1.002032,0.004741,0.008415,0.00917,0.338917,1.0,1.0,0.95178,,0.493458,0.001104,0.619877,O,0.004677,0.001583,0.003662,0.000233,0.004723,0.008494
3,0.419005,0.006022,0.009982,0.002262,0.004585,0.345898,0.002898,0.002678,0.009972,0.005616,0.009491,0.000597,0.009827,0.004805,0.001955,0.005884,0.188458,0.006844,0.018364,0.723254,0.969548,0.000318,0.001946,0.005936,0.00054,0.002958,0.001627,0.007878,0.016205,0,0.064525,0.007346,0.008547,0.008383,1.0,1.009686,0.000623,0.2044,0.00115,0.134354,0.444328,0.030058,0.000389,1.007817,0.00559,0.007497,0.718976,0.007121,0.005782,0.0,0.007491,0.008848,0.005334,0.354861,0.009969,0.009341,0.023306,0.348603,0.006651,0.012557,1.004737,0.004346,0.804484,0.003351,,0.007706,0.004569,6.0,0.16476,0.009279,0.0,0.004707,0.330734,0.00904,0.554524,0.009604,0.000833,1.009376,0.000523,0.003754,0.007256,0.001657,0.007014,1.00552,0.003584,0.284889,0.336137,0.004364,1.00123,0.004212,0.046614,0.00959,0.003691,0.090021,0.008911,1.006192,0.293235,0.00402,0.002519,1,0.003696,0.003358,0.008137,1.006954,0.005778,0.009031,CO,0.008539,0.000893,0.00392,0.005293,0.545109,0.007849,0.006199,0.815235,0.204098,0.007847,0.004527,1.004259,0.007597,1.009554,4.0,0.783929,0.339787,0.167983,0.340903,0.123615,0.004525,0.007022,0.002034,0.006377,0.0,0.002969,0.000481,0.008493,0.005853,0.00166,0.00147,1.002119,0.004342,0.0026,0.001202,0.00968,1.0,1.0,,0.116267,0.669208,0.000827,0.002332,O,0.009827,0.002171,0.976875,0.011268,0.009088,0.002669


Unnamed: 0,D_47,B_5,R_24,D_65,B_19,D_119,S_5,B_14,S_26,D_91,B_9,R_16,D_81,D_133,P_4,B_37,S_12,D_89,D_61,D_122,S_25,R_8,R_20,D_92,B_20,D_144,D_70,B_27,D_48,target,B_40,B_22,S_13,B_25,D_114,B_33,B_4,S_15,D_74,S_23,S_11,B_7,D_143,S_6,D_93,D_131,D_121,R_25,B_3,D_120,D_39,R_14,D_113,D_115,D_140,R_15,D_55,D_118,D_69,B_12,P_2,D_44,S_24,R_28,S_27,B_24,D_80,D_68,B_6,B_11,B_30,R_1,S_8,R_22,D_124,S_20,B_26,D_129,D_125,D_127,D_86,B_8,R_13,D_128,R_4,D_62,D_59,S_16,D_103,D_83,D_60,D_71,S_18,B_16,B_32,D_54,B_10,R_6,D_84,B_31,B_21,R_5,R_2,R_12,R_23,R_19,D_63,B_23,R_10,R_17,R_21,D_102,B_13,B_28,B_2,D_45,D_130,D_139,R_27,D_141,B_18,D_117,S_22,D_107,S_3,D_51,D_52,B_36,D_41,B_41,D_145,D_116,S_19,D_72,R_3,D_79,D_109,D_78,D_112,R_7,D_96,D_123,D_75,B_38,D_126,D_46,S_7,P_3,R_11,D_58,D_64,D_94,S_17,D_104,B_1,B_15,R_18
45810,-0.023208,0.006786,0.003466,0.008527,0.001508,,0.001776,0.004528,0.003382,,0.009095,0.002627,,,0.96196,0.009493,0.187637,,0.277665,,0.975758,0.007385,0.000985,0.009452,0.007116,,,0.003043,,1,0.008105,0.005367,0.007226,0.003385,,1.007688,0.002959,0.50983,0.004201,0.131963,0.288504,0.000445,,1.008822,0.000508,,,0.000818,0.009632,,0.007341,0.006255,,,,0.004117,,,,0.006373,,,0.193343,0.002906,,0.007365,0.004658,,0.18178,0.008009,0.0,0.005394,0.000731,0.001813,,0.006377,0.00628,,,0.006197,0.005165,1.00406,0.003365,,0.00552,,,0.006663,,,1.004641,0.01657,0.004361,0.006806,0.008162,1.007515,0.245815,0.009645,,1,0.007994,0.000439,0.008274,1.006727,0.005689,0.009188,CO,0.004168,0.003507,0.005678,0.005809,,,0.009287,0.81717,0.003187,,,1.007891,,1.009506,,0.347436,,,0.007113,,0.000436,0.000992,0.004802,,,0.005245,,0.005611,,0.000364,,1.004163,0.008254,0.003215,,0.009233,1.0,,,,,0.007088,0.003364,,0.00873,0.00827,,0.00152,5.5e-05,0.00711
30971,0.882235,0.001231,0.000635,0.000694,0.000915,0.643212,0.000616,0.005195,0.006847,0.008704,0.000846,0.006711,0.00838,0.003372,0.006845,0.002553,0.194232,0.008932,0.045547,0.286309,0.978525,0.006119,0.009416,0.002561,0.004444,0.005066,0.007193,0.009455,0.048362,0,0.03569,0.007811,0.000865,0.004076,1.0,1.00811,0.031986,0.506095,0.006426,0.139927,0.288057,0.034703,0.005483,1.009783,0.007126,0.003805,0.68683,0.0054,0.000204,0.0,0.006754,0.002435,0.004639,0.421795,0.005603,0.005255,0.049583,0.651547,0.005919,0.030137,0.926444,0.009402,0.005098,0.005437,,0.003375,0.00475,6.0,0.190054,0.007138,0.0,0.002586,0.009827,0.005004,0.22733,0.009577,0.005929,0.008317,0.006107,0.006832,0.004245,0.001808,0.005766,1.007622,0.005447,0.195839,0.069902,0.009814,6.4e-05,0.006861,0.000496,0.010825,0.0084,0.005234,0.002425,1.003802,0.300936,0.006934,0.005915,1,0.009009,0.000765,0.009786,1.004412,0.000543,0.003454,CR,0.02,0.006403,0.008199,0.003198,0.00617,0.009382,0.022894,0.815761,0.687521,0.008473,0.004954,1.007397,0.00099,1.001,4.0,0.042244,0.001245,,0.007328,0.213015,0.003722,0.002623,0.008965,0.004058,0.0,0.00864,0.008713,0.104705,0.000325,0.00032,0.001913,1.003816,0.008051,0.006175,0.002976,0.002982,1.0,1.0,0.439917,,0.642611,0.502459,0.007237,O,0.009495,0.002342,0.00674,0.00433,0.003991,0.00905
2109,0.203856,0.002919,0.002427,0.002974,0.002234,0.097924,0.004441,0.00581,0.0055,0.008004,0.00683,0.008449,0.002752,0.008532,0.001927,0.001703,0.192424,0.006783,0.136817,0.434564,0.978624,0.008124,0.002965,0.006519,0.003894,0.001081,0.006288,0.009161,0.169444,0,0.014813,0.003039,0.006782,0.002961,1.0,1.000559,0.007749,0.501207,0.008978,0.140328,0.286253,0.017963,0.008063,0.007345,0.004199,0.005153,0.284347,0.009521,0.00746,0.0,0.004988,0.005544,0.407032,0.09884,0.007502,0.001671,0.080897,0.093032,0.007498,0.022164,0.859788,0.006692,0.0775,0.006197,,0.005049,0.004947,4.0,0.34288,0.001158,0.0,0.007896,0.007632,0.00541,0.229406,0.002517,0.004734,1.008839,0.001194,0.004114,0.002408,1.003322,0.004736,1.005547,0.008878,0.457775,0.397594,0.000338,1.006356,0.005,0.008892,0.006838,5e-06,0.002067,0.004938,1.007026,0.298831,0.007019,0.005276,1,0.002455,0.008824,0.007587,1.003755,0.009333,0.007241,CO,0.006574,0.009485,0.009174,0.002884,0.296818,0.007732,0.010056,0.817755,0.115183,0.009235,0.008632,1.006486,0.005805,1.002163,4.0,0.302088,0.341895,0.19999,0.340123,0.208339,0.00374,0.005724,0.008152,0.000614,0.0,0.006298,0.005825,0.001238,0.004376,0.007284,0.002114,1.008125,0.007412,0.009919,0.006063,0.006696,2.0,1.0,0.533845,0.156576,0.560486,0.007861,0.009127,O,0.003242,0.005456,0.973428,0.002232,0.002483,0.007198


D_47      float64
B_5       float64
R_24      float64
D_65      float64
B_19      float64
D_119     float64
S_5       float64
B_14      float64
S_26      float64
D_91      float64
B_9       float64
R_16      float64
D_81      float64
D_133     float64
P_4       float64
B_37      float64
S_12      float64
D_89      float64
D_61      float64
D_122     float64
S_25      float64
R_8       float64
R_20      float64
D_92      float64
B_20      float64
D_144     float64
D_70      float64
B_27      float64
D_48      float64
target      int64
B_40      float64
B_22      float64
S_13      float64
B_25      float64
D_114     float64
B_33      float64
B_4       float64
S_15      float64
D_74      float64
S_23      float64
S_11      float64
B_7       float64
D_143     float64
S_6       float64
D_93      float64
D_131     float64
D_121     float64
R_25      float64
B_3       float64
D_120     float64
D_39      float64
R_14      float64
D_113     float64
D_115     float64
D_140     float64
R_15      

D_47      float64
B_5       float64
R_24      float64
D_65      float64
B_19      float64
D_119     float64
S_5       float64
B_14      float64
S_26      float64
D_91      float64
B_9       float64
R_16      float64
D_81      float64
D_133     float64
P_4       float64
B_37      float64
S_12      float64
D_89      float64
D_61      float64
D_122     float64
S_25      float64
R_8       float64
R_20      float64
D_92      float64
B_20      float64
D_144     float64
D_70      float64
B_27      float64
D_48      float64
target      int64
B_40      float64
B_22      float64
S_13      float64
B_25      float64
D_114     float64
B_33      float64
B_4       float64
S_15      float64
D_74      float64
S_23      float64
S_11      float64
B_7       float64
D_143     float64
S_6       float64
D_93      float64
D_131     float64
D_121     float64
R_25      float64
B_3       float64
D_120     float64
D_39      float64
R_14      float64
D_113     float64
D_115     float64
D_140     float64
R_15      

In [178]:
cat_feat = ['B_30', 'B_38', 'D_114', 'D_116', 'D_117', 'D_120', 'D_126', 'D_63', 'D_64', 'D_68']
num_feat = list(set(train.columns)-set(cat_feat)-set(['target']))
display(train[cat_feat], [train[col].value_counts() for col in train.columns if col in cat_feat])

Unnamed: 0,B_30,B_38,D_114,D_116,D_117,D_120,D_126,D_63,D_64,D_68
0,0.0,2.0,1.0,0.0,4.0,0.0,1.0,CO,O,6.0
1,0.0,1.0,1.0,0.0,4.0,0.0,1.0,CO,O,5.0
3,0.0,1.0,1.0,0.0,4.0,0.0,1.0,CO,O,6.0
4,1.0,6.0,0.0,0.0,-1.0,0.0,1.0,CO,U,3.0
5,0.0,2.0,1.0,0.0,-1.0,0.0,1.0,CL,R,6.0
...,...,...,...,...,...,...,...,...,...,...
54844,0.0,2.0,1.0,0.0,4.0,1.0,1.0,CR,O,5.0
54845,0.0,3.0,1.0,0.0,-1.0,0.0,1.0,CO,U,5.0
54846,0.0,2.0,1.0,0.0,5.0,0.0,1.0,CL,O,5.0
54847,0.0,1.0,,,,,,CO,,


[1.0    29495
 0.0    18312
 Name: D_114, dtype: int64,
 0.0    42177
 1.0     5630
 Name: D_120, dtype: int64,
 6.0    24846
 5.0    10718
 3.0     4316
 4.0     4239
 2.0     1959
 1.0     1235
 0.0      151
 Name: D_68, dtype: int64,
 0.0    42086
 1.0     6804
 2.0      454
 Name: B_30, dtype: int64,
 CO    36866
 CR     8241
 CL     3918
 XZ      194
 XM       83
 XL       63
 Name: D_63, dtype: int64,
 -1.0    12988
  3.0    10493
  4.0    10220
  2.0     5877
  5.0     3988
  6.0     3111
  1.0     1130
 Name: D_117, dtype: int64,
 0.0    47742
 1.0       65
 Name: D_116, dtype: int64,
 2.0    17274
 3.0    11357
 1.0    10332
 5.0     3972
 4.0     2595
 7.0     2269
 6.0     1545
 Name: B_38, dtype: int64,
  1.0    37966
  0.0     8116
 -1.0     2265
 Name: D_126, dtype: int64,
 O     25849
 U     13706
 R      7553
 -1      353
 Name: D_64, dtype: Int64]

In [179]:
for col in cat_feat:
    if train[col].dtype in ['string', 'O', 'str']:
        print('str', col)
    else:
        print('num', col)
        

num B_30
num B_38
num D_114
num D_116
num D_117
num D_120
num D_126
str D_63
str D_64
num D_68


In [180]:
### fill missing values ###

for col in cat_feat:
    if train[col].dtype in ['string', 'O', 'str']:
        train[col].fillna(value='M', inplace=True)
        test[col].fillna(value='M', inplace=True)
    else:
        train[col].fillna(value=-1000, inplace=True)
        test[col].fillna(value=-1000, inplace=True)

for col in num_feat:
    train[col].fillna(train[col].median(), inplace=True)
    test[col].fillna(train[col].median(), inplace=True)
    
#display(train.count(), test.count())
#display(train.count(), test.count())

In [181]:
train.dtypes

D_47      float64
B_5       float64
R_24      float64
D_65      float64
B_19      float64
D_119     float64
S_5       float64
B_14      float64
S_26      float64
D_91      float64
B_9       float64
R_16      float64
D_81      float64
D_133     float64
P_4       float64
B_37      float64
S_12      float64
D_89      float64
D_61      float64
D_122     float64
S_25      float64
R_8       float64
R_20      float64
D_92      float64
B_20      float64
D_144     float64
D_70      float64
B_27      float64
D_48      float64
target      int64
B_40      float64
B_22      float64
S_13      float64
B_25      float64
D_114     float64
B_33      float64
B_4       float64
S_15      float64
D_74      float64
S_23      float64
S_11      float64
B_7       float64
D_143     float64
S_6       float64
D_93      float64
D_131     float64
D_121     float64
R_25      float64
B_3       float64
D_120     float64
D_39      float64
R_14      float64
D_113     float64
D_115     float64
D_140     float64
R_15      

In [182]:
### OHE ###

X_train = train.copy()
y_train = X_train.pop('target')
X_test = test.copy()
y_test = X_test.pop('target')
display(X_test.head())
display(X_train.nunique())

### Do OHE for some features ###

# this code uses passthrough from 
# https://stackoverflow.com/questions/54160370/how-to-use-sklearn-column-transformer

feature_transformer = ColumnTransformer([
    ("cat", OneHotEncoder(sparse = False, handle_unknown="ignore"), cat_feat)],
    remainder="passthrough")

print('Number of features before transaformation: ', X_train.shape)
X_train = pd.DataFrame(feature_transformer.fit_transform(X_train), columns=feature_transformer.get_feature_names_out())
X_test = pd.DataFrame(feature_transformer.transform(X_test), columns=feature_transformer.get_feature_names_out())

print('time to do feature proprocessing: ', time.time()-time1)

print('Number of features after transaformation: ', X_train.shape)

Unnamed: 0,D_47,B_5,R_24,D_65,B_19,D_119,S_5,B_14,S_26,D_91,B_9,R_16,D_81,D_133,P_4,B_37,S_12,D_89,D_61,D_122,S_25,R_8,R_20,D_92,B_20,D_144,D_70,B_27,D_48,B_40,B_22,S_13,B_25,D_114,B_33,B_4,S_15,D_74,S_23,S_11,B_7,D_143,S_6,D_93,D_131,D_121,R_25,B_3,D_120,D_39,R_14,D_113,D_115,D_140,R_15,D_55,D_118,D_69,B_12,P_2,D_44,S_24,R_28,S_27,B_24,D_80,D_68,B_6,B_11,B_30,R_1,S_8,R_22,D_124,S_20,B_26,D_129,D_125,D_127,D_86,B_8,R_13,D_128,R_4,D_62,D_59,S_16,D_103,D_83,D_60,D_71,S_18,B_16,B_32,D_54,B_10,R_6,D_84,B_31,B_21,R_5,R_2,R_12,R_23,R_19,D_63,B_23,R_10,R_17,R_21,D_102,B_13,B_28,B_2,D_45,D_130,D_139,R_27,D_141,B_18,D_117,S_22,D_107,S_3,D_51,D_52,B_36,D_41,B_41,D_145,D_116,S_19,D_72,R_3,D_79,D_109,D_78,D_112,R_7,D_96,D_123,D_75,B_38,D_126,D_46,S_7,P_3,R_11,D_58,D_64,D_94,S_17,D_104,B_1,B_15,R_18
45810,-0.023208,0.006786,0.003466,0.008527,0.001508,0.215585,0.001776,0.004528,0.003382,0.00554,0.009095,0.002627,0.005194,0.005458,0.96196,0.009493,0.187637,0.005054,0.277665,0.429217,0.975758,0.007385,0.000985,0.009452,0.007116,0.005513,0.006781,0.003043,0.287675,0.008105,0.005367,0.007226,0.003385,-1000.0,1.007688,0.002959,0.50983,0.004201,0.131963,0.288504,0.000445,0.00612,1.008822,0.000508,0.005563,0.595825,0.000818,0.009632,-1000.0,0.007341,0.006255,0.008699,0.198263,0.005117,0.004117,0.183962,0.222743,0.005217,0.006373,0.692421,0.007659,0.193343,0.002906,0.292374,0.007365,0.004658,-1000.0,0.18178,0.008009,0.0,0.005394,0.000731,0.001813,0.273516,0.006377,0.00628,0.008811,0.005391,0.006197,0.005165,1.00406,0.003365,1.000395,0.00552,0.093395,0.379306,0.006663,0.00931,0.005138,1.004641,0.01657,0.004361,0.006806,0.008162,1.007515,0.245815,0.009645,0.005233,1,0.007994,0.000439,0.008274,1.006727,0.005689,0.009188,CO,0.004168,0.003507,0.005678,0.005809,0.008929,0.029184,0.009287,0.81717,0.003187,0.006228,0.006082,1.007891,0.006092,1.009506,-1000.0,0.347436,0.009325,0.163856,0.007113,0.14396,0.000436,0.000992,0.004802,0.006074,-1000.0,0.005245,0.005259,0.005611,0.005488,0.000364,0.005393,1.004163,0.008254,0.003215,0.005251,0.009233,1.0,-1000.0,0.459904,0.139736,0.617932,0.007088,0.003364,M,0.00873,0.00827,0.009303,0.00152,5.5e-05,0.00711
30971,0.882235,0.001231,0.000635,0.000694,0.000915,0.643212,0.000616,0.005195,0.006847,0.008704,0.000846,0.006711,0.00838,0.003372,0.006845,0.002553,0.194232,0.008932,0.045547,0.286309,0.978525,0.006119,0.009416,0.002561,0.004444,0.005066,0.007193,0.009455,0.048362,0.03569,0.007811,0.000865,0.004076,1.0,1.00811,0.031986,0.506095,0.006426,0.139927,0.288057,0.034703,0.005483,1.009783,0.007126,0.003805,0.68683,0.0054,0.000204,0.0,0.006754,0.002435,0.004639,0.421795,0.005603,0.005255,0.049583,0.651547,0.005919,0.030137,0.926444,0.009402,0.005098,0.005437,0.292374,0.003375,0.00475,6.0,0.190054,0.007138,0.0,0.002586,0.009827,0.005004,0.22733,0.009577,0.005929,0.008317,0.006107,0.006832,0.004245,0.001808,0.005766,1.007622,0.005447,0.195839,0.069902,0.009814,6.4e-05,0.006861,0.000496,0.010825,0.0084,0.005234,0.002425,1.003802,0.300936,0.006934,0.005915,1,0.009009,0.000765,0.009786,1.004412,0.000543,0.003454,CR,0.02,0.006403,0.008199,0.003198,0.00617,0.009382,0.022894,0.815761,0.687521,0.008473,0.004954,1.007397,0.00099,1.001,4.0,0.042244,0.001245,0.163856,0.007328,0.213015,0.003722,0.002623,0.008965,0.004058,0.0,0.00864,0.008713,0.104705,0.000325,0.00032,0.001913,1.003816,0.008051,0.006175,0.002976,0.002982,1.0,1.0,0.439917,0.139736,0.642611,0.502459,0.007237,O,0.009495,0.002342,0.00674,0.00433,0.003991,0.00905
2109,0.203856,0.002919,0.002427,0.002974,0.002234,0.097924,0.004441,0.00581,0.0055,0.008004,0.00683,0.008449,0.002752,0.008532,0.001927,0.001703,0.192424,0.006783,0.136817,0.434564,0.978624,0.008124,0.002965,0.006519,0.003894,0.001081,0.006288,0.009161,0.169444,0.014813,0.003039,0.006782,0.002961,1.0,1.000559,0.007749,0.501207,0.008978,0.140328,0.286253,0.017963,0.008063,0.007345,0.004199,0.005153,0.284347,0.009521,0.00746,0.0,0.004988,0.005544,0.407032,0.09884,0.007502,0.001671,0.080897,0.093032,0.007498,0.022164,0.859788,0.006692,0.0775,0.006197,0.292374,0.005049,0.004947,4.0,0.34288,0.001158,0.0,0.007896,0.007632,0.00541,0.229406,0.002517,0.004734,1.008839,0.001194,0.004114,0.002408,1.003322,0.004736,1.005547,0.008878,0.457775,0.397594,0.000338,1.006356,0.005,0.008892,0.006838,5e-06,0.002067,0.004938,1.007026,0.298831,0.007019,0.005276,1,0.002455,0.008824,0.007587,1.003755,0.009333,0.007241,CO,0.006574,0.009485,0.009174,0.002884,0.296818,0.007732,0.010056,0.817755,0.115183,0.009235,0.008632,1.006486,0.005805,1.002163,4.0,0.302088,0.341895,0.19999,0.340123,0.208339,0.00374,0.005724,0.008152,0.000614,0.0,0.006298,0.005825,0.001238,0.004376,0.007284,0.002114,1.008125,0.007412,0.009919,0.006063,0.006696,2.0,1.0,0.533845,0.156576,0.560486,0.007861,0.009127,O,0.003242,0.005456,0.973428,0.002232,0.002483,0.007198
50051,0.704787,0.079793,0.008621,0.003068,0.002732,0.859582,0.056001,0.195145,0.003144,0.000876,0.002284,0.00783,0.008127,0.0076,0.003679,0.087357,0.171351,0.00183,0.626568,0.717055,0.972611,0.003411,0.002838,0.001069,1.007457,0.008311,0.005463,0.000438,0.041434,0.038927,0.508727,0.508526,0.005743,0.0,0.004438,0.103203,0.208549,0.223603,0.133131,0.401402,0.072045,1.008426,0.001086,0.007052,0.002538,0.754719,0.009217,0.409959,0.0,0.005904,0.002385,0.005804,0.869935,0.002182,0.000956,0.080226,0.858319,0.004924,0.123303,0.611281,0.000411,0.972551,0.009665,0.298391,0.009063,0.002725,6.0,0.058187,0.070972,0.0,0.000548,0.708027,0.007454,0.099672,0.000893,0.009988,0.007895,0.004641,0.002913,0.006557,0.001999,0.006761,0.007551,0.002831,0.016237,0.445643,0.003407,1.002602,0.000924,0.733482,0.101709,0.003895,1.002947,0.008629,1.006455,0.068038,0.001366,0.000477,1,0.00255,0.009498,0.006737,1.000514,0.009759,0.009813,CR,0.061109,0.003426,0.001205,0.000722,0.315679,0.112003,0.121329,0.155461,0.44524,0.004461,1.001905,1.002199,0.923641,0.203749,2.0,0.955734,0.342053,0.150844,0.000892,0.186047,0.000315,0.001338,0.002588,0.27509,0.0,0.000442,0.002508,0.003369,0.006478,0.001216,0.000358,1.007055,0.000835,1.002109,0.004951,0.271567,3.0,1.0,0.430708,0.127093,0.693552,0.004917,0.180768,U,0.002011,0.003999,0.954053,0.087953,0.231197,0.007802
20960,0.082172,0.009904,0.004354,0.00629,0.583742,0.486102,0.006832,0.028813,0.004066,0.005657,0.548434,0.008123,0.005794,0.004096,0.000645,0.098781,0.186298,0.004067,0.884999,0.291259,0.969711,0.007528,0.007409,0.008687,1.007549,0.00476,0.009472,0.000986,0.888115,0.593757,0.502421,0.003779,0.043235,0.0,0.007354,0.465321,0.507214,0.429631,0.136377,0.283203,0.558515,1.005082,0.005916,0.000359,0.004872,0.576823,0.00037,0.527153,0.0,0.038611,0.005016,0.006851,0.500932,1.008748,0.000903,0.885377,0.494798,0.002514,0.008654,0.404054,0.633723,0.860143,0.003418,0.002252,0.007735,0.009475,6.0,0.005181,0.068254,1.0,0.005791,0.000316,0.004458,0.372075,0.00441,0.003174,0.003257,0.000965,0.0065,0.000897,1.008832,0.005962,0.004433,0.005896,0.008763,0.45962,0.006909,0.001518,0.005083,0.062982,0.0081,0.006105,1.003223,1.007101,1.001199,0.011924,0.004227,0.008247,1,0.000624,0.005675,0.009044,1.003595,0.001019,0.00808,CO,0.510385,0.002315,0.000729,0.005575,0.007055,0.003219,0.156079,0.04656,0.089275,0.002589,1.002698,1.005906,0.889918,0.182568,2.0,0.875796,0.003173,0.169742,0.009419,0.03806,0.000141,0.008056,0.003585,0.549144,0.0,0.0042,0.00134,0.002167,0.003633,0.008514,0.009302,1.005032,0.004457,0.001556,0.004344,0.476517,6.0,1.0,0.444448,0.101747,0.718126,0.503306,0.527613,R,0.007191,0.009234,0.007653,0.091957,0.00838,0.003537


D_47     49365
B_5      49365
R_24     49365
D_65     49365
B_19     49345
D_119    47807
S_5      49365
B_14     49365
S_26     49361
D_91     47987
B_9      49365
R_16     49365
D_81     49153
D_133    49013
P_4      49365
B_37     49365
S_12     49365
D_89     49121
D_61     44017
D_122    47807
S_25     49255
R_8      49365
R_20     49365
D_92     49365
B_20     49345
D_144    49033
D_70     48529
B_27     49345
D_48     42933
B_40     49365
B_22     49345
S_13     49365
B_25     49299
D_114        3
B_33     49345
B_4      49365
S_15     49365
D_74     49167
S_23     49361
S_11     49365
B_7      49365
D_143    48465
S_6      49365
D_93     49365
D_131    48465
D_121    47807
R_25     49365
B_3      49345
D_120        3
D_39     49365
R_14     49365
D_113    47807
D_115    47807
D_140    49035
R_15     49365
D_55     47701
D_118    47807
D_69     47659
B_12     49365
P_2      48947
D_44     46937
S_24     49205
R_28     49365
S_27     36905
B_24     49365
D_80     49167
D_68      

Number of features before transaformation:  (49365, 156)
time to do feature proprocessing:  4589.863786697388
Number of features after transaformation:  (49365, 198)


In [184]:
str_feat = [col for col in train.columns if train[col].dtype=='object']
str_feat

['D_63']

In [185]:
train[str_feat].head(10)

Unnamed: 0,D_63
0,CO
1,CO
3,CO
4,CO
5,CL
6,CO
7,CO
8,CO
9,CL
10,CO


In [186]:
train.dtypes

D_47      float64
B_5       float64
R_24      float64
D_65      float64
B_19      float64
D_119     float64
S_5       float64
B_14      float64
S_26      float64
D_91      float64
B_9       float64
R_16      float64
D_81      float64
D_133     float64
P_4       float64
B_37      float64
S_12      float64
D_89      float64
D_61      float64
D_122     float64
S_25      float64
R_8       float64
R_20      float64
D_92      float64
B_20      float64
D_144     float64
D_70      float64
B_27      float64
D_48      float64
target      int64
B_40      float64
B_22      float64
S_13      float64
B_25      float64
D_114     float64
B_33      float64
B_4       float64
S_15      float64
D_74      float64
S_23      float64
S_11      float64
B_7       float64
D_143     float64
S_6       float64
D_93      float64
D_131     float64
D_121     float64
R_25      float64
B_3       float64
D_120     float64
D_39      float64
R_14      float64
D_113     float64
D_115     float64
D_140     float64
R_15      

In [187]:
train.describe()

Unnamed: 0,D_47,B_5,R_24,D_65,B_19,D_119,S_5,B_14,S_26,D_91,B_9,R_16,D_81,D_133,P_4,B_37,S_12,D_89,D_61,D_122,S_25,R_8,R_20,D_92,B_20,D_144,D_70,B_27,D_48,target,B_40,B_22,S_13,B_25,D_114,B_33,B_4,S_15,D_74,S_23,S_11,B_7,D_143,S_6,D_93,D_131,D_121,R_25,B_3,D_120,D_39,R_14,D_113,D_115,D_140,R_15,D_55,D_118,D_69,B_12,P_2,D_44,S_24,R_28,S_27,B_24,D_80,D_68,B_6,B_11,B_30,R_1,S_8,R_22,D_124,S_20,B_26,D_129,D_125,D_127,D_86,B_8,R_13,D_128,R_4,D_62,D_59,S_16,D_103,D_83,D_60,D_71,S_18,B_16,B_32,D_54,B_10,R_6,D_84,B_31,B_21,R_5,R_2,R_12,R_23,R_19,B_23,R_10,R_17,R_21,D_102,B_13,B_28,B_2,D_45,D_130,D_139,R_27,D_141,B_18,D_117,S_22,D_107,S_3,D_51,D_52,B_36,D_41,B_41,D_145,D_116,S_19,D_72,R_3,D_79,D_109,D_78,D_112,R_7,D_96,D_123,D_75,B_38,D_126,D_46,S_7,P_3,R_11,D_58,D_94,S_17,D_104,B_1,B_15,R_18
count,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0,49365.0
mean,0.406627,0.082423,0.01887674,0.04179396,0.1513353,0.279385,0.085801,0.100383,0.06838555,0.05978214,0.1911401,0.05893295,0.05560024,0.04481998,0.1446612,0.123735,0.240575,0.006493125,0.426924,0.4046082,0.923865,0.03976188,0.03842496,0.0903633,0.2378849,0.0536813,0.1082142,0.005004689,0.370448,0.251372,0.1827179,0.1070847,0.2543187,0.103543,-30.963334,0.6115911,0.1729004,0.375682,0.153252,0.177616,0.375341,0.186608,0.179032,0.2438465,0.01474684,0.1005159,0.533886,0.008016705,0.1334118,-31.446774,0.1540732,0.275898,0.1519643,0.276974,0.02617987,0.01997641,0.295342,0.2835521,0.1110583,0.09310653,0.655293,0.113575,0.736838,0.005776375,0.2828892,0.03914361,0.1109415,-33.693447,0.155604,0.111444,-0.269179,0.0783719,0.326606,0.008573064,0.303071,0.01653307,0.0841795,0.431099,0.08178455,0.106436,0.03839755,0.451475,0.005483498,0.59057,0.03164572,0.175816,0.364209,0.04591986,0.460007,0.03913885,0.3771932,0.071569,0.03187111,0.3504496,0.02662183,0.983286,0.223226,0.05824338,0.0467071,0.99684,0.04575187,0.03657684,0.04821153,0.979997,0.005424949,0.02286871,0.172071,0.06651509,0.00537517,0.02062965,0.1874559,0.09947,0.154081,0.620357,0.251307,0.1989406,0.1791195,0.894338,0.1647928,0.595391,-29.315082,0.780858,0.2023511,0.214147,0.1435272,0.179646,0.006517128,0.06076272,0.03274534,0.06271653,-31.559506,0.005023433,0.03825085,0.1212031,0.06792204,0.006654417,0.06030658,0.8393011,0.08845786,0.03506088,0.04812094,0.1711968,2.29604,-19.898693,0.47179,0.207472,0.601582,0.0520003,0.2157872,0.0210786,0.04145855,0.4436354,0.124524,0.051912,0.005019684
std,0.234777,0.364134,0.1169906,0.3415093,0.2911589,0.254072,0.296474,0.2831,0.4401703,0.1822784,0.2923765,0.2363779,0.3455787,0.168018,0.3389535,0.212197,0.217772,0.01955919,0.359017,0.2333317,0.211489,0.3102226,0.4298028,0.2924905,0.3752813,0.1850527,0.2257278,0.002967788,0.305887,0.433807,0.9453417,0.2223681,0.2966256,0.204345,174.938023,0.4885505,0.2249124,0.196078,0.220676,0.260853,0.190135,0.230825,0.3791034,0.4263974,0.09828546,0.2913763,0.231896,0.05492685,0.2358576,174.850382,0.2745244,8.903325,0.2236608,0.254498,0.1439648,0.1214705,0.284895,0.2534383,4.139701,0.6239471,0.243996,0.2194469,0.802749,0.02824548,0.2795294,0.3158925,0.2152312,193.392001,1.125757,0.20852,20.627976,0.2251642,0.302795,0.05968589,0.222626,0.1068857,1.622748,0.4944545,0.2878889,0.301906,0.1796922,0.4970263,0.006859534,0.492019,0.1610809,0.212883,0.191893,0.6366033,0.4979,0.2833556,0.371942,0.450777,0.1617319,0.4031173,0.145442,0.145634,2.783145,0.6235146,0.317458,0.056127,0.5757629,0.2723547,0.2033673,0.151462,0.02081139,0.1324361,0.229974,0.3142248,0.008054822,0.1240275,0.272367,0.547775,0.226664,0.401389,0.242273,0.395382,0.3792017,0.31149,0.3485983,0.365435,175.249384,0.696794,0.25338,0.173978,0.2411048,0.174025,0.02302016,0.2048818,0.2435485,0.1953622,174.829747,0.002992185,0.2059127,0.2091102,0.2192259,0.04083982,0.25426,0.3710049,1.380762,0.1707841,0.2049909,0.225856,20.737662,142.222237,0.154378,0.187902,0.165988,0.1798686,0.2604627,0.1256852,0.2514833,0.4801079,0.212225,0.30196,0.003008054
min,-0.026593,1e-06,1.847634e-07,1.592383e-07,7.958311e-09,2e-06,1e-06,-0.804076,7.468806e-08,2.328698e-07,1.860822e-07,8.134349e-08,8.216729e-08,1.387257e-07,7.027182e-07,-0.524772,-0.387075,1.48122e-08,-0.00925,5.550462e-07,-2.453045,3.103594e-07,2.875065e-07,1.630582e-07,1.753788e-07,3.879443e-07,4.739344e-08,7.378395e-08,-0.00957,0.0,6.004212e-07,2.351054e-07,1.456153e-07,-0.200786,-1000.0,9.188518e-08,8.321215e-07,-0.299384,1e-06,-6.283708,-0.199681,-0.101928,4.047682e-08,2.740336e-07,5.499336e-08,4.823712e-07,-0.032075,1.7272e-07,5.357586e-07,-1000.0,1.533803e-07,2.208473e-08,6.998217e-09,2e-06,5.624259e-08,2.477822e-07,5e-06,1.865356e-07,3.338321e-08,9.331667e-07,-0.344972,2.222165e-09,-81.305385,2.163839e-07,8.61492e-08,2.443445e-08,5.962603e-08,-1000.0,-0.004288,1e-06,-1000.0,4.750836e-07,1e-06,9.569018e-09,-0.045431,2.298665e-07,2.564318e-08,3.901197e-07,7.497302e-08,3.193136e-08,3.496983e-08,2.162753e-07,1.582444e-08,2e-06,5.203178e-07,-0.001834,-0.083295,1.351091e-07,2e-06,1.252152e-08,8.988731e-07,4e-06,9.969877e-08,7.84357e-07,1.443041e-07,-0.001982,-0.002931,1.381436e-07,2.738734e-07,0.0,1.378001e-07,7.33415e-08,1.699266e-07,-0.090506,3.694188e-08,1.157886e-08,5e-06,4.081329e-08,2.580045e-07,9.657278e-08,1.189697e-07,1e-06,1.2e-05,9e-06,1.6e-05,1.28181e-09,5.287698e-08,-0.013255,3.920676e-07,3e-06,-1000.0,-73.357009,3.061934e-07,-0.299781,6.955128e-07,-0.006388,1.392531e-08,1.530981e-08,1.247751e-08,1.295045e-08,-1000.0,2.083787e-08,1.697843e-08,7.620066e-07,2.88819e-07,7.140634e-08,4.055843e-08,9.475116e-07,2.780381e-08,2.571728e-07,1.162739e-07,1.68799e-07,-1000.0,-1000.0,-3.741901,-0.211989,-0.933934,3.497858e-07,2.602841e-07,3.721699e-07,2.567393e-08,3.874737e-07,-0.25333,-0.991922,2.017355e-07
25%,0.232208,0.00728,0.002540673,0.002602738,0.003410537,0.06169,0.005609,0.008151,0.003378394,0.002861311,0.005760148,0.002688812,0.002630633,0.002750129,0.002956872,0.008849,0.186908,0.002532847,0.112693,0.1516445,0.970866,0.002570125,0.002532847,0.002726409,0.004068356,0.002763971,0.003442522,0.0025066,0.100578,0.0,0.01701039,0.003068409,0.004808179,0.005972,0.0,0.006277116,0.0270608,0.207639,0.005865,0.133388,0.282447,0.028112,0.003092466,0.003302769,0.002504495,0.002842668,0.362726,0.002523394,0.005249404,0.0,0.004542724,0.002520656,0.004484863,0.064914,0.002588093,0.002549476,0.059595,0.06614017,0.002688268,0.01083954,0.479701,0.003996453,0.813575,0.002470494,0.008482272,0.002571987,0.003588762,4.0,0.020487,0.006625,0.0,0.002893782,0.007677,0.002525277,0.141025,0.002515162,0.002631449,0.004493049,0.002783582,0.002810755,0.002557975,0.004569009,0.002535556,0.005982,0.002539721,0.037468,0.236511,0.002537822,0.00476,0.00264713,0.03300518,0.008612,0.00258782,0.006266576,0.002548652,1.002356,0.028684,0.002644107,0.002628788,1.0,0.002567517,0.002591067,0.002627385,1.002289,0.002504989,0.002561223,0.017432,0.002627704,0.002509217,0.002537347,0.004466169,0.009329,0.027057,0.103834,0.054546,0.00315782,0.003102863,1.001565,0.003123811,0.206672,-1.0,0.816644,0.00476036,0.137377,0.003601494,0.073695,0.002531256,0.002858412,0.00255647,0.003082908,0.0,0.002512011,0.002630021,0.004674325,0.002788597,0.00251773,0.002803058,1.001004,0.00259041,0.00261225,0.002711308,0.006776596,2.0,1.0,0.437906,0.100723,0.547022,0.002746094,0.005909254,0.002561311,0.002672997,0.004757356,0.008878,0.003157,0.002499304
50%,0.38187,0.015305,0.00506565,0.005220748,0.006875046,0.215585,0.013395,0.028626,0.006720487,0.005539606,0.02627647,0.0054237,0.005193616,0.00545751,0.005855545,0.03124,0.19084,0.005053796,0.380043,0.4292169,0.973674,0.005118684,0.005075989,0.005449204,0.008113982,0.005512573,0.006781019,0.004978748,0.287675,0.0,0.05829496,0.00614899,0.009784796,0.019708,1.0,1.00178,0.08189302,0.402068,0.075666,0.136205,0.289706,0.074877,0.006119977,0.006576766,0.005046065,0.005562737,0.595825,0.005011408,0.009794426,0.0,0.009021084,0.005046365,0.008699281,0.198263,0.005117006,0.005084382,0.183962,0.2227428,0.00521713,0.019131,0.692421,0.00765883,0.949582,0.004999556,0.2923743,0.005152577,0.007218937,6.0,0.082376,0.019482,0.0,0.005792831,0.32149,0.005056047,0.273516,0.005035628,0.005251176,0.008811179,0.005390677,0.005583647,0.005180989,0.009066983,0.005069616,1.000395,0.005150434,0.093395,0.379306,0.005088488,0.00931,0.005138042,0.2363793,0.012754,0.005138068,0.09176762,0.005115847,1.004876,0.110423,0.005190313,0.005233298,1.0,0.005109065,0.005167517,0.005214788,1.004827,0.00499948,0.005109066,0.05861,0.005269705,0.005022052,0.00509626,0.008929382,0.029184,0.076642,0.81424,0.179022,0.006227716,0.00608181,1.004364,0.006091752,0.64641,3.0,0.942406,0.009325485,0.163856,0.007232164,0.14396,0.005048099,0.005733838,0.005081278,0.006073515,0.0,0.004994355,0.005258869,0.009418705,0.005488232,0.004990743,0.005393232,1.003994,0.0051685,0.005141964,0.005250928,0.07427308,2.0,1.0,0.459904,0.139736,0.617932,0.005463659,0.107231,0.00513024,0.005358961,0.009302796,0.031581,0.006247,0.005019714
75%,0.561993,0.053475,0.007616124,0.007838738,0.09544519,0.434076,0.070985,0.100322,0.01094145,0.008226725,0.3404013,0.008114588,0.007738882,0.008133331,0.008759499,0.124155,0.209058,0.007558863,0.739702,0.5756018,0.976484,0.00763175,0.007609135,0.008160401,0.3585656,0.008261248,0.2505061,0.007504511,0.611068,1.0,0.2469733,0.00923486,0.4277734,0.10712,1.0,1.005928,0.2383249,0.504834,0.21928,0.139013,0.448758,0.271256,0.00906878,0.009850091,0.007591404,0.008289608,0.713,0.007525574,0.1582237,0.0,0.2367314,0.007595503,0.2073382,0.430186,0.007667439,0.007606437,0.490277,0.4387431,0.007659176,0.06865573,0.863013,0.1310072,0.971251,0.00749366,0.3976041,0.007766619,0.2030745,6.0,0.192198,0.102615,0.0,0.008694555,0.494954,0.007524128,0.415051,0.007569359,0.007875285,1.004134,0.008013263,0.008326339,0.007782935,1.004173,0.007583901,1.00468,0.007741995,0.239314,0.467622,0.007655574,1.004492,0.007581218,0.7062009,0.030992,0.007721078,0.7572315,0.007698105,1.007444,0.295437,0.007791254,0.007786522,1.0,0.007668423,0.007749389,0.007839678,1.00744,0.007491618,0.007685669,0.247001,0.007889362,0.007515413,0.007629077,0.3244297,0.087913,0.19274,1.002306,0.372082,0.009286163,0.009075623,1.007135,0.009054659,1.003268,4.0,0.964489,0.3393142,0.213042,0.3359091,0.23113,0.007535754,0.008643176,0.007651086,0.00904702,0.0,0.007489475,0.007882892,0.200445,0.008286251,0.007501615,0.007925094,1.007022,0.007711521,0.007709353,0.007759491,0.2689163,3.0,1.0,0.494927,0.251774,0.678076,0.00815914,0.3722718,0.007670178,0.008066564,0.9639938,0.126471,0.00927,0.007517327
max,1.326108,34.817988,1.009997,35.63528,1.009985,1.974599,26.333936,26.327069,42.0999,2.506917,16.15858,10.00435,8.008345,1.509732,1.227774,1.327678,12.030954,1.000359,31.748039,1.57822,3.150823,15.00412,40.00307,2.00997,1.01,1.343272,5.756029,0.1482431,2.655526,1.0,191.179,2.508282,1.009993,3.561106,1.0,1.01,3.069028,4.90213,2.858731,11.253126,2.205938,1.252743,1.009999,1.009999,1.009994,1.302528,1.784552,1.009913,1.440807,1.0,4.450312,1016.305,2.602699,1.996929,1.009992,1.009977,1.443116,1.968658,842.4615,77.32847,1.009983,3.132336,1.047906,1.009664,4.625868,16.86946,6.609842,6.0,176.747005,1.489733,2.0,2.259461,1.231363,1.009908,1.959851,1.009967,249.6518,1.009999,6.004101,1.009985,1.009997,1.0108,0.422015,1.012245,1.01,5.433479,1.731665,54.89045,1.01,11.00978,1.01,55.511433,1.009997,1.009999,1.009977,1.009999,486.984492,14.44788,16.00742,1.0,49.60182,12.00083,1.01,1.01,1.009878,1.009997,1.424411,9.003333,0.6071508,1.009995,1.089973,73.218177,12.355911,1.01,1.555569,1.013699,1.01,1.009999,1.240671,1.009999,6.0,1.032563,2.671995,2.468565,2.667983,1.009988,0.9462136,4.174959,15.00785,4.819433,1.0,0.07261959,3.009821,5.006648,6.503098,1.009738,5.504998,1.01,137.396,1.009993,3.006997,2.801127,7.0,1.0,5.757775,2.724281,1.902146,7.505761,1.261085,1.009979,4.049044,1.095036,1.323981,31.132379,0.1693831
