Outline:
1. Load evth.
2. Preclean data.
3. Train-test split.
4. Missing values.
5. FE.
6. Modeling.
7. FI.
8. predictions.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import os, time, warnings, gzip, gc, random, math, shap, pickle, optuna
from IPython.display import display
from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
from matplotlib_venn import venn3, venn3_circles
from matplotlib import pyplot as plt
%matplotlib inline

from sklearn.preprocessing import LabelBinarizer, LabelEncoder, OrdinalEncoder, OneHotEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split, KFold
from sklearn.compose import ColumnTransformer, make_column_transformer
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score, precision_recall_curve, auc
from sklearn.metrics import mean_squared_error, mean_absolute_error, roc_auc_score
from sklearn.inspection import permutation_importance
from category_encoders import MEstimateEncoder
from xgboost import XGBClassifier

pd.set_option('display.max_columns', 5000)
pd.set_option('display.max_rows',200)

warnings.filterwarnings("ignore")

# target encoding code:

### target encoding ###
# source: https://www.kaggle.com/code/ryanholbrook/feature-engineering-for-house-prices/notebook

class CrossFoldEncoder:
    def __init__(self, encoder, **kwargs):
        self.encoder_ = encoder
        self.kwargs_ = kwargs  # keyword arguments for the encoder
        self.cv_ = KFold(n_splits=4)

    # Fit an encoder on one split and transform the feature on the
    # other. Iterating over the splits in all folds gives a complete
    # transformation. We also now have one trained encoder on each
    # fold.
    def fit_transform(self, X, y, cols):
        self.fitted_encoders_ = []
        self.cols_ = cols
        X_encoded = []
        for idx_encode, idx_train in self.cv_.split(X):
            fitted_encoder = self.encoder_(cols=cols, **self.kwargs_)
            fitted_encoder.fit(
                X.iloc[idx_encode, :], y.iloc[idx_encode],
            )
            X_encoded.append(fitted_encoder.transform(X.iloc[idx_train, :])[cols])
            self.fitted_encoders_.append(fitted_encoder)
        X_encoded = pd.concat(X_encoded)
        X_encoded.columns = [name + "_encoded" for name in X_encoded.columns]
        return X_encoded

    # To transform the test data, average the encodings learned from
    # each fold.
    def transform(self, X):
        from functools import reduce

        X_encoded_list = []
        for fitted_encoder in self.fitted_encoders_:
            X_encoded = fitted_encoder.transform(X)
            X_encoded_list.append(X_encoded[self.cols_])
        X_encoded = reduce(
            lambda x, y: x.add(y, fill_value=0), X_encoded_list
        ) / len(X_encoded_list)
        X_encoded.columns = [name + "_encoded" for name in X_encoded.columns]
        return X_encoded


In [2]:
def TargetEncoderMP(train_set, test_set, feature_cols, target_col, M=5):
    
    """ This function implements terget encoding on train-test split with 2-fold CV
    It relies on previously defined CrossFoldEncoder, stolen from somehwere on SO.
    m parameter controls smoothing and is defaulted at 5 """
    
    encoder = CrossFoldEncoder(MEstimateEncoder, m=M)
    train_encoded = encoder.fit_transform(train_set, train_set[target_col], cols=feature_cols)
    test_encoded = encoder.transform(test_set)

    train_set.drop(columns=feature_cols, inplace=True)
    test_set.drop(columns=feature_cols, inplace=True)
    train_set = pd.concat([train_set, train_encoded], axis = 1)
    test_set = pd.concat([test_set, test_encoded], axis = 1)
    
    return ([train_set, test_set])
    

In [3]:
# 1. Load data #

train = pd.read_csv('../input/tabular-playground-series-may-2022/train.csv',
                   skiprows=lambda i: i>0 and random.random() > 0.1)
display(train.shape, train.head())

display(train.target.value_counts())
num_cols = [col for col in train.columns if train[col].nunique()>10]
num_cols.remove('f_27')
cat_cols = list(set(train.columns) - set(num_cols) - set(['target']))
print('num_cols: ', num_cols, '\n', 'cat_cols: ', cat_cols)
display(train[num_cols].describe())
cat_cols = list(set(train.columns) - set(num_cols) - set(['target']))
display([train[col].value_counts() for col in cat_cols])

#[train[col].value_counts() for col in train.columns if train[col].nunique()<10]
#display(train.count())

(90388, 33)

Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30,target
0,0,-1.373246,0.238887,-0.243376,0.567405,-0.647715,0.839326,0.113133,1,5,1,3,3,3,1,6,1,0,7,4,0.298218,-0.919717,3.058541,-2.540739,0.766952,-2.730628,-0.208177,1.363402,ABABDADBAB,67.609153,0,0,0
1,1,1.697021,-1.710322,-2.230332,-0.545661,1.113173,-1.552175,0.447825,1,3,4,0,2,3,0,1,0,4,6,0,-3.147667,-1.075434,2.17905,2.278315,-0.633658,-1.217077,-3.782194,-0.058316,ACACCADCEB,377.096415,0,0,1
2,3,-0.118172,-0.587835,-0.804638,2.086822,0.371005,-0.128831,-0.282575,3,2,1,0,1,6,4,2,3,3,0,3,1.081084,-2.100177,-2.343819,0.572594,-1.653213,1.686035,-2.533098,-0.608601,BDBBAACBCB,210.826205,0,0,1
3,36,-1.102277,-1.284341,-0.205395,0.991235,0.627997,-0.42457,1.096652,3,2,4,0,3,5,3,2,2,3,1,1,1.97738,-0.324952,-0.903274,-2.658195,1.604902,-3.068604,-3.955198,3.423714,ACBDCADFCB,-97.235293,1,1,0
4,52,-0.440209,-1.47986,-0.689924,-0.720858,0.831265,0.915615,0.205108,2,3,1,1,2,2,3,2,2,1,2,4,1.298871,1.33926,-3.305279,-1.36973,-5.850482,-0.789891,0.222459,1.274465,BBBDDBCGAB,-103.423547,0,1,0


0    46248
1    44140
Name: target, dtype: int64

num_cols:  ['id', 'f_00', 'f_01', 'f_02', 'f_03', 'f_04', 'f_05', 'f_06', 'f_07', 'f_08', 'f_09', 'f_10', 'f_11', 'f_12', 'f_13', 'f_14', 'f_15', 'f_16', 'f_17', 'f_18', 'f_19', 'f_20', 'f_21', 'f_22', 'f_23', 'f_24', 'f_25', 'f_26', 'f_28'] 
 cat_cols:  ['f_30', 'f_29', 'f_27']


Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_28
count,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0,90388.0
mean,449643.598287,0.004109,0.00204,-0.006131,-0.000407,-0.000726,0.001138,0.004447,2.02871,2.052452,2.359793,2.180068,1.800316,2.836361,2.244181,1.511816,2.102691,2.099394,1.857879,2.060572,0.304031,-0.174891,-0.165422,-0.000308,-0.368219,-0.338323,0.171845,0.35796,-0.452057
std,260066.926368,0.996102,0.998031,1.001925,0.999121,1.000002,1.001956,0.9964,1.656558,1.587608,1.63493,1.651999,1.535493,1.75657,1.537174,1.356981,1.569034,1.564056,1.467413,1.559559,2.303764,2.393958,2.484271,2.459214,2.460466,2.387989,2.41156,2.477665,238.775239
min,0.0,-4.514945,-4.544705,-4.183463,-4.658816,-4.3734,-4.334465,-4.012299,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-10.484786,-11.257917,-11.534975,-11.741608,-10.903498,-10.394414,-11.716732,-11.648411,-1076.052413
25%,225008.0,-0.671127,-0.670606,-0.684153,-0.672306,-0.675644,-0.672048,-0.666135,1.0,1.0,1.0,1.0,1.0,2.0,1.0,0.0,1.0,1.0,1.0,1.0,-1.236628,-1.789839,-1.827183,-1.651322,-2.03352,-1.952281,-1.441384,-1.250699,-159.885352
50%,449022.0,0.008978,0.000455,-0.00452,-0.006372,0.002446,0.001015,0.00676,2.0,2.0,2.0,2.0,2.0,3.0,2.0,1.0,2.0,2.0,2.0,2.0,0.330606,-0.178106,-0.161297,0.039983,-0.401743,-0.342429,0.153363,0.408085,-1.456757
75%,675743.5,0.675508,0.674482,0.67388,0.675349,0.67452,0.676253,0.67527,3.0,3.0,3.0,3.0,3.0,4.0,3.0,2.0,3.0,3.0,3.0,3.0,1.871508,1.448788,1.498065,1.683901,1.261544,1.269718,1.7993,2.021962,159.086522
max,899984.0,4.548971,4.555183,4.487213,4.10274,4.151319,4.971881,4.452692,15.0,13.0,12.0,13.0,12.0,14.0,12.0,11.0,13.0,13.0,11.0,13.0,12.079667,9.309242,11.071167,9.853349,12.2471,10.839238,11.003019,12.913041,1055.956276


[2    30495
 0    30164
 1    29729
 Name: f_30, dtype: int64,
 0    59118
 1    31270
 Name: f_29, dtype: int64,
 BBBCBBCOBB    4
 BBAABADFBB    4
 BCBBBABICB    4
 BBACAADCBD    4
 ADBABABHBC    4
              ..
 AAAEBAEAAA    1
 BCBAAADFAB    1
 ACBBBBDEAB    1
 ACAEAABFCA    1
 BCBBAACNDB    1
 Name: f_27, Length: 88305, dtype: int64]

In [4]:
# 3. split data #

#train_test_split approach does not work when I use TE.

test_size = 0.1
train.reset_index(inplace=True, drop=True)
test_index = random.sample(list(train.index), int(test_size*train.shape[0]))
train_ = train.iloc[list(set(train.index)-set(test_index))]
test = train.iloc[test_index]
display(train.shape, test.shape, train.head(3), test.head(3))

train0, test0 = train.copy(), test.copy()

(90388, 33)

(9038, 33)

Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30,target
0,0,-1.373246,0.238887,-0.243376,0.567405,-0.647715,0.839326,0.113133,1,5,1,3,3,3,1,6,1,0,7,4,0.298218,-0.919717,3.058541,-2.540739,0.766952,-2.730628,-0.208177,1.363402,ABABDADBAB,67.609153,0,0,0
1,1,1.697021,-1.710322,-2.230332,-0.545661,1.113173,-1.552175,0.447825,1,3,4,0,2,3,0,1,0,4,6,0,-3.147667,-1.075434,2.17905,2.278315,-0.633658,-1.217077,-3.782194,-0.058316,ACACCADCEB,377.096415,0,0,1
2,3,-0.118172,-0.587835,-0.804638,2.086822,0.371005,-0.128831,-0.282575,3,2,1,0,1,6,4,2,3,3,0,3,1.081084,-2.100177,-2.343819,0.572594,-1.653213,1.686035,-2.533098,-0.608601,BDBBAACBCB,210.826205,0,0,1


Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30,target
6750,65970,1.190404,0.546021,0.175603,0.381544,0.343525,1.429484,1.070884,3,0,1,0,0,3,1,1,0,5,1,0,2.303914,1.904145,4.756316,-4.060781,0.130996,0.878569,-1.35815,-0.812604,ACAEEABADA,430.712302,0,1,1
71274,709829,1.192678,-1.573512,-0.5868,-1.071165,-0.059885,-1.48499,-0.285908,2,3,4,2,2,1,1,2,3,6,0,3,0.727097,-2.754576,2.551739,1.347286,-1.780342,-1.981212,1.703427,2.05325,BABADADTCD,-176.212503,0,1,1
71742,714548,1.15222,-0.042901,0.311167,0.794867,-0.010371,1.058612,-0.379048,1,1,3,1,7,5,3,3,0,3,1,1,1.393574,2.585066,-5.359532,-2.731199,-3.153113,-0.882812,-0.518798,-4.716998,AAAGABBNBB,152.906246,0,0,1


In [5]:
# 5. FE #

# first do TE

train, test = train0.copy(), test0.copy()

display(train.head(), test.head())
train, test = TargetEncoderMP(train, test, ['f_27'], 'target')
display(train.head(), test.head())

# then extract a target

X_train = train.copy()
y_train = X_train.pop('target')
X_test = test.copy()
y_test = X_test.pop('target')

# then do OHE with columntransformer

ohe_cols = ['f_29', 'f_30']
feature_transformer = ColumnTransformer([
   ("cat", OneHotEncoder(sparse = False, handle_unknown="ignore"), ohe_cols)],
   remainder="passthrough")
print('Number of features before transaformation: ', X_train.shape)
X_train = pd.DataFrame(feature_transformer.fit_transform(X_train), columns=feature_transformer.get_feature_names_out())
X_test = pd.DataFrame(feature_transformer.transform(X_test), columns=feature_transformer.get_feature_names_out())
display(X_train.head(), X_test.head())


Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30,target
0,0,-1.373246,0.238887,-0.243376,0.567405,-0.647715,0.839326,0.113133,1,5,1,3,3,3,1,6,1,0,7,4,0.298218,-0.919717,3.058541,-2.540739,0.766952,-2.730628,-0.208177,1.363402,ABABDADBAB,67.609153,0,0,0
1,1,1.697021,-1.710322,-2.230332,-0.545661,1.113173,-1.552175,0.447825,1,3,4,0,2,3,0,1,0,4,6,0,-3.147667,-1.075434,2.17905,2.278315,-0.633658,-1.217077,-3.782194,-0.058316,ACACCADCEB,377.096415,0,0,1
2,3,-0.118172,-0.587835,-0.804638,2.086822,0.371005,-0.128831,-0.282575,3,2,1,0,1,6,4,2,3,3,0,3,1.081084,-2.100177,-2.343819,0.572594,-1.653213,1.686035,-2.533098,-0.608601,BDBBAACBCB,210.826205,0,0,1
3,36,-1.102277,-1.284341,-0.205395,0.991235,0.627997,-0.42457,1.096652,3,2,4,0,3,5,3,2,2,3,1,1,1.97738,-0.324952,-0.903274,-2.658195,1.604902,-3.068604,-3.955198,3.423714,ACBDCADFCB,-97.235293,1,1,0
4,52,-0.440209,-1.47986,-0.689924,-0.720858,0.831265,0.915615,0.205108,2,3,1,1,2,2,3,2,2,1,2,4,1.298871,1.33926,-3.305279,-1.36973,-5.850482,-0.789891,0.222459,1.274465,BBBDDBCGAB,-103.423547,0,1,0


Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_27,f_28,f_29,f_30,target
6750,65970,1.190404,0.546021,0.175603,0.381544,0.343525,1.429484,1.070884,3,0,1,0,0,3,1,1,0,5,1,0,2.303914,1.904145,4.756316,-4.060781,0.130996,0.878569,-1.35815,-0.812604,ACAEEABADA,430.712302,0,1,1
71274,709829,1.192678,-1.573512,-0.5868,-1.071165,-0.059885,-1.48499,-0.285908,2,3,4,2,2,1,1,2,3,6,0,3,0.727097,-2.754576,2.551739,1.347286,-1.780342,-1.981212,1.703427,2.05325,BABADADTCD,-176.212503,0,1,1
71742,714548,1.15222,-0.042901,0.311167,0.794867,-0.010371,1.058612,-0.379048,1,1,3,1,7,5,3,3,0,3,1,1,1.393574,2.585066,-5.359532,-2.731199,-3.153113,-0.882812,-0.518798,-4.716998,AAAGABBNBB,152.906246,0,0,1
17792,176324,-1.753778,-0.472221,0.255829,-1.425182,-0.753593,1.014755,-2.642095,4,2,5,0,3,4,4,1,6,0,2,2,1.188931,-3.757693,0.691058,-0.413274,-3.73086,-2.300765,-0.267999,-4.403298,AABCBABPCF,-766.130454,0,1,0
42974,427637,1.508565,-0.697014,-1.980226,0.99079,2.150346,-0.369277,-0.04568,2,1,1,0,1,4,2,0,0,1,4,4,2.391394,-2.433326,-1.511281,-1.666687,-3.308513,4.321432,-0.075165,-1.778567,AEBDBBCABD,157.163871,0,0,1


Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_28,f_29,f_30,target,f_27_encoded
0,0,-1.373246,0.238887,-0.243376,0.567405,-0.647715,0.839326,0.113133,1,5,1,3,3,3,1,6,1,0,7,4,0.298218,-0.919717,3.058541,-2.540739,0.766952,-2.730628,-0.208177,1.363402,67.609153,0,0,0,0.488649
1,1,1.697021,-1.710322,-2.230332,-0.545661,1.113173,-1.552175,0.447825,1,3,4,0,2,3,0,1,0,4,6,0,-3.147667,-1.075434,2.17905,2.278315,-0.633658,-1.217077,-3.782194,-0.058316,377.096415,0,0,1,0.488649
2,3,-0.118172,-0.587835,-0.804638,2.086822,0.371005,-0.128831,-0.282575,3,2,1,0,1,6,4,2,3,3,0,3,1.081084,-2.100177,-2.343819,0.572594,-1.653213,1.686035,-2.533098,-0.608601,210.826205,0,0,1,0.573874
3,36,-1.102277,-1.284341,-0.205395,0.991235,0.627997,-0.42457,1.096652,3,2,4,0,3,5,3,2,2,3,1,1,1.97738,-0.324952,-0.903274,-2.658195,1.604902,-3.068604,-3.955198,3.423714,-97.235293,1,1,0,0.488649
4,52,-0.440209,-1.47986,-0.689924,-0.720858,0.831265,0.915615,0.205108,2,3,1,1,2,2,3,2,2,1,2,4,1.298871,1.33926,-3.305279,-1.36973,-5.850482,-0.789891,0.222459,1.274465,-103.423547,0,1,0,0.573874


Unnamed: 0,id,f_00,f_01,f_02,f_03,f_04,f_05,f_06,f_07,f_08,f_09,f_10,f_11,f_12,f_13,f_14,f_15,f_16,f_17,f_18,f_19,f_20,f_21,f_22,f_23,f_24,f_25,f_26,f_28,f_29,f_30,target,f_27_encoded
6750,65970,1.190404,0.546021,0.175603,0.381544,0.343525,1.429484,1.070884,3,0,1,0,0,3,1,1,0,5,1,0,2.303914,1.904145,4.756316,-4.060781,0.130996,0.878569,-1.35815,-0.812604,430.712302,0,1,1,0.55231
71274,709829,1.192678,-1.573512,-0.5868,-1.071165,-0.059885,-1.48499,-0.285908,2,3,4,2,2,1,1,2,3,6,0,3,0.727097,-2.754576,2.551739,1.347286,-1.780342,-1.981212,1.703427,2.05325,-176.212503,0,1,1,0.552315
71742,714548,1.15222,-0.042901,0.311167,0.794867,-0.010371,1.058612,-0.379048,1,1,3,1,7,5,3,3,0,3,1,1,1.393574,2.585066,-5.359532,-2.731199,-3.153113,-0.882812,-0.518798,-4.716998,152.906246,0,0,1,0.552315
17792,176324,-1.753778,-0.472221,0.255829,-1.425182,-0.753593,1.014755,-2.642095,4,2,5,0,3,4,4,1,6,0,2,2,1.188931,-3.757693,0.691058,-0.413274,-3.73086,-2.300765,-0.267999,-4.403298,-766.130454,0,1,0,0.42731
42974,427637,1.508565,-0.697014,-1.980226,0.99079,2.150346,-0.369277,-0.04568,2,1,1,0,1,4,2,0,0,1,4,4,2.391394,-2.433326,-1.511281,-1.666687,-3.308513,4.321432,-0.075165,-1.778567,157.163871,0,0,1,0.552276


Number of features before transaformation:  (90388, 32)


Unnamed: 0,cat__f_29_0,cat__f_29_1,cat__f_30_0,cat__f_30_1,cat__f_30_2,remainder__id,remainder__f_00,remainder__f_01,remainder__f_02,remainder__f_03,remainder__f_04,remainder__f_05,remainder__f_06,remainder__f_07,remainder__f_08,remainder__f_09,remainder__f_10,remainder__f_11,remainder__f_12,remainder__f_13,remainder__f_14,remainder__f_15,remainder__f_16,remainder__f_17,remainder__f_18,remainder__f_19,remainder__f_20,remainder__f_21,remainder__f_22,remainder__f_23,remainder__f_24,remainder__f_25,remainder__f_26,remainder__f_28,remainder__f_27_encoded
0,1.0,0.0,1.0,0.0,0.0,0.0,-1.373246,0.238887,-0.243376,0.567405,-0.647715,0.839326,0.113133,1.0,5.0,1.0,3.0,3.0,3.0,1.0,6.0,1.0,0.0,7.0,4.0,0.298218,-0.919717,3.058541,-2.540739,0.766952,-2.730628,-0.208177,1.363402,67.609153,0.488649
1,1.0,0.0,1.0,0.0,0.0,1.0,1.697021,-1.710322,-2.230332,-0.545661,1.113173,-1.552175,0.447825,1.0,3.0,4.0,0.0,2.0,3.0,0.0,1.0,0.0,4.0,6.0,0.0,-3.147667,-1.075434,2.17905,2.278315,-0.633658,-1.217077,-3.782194,-0.058316,377.096415,0.488649
2,1.0,0.0,1.0,0.0,0.0,3.0,-0.118172,-0.587835,-0.804638,2.086822,0.371005,-0.128831,-0.282575,3.0,2.0,1.0,0.0,1.0,6.0,4.0,2.0,3.0,3.0,0.0,3.0,1.081084,-2.100177,-2.343819,0.572594,-1.653213,1.686035,-2.533098,-0.608601,210.826205,0.573874
3,0.0,1.0,0.0,1.0,0.0,36.0,-1.102277,-1.284341,-0.205395,0.991235,0.627997,-0.42457,1.096652,3.0,2.0,4.0,0.0,3.0,5.0,3.0,2.0,2.0,3.0,1.0,1.0,1.97738,-0.324952,-0.903274,-2.658195,1.604902,-3.068604,-3.955198,3.423714,-97.235293,0.488649
4,1.0,0.0,0.0,1.0,0.0,52.0,-0.440209,-1.47986,-0.689924,-0.720858,0.831265,0.915615,0.205108,2.0,3.0,1.0,1.0,2.0,2.0,3.0,2.0,2.0,1.0,2.0,4.0,1.298871,1.33926,-3.305279,-1.36973,-5.850482,-0.789891,0.222459,1.274465,-103.423547,0.573874


Unnamed: 0,cat__f_29_0,cat__f_29_1,cat__f_30_0,cat__f_30_1,cat__f_30_2,remainder__id,remainder__f_00,remainder__f_01,remainder__f_02,remainder__f_03,remainder__f_04,remainder__f_05,remainder__f_06,remainder__f_07,remainder__f_08,remainder__f_09,remainder__f_10,remainder__f_11,remainder__f_12,remainder__f_13,remainder__f_14,remainder__f_15,remainder__f_16,remainder__f_17,remainder__f_18,remainder__f_19,remainder__f_20,remainder__f_21,remainder__f_22,remainder__f_23,remainder__f_24,remainder__f_25,remainder__f_26,remainder__f_28,remainder__f_27_encoded
0,1.0,0.0,0.0,1.0,0.0,65970.0,1.190404,0.546021,0.175603,0.381544,0.343525,1.429484,1.070884,3.0,0.0,1.0,0.0,0.0,3.0,1.0,1.0,0.0,5.0,1.0,0.0,2.303914,1.904145,4.756316,-4.060781,0.130996,0.878569,-1.35815,-0.812604,430.712302,0.55231
1,1.0,0.0,0.0,1.0,0.0,709829.0,1.192678,-1.573512,-0.5868,-1.071165,-0.059885,-1.48499,-0.285908,2.0,3.0,4.0,2.0,2.0,1.0,1.0,2.0,3.0,6.0,0.0,3.0,0.727097,-2.754576,2.551739,1.347286,-1.780342,-1.981212,1.703427,2.05325,-176.212503,0.552315
2,1.0,0.0,1.0,0.0,0.0,714548.0,1.15222,-0.042901,0.311167,0.794867,-0.010371,1.058612,-0.379048,1.0,1.0,3.0,1.0,7.0,5.0,3.0,3.0,0.0,3.0,1.0,1.0,1.393574,2.585066,-5.359532,-2.731199,-3.153113,-0.882812,-0.518798,-4.716998,152.906246,0.552315
3,1.0,0.0,0.0,1.0,0.0,176324.0,-1.753778,-0.472221,0.255829,-1.425182,-0.753593,1.014755,-2.642095,4.0,2.0,5.0,0.0,3.0,4.0,4.0,1.0,6.0,0.0,2.0,2.0,1.188931,-3.757693,0.691058,-0.413274,-3.73086,-2.300765,-0.267999,-4.403298,-766.130454,0.42731
4,1.0,0.0,1.0,0.0,0.0,427637.0,1.508565,-0.697014,-1.980226,0.99079,2.150346,-0.369277,-0.04568,2.0,1.0,1.0,0.0,1.0,4.0,2.0,0.0,0.0,1.0,4.0,4.0,2.391394,-2.433326,-1.511281,-1.666687,-3.308513,4.321432,-0.075165,-1.778567,157.163871,0.552276


In [6]:
# 6. fit XGBoost #

In [7]:
# 7. Do FI analysis