# Problem Statement

### Credit Card Lead Prediction

Happy Customer Bank is a mid-sized private bank that deals in all kinds of banking products, like Savings accounts, Current accounts, investment products, credit products, among other offerings.


The bank also cross-sells products to its existing customers and to do so they use different kinds of communication like tele-calling, e-mails, recommendations on net banking, mobile banking, etc. 


In this case, the Happy Customer Bank wants to cross sell its credit cards to its existing customers. The bank has identified a set of customers that are eligible for taking these credit cards.


Now, the bank is looking for your help in identifying customers that could show higher intent towards a recommended credit card, given:

    Customer details (gender, age, region etc.)
    Details of his/her relationship with the bank (Channel_Code,Vintage, 'Avg_Asset_Value etc.)


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier


from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

from sklearn.linear_model import LogisticRegression

pd.options.display.max_columns = 200

In [2]:
data = pd.read_csv("train_s3TEQDk.csv")
test = pd.read_csv("test_mSzZ8RL.csv")
print(f"Train shape {data.shape}, Test Shape {test.shape}")

Train shape (245725, 11), Test Shape (105312, 10)


In [3]:
train,valid = train_test_split(data,test_size=0.20,random_state=345,stratify=data['Is_Lead'])
train = train.copy()
valid = valid.copy()
print(f"Train shape {train.shape} Validation shape {valid.shape}")

Train shape (196580, 11) Validation shape (49145, 11)


In [4]:
train.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
53078,N3ZQ84QR,Female,46,RG280,Self_Employed,X2,51,No,863584,Yes,0
213644,JWGAMK7P,Male,67,RG258,Other,X2,43,Yes,706126,No,0
131870,CX9NGNQT,Male,46,RG279,Self_Employed,X2,26,Yes,422207,Yes,0


In [5]:
test.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active
0,VBENBARO,Male,29,RG254,Other,X1,25,Yes,742366,No
1,CCMEWNKY,Male,43,RG268,Other,X2,49,,925537,No
2,VK3KGA9M,Male,31,RG270,Salaried,X1,14,No,215949,No


In [6]:
valid.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
148453,OK9KJGZ2,Female,55,RG268,Self_Employed,X1,37,No,929257,No,0
117997,TTC7CPSI,Male,57,RG283,Self_Employed,X3,87,,909740,No,0
5432,MPUWVRAX,Male,39,RG275,Salaried,X1,8,Yes,961742,Yes,0


In [7]:
train['ID'].nunique()

196580

In [8]:
train.isna().sum()

ID                         0
Gender                     0
Age                        0
Region_Code                0
Occupation                 0
Channel_Code               0
Vintage                    0
Credit_Product         23525
Avg_Account_Balance        0
Is_Active                  0
Is_Lead                    0
dtype: int64

In [9]:
train['Gender'].value_counts(normalize=True)

Male      0.546693
Female    0.453307
Name: Gender, dtype: float64

In [10]:
train['Region_Code'].nunique()

35

In [11]:
train['Occupation'].value_counts(normalize=True)

Self_Employed    0.411532
Salaried         0.292731
Other            0.284866
Entrepreneur     0.010871
Name: Occupation, dtype: float64

In [12]:
train['Channel_Code'].value_counts(normalize=True)

X1    0.421279
X3    0.280359
X2    0.275669
X4    0.022693
Name: Channel_Code, dtype: float64

In [13]:
train['Credit_Product'].value_counts(normalize=True)

No     0.667019
Yes    0.332981
Name: Credit_Product, dtype: float64

In [14]:
train['Avg_Account_Balance'].describe()

count    1.965800e+05
mean     1.129489e+06
std      8.532486e+05
min      2.079000e+04
25%      6.042470e+05
50%      8.954865e+05
75%      1.368733e+06
max      1.035201e+07
Name: Avg_Account_Balance, dtype: float64

In [15]:
train['Is_Active'].value_counts(normalize=True)

No     0.611375
Yes    0.388625
Name: Is_Active, dtype: float64

In [16]:
train['Is_Lead'].value_counts(normalize=True)

0    0.762794
1    0.237206
Name: Is_Lead, dtype: float64

In [17]:
train['Age'].describe()

count    196580.000000
mean         43.864971
std          14.821238
min          23.000000
25%          30.000000
50%          43.000000
75%          54.000000
max          85.000000
Name: Age, dtype: float64

In [18]:
train['Vintage'].describe()

count    196580.000000
mean         46.978121
std          32.346981
min           7.000000
25%          20.000000
50%          32.000000
75%          73.000000
max         135.000000
Name: Vintage, dtype: float64

In [19]:
train.groupby(['Is_Lead'])[['Age','Avg_Account_Balance']].median()

Unnamed: 0_level_0,Age,Avg_Account_Balance
Is_Lead,Unnamed: 1_level_1,Unnamed: 2_level_1
0,38,871158
1,49,980686


In [20]:
train['Credit_Product'] = train['Credit_Product'].fillna('NA')
train['Avg_Account_Balance'] = np.log(1+train['Avg_Account_Balance'])

test['Credit_Product'] = test['Credit_Product'].fillna('NA')
test['Avg_Account_Balance'] = np.log(1+test['Avg_Account_Balance'])

valid['Credit_Product'] = valid['Credit_Product'].fillna('NA')
valid['Avg_Account_Balance'] = np.log(1+valid['Avg_Account_Balance'])


In [21]:
train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
144163,CAJTUNNQ,Male,32,RG260,Salaried,X1,33,No,13.00713,Yes,1
242829,KDGIYR3A,Female,24,RG251,Salaried,X1,15,No,13.160129,Yes,0
42203,VBJG4EXP,Female,42,RG277,Self_Employed,X2,32,Yes,13.444576,No,1


In [22]:
train['Is_Active'] = train['Is_Active'].replace({'No':'N','Yes':'Y'})
test['Is_Active'] = test['Is_Active'].replace({'No':'N','Yes':'Y'})
valid['Is_Active'] = valid['Is_Active'].replace({'No':'N','Yes':'Y'})

In [23]:
cat_cols = ['Gender','Region_Code','Occupation','Channel_Code','Credit_Product','Is_Active']
featured_cols = []
for idx,col in enumerate(cat_cols):
    for sub_col in cat_cols[idx+1:]:
        new_col = f"{col}-{sub_col}"
        featured_cols.append(new_col)
        train[new_col] = train[col] + "-" + train[sub_col]
        test[new_col] = test[col] + "-" + test[sub_col]
        valid[new_col] = valid[col] + "-" + valid[sub_col]

train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active
145908,QDDBTOEQ,Male,59,RG279,Other,X1,13,Yes,13.163125,N,0,Male-RG279,Male-Other,Male-X1,Male-Yes,Male-N,RG279-Other,RG279-X1,RG279-Yes,RG279-N,Other-X1,Other-Yes,Other-N,X1-Yes,X1-N,Yes-N
173895,LI6UNF8C,Male,63,RG283,Other,X3,109,,14.72344,Y,1,Male-RG283,Male-Other,Male-X3,Male-NA,Male-Y,RG283-Other,RG283-X3,RG283-NA,RG283-Y,Other-X3,Other-NA,Other-Y,X3-NA,X3-Y,NA-Y
58332,XTCVH4XR,Male,26,RG268,Self_Employed,X1,21,No,14.051379,N,0,Male-RG268,Male-Self_Employed,Male-X1,Male-No,Male-N,RG268-Self_Employed,RG268-X1,RG268-No,RG268-N,Self_Employed-X1,Self_Employed-No,Self_Employed-N,X1-No,X1-N,No-N


In [24]:
all_cat_cols = cat_cols + featured_cols
num_col = ['Age','Vintage','Avg_Account_Balance']
for idx,col in enumerate (all_cat_cols):
    for ind, num in enumerate(num_col):
        print(f"Working Cat Col {col} {idx}/{len(all_cat_cols)}, Num col {num} {ind}/{len(num_col)}")
        grp = train.groupby([col])[num].agg(['mean','median','std'])
        grp = grp.add_prefix(f'{col}-{num}-')
        grp = grp.reset_index()
        train = train.merge(grp,on=[col],how='left')
        test = test.merge(grp,on=[col],how='left')
        valid = valid.merge(grp,on=[col],how='left')

Working Cat Col Gender 0/21, Num col Age 0/3
Working Cat Col Gender 0/21, Num col Vintage 1/3
Working Cat Col Gender 0/21, Num col Avg_Account_Balance 2/3
Working Cat Col Region_Code 1/21, Num col Age 0/3
Working Cat Col Region_Code 1/21, Num col Vintage 1/3
Working Cat Col Region_Code 1/21, Num col Avg_Account_Balance 2/3
Working Cat Col Occupation 2/21, Num col Age 0/3
Working Cat Col Occupation 2/21, Num col Vintage 1/3
Working Cat Col Occupation 2/21, Num col Avg_Account_Balance 2/3
Working Cat Col Channel_Code 3/21, Num col Age 0/3
Working Cat Col Channel_Code 3/21, Num col Vintage 1/3
Working Cat Col Channel_Code 3/21, Num col Avg_Account_Balance 2/3
Working Cat Col Credit_Product 4/21, Num col Age 0/3
Working Cat Col Credit_Product 4/21, Num col Vintage 1/3
Working Cat Col Credit_Product 4/21, Num col Avg_Account_Balance 2/3
Working Cat Col Is_Active 5/21, Num col Age 0/3
Working Cat Col Is_Active 5/21, Num col Vintage 1/3
Working Cat Col Is_Active 5/21, Num col Avg_Account_Bala

In [25]:
all_cat_cols = cat_cols + featured_cols
encoder = LabelEncoder()
for col in all_cat_cols:
    train[col] = encoder.fit_transform(train[col])
    test[col] = encoder.transform(test[col])
    valid[col] = encoder.transform(valid[col])

In [26]:
train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gender-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_Account_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Active-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
23645,OJGYJSXW,1,61,4,3,2,122,1,13.337089,0,0,39,7,6,4,2,19,18,13,8,14,10,6,7,4,2,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,43.716526,43,14.818066,47.403313,32.0,32.850418,13.991658,13.96426,0.559802,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,54.709415,53,12.125373,74.830457,80,32.852358,13.863153,13.848448,0.614125,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,45.979602,47.0,14.69246,52.321858,38.0,34.365724,14.000739,13.976548,0.55821,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,55.332957,54,...,0.625067,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,46.988973,48.0,8.859679,55.804968,51.0,33.0961,13.950268,13.928111,0.550486,55.134468,54.0,12.039841,75.378735,80.0,33.716311,14.048134,14.01505,0.549842,41.020341,35.0,14.830789,41.716909,27.0,29.775013,13.979975,13.949191,0.566093,41.556842,37.0,14.632986,43.260592,27,30.415394,13.988598,13.962556,0.556208,50.10095,51,6.705361,73.301676,79.0,32.354135,13.825329,13.810187,0.605881,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,55.160681,54,12.96831,72.752381,74,32.049188,13.810513,13.787153,0.624411,54.145345,53,12.732743,70.575908,74,33.149403,13.845411,13.829655,0.606719,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927
169391,2ACGJZPK,1,35,4,3,1,25,1,12.803273,1,0,39,7,5,4,3,19,17,13,9,13,10,7,4,3,3,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,43.716526,43,14.818066,47.403313,32.0,32.850418,13.991658,13.96426,0.559802,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,47.923203,48,14.217695,55.489554,51,34.657305,13.802428,13.781543,0.625565,45.979602,47.0,14.69246,52.321858,38.0,34.365724,14.000739,13.976548,0.55821,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,50.58274,49,...,0.625067,49.342086,49,13.601337,59.018389,57,34.934066,13.801728,13.781203,0.625037,46.988973,48.0,8.859679,55.804968,51.0,33.0961,13.950268,13.928111,0.550486,51.082556,50.0,10.87277,56.57203,57.0,29.199342,13.95487,13.937758,0.550302,41.020341,35.0,14.830789,41.716909,27.0,29.775013,13.979975,13.949191,0.566093,46.388168,47.0,14.609313,52.528075,38,34.961976,13.995444,13.967262,0.564224,47.153494,47,7.104712,53.313132,55.0,27.966771,13.726652,13.706341,0.59804,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,47.476003,48,7.946399,59.285561,57,33.368966,13.769124,13.749167,0.617108,50.699305,49,11.595563,53.602277,55,26.623489,13.728676,13.70099,0.618954,50.028448,48,11.195013,53.959872,55,28.68473,13.748476,13.72776,0.61782,46.009136,46,15.148168,48.25175,33,32.637442,13.763813,13.73815,0.63022
182928,KYF2RYFH,0,31,32,2,0,31,1,13.846328,0,0,32,2,0,1,0,130,128,97,64,8,7,4,1,0,2,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,40.763124,35,14.34951,40.640418,31.0,27.71331,13.639894,13.681643,0.516113,30.911582,29,6.624586,26.385212,25,17.005681,13.646255,13.609381,0.617679,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,38.45253,32.0,13.715178,36.208675,31.0,24.063587,13.629081,13.664603,0.511172,30.233873,29,5.548506,24.825372,21,13.665404,13.615808,13.580002,0.613589,31.947681,29,...,0.62249,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,30.576464,29.0,5.573557,26.411029,26.0,13.140568,13.656512,13.689946,0.5203,31.239238,29.0,7.105578,25.06819,26.0,8.768899,13.642681,13.679774,0.512677,37.919215,32.0,13.612718,35.759575,31.0,23.748314,13.624375,13.667239,0.52037,37.64435,32.0,13.084208,35.392002,27,24.179234,13.650638,13.686034,0.511757,29.342995,29,3.403885,22.884158,21.0,7.426934,13.621987,13.584699,0.611982,29.56043,29,3.889061,23.426746,21,9.253663,13.619111,13.579394,0.617434,30.476931,29,5.708913,25.479779,25,14.052418,13.60242,13.567376,0.607033,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927


In [27]:
test.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gender-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,Gender-Channel_Code-Age-std,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_Account_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Active-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
66727,95NZKCKG,1,42,33,3,0,74,2,14.321911,0,68,7,4,5,2,135,132,101,66,12,11,6,2,0,4,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,49.03886,49,14.141979,58.949928,57.0,33.935316,14.020919,14.015742,0.595516,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,46.864084,47,14.19592,51.701079,39,34.259627,13.794534,13.771814,0.608655,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,50.099778,49.0,13.653215,61.784858,62.0,33.889134,14.025825,14.017518,0.592916,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,32.826819,29,10.206629,...,0.604623,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,47.888273,48.0,7.582412,63.168501,63.0,32.304118,13.97886,13.967018,0.587847,38.341542,31.0,14.820396,32.155379,26.0,20.710089,14.021016,14.017787,0.618966,51.052599,50.0,12.973797,62.473023,62.0,34.593373,14.050301,14.048077,0.578569,49.041642,49.0,14.323854,57.077298,55,32.513307,14.021352,14.013357,0.588656,37.666341,35,10.237646,30.848173,26.0,18.336994,13.656689,13.612417,0.617393,47.475027,48,8.047989,56.46292,55,33.669439,13.784911,13.767253,0.595917,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,34.347592,29,12.273945,26.619366,25,15.423915,13.690755,13.663485,0.610405,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,45.197443,45,14.694064,45.636257,32,32.006965,13.761389,13.736186,0.602281
26276,DZXXQYTF,1,30,18,1,0,27,1,13.650155,0,53,5,4,4,2,73,72,55,36,4,4,2,1,0,2,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,49.016579,49,14.133032,59.242276,57.0,34.127409,14.013321,14.011128,0.59425,53.130842,56,18.615624,54.886123,50,34.306833,13.798887,13.774818,0.630414,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,50.065278,50.0,13.606399,62.16117,62.0,34.078826,14.016509,14.005866,0.593029,55.304334,60,17.651923,59.464858,57,34.764459,13.818952,13.796086,0.62599,32.826819,29,10.206629,...,0.625067,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,59.175442,63.0,16.224971,66.165597,68.0,34.110853,14.056102,14.066017,0.604029,38.307069,31.0,14.781783,31.828172,26.0,20.662957,14.019672,14.015414,0.619927,46.15722,46.0,15.029966,52.012814,44.0,32.819819,13.990097,13.979808,0.608925,49.06548,49.0,14.370139,57.380598,55,32.795914,14.017349,14.016863,0.588426,36.419296,30,15.566254,27.707784,25.0,16.014826,13.678502,13.636975,0.633103,49.62441,48,19.664724,48.276304,33,32.204767,13.741391,13.710649,0.635718,49.95129,50,19.146527,49.045805,33,32.594454,13.75803,13.730922,0.625097,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927
96829,STQYKPZN,1,49,10,3,1,93,1,13.959247,0,45,7,5,4,2,43,41,31,20,13,10,6,4,2,2,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,40.393964,34,14.243897,40.410865,31.0,28.117289,13.374431,13.340758,0.574998,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,43.508124,42.0,14.55236,46.258493,32.0,30.996573,13.408011,13.381694,0.580539,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,50.58274,49,10.999853,...,0.625067,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,45.306193,46.0,9.444578,50.402523,43.0,31.863041,13.395641,13.365528,0.577238,50.862306,49.0,11.915434,52.120482,50.0,29.442099,13.416928,13.382169,0.606143,37.356024,31.0,13.414219,35.566867,27.0,23.868456,13.3416,13.297305,0.571299,37.254415,31.0,12.817233,36.081126,27,24.232892,13.364664,13.330468,0.568735,47.153494,47,7.104712,53.313132,55.0,27.966771,13.726652,13.706341,0.59804,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,50.699305,49,11.595563,53.602277,55,26.623489,13.728676,13.70099,0.618954,50.757953,49,11.403322,55.277626,57,27.576043,13.764424,13.742765,0.590941,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927


In [28]:
valid.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gender-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_Account_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Active-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
6587,58WEG7YP,0,34,7,2,0,27,1,13.532107,0,0,7,2,0,1,0,30,28,22,14,8,7,4,1,0,2,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,39.151274,32,13.959944,37.854842,27.0,27.606765,13.493599,13.465645,0.571355,30.911582,29,6.624586,26.385212,25,17.005681,13.646255,13.609381,0.617679,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,36.027363,31.0,12.357595,32.920398,26.0,22.533175,13.478702,13.434394,0.572737,30.233873,29,5.548506,24.825372,21,13.665404,13.615808,13.580002,0.613589,31.947681,29,...,0.62249,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,29.981754,29.0,4.935144,24.685251,25.0,11.667111,13.450863,13.411802,0.554358,30.018583,29.0,5.191595,23.726157,25.0,7.610086,13.451952,13.415845,0.558613,35.709906,30.0,12.47916,32.084021,26.0,22.314507,13.458884,13.430862,0.564979,36.40492,31.0,12.686058,32.93173,26,23.148247,13.477788,13.437028,0.561844,29.342995,29,3.403885,22.884158,21.0,7.426934,13.621987,13.584699,0.611982,29.56043,29,3.889061,23.426746,21,9.253663,13.619111,13.579394,0.617434,30.476931,29,5.708913,25.479779,25,14.052418,13.60242,13.567376,0.607033,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927
9825,9JUBRDIM,1,63,33,1,1,39,1,13.835857,1,0,68,5,5,4,3,133,133,100,67,5,4,3,4,3,3,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,49.03886,49,14.141979,58.949928,57.0,33.935316,14.020919,14.015742,0.595516,53.130842,56,18.615624,54.886123,50,34.306833,13.798887,13.774818,0.630414,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,47.923203,48,14.217695,55.489554,51,34.657305,13.802428,13.781543,0.625565,50.099778,49.0,13.653215,61.784858,62.0,33.889134,14.025825,14.017518,0.592916,55.304334,60,17.651923,59.464858,57,34.764459,13.818952,13.796086,0.62599,50.58274,49,...,0.625067,49.342086,49,13.601337,59.018389,57,34.934066,13.801728,13.781203,0.625037,58.967986,63.0,16.35266,65.572662,68.0,33.636895,14.075728,14.077804,0.595842,50.416812,49.0,11.302582,57.207332,61.0,27.273778,13.989698,13.976631,0.591709,46.264987,46.0,15.044316,51.727422,44.0,32.348306,13.992441,13.984893,0.610751,49.036115,49.0,13.960779,60.797907,62,35.186266,14.020491,14.017926,0.602234,59.442674,62,13.895012,58.589195,61.0,27.653358,13.804688,13.785444,0.612041,49.62441,48,19.664724,48.276304,33,32.204767,13.741391,13.710649,0.635718,57.710886,62,16.796927,63.298919,63,34.958821,13.857741,13.839231,0.633392,50.699305,49,11.595563,53.602277,55,26.623489,13.728676,13.70099,0.618954,50.028448,48,11.195013,53.959872,55,28.68473,13.748476,13.72776,0.61782,46.009136,46,15.148168,48.25175,33,32.637442,13.763813,13.73815,0.63022
29143,GC3KESLX,1,57,4,3,2,63,1,14.744259,0,0,39,7,6,4,2,19,18,13,8,14,10,6,7,4,2,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,43.716526,43,14.818066,47.403313,32.0,32.850418,13.991658,13.96426,0.559802,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,54.709415,53,12.125373,74.830457,80,32.852358,13.863153,13.848448,0.614125,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,45.979602,47.0,14.69246,52.321858,38.0,34.365724,14.000739,13.976548,0.55821,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,55.332957,54,...,0.625067,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,46.988973,48.0,8.859679,55.804968,51.0,33.0961,13.950268,13.928111,0.550486,55.134468,54.0,12.039841,75.378735,80.0,33.716311,14.048134,14.01505,0.549842,41.020341,35.0,14.830789,41.716909,27.0,29.775013,13.979975,13.949191,0.566093,41.556842,37.0,14.632986,43.260592,27,30.415394,13.988598,13.962556,0.556208,50.10095,51,6.705361,73.301676,79.0,32.354135,13.825329,13.810187,0.605881,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,55.160681,54,12.96831,72.752381,74,32.049188,13.810513,13.787153,0.624411,54.145345,53,12.732743,70.575908,74,33.149403,13.845411,13.829655,0.606719,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927


In [29]:
x_train = train.drop(['ID','Is_Lead'],axis=1)
y_train = train['Is_Lead']

x_valid = valid.drop(['ID','Is_Lead'],axis=1)
y_valid = valid['Is_Lead']

x_test = test.drop(['ID'],axis=1)

In [30]:
x_train = x_train.fillna(0)
x_valid = x_valid.fillna(0)
x_test = x_test.fillna(0)

In [31]:
lgb_params= {'learning_rate': 0.05, 
             'n_estimators': 500, 
             'max_bin': 100,
             'num_leaves': 10, 
             'max_depth': 30, 
             'reg_alpha': 8.5, 
             'reg_lambda': 7.0, 
             'subsample': 0.75,
            'random_state': 42,
            'class_weight':'balanced'}


lgb = LGBMClassifier(**lgb_params)
lgb.fit(x_train, y_train,eval_set=[(x_valid, y_valid)],early_stopping_rounds=10,verbose=400)
pred = lgb.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

Training until validation scores don't improve for 10 rounds
[400]	valid_0's binary_logloss: 0.422714
Early stopping, best iteration is:
[472]	valid_0's binary_logloss: 0.422459
roc_auc_score: 0.8733204076195035


In [32]:
xgb_params= {'n_estimators': 500, 
             'max_depth': 5, 
             'learning_rate': 0.02, 
             'reg_lambda': 29, 
             'subsample': 0.80, 
             'colsample_bytree': 0.25, 
             'colsample_bynode': 0.80, 
             'colsample_bylevel': 0.5,
            'random_state':42,
            'use_label_encoder':False }


xgb = XGBClassifier(**xgb_params)
xgb.fit(x_train, y_train,eval_set=[(x_valid, y_valid)],early_stopping_rounds=10,verbose=400)
pred = xgb.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

[0]	validation_0-logloss:0.68217
[400]	validation_0-logloss:0.34840
[499]	validation_0-logloss:0.34752
roc_auc_score: 0.8715869800266962


In [33]:
cat_params= {'n_estimators': 500, 
            'depth': 5, 
            'learning_rate': 0.02, 
            'colsample_bylevel': 0.70, 
            'bagging_temperature': 0.90, 
            'l2_leaf_reg': 10,
            'random_seed': 23}

cat = CatBoostClassifier(**cat_params)
cat.fit(x_train, y_train,eval_set=[(x_valid, y_valid)],early_stopping_rounds=10,verbose=400)
pred = cat.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

0:	learn: 0.6760811	test: 0.6761160	best: 0.6761160 (0)	total: 504ms	remaining: 4m 11s
400:	learn: 0.3435602	test: 0.3465535	best: 0.3465535 (400)	total: 1m 3s	remaining: 15.7s
499:	learn: 0.3429350	test: 0.3462430	best: 0.3462430 (499)	total: 1m 18s	remaining: 0us

bestTest = 0.3462429726
bestIteration = 499

roc_auc_score: 0.872413181694604


In [34]:
ada_params= {'n_estimators': 200, 
            'learning_rate': 0.08, 
            'random_state': 23}

ada = AdaBoostClassifier(**ada_params)
ada.fit(x_train,y_train)
pred = ada.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

roc_auc_score: 0.865400087710626


In [35]:
grad_params = {'n_estimators':100,
               'learning_rate' : 0.08,
               'subsample' : 0.7,
               'random_state' : 45,
               'max_features' : 'auto',
               'verbose':1
              }


grad = GradientBoostingClassifier(**grad_params)
grad.fit(x_train,y_train)
pred = grad.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

      Iter       Train Loss      OOB Improve   Remaining Time 
         1           1.0336           0.0587            3.84m
         2           0.9909           0.0463            3.76m
         3           0.9522           0.0377            3.72m
         4           0.9210           0.0309            3.67m
         5           0.8953           0.0261            3.63m
         6           0.8732           0.0222            3.60m
         7           0.8527           0.0193            3.56m
         8           0.8371           0.0166            3.52m
         9           0.8215           0.0144            3.48m
        10           0.8109           0.0124            3.44m
        20           0.7419           0.0037            3.14m
        30           0.7152           0.0020            2.74m
        40           0.7067           0.0006            2.35m
        50           0.7031           0.0005            1.98m
        60           0.6938           0.0003            1.59m
       

In [36]:
hist_params = {'max_iter':500,
               'learning_rate' : 0.06,
               'max_depth' : 7,
               'early_stopping' : 'auto',
               'verbose':1,
               'random_state':63
              }


hist = HistGradientBoostingClassifier(**hist_params)
hist.fit(x_train,y_train)
pred = hist.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

Binning 0.301 GB of training data: 2.823 s
Binning 0.033 GB of validation data: 0.047 s
Fitting gradient boosted rounds:
[1/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.52359, val loss: 0.52335, in 0.193s
[2/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.50351, val loss: 0.50320, in 0.213s
[3/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.48647, val loss: 0.48612, in 0.208s
[4/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.47170, val loss: 0.47134, in 0.207s
[5/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.45883, val loss: 0.45841, in 0.213s
[6/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.44749, val loss: 0.44709, in 0.206s
[7/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43740, val loss: 0.43701, in 0.230s
[8/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.42849, val loss: 0.42813, in 0.214s
[9/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.42045, val loss: 0.42009, in 0.210s
[10/500] 1 tree, 31 leaves, max depth = 7, train lo

[88/500] 1 tree, 24 leaves, max depth = 7, train loss: 0.33949, val loss: 0.34343, in 0.149s
[89/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33942, val loss: 0.34342, in 0.180s
[90/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33934, val loss: 0.34338, in 0.187s
[91/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33927, val loss: 0.34340, in 0.125s
[92/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33919, val loss: 0.34340, in 0.173s
[93/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33913, val loss: 0.34339, in 0.139s
[94/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33907, val loss: 0.34340, in 0.175s
[95/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33900, val loss: 0.34341, in 0.162s
[96/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33895, val loss: 0.34341, in 0.153s
[97/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33888, val loss: 0.34341, in 0.146s
[98/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33883, val lo

In [37]:
train_pred = pd.DataFrame()
train_pred['lgb'] = lgb.predict_proba(x_train)[:, 1]
train_pred['xgb'] = xgb.predict_proba(x_train)[:, 1]
train_pred['cat'] = cat.predict_proba(x_train)[:, 1]
train_pred['ada'] = ada.predict_proba(x_train)[:, 1]
train_pred['grad'] = grad.predict_proba(x_train)[:, 1]
train_pred['hist'] = hist.predict_proba(x_train)[:, 1]
train_pred.head(3)

Unnamed: 0,lgb,xgb,cat,ada,grad,hist
0,0.177978,0.072633,0.065084,0.460619,0.059675,0.061099
1,0.309638,0.204664,0.173076,0.483216,0.180596,0.136129
2,0.7839,0.493995,0.479599,0.494111,0.468828,0.500255


In [38]:
valid_pred = pd.DataFrame()
valid_pred['lgb'] = lgb.predict_proba(x_valid)[:, 1]
valid_pred['xgb'] = xgb.predict_proba(x_valid)[:, 1]
valid_pred['cat'] = cat.predict_proba(x_valid)[:, 1]
valid_pred['ada'] = ada.predict_proba(x_valid)[:, 1]
valid_pred['grad'] = grad.predict_proba(x_valid)[:, 1]
valid_pred['hist'] = hist.predict_proba(x_valid)[:, 1]
valid_pred.head(3)

Unnamed: 0,lgb,xgb,cat,ada,grad,hist
0,0.146775,0.062817,0.056691,0.458025,0.054826,0.051147
1,0.944318,0.834179,0.830782,0.529344,0.835377,0.834074
2,0.692287,0.373265,0.429635,0.4922,0.544626,0.412364


In [39]:
test_pred = pd.DataFrame()
test_pred['lgb'] = lgb.predict_proba(x_test)[:, 1]
test_pred['xgb'] = xgb.predict_proba(x_test)[:, 1]
test_pred['cat'] = cat.predict_proba(x_test)[:, 1]
test_pred['ada'] = ada.predict_proba(x_test)[:, 1]
test_pred['grad'] = grad.predict_proba(x_test)[:, 1]
test_pred['hist'] = hist.predict_proba(x_test)[:, 1]
test_pred.head(3)

Unnamed: 0,lgb,xgb,cat,ada,grad,hist
0,0.115147,0.041271,0.052321,0.461465,0.057002,0.038952
1,0.945285,0.854583,0.842623,0.51964,0.849023,0.846096
2,0.160164,0.056577,0.052935,0.455125,0.052681,0.053951


In [40]:
model = LogisticRegression(random_state=10,max_iter=1000,class_weight='balanced',fit_intercept=True)
model.fit(train_pred,y_train)
pred = model.predict_proba(valid_pred)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

roc_auc_score: 0.8726312363570534


In [41]:
pred = valid_pred.mean(axis=1)
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

roc_auc_score: 0.8730279976786417


In [42]:
test_pred['stacked_avg'] = test_pred.mean(axis=1)
test_pred['stacked_logistic'] = model.predict_proba(test_pred.drop(['stacked_avg'],axis=1))[:, 1]
test_pred['ID'] = test['ID']
test_pred.head(1)

Unnamed: 0,lgb,xgb,cat,ada,grad,hist,stacked_avg,stacked_logistic,ID
0,0.115147,0.041271,0.052321,0.461465,0.057002,0.038952,0.127693,0.117788,VBENBARO


In [45]:
test_pred[['ID','lgb']].rename({'lgb':'Is_Lead'},axis=1).to_csv('LGB_submit_v1.csv',index=False)
test_pred[['ID','xgb']].rename({'xgb':'Is_Lead'},axis=1).to_csv('XGB_submit_v1.csv',index=False)
test_pred[['ID','cat']].rename({'cat':'Is_Lead'},axis=1).to_csv('CAT_submit_v1.csv',index=False)
test_pred[['ID','hist']].rename({'hist':'Is_Lead'},axis=1).to_csv('HIST_submit_v1.csv',index=False)
test_pred[['ID','ada']].rename({'ada':'Is_Lead'},axis=1).to_csv('ADA_submit_v1.csv',index=False)
test_pred[['ID','grad']].rename({'grad':'Is_Lead'},axis=1).to_csv('GRAD_submit_v1.csv',index=False)

test_pred[['ID','stacked_avg']].rename({'stacked_avg':'Is_Lead'},axis=1).to_csv('AVG.csv',index=False)
test_pred[['ID','stacked_logistic']].rename({'stacked_logistic':'Is_Lead'},axis=1).to_csv('LOG.csv',index=False)