# Problem Statement

### Credit Card Lead Prediction

Happy Customer Bank is a mid-sized private bank that deals in all kinds of banking products, like Savings accounts, Current accounts, investment products, credit products, among other offerings.


The bank also cross-sells products to its existing customers, and to do so it uses different channels of communication such as tele-calling, e-mails, recommendations on net banking, mobile banking, etc.


In this case, Happy Customer Bank wants to cross-sell its credit cards to its existing customers. The bank has identified a set of customers who are eligible for these credit cards.


Now, the bank is looking for your help in identifying customers that could show higher intent towards a recommended credit card, given:

    Customer details (gender, age, region etc.)
    Details of his/her relationship with the bank (Channel_Code, Vintage, Avg_Asset_Value etc.)


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier


from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

from sklearn.linear_model import LogisticRegression

# Let pandas display up to 200 columns before it starts eliding them with "...".
pd.options.display.max_columns = 200

In [2]:
# Read the labelled training file and the unlabelled test file from the
# working directory, then report the shape of each.
data, test = (
    pd.read_csv(csv_name)
    for csv_name in ("train_s3TEQDk.csv", "test_mSzZ8RL.csv")
)
print(f"Train shape {data.shape}, Test Shape {test.shape}")

Train shape (245725, 11), Test Shape (105312, 10)


In [3]:
# Hold out 20% of the labelled data for validation, stratified on the target
# so both parts keep the same Is_Lead ratio. Each part is copied so that the
# column assignments made in later cells operate on independent frames.
train, valid = (
    part.copy()
    for part in train_test_split(
        data,
        test_size=0.20,
        random_state=345,
        stratify=data['Is_Lead'],
    )
)
print(f"Train shape {train.shape} Validation shape {valid.shape}")

Train shape (196580, 11) Validation shape (49145, 11)


In [4]:
# Preview the first three rows of the training split.
train.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
53078,N3ZQ84QR,Female,46,RG280,Self_Employed,X2,51,No,863584,Yes,0
213644,JWGAMK7P,Male,67,RG258,Other,X2,43,Yes,706126,No,0
131870,CX9NGNQT,Male,46,RG279,Self_Employed,X2,26,Yes,422207,Yes,0


In [5]:
# Preview the first three rows of the test set (it carries no Is_Lead column).
test.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active
0,VBENBARO,Male,29,RG254,Other,X1,25,Yes,742366,No
1,CCMEWNKY,Male,43,RG268,Other,X2,49,,925537,No
2,VK3KGA9M,Male,31,RG270,Salaried,X1,14,No,215949,No


In [6]:
# Preview the first three rows of the validation split.
valid.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
148453,OK9KJGZ2,Female,55,RG268,Self_Employed,X1,37,No,929257,No,0
117997,TTC7CPSI,Male,57,RG283,Self_Employed,X3,87,,909740,No,0
5432,MPUWVRAX,Male,39,RG275,Salaried,X1,8,Yes,961742,Yes,0


In [7]:
# Number of distinct IDs in the training split; if it equals the row count,
# ID is unique per row.
train['ID'].nunique()

196580

In [8]:
# Missing-value count for every column of the training split.
train.isna().sum()

ID                         0
Gender                     0
Age                        0
Region_Code                0
Occupation                 0
Channel_Code               0
Vintage                    0
Credit_Product         23525
Avg_Account_Balance        0
Is_Active                  0
Is_Lead                    0
dtype: int64

In [9]:
# Share of each Gender value in the training split.
train['Gender'].value_counts(normalize=True)

Male      0.546693
Female    0.453307
Name: Gender, dtype: float64

In [10]:
# Number of distinct Region_Code values in the training split.
train['Region_Code'].nunique()

35

In [11]:
# Share of each Occupation value in the training split.
train['Occupation'].value_counts(normalize=True)

Self_Employed    0.411532
Salaried         0.292731
Other            0.284866
Entrepreneur     0.010871
Name: Occupation, dtype: float64

In [12]:
# Share of each Channel_Code value in the training split.
train['Channel_Code'].value_counts(normalize=True)

X1    0.421279
X3    0.280359
X2    0.275669
X4    0.022693
Name: Channel_Code, dtype: float64

In [13]:
# Share of Yes/No among the non-missing Credit_Product values.
# value_counts drops NaN by default, so rows with a missing Credit_Product
# are excluded from these proportions.
train['Credit_Product'].value_counts(normalize=True)

No     0.667019
Yes    0.332981
Name: Credit_Product, dtype: float64

In [14]:
# Summary statistics of Avg_Account_Balance in the training split.
train['Avg_Account_Balance'].describe()

count    1.965800e+05
mean     1.129489e+06
std      8.532486e+05
min      2.079000e+04
25%      6.042470e+05
50%      8.954865e+05
75%      1.368733e+06
max      1.035201e+07
Name: Avg_Account_Balance, dtype: float64

In [15]:
# Share of each Is_Active value in the training split.
train['Is_Active'].value_counts(normalize=True)

No     0.611375
Yes    0.388625
Name: Is_Active, dtype: float64

In [16]:
# Class balance of the target column Is_Lead in the training split.
train['Is_Lead'].value_counts(normalize=True)

0    0.762794
1    0.237206
Name: Is_Lead, dtype: float64

In [17]:
# Summary statistics of Age in the training split.
train['Age'].describe()

count    196580.000000
mean         43.864971
std          14.821238
min          23.000000
25%          30.000000
50%          43.000000
75%          54.000000
max          85.000000
Name: Age, dtype: float64

In [18]:
# Summary statistics of Vintage in the training split.
train['Vintage'].describe()

count    196580.000000
mean         46.978121
std          32.346981
min           7.000000
25%          20.000000
50%          32.000000
75%          73.000000
max         135.000000
Name: Vintage, dtype: float64

In [19]:
# Median Age and Avg_Account_Balance within each Is_Lead class.
train.groupby(['Is_Lead'])[['Age','Avg_Account_Balance']].median()

Unnamed: 0_level_0,Age,Avg_Account_Balance
Is_Lead,Unnamed: 1_level_1,Unnamed: 2_level_1
0,38,871158
1,49,980686


In [20]:
# Apply the same preprocessing to all three frames:
#   * a missing Credit_Product becomes its own explicit 'NA' category;
#   * Avg_Account_Balance is replaced by log(1 + balance).
# NOTE: the balance column is overwritten in place, so running this cell a
# second time would apply the log transform again.
for frame in (train, test, valid):
    frame['Credit_Product'] = frame['Credit_Product'].fillna('NA')
    frame['Avg_Account_Balance'] = np.log(1 + frame['Avg_Account_Balance'])


In [21]:
# Show three random training rows; no random_state is passed, so the rows
# differ from run to run.
train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
210727,INWLBJ8N,Male,47,RG278,Self_Employed,X2,67,Yes,13.881524,Yes,1
149903,JQQF8NNH,Female,39,RG254,Self_Employed,X2,51,No,13.683,Yes,0
143480,JJNMNGVY,Male,38,RG280,Self_Employed,X2,13,No,13.012269,Yes,0


In [22]:
# Shorten the Is_Active labels from 'No'/'Yes' to 'N'/'Y' in every frame.
active_labels = {'No': 'N', 'Yes': 'Y'}
for frame in (train, test, valid):
    frame['Is_Active'] = frame['Is_Active'].replace(active_labels)

In [23]:
# Base categorical features, in the order used to form the pairwise crosses.
cat_cols = ['Gender','Region_Code','Occupation','Channel_Code','Credit_Product','Is_Active']

# Every unordered pair (first, second) where `first` comes earlier in cat_cols.
col_pairs = [
    (first, second)
    for pos, first in enumerate(cat_cols)
    for second in cat_cols[pos + 1:]
]
featured_cols = [f"{first}-{second}" for first, second in col_pairs]

# Add each cross as a "<first value>-<second value>" string column to all
# three frames.
for (first, second), new_col in zip(col_pairs, featured_cols):
    for frame in (train, test, valid):
        frame[new_col] = frame[first] + "-" + frame[second]

train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active
234584,DY97RXBL,Male,57,RG254,Self_Employed,X3,56,No,14.765183,N,0,Male-RG254,Male-Self_Employed,Male-X3,Male-No,Male-N,RG254-Self_Employed,RG254-X3,RG254-No,RG254-N,Self_Employed-X3,Self_Employed-No,Self_Employed-N,X3-No,X3-N,No-N
178011,8GVCRDVG,Female,46,RG281,Other,X2,33,No,12.981703,Y,0,Female-RG281,Female-Other,Female-X2,Female-No,Female-Y,RG281-Other,RG281-X2,RG281-No,RG281-Y,Other-X2,Other-No,Other-Y,X2-No,X2-Y,No-Y
188431,BHISAIXX,Female,47,RG268,Self_Employed,X2,85,,13.471461,N,1,Female-RG268,Female-Self_Employed,Female-X2,Female-NA,Female-N,RG268-Self_Employed,RG268-X2,RG268-NA,RG268-N,Self_Employed-X2,Self_Employed-NA,Self_Employed-N,X2-NA,X2-N,NA-N


In [24]:
all_cat_cols = cat_cols + featured_cols
num_col = ['Age','Vintage','Avg_Account_Balance']


def _attach_group_stats(frame, stats, key):
    """Left-join per-category statistics onto ``frame`` using column ``key``.

    Statistic columns of ``stats`` that already exist in ``frame`` (left over
    from an earlier run of this cell) are dropped first, so re-running the
    cell replaces them instead of producing ``_x``/``_y`` suffixed duplicates.
    Rows whose ``key`` value does not occur in ``stats`` receive NaN.

    Returns a new DataFrame; like any ``merge`` result it has a fresh
    RangeIndex.
    """
    stale = frame.columns.intersection(stats.columns.drop(key))
    if len(stale) > 0:
        frame = frame.drop(columns=stale)
    # `stats` holds one row per key value; validate guards against a join
    # that would silently duplicate rows.
    return frame.merge(stats, on=[key], how='left', validate='many_to_one')


for idx,col in enumerate(all_cat_cols):
    for ind, num in enumerate(num_col):
        print(f"Working Cat Col {col} {idx}/{len(all_cat_cols)}, Num col {num} {ind}/{len(num_col)}")
        # The statistics are computed on the training split only and then
        # attached unchanged to test and valid.
        grp = train.groupby([col])[num].agg(['mean','median','std'])
        grp = grp.add_prefix(f'{col}-{num}-')
        grp = grp.reset_index()
        train = _attach_group_stats(train, grp, col)
        test = _attach_group_stats(test, grp, col)
        valid = _attach_group_stats(valid, grp, col)

Working Cat Col Gender 0/21, Num col Age 0/3
Working Cat Col Gender 0/21, Num col Vintage 1/3
Working Cat Col Gender 0/21, Num col Avg_Account_Balance 2/3
Working Cat Col Region_Code 1/21, Num col Age 0/3
Working Cat Col Region_Code 1/21, Num col Vintage 1/3
Working Cat Col Region_Code 1/21, Num col Avg_Account_Balance 2/3
Working Cat Col Occupation 2/21, Num col Age 0/3
Working Cat Col Occupation 2/21, Num col Vintage 1/3
Working Cat Col Occupation 2/21, Num col Avg_Account_Balance 2/3
Working Cat Col Channel_Code 3/21, Num col Age 0/3
Working Cat Col Channel_Code 3/21, Num col Vintage 1/3
Working Cat Col Channel_Code 3/21, Num col Avg_Account_Balance 2/3
Working Cat Col Credit_Product 4/21, Num col Age 0/3
Working Cat Col Credit_Product 4/21, Num col Vintage 1/3
Working Cat Col Credit_Product 4/21, Num col Avg_Account_Balance 2/3
Working Cat Col Is_Active 5/21, Num col Age 0/3
Working Cat Col Is_Active 5/21, Num col Vintage 1/3
Working Cat Col Is_Active 5/21, Num col Avg_Account_Bala

In [25]:
# Integer-encode every categorical column (base columns and pairwise crosses).
# The encoder is fitted on the training split only. A label that occurs in
# test/valid but not in train is encoded as -1 and reported, rather than
# raising ValueError as LabelEncoder.transform would.
all_cat_cols = cat_cols + featured_cols
encoder = LabelEncoder()
for col in all_cat_cols:
    train[col] = encoder.fit_transform(train[col])
    # The position of a label in classes_ is the code fit_transform assigned.
    code_of = {label: code for code, label in enumerate(encoder.classes_)}
    for frame_name, frame in (('test', test), ('valid', valid)):
        codes = frame[col].map(code_of)  # labels missing from code_of become NaN
        unseen = int(codes.isna().sum())
        if unseen:
            print(f"{col}: {unseen} rows in {frame_name} have labels not seen in train; encoded as -1")
        frame[col] = codes.fillna(-1).astype('int64')

In [26]:
# Show three random training rows with all engineered columns; no
# random_state is passed, so the rows differ from run to run.
train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gend
er-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_A
ccount_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Activ
e-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
188807,6ZYZXTOJ,1,53,33,3,2,45,2,13.850599,1,1,68,7,6,5,3,135,134,101,67,14,11,7,8,5,5,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,49.03886,49,14.141979,58.949928,57.0,33.935316,14.020919,14.015742,0.595516,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,54.709415,53,12.125373,74.830457,80,32.852358,13.863153,13.848448,0.614125,46.864084,47,14.19592,51.701079,39,34.259627,13.794534,13.771814,0.608655,47.923203,48,14.217695,55.489554,51,34.657305,13.802428,13.781543,0.625565,50.099778,49.0,13.653215,61.784858,62.0,33.889134,14.025825,14.017518,0.592916,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,55.332957,54,...,0.604623,49.342086,49,13.601337,59.018389,57,34.934066,13.801728,13.781203,0.625037,47.888273,48.0,7.582412,63.168501,63.0,32.304118,13.97886,13.967018,0.587847,55.198879,53.0,11.489144,80.155612,87.0,31.181109,14.053723,14.053493,0.581382,51.052599,50.0,12.973797,62.473023,62.0,34.593373,14.050301,14.048077,0.578569,49.036115,49.0,13.960779,60.797907,62,35.186266,14.020491,14.017926,0.602234,50.10095,51,6.705361,73.301676,79.0,32.354135,13.825329,13.810187,0.605881,47.475027,48,8.047989,56.46292,55,33.669439,13.784911,13.767253,0.595917,47.476003,48,7.946399,59.285561,57,33.368966,13.769124,13.749167,0.617108,54.219523,53,11.699384,74.287363,81,34.31649,13.9027,13.890391,0.607581,55.252966,54,11.483858,78.930238,86,32.033911,13.88025,13.867758,0.62071,50.659766,50,12.166537,65.513366,69,35.202327,13.870021,13.852496,0.616328
179342,FHJTKDE2,1,29,4,2,0,26,1,13.802282,0,0,39,6,4,4,2,18,16,13,8,8,7,4,1,0,2,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,43.716526,43,14.818066,47.403313,32.0,32.850418,13.991658,13.96426,0.559802,30.911582,29,6.624586,26.385212,25,17.005681,13.646255,13.609381,0.617679,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,45.979602,47.0,14.69246,52.321858,38.0,34.365724,14.000739,13.976548,0.55821,31.733941,29,7.651692,28.277983,25,20.17391,13.683201,13.647073,0.620615,32.826819,29,...,0.625067,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,30.394812,29.0,6.378475,26.442712,25.0,16.419692,14.006068,13.978921,0.565824,30.373686,29.0,6.784324,23.747719,25.0,7.841774,13.991901,13.960572,0.569228,41.020341,35.0,14.830789,41.716909,27.0,29.775013,13.979975,13.949191,0.566093,41.556842,37.0,14.632986,43.260592,27,30.415394,13.988598,13.962556,0.556208,29.342995,29,3.403885,22.884158,21.0,7.426934,13.621987,13.584699,0.611982,29.56043,29,3.889061,23.426746,21,9.253663,13.619111,13.579394,0.617434,30.476931,29,5.708913,25.479779,25,14.052418,13.60242,13.567376,0.607033,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927
127307,7GPEWZUC,0,85,18,1,2,80,2,13.929335,0,0,18,1,2,2,0,73,74,56,36,6,5,2,8,4,4,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,49.016579,49,14.133032,59.242276,57.0,34.127409,14.013321,14.011128,0.59425,53.130842,56,18.615624,54.886123,50,34.306833,13.798887,13.774818,0.630414,54.709415,53,12.125373,74.830457,80,32.852358,13.863153,13.848448,0.614125,46.864084,47,14.19592,51.701079,39,34.259627,13.794534,13.771814,0.608655,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,47.689236,47.0,14.666105,55.547822,51.0,33.830926,14.009285,14.017205,0.59579,50.280667,49,19.444018,48.881872,33,32.74159,13.772575,13.743262,0.635221,53.732041,52,...,0.614152,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,59.175442,63.0,16.224971,66.165597,68.0,34.110853,14.056102,14.066017,0.604029,55.238554,53.0,11.47148,80.762909,87.0,31.096863,14.04657,14.053356,0.585742,51.310362,50.0,12.978811,62.813163,63.0,34.234049,14.034171,14.037169,0.584952,49.06548,49.0,14.370139,57.380598,55,32.795914,14.017349,14.016863,0.588426,63.993684,65,12.49499,79.697953,86.0,31.596858,13.914579,13.905024,0.62025,56.266929,60,16.882645,59.013508,56,35.438797,13.866254,13.84261,0.620115,49.95129,50,19.146527,49.045805,33,32.594454,13.75803,13.730922,0.625097,54.219523,53,11.699384,74.287363,81,34.31649,13.9027,13.890391,0.607581,54.145345,53,12.732743,70.575908,74,33.149403,13.845411,13.829655,0.606719,45.197443,45,14.694064,45.636257,32,32.006965,13.761389,13.736186,0.602281


In [27]:
# Show three random test rows with all engineered columns; no random_state
# is passed, so the rows differ from run to run.
test.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gender-Regio
n_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,Gender-Channel_Code-Age-std,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-
Credit_Product-Avg_Account_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,C
hannel_Code-Is_Active-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
22895,7JVFXKFP,0,40,26,3,1,61,1,13.437561,1,26,3,1,1,1,107,105,79,53,13,10,7,4,3,3,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,46.299683,46,15.119087,47.660933,37.0,30.687038,13.710853,13.640172,0.53217,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,47.923203,48,14.217695,55.489554,51,34.657305,13.802428,13.781543,0.625565,44.004171,43.0,15.286477,42.914494,31.0,28.465798,13.681988,13.598947,0.534672,45.469309,47,9.038495,52.403134,49,31.859805,13.756265,13.730588,0.610664,50.025439,48,11.813178,...,0.62249,45.904513,46,14.819066,50.468969,38,33.627966,13.803423,13.78169,0.626325,47.034161,48.0,8.735365,53.79089,55.0,30.781526,13.711831,13.645233,0.525788,51.363636,49.0,12.131543,52.229141,51.0,27.349904,13.712941,13.646916,0.522456,44.680665,43.0,15.942334,44.177603,33.0,28.201628,13.687099,13.603292,0.530686,50.822892,50.0,13.894613,55.942169,56,31.837821,13.744926,13.653079,0.536942,47.153494,47,7.104712,53.313132,55.0,27.966771,13.726652,13.706341,0.59804,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,47.476003,48,7.946399,59.285561,57,33.368966,13.769124,13.749167,0.617108,50.699305,49,11.595563,53.602277,55,26.623489,13.728676,13.70099,0.618954,50.028448,48,11.195013,53.959872,55,28.68473,13.748476,13.72776,0.61782,46.009136,46,15.148168,48.25175,33,32.637442,13.763813,13.73815,0.63022
3070,JZ4C9IY6,0,31,20,1,0,25,1,13.316809,0,20,1,0,1,0,81,80,61,40,4,4,2,1,0,2,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,37.070246,31,12.87043,30.033596,21.0,22.852744,13.338468,13.307386,0.476418,53.130842,56,18.615624,54.886123,50,34.306833,13.798887,13.774818,0.630414,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,34.94536,30.0,11.944646,25.939218,21.0,17.304803,13.326143,13.303563,0.472757,50.280667,49,19.444018,48.881872,33,32.74159,13.772575,13.743262,0.635221,31.947681,29,9.237083,...,0.62249,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,43.29153,34.0,18.105048,34.236376,25.0,26.867257,13.366748,13.33182,0.484068,31.692343,29.0,8.539184,21.673143,21.0,6.889629,13.318202,13.290317,0.469431,35.245,30.0,12.013735,27.463478,21.0,19.066927,13.323525,13.291122,0.471936,35.210405,30.0,11.576942,26.940214,21,18.759035,13.330405,13.301451,0.471164,36.419296,30,15.566254,27.707784,25.0,16.014826,13.678502,13.636975,0.633103,49.62441,48,19.664724,48.276304,33,32.204767,13.741391,13.710649,0.635718,49.95129,50,19.146527,49.045805,33,32.594454,13.75803,13.730922,0.625097,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927
2920,34APQXB4,0,55,18,3,0,44,1,14.061577,0,18,3,0,1,0,75,72,55,36,12,10,6,1,0,2,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,49.016579,49,14.133032,59.242276,57.0,34.127409,14.013321,14.011128,0.59425,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,47.689236,47.0,14.666105,55.547822,51.0,33.830926,14.009285,14.017205,0.59579,45.469309,47,9.038495,52.403134,49,31.859805,13.756265,13.730588,0.610664,31.947681,29,9.237083,...,0.62249,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,47.877195,48.0,7.639835,63.682737,63.0,32.274667,13.971731,13.972692,0.579434,38.307069,31.0,14.781783,31.828172,26.0,20.662957,14.019672,14.015414,0.619927,46.15722,46.0,15.029966,52.012814,44.0,32.819819,13.990097,13.979808,0.608925,49.06548,49.0,14.370139,57.380598,55,32.795914,14.017349,14.016863,0.588426,37.666341,35,10.237646,30.848173,26.0,18.336994,13.656689,13.612417,0.617393,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927


In [28]:
# Preview three rows of the engineered validation frame. A fixed seed keeps
# the displayed rows identical across "Restart Kernel -> Run All" executions
# (an unseeded sample shows different rows on every run).
valid.sample(n=3, random_state=345)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gend
er-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_A
ccount_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Activ
e-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
13912,EC3AS96V,0,67,33,1,0,50,1,14.270273,1,0,33,1,0,1,1,133,132,100,67,4,4,3,1,1,3,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,49.03886,49,14.141979,58.949928,57.0,33.935316,14.020919,14.015742,0.595516,53.130842,56,18.615624,54.886123,50,34.306833,13.798887,13.774818,0.630414,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,47.923203,48,14.217695,55.489554,51,34.657305,13.802428,13.781543,0.625565,47.710911,47.0,14.623511,55.401454,51.0,33.659813,14.014778,14.012334,0.598727,50.280667,49,19.444018,48.881872,33,32.74159,13.772575,13.743262,0.635221,31.947681,29,...,0.62249,45.904513,46,14.819066,50.468969,38,33.627966,13.803423,13.78169,0.626325,58.967986,63.0,16.35266,65.572662,68.0,33.636895,14.075728,14.077804,0.595842,38.341542,31.0,14.820396,32.155379,26.0,20.710089,14.021016,14.017787,0.618966,46.264987,46.0,15.044316,51.727422,44.0,32.348306,13.992441,13.984893,0.610751,49.036115,49.0,13.960779,60.797907,62,35.186266,14.020491,14.017926,0.602234,36.419296,30,15.566254,27.707784,25.0,16.014826,13.678502,13.636975,0.633103,49.62441,48,19.664724,48.276304,33,32.204767,13.741391,13.710649,0.635718,57.710886,62,16.796927,63.298919,63,34.958821,13.857741,13.839231,0.633392,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,33.841374,29,12.104989,26.812701,21,17.430365,13.789223,13.758726,0.631895,46.009136,46,15.148168,48.25175,33,32.637442,13.763813,13.73815,0.63022
21421,5ETVB79V,0,49,2,3,1,21,1,12.968149,0,0,2,3,1,1,0,11,9,7,4,13,10,6,4,2,2,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,35.797947,30,12.451322,32.687977,26.0,22.740956,13.216919,13.217881,0.536583,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,33.164214,29.0,10.486999,28.595644,26.0,17.639219,13.190621,13.191414,0.542506,45.469309,47,9.038495,52.403134,49,31.859805,13.756265,13.730588,0.610664,50.025439,48,...,0.62249,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,42.360953,43.0,10.499741,43.166856,32.0,29.09019,13.251701,13.279864,0.510408,50.111597,48.0,11.472823,52.203501,51.0,28.506235,13.347081,13.369751,0.50435,33.438705,30.0,10.696362,28.904395,26.0,17.435311,13.183905,13.189201,0.532997,33.857747,30.0,10.739062,29.936563,26,19.322173,13.205558,13.207432,0.542091,47.153494,47,7.104712,53.313132,55.0,27.966771,13.726652,13.706341,0.59804,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,50.699305,49,11.595563,53.602277,55,26.623489,13.728676,13.70099,0.618954,50.757953,49,11.403322,55.277626,57,27.576043,13.764424,13.742765,0.590941,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927
6724,E3ZZXJY7,0,25,29,3,0,14,1,13.490488,0,0,29,3,0,1,0,119,116,88,58,12,10,6,1,0,2,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,40.893853,37,14.153913,40.181242,27.0,29.860427,13.336136,13.335824,0.525134,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,40.791607,35,14.753063,40.581871,27,28.885153,13.684008,13.647863,0.624317,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,38.413404,32.0,13.588856,35.2714,26.0,26.802138,13.302037,13.299052,0.506059,45.469309,47,9.038495,52.403134,49,31.859805,13.756265,13.730588,0.610664,31.947681,29,...,0.62249,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,44.940594,46.0,9.250069,48.938944,37.0,31.888579,13.337831,13.347382,0.491703,31.571699,29.0,8.281459,23.628553,21.0,10.497775,13.284626,13.278023,0.52427,38.230728,32.0,13.781769,35.303562,26.0,26.307527,13.305424,13.299828,0.524013,38.801435,32.0,13.650822,36.300957,26,26.780348,13.322731,13.327819,0.514896,37.666341,35,10.237646,30.848173,26.0,18.336994,13.656689,13.612417,0.617393,45.354059,47,9.352548,51.904785,49,31.022695,13.705602,13.672885,0.616762,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,31.437029,29,8.277654,24.406913,25,10.435279,13.622954,13.581629,0.61863,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,37.114239,31,13.289813,35.176062,27,24.514767,13.62776,13.589475,0.613927


In [29]:
# Separate model inputs from the label for the training and validation splits.
# `ID` is removed from the inputs together with the `Is_Lead` target column.
y_train = train['Is_Lead']
x_train = train.drop(columns=['ID', 'Is_Lead'])

y_valid = valid['Is_Lead']
x_valid = valid.drop(columns=['ID', 'Is_Lead'])

# Only `ID` is dropped from the test frame (no target column is removed here).
# NOTE(review): the models below receive these frames positionally, so this
# presumably relies on `test` having the same feature columns in the same
# order as `train` -- confirm.
x_test = test.drop(columns=['ID'])

In [30]:
# First base learner: LightGBM classifier, early-stopped on the validation
# split and then scored (ROC AUC) on that same split.
lgb_params = dict(
    learning_rate=0.05,
    n_estimators=500,
    max_bin=100,
    num_leaves=10,
    max_depth=30,
    reg_alpha=8.5,
    reg_lambda=7.0,
    # NOTE(review): LightGBM appears to apply row subsampling only when
    # `subsample_freq` > 0, which is not set here -- confirm whether bagging
    # was actually intended.
    subsample=0.75,
    random_state=42,
    class_weight='balanced',
)


lgb = LGBMClassifier(**lgb_params)
# No `eval_metric` is passed; the printed log shows the monitored metric is
# binary_logloss, while the score reported below is ROC AUC.
# NOTE(review): `early_stopping_rounds` / `verbose` as fit keywords are
# version-dependent in LightGBM -- confirm against the pinned version.
lgb.fit(
    x_train,
    y_train,
    eval_set=[(x_valid, y_valid)],
    early_stopping_rounds=10,
    verbose=400,
)

# Probability of the positive class (column 1) on the validation rows.
pred = lgb.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

Training until validation scores don't improve for 10 rounds
[400]	valid_0's binary_logloss: 0.422752
Early stopping, best iteration is:
[468]	valid_0's binary_logloss: 0.42258
roc_auc_score: 0.8733319870897727


In [31]:
# Replace every missing value with 0 in all three feature frames.
# NOTE(review): `lgb` was fitted in the previous cell, before this fill, but
# the later prediction cells score it on these filled frames -- confirm the
# mismatch between training and prediction inputs is intended.
x_train, x_valid, x_test = (
    frame.fillna(0) for frame in (x_train, x_valid, x_test)
)

In [32]:
# Second base learner: scikit-learn's histogram-based gradient boosting.
hist_params = dict(
    max_iter=500,
    learning_rate=0.06,
    max_depth=7,
    early_stopping='auto',
    verbose=1,  # prints one log line per boosting round (see output below)
    random_state=63,
)


hist = HistGradientBoostingClassifier(**hist_params)
hist.fit(x_train, y_train)

# Positive-class probability on the validation rows, scored with ROC AUC.
pred = hist.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

Binning 0.301 GB of training data: 4.523 s
Binning 0.033 GB of validation data: 0.051 s
Fitting gradient boosted rounds:
[1/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.52359, val loss: 0.52335, in 0.247s
[2/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.50351, val loss: 0.50320, in 0.214s
[3/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.48647, val loss: 0.48612, in 0.225s
[4/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.47170, val loss: 0.47134, in 0.224s
[5/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.45883, val loss: 0.45841, in 0.238s
[6/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.44749, val loss: 0.44709, in 0.236s
[7/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43740, val loss: 0.43701, in 0.225s
[8/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.42849, val loss: 0.42813, in 0.237s
[9/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.42045, val loss: 0.42009, in 0.221s
[10/500] 1 tree, 31 leaves, max depth = 7, train lo

[88/500] 1 tree, 24 leaves, max depth = 7, train loss: 0.33949, val loss: 0.34343, in 0.123s
[89/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33942, val loss: 0.34342, in 0.170s
[90/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33934, val loss: 0.34338, in 0.187s
[91/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33927, val loss: 0.34340, in 0.124s
[92/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33919, val loss: 0.34340, in 0.170s
[93/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33913, val loss: 0.34339, in 0.132s
[94/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33907, val loss: 0.34340, in 0.163s
[95/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33900, val loss: 0.34341, in 0.158s
[96/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33895, val loss: 0.34341, in 0.132s
[97/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33888, val loss: 0.34341, in 0.157s
[98/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.33883, val lo

In [33]:
# Positive-class probabilities of both base models on the training rows,
# collected column-wise (one column per model) as stacking inputs.
train_pred = pd.DataFrame({
    'lgb': lgb.predict_proba(x_train)[:, 1],
    'hist': hist.predict_proba(x_train)[:, 1],
})
train_pred.head(3)

Unnamed: 0,lgb,hist
0,0.177617,0.061099
1,0.319354,0.136129
2,0.77665,0.500255


In [34]:
# Positive-class probabilities of both base models on the validation rows,
# laid out with the same column names as `train_pred`.
valid_pred = pd.DataFrame({
    'lgb': lgb.predict_proba(x_valid)[:, 1],
    'hist': hist.predict_proba(x_valid)[:, 1],
})
valid_pred.head(3)

Unnamed: 0,lgb,hist
0,0.151794,0.051147
1,0.943953,0.834074
2,0.686379,0.412364


In [35]:
# Positive-class probabilities of both base models on the test rows,
# laid out with the same column names as `train_pred` / `valid_pred`.
test_pred = pd.DataFrame({
    'lgb': lgb.predict_proba(x_test)[:, 1],
    'hist': hist.predict_proba(x_test)[:, 1],
})
test_pred.head(3)

Unnamed: 0,lgb,hist
0,0.113031,0.038952
1,0.944304,0.846096
2,0.162903,0.053951


In [36]:
# Meta-learner: logistic regression fitted on the two base-model probability
# columns, then scored (ROC AUC) on the validation predictions.
# NOTE(review): `train_pred` holds predictions made on the same rows the base
# models were fitted on; stacking usually uses out-of-fold predictions
# instead -- confirm this is acceptable.
model = LogisticRegression(
    random_state=10,
    max_iter=1000,
    class_weight='balanced',
    fit_intercept=True,
)
model.fit(train_pred, y_train)

pred = model.predict_proba(valid_pred)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

roc_auc_score: 0.8735747120533207


In [37]:
# Alternative blend: unweighted row-wise mean of the base-model probabilities,
# scored with ROC AUC on the validation split for comparison with the
# logistic stacker.
pred = valid_pred.mean(axis='columns')
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

roc_auc_score: 0.873609447031819


In [38]:
# Blend the two base-model probabilities for the test rows.
#
# The base columns are selected by name rather than "everything currently in
# the frame": this cell adds columns to `test_pred`, so on a re-run the
# original `test_pred.mean(axis=1)` / `drop(['stacked_avg'])` would fold the
# previously added `stacked_avg`, `stacked_logistic` and `ID` columns into the
# average and into the meta-model input.
base_cols = ['lgb', 'hist']

# Unweighted row-wise mean of the base-model probabilities.
test_pred['stacked_avg'] = test_pred[base_cols].mean(axis=1)

# Positive-class probability from the logistic meta-learner, fed the same two
# columns (same order) it was fitted on.
test_pred['stacked_logistic'] = model.predict_proba(test_pred[base_cols])[:, 1]

# Attach IDs by position: `test_pred` was built from prediction arrays and has
# a fresh 0..n-1 index, so a label-aligned assignment would silently produce
# NaN/misplaced IDs if `test` ever carried a different index.
test_pred['ID'] = test['ID'].to_numpy()
test_pred.head(1)

Unnamed: 0,lgb,hist,stacked_avg,stacked_logistic,ID
0,0.113031,0.038952,0.075992,0.130696,VBENBARO


In [40]:
# Export the averaged blend as the submission file: two columns, `ID` and the
# prediction renamed to `Is_Lead`, without the DataFrame index.
submission = test_pred[['ID', 'stacked_avg']].rename(columns={'stacked_avg': 'Is_Lead'})
submission.to_csv('submit_4.csv', index=False)
