# Problem Statement

### Credit Card Lead Prediction

Happy Customer Bank is a mid-sized private bank that deals in all kinds of banking products, like Savings accounts, Current accounts, investment products, credit products, among other offerings.


The bank also cross-sells products to its existing customers and to do so they use different kinds of communication like tele-calling, e-mails, recommendations on net banking, mobile banking, etc. 


In this case, the Happy Customer Bank wants to cross sell its credit cards to its existing customers. The bank has identified a set of customers that are eligible for taking these credit cards.


Now, the bank is looking for your help in identifying customers that could show higher intent towards a recommended credit card, given:

    Customer details (gender, age, region etc.)
    Details of his/her relationship with the bank (Channel_Code,Vintage, 'Avg_Asset_Value etc.)


In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder
from lightgbm import LGBMClassifier
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier


from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier

from sklearn.linear_model import LogisticRegression

pd.options.display.max_columns = 200

In [2]:
data = pd.read_csv("train_s3TEQDk.csv")
test = pd.read_csv("test_mSzZ8RL.csv")
print(f"Train shape {data.shape}, Test Shape {test.shape}")

Train shape (245725, 11), Test Shape (105312, 10)


In [3]:
train,valid = train_test_split(data,test_size=0.20,random_state=345,stratify=data['Is_Lead'])
train = train.copy()
valid = valid.copy()
print(f"Train shape {train.shape} Validation shape {valid.shape}")

Train shape (196580, 11) Validation shape (49145, 11)


In [4]:
train.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
53078,N3ZQ84QR,Female,46,RG280,Self_Employed,X2,51,No,863584,Yes,0
213644,JWGAMK7P,Male,67,RG258,Other,X2,43,Yes,706126,No,0
131870,CX9NGNQT,Male,46,RG279,Self_Employed,X2,26,Yes,422207,Yes,0


In [5]:
test.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active
0,VBENBARO,Male,29,RG254,Other,X1,25,Yes,742366,No
1,CCMEWNKY,Male,43,RG268,Other,X2,49,,925537,No
2,VK3KGA9M,Male,31,RG270,Salaried,X1,14,No,215949,No


In [6]:
valid.head(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
148453,OK9KJGZ2,Female,55,RG268,Self_Employed,X1,37,No,929257,No,0
117997,TTC7CPSI,Male,57,RG283,Self_Employed,X3,87,,909740,No,0
5432,MPUWVRAX,Male,39,RG275,Salaried,X1,8,Yes,961742,Yes,0


In [7]:
train['ID'].nunique()

196580

In [8]:
train.isna().sum()

ID                         0
Gender                     0
Age                        0
Region_Code                0
Occupation                 0
Channel_Code               0
Vintage                    0
Credit_Product         23525
Avg_Account_Balance        0
Is_Active                  0
Is_Lead                    0
dtype: int64

In [9]:
train['Gender'].value_counts(normalize=True)

Male      0.546693
Female    0.453307
Name: Gender, dtype: float64

In [10]:
train['Region_Code'].nunique()

35

In [11]:
train['Occupation'].value_counts(normalize=True)

Self_Employed    0.411532
Salaried         0.292731
Other            0.284866
Entrepreneur     0.010871
Name: Occupation, dtype: float64

In [12]:
train['Channel_Code'].value_counts(normalize=True)

X1    0.421279
X3    0.280359
X2    0.275669
X4    0.022693
Name: Channel_Code, dtype: float64

In [13]:
train['Credit_Product'].value_counts(normalize=True)

No     0.667019
Yes    0.332981
Name: Credit_Product, dtype: float64

In [14]:
train['Avg_Account_Balance'].describe()

count    1.965800e+05
mean     1.129489e+06
std      8.532486e+05
min      2.079000e+04
25%      6.042470e+05
50%      8.954865e+05
75%      1.368733e+06
max      1.035201e+07
Name: Avg_Account_Balance, dtype: float64

In [15]:
train['Is_Active'].value_counts(normalize=True)

No     0.611375
Yes    0.388625
Name: Is_Active, dtype: float64

In [16]:
train['Is_Lead'].value_counts(normalize=True)

0    0.762794
1    0.237206
Name: Is_Lead, dtype: float64

In [17]:
train['Age'].describe()

count    196580.000000
mean         43.864971
std          14.821238
min          23.000000
25%          30.000000
50%          43.000000
75%          54.000000
max          85.000000
Name: Age, dtype: float64

In [18]:
train['Vintage'].describe()

count    196580.000000
mean         46.978121
std          32.346981
min           7.000000
25%          20.000000
50%          32.000000
75%          73.000000
max         135.000000
Name: Vintage, dtype: float64

In [19]:
train.groupby(['Is_Lead'])[['Age','Avg_Account_Balance']].median()

Unnamed: 0_level_0,Age,Avg_Account_Balance
Is_Lead,Unnamed: 1_level_1,Unnamed: 2_level_1
0,38,871158
1,49,980686


In [20]:
train['Credit_Product'] = train['Credit_Product'].fillna('No')
train['Avg_Account_Balance'] = np.log(1+train['Avg_Account_Balance'])

test['Credit_Product'] = test['Credit_Product'].fillna('No')
test['Avg_Account_Balance'] = np.log(1+test['Avg_Account_Balance'])

valid['Credit_Product'] = valid['Credit_Product'].fillna('No')
valid['Avg_Account_Balance'] = np.log(1+valid['Avg_Account_Balance'])


In [21]:
train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead
179539,5BXCNPC8,Male,56,RG284,Self_Employed,X3,61,Yes,13.416296,Yes,1
182366,ACGSDKWX,Female,29,RG283,Salaried,X2,15,Yes,14.188355,No,0
35595,E3HHPGPG,Male,28,RG280,Salaried,X1,19,No,14.05881,No,1


In [22]:
train['Is_Active'] = train['Is_Active'].replace({'No':'N','Yes':'Y'})
test['Is_Active'] = test['Is_Active'].replace({'No':'N','Yes':'Y'})
valid['Is_Active'] = valid['Is_Active'].replace({'No':'N','Yes':'Y'})

In [23]:
cat_cols = ['Gender','Region_Code','Occupation','Channel_Code','Credit_Product','Is_Active']
featured_cols = []
for idx,col in enumerate(cat_cols):
    for sub_col in cat_cols[idx+1:]:
        new_col = f"{col}-{sub_col}"
        featured_cols.append(new_col)
        train[new_col] = train[col] + "-" + train[sub_col]
        test[new_col] = test[col] + "-" + test[sub_col]
        valid[new_col] = valid[col] + "-" + valid[sub_col]

train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active
133001,FBYFTGSE,Female,27,RG272,Salaried,X1,27,No,13.52999,N,0,Female-RG272,Female-Salaried,Female-X1,Female-No,Female-N,RG272-Salaried,RG272-X1,RG272-No,RG272-N,Salaried-X1,Salaried-No,Salaried-N,X1-No,X1-N,No-N
202987,TEXTYPBW,Female,27,RG254,Self_Employed,X1,26,No,13.546653,N,0,Female-RG254,Female-Self_Employed,Female-X1,Female-No,Female-N,RG254-Self_Employed,RG254-X1,RG254-No,RG254-N,Self_Employed-X1,Self_Employed-No,Self_Employed-N,X1-No,X1-N,No-N
157433,WP5AEJVW,Male,69,RG280,Other,X3,80,No,13.903949,N,0,Male-RG280,Male-Other,Male-X3,Male-No,Male-N,RG280-Other,RG280-X3,RG280-No,RG280-N,Other-X3,Other-No,Other-N,X3-No,X3-N,No-N


In [24]:
all_cat_cols = cat_cols + featured_cols
num_col = ['Age','Vintage','Avg_Account_Balance']
for idx,col in enumerate (all_cat_cols):
    for ind, num in enumerate(num_col):
        print(f"Working Cat Col {col} {idx}/{len(all_cat_cols)}, Num col {num} {ind}/{len(num_col)}")
        grp = train.groupby([col])[num].agg(['mean','median','std'])
        grp = grp.add_prefix(f'{col}-{num}-')
        grp = grp.reset_index()
        train = train.merge(grp,on=[col],how='left')
        test = test.merge(grp,on=[col],how='left')
        valid = valid.merge(grp,on=[col],how='left')

Working Cat Col Gender 0/21, Num col Age 0/3
Working Cat Col Gender 0/21, Num col Vintage 1/3
Working Cat Col Gender 0/21, Num col Avg_Account_Balance 2/3
Working Cat Col Region_Code 1/21, Num col Age 0/3
Working Cat Col Region_Code 1/21, Num col Vintage 1/3
Working Cat Col Region_Code 1/21, Num col Avg_Account_Balance 2/3
Working Cat Col Occupation 2/21, Num col Age 0/3
Working Cat Col Occupation 2/21, Num col Vintage 1/3
Working Cat Col Occupation 2/21, Num col Avg_Account_Balance 2/3
Working Cat Col Channel_Code 3/21, Num col Age 0/3
Working Cat Col Channel_Code 3/21, Num col Vintage 1/3
Working Cat Col Channel_Code 3/21, Num col Avg_Account_Balance 2/3
Working Cat Col Credit_Product 4/21, Num col Age 0/3
Working Cat Col Credit_Product 4/21, Num col Vintage 1/3
Working Cat Col Credit_Product 4/21, Num col Avg_Account_Balance 2/3
Working Cat Col Is_Active 5/21, Num col Age 0/3
Working Cat Col Is_Active 5/21, Num col Vintage 1/3
Working Cat Col Is_Active 5/21, Num col Avg_Account_Bala

In [25]:
all_cat_cols = cat_cols + featured_cols
encoder = LabelEncoder()
for col in all_cat_cols:
    train[col] = encoder.fit_transform(train[col])
    test[col] = encoder.transform(test[col])
    valid[col] = encoder.transform(valid[col])

In [26]:
train.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gender-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_Account_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Active-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
78841,P9TYDBJ6,1,26,1,2,0,20,0,12.896328,0,0,36,6,4,2,2,6,4,2,2,8,4,4,0,0,0,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,43.649198,42,14.443586,41.859465,27.0,30.611513,13.324506,13.297365,0.602871,30.911582,29,6.624586,26.385212,25,17.005681,13.646255,13.609381,0.617679,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,45.601117,45.0,14.323958,45.903538,31.0,32.65438,13.335241,13.309485,0.609869,31.733941,29,7.651692,28.277983,25,20.17391,13.683201,13.647073,0.620615,32.826819,29,...,0.62207,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,31.079666,30.0,6.03864,24.492413,21.0,14.226752,13.299287,13.245587,0.598086,31.207977,30.0,6.743175,23.033048,21.0,8.026967,13.279949,13.234718,0.58288,42.765107,40,14.396386,41.731043,31,30.00466,13.314315,13.286669,0.602492,40.691678,35.0,14.119721,37.072988,26,27.806457,13.317397,13.286669,0.590312,29.342995,29,3.403885,22.884158,21.0,7.426934,13.621987,13.584699,0.611982,30.522412,29,5.954059,25.560656,25,15.008034,13.633016,13.594566,0.620188,30.476931,29,5.708913,25.479779,25,14.052418,13.60242,13.567376,0.607033,31.847115,29,8.878776,25.00126,25,11.774384,13.629393,13.588124,0.619886,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,39.330676,32,14.181333,39.534975,27,28.006431,13.655642,13.620665,0.614959
21680,UQCUBJJJ,0,48,6,3,2,20,0,13.83611,1,1,6,3,2,0,1,27,26,12,13,14,6,7,4,5,1,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,36.003501,30,12.490346,31.940044,26.0,22.085061,13.296274,13.332699,0.504806,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,54.709415,53,12.125373,74.830457,80,32.852358,13.863153,13.848448,0.614125,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,47.923203,48,14.217695,55.489554,51,34.657305,13.802428,13.781543,0.625565,34.043441,30.0,11.163849,28.870528,25.0,18.276467,13.246285,13.268162,0.50773,45.469309,47,9.038495,52.403134,49,31.859805,13.756265,13.730588,0.610664,53.732041,52,...,0.623503,45.904513,46,14.819066,50.468969,38,33.627966,13.803423,13.78169,0.626325,42.588869,43.0,10.095306,41.18851,31.0,28.05529,13.334337,13.358499,0.519863,52.255556,52.5,14.25907,61.959259,57.0,33.901264,13.366191,13.405048,0.493631,34.998344,30,11.875783,31.131347,26,20.737969,13.280276,13.312895,0.505261,43.960499,43.0,15.820625,44.372141,31,31.089065,13.338119,13.388436,0.5061,50.10095,51,6.705361,73.301676,79.0,32.354135,13.825329,13.810187,0.605881,46.100705,47,8.980271,55.094062,51,32.034048,13.725906,13.69673,0.612442,47.476003,48,7.946399,59.285561,57,33.368966,13.769124,13.749167,0.617108,55.002319,53,12.363964,75.155171,80,31.941017,13.839509,13.823741,0.616803,55.252966,54,11.483858,78.930238,86,32.033911,13.88025,13.867758,0.62071,47.105128,47,14.677052,52.493012,43,33.922717,13.782222,13.758816,0.626892
116084,U3JUTPS6,0,28,4,2,0,15,0,13.025359,0,1,4,2,0,0,0,18,16,8,8,8,4,4,0,0,0,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,43.716526,43,14.818066,47.403313,32.0,32.850418,13.991658,13.96426,0.559802,30.911582,29,6.624586,26.385212,25,17.005681,13.646255,13.609381,0.617679,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,40.649219,35.0,14.431614,40.736859,27.0,29.393469,13.97935,13.948171,0.56175,30.233873,29,5.548506,24.825372,21,13.665404,13.615808,13.580002,0.613589,31.947681,29,...,0.623503,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,30.394812,29.0,6.378475,26.442712,25.0,16.419692,14.006068,13.978921,0.565824,30.373686,29.0,6.784324,23.747719,25.0,7.841774,13.991901,13.960572,0.569228,42.589778,40,14.972429,45.466629,31,31.750845,13.987204,13.960048,0.564412,41.556842,37.0,14.632986,43.260592,27,30.415394,13.988598,13.962556,0.556208,29.342995,29,3.403885,22.884158,21.0,7.426934,13.621987,13.584699,0.611982,30.522412,29,5.954059,25.560656,25,15.008034,13.633016,13.594566,0.620188,30.476931,29,5.708913,25.479779,25,14.052418,13.60242,13.567376,0.607033,31.847115,29,8.878776,25.00126,25,11.774384,13.629393,13.588124,0.619886,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,39.330676,32,14.181333,39.534975,27,28.006431,13.655642,13.620665,0.614959


In [27]:
test.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gender-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,Gender-Channel_Code-Age-std,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_Account_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Active-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
43900,IUO48YJB,1,66,27,1,1,73,1,13.136051,0,62,5,5,3,2,109,109,55,54,5,3,2,3,2,2,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,41.647938,38,14.112767,42.354309,31.0,29.926868,13.615181,13.600124,0.586843,53.130842,56,18.615624,54.886123,50,34.306833,13.798887,13.774818,0.630414,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,46.864084,47,14.19592,51.701079,39,34.259627,13.794534,13.771814,0.608655,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,43.845791,43.0,14.233459,46.734035,32.0,31.801034,13.636541,13.626154,0.581819,55.304334,60,17.651923,59.464858,57,34.764459,13.818952,13.796086,0.62599,50.58274,49,10.999853,...,0.604623,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,49.834913,49.0,18.583332,49.175656,33.0,32.695634,13.656585,13.640675,0.590342,49.756576,48.0,11.460631,53.329407,55.0,28.181759,13.62418,13.612373,0.569004,45.896405,47,13.804068,49.081175,33,33.359701,13.67644,13.646641,0.569034,38.802073,33.0,13.445579,36.728516,27,26.302461,13.605013,13.588213,0.590112,59.442674,62,13.895012,58.589195,61.0,27.653358,13.804688,13.785444,0.612041,56.266929,60,16.882645,59.013508,56,35.438797,13.866254,13.84261,0.620115,49.95129,50,19.146527,49.045805,33,32.594454,13.75803,13.730922,0.625097,49.803935,48,11.000145,52.7749,51,28.80404,13.783989,13.765037,0.589136,50.757953,49,11.403322,55.277626,57,27.576043,13.764424,13.742765,0.590941,45.197443,45,14.694064,45.636257,32,32.006965,13.761389,13.736186,0.602281
85833,DTIJBDMT,0,61,30,3,2,63,0,13.390847,1,30,3,2,0,1,123,122,60,61,14,6,7,4,5,1,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,43.205554,42,14.808561,43.188539,27.0,32.184806,13.411736,13.378379,0.548784,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,54.709415,53,12.125373,74.830457,80,32.852358,13.863153,13.848448,0.614125,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,47.923203,48,14.217695,55.489554,51,34.657305,13.802428,13.781543,0.625565,40.050022,35.0,14.537903,37.097365,25.0,28.727333,13.401521,13.366465,0.550985,45.469309,47,9.038495,52.403134,49,31.859805,13.756265,13.730588,0.610664,53.732041,52,12.593626,...,0.623503,45.904513,46,14.819066,50.468969,38,33.627966,13.803423,13.78169,0.626325,46.544604,48.0,8.937011,52.210358,49.0,32.865881,13.383783,13.359287,0.528443,54.87283,54.0,12.046721,69.916604,74.0,33.612253,13.482402,13.43808,0.559908,42.146436,40,15.054009,42.173633,26,31.506583,13.391947,13.355268,0.552284,48.250457,48.0,14.158527,53.452126,50,34.630722,13.427537,13.39975,0.552327,50.10095,51,6.705361,73.301676,79.0,32.354135,13.825329,13.810187,0.605881,46.100705,47,8.980271,55.094062,51,32.034048,13.725906,13.69673,0.612442,47.476003,48,7.946399,59.285561,57,33.368966,13.769124,13.749167,0.617108,55.002319,53,12.363964,75.155171,80,31.941017,13.839509,13.823741,0.616803,55.252966,54,11.483858,78.930238,86,32.033911,13.88025,13.867758,0.62071,47.105128,47,14.677052,52.493012,43,33.922717,13.782222,13.758816,0.626892
87102,9NZCKWKE,1,31,14,2,0,33,0,12.691257,0,49,6,4,2,2,58,56,28,28,8,4,4,0,0,0,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,36.93488,32,11.885901,32.293488,26.5,21.889166,13.306952,13.31546,0.45766,30.911582,29,6.624586,26.385212,25,17.005681,13.646255,13.609381,0.617679,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,39.394737,34.0,12.922788,35.896552,27.0,25.741223,13.329174,13.327929,0.458231,31.733941,29,7.651692,28.277983,25,20.17391,13.683201,13.647073,0.620615,32.826819,29,10.206629,...,0.62207,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,30.759225,30.0,4.47628,25.259225,26.0,9.730731,13.281847,13.287913,0.456347,31.737531,30.0,6.159884,24.933915,26.0,7.387627,13.288222,13.299485,0.451876,35.870276,32,11.167162,31.175409,27,20.122178,13.302239,13.307193,0.459132,35.125071,31.0,10.610824,29.641273,26,17.807315,13.292151,13.302805,0.451086,29.342995,29,3.403885,22.884158,21.0,7.426934,13.621987,13.584699,0.611982,30.522412,29,5.954059,25.560656,25,15.008034,13.633016,13.594566,0.620188,30.476931,29,5.708913,25.479779,25,14.052418,13.60242,13.567376,0.607033,31.847115,29,8.878776,25.00126,25,11.774384,13.629393,13.588124,0.619886,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,39.330676,32,14.181333,39.534975,27,28.006431,13.655642,13.620665,0.614959


In [28]:
valid.sample(3)

Unnamed: 0,ID,Gender,Age,Region_Code,Occupation,Channel_Code,Vintage,Credit_Product,Avg_Account_Balance,Is_Active,Is_Lead,Gender-Region_Code,Gender-Occupation,Gender-Channel_Code,Gender-Credit_Product,Gender-Is_Active,Region_Code-Occupation,Region_Code-Channel_Code,Region_Code-Credit_Product,Region_Code-Is_Active,Occupation-Channel_Code,Occupation-Credit_Product,Occupation-Is_Active,Channel_Code-Credit_Product,Channel_Code-Is_Active,Credit_Product-Is_Active,Gender-Age-mean,Gender-Age-median,Gender-Age-std,Gender-Vintage-mean,Gender-Vintage-median,Gender-Vintage-std,Gender-Avg_Account_Balance-mean,Gender-Avg_Account_Balance-median,Gender-Avg_Account_Balance-std,Region_Code-Age-mean,Region_Code-Age-median,Region_Code-Age-std,Region_Code-Vintage-mean,Region_Code-Vintage-median,Region_Code-Vintage-std,Region_Code-Avg_Account_Balance-mean,Region_Code-Avg_Account_Balance-median,Region_Code-Avg_Account_Balance-std,Occupation-Age-mean,Occupation-Age-median,Occupation-Age-std,Occupation-Vintage-mean,Occupation-Vintage-median,Occupation-Vintage-std,Occupation-Avg_Account_Balance-mean,Occupation-Avg_Account_Balance-median,Occupation-Avg_Account_Balance-std,Channel_Code-Age-mean,Channel_Code-Age-median,Channel_Code-Age-std,Channel_Code-Vintage-mean,Channel_Code-Vintage-median,Channel_Code-Vintage-std,Channel_Code-Avg_Account_Balance-mean,Channel_Code-Avg_Account_Balance-median,Channel_Code-Avg_Account_Balance-std,Credit_Product-Age-mean,Credit_Product-Age-median,Credit_Product-Age-std,Credit_Product-Vintage-mean,Credit_Product-Vintage-median,Credit_Product-Vintage-std,Credit_Product-Avg_Account_Balance-mean,Credit_Product-Avg_Account_Balance-median,Credit_Product-Avg_Account_Balance-std,Is_Active-Age-mean,Is_Active-Age-median,Is_Active-Age-std,Is_Active-Vintage-mean,Is_Active-Vintage-median,Is_Active-Vintage-std,Is_Active-Avg_Account_Balance-mean,Is_Active-Avg_Account_Balance-median,Is_Active-Avg_Account_Balance-std,Gender-Region_Code-Age-mean,Gender-Region_Code-Age-median,Gender-Region_Code-Age-std,Gender-Region_Code-Vintage-mean,Gender-Region_Code-Vintage-median,Gender-Region_Code-Vintage-std,Gender-Region_Code-Avg_Account_Balance-mean,Gender-Region_Code-Avg_Account_Balance-median,Gender-Region_Code-Avg_Account_Balance-std,Gender-Occupation-Age-mean,Gender-Occupation-Age-median,Gender-Occupation-Age-std,Gender-Occupation-Vintage-mean,Gender-Occupation-Vintage-median,Gender-Occupation-Vintage-std,Gender-Occupation-Avg_Account_Balance-mean,Gender-Occupation-Avg_Account_Balance-median,Gender-Occupation-Avg_Account_Balance-std,Gender-Channel_Code-Age-mean,Gender-Channel_Code-Age-median,...,Gender-Credit_Product-Avg_Account_Balance-std,Gender-Is_Active-Age-mean,Gender-Is_Active-Age-median,Gender-Is_Active-Age-std,Gender-Is_Active-Vintage-mean,Gender-Is_Active-Vintage-median,Gender-Is_Active-Vintage-std,Gender-Is_Active-Avg_Account_Balance-mean,Gender-Is_Active-Avg_Account_Balance-median,Gender-Is_Active-Avg_Account_Balance-std,Region_Code-Occupation-Age-mean,Region_Code-Occupation-Age-median,Region_Code-Occupation-Age-std,Region_Code-Occupation-Vintage-mean,Region_Code-Occupation-Vintage-median,Region_Code-Occupation-Vintage-std,Region_Code-Occupation-Avg_Account_Balance-mean,Region_Code-Occupation-Avg_Account_Balance-median,Region_Code-Occupation-Avg_Account_Balance-std,Region_Code-Channel_Code-Age-mean,Region_Code-Channel_Code-Age-median,Region_Code-Channel_Code-Age-std,Region_Code-Channel_Code-Vintage-mean,Region_Code-Channel_Code-Vintage-median,Region_Code-Channel_Code-Vintage-std,Region_Code-Channel_Code-Avg_Account_Balance-mean,Region_Code-Channel_Code-Avg_Account_Balance-median,Region_Code-Channel_Code-Avg_Account_Balance-std,Region_Code-Credit_Product-Age-mean,Region_Code-Credit_Product-Age-median,Region_Code-Credit_Product-Age-std,Region_Code-Credit_Product-Vintage-mean,Region_Code-Credit_Product-Vintage-median,Region_Code-Credit_Product-Vintage-std,Region_Code-Credit_Product-Avg_Account_Balance-mean,Region_Code-Credit_Product-Avg_Account_Balance-median,Region_Code-Credit_Product-Avg_Account_Balance-std,Region_Code-Is_Active-Age-mean,Region_Code-Is_Active-Age-median,Region_Code-Is_Active-Age-std,Region_Code-Is_Active-Vintage-mean,Region_Code-Is_Active-Vintage-median,Region_Code-Is_Active-Vintage-std,Region_Code-Is_Active-Avg_Account_Balance-mean,Region_Code-Is_Active-Avg_Account_Balance-median,Region_Code-Is_Active-Avg_Account_Balance-std,Occupation-Channel_Code-Age-mean,Occupation-Channel_Code-Age-median,Occupation-Channel_Code-Age-std,Occupation-Channel_Code-Vintage-mean,Occupation-Channel_Code-Vintage-median,Occupation-Channel_Code-Vintage-std,Occupation-Channel_Code-Avg_Account_Balance-mean,Occupation-Channel_Code-Avg_Account_Balance-median,Occupation-Channel_Code-Avg_Account_Balance-std,Occupation-Credit_Product-Age-mean,Occupation-Credit_Product-Age-median,Occupation-Credit_Product-Age-std,Occupation-Credit_Product-Vintage-mean,Occupation-Credit_Product-Vintage-median,Occupation-Credit_Product-Vintage-std,Occupation-Credit_Product-Avg_Account_Balance-mean,Occupation-Credit_Product-Avg_Account_Balance-median,Occupation-Credit_Product-Avg_Account_Balance-std,Occupation-Is_Active-Age-mean,Occupation-Is_Active-Age-median,Occupation-Is_Active-Age-std,Occupation-Is_Active-Vintage-mean,Occupation-Is_Active-Vintage-median,Occupation-Is_Active-Vintage-std,Occupation-Is_Active-Avg_Account_Balance-mean,Occupation-Is_Active-Avg_Account_Balance-median,Occupation-Is_Active-Avg_Account_Balance-std,Channel_Code-Credit_Product-Age-mean,Channel_Code-Credit_Product-Age-median,Channel_Code-Credit_Product-Age-std,Channel_Code-Credit_Product-Vintage-mean,Channel_Code-Credit_Product-Vintage-median,Channel_Code-Credit_Product-Vintage-std,Channel_Code-Credit_Product-Avg_Account_Balance-mean,Channel_Code-Credit_Product-Avg_Account_Balance-median,Channel_Code-Credit_Product-Avg_Account_Balance-std,Channel_Code-Is_Active-Age-mean,Channel_Code-Is_Active-Age-median,Channel_Code-Is_Active-Age-std,Channel_Code-Is_Active-Vintage-mean,Channel_Code-Is_Active-Vintage-median,Channel_Code-Is_Active-Vintage-std,Channel_Code-Is_Active-Avg_Account_Balance-mean,Channel_Code-Is_Active-Avg_Account_Balance-median,Channel_Code-Is_Active-Avg_Account_Balance-std,Credit_Product-Is_Active-Age-mean,Credit_Product-Is_Active-Age-median,Credit_Product-Is_Active-Age-std,Credit_Product-Is_Active-Vintage-mean,Credit_Product-Is_Active-Vintage-median,Credit_Product-Is_Active-Vintage-std,Credit_Product-Is_Active-Avg_Account_Balance-mean,Credit_Product-Is_Active-Avg_Account_Balance-median,Credit_Product-Is_Active-Avg_Account_Balance-std
25087,CFMPHPUB,1,50,4,3,1,50,0,13.704015,0,0,39,7,5,2,2,19,17,8,8,13,6,6,2,2,0,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,43.716526,43,14.818066,47.403313,32.0,32.850418,13.991658,13.96426,0.559802,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,45.979602,47.0,14.69246,52.321858,38.0,34.365724,14.000739,13.976548,0.55821,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,50.58274,49,...,0.62207,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,46.988973,48.0,8.859679,55.804968,51.0,33.0961,13.950268,13.928111,0.550486,51.082556,50.0,10.87277,56.57203,57.0,29.199342,13.95487,13.937758,0.550302,42.589778,40,14.972429,45.466629,31,31.750845,13.987204,13.960048,0.564412,41.556842,37.0,14.632986,43.260592,27,30.415394,13.988598,13.962556,0.556208,47.153494,47,7.104712,53.313132,55.0,27.966771,13.726652,13.706341,0.59804,46.100705,47,8.980271,55.094062,51,32.034048,13.725906,13.69673,0.612442,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,50.689538,49,11.447889,55.568247,56,27.763311,13.741448,13.718221,0.612731,50.757953,49,11.403322,55.277626,57,27.576043,13.764424,13.742765,0.590941,39.330676,32,14.181333,39.534975,27,28.006431,13.655642,13.620665,0.614959
18501,9CVVQMZJ,1,53,19,3,1,79,0,13.80295,0,0,54,7,5,2,2,79,77,38,38,13,6,6,2,2,0,45.906178,46,14.621736,51.248928,38,33.568613,13.751552,13.726291,0.617448,41.786198,38,14.625964,41.577123,31.0,29.243786,13.675593,13.644289,0.557492,46.559537,48,8.704273,55.55107,51,32.595353,13.745605,13.722401,0.607609,50.382222,49,11.30229,54.598919,56,28.159967,13.75621,13.73557,0.604981,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,44.369338,44.0,14.778097,46.204413,32.0,31.499435,13.687733,13.666134,0.554076,47.293512,48,8.392542,57.670355,56,32.912512,13.738429,13.716965,0.605444,50.58274,49,...,0.62207,43.444031,42,14.829476,45.681388,32,31.394054,13.715595,13.689432,0.609421,45.871345,47.0,9.086352,49.408521,43.0,31.096647,13.647094,13.621766,0.555261,49.909674,48.0,11.366797,50.173077,50.0,27.916051,13.668542,13.650837,0.5584,40.021836,33,14.512974,39.202992,27,27.638561,13.6562,13.628968,0.553222,38.519799,31.0,13.716989,36.39826,27,25.497149,13.66233,13.635728,0.552732,47.153494,47,7.104712,53.313132,55.0,27.966771,13.726652,13.706341,0.59804,46.100705,47,8.980271,55.094062,51,32.034048,13.725906,13.69673,0.612442,45.697397,47,9.279214,52.037951,49,31.4486,13.72348,13.698787,0.597698,50.689538,49,11.447889,55.568247,56,27.763311,13.741448,13.718221,0.612731,50.757953,49,11.403322,55.277626,57,27.576043,13.764424,13.742765,0.590941,39.330676,32,14.181333,39.534975,27,28.006431,13.655642,13.620665,0.614959
30219,FSDFS426,0,32,4,2,0,27,0,14.161169,0,0,4,2,0,0,0,18,16,8,8,8,4,4,0,0,0,41.40325,37,14.685726,41.827474,27,30.01163,13.713333,13.680049,0.622809,43.716526,43,14.818066,47.403313,32.0,32.850418,13.991658,13.96426,0.559802,30.911582,29,6.624586,26.385212,25,17.005681,13.646255,13.609381,0.617679,32.334167,29,9.685063,25.31644,25,12.584854,13.641346,13.601937,0.618524,42.621261,40,14.896923,45.019546,31,31.311646,13.709218,13.67687,0.623182,41.285321,36,14.618073,41.567754,31,29.540018,13.690874,13.659566,0.612792,40.649219,35.0,14.431614,40.736859,27.0,29.393469,13.97935,13.948171,0.56175,30.233873,29,5.548506,24.825372,21,13.665404,13.615808,13.580002,0.613589,31.947681,29,...,0.623503,38.938101,32,14.01236,37.094897,27,26.671735,13.663994,13.62667,0.615316,30.394812,29.0,6.378475,26.442712,25.0,16.419692,14.006068,13.978921,0.565824,30.373686,29.0,6.784324,23.747719,25.0,7.841774,13.991901,13.960572,0.569228,42.589778,40,14.972429,45.466629,31,31.750845,13.987204,13.960048,0.564412,41.556842,37.0,14.632986,43.260592,27,30.415394,13.988598,13.962556,0.556208,29.342995,29,3.403885,22.884158,21.0,7.426934,13.621987,13.584699,0.611982,30.522412,29,5.954059,25.560656,25,15.008034,13.633016,13.594566,0.620188,30.476931,29,5.708913,25.479779,25,14.052418,13.60242,13.567376,0.607033,31.847115,29,8.878776,25.00126,25,11.774384,13.629393,13.588124,0.619886,31.905289,29,8.82993,24.890678,25,10.778658,13.599267,13.562733,0.608134,39.330676,32,14.181333,39.534975,27,28.006431,13.655642,13.620665,0.614959


In [29]:
x_train = train.drop(['ID','Is_Lead'],axis=1)
y_train = train['Is_Lead']

x_valid = valid.drop(['ID','Is_Lead'],axis=1)
y_valid = valid['Is_Lead']

x_test = test.drop(['ID'],axis=1)

In [30]:
lgb_params= {'learning_rate': 0.05, 
             'n_estimators': 500, 
             'max_bin': 100,
             'num_leaves': 10, 
             'max_depth': 30, 
             'reg_alpha': 8.5, 
             'reg_lambda': 7.0, 
             'subsample': 0.75,
            'random_state': 42,
            'class_weight':'balanced'}


lgb = LGBMClassifier(**lgb_params)
lgb.fit(x_train, y_train,eval_set=[(x_valid, y_valid)],early_stopping_rounds=10,verbose=400)
pred = lgb.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

Training until validation scores don't improve for 10 rounds
[400]	valid_0's binary_logloss: 0.548287
Did not meet early stopping. Best iteration is:
[500]	valid_0's binary_logloss: 0.547725
roc_auc_score: 0.7853294122805484


In [31]:
x_train = x_train.fillna(0)
x_valid = x_valid.fillna(0)
x_test = x_test.fillna(0)

In [32]:
hist_params = {'max_iter':500,
               'learning_rate' : 0.06,
               'max_depth' : 7,
               'early_stopping' : 'auto',
               'verbose':1,
               'random_state':63
              }


hist = HistGradientBoostingClassifier(**hist_params)
hist.fit(x_train,y_train)
pred = hist.predict_proba(x_valid)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

Binning 0.301 GB of training data: 5.153 s
Binning 0.033 GB of validation data: 0.053 s
Fitting gradient boosted rounds:
[1/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.53582, val loss: 0.53565, in 0.531s
[2/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.52578, val loss: 0.52546, in 0.275s
[3/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.51709, val loss: 0.51670, in 0.294s
[4/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.50939, val loss: 0.50890, in 0.302s
[5/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.50272, val loss: 0.50220, in 0.327s
[6/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.49678, val loss: 0.49626, in 0.300s
[7/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.49148, val loss: 0.49098, in 0.315s
[8/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.48663, val loss: 0.48614, in 0.322s
[9/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.48235, val loss: 0.48187, in 0.313s
[10/500] 1 tree, 31 leaves, max depth = 7, train lo

[88/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43299, val loss: 0.43654, in 0.161s
[89/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43292, val loss: 0.43654, in 0.154s
[90/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43283, val loss: 0.43654, in 0.183s
[91/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43276, val loss: 0.43652, in 0.164s
[92/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43266, val loss: 0.43650, in 0.261s
[93/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43259, val loss: 0.43648, in 0.203s
[94/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43251, val loss: 0.43647, in 0.184s
[95/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43242, val loss: 0.43644, in 0.379s
[96/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43235, val loss: 0.43642, in 0.136s
[97/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43226, val loss: 0.43639, in 0.202s
[98/500] 1 tree, 31 leaves, max depth = 7, train loss: 0.43218, val lo

In [33]:
train_pred = pd.DataFrame()
train_pred['lgb'] = lgb.predict_proba(x_train)[:, 1]
train_pred['hist'] = hist.predict_proba(x_train)[:, 1]
train_pred.head(3)

Unnamed: 0,lgb,hist
0,0.346292,0.145404
1,0.308001,0.128163
2,0.795196,0.548271


In [34]:
valid_pred = pd.DataFrame()
valid_pred['lgb'] = lgb.predict_proba(x_valid)[:, 1]
valid_pred['hist'] = hist.predict_proba(x_valid)[:, 1]
valid_pred.head(3)

Unnamed: 0,lgb,hist
0,0.382979,0.190234
1,0.776676,0.532996
2,0.710023,0.348629


In [35]:
test_pred = pd.DataFrame()
test_pred['lgb'] = lgb.predict_proba(x_test)[:, 1]
test_pred['hist'] = hist.predict_proba(x_test)[:, 1]
test_pred.head(3)

Unnamed: 0,lgb,hist
0,0.119812,0.03736
1,0.524932,0.228525
2,0.214959,0.069202


In [36]:
model = LogisticRegression(random_state=10,max_iter=1000,class_weight='balanced',fit_intercept=True)
model.fit(train_pred,y_train)
pred = model.predict_proba(valid_pred)[:, 1]
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

roc_auc_score: 0.7855294232886535


In [37]:
pred = valid_pred.mean(axis=1)
roc_score = roc_auc_score(y_valid, pred)
print(f"roc_auc_score: {roc_score}")

roc_auc_score: 0.785601957154491


In [38]:
test_pred['stacked_avg'] = test_pred.mean(axis=1)
test_pred['stacked_logistic'] = model.predict_proba(test_pred.drop(['stacked_avg'],axis=1))[:, 1]
test_pred['ID'] = test['ID']
test_pred.head(1)

Unnamed: 0,lgb,hist,stacked_avg,stacked_logistic,ID
0,0.119812,0.03736,0.078586,0.1555,VBENBARO


In [39]:
# test_pred[['ID','stacked_avg']].rename({'stacked_avg':'Is_Lead'},axis=1).to_csv('submit_4.csv',index=False)
