# Lab | Handling Data Imbalance in Classification Models

In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns 
pd.set_option('display.max_columns', None)

In [33]:
targets = pd.read_csv('targets_20230911.csv')
categorical = pd.read_csv('categorical_20230911.csv')
numerical = pd.read_csv('numerical_20230911.csv')

In [79]:
targets

Unnamed: 0,TARGET_B,TARGET_D
0,0,0.0
1,0,0.0
2,0,0.0
3,0,0.0
4,0,0.0
...,...,...
95407,0,0.0
95408,0,0.0
95409,0,0.0
95410,1,18.0


#### Categorical.

In [34]:
categorical

Unnamed: 0,STATE,MAILCODE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,SOLIH,VETERANS,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B
0,IL,A,36,U,F,0,,,L,E,C,T,2
1,CA,A,14,H,M,3,,,L,G,A,S,1
2,NC,A,43,U,M,3,,,L,E,C,R,2
3,CA,A,44,U,F,3,,,L,E,C,R,2
4,FL,A,16,H,F,3,12.0,,L,F,A,S,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,other,A,27,U,M,0,,,L,G,C,C,2
95408,TX,A,24,H,M,3,,,L,F,A,C,1
95409,MI,A,30,U,M,0,,,L,E,B,C,3
95410,CA,A,24,H,F,2,12.0,,L,F,A,C,1


SOLIH.

In [35]:
categorical.SOLIH.value_counts(dropna=False)

NaN     89212
12.0     5693
0.0       296
1.0        94
2.0        75
3.0        19
4.0        16
6.0         7
Name: SOLIH, dtype: int64

It was required to keep this column. The nans are the majority, wouldn't work assinging the mode. We assume they mean that the code is unknown and so will put them in a separate category ('U' - 'Unknown').

In [36]:
categorical.SOLIH = categorical.SOLIH.fillna('U')

In [37]:
categorical.SOLIH.value_counts(dropna=False)

U       89212
12.0     5693
0.0       296
1.0        94
2.0        75
3.0        19
4.0        16
6.0         7
Name: SOLIH, dtype: int64

VETERANS.

In [38]:
categorical.VETERANS.value_counts(dropna=False)

NaN    84986
Y      10426
Name: VETERANS, dtype: int64

In the descriptions provided, this column is supposed to have Y/N values. We assume the nans represent "N", so will fill the nans with 'N'.

In [39]:
categorical.VETERANS = categorical.VETERANS.fillna('N')

In [40]:
categorical.VETERANS.value_counts(dropna=False)

N    84986
Y    10426
Name: VETERANS, dtype: int64

Checking nulls.

In [41]:
nulls_percent_cat_df = pd.DataFrame(categorical.isna().sum()/len(categorical)).reset_index()
nulls_percent_cat_df.columns = ['column_name', 'nulls_percentage']

In [42]:
nulls_percent_cat_df[nulls_percent_cat_df['nulls_percentage']>0]

Unnamed: 0,column_name,nulls_percentage


In [43]:
unique_values_cat = {}

for col in categorical.columns:
    unique_values_cat[col] = categorical[col].unique()

for col, values in unique_values_cat.items():
    print(f'Column "{col}": {values}')

Column "STATE": ['IL' 'CA' 'NC' 'FL' 'other' 'IN' 'MI' 'MO' 'TX' 'WA' 'WI' 'GA']
Column "MAILCODE": ['A' 'B']
Column "CLUSTER": [36 14 43 44 16 40 39 45 35 53 17 51  2 20 27 12 22 13  8 25 15 42 11 28
 18 24 34  5 31 32 46  3 50  7 37 10 38 21  9 29 30  4 41 49 23 33  1 47
 26 48 19  6 52]
Column "HOMEOWNR": ['U' 'H']
Column "GENDER": ['F' 'M' 'other']
Column "DATASRCE": [0 3 1 2]
Column "SOLIH": ['U' 12.0 0.0 2.0 1.0 4.0 6.0 3.0]
Column "VETERANS": ['N' 'Y']
Column "RFA_2R": ['L']
Column "RFA_2A": ['E' 'G' 'F' 'D']
Column "GEOCODE2": ['C' 'A' 'D' 'B']
Column "DOMAIN_A": ['T' 'S' 'R' 'U' 'C']
Column "DOMAIN_B": [2 1 3 4]


### Numerical.

In [44]:
numerical

Unnamed: 0,ODATEDW,TCODE,DOB,AGE,INCOME,WEALTH1,HIT,MALEMILI,MALEVET,VIETVETS,WWIIVETS,LOCALGOV,STATEGOV,FEDGOV,WEALTH2,POP901,POP902,POP903,POP90C1,POP90C2,POP90C3,POP90C4,POP90C5,ETH1,ETH2,ETH3,ETH4,ETH5,ETH6,ETH7,ETH8,ETH9,ETH10,ETH11,ETH12,ETH13,ETH14,ETH15,ETH16,AGE901,AGE902,AGE903,AGE904,AGE905,AGE906,AGE907,CHIL1,CHIL2,CHIL3,AGEC1,AGEC2,AGEC3,AGEC4,AGEC5,AGEC6,AGEC7,CHILC1,CHILC2,CHILC3,CHILC4,CHILC5,HHAGE1,HHAGE2,HHAGE3,HHN1,HHN2,HHN3,HHN4,HHN5,HHN6,MARR1,MARR2,MARR3,MARR4,HHP1,HHP2,DW1,DW2,DW3,DW4,DW5,DW6,DW7,DW8,DW9,HV1,HV2,HV3,HV4,HU1,HU2,HU3,HU4,HU5,HHD1,HHD2,HHD3,HHD4,HHD5,HHD6,HHD7,HHD8,HHD9,HHD10,HHD11,HHD12,ETHC1,ETHC2,ETHC3,ETHC4,ETHC5,ETHC6,HVP1,HVP2,HVP3,HVP4,HVP5,HVP6,HUR1,HUR2,RHP1,RHP2,RHP3,RHP4,HUPA1,HUPA2,HUPA3,HUPA4,HUPA5,HUPA6,HUPA7,RP1,RP2,RP3,RP4,MSA,ADI,DMA,IC1,IC2,IC3,IC4,IC5,IC6,IC7,IC8,IC9,IC10,IC11,IC12,IC13,IC14,IC15,IC16,IC17,IC18,IC19,IC20,IC21,IC22,IC23,HHAS1,HHAS2,HHAS3,HHAS4,MC1,MC2,MC3,TPE1,TPE2,TPE3,TPE4,TPE5,TPE6,TPE7,TPE8,TPE9,PEC1,PEC2,TPE10,TPE11,TPE12,TPE13,LFC1,LFC2,LFC3,LFC4,LFC5,LFC6,LFC7,LFC8,LFC9,LFC10,OCC1,OCC2,OCC3,OCC4,OCC5,OCC6,OCC7,OCC8,OCC9,OCC10,OCC11,OCC12,OCC13,EIC1,EIC2,EIC3,EIC4,EIC5,EIC6,EIC7,EIC8,EIC9,EIC10,EIC11,EIC12,EIC13,EIC14,EIC15,EIC16,OEDC1,OEDC2,OEDC3,OEDC4,OEDC5,OEDC6,OEDC7,EC1,EC2,EC3,EC4,EC5,EC6,EC7,EC8,SEC1,SEC2,SEC3,SEC4,SEC5,AFC1,AFC2,AFC3,AFC4,AFC5,AFC6,VC1,VC2,VC3,VC4,ANC1,ANC2,ANC3,ANC4,ANC5,ANC6,ANC7,ANC8,ANC9,ANC10,ANC11,ANC12,ANC13,ANC14,ANC15,POBC1,POBC2,LSC1,LSC2,LSC3,LSC4,VOC1,VOC2,VOC3,HC1,HC2,HC3,HC4,HC5,HC6,HC7,HC8,HC9,HC10,HC11,HC12,HC13,HC14,HC15,HC16,HC17,HC18,HC19,HC20,HC21,MHUC1,MHUC2,AC1,AC2,CARDPROM,MAXADATE,NUMPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,TIMELAG,AVGGIFT,CONTROLN,HPHONE_D,RFA_2F,CLUSTER2
0,8901,0,3712,60.000000,5.0,6.0,0,0,39,34,18,10,2,1,5.0,992,264,332,0,35,65,47,53,92,1,0,0,11,0,0,0,0,0,0,0,11,0,0,0,39,48,51,40,50,54,25,31,42,27,11,14,18,17,13,11,15,12,11,34,25,18,26,10,23,18,33,49,28,12,4,61,7,12,19,198,276,97,95,2,2,0,0,7,7,0,479,635,3,2,86,14,96,4,7,38,80,70,32,84,16,6,2,5,9,15,3,17,50,25,0,0,0,2,7,13,27,47,0,1,61,58,61,15,4,2,0,0,14,1,0,0,2,5,17,73,0.0,177.0,682.0,307,318,349,378,12883,13,23,23,23,15,1,0,0,1,4,25,24,26,17,2,0,0,2,28,4,51,1,46,54,3,88,8,0,0,0,0,0,0,4,1,13,14,16,2,45,56,64,50,64,44,62,53,99,0,0,9,3,8,13,9,0,3,9,3,15,19,5,4,3,0,3,41,1,0,7,13,6,5,0,4,9,4,1,3,10,2,1,7,78,2,0,120,16,10,39,21,8,4,3,5,20,3,19,4,0,0,0,18,39,0,34,23,18,16,1,4,0,23,0,0,5,1,0,0,0,0,0,2,0,3,74,88,8,0,4,96,77,19,13,31,5,14,14,31,54,46,0,0,90,0,10,0,0,0,33,65,40,99,99,6,2,10,7,27,9702,74,6,14,240.0,31,14,5.0,9208,12.0,9402,10.0,9512,8911,4.0,7.741935,95515,0,4,39.0
1,9401,1,5202,46.000000,6.0,9.0,16,0,15,55,11,6,2,1,9.0,3611,940,998,99,0,0,50,50,67,0,0,31,6,4,2,6,4,14,0,0,2,0,1,4,34,41,43,32,42,45,32,33,46,21,13,14,33,23,10,4,2,11,16,36,22,15,12,1,5,4,21,75,55,23,9,69,4,3,24,317,360,99,99,0,0,0,0,0,0,0,5468,5218,12,10,96,4,97,3,9,59,94,88,55,95,5,4,1,3,5,4,2,18,44,5,0,0,0,97,98,98,98,99,94,0,83,76,73,21,5,0,0,0,4,0,0,0,91,91,91,94,4480.0,13.0,803.0,1088,1096,1026,1037,36175,2,6,2,5,15,14,13,10,33,2,5,2,5,15,14,14,10,32,6,2,66,3,56,44,9,80,14,0,0,0,0,0,0,6,0,2,24,32,12,71,70,83,58,81,57,64,57,99,99,0,22,24,4,21,13,2,1,6,0,4,1,0,3,1,0,6,13,1,2,8,18,11,4,3,4,10,7,11,1,6,2,1,16,69,5,2,160,5,5,12,21,7,30,20,14,24,4,24,10,0,0,0,8,15,0,55,10,11,0,0,2,0,3,1,1,2,3,1,1,0,3,0,0,0,42,39,50,7,27,16,99,92,53,5,10,2,26,56,97,99,0,0,0,96,0,4,0,0,0,99,0,99,99,99,20,4,6,5,12,9702,32,6,13,47.0,3,1,10.0,9310,25.0,9512,25.0,9512,9310,18.0,15.666667,148535,0,2,1.0
2,9001,1,0,61.611649,3.0,1.0,2,0,20,29,33,6,8,1,1.0,7001,2040,2669,0,2,98,49,51,96,2,0,0,2,0,0,0,0,0,0,0,2,0,0,0,35,43,46,37,45,49,23,35,40,25,13,20,19,16,13,10,8,15,14,30,22,19,25,10,23,21,35,44,22,6,2,63,9,9,19,183,254,69,69,1,6,5,3,3,3,0,497,546,2,1,78,22,93,7,18,36,76,65,30,86,14,7,2,5,11,17,3,17,60,18,0,1,0,0,1,6,18,50,0,4,36,49,51,14,5,4,2,24,11,2,3,6,0,2,9,44,0.0,281.0,518.0,251,292,292,340,11576,32,18,20,15,12,2,0,0,1,20,19,24,18,16,2,0,0,1,28,8,31,11,38,62,8,74,22,0,0,0,0,0,2,2,1,21,19,24,6,61,65,73,59,70,56,78,62,82,99,4,10,5,2,6,12,0,1,9,5,18,20,5,7,6,0,11,33,4,3,2,12,3,3,2,0,7,8,3,3,6,7,1,8,74,3,1,120,22,20,28,16,6,5,3,1,23,1,16,6,0,0,0,10,21,0,28,23,32,8,1,14,1,5,0,0,7,0,0,0,0,0,1,0,0,2,84,96,3,0,0,92,65,29,9,22,3,12,23,50,69,31,0,0,0,6,35,44,0,15,22,77,17,97,92,9,2,6,5,26,9702,63,6,14,202.0,27,14,2.0,9111,16.0,9207,5.0,9512,9001,12.0,7.481481,15078,1,4,60.0
3,8701,0,2801,70.000000,1.0,4.0,2,0,23,14,31,3,0,3,0.0,640,160,219,0,8,92,54,46,61,0,0,11,32,6,2,0,0,0,0,0,31,0,0,1,32,40,44,34,43,47,25,45,35,20,15,25,17,17,12,7,7,20,17,30,14,19,25,11,23,23,27,50,30,15,8,63,9,6,23,199,283,85,83,3,4,1,0,2,0,2,1000,1263,2,1,48,52,93,7,6,36,73,61,30,84,16,6,3,3,21,12,4,13,36,13,0,0,0,10,25,50,69,92,10,15,42,55,50,15,5,4,0,9,42,4,0,5,1,8,17,34,9340.0,67.0,862.0,386,388,396,423,15130,27,12,4,26,22,5,0,0,4,35,5,6,12,30,6,0,0,5,22,14,26,20,46,54,3,58,36,0,0,0,0,0,6,0,0,17,13,15,0,43,69,81,53,68,45,33,31,0,99,23,17,3,0,6,6,0,0,13,42,12,0,0,0,42,0,6,3,0,0,0,23,3,3,6,0,3,3,3,3,3,0,3,6,87,0,0,120,28,12,14,27,10,3,5,0,19,1,17,0,0,0,0,13,23,0,14,40,31,16,0,1,0,13,0,0,4,0,0,0,3,0,0,0,0,29,67,56,41,3,0,94,43,27,4,38,0,10,19,39,45,55,0,0,45,22,17,0,0,16,23,77,22,93,89,16,2,6,6,27,9702,66,6,14,109.0,16,7,2.0,8711,11.0,9411,10.0,9512,8702,9.0,6.812500,172556,1,4,41.0
4,8601,0,2001,78.000000,3.0,2.0,60,1,28,9,53,26,3,2,5.0,2520,627,761,99,0,0,46,54,2,98,0,0,1,0,0,0,0,0,0,0,0,0,0,0,33,45,50,36,46,50,27,34,43,23,14,21,13,15,20,12,5,13,15,34,19,19,31,7,27,16,26,57,36,24,14,42,17,9,33,235,323,99,98,0,0,0,0,0,0,0,576,594,4,3,90,10,97,3,0,42,82,49,22,92,8,20,3,17,9,23,1,1,1,0,21,58,19,0,1,2,16,67,0,2,45,52,53,16,6,0,0,0,9,0,0,0,25,58,74,83,5000.0,127.0,528.0,240,250,293,321,9836,24,29,23,13,4,4,0,0,2,21,30,22,16,4,5,0,0,3,35,8,11,14,20,80,4,73,22,1,1,0,0,0,3,1,2,1,24,27,3,76,61,73,51,65,49,80,31,81,99,10,17,8,2,6,15,3,7,22,2,9,0,7,2,2,0,6,1,5,2,2,12,2,7,6,4,15,29,4,3,26,3,2,7,49,12,1,120,16,20,30,13,3,12,5,2,26,1,20,7,1,1,1,15,28,4,9,16,53,20,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,65,99,0,0,0,90,45,18,25,34,0,1,3,6,33,67,0,0,9,14,72,3,0,0,99,1,21,99,96,6,2,7,11,43,9702,113,10,25,254.0,37,8,3.0,9310,15.0,9601,15.0,9601,7903,14.0,6.864865,7112,1,2,26.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,1,0,61.611649,5.0,6.0,0,14,36,47,11,7,8,13,5.0,27380,7252,10037,99,0,0,50,50,78,10,6,4,5,0,0,0,1,1,0,0,3,1,0,2,28,35,38,29,38,41,30,45,37,18,16,31,25,15,8,3,1,20,18,31,18,13,7,3,5,20,32,48,28,10,4,58,15,3,24,195,271,54,38,8,32,24,14,0,0,0,988,1025,6,6,56,44,89,11,3,44,72,56,32,83,17,12,3,10,16,15,8,19,55,5,3,6,0,2,10,49,73,92,0,4,40,52,53,15,4,24,8,13,14,15,12,3,69,84,92,97,380.0,0.0,743.0,433,481,499,535,18807,11,13,13,21,22,13,4,2,2,9,11,11,21,24,16,4,2,2,9,6,70,6,63,37,27,76,15,2,2,0,0,0,5,2,1,2,18,20,2,69,81,89,73,83,69,69,57,61,94,7,15,16,5,10,21,0,3,11,1,11,2,3,3,1,4,6,4,7,3,3,17,7,5,3,1,9,8,7,14,7,8,13,6,59,7,0,136,2,7,28,33,8,15,8,3,26,2,19,8,8,15,2,20,35,5,48,15,11,25,1,5,1,9,0,0,4,1,1,1,0,0,1,1,0,4,26,92,3,2,4,95,60,19,3,14,0,7,32,78,91,9,6,5,86,1,12,0,0,1,93,7,98,99,98,16,4,4,3,6,9702,14,5,12,25.0,1,0,25.0,9602,25.0,9602,25.0,9602,9602,0.0,25.000000,184568,0,1,12.0
95408,9601,1,5001,48.000000,7.0,9.0,1,0,31,43,19,4,1,0,5.0,1254,322,361,96,0,4,51,49,91,3,0,2,6,1,0,1,0,0,0,0,5,0,0,1,30,40,40,28,41,43,39,33,42,25,9,19,43,17,7,4,2,10,16,35,23,16,9,2,7,10,20,70,52,25,6,73,4,2,20,307,346,89,88,1,1,0,0,0,0,0,1679,1723,3,3,88,12,97,3,0,63,89,85,60,96,4,2,1,1,7,5,1,28,58,5,2,2,0,18,71,88,91,97,5,1,77,82,75,20,4,1,0,10,7,1,0,5,16,26,44,79,3360.0,201.0,618.0,806,836,802,849,26538,8,9,7,6,11,29,13,2,15,10,0,8,2,13,35,16,3,13,8,5,61,7,83,17,36,80,4,4,4,0,0,0,6,5,3,3,25,32,10,61,73,88,56,87,52,48,43,99,0,0,18,31,0,13,17,0,1,2,4,6,0,3,5,1,8,8,9,3,7,9,13,9,6,0,0,4,7,13,3,4,1,0,4,78,12,0,160,1,6,12,24,7,36,14,9,35,5,32,7,0,0,0,21,31,8,43,5,19,15,1,12,1,14,0,0,4,0,0,1,0,0,0,1,0,2,51,94,3,0,2,99,84,29,4,7,2,55,90,94,94,6,0,0,82,2,16,0,0,0,69,31,67,99,97,18,5,3,2,4,9702,10,3,8,20.0,1,0,20.0,9603,20.0,9603,20.0,9603,9603,0.0,20.000000,122706,1,1,2.0
95409,9501,1,3801,60.000000,5.0,6.0,0,0,18,46,20,7,23,0,5.0,552,131,205,99,0,0,53,47,82,14,0,1,9,0,0,0,0,0,0,0,9,0,0,0,28,35,37,30,41,44,32,46,38,17,13,34,21,9,9,9,4,21,17,32,20,10,18,7,17,27,29,44,31,14,5,45,19,5,31,179,268,96,95,1,2,1,0,0,0,0,376,377,4,3,66,34,95,5,10,37,64,43,21,80,20,16,2,14,21,20,9,20,49,12,7,7,1,0,0,0,1,9,0,2,45,51,54,14,5,2,0,0,31,2,0,0,3,34,78,91,4040.0,61.0,551.0,263,264,319,345,12178,21,26,20,18,12,0,3,0,0,26,18,17,11,21,0,6,0,0,10,13,26,26,43,57,3,83,17,0,0,0,0,0,0,0,0,25,17,17,0,69,69,70,69,70,69,77,24,62,0,25,5,13,9,5,22,0,2,14,0,13,9,5,2,0,0,4,14,3,11,0,10,5,2,0,5,6,19,3,19,7,23,0,0,52,18,0,120,5,3,51,23,7,11,0,6,32,4,27,7,0,0,0,9,18,0,46,0,20,20,2,8,0,14,0,0,0,1,0,0,0,0,1,0,0,6,82,92,5,3,0,93,42,12,6,51,0,0,0,0,0,99,0,0,97,0,0,0,0,4,99,0,99,99,99,5,2,3,11,14,9702,33,7,17,58.0,7,4,3.0,9603,10.0,9501,10.0,9610,9410,3.0,8.285714,189641,1,3,34.0
95410,8601,0,4005,58.000000,7.0,6.0,0,0,28,35,20,9,1,1,7.0,1746,432,508,99,0,0,47,53,92,1,1,5,8,0,1,2,0,1,0,0,5,0,0,3,34,42,45,36,45,49,25,38,40,22,12,21,21,18,12,7,9,13,16,34,20,17,20,4,16,9,26,65,41,17,6,56,9,8,27,262,324,99,99,0,0,0,0,5,4,1,2421,2459,11,10,88,12,99,1,0,44,85,71,36,84,16,8,2,6,9,12,6,19,56,16,0,0,0,89,96,99,99,99,9,0,90,65,68,18,5,0,0,0,12,0,0,0,88,88,90,91,8735.0,13.0,803.0,552,544,568,556,15948,7,4,11,18,38,15,5,3,0,4,6,15,19,38,13,4,3,0,25,2,46,3,43,57,9,80,11,0,0,0,0,1,2,6,0,24,18,28,11,52,73,88,60,85,57,70,54,99,99,0,14,16,6,16,17,0,2,12,1,11,2,0,2,1,0,2,22,4,6,4,19,4,7,2,4,6,7,9,4,9,1,1,7,72,8,2,140,7,6,20,35,12,15,5,6,29,4,21,10,0,0,0,13,28,1,35,18,20,8,0,3,1,9,0,0,2,6,1,2,0,0,0,0,0,14,50,83,8,4,5,99,85,43,9,25,0,0,6,17,99,1,0,0,99,0,1,0,0,0,99,0,99,99,99,12,3,6,3,36,9702,127,9,31,498.0,41,18,5.0,9011,21.0,9608,18.0,9701,8612,4.0,12.146341,4693,1,4,11.0


TCODE.

We drop TCODE, doesn't seem to add much information.

In [51]:
# numerical.TCODE.value_counts()

In [49]:
numerical = numerical.drop('TCODE', axis=1)

DOB.

We drop DOB, we already have the age of the donors.

In [50]:
numerical.DOB.value_counts()

0       23661
4801     1479
5001     1326
3001     1288
2801     1225
        ...  
7304        1
9704        1
4           1
7504        1
8011        1
Name: DOB, Length: 947, dtype: int64

In [52]:
numerical = numerical.drop('DOB', axis=1)

ADI, DMA, MSA.

These features don´t seem to add much information, so we can drop them.

In [58]:
numerical = numerical.drop('ADI', axis=1).drop('DMA', axis=1).drop('MSA', axis=1)

CONTROLN.

We drop Controln as it is a unique identifier.

In [59]:
numerical = numerical.drop('CONTROLN', axis=1)

RFA_2F.

In [57]:
numerical.RFA_2F.value_counts()

1    47675
2    20545
3    15291
4    11901
Name: RFA_2F, dtype: int64

This is a code, so we need to treat it as categorical.

In [66]:
categorical['RFA_2F'] = numerical['RFA_2F']

In [67]:
numerical = numerical.drop('RFA_2F', axis=1)

CLUSTER2.

We understand cluster2 is also a category so we should treat it as categorical.

In [69]:
# numerical.CLUSTER2.value_counts()

In [68]:
# categorical.CLUSTER.value_counts()

In [70]:
categorical['CLUSTER2'] = numerical['CLUSTER2']

In [71]:
numerical = numerical.drop('CLUSTER2', axis=1)

INCOME.

In [65]:
numerical.INCOME.value_counts()

5.0    36737
2.0    13114
4.0    12732
1.0     9022
3.0     8558
6.0     7778
7.0     7471
Name: INCOME, dtype: int64

It also looks like a category, so we'll treat it as cateorical.

In [142]:
categorical = categorical.astype(str)

#### Feature selection.

We apply RFE for the numericals.

We include here income, wealt1 and wealth2. They are categories that are ordered, so we can scale them with minmax scaler.

In [87]:
# Scaling numerical_2
from sklearn.preprocessing import MinMaxScaler
scaler_1 = MinMaxScaler()

In [88]:
num_scaled = scaler_1.fit_transform(numerical)

In [90]:
num_scaled = pd.DataFrame(num_scaled, columns=numerical.columns)

In [None]:
# RFE
X = num_scaled
y = targets['TARGET_B']

from sklearn.feature_selection import RFE
from sklearn import linear_model
logre = linear_model.LogisticRegression()
rfe = RFE(logre, n_features_to_select=70, verbose=False)
rfe.fit(X, y)

In [95]:
df = pd.DataFrame(data = rfe.ranking_, columns=['Rank'])
df['Column_name'] = pd.DataFrame(num_scaled).columns
df[df['Rank']==1]

Unnamed: 0,Rank,Column_name
0,1,ODATEDW
20,1,POP90C5
28,1,ETH8
30,1,ETH10
34,1,ETH14
...,...,...
307,1,MAXRDATE
308,1,LASTGIFT
309,1,LASTDATE
310,1,FISTDATE


Selecting the numerical columns.

In [98]:
selected_cols = df[df['Rank'] == 1]['Column_name']
num_selec = numerical[selected_cols]

In [99]:
num_selec

Unnamed: 0,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT
0,8901,53,0,0,0,0,51,27,18,15,26,23,7,19,7,0,635,16,2,5,25,15,4,0,0,0,0,64,64,3,3,3,5,4,0,7,5,0,9,10,2,78,120,16,39,4,3,5,1,0,0,0,33,65,99,10,27,6,14,240.0,31,14,5.0,9208,12.0,9402,10.0,9512,8911,7.741935
1,9401,50,6,14,0,4,43,21,33,2,12,5,4,24,0,0,5218,5,1,3,5,21,5,0,13,0,0,83,81,24,1,0,0,3,2,8,4,3,10,6,2,69,160,5,12,30,20,14,0,1,1,3,99,0,99,6,12,6,13,47.0,3,1,10.0,9310,25.0,9512,25.0,9512,9310,15.666667
2,9001,51,0,0,0,0,46,25,19,8,25,23,9,19,3,0,546,14,2,5,18,14,5,6,0,0,0,73,70,5,1,5,5,7,3,2,3,2,7,6,7,74,120,22,28,5,3,1,1,0,0,0,22,77,92,6,26,6,14,202.0,27,14,2.0,9111,16.0,9207,5.0,9512,9001,7.481481
3,8701,46,0,0,0,1,44,20,17,7,25,23,9,23,0,2,1263,16,3,3,13,15,5,5,0,0,0,81,68,3,0,42,0,0,0,0,3,6,3,3,0,87,120,28,14,3,5,0,0,0,0,0,23,77,89,6,27,6,14,109.0,16,7,2.0,8711,11.0,9411,10.0,9512,8702,6.812500
4,8601,54,0,0,0,0,50,23,13,5,31,27,17,33,0,0,594,8,3,17,0,16,6,0,0,1,0,73,65,8,7,2,7,2,2,2,7,6,15,26,3,49,120,16,30,12,5,2,0,0,0,0,99,1,96,7,43,10,25,254.0,37,8,3.0,9310,15.0,9601,15.0,9601,7903,6.864865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,9601,50,0,1,1,2,38,18,25,1,7,5,15,24,0,0,1025,17,3,10,5,15,4,3,4,2,0,89,83,16,3,1,3,3,3,3,5,3,9,7,8,59,136,2,28,15,8,3,1,1,1,0,93,7,98,4,6,5,12,25.0,1,0,25.0,9602,25.0,9602,25.0,9602,9602,25.000000
95408,9601,49,1,0,0,1,40,25,43,2,9,7,4,20,0,0,1723,4,1,1,5,20,4,5,13,4,0,88,87,31,1,4,3,5,7,9,6,0,4,4,1,78,160,1,12,36,14,9,1,0,1,0,69,31,97,3,4,3,8,20.0,1,0,20.0,9603,20.0,9603,20.0,9603,9603,20.000000
95409,9501,47,0,0,0,0,37,17,21,4,18,17,19,31,0,0,377,20,2,14,12,14,5,0,3,0,0,70,70,13,2,0,5,2,11,0,2,0,6,7,23,52,120,5,51,11,0,6,2,0,0,0,99,0,99,3,14,7,17,58.0,7,4,3.0,9603,10.0,9501,10.0,9610,9410,8.285714
95410,8601,53,2,1,0,3,45,22,21,9,20,16,9,27,4,1,2459,16,2,6,16,18,5,0,5,0,1,88,85,16,2,1,0,2,6,4,7,2,6,9,1,72,140,7,20,15,5,6,0,1,2,0,99,0,99,6,36,9,31,498.0,41,18,5.0,9011,21.0,9608,18.0,9701,8612,12.146341


Multicolinearity.

In [134]:
# correlation_matrix = num_selec.corr().abs()

### Concat.

In [143]:
df_selec = pd.concat([categorical, num_selec], axis=1)

In [144]:
categorical.shape

(95412, 15)

In [145]:
num_selec.shape

(95412, 70)

In [146]:
df_selec

Unnamed: 0,STATE,MAILCODE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,SOLIH,VETERANS,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B,RFA_2F,CLUSTER2,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT
0,IL,A,36,U,F,0,U,N,L,E,C,T,2,4,39.0,8901,53,0,0,0,0,51,27,18,15,26,23,7,19,7,0,635,16,2,5,25,15,4,0,0,0,0,64,64,3,3,3,5,4,0,7,5,0,9,10,2,78,120,16,39,4,3,5,1,0,0,0,33,65,99,10,27,6,14,240.0,31,14,5.0,9208,12.0,9402,10.0,9512,8911,7.741935
1,CA,A,14,H,M,3,U,N,L,G,A,S,1,2,1.0,9401,50,6,14,0,4,43,21,33,2,12,5,4,24,0,0,5218,5,1,3,5,21,5,0,13,0,0,83,81,24,1,0,0,3,2,8,4,3,10,6,2,69,160,5,12,30,20,14,0,1,1,3,99,0,99,6,12,6,13,47.0,3,1,10.0,9310,25.0,9512,25.0,9512,9310,15.666667
2,NC,A,43,U,M,3,U,N,L,E,C,R,2,4,60.0,9001,51,0,0,0,0,46,25,19,8,25,23,9,19,3,0,546,14,2,5,18,14,5,6,0,0,0,73,70,5,1,5,5,7,3,2,3,2,7,6,7,74,120,22,28,5,3,1,1,0,0,0,22,77,92,6,26,6,14,202.0,27,14,2.0,9111,16.0,9207,5.0,9512,9001,7.481481
3,CA,A,44,U,F,3,U,N,L,E,C,R,2,4,41.0,8701,46,0,0,0,1,44,20,17,7,25,23,9,23,0,2,1263,16,3,3,13,15,5,5,0,0,0,81,68,3,0,42,0,0,0,0,3,6,3,3,0,87,120,28,14,3,5,0,0,0,0,0,23,77,89,6,27,6,14,109.0,16,7,2.0,8711,11.0,9411,10.0,9512,8702,6.812500
4,FL,A,16,H,F,3,12.0,N,L,F,A,S,2,2,26.0,8601,54,0,0,0,0,50,23,13,5,31,27,17,33,0,0,594,8,3,17,0,16,6,0,0,1,0,73,65,8,7,2,7,2,2,2,7,6,15,26,3,49,120,16,30,12,5,2,0,0,0,0,99,1,96,7,43,10,25,254.0,37,8,3.0,9310,15.0,9601,15.0,9601,7903,6.864865
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95407,other,A,27,U,M,0,U,N,L,G,C,C,2,1,12.0,9601,50,0,1,1,2,38,18,25,1,7,5,15,24,0,0,1025,17,3,10,5,15,4,3,4,2,0,89,83,16,3,1,3,3,3,3,5,3,9,7,8,59,136,2,28,15,8,3,1,1,1,0,93,7,98,4,6,5,12,25.0,1,0,25.0,9602,25.0,9602,25.0,9602,9602,25.000000
95408,TX,A,24,H,M,3,U,N,L,F,A,C,1,1,2.0,9601,49,1,0,0,1,40,25,43,2,9,7,4,20,0,0,1723,4,1,1,5,20,4,5,13,4,0,88,87,31,1,4,3,5,7,9,6,0,4,4,1,78,160,1,12,36,14,9,1,0,1,0,69,31,97,3,4,3,8,20.0,1,0,20.0,9603,20.0,9603,20.0,9603,9603,20.000000
95409,MI,A,30,U,M,0,U,N,L,E,B,C,3,3,34.0,9501,47,0,0,0,0,37,17,21,4,18,17,19,31,0,0,377,20,2,14,12,14,5,0,3,0,0,70,70,13,2,0,5,2,11,0,2,0,6,7,23,52,120,5,51,11,0,6,2,0,0,0,99,0,99,3,14,7,17,58.0,7,4,3.0,9603,10.0,9501,10.0,9610,9410,8.285714
95410,CA,A,24,H,F,2,12.0,N,L,F,A,C,1,4,11.0,8601,53,2,1,0,3,45,22,21,9,20,16,9,27,4,1,2459,16,2,6,16,18,5,0,5,0,1,88,85,16,2,1,0,2,6,4,7,2,6,9,1,72,140,7,20,15,5,6,0,1,2,0,99,0,99,6,36,9,31,498.0,41,18,5.0,9011,21.0,9608,18.0,9701,8612,12.146341


### X-y.

In [147]:
X = df_selec
y = targets['TARGET_B']

### Train-test split.

In [148]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y)

In [149]:
X_train_num = X_train.select_dtypes(include=['float64', 'int64'])
X_train_cat = X_train.select_dtypes(include=['object'])
X_test_num = X_test.select_dtypes(include=['float64', 'int64'])
X_test_cat = X_test.select_dtypes(include=['object'])

In [150]:
X_train_num

Unnamed: 0,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT
80169,8601,51,0,0,0,0,45,23,19,5,24,22,10,21,0,0,550,13,2,5,16,15,4,8,0,0,0,75,72,5,3,3,6,5,4,6,6,6,3,4,11,71,120,22,27,9,3,1,0,0,1,0,46,54,87,6,31,5,15,199.0,16,8,5.0,9012,30.0,9312,25.0,9602,8701,12.437500
37985,8901,53,3,1,0,2,43,16,24,9,26,24,14,29,0,0,4672,24,1,8,17,13,4,0,6,7,0,83,83,24,0,2,0,0,0,0,16,3,3,8,4,71,160,0,18,36,34,2,2,0,1,1,99,0,99,2,21,5,12,108.0,9,2,7.0,8912,15.0,9112,15.0,9603,8912,12.000000
7502,9001,56,0,0,0,1,69,23,3,32,76,75,8,6,0,0,924,26,0,1,71,11,3,6,0,0,0,23,23,12,6,0,5,0,7,0,12,4,6,19,0,75,120,12,53,5,0,1,1,0,1,0,97,3,99,7,21,6,15,55.0,9,6,2.0,9002,15.0,9512,15.0,9512,9002,6.111111
91759,9501,50,1,1,0,1,46,22,25,4,18,17,5,18,0,0,939,10,0,3,15,14,4,0,6,3,0,73,72,17,1,0,1,0,1,1,3,1,9,11,21,53,160,0,12,26,38,3,0,2,0,0,99,0,99,6,12,6,13,41.0,4,2,5.0,9412,25.0,9509,25.0,9509,9411,10.250000
9233,9501,49,2,0,0,1,37,18,33,1,6,4,11,24,0,0,894,13,2,7,4,17,5,0,2,1,2,90,87,17,1,1,0,4,4,5,4,1,7,7,4,72,140,1,18,19,13,4,1,1,1,0,99,0,99,4,5,3,8,15.0,1,0,15.0,9509,15.0,9509,15.0,9509,9509,15.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61026,8601,52,0,0,1,0,45,22,22,7,28,24,7,19,0,0,918,7,1,5,17,16,5,0,0,0,0,85,85,5,0,0,4,1,3,8,3,2,7,0,1,83,120,1,44,13,4,1,1,1,6,0,99,0,99,7,38,8,31,876.0,52,17,2.0,8610,31.0,9611,31.0,9701,8610,16.846154
69968,9001,53,0,0,0,0,47,24,19,10,30,28,11,22,3,0,464,14,2,10,19,14,5,3,1,0,0,57,51,8,3,1,8,7,3,4,3,2,7,7,11,69,120,31,29,4,4,1,1,0,0,0,64,30,80,7,22,6,13,89.0,9,5,5.0,9104,15.0,9510,15.0,9510,9012,9.888889
8681,8701,52,0,0,0,0,40,20,31,2,11,9,8,20,0,0,1553,13,1,3,9,14,4,1,5,0,0,89,88,27,0,0,2,2,13,7,4,2,10,2,5,78,160,1,12,35,16,7,1,1,1,0,96,4,99,4,20,5,13,63.0,13,7,2.0,8903,7.0,9502,5.0,9603,8702,4.846154
9051,9401,50,0,0,0,3,39,25,28,1,11,8,11,25,0,0,561,7,2,10,6,21,6,0,1,0,0,84,80,10,1,0,9,9,3,6,5,1,6,7,5,74,120,11,38,6,1,5,0,0,0,0,99,1,99,1,14,6,14,37.0,7,3,5.0,9603,7.0,9512,5.0,9603,9406,5.285714


In [163]:
X_train_cat

Unnamed: 0,STATE,MAILCODE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,SOLIH,VETERANS,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B,RFA_2F,CLUSTER2
80169,other,A,43,U,F,3,U,Y,L,G,B,R,2,1,58.0
37985,CA,A,11,H,F,3,U,N,L,F,A,S,1,2,3.0
7502,other,A,33,H,M,3,U,N,L,F,A,C,3,3,45.0
91759,other,A,24,U,M,1,U,Y,L,G,C,C,1,1,11.0
9233,other,A,2,U,F,0,U,N,L,F,A,U,1,1,9.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61026,IL,A,27,U,F,0,12.0,N,L,G,A,C,2,3,22.0
69968,other,A,45,U,F,2,U,N,L,F,D,R,2,2,55.0
8681,other,A,34,U,F,0,U,N,L,D,B,T,1,4,14.0
9051,other,A,18,U,M,3,U,Y,L,D,B,S,2,4,26.0


In [152]:
X_test_num

Unnamed: 0,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT
15497,9601,56,0,0,0,0,58,24,14,27,41,39,4,18,11,0,798,26,0,2,37,12,4,0,0,0,0,63,63,8,0,2,7,7,0,2,4,2,2,5,0,74,120,8,42,11,6,1,1,0,1,0,41,58,96,5,6,5,13,29.00,2,1,6.0,9603,23.00,9602,6.0,9603,9602,14.500000
43807,8601,56,0,0,0,0,46,17,18,16,15,14,13,26,11,0,609,25,1,22,21,14,5,3,0,2,0,68,61,9,1,0,6,4,2,7,4,2,17,11,4,74,120,10,33,9,2,4,0,6,5,0,94,6,98,3,25,6,30,269.00,20,8,10.0,9404,25.00,9609,25.0,9609,8611,13.450000
93726,9001,49,0,0,0,0,43,20,30,4,21,18,8,18,0,0,1374,12,2,2,14,15,4,0,4,0,0,80,77,26,1,2,3,2,0,5,3,4,9,6,2,74,140,3,28,20,9,1,1,0,1,1,0,99,99,4,24,6,14,245.00,6,5,15.0,9002,75.00,9602,75.0,9602,9002,40.833333
39010,9301,52,0,0,0,0,41,23,25,3,14,12,9,20,0,0,777,9,2,5,10,15,4,0,3,2,0,79,78,13,3,0,4,4,7,11,4,2,9,5,6,77,125,4,28,15,8,3,1,0,0,0,95,5,99,2,12,6,14,47.00,5,0,5.0,9502,20.00,9511,12.0,9512,9308,9.400000
8932,8801,51,0,1,0,1,44,23,31,1,9,8,6,21,0,0,1788,4,0,4,8,16,3,0,7,1,0,81,79,35,0,0,1,0,1,13,6,3,1,7,1,73,160,0,13,42,15,7,1,0,0,0,99,0,99,7,28,6,12,166.00,14,7,10.0,9509,20.00,9301,10.0,9509,8809,11.857143
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81247,8801,51,0,0,0,2,42,21,22,6,25,24,14,28,0,0,3162,24,1,5,19,14,4,0,6,3,1,78,75,18,1,1,1,4,2,3,8,7,9,7,2,68,143,3,19,23,13,5,1,0,0,1,99,0,98,7,28,6,13,176.00,12,6,5.0,8806,30.00,9512,30.0,9512,8801,14.666667
60497,9401,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,15,6,13,70.00,5,1,10.0,9511,20.00,9412,10.0,9511,9310,14.000000
4025,8701,48,0,0,0,1,37,20,22,3,13,12,16,37,2,0,689,34,2,3,10,11,5,0,1,0,0,86,81,7,0,2,5,0,1,5,4,1,9,1,1,78,121,6,30,16,3,0,0,0,7,1,98,2,99,2,29,6,13,137.15,14,9,5.0,8805,15.15,9410,15.0,9512,8701,9.796429
86390,8701,53,0,0,0,3,54,25,17,13,37,35,14,22,0,0,3261,29,1,2,33,11,3,0,7,1,0,72,70,22,2,2,1,1,1,5,6,3,11,4,1,76,144,3,20,25,17,9,0,1,3,2,99,0,99,8,32,4,10,166.00,21,10,3.0,8705,14.00,9004,10.0,9504,8702,7.904762


In [153]:
X_test_cat

Unnamed: 0,STATE,MAILCODE,CLUSTER,HOMEOWNR,GENDER,DATASRCE,SOLIH,VETERANS,RFA_2R,RFA_2A,GEOCODE2,DOMAIN_A,DOMAIN_B,RFA_2F,CLUSTER2
15497,other,A,46,H,M,3,U,N,L,F,D,R,2,2,13.0
43807,WI,A,40,U,F,3,0.0,N,L,F,B,R,2,2,55.0
93726,MI,A,42,U,F,3,U,N,L,G,A,R,1,1,15.0
39010,other,A,18,H,M,3,U,N,L,F,B,S,2,2,17.0
8932,GA,A,13,U,F,0,U,N,L,E,A,S,1,2,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81247,CA,A,3,H,F,2,U,N,L,G,A,U,1,1,11.0
60497,TX,A,12,H,M,2,U,N,L,F,A,S,1,2,20.0
4025,MI,A,17,U,F,0,U,N,L,F,A,S,2,2,24.0
86390,FL,A,31,H,other,2,U,N,L,E,A,C,3,1,13.0


### Encoding.

In [167]:
from sklearn.preprocessing import OneHotEncoder
encoder = OneHotEncoder(drop='first').fit(X_train_cat)

In [168]:
X_train_cat_encoded = encoder.transform(X_train_cat).toarray()

In [169]:
feature_names = encoder.get_feature_names_out(input_features=X_train_cat.columns)

X_train_cat_enc = pd.DataFrame(X_train_cat_encoded, index=X_train_cat.index, columns=feature_names)

In [170]:
X_train_cat_enc

Unnamed: 0,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,MAILCODE_B,CLUSTER_10,CLUSTER_11,CLUSTER_12,CLUSTER_13,CLUSTER_14,CLUSTER_15,CLUSTER_16,CLUSTER_17,CLUSTER_18,CLUSTER_19,CLUSTER_2,CLUSTER_20,CLUSTER_21,CLUSTER_22,CLUSTER_23,CLUSTER_24,CLUSTER_25,CLUSTER_26,CLUSTER_27,CLUSTER_28,CLUSTER_29,CLUSTER_3,CLUSTER_30,CLUSTER_31,CLUSTER_32,CLUSTER_33,CLUSTER_34,CLUSTER_35,CLUSTER_36,CLUSTER_37,CLUSTER_38,CLUSTER_39,CLUSTER_4,CLUSTER_40,CLUSTER_41,CLUSTER_42,CLUSTER_43,CLUSTER_44,CLUSTER_45,CLUSTER_46,CLUSTER_47,CLUSTER_48,CLUSTER_49,CLUSTER_5,CLUSTER_50,CLUSTER_51,CLUSTER_52,CLUSTER_53,CLUSTER_6,CLUSTER_7,CLUSTER_8,CLUSTER_9,HOMEOWNR_U,GENDER_M,GENDER_other,DATASRCE_1,DATASRCE_2,DATASRCE_3,SOLIH_1.0,SOLIH_12.0,SOLIH_2.0,SOLIH_3.0,SOLIH_4.0,SOLIH_6.0,SOLIH_U,VETERANS_Y,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,DOMAIN_B_2,DOMAIN_B_3,DOMAIN_B_4,RFA_2F_2,RFA_2F_3,RFA_2F_4,CLUSTER2_10.0,CLUSTER2_11.0,CLUSTER2_12.0,CLUSTER2_13.0,CLUSTER2_14.0,CLUSTER2_15.0,CLUSTER2_16.0,CLUSTER2_17.0,CLUSTER2_18.0,CLUSTER2_19.0,CLUSTER2_2.0,CLUSTER2_20.0,CLUSTER2_21.0,CLUSTER2_22.0,CLUSTER2_23.0,CLUSTER2_24.0,CLUSTER2_25.0,CLUSTER2_26.0,CLUSTER2_27.0,CLUSTER2_28.0,CLUSTER2_29.0,CLUSTER2_3.0,CLUSTER2_30.0,CLUSTER2_31.0,CLUSTER2_32.0,CLUSTER2_33.0,CLUSTER2_34.0,CLUSTER2_35.0,CLUSTER2_36.0,CLUSTER2_37.0,CLUSTER2_38.0,CLUSTER2_39.0,CLUSTER2_4.0,CLUSTER2_40.0,CLUSTER2_41.0,CLUSTER2_42.0,CLUSTER2_43.0,CLUSTER2_44.0,CLUSTER2_45.0,CLUSTER2_46.0,CLUSTER2_47.0,CLUSTER2_48.0,CLUSTER2_49.0,CLUSTER2_5.0,CLUSTER2_50.0,CLUSTER2_51.0,CLUSTER2_52.0,CLUSTER2_53.0,CLUSTER2_54.0,CLUSTER2_55.0,CLUSTER2_56.0,CLUSTER2_57.0,CLUSTER2_58.0,CLUSTER2_59.0,CLUSTER2_6.0,CLUSTER2_60.0,CLUSTER2_61.0,CLUSTER2_62.0,CLUSTER2_7.0,CLUSTER2_8.0,CLUSTER2_9.0
80169,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
37985,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7502,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91759,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9233,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61026,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69968,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8681,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9051,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


X_test_cat.

In [171]:
X_test_cat_encoded = encoder.transform(X_test_cat).toarray()

In [172]:
feature_names = encoder.get_feature_names_out(input_features=X_test_cat.columns)

X_test_cat_enc = pd.DataFrame(X_test_cat_encoded, index=X_test_cat.index, columns=feature_names)

In [173]:
X_test_cat_enc

Unnamed: 0,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,MAILCODE_B,CLUSTER_10,CLUSTER_11,CLUSTER_12,CLUSTER_13,CLUSTER_14,CLUSTER_15,CLUSTER_16,CLUSTER_17,CLUSTER_18,CLUSTER_19,CLUSTER_2,CLUSTER_20,CLUSTER_21,CLUSTER_22,CLUSTER_23,CLUSTER_24,CLUSTER_25,CLUSTER_26,CLUSTER_27,CLUSTER_28,CLUSTER_29,CLUSTER_3,CLUSTER_30,CLUSTER_31,CLUSTER_32,CLUSTER_33,CLUSTER_34,CLUSTER_35,CLUSTER_36,CLUSTER_37,CLUSTER_38,CLUSTER_39,CLUSTER_4,CLUSTER_40,CLUSTER_41,CLUSTER_42,CLUSTER_43,CLUSTER_44,CLUSTER_45,CLUSTER_46,CLUSTER_47,CLUSTER_48,CLUSTER_49,CLUSTER_5,CLUSTER_50,CLUSTER_51,CLUSTER_52,CLUSTER_53,CLUSTER_6,CLUSTER_7,CLUSTER_8,CLUSTER_9,HOMEOWNR_U,GENDER_M,GENDER_other,DATASRCE_1,DATASRCE_2,DATASRCE_3,SOLIH_1.0,SOLIH_12.0,SOLIH_2.0,SOLIH_3.0,SOLIH_4.0,SOLIH_6.0,SOLIH_U,VETERANS_Y,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,DOMAIN_B_2,DOMAIN_B_3,DOMAIN_B_4,RFA_2F_2,RFA_2F_3,RFA_2F_4,CLUSTER2_10.0,CLUSTER2_11.0,CLUSTER2_12.0,CLUSTER2_13.0,CLUSTER2_14.0,CLUSTER2_15.0,CLUSTER2_16.0,CLUSTER2_17.0,CLUSTER2_18.0,CLUSTER2_19.0,CLUSTER2_2.0,CLUSTER2_20.0,CLUSTER2_21.0,CLUSTER2_22.0,CLUSTER2_23.0,CLUSTER2_24.0,CLUSTER2_25.0,CLUSTER2_26.0,CLUSTER2_27.0,CLUSTER2_28.0,CLUSTER2_29.0,CLUSTER2_3.0,CLUSTER2_30.0,CLUSTER2_31.0,CLUSTER2_32.0,CLUSTER2_33.0,CLUSTER2_34.0,CLUSTER2_35.0,CLUSTER2_36.0,CLUSTER2_37.0,CLUSTER2_38.0,CLUSTER2_39.0,CLUSTER2_4.0,CLUSTER2_40.0,CLUSTER2_41.0,CLUSTER2_42.0,CLUSTER2_43.0,CLUSTER2_44.0,CLUSTER2_45.0,CLUSTER2_46.0,CLUSTER2_47.0,CLUSTER2_48.0,CLUSTER2_49.0,CLUSTER2_5.0,CLUSTER2_50.0,CLUSTER2_51.0,CLUSTER2_52.0,CLUSTER2_53.0,CLUSTER2_54.0,CLUSTER2_55.0,CLUSTER2_56.0,CLUSTER2_57.0,CLUSTER2_58.0,CLUSTER2_59.0,CLUSTER2_6.0,CLUSTER2_60.0,CLUSTER2_61.0,CLUSTER2_62.0,CLUSTER2_7.0,CLUSTER2_8.0,CLUSTER2_9.0
15497,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
43807,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93726,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39010,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8932,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81247,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60497,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4025,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86390,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Scaling.

In [174]:
scaler_2 = MinMaxScaler()

In [175]:
scaler_2.fit(X_train_num)

In [177]:
X_train_num_sc = pd.DataFrame(scaler_2.transform(X_train_num), index=X_train_num.index, columns=X_train_num.columns)

In [178]:
X_train_num_sc

Unnamed: 0,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT
80169,0.211470,0.515152,0.000000,0.000000,0.000000,0.000000,0.535714,0.232323,0.191919,0.050505,0.242424,0.222222,0.101010,0.212121,0.000000,0.0,0.091667,0.131313,0.04,0.050505,0.161616,0.245902,0.100,0.080808,0.00,0.000000,0.00,0.757576,0.727273,0.050505,0.054545,0.030303,0.060606,0.050505,0.062500,0.060606,0.060606,0.060606,0.030303,0.040404,0.111111,0.717172,0.705882,0.222222,0.272727,0.090909,0.030303,0.010309,0.000000,0.000000,0.010101,0.000000,0.464646,0.545455,0.878788,0.060606,0.526316,0.263158,0.171053,0.019533,0.063559,0.195122,0.005,0.685792,0.005005,0.822080,0.025,0.497487,0.906071,0.011166
37985,0.426523,0.535354,0.030612,0.021739,0.000000,0.023256,0.511905,0.161616,0.242424,0.090909,0.262626,0.242424,0.141414,0.292929,0.000000,0.0,0.778667,0.242424,0.02,0.080808,0.171717,0.213115,0.100,0.000000,0.12,0.070707,0.00,0.838384,0.838384,0.242424,0.000000,0.020202,0.000000,0.000000,0.000000,0.000000,0.161616,0.030303,0.030303,0.080808,0.040404,0.717172,0.941176,0.000000,0.181818,0.363636,0.343434,0.020619,0.024096,0.000000,0.010101,0.019231,1.000000,0.000000,1.000000,0.020202,0.350877,0.263158,0.131579,0.009925,0.033898,0.048780,0.007,0.640255,0.002002,0.730839,0.015,0.502513,0.928043,0.010728
7502,0.498208,0.565657,0.000000,0.000000,0.000000,0.011628,0.821429,0.232323,0.030303,0.323232,0.767677,0.757576,0.080808,0.060606,0.000000,0.0,0.154000,0.262626,0.00,0.010101,0.717172,0.180328,0.075,0.060606,0.00,0.000000,0.00,0.232323,0.232323,0.121212,0.109091,0.000000,0.050505,0.000000,0.109375,0.000000,0.121212,0.040404,0.060606,0.191919,0.000000,0.757576,0.705882,0.121212,0.535354,0.050505,0.000000,0.010309,0.012048,0.000000,0.010101,0.000000,0.979798,0.030303,1.000000,0.070707,0.350877,0.315789,0.171053,0.004329,0.033898,0.146341,0.002,0.681239,0.002002,0.913321,0.015,0.045226,0.937415,0.004832
91759,0.856631,0.505051,0.010204,0.021739,0.000000,0.011628,0.547619,0.222222,0.252525,0.040404,0.181818,0.171717,0.050505,0.181818,0.000000,0.0,0.156500,0.101010,0.00,0.030303,0.151515,0.229508,0.100,0.000000,0.12,0.030303,0.00,0.737374,0.727273,0.171717,0.018182,0.000000,0.010101,0.000000,0.015625,0.010101,0.030303,0.010101,0.090909,0.111111,0.212121,0.535354,0.941176,0.000000,0.121212,0.262626,0.383838,0.030928,0.000000,0.029412,0.000000,0.000000,1.000000,0.000000,1.000000,0.060606,0.192982,0.315789,0.144737,0.002851,0.012712,0.048780,0.005,0.867942,0.004004,0.911953,0.025,0.030151,0.980006,0.008976
9233,0.856631,0.494949,0.020408,0.000000,0.000000,0.011628,0.440476,0.181818,0.333333,0.010101,0.060606,0.040404,0.111111,0.242424,0.000000,0.0,0.149000,0.131313,0.04,0.070707,0.040404,0.278689,0.125,0.000000,0.04,0.010101,0.08,0.909091,0.878788,0.171717,0.018182,0.010101,0.000000,0.040404,0.062500,0.050505,0.040404,0.010101,0.070707,0.070707,0.040404,0.727273,0.823529,0.010101,0.181818,0.191919,0.131313,0.041237,0.012048,0.014706,0.010101,0.000000,1.000000,0.000000,1.000000,0.040404,0.070175,0.157895,0.078947,0.000106,0.000000,0.000000,0.015,0.912113,0.002002,0.911953,0.015,0.030151,0.990211,0.013732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61026,0.211470,0.525253,0.000000,0.000000,0.017544,0.000000,0.535714,0.222222,0.222222,0.070707,0.282828,0.242424,0.070707,0.191919,0.000000,0.0,0.153000,0.070707,0.02,0.050505,0.171717,0.262295,0.125,0.000000,0.00,0.000000,0.00,0.858586,0.858586,0.050505,0.000000,0.000000,0.040404,0.010101,0.046875,0.080808,0.030303,0.020202,0.070707,0.000000,0.010101,0.838384,0.705882,0.010101,0.444444,0.131313,0.040404,0.010309,0.012048,0.014706,0.060606,0.000000,1.000000,0.000000,1.000000,0.070707,0.649123,0.421053,0.381579,0.091015,0.216102,0.414634,0.002,0.502732,0.005205,0.958485,0.031,0.994975,0.896595,0.015580
69968,0.498208,0.535354,0.000000,0.000000,0.000000,0.000000,0.559524,0.242424,0.191919,0.101010,0.303030,0.282828,0.111111,0.222222,0.030303,0.0,0.077333,0.141414,0.04,0.101010,0.191919,0.229508,0.125,0.030303,0.02,0.000000,0.00,0.575758,0.515152,0.080808,0.054545,0.010101,0.080808,0.070707,0.046875,0.040404,0.030303,0.020202,0.070707,0.070707,0.111111,0.696970,0.705882,0.313131,0.292929,0.040404,0.040404,0.010309,0.012048,0.000000,0.000000,0.000000,0.646465,0.303030,0.808081,0.070707,0.368421,0.315789,0.144737,0.007919,0.033898,0.121951,0.005,0.727687,0.002002,0.912409,0.015,0.035176,0.938457,0.008614
8681,0.283154,0.525253,0.000000,0.000000,0.000000,0.000000,0.476190,0.202020,0.313131,0.020202,0.111111,0.090909,0.080808,0.202020,0.000000,0.0,0.258833,0.131313,0.02,0.030303,0.090909,0.229508,0.100,0.010101,0.10,0.000000,0.00,0.898990,0.888889,0.272727,0.000000,0.000000,0.020202,0.020202,0.203125,0.070707,0.040404,0.020202,0.101010,0.020202,0.050505,0.787879,0.941176,0.010101,0.121212,0.353535,0.161616,0.072165,0.012048,0.014706,0.010101,0.000000,0.969697,0.040404,1.000000,0.040404,0.333333,0.263158,0.144737,0.005174,0.050847,0.170732,0.002,0.636157,0.000400,0.908759,0.005,0.502513,0.906175,0.003565
9051,0.784946,0.505051,0.000000,0.000000,0.000000,0.034884,0.464286,0.252525,0.282828,0.010101,0.111111,0.080808,0.111111,0.252525,0.000000,0.0,0.093500,0.070707,0.04,0.101010,0.060606,0.344262,0.150,0.000000,0.02,0.000000,0.00,0.848485,0.808081,0.101010,0.018182,0.000000,0.090909,0.090909,0.046875,0.060606,0.050505,0.010101,0.060606,0.070707,0.050505,0.747475,0.705882,0.111111,0.383838,0.060606,0.010101,0.051546,0.000000,0.000000,0.000000,0.000000,1.000000,0.010101,1.000000,0.010101,0.228070,0.315789,0.157895,0.002428,0.025424,0.073171,0.005,0.954918,0.000400,0.913321,0.005,0.502513,0.979486,0.004005


X_test_num.

In [179]:
X_test_num_sc = pd.DataFrame(scaler_2.transform(X_test_num), index=X_test_num.index, columns=X_test_num.columns)

In [180]:
X_test_num_sc

Unnamed: 0,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT
15497,0.928315,0.565657,0.0,0.000000,0.0,0.000000,0.690476,0.242424,0.141414,0.272727,0.414141,0.393939,0.040404,0.181818,0.111111,0.0,0.133000,0.262626,0.00,0.020202,0.373737,0.196721,0.100,0.000000,0.00,0.000000,0.00,0.636364,0.636364,0.080808,0.000000,0.020202,0.070707,0.070707,0.000000,0.020202,0.040404,0.020202,0.020202,0.050505,0.000000,0.747475,0.705882,0.080808,0.424242,0.111111,0.060606,0.010309,0.012048,0.000000,0.010101,0.000000,0.414141,0.585859,0.969697,0.050505,0.087719,0.263158,0.144737,0.001584,0.004237,0.024390,0.006,0.954918,0.003604,0.954380,0.006,0.502513,0.999896,0.013231
43807,0.211470,0.565657,0.0,0.000000,0.0,0.000000,0.547619,0.171717,0.181818,0.161616,0.151515,0.141414,0.131313,0.262626,0.111111,0.0,0.101500,0.252525,0.02,0.222222,0.212121,0.229508,0.125,0.030303,0.00,0.020202,0.00,0.686869,0.616162,0.090909,0.018182,0.000000,0.060606,0.040404,0.031250,0.070707,0.040404,0.020202,0.171717,0.111111,0.040404,0.747475,0.705882,0.101010,0.333333,0.090909,0.020202,0.041237,0.000000,0.088235,0.050505,0.000000,0.949495,0.060606,0.989899,0.030303,0.421053,0.315789,0.368421,0.026924,0.080508,0.195122,0.010,0.864299,0.004004,0.957573,0.025,0.532663,0.896699,0.012180
93726,0.498208,0.494949,0.0,0.000000,0.0,0.000000,0.511905,0.202020,0.303030,0.040404,0.212121,0.181818,0.080808,0.181818,0.000000,0.0,0.229000,0.121212,0.04,0.020202,0.141414,0.245902,0.100,0.000000,0.08,0.000000,0.00,0.808081,0.777778,0.262626,0.018182,0.020202,0.030303,0.020202,0.000000,0.050505,0.030303,0.040404,0.090909,0.060606,0.020202,0.747475,0.823529,0.030303,0.282828,0.202020,0.090909,0.010309,0.012048,0.000000,0.010101,0.019231,0.000000,1.000000,1.000000,0.040404,0.403509,0.315789,0.157895,0.024390,0.021186,0.121951,0.015,0.681239,0.014014,0.954380,0.075,0.497487,0.937415,0.039599
39010,0.713262,0.525253,0.0,0.000000,0.0,0.000000,0.488095,0.232323,0.252525,0.030303,0.141414,0.121212,0.090909,0.202020,0.000000,0.0,0.129500,0.090909,0.04,0.050505,0.101010,0.245902,0.100,0.000000,0.06,0.020202,0.00,0.797980,0.787879,0.131313,0.054545,0.000000,0.040404,0.040404,0.109375,0.111111,0.040404,0.020202,0.090909,0.050505,0.060606,0.777778,0.735294,0.040404,0.282828,0.151515,0.080808,0.030928,0.012048,0.000000,0.000000,0.000000,0.959596,0.050505,1.000000,0.020202,0.192982,0.315789,0.157895,0.003484,0.016949,0.000000,0.005,0.908925,0.003003,0.912865,0.012,0.045226,0.969280,0.008125
8932,0.354839,0.515152,0.0,0.021739,0.0,0.011628,0.523810,0.232323,0.313131,0.010101,0.090909,0.080808,0.060606,0.212121,0.000000,0.0,0.298000,0.040404,0.00,0.040404,0.080808,0.262295,0.075,0.000000,0.14,0.010101,0.00,0.818182,0.797980,0.353535,0.000000,0.000000,0.010101,0.000000,0.015625,0.131313,0.060606,0.030303,0.010101,0.070707,0.010101,0.737374,0.941176,0.000000,0.131313,0.424242,0.151515,0.072165,0.012048,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.070707,0.473684,0.315789,0.131579,0.016049,0.055085,0.170732,0.010,0.912113,0.003003,0.817062,0.010,0.030151,0.917318,0.010585
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81247,0.354839,0.515152,0.0,0.000000,0.0,0.023256,0.500000,0.212121,0.222222,0.060606,0.252525,0.242424,0.141414,0.282828,0.000000,0.0,0.527000,0.242424,0.02,0.050505,0.191919,0.229508,0.100,0.000000,0.12,0.030303,0.04,0.787879,0.757576,0.181818,0.018182,0.010101,0.010101,0.040404,0.031250,0.030303,0.080808,0.070707,0.090909,0.070707,0.020202,0.686869,0.841176,0.030303,0.191919,0.232323,0.131313,0.051546,0.012048,0.000000,0.000000,0.019231,1.000000,0.000000,0.989899,0.070707,0.473684,0.315789,0.144737,0.017105,0.046610,0.146341,0.005,0.591985,0.005005,0.913321,0.030,0.045226,0.916484,0.013398
60497,0.784946,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.245614,0.315789,0.144737,0.005913,0.016949,0.024390,0.010,0.913024,0.003003,0.867701,0.010,0.040201,0.969489,0.012731
4025,0.283154,0.484848,0.0,0.000000,0.0,0.011628,0.440476,0.202020,0.222222,0.030303,0.131313,0.121212,0.161616,0.373737,0.020202,0.0,0.114833,0.343434,0.04,0.030303,0.101010,0.180328,0.125,0.000000,0.02,0.000000,0.00,0.868687,0.818182,0.070707,0.000000,0.020202,0.050505,0.000000,0.015625,0.050505,0.040404,0.010101,0.090909,0.010101,0.010101,0.787879,0.711765,0.060606,0.303030,0.161616,0.030303,0.000000,0.000000,0.000000,0.070707,0.019231,0.989899,0.020202,1.000000,0.020202,0.491228,0.315789,0.144737,0.013003,0.055085,0.219512,0.005,0.591530,0.002032,0.866788,0.015,0.045226,0.906071,0.008522
86390,0.283154,0.535354,0.0,0.000000,0.0,0.034884,0.642857,0.252525,0.171717,0.131313,0.373737,0.353535,0.141414,0.222222,0.000000,0.0,0.543500,0.292929,0.02,0.020202,0.333333,0.180328,0.075,0.000000,0.14,0.010101,0.00,0.727273,0.707071,0.222222,0.036364,0.020202,0.010101,0.010101,0.015625,0.050505,0.060606,0.030303,0.111111,0.040404,0.010101,0.767677,0.847059,0.030303,0.202020,0.252525,0.171717,0.092784,0.000000,0.014706,0.030303,0.038462,1.000000,0.000000,1.000000,0.080808,0.543860,0.210526,0.105263,0.016049,0.084746,0.243902,0.003,0.545993,0.001802,0.681569,0.010,0.005025,0.906175,0.006628


### Concat.

In [181]:
X_train_tr = pd.concat([X_train_num_sc, X_train_cat_enc], axis=1)

In [182]:
X_train_tr

Unnamed: 0,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,MAILCODE_B,CLUSTER_10,CLUSTER_11,CLUSTER_12,CLUSTER_13,CLUSTER_14,CLUSTER_15,CLUSTER_16,CLUSTER_17,CLUSTER_18,CLUSTER_19,CLUSTER_2,CLUSTER_20,CLUSTER_21,CLUSTER_22,CLUSTER_23,CLUSTER_24,CLUSTER_25,CLUSTER_26,CLUSTER_27,CLUSTER_28,CLUSTER_29,CLUSTER_3,CLUSTER_30,CLUSTER_31,CLUSTER_32,CLUSTER_33,CLUSTER_34,CLUSTER_35,CLUSTER_36,CLUSTER_37,CLUSTER_38,CLUSTER_39,CLUSTER_4,CLUSTER_40,CLUSTER_41,CLUSTER_42,CLUSTER_43,CLUSTER_44,CLUSTER_45,CLUSTER_46,CLUSTER_47,CLUSTER_48,CLUSTER_49,CLUSTER_5,CLUSTER_50,CLUSTER_51,CLUSTER_52,CLUSTER_53,CLUSTER_6,CLUSTER_7,CLUSTER_8,CLUSTER_9,HOMEOWNR_U,GENDER_M,GENDER_other,DATASRCE_1,DATASRCE_2,DATASRCE_3,SOLIH_1.0,SOLIH_12.0,SOLIH_2.0,SOLIH_3.0,SOLIH_4.0,SOLIH_6.0,SOLIH_U,VETERANS_Y,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,DOMAIN_B_2,DOMAIN_B_3,DOMAIN_B_4,RFA_2F_2,RFA_2F_3,RFA_2F_4,CLUSTER2_10.0,CLUSTER2_11.0,CLUSTER2_12.0,CLUSTER2_13.0,CLUSTER2_14.0,CLUSTER2_15.0,CLUSTER2_16.0,CLUSTER2_17.0,CLUSTER2_18.0,CLUSTER2_19.0,CLUSTER2_2.0,CLUSTER2_20.0,CLUSTER2_21.0,CLUSTER2_22.0,CLUSTER2_23.0,CLUSTER2_24.0,CLUSTER2_25.0,CLUSTER2_26.0,CLUSTER2_27.0,CLUSTER2_28.0,CLUSTER2_29.0,CLUSTER2_3.0,CLUSTER2_30.0,CLUSTER2_31.0,CLUSTER2_32.0,CLUSTER2_33.0,CLUSTER2_34.0,CLUSTER2_35.0,CLUSTER2_36.0,CLUSTER2_37.0,CLUSTER2_38.0,CLUSTER2_39.0,CLUSTER2_4.0,CLUSTER2_40.0,CLUSTER2_41.0,CLUSTER2_42.0,CLUSTER2_43.0,CLUSTER2_44.0,CLUSTER2_45.0,CLUSTER2_46.0,CLUSTER2_47.0,CLUSTER2_48.0,CLUSTER2_49.0,CLUSTER2_5.0,CLUSTER2_50.0,CLUSTER2_51.0,CLUSTER2_52.0,CLUSTER2_53.0,CLUSTER2_54.0,CLUSTER2_55.0,CLUSTER2_56.0,CLUSTER2_57.0,CLUSTER2_58.0,CLUSTER2_59.0,CLUSTER2_6.0,CLUSTER2_60.0,CLUSTER2_61.0,CLUSTER2_62.0,CLUSTER2_7.0,CLUSTER2_8.0,CLUSTER2_9.0
80169,0.211470,0.515152,0.000000,0.000000,0.000000,0.000000,0.535714,0.232323,0.191919,0.050505,0.242424,0.222222,0.101010,0.212121,0.000000,0.0,0.091667,0.131313,0.04,0.050505,0.161616,0.245902,0.100,0.080808,0.00,0.000000,0.00,0.757576,0.727273,0.050505,0.054545,0.030303,0.060606,0.050505,0.062500,0.060606,0.060606,0.060606,0.030303,0.040404,0.111111,0.717172,0.705882,0.222222,0.272727,0.090909,0.030303,0.010309,0.000000,0.000000,0.010101,0.000000,0.464646,0.545455,0.878788,0.060606,0.526316,0.263158,0.171053,0.019533,0.063559,0.195122,0.005,0.685792,0.005005,0.822080,0.025,0.497487,0.906071,0.011166,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
37985,0.426523,0.535354,0.030612,0.021739,0.000000,0.023256,0.511905,0.161616,0.242424,0.090909,0.262626,0.242424,0.141414,0.292929,0.000000,0.0,0.778667,0.242424,0.02,0.080808,0.171717,0.213115,0.100,0.000000,0.12,0.070707,0.00,0.838384,0.838384,0.242424,0.000000,0.020202,0.000000,0.000000,0.000000,0.000000,0.161616,0.030303,0.030303,0.080808,0.040404,0.717172,0.941176,0.000000,0.181818,0.363636,0.343434,0.020619,0.024096,0.000000,0.010101,0.019231,1.000000,0.000000,1.000000,0.020202,0.350877,0.263158,0.131579,0.009925,0.033898,0.048780,0.007,0.640255,0.002002,0.730839,0.015,0.502513,0.928043,0.010728,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7502,0.498208,0.565657,0.000000,0.000000,0.000000,0.011628,0.821429,0.232323,0.030303,0.323232,0.767677,0.757576,0.080808,0.060606,0.000000,0.0,0.154000,0.262626,0.00,0.010101,0.717172,0.180328,0.075,0.060606,0.00,0.000000,0.00,0.232323,0.232323,0.121212,0.109091,0.000000,0.050505,0.000000,0.109375,0.000000,0.121212,0.040404,0.060606,0.191919,0.000000,0.757576,0.705882,0.121212,0.535354,0.050505,0.000000,0.010309,0.012048,0.000000,0.010101,0.000000,0.979798,0.030303,1.000000,0.070707,0.350877,0.315789,0.171053,0.004329,0.033898,0.146341,0.002,0.681239,0.002002,0.913321,0.015,0.045226,0.937415,0.004832,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
91759,0.856631,0.505051,0.010204,0.021739,0.000000,0.011628,0.547619,0.222222,0.252525,0.040404,0.181818,0.171717,0.050505,0.181818,0.000000,0.0,0.156500,0.101010,0.00,0.030303,0.151515,0.229508,0.100,0.000000,0.12,0.030303,0.00,0.737374,0.727273,0.171717,0.018182,0.000000,0.010101,0.000000,0.015625,0.010101,0.030303,0.010101,0.090909,0.111111,0.212121,0.535354,0.941176,0.000000,0.121212,0.262626,0.383838,0.030928,0.000000,0.029412,0.000000,0.000000,1.000000,0.000000,1.000000,0.060606,0.192982,0.315789,0.144737,0.002851,0.012712,0.048780,0.005,0.867942,0.004004,0.911953,0.025,0.030151,0.980006,0.008976,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9233,0.856631,0.494949,0.020408,0.000000,0.000000,0.011628,0.440476,0.181818,0.333333,0.010101,0.060606,0.040404,0.111111,0.242424,0.000000,0.0,0.149000,0.131313,0.04,0.070707,0.040404,0.278689,0.125,0.000000,0.04,0.010101,0.08,0.909091,0.878788,0.171717,0.018182,0.010101,0.000000,0.040404,0.062500,0.050505,0.040404,0.010101,0.070707,0.070707,0.040404,0.727273,0.823529,0.010101,0.181818,0.191919,0.131313,0.041237,0.012048,0.014706,0.010101,0.000000,1.000000,0.000000,1.000000,0.040404,0.070175,0.157895,0.078947,0.000106,0.000000,0.000000,0.015,0.912113,0.002002,0.911953,0.015,0.030151,0.990211,0.013732,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61026,0.211470,0.525253,0.000000,0.000000,0.017544,0.000000,0.535714,0.222222,0.222222,0.070707,0.282828,0.242424,0.070707,0.191919,0.000000,0.0,0.153000,0.070707,0.02,0.050505,0.171717,0.262295,0.125,0.000000,0.00,0.000000,0.00,0.858586,0.858586,0.050505,0.000000,0.000000,0.040404,0.010101,0.046875,0.080808,0.030303,0.020202,0.070707,0.000000,0.010101,0.838384,0.705882,0.010101,0.444444,0.131313,0.040404,0.010309,0.012048,0.014706,0.060606,0.000000,1.000000,0.000000,1.000000,0.070707,0.649123,0.421053,0.381579,0.091015,0.216102,0.414634,0.002,0.502732,0.005205,0.958485,0.031,0.994975,0.896595,0.015580,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
69968,0.498208,0.535354,0.000000,0.000000,0.000000,0.000000,0.559524,0.242424,0.191919,0.101010,0.303030,0.282828,0.111111,0.222222,0.030303,0.0,0.077333,0.141414,0.04,0.101010,0.191919,0.229508,0.125,0.030303,0.02,0.000000,0.00,0.575758,0.515152,0.080808,0.054545,0.010101,0.080808,0.070707,0.046875,0.040404,0.030303,0.020202,0.070707,0.070707,0.111111,0.696970,0.705882,0.313131,0.292929,0.040404,0.040404,0.010309,0.012048,0.000000,0.000000,0.000000,0.646465,0.303030,0.808081,0.070707,0.368421,0.315789,0.144737,0.007919,0.033898,0.121951,0.005,0.727687,0.002002,0.912409,0.015,0.035176,0.938457,0.008614,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8681,0.283154,0.525253,0.000000,0.000000,0.000000,0.000000,0.476190,0.202020,0.313131,0.020202,0.111111,0.090909,0.080808,0.202020,0.000000,0.0,0.258833,0.131313,0.02,0.030303,0.090909,0.229508,0.100,0.010101,0.10,0.000000,0.00,0.898990,0.888889,0.272727,0.000000,0.000000,0.020202,0.020202,0.203125,0.070707,0.040404,0.020202,0.101010,0.020202,0.050505,0.787879,0.941176,0.010101,0.121212,0.353535,0.161616,0.072165,0.012048,0.014706,0.010101,0.000000,0.969697,0.040404,1.000000,0.040404,0.333333,0.263158,0.144737,0.005174,0.050847,0.170732,0.002,0.636157,0.000400,0.908759,0.005,0.502513,0.906175,0.003565,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9051,0.784946,0.505051,0.000000,0.000000,0.000000,0.034884,0.464286,0.252525,0.282828,0.010101,0.111111,0.080808,0.111111,0.252525,0.000000,0.0,0.093500,0.070707,0.04,0.101010,0.060606,0.344262,0.150,0.000000,0.02,0.000000,0.00,0.848485,0.808081,0.101010,0.018182,0.000000,0.090909,0.090909,0.046875,0.060606,0.050505,0.010101,0.060606,0.070707,0.050505,0.747475,0.705882,0.111111,0.383838,0.060606,0.010101,0.051546,0.000000,0.000000,0.000000,0.000000,1.000000,0.010101,1.000000,0.010101,0.228070,0.315789,0.157895,0.002428,0.025424,0.073171,0.005,0.954918,0.000400,0.913321,0.005,0.502513,0.979486,0.004005,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [183]:
X_test_tr = pd.concat([X_test_num_sc, X_test_cat_enc], axis=1)

In [184]:
X_test_tr

Unnamed: 0,ODATEDW,POP90C5,ETH8,ETH10,ETH14,ETH16,AGE903,CHIL3,AGEC3,AGEC7,HHAGE1,HHAGE3,MARR2,MARR4,DW8,DW9,HV2,HHD6,HHD8,HHD9,ETHC3,RHP3,RHP4,HUPA7,IC12,TPE3,TPE7,LFC2,LFC4,OCC2,OCC7,OCC9,OCC12,OCC13,EIC6,EIC7,EIC10,EIC11,EIC13,OEDC1,OEDC2,OEDC5,EC1,EC2,EC4,EC7,EC8,SEC1,ANC1,ANC9,ANC10,ANC12,HC17,HC18,HC21,AC1,CARDPROM,CARDPM12,NUMPRM12,RAMNTALL,NGIFTALL,CARDGIFT,MINRAMNT,MINRDATE,MAXRAMNT,MAXRDATE,LASTGIFT,LASTDATE,FISTDATE,AVGGIFT,STATE_FL,STATE_GA,STATE_IL,STATE_IN,STATE_MI,STATE_MO,STATE_NC,STATE_TX,STATE_WA,STATE_WI,STATE_other,MAILCODE_B,CLUSTER_10,CLUSTER_11,CLUSTER_12,CLUSTER_13,CLUSTER_14,CLUSTER_15,CLUSTER_16,CLUSTER_17,CLUSTER_18,CLUSTER_19,CLUSTER_2,CLUSTER_20,CLUSTER_21,CLUSTER_22,CLUSTER_23,CLUSTER_24,CLUSTER_25,CLUSTER_26,CLUSTER_27,CLUSTER_28,CLUSTER_29,CLUSTER_3,CLUSTER_30,CLUSTER_31,CLUSTER_32,CLUSTER_33,CLUSTER_34,CLUSTER_35,CLUSTER_36,CLUSTER_37,CLUSTER_38,CLUSTER_39,CLUSTER_4,CLUSTER_40,CLUSTER_41,CLUSTER_42,CLUSTER_43,CLUSTER_44,CLUSTER_45,CLUSTER_46,CLUSTER_47,CLUSTER_48,CLUSTER_49,CLUSTER_5,CLUSTER_50,CLUSTER_51,CLUSTER_52,CLUSTER_53,CLUSTER_6,CLUSTER_7,CLUSTER_8,CLUSTER_9,HOMEOWNR_U,GENDER_M,GENDER_other,DATASRCE_1,DATASRCE_2,DATASRCE_3,SOLIH_1.0,SOLIH_12.0,SOLIH_2.0,SOLIH_3.0,SOLIH_4.0,SOLIH_6.0,SOLIH_U,VETERANS_Y,RFA_2A_E,RFA_2A_F,RFA_2A_G,GEOCODE2_B,GEOCODE2_C,GEOCODE2_D,DOMAIN_A_R,DOMAIN_A_S,DOMAIN_A_T,DOMAIN_A_U,DOMAIN_B_2,DOMAIN_B_3,DOMAIN_B_4,RFA_2F_2,RFA_2F_3,RFA_2F_4,CLUSTER2_10.0,CLUSTER2_11.0,CLUSTER2_12.0,CLUSTER2_13.0,CLUSTER2_14.0,CLUSTER2_15.0,CLUSTER2_16.0,CLUSTER2_17.0,CLUSTER2_18.0,CLUSTER2_19.0,CLUSTER2_2.0,CLUSTER2_20.0,CLUSTER2_21.0,CLUSTER2_22.0,CLUSTER2_23.0,CLUSTER2_24.0,CLUSTER2_25.0,CLUSTER2_26.0,CLUSTER2_27.0,CLUSTER2_28.0,CLUSTER2_29.0,CLUSTER2_3.0,CLUSTER2_30.0,CLUSTER2_31.0,CLUSTER2_32.0,CLUSTER2_33.0,CLUSTER2_34.0,CLUSTER2_35.0,CLUSTER2_36.0,CLUSTER2_37.0,CLUSTER2_38.0,CLUSTER2_39.0,CLUSTER2_4.0,CLUSTER2_40.0,CLUSTER2_41.0,CLUSTER2_42.0,CLUSTER2_43.0,CLUSTER2_44.0,CLUSTER2_45.0,CLUSTER2_46.0,CLUSTER2_47.0,CLUSTER2_48.0,CLUSTER2_49.0,CLUSTER2_5.0,CLUSTER2_50.0,CLUSTER2_51.0,CLUSTER2_52.0,CLUSTER2_53.0,CLUSTER2_54.0,CLUSTER2_55.0,CLUSTER2_56.0,CLUSTER2_57.0,CLUSTER2_58.0,CLUSTER2_59.0,CLUSTER2_6.0,CLUSTER2_60.0,CLUSTER2_61.0,CLUSTER2_62.0,CLUSTER2_7.0,CLUSTER2_8.0,CLUSTER2_9.0
15497,0.928315,0.565657,0.0,0.000000,0.0,0.000000,0.690476,0.242424,0.141414,0.272727,0.414141,0.393939,0.040404,0.181818,0.111111,0.0,0.133000,0.262626,0.00,0.020202,0.373737,0.196721,0.100,0.000000,0.00,0.000000,0.00,0.636364,0.636364,0.080808,0.000000,0.020202,0.070707,0.070707,0.000000,0.020202,0.040404,0.020202,0.020202,0.050505,0.000000,0.747475,0.705882,0.080808,0.424242,0.111111,0.060606,0.010309,0.012048,0.000000,0.010101,0.000000,0.414141,0.585859,0.969697,0.050505,0.087719,0.263158,0.144737,0.001584,0.004237,0.024390,0.006,0.954918,0.003604,0.954380,0.006,0.502513,0.999896,0.013231,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
43807,0.211470,0.565657,0.0,0.000000,0.0,0.000000,0.547619,0.171717,0.181818,0.161616,0.151515,0.141414,0.131313,0.262626,0.111111,0.0,0.101500,0.252525,0.02,0.222222,0.212121,0.229508,0.125,0.030303,0.00,0.020202,0.00,0.686869,0.616162,0.090909,0.018182,0.000000,0.060606,0.040404,0.031250,0.070707,0.040404,0.020202,0.171717,0.111111,0.040404,0.747475,0.705882,0.101010,0.333333,0.090909,0.020202,0.041237,0.000000,0.088235,0.050505,0.000000,0.949495,0.060606,0.989899,0.030303,0.421053,0.315789,0.368421,0.026924,0.080508,0.195122,0.010,0.864299,0.004004,0.957573,0.025,0.532663,0.896699,0.012180,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
93726,0.498208,0.494949,0.0,0.000000,0.0,0.000000,0.511905,0.202020,0.303030,0.040404,0.212121,0.181818,0.080808,0.181818,0.000000,0.0,0.229000,0.121212,0.04,0.020202,0.141414,0.245902,0.100,0.000000,0.08,0.000000,0.00,0.808081,0.777778,0.262626,0.018182,0.020202,0.030303,0.020202,0.000000,0.050505,0.030303,0.040404,0.090909,0.060606,0.020202,0.747475,0.823529,0.030303,0.282828,0.202020,0.090909,0.010309,0.012048,0.000000,0.010101,0.019231,0.000000,1.000000,1.000000,0.040404,0.403509,0.315789,0.157895,0.024390,0.021186,0.121951,0.015,0.681239,0.014014,0.954380,0.075,0.497487,0.937415,0.039599,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
39010,0.713262,0.525253,0.0,0.000000,0.0,0.000000,0.488095,0.232323,0.252525,0.030303,0.141414,0.121212,0.090909,0.202020,0.000000,0.0,0.129500,0.090909,0.04,0.050505,0.101010,0.245902,0.100,0.000000,0.06,0.020202,0.00,0.797980,0.787879,0.131313,0.054545,0.000000,0.040404,0.040404,0.109375,0.111111,0.040404,0.020202,0.090909,0.050505,0.060606,0.777778,0.735294,0.040404,0.282828,0.151515,0.080808,0.030928,0.012048,0.000000,0.000000,0.000000,0.959596,0.050505,1.000000,0.020202,0.192982,0.315789,0.157895,0.003484,0.016949,0.000000,0.005,0.908925,0.003003,0.912865,0.012,0.045226,0.969280,0.008125,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8932,0.354839,0.515152,0.0,0.021739,0.0,0.011628,0.523810,0.232323,0.313131,0.010101,0.090909,0.080808,0.060606,0.212121,0.000000,0.0,0.298000,0.040404,0.00,0.040404,0.080808,0.262295,0.075,0.000000,0.14,0.010101,0.00,0.818182,0.797980,0.353535,0.000000,0.000000,0.010101,0.000000,0.015625,0.131313,0.060606,0.030303,0.010101,0.070707,0.010101,0.737374,0.941176,0.000000,0.131313,0.424242,0.151515,0.072165,0.012048,0.000000,0.000000,0.000000,1.000000,0.000000,1.000000,0.070707,0.473684,0.315789,0.131579,0.016049,0.055085,0.170732,0.010,0.912113,0.003003,0.817062,0.010,0.030151,0.917318,0.010585,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
81247,0.354839,0.515152,0.0,0.000000,0.0,0.023256,0.500000,0.212121,0.222222,0.060606,0.252525,0.242424,0.141414,0.282828,0.000000,0.0,0.527000,0.242424,0.02,0.050505,0.191919,0.229508,0.100,0.000000,0.12,0.030303,0.04,0.787879,0.757576,0.181818,0.018182,0.010101,0.010101,0.040404,0.031250,0.030303,0.080808,0.070707,0.090909,0.070707,0.020202,0.686869,0.841176,0.030303,0.191919,0.232323,0.131313,0.051546,0.012048,0.000000,0.000000,0.019231,1.000000,0.000000,0.989899,0.070707,0.473684,0.315789,0.144737,0.017105,0.046610,0.146341,0.005,0.591985,0.005005,0.913321,0.030,0.045226,0.916484,0.013398,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
60497,0.784946,0.000000,0.0,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.00,0.000000,0.000000,0.000000,0.000,0.000000,0.00,0.000000,0.00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.245614,0.315789,0.144737,0.005913,0.016949,0.024390,0.010,0.913024,0.003003,0.867701,0.010,0.040201,0.969489,0.012731,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4025,0.283154,0.484848,0.0,0.000000,0.0,0.011628,0.440476,0.202020,0.222222,0.030303,0.131313,0.121212,0.161616,0.373737,0.020202,0.0,0.114833,0.343434,0.04,0.030303,0.101010,0.180328,0.125,0.000000,0.02,0.000000,0.00,0.868687,0.818182,0.070707,0.000000,0.020202,0.050505,0.000000,0.015625,0.050505,0.040404,0.010101,0.090909,0.010101,0.010101,0.787879,0.711765,0.060606,0.303030,0.161616,0.030303,0.000000,0.000000,0.000000,0.070707,0.019231,0.989899,0.020202,1.000000,0.020202,0.491228,0.315789,0.144737,0.013003,0.055085,0.219512,0.005,0.591530,0.002032,0.866788,0.015,0.045226,0.906071,0.008522,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
86390,0.283154,0.535354,0.0,0.000000,0.0,0.034884,0.642857,0.252525,0.171717,0.131313,0.373737,0.353535,0.141414,0.222222,0.000000,0.0,0.543500,0.292929,0.02,0.020202,0.333333,0.180328,0.075,0.000000,0.14,0.010101,0.00,0.727273,0.707071,0.222222,0.036364,0.020202,0.010101,0.010101,0.015625,0.050505,0.060606,0.030303,0.111111,0.040404,0.010101,0.767677,0.847059,0.030303,0.202020,0.252525,0.171717,0.092784,0.000000,0.014706,0.030303,0.038462,1.000000,0.000000,1.000000,0.080808,0.543860,0.210526,0.105263,0.016049,0.084746,0.243902,0.003,0.545993,0.001802,0.681569,0.010,0.005025,0.906175,0.006628,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Logistic regression classification model.

In [186]:
log_re = linear_model.LogisticRegression()
log_re.fit(X_train_tr, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [188]:
log_re.score(X_test_tr, y_test)

0.9488114702553138

In [194]:
y_test.value_counts()

0    22632
1     1221
Name: TARGET_B, dtype: int64

In [208]:
pred = log_re.predict(X_test_tr)

In [209]:
pd.Series(pred).value_counts()

0    23853
dtype: int64

In [210]:
from sklearn.metrics import confusion_matrix
confusion_matrix(y_test, pred)

array([[22632,     0],
       [ 1221,     0]], dtype=int64)

In [204]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

In [211]:
print("precision: ",precision_score(y_test,pred))
print("recall: ",recall_score(y_test,pred))
print("f1: ",f1_score(y_test,pred))

precision:  0.0
recall:  0.0
f1:  0.0


  _warn_prf(average, modifier, msg_start, len(result))


### Treating the imbalance.

SMOTE.

In [195]:
from imblearn.over_sampling import SMOTE
sm = SMOTE()

In [196]:
X_train_SMOTE, y_train_SMOTE = sm.fit_resample(X_train_tr, y_train)

New model (SMOTE).

In [197]:
log_re_2 = linear_model.LogisticRegression()
log_re_2.fit(X_train_SMOTE, y_train_SMOTE)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [198]:
log_re_2.score(X_test_tr, y_test)

0.6187481658491595

In [199]:
y_test.value_counts()

0    22632
1     1221
Name: TARGET_B, dtype: int64

In [205]:
pred2 = log_re_2.predict(X_test_tr)

In [201]:
pd.Series(pred).value_counts()

0    14698
1     9155
dtype: int64

In [206]:
confusion_matrix(y_test, pred2)

array([[14118,  8514],
       [  580,   641]], dtype=int64)

In [207]:
print("precision: ",precision_score(y_test,pred2))
print("recall: ",recall_score(y_test,pred2))
print("f1: ",f1_score(y_test,pred2))

precision:  0.07001638448935008
recall:  0.5249795249795249
f1:  0.12355435620663066


In [221]:
# df_ov = pd.concat([X_train_tr, y_train], axis=1)