In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import make_scorer
from Amex_Metric import amex_metric

## Reading data-file 
data = pd.read_csv('Delinquency_Features.csv')
data.head()

Unnamed: 0,customer_ID,target,D_39_mean,D_39_median,D_39_min,D_39_max,D_39_range,D_39_IQR,D_39_values_above_mean
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0,0.010704,0.002483,0.001082,0.091492,0.090393,0.003593,1.0
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0,0.215088,0.21167,0.002224,0.567383,0.564941,0.293152,5.0
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0,0.004181,0.00423,0.000802,0.009705,0.008904,0.003944,7.0
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0,0.048859,0.007423,0.00066,0.268555,0.267822,0.032093,3.0
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0,0.004642,0.004147,3e-05,0.008682,0.008652,0.00474,6.0


In [2]:
## Defining input and target variables
X = data.drop(columns = ['customer_ID', 'target'], axis = 1)
Y = data['target']

## Spliting the data into train, validation, and test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, stratify = Y)

## Defining the customized scoring function 
amex_function = make_scorer(amex_metric, greater_is_better = True, needs_proba = True)

## Defining list to store results
features_to_select = list()

for i in range(0, 2):

    ## Running RFE with Random forest
    RF_auto_feature = RFECV(estimator = RandomForestClassifier(n_estimators = 50, max_depth = 5), step = 1, scoring = amex_function, min_features_to_select = 5, cv = 3).fit(X_train, Y_train)
    
    ## Appending results 
    features_to_select.append(X_train.columns[RF_auto_feature.support_])
    
print(features_to_select)

[Index(['D_39_mean', 'D_39_median', 'D_39_max', 'D_39_range', 'D_39_IQR',
       'D_39_values_above_mean'],
      dtype='object'), Index(['D_39_mean', 'D_39_median', 'D_39_max', 'D_39_range', 'D_39_IQR',
       'D_39_values_above_mean'],
      dtype='object')]


In [5]:
pd.DataFrame(features_to_select)

Unnamed: 0,0,1,2,3,4,5
0,D_39_mean,D_39_median,D_39_max,D_39_range,D_39_IQR,D_39_values_above_mean
1,D_39_mean,D_39_median,D_39_max,D_39_range,D_39_IQR,D_39_values_above_mean


In [6]:
features_to_select.append(['X_1', 'X_2', 'X_3', 'X_4', 'X_5', 'X_6', 'X_7'])

In [8]:
pd.DataFrame(features_to_select)

Unnamed: 0,0,1,2,3,4,5,6
0,D_39_mean,D_39_median,D_39_max,D_39_range,D_39_IQR,D_39_values_above_mean,
1,D_39_mean,D_39_median,D_39_max,D_39_range,D_39_IQR,D_39_values_above_mean,
2,X_1,X_2,X_3,X_4,X_5,X_6,X_7


In [12]:
RF_auto_feature.ranking_

array([1, 1, 2, 1, 1, 1, 1])

In [13]:
RF_auto_feature.support_

array([ True,  True, False,  True,  True,  True,  True])

In [14]:
RF_auto_feature

RFECV(cv=3, estimator=RandomForestClassifier(max_depth=5, n_estimators=50),
      min_features_to_select=5,
      scoring=make_scorer(amex_metric, needs_proba=True))

In [8]:
## Identifying important features 
print(X_train.columns[RF_auto_feature.support_])

Index(['D_39_mean', 'D_39_median', 'D_39_min', 'D_39_max', 'D_39_range',
       'D_39_IQR', 'D_39_values_above_mean'],
      dtype='object')


In [15]:
## Identifying important features 
print(X_train.columns[RF_auto_feature.support_])

Index(['D_39_mean', 'D_39_median', 'D_39_max', 'D_39_range', 'D_39_IQR',
       'D_39_values_above_mean'],
      dtype='object')


In [10]:
X_train.shape

(367130, 7)

In [None]:
## Defining input and target variables
X = data.drop(columns = ['customer_ID', 'target'], axis = 1)
Y = data['target']

## Spliting the data into train, validation, and test
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size = 0.2, stratify = Y)

## Defining hyper-paramerters for RF
RF_param_grid = {'n_estimators': [100, 300, 500],
                 'min_samples_split': [10, 15],
                 'min_samples_leaf': [5, 7],
                 'max_depth' : [3, 5, 7]}

## Defining the customized scoring function 
amex_function = make_scorer(amex_metric, greater_is_better = True, needs_proba = True)

## Performing grid search
RF_grid_search = GridSearchCV(RandomForestClassifier(), RF_param_grid, cv = 3, scoring = amex_function, n_jobs = -1).fit(X_train, Y_train)

# Feature Selecting with Imputed Variables

In [14]:
import boto3
import pandas as pd; pd.set_option('display.max_columns', 500) 
import numpy as np

s3 = boto3.resource('s3')
bucket_name = 'analytics-data-science-competitions'
bucket = s3.Bucket(bucket_name)

## Defining files names
file_key = 'AmericanExpress/Delinquency_Features_Filled.csv'

bucket_object = bucket.Object(file_key)
file_object = bucket_object.get()
file_content_stream = file_object.get('Body')

## Reading data-files
data = pd.read_csv(file_content_stream)
data = data.drop(columns = ['D_64_last'], axis = 1)

## Putting variables in the right shape 
data['D_68_last'] = data['D_68_last'].astype(str)
data['D_114_last'] = data['D_114_last'].astype(str)
data['D_116_last'] = data['D_116_last'].astype(str)
data['D_117_last'] = data['D_117_last'].astype(str)
data['D_120_last'] = data['D_120_last'].astype(str)
data['D_126_last'] = data['D_126_last'].astype(str)

## Converting to dummies
dummies = pd.get_dummies(data[['D_63_last', 'D_68_last', 'D_114_last', 'D_116_last', 'D_117_last', 'D_120_last', 'D_126_last']])

## Appeding dummies 
data = data.drop(columns = ['D_63_last', 'D_68_last', 'D_114_last', 'D_116_last', 'D_117_last', 'D_120_last', 'D_126_last'], axis = 1)
data = pd.concat([data, dummies], axis = 0)
data.head()

Unnamed: 0,customer_ID,target,D_39_mean,D_39_median,D_39_min,D_39_max,D_39_range,D_39_IQR,D_39_std,D_39_pct_values_above_mean,D_41_mean,D_41_median,D_41_min,D_41_max,D_41_range,D_41_IQR,D_41_std,D_41_pct_values_above_mean,D_44_mean,D_44_median,D_44_min,D_44_max,D_44_range,D_44_IQR,D_44_std,D_44_pct_values_above_mean,D_45_range,D_45_pct_values_above_mean,D_47_mean,D_47_median,D_47_min,D_47_max,D_47_range,D_47_IQR,D_47_std,D_47_pct_values_above_mean,D_47_avg_pct_change,D_51_mean,D_51_median,D_51_min,D_51_max,D_51_range,D_51_IQR,D_51_std,D_51_pct_values_above_mean,D_52_mean,D_52_median,D_52_min,D_52_max,D_52_range,D_52_IQR,D_52_std,D_52_pct_values_above_mean,D_52_avg_pct_change,D_54_mean,D_54_median,D_54_min,D_54_max,D_54_range,D_54_IQR,D_54_std,D_54_pct_values_above_mean,D_54_avg_pct_change,D_55_pct_values_above_mean,D_58_mean,D_58_median,D_58_min,D_58_max,D_58_range,D_58_IQR,D_58_std,D_58_pct_values_above_mean,D_59_mean,D_59_median,D_59_min,D_59_max,D_59_range,D_59_IQR,D_59_std,D_59_pct_values_above_mean,D_59_avg_pct_change,D_60_mean,D_60_median,D_60_min,D_60_max,D_60_range,D_60_IQR,D_60_std,D_60_pct_values_above_mean,D_65_mean,D_65_median,D_65_min,D_65_max,D_65_range,D_65_IQR,D_65_pct_values_above_mean,D_68_pct_values_above_mean,D_69_median,D_69_min,D_69_max,D_69_range,D_69_IQR,D_69_pct_values_above_mean,D_70_mean,D_70_median,D_70_min,D_70_max,D_70_range,D_70_IQR,D_70_std,D_70_pct_values_above_mean,D_71_mean,D_71_median,D_71_min,D_71_max,D_71_range,D_71_IQR,D_71_std,D_71_pct_values_above_mean,D_72_mean,D_72_median,D_72_min,D_72_max,D_72_range,D_72_IQR,D_72_std,D_72_pct_values_above_mean,D_74_mean,D_74_median,D_74_min,D_74_max,D_74_range,D_74_IQR,D_74_std,D_74_pct_values_above_mean,D_75_mean,D_75_median,D_75_min,D_75_max,D_75_range,D_75_IQR,D_75_std,D_75_pct_values_above_mean,D_78_mean,D_78_median,D_78_min,D_78_max,D_78_range,D_78_IQR,D_78_std,D_78_pct_values_above_mean,D_79_mean,D_79_median,D_79_min,D_79_max,D_79_range,D_79_IQR,D_79_std,D_79_pct_values_above_mean,D_80_mean,D_80_median,D_80_min,D_80_max,D_80_range,D_80_IQR,D_80_std,D_80_pct_values_above_mean,D_81_mean,D_81_median,D_81_min,D_81_max,D_81_range,D_81_IQR,D_81_std,D_81_pct_values_above_mean,D_83_mean,D_83_median,D_83_min,D_83_max,D_83_range,D_83_IQR,D_83_std,D_83_pct_values_above_mean,D_84_mean,D_84_median,D_84_min,D_84_max,D_84_range,D_84_IQR,D_84_std,D_84_pct_values_above_mean,D_86_mean,D_86_median,D_86_min,D_86_max,D_86_range,D_86_IQR,D_86_std,D_86_pct_values_above_mean,D_89_mean,D_89_median,D_89_min,D_89_max,D_89_range,D_89_IQR,D_89_std,D_89_pct_values_above_mean,D_91_mean,D_91_median,D_91_min,D_91_max,D_91_range,D_91_IQR,D_91_std,D_91_pct_values_above_mean,D_92_mean,D_92_median,D_92_min,D_92_max,D_92_range,D_92_IQR,D_92_std,D_92_pct_values_above_mean,D_93_mean,D_93_median,D_93_min,D_93_max,D_93_range,D_93_IQR,D_93_std,D_93_pct_values_above_mean,D_94_mean,D_94_median,D_94_min,D_94_max,D_94_range,D_94_IQR,D_94_std,D_94_pct_values_above_mean,D_96_mean,D_96_median,D_96_min,D_96_max,D_96_range,D_96_IQR,D_96_std,D_96_pct_values_above_mean,D_102_mean,D_102_median,D_102_min,D_102_max,D_102_range,D_102_IQR,D_102_std,D_102_pct_values_above_mean,D_103_mean,D_103_median,D_103_min,D_103_max,D_103_range,D_103_IQR,D_103_std,D_103_pct_values_above_mean,D_104_mean,D_104_median,D_104_min,D_104_max,D_104_range,D_104_IQR,D_104_std,D_104_pct_values_above_mean,D_107_mean,D_107_median,D_107_min,D_107_max,D_107_range,D_107_IQR,D_107_std,D_107_pct_values_above_mean,D_109_mean,D_109_median,D_109_min,D_109_max,D_109_range,D_109_IQR,D_109_std,D_109_pct_values_above_mean,D_112_mean,D_112_median,D_112_min,D_112_max,D_112_range,D_112_IQR,D_112_std,D_112_pct_values_above_mean,D_112_avg_pct_change,D_113_mean,D_113_median,D_113_min,D_113_max,D_113_range,D_113_IQR,D_113_std,D_113_pct_values_above_mean,D_115_mean,D_115_median,D_115_min,D_115_max,D_115_range,D_115_IQR,D_115_std,D_115_pct_values_above_mean,D_115_avg_pct_change,D_118_mean,D_118_median,D_118_min,D_118_max,D_118_range,D_118_IQR,D_118_std,D_118_pct_values_above_mean,D_119_mean,D_119_median,D_119_min,D_119_max,D_119_range,D_119_IQR,D_119_std,D_119_pct_values_above_mean,D_119_avg_pct_change,D_121_mean,D_121_median,D_121_min,D_121_max,D_121_range,D_121_IQR,D_121_std,D_121_pct_values_above_mean,D_121_avg_pct_change,D_122_mean,D_122_median,D_122_min,D_122_max,D_122_range,D_122_IQR,D_122_std,D_122_pct_values_above_mean,D_122_avg_pct_change,D_123_mean,D_123_median,D_123_min,D_123_max,D_123_range,D_123_IQR,D_123_std,D_123_pct_values_above_mean,D_124_mean,D_124_median,D_124_min,D_124_max,D_124_range,D_124_IQR,D_124_std,D_124_pct_values_above_mean,D_124_avg_pct_change,D_125_mean,D_125_median,D_125_min,D_125_max,D_125_range,D_125_IQR,D_125_std,D_125_pct_values_above_mean,D_127_mean,D_127_median,D_127_min,D_127_max,D_127_range,D_127_IQR,D_127_std,D_127_pct_values_above_mean,D_128_mean,D_128_median,D_128_min,D_128_max,D_128_range,D_128_IQR,D_128_std,D_128_pct_values_above_mean,D_129_mean,D_129_median,D_129_min,D_129_max,D_129_range,D_129_IQR,D_129_std,D_129_pct_values_above_mean,D_130_mean,D_130_median,D_130_min,D_130_max,D_130_range,D_130_IQR,D_130_std,D_130_pct_values_above_mean,D_131_mean,D_131_median,D_131_min,D_131_max,D_131_range,D_131_IQR,D_131_std,D_131_pct_values_above_mean,D_133_mean,D_133_median,D_133_min,D_133_max,D_133_range,D_133_IQR,D_133_std,D_133_pct_values_above_mean,D_139_mean,D_139_median,D_139_min,D_139_max,D_139_range,D_139_IQR,D_139_std,D_139_pct_values_above_mean,D_140_mean,D_140_median,D_140_min,D_140_max,D_140_range,D_140_IQR,D_140_std,D_140_pct_values_above_mean,D_141_mean,D_141_median,D_141_min,D_141_max,D_141_range,D_141_IQR,D_141_std,D_141_pct_values_above_mean,D_143_mean,D_143_median,D_143_min,D_143_max,D_143_range,D_143_IQR,D_143_std,D_143_pct_values_above_mean,D_144_mean,D_144_median,D_144_min,D_144_max,D_144_range,D_144_IQR,D_144_std,D_144_pct_values_above_mean,D_145_mean,D_145_median,D_145_min,D_145_max,D_145_range,D_145_IQR,D_145_std,D_145_pct_values_above_mean,D_63_last,D_68_last,D_114_last,D_116_last,D_117_last,D_120_last,D_126_last
0,0000099d6bd597052cdcda90ffabf56573fe9d7c79be5f...,0,0.010704,0.002483,0.001082,0.091492,0.090393,0.003593,0.024445,0.076923,0.00502,0.004654,0.000685,0.009857,0.009171,0.005746,0.00333,0.461538,0.004673,0.004128,0.00063,0.009399,0.008766,0.00507,0.002827,0.461538,0.03125,0.384615,0.532715,0.533203,0.521484,0.541992,0.020508,0.008789,0.006535,0.538462,0.002241,0.979004,1.000977,0.668457,1.341797,0.67334,0.665527,0.318359,0.538462,0.204956,0.204224,0.200806,0.208252,0.007446,0.003784,0.002392,0.461538,-0.001383,1.005859,1.007812,1.000977,1.009766,0.008789,0.004883,0.003357,0.615385,0.000499,0.384615,0.064819,0.007175,0.000267,0.158569,0.158325,0.133129,0.069458,0.461538,0.061157,0.06366,0.042236,0.071167,0.028931,0.003235,0.00959,0.769231,0.028461,0.534668,0.305908,0.141602,1.009766,0.868164,0.773682,0.392334,0.384615,0.004704,0.003452,3.1e-05,0.009598,0.009567,0.006095,0.461538,0.0,0.00547,0.000152,0.009247,0.009094,0.002998,0.538462,0.005222,0.006523,0.000959,0.008972,0.008011,0.006203,0.003248,0.538462,0.214844,0.145264,0.075867,0.430908,0.35498,0.193604,0.132324,0.384615,0.004494,0.004803,9.4e-05,0.009888,0.009796,0.006241,0.003418,0.538462,0.03952,0.00927,0.001299,0.081421,0.080139,0.07122,0.037903,0.461538,0.036133,0.009895,3.5e-05,0.076538,0.076477,0.067848,0.035736,0.461538,0.005215,0.005844,3.6e-05,0.009895,0.009857,0.0063,0.003622,0.538462,0.004169,0.003895,0.00116,0.008743,0.007584,0.002943,0.002251,0.461538,0.08197,0.005924,0.000126,0.208252,0.20813,0.201263,0.102783,0.384615,0.005836,0.004829,0.001141,0.00985,0.008713,0.004925,0.00301,0.461538,0.00489,0.004208,1.3e-05,0.00882,0.008804,0.003925,0.002697,0.461538,0.004944,0.004532,0.00083,0.009766,0.008934,0.006187,0.003365,0.461538,0.004875,0.003481,0.000813,0.009972,0.009155,0.004051,0.003029,0.461538,0.005234,0.005157,0.000888,0.00972,0.008835,0.005281,0.003117,0.461538,1.197266,1.007812,1.001953,1.509766,0.507812,0.499023,0.252686,0.384615,0.543945,1.00293,0.001555,1.008789,1.006836,1.001514,0.52002,0.538462,0.004417,0.004265,0.000571,0.009445,0.008873,0.002716,0.002537,0.461538,0.005905,0.006828,0.000391,0.009232,0.008842,0.004631,0.002729,0.538462,0.00362,0.00318,6.3e-05,0.009857,0.009796,0.004242,0.00304,0.461538,0.856934,0.863281,0.766602,0.943359,0.176758,0.098633,0.056732,0.538462,1.005859,1.005859,1.0,1.009766,0.009766,0.003906,0.003174,0.461538,1.009766,1.009766,1.003906,1.015625,0.011719,0.009766,0.004601,0.461538,0.671875,0.672852,0.667969,0.675781,0.007812,0.00293,0.002466,0.538462,0.006111,0.007221,0.000226,0.009705,0.009476,0.004612,0.003098,0.538462,1.003906,1.00293,1.0,1.009766,0.009766,0.005859,0.003338,0.384615,0.000167,0.005474,0.005959,5.3e-05,0.00972,0.009666,0.004507,0.003078,0.538462,0.247192,0.247314,0.238281,0.256836,0.018555,0.009766,0.00655,0.538462,0.006178,0.245483,0.244141,0.232178,0.260254,0.028076,0.010742,0.007763,0.307692,0.244751,0.244263,0.236206,0.256592,0.020386,0.005127,0.00526,0.384615,0.007021,0.711914,0.711426,0.702148,0.719727,0.017578,0.009766,0.005878,0.461538,0.002085,0.433594,0.434326,0.429932,0.4375,0.007568,0.003418,0.002514,0.615385,-5.1e-05,0.003103,0.003057,0.000428,0.009209,0.008781,0.002275,0.002277,0.461538,0.686523,0.687012,0.682129,0.689941,0.007812,0.003906,0.00275,0.538462,-0.000163,0.006252,0.0084,9.7e-05,0.009666,0.009567,0.004452,0.003702,0.615385,1.005859,1.005859,1.001953,1.009766,0.007812,0.005859,0.003098,0.461538,1.003906,1.004883,0.999512,1.007812,0.008301,0.006836,0.003248,0.538462,1.005859,1.006836,1.0,1.008789,0.008789,0.004883,0.00305,0.538462,0.005192,0.00515,0.001034,0.00985,0.00882,0.004938,0.003,0.461538,0.005447,0.005653,0.000715,0.008713,0.007996,0.002373,0.002354,0.615385,0.004761,0.005257,0.000122,0.009224,0.009102,0.004234,0.003117,0.538462,0.003664,0.003269,0.000427,0.009743,0.009315,0.002031,0.002619,0.384615,0.005344,0.004517,0.001976,0.009605,0.007629,0.003361,0.002354,0.307692,0.00518,0.005032,0.000427,0.009384,0.008957,0.003654,0.002607,0.384615,0.005066,0.00581,0.000569,0.009575,0.009003,0.006753,0.003563,0.538462,0.005283,0.005493,0.00061,0.009613,0.009003,0.003981,0.002596,0.538462,0.005814,0.006363,0.000996,0.009827,0.008835,0.006397,0.003294,0.538462,CR,6.0,1.0,0.0,4.0,0.0,1.0
1,00000fd6641609c6ece5454664794f0340ad84dddce9a2...,0,0.215088,0.21167,0.002224,0.567383,0.564941,0.293152,0.199097,0.384615,0.00499,0.005402,0.000728,0.009514,0.008789,0.004631,0.002899,0.538462,0.004311,0.0042,0.000123,0.009872,0.00975,0.004063,0.00297,0.384615,0.027832,0.615385,0.392334,0.390381,0.382568,0.402832,0.020264,0.010254,0.00666,0.461538,0.004183,0.390625,0.341553,0.33374,0.672363,0.338623,0.00293,0.12384,0.153846,0.158325,0.110474,0.103516,0.24231,0.138794,0.133484,0.067017,0.384615,0.100285,1.005859,1.005859,1.001953,1.009766,0.007812,0.005859,0.002708,0.461538,-0.000481,0.538462,0.005146,0.005295,4e-06,0.009758,0.009758,0.002686,0.002806,0.538462,0.233032,0.232788,0.212524,0.239136,0.026611,0.006104,0.006813,0.461538,-0.006178,0.32666,0.333496,0.059113,0.857422,0.79834,0.300415,0.221313,0.538462,0.004131,0.00391,0.000637,0.008743,0.008102,0.005367,0.384615,0.0,0.006329,0.002172,0.008675,0.0065,0.0037,0.692308,0.004143,0.003429,6e-06,0.009781,0.009773,0.006609,0.003588,0.384615,0.011513,0.011078,0.005657,0.016495,0.010834,0.005775,0.003391,0.384615,0.004692,0.004879,0.000353,0.009186,0.008835,0.005096,0.00296,0.538462,0.005337,0.006569,0.00056,0.00975,0.009193,0.004824,0.00322,0.538462,0.00647,0.007771,0.000939,0.009598,0.008659,0.002956,0.002991,0.615385,0.003897,0.003151,0.000161,0.008698,0.008537,0.003407,0.002607,0.384615,0.004829,0.004646,1.8e-05,0.009117,0.009102,0.004658,0.00305,0.461538,0.00539,0.005669,0.001601,0.008217,0.006615,0.003723,0.002489,0.538462,0.004417,0.005009,0.000363,0.009552,0.009186,0.004051,0.002836,0.615385,0.003975,0.002874,0.00055,0.008812,0.008263,0.003723,0.002697,0.461538,0.004299,0.00382,9.1e-05,0.009621,0.009529,0.004503,0.003321,0.384615,0.004768,0.005302,0.000137,0.009926,0.009789,0.003929,0.003136,0.615385,0.003807,0.002399,0.000274,0.009186,0.008911,0.003805,0.002939,0.461538,0.082642,0.007011,0.002569,0.504395,0.501953,0.004972,0.187256,0.153846,0.004726,0.004642,0.000142,0.008598,0.008453,0.004616,0.003059,0.461538,0.004097,0.004337,3.7e-05,0.008896,0.008858,0.004904,0.003258,0.538462,0.005066,0.003551,0.000523,0.009338,0.00882,0.005814,0.003321,0.461538,0.004662,0.004536,3.6e-05,0.009972,0.009933,0.005468,0.003088,0.384615,0.004578,0.002831,0.000723,0.00959,0.008865,0.006937,0.003637,0.384615,0.005608,0.007507,7.5e-05,0.009979,0.009903,0.007062,0.003967,0.615385,0.004929,0.004364,0.000431,0.009308,0.008881,0.004602,0.00305,0.461538,0.005859,0.006889,0.000299,0.009605,0.009308,0.003975,0.00297,0.615385,0.00687,0.008125,0.001736,0.009712,0.00798,0.004253,0.002663,0.615385,1.005859,1.006836,1.000977,1.009766,0.008789,0.001953,0.00293,0.615385,-0.00015,0.00639,0.007114,0.001209,0.009674,0.008469,0.004101,0.00292,0.615385,0.439697,0.436279,0.430176,0.454346,0.02417,0.015381,0.008209,0.307692,0.004656,0.433105,0.431885,0.418457,0.446045,0.027588,0.007812,0.007187,0.384615,0.430908,0.428467,0.41626,0.444092,0.027832,0.011475,0.008301,0.461538,0.004083,0.536133,0.534668,0.526855,0.55127,0.024414,0.007324,0.006371,0.384615,0.003744,0.290771,0.291504,0.285889,0.295166,0.009277,0.003418,0.002806,0.615385,-0.001505,0.006023,0.005653,0.000968,0.009933,0.008965,0.004021,0.003088,0.461538,0.140381,0.138062,0.136597,0.145508,0.008911,0.00769,0.003679,0.461538,-0.000403,0.004238,0.003532,0.00026,0.009583,0.009323,0.005239,0.003338,0.384615,0.080627,0.003235,0.000229,1.005859,1.005859,0.006083,0.278076,0.076923,1.001953,1.00293,0.999023,1.006836,0.007812,0.00293,0.002405,0.615385,0.004173,0.00251,4.1e-05,0.009262,0.009224,0.00557,0.003504,0.384615,0.003448,0.002806,0.000322,0.006813,0.006493,0.003212,0.002142,0.384615,0.004215,0.00391,0.000364,0.009567,0.009201,0.00532,0.003069,0.461538,0.00452,0.005028,0.001288,0.00959,0.008301,0.003519,0.002619,0.538462,0.004906,0.004047,0.000275,0.009323,0.009048,0.005116,0.003069,0.461538,0.006271,0.007481,0.001808,0.009377,0.007568,0.003647,0.002548,0.615385,0.006008,0.006805,0.000191,0.00988,0.009689,0.003918,0.003155,0.615385,0.004822,0.003979,0.000411,0.010002,0.00959,0.005739,0.003469,0.384615,0.004219,0.00317,2.7e-05,0.009567,0.009537,0.003527,0.002869,0.384615,0.004902,0.004471,0.000796,0.009392,0.008598,0.004284,0.002729,0.461538,CO,6.0,1.0,0.0,-1.0,0.0,1.0
2,00001b22f846c82c51f6e3958ccd81970162bae8b007e8...,0,0.004181,0.00423,0.000802,0.009705,0.008904,0.003944,0.002762,0.538462,0.00684,0.008011,0.001654,0.00956,0.007904,0.004768,0.002686,0.692308,0.013901,0.00457,0.000628,0.128174,0.127563,0.002699,0.034424,0.076923,0.029297,0.538462,0.328613,0.32666,0.318359,0.3396,0.02124,0.010986,0.007141,0.307692,0.005302,0.209839,0.334717,0.002537,0.342041,0.3396,0.331879,0.168457,0.615385,0.199829,0.200073,0.19519,0.203613,0.008423,0.004761,0.002979,0.615385,0.001108,1.004883,1.005859,1.0,1.008789,0.008789,0.004883,0.002939,0.615385,0.000251,0.384615,0.023575,0.004459,0.000726,0.093994,0.093262,0.008427,0.037567,0.230769,0.232666,0.234131,0.213013,0.238159,0.025146,0.005737,0.006554,0.692308,-0.007285,0.004734,0.005077,0.000553,0.008553,0.008003,0.00346,0.002607,0.538462,0.004272,0.00383,0.000578,0.008804,0.008224,0.003124,0.461538,0.0,0.006874,0.000701,0.009888,0.009186,0.005524,0.615385,0.004574,0.005043,0.000965,0.009377,0.008408,0.004673,0.00304,0.538462,0.012291,0.013,0.007515,0.015808,0.008293,0.003357,0.002857,0.615385,0.004402,0.003765,0.000417,0.009682,0.009262,0.005282,0.003321,0.461538,0.019791,0.003798,0.000283,0.080383,0.080078,0.006308,0.031555,0.230769,0.020981,0.006039,0.000628,0.075867,0.075256,0.006025,0.030258,0.230769,0.005337,0.005608,0.002399,0.008568,0.006168,0.004238,0.002264,0.538462,0.003859,0.004383,0.000266,0.006786,0.006519,0.003403,0.002155,0.538462,0.004978,0.00441,0.000931,0.009209,0.008278,0.004944,0.002783,0.384615,0.003407,0.003502,0.000167,0.007851,0.007683,0.004932,0.002794,0.538462,0.005276,0.00515,0.000662,0.009872,0.009209,0.005592,0.003136,0.461538,0.005062,0.00592,0.000359,0.009369,0.00901,0.005976,0.003357,0.538462,0.005058,0.004082,0.000419,0.009224,0.008804,0.005085,0.003202,0.461538,0.005703,0.006195,0.000916,0.009682,0.008766,0.003805,0.002773,0.538462,0.004387,0.003059,5.9e-05,0.009407,0.009346,0.003969,0.003136,0.384615,0.004173,0.004894,0.00043,0.007721,0.00729,0.004702,0.00275,0.538462,0.004734,0.004009,0.001739,0.009926,0.008186,0.001524,0.002489,0.384615,0.004364,0.003765,0.001188,0.008957,0.007767,0.003717,0.002573,0.461538,0.00618,0.006512,0.000662,0.009872,0.009209,0.003296,0.002951,0.538462,0.005508,0.006405,0.00125,0.009789,0.008537,0.004915,0.00292,0.615385,0.004509,0.004509,0.000295,0.009628,0.009331,0.003325,0.002741,0.461538,0.00444,0.005283,0.000261,0.008865,0.008606,0.004051,0.002619,0.615385,0.005375,0.006298,0.000575,0.00898,0.008408,0.004494,0.00304,0.615385,0.005558,0.005733,0.001049,0.009819,0.008774,0.004507,0.002878,0.538462,1.004883,1.003906,1.0,1.008789,0.008789,0.004883,0.003019,0.461538,0.000496,0.00391,0.002338,0.000257,0.008408,0.008148,0.004923,0.00297,0.461538,0.380371,0.379639,0.368652,0.38916,0.020508,0.008301,0.006432,0.461538,0.0045,0.354248,0.354248,0.345459,0.368164,0.022705,0.010986,0.007084,0.461538,0.354248,0.352783,0.338135,0.368896,0.030762,0.010498,0.009079,0.384615,0.007538,0.431885,0.430908,0.419922,0.44458,0.024658,0.01001,0.00703,0.384615,0.004798,0.147339,0.147461,0.143555,0.151978,0.008423,0.003052,0.002441,0.538462,0.00287,0.004982,0.005737,8.3e-05,0.00901,0.008926,0.005322,0.00301,0.538462,0.276611,0.275879,0.273193,0.28125,0.008057,0.004395,0.002815,0.461538,-0.000601,0.004742,0.004139,0.000358,0.009193,0.008835,0.006203,0.003145,0.384615,0.005184,0.00489,0.000425,0.009514,0.009087,0.00378,0.00293,0.461538,0.005241,0.005325,0.001105,0.009201,0.008095,0.004347,0.00256,0.538462,0.004696,0.004265,0.001046,0.009705,0.008659,0.003389,0.00263,0.307692,0.005955,0.006809,0.001974,0.009956,0.00798,0.004341,0.002991,0.538462,0.004646,0.003469,0.000511,0.009857,0.009346,0.006164,0.003338,0.461538,0.006779,0.007767,0.00198,0.00988,0.007904,0.002777,0.002806,0.615385,0.006004,0.007381,0.000371,0.009132,0.008759,0.00445,0.002783,0.538462,0.004677,0.004852,0.000304,0.009544,0.009239,0.002781,0.002718,0.538462,0.003605,0.002178,0.000512,0.009346,0.008835,0.005506,0.003294,0.384615,0.004288,0.003332,0.001196,0.009842,0.008644,0.002905,0.002686,0.461538,0.005112,0.004745,0.000129,0.009415,0.009285,0.007236,0.003637,0.461538,0.004501,0.005112,0.000443,0.006931,0.006489,0.002632,0.002155,0.538462,CO,6.0,1.0,0.0,-1.0,0.0,1.0
3,000041bdba6ecadd89a52d11886e8eaaec9325906c9723...,0,0.048859,0.007423,0.00066,0.268555,0.267822,0.032093,0.088501,0.230769,0.005489,0.004799,0.001972,0.008667,0.006695,0.003357,0.002043,0.461538,0.005245,0.005249,5.8e-05,0.008873,0.008812,0.004976,0.002951,0.538462,0.028687,0.461538,0.40332,0.400146,0.392334,0.414307,0.021973,0.009766,0.006348,0.461538,0.004596,0.029892,0.005322,0.000235,0.333252,0.333008,0.006481,0.091248,0.076923,0.199707,0.199463,0.195312,0.203247,0.007935,0.002563,0.002142,0.461538,-0.001331,1.004883,1.003906,1.0,1.008789,0.008789,0.003906,0.002674,0.307692,-7.3e-05,0.461538,0.023346,0.006836,5.3e-05,0.088379,0.088318,0.006025,0.03476,0.230769,0.454102,0.443604,0.397705,0.509277,0.111572,0.066162,0.043213,0.461538,0.02132,0.672852,0.628906,0.081787,1.008789,0.926758,0.527588,0.331787,0.461538,0.004795,0.004116,0.00122,0.008629,0.007408,0.003242,0.461538,0.153846,0.005035,0.000213,0.009644,0.00943,0.003475,0.538462,0.005043,0.004803,0.000558,0.009941,0.009384,0.00466,0.003098,0.461538,0.009491,0.01001,0.003347,0.014664,0.011314,0.002968,0.003,0.538462,0.004639,0.004608,0.000343,0.009361,0.009018,0.003231,0.002642,0.461538,0.022476,0.006603,0.00223,0.080017,0.077759,0.004707,0.031982,0.230769,0.02092,0.00666,0.002979,0.075256,0.072266,0.004086,0.029068,0.230769,0.00576,0.006306,0.000862,0.009819,0.008957,0.006155,0.003164,0.538462,0.00507,0.005859,0.000474,0.009682,0.009209,0.005207,0.003294,0.538462,0.004887,0.005417,0.001213,0.009483,0.00827,0.003313,0.002651,0.538462,0.00721,0.008408,0.000958,0.009842,0.008881,0.003059,0.002939,0.615385,0.006462,0.007759,0.001689,0.009796,0.00811,0.007309,0.003428,0.615385,0.005058,0.006142,0.000332,0.009171,0.008835,0.006405,0.003435,0.615385,0.003952,0.00346,0.000194,0.009857,0.009666,0.003233,0.002827,0.461538,0.004307,0.00359,0.000387,0.008492,0.008102,0.005864,0.003164,0.461538,0.043793,0.00489,7e-05,0.509766,0.509766,0.00466,0.140015,0.076923,0.005405,0.00592,0.000173,0.008972,0.008797,0.004938,0.002991,0.538462,0.005222,0.005234,0.001033,0.00972,0.00869,0.003332,0.002979,0.538462,0.004631,0.004234,0.000575,0.009979,0.009407,0.007121,0.003563,0.384615,0.004261,0.005642,3.6e-05,0.007664,0.007629,0.004221,0.002762,0.538462,0.005581,0.00724,0.000713,0.009628,0.008911,0.005976,0.003479,0.538462,0.004906,0.004765,0.000808,0.009903,0.009094,0.004162,0.002674,0.461538,0.00502,0.004715,0.000154,0.009064,0.008911,0.005121,0.003019,0.461538,0.005642,0.006546,0.000886,0.009048,0.008163,0.00527,0.00293,0.692308,0.005711,0.005768,0.00069,0.008972,0.008286,0.00206,0.002264,0.538462,0.621582,1.000977,0.00573,1.007812,1.001953,0.99575,0.504395,0.615385,6.901802,0.2052,0.205444,0.201538,0.208008,0.00647,0.003296,0.002171,0.615385,0.153076,0.05249,0.039398,0.499756,0.460449,0.013702,0.196045,0.230769,-0.047401,0.048523,0.047333,0.036682,0.059814,0.023132,0.010956,0.007278,0.384615,0.046692,0.044434,0.032135,0.062744,0.030609,0.010468,0.008728,0.384615,0.062227,0.621582,0.620117,0.61377,0.632324,0.018555,0.013184,0.006741,0.384615,0.001871,0.324707,0.292725,0.289062,0.436768,0.147705,0.003174,0.062256,0.230769,-0.026973,0.004776,0.003435,0.000753,0.009979,0.009224,0.006891,0.003391,0.384615,0.606934,0.598633,0.59082,0.643066,0.052246,0.003906,0.019363,0.230769,-0.006079,0.004311,0.004467,5e-06,0.009117,0.009109,0.006745,0.00358,0.538462,0.003635,0.00276,5.1e-05,0.009323,0.00927,0.004978,0.002979,0.384615,1.003906,1.003906,1.0,1.007812,0.007812,0.004883,0.00263,0.384615,1.005859,1.006836,1.0,1.009766,0.009766,0.004883,0.00297,0.615385,0.004509,0.004601,6e-05,0.009277,0.009216,0.00536,0.003202,0.538462,0.004463,0.00457,0.000434,0.009628,0.009193,0.00239,0.002674,0.538462,0.003368,0.002348,0.000591,0.009529,0.008942,0.003046,0.002537,0.461538,0.005775,0.006199,0.002382,0.009315,0.006935,0.004662,0.002466,0.538462,0.005779,0.005684,0.000461,0.009735,0.009277,0.002655,0.002584,0.461538,0.004181,0.003736,0.000242,0.008865,0.008621,0.004861,0.002773,0.461538,0.006741,0.008186,0.000615,0.009872,0.009254,0.003819,0.003391,0.538462,0.004768,0.004791,0.000492,0.009918,0.00943,0.002876,0.002651,0.538462,0.005234,0.006355,2.9e-05,0.009834,0.009804,0.004604,0.003408,0.538462,CO,3.0,1.0,0.0,6.0,0.0,1.0
4,00007889e4fcd2614b6cbe7f8f3d2e5c728eca32d9eb8a...,0,0.004642,0.004147,3e-05,0.008682,0.008652,0.00474,0.00289,0.461538,0.005352,0.005905,0.000231,0.009697,0.009468,0.004778,0.003267,0.538462,0.00355,0.002741,0.000342,0.008812,0.008469,0.005122,0.002951,0.384615,0.242188,0.615385,0.472168,0.472412,0.461426,0.484619,0.023193,0.012939,0.007553,0.538462,0.002587,0.055237,0.004238,0.000174,0.341064,0.34082,0.002232,0.125977,0.153846,0.233521,0.249756,0.191772,0.256348,0.064575,0.056274,0.028412,0.692308,0.025849,1.005859,1.006836,1.0,1.008789,0.008789,0.005859,0.003088,0.538462,8.7e-05,0.615385,0.318115,0.341797,0.094116,0.392578,0.29834,0.051758,0.102295,0.846154,0.38501,0.384033,0.275146,0.48877,0.213623,0.141357,0.078308,0.461538,0.048423,0.003477,0.002783,0.000846,0.009552,0.008705,0.001528,0.002264,0.307692,0.004642,0.004368,0.001734,0.009773,0.008041,0.003801,0.461538,0.0,0.004566,0.000696,0.008972,0.008278,0.005592,0.461538,0.12146,0.009277,0.001121,0.259766,0.258545,0.248661,0.129761,0.461538,0.012611,0.012245,0.007332,0.016464,0.009132,0.004929,0.003059,0.461538,0.004707,0.004398,0.001033,0.009239,0.008209,0.006055,0.003059,0.461538,0.135864,0.147217,0.07196,0.151367,0.079407,0.004761,0.028397,0.846154,0.128052,0.137817,0.067322,0.143066,0.075745,0.005737,0.02597,0.846154,0.006042,0.005848,0.001287,0.009857,0.008568,0.00452,0.002651,0.461538,0.004841,0.005978,0.000551,0.009506,0.008957,0.004189,0.002806,0.615385,0.205322,0.205322,0.001004,0.409668,0.408691,0.009277,0.143188,0.461538,0.00444,0.004219,0.000519,0.008537,0.008018,0.003016,0.002405,0.307692,0.003983,0.003279,0.000914,0.008553,0.007637,0.005649,0.002762,0.461538,0.005344,0.005722,0.000878,0.00985,0.008972,0.004118,0.003174,0.538462,0.005905,0.005352,0.001287,0.009377,0.008087,0.004547,0.002878,0.384615,0.003941,0.004471,0.000116,0.008698,0.008583,0.003258,0.002537,0.538462,0.006348,0.006962,0.000956,0.009773,0.00882,0.003948,0.00301,0.538462,0.005135,0.005077,0.001295,0.009064,0.007767,0.002407,0.002224,0.384615,0.004066,0.003828,2.9e-05,0.009232,0.009201,0.00626,0.003408,0.461538,0.005486,0.00618,0.0004,0.009857,0.00946,0.004587,0.002979,0.692308,0.00663,0.007027,0.001382,0.009056,0.007675,0.002113,0.002291,0.615385,0.480713,0.489014,0.369141,0.589355,0.220215,0.106934,0.071838,0.538462,1.004883,1.004883,1.0,1.009766,0.009766,0.003906,0.003,0.307692,0.960449,0.960449,0.957031,0.964844,0.007812,0.004883,0.00289,0.384615,1.005859,1.005859,1.000977,1.009766,0.008789,0.006836,0.003529,0.461538,0.004475,0.004677,0.000268,0.008247,0.00798,0.005013,0.002794,0.538462,1.005859,1.005859,1.000977,1.009766,0.008789,0.003906,0.00304,0.384615,-0.000152,0.005516,0.005959,6e-05,0.00956,0.009499,0.006218,0.003452,0.615385,0.434082,0.434082,0.425049,0.448486,0.023438,0.01123,0.008125,0.461538,0.004398,0.426758,0.424072,0.418701,0.442383,0.023682,0.012939,0.00782,0.307692,0.426025,0.425537,0.412842,0.437744,0.024902,0.007812,0.007053,0.384615,0.004746,0.55127,0.548828,0.54248,0.565918,0.023438,0.011719,0.007751,0.384615,0.003548,0.434082,0.433838,0.428955,0.438232,0.009277,0.003174,0.002815,0.461538,3e-06,0.004509,0.004307,0.001289,0.008362,0.007072,0.002813,0.0021,0.384615,0.186646,0.187134,0.182007,0.191162,0.009155,0.004395,0.003078,0.538462,-0.003624,0.004642,0.00491,9.1e-05,0.00956,0.009468,0.004513,0.00301,0.538462,0.005554,0.006081,0.000491,0.009956,0.009468,0.006758,0.003391,0.538462,1.004883,1.004883,1.0,1.007812,0.007812,0.00293,0.00243,0.461538,1.004883,1.006836,1.0,1.008789,0.008789,0.005859,0.003487,0.538462,1.005859,1.005859,1.001953,1.008789,0.006836,0.004883,0.002478,0.384615,0.0051,0.004639,9.9e-05,0.009666,0.009567,0.004152,0.003117,0.461538,0.004745,0.003387,0.001776,0.009537,0.007759,0.00469,0.002783,0.461538,0.003853,0.002974,0.000528,0.008102,0.007576,0.005001,0.002857,0.461538,0.004818,0.004162,0.000686,0.009186,0.008499,0.003565,0.002502,0.461538,0.004818,0.005421,0.000579,0.008698,0.008118,0.004452,0.002741,0.538462,0.004856,0.004662,0.000468,0.008812,0.008347,0.002096,0.002264,0.461538,0.004379,0.00407,0.000633,0.009438,0.008804,0.003775,0.00263,0.461538,0.004219,0.002884,8.3e-05,0.009666,0.009583,0.003861,0.003428,0.461538,CO,6.0,1.0,0.0,4.0,0.0,1.0


In [15]:
data.shape

(458913, 476)

In [21]:
## Putting variables in the right shape 
data['D_68_last'] = data['D_68_last'].astype(str)
data['D_114_last'] = data['D_114_last'].astype(str)
data['D_116_last'] = data['D_116_last'].astype(str)
data['D_117_last'] = data['D_117_last'].astype(str)
data['D_120_last'] = data['D_120_last'].astype(str)
data['D_126_last'] = data['D_126_last'].astype(str)

In [22]:
X = pd.get_dummies(data[['D_63_last', 'D_68_last', 'D_114_last', 'D_116_last', 'D_117_last', 'D_120_last', 'D_126_last']])
X

Unnamed: 0,D_63_last_CL,D_63_last_CO,D_63_last_CR,D_63_last_XL,D_63_last_XM,D_63_last_XZ,D_68_last_1.0,D_68_last_2.0,D_68_last_3.0,D_68_last_4.0,D_68_last_5.0,D_68_last_6.0,D_114_last_0.0,D_114_last_1.0,D_116_last_0.0,D_116_last_1.0,D_117_last_-1.0,D_117_last_1.0,D_117_last_2.0,D_117_last_3.0,D_117_last_4.0,D_117_last_5.0,D_117_last_6.0,D_120_last_0.0,D_120_last_1.0,D_126_last_0.0,D_126_last_1.0
0,0,0,1,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1
1,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1
2,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1
3,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,0,0,0,0,1,1,0,0,1
4,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,0,0,1,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
458908,0,1,0,0,0,0,0,0,0,0,0,1,1,0,1,0,0,0,0,1,0,0,0,0,1,0,1
458909,0,1,0,0,0,0,0,0,0,0,0,1,0,1,1,0,0,0,1,0,0,0,0,1,0,0,1
458910,0,1,0,0,0,0,0,0,0,0,1,0,0,1,1,0,1,0,0,0,0,0,0,1,0,0,1
458911,0,1,0,0,0,0,0,0,1,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,0,1


In [20]:
pd.get_dummies(data['D_68_last'].astype(str))

Unnamed: 0,1.0,2.0,3.0,4.0,5.0,6.0
0,0,0,0,0,0,1
1,0,0,0,0,0,1
2,0,0,0,0,0,1
3,0,0,1,0,0,0
4,0,0,0,0,0,1
...,...,...,...,...,...,...
458908,0,0,0,0,0,1
458909,0,0,0,0,0,1
458910,0,0,0,0,1,0
458911,0,0,1,0,0,0
