In [1]:
import pandas as pd
import numpy as np
import gc

# Gradient Boosting
import lightgbm as lgb
import xgboost as xgb

# Scikit-learn
from sklearn.metrics import average_precision_score
from sklearn.model_selection import StratifiedKFold 
from sklearn.preprocessing import PolynomialFeatures
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc

# Graphics
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Skopt functions
from skopt import BayesSearchCV
from skopt import gp_minimize # Bayesian optimization using Gaussian Processes
from skopt.space import Real, Categorical, Integer
from skopt.utils import use_named_args # decorator to convert a list of parameters to named arguments
from skopt.callbacks import DeadlineStopper # Stop the optimization before running out of a fixed budget of time.
from skopt.callbacks import VerboseCallback # Callback to control the verbosity
from skopt.callbacks import DeltaXStopper # Stop the optimization If the last two positions at which the objective has been evaluated are less than delta

# Hyperparameters distributions
from scipy.stats import randint
from scipy.stats import uniform

# Metrics
from sklearn.metrics import average_precision_score, roc_auc_score, mean_absolute_error

import os
import warnings
def ignore_warn(*args, **kwargs):
    pass
warnings.warn = ignore_warn

In [2]:
santander_data = pd.read_csv('../input/train.csv')
santander_data_test = pd.read_csv('../input/test.csv')

In [3]:
# Taking the labels (price)
label_df = santander_data['target']

In [4]:
santander_data.drop(['ID_code','target'], axis=1, inplace=True)

santander_data_test.drop('ID_code', axis=1, inplace=True)
santander_data.head(10)

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,...,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
0,8.9255,-6.7863,11.9081,5.093,11.4607,-9.2834,5.1187,18.6266,-4.92,5.747,2.9252,3.1821,14.0137,0.5745,8.7989,14.5691,5.7487,-7.2393,4.284,30.7133,10.535,16.2191,2.5791,2.4716,14.3831,13.4325,-5.1488,-0.4073,4.9306,5.9965,-0.3085,12.9041,-3.8766,16.8911,11.192,10.5785,0.6764,7.8871,4.6667,3.8743,...,15.4576,5.3133,3.6159,5.0384,6.676,12.6644,2.7004,-0.6975,9.5981,5.4879,-4.7645,-8.4254,20.8773,3.1531,18.5618,7.7423,-10.1245,13.7241,-3.5189,1.7202,-8.4051,9.0164,3.0657,14.3691,25.8398,5.8764,11.8411,-19.7159,17.5743,0.5857,4.4354,3.9642,3.1364,1.691,18.5227,-2.3978,7.8784,8.5635,12.7803,-1.0914
1,11.5006,-4.1473,13.8588,5.389,12.3622,7.0433,5.6208,16.5338,3.1468,8.0851,-0.4032,8.0585,14.0239,8.4135,5.4345,13.7003,13.8275,-15.5849,7.8,28.5708,3.4287,2.7407,8.5524,3.3716,6.9779,13.891,-11.7684,-2.5586,5.0464,0.5481,-9.2987,7.8755,1.2859,19.371,11.3702,0.7399,2.7995,5.8434,10.816,3.6783,...,29.4846,5.8683,3.8208,15.8348,-5.0121,15.1345,3.2003,9.3192,3.8821,5.7999,5.5378,5.0988,22.033,5.5134,30.2645,10.4968,-7.2352,16.5721,-7.3477,11.0752,-5.5937,9.4878,-14.91,9.4245,22.5441,-4.8622,7.6543,-15.9319,13.3175,-0.3566,7.6421,7.7214,2.5837,10.9516,15.4305,2.0339,8.1267,8.7889,18.356,1.9518
2,8.6093,-2.7457,12.0805,7.8928,10.5825,-9.0837,6.9427,14.6155,-4.9193,5.9525,-0.3249,-11.2648,14.1929,7.3124,7.5244,14.6472,7.6782,-1.7395,4.7011,20.4775,17.7559,18.1377,1.2145,3.5137,5.6777,13.2177,-7.994,-2.9029,5.8463,6.1439,-11.1025,12.4858,-2.2871,19.0422,11.0449,4.1087,4.6974,6.9346,10.8917,0.9003,...,13.207,5.8442,4.7086,5.7141,-1.041,20.5092,3.279,-5.5952,7.3176,5.769,-7.0927,-3.9116,7.2569,-5.8234,25.682,10.9202,-0.3104,8.8438,-9.7009,2.4013,-4.2935,9.3908,-13.2648,3.1545,23.0866,-5.3,5.3745,-6.266,10.1934,-0.8417,2.9057,9.7905,1.6704,1.6858,21.6042,3.1417,-6.5213,8.2675,14.7222,0.3965
3,11.0604,-2.1518,8.9522,7.1957,12.5846,-1.8361,5.8428,14.925,-5.8609,8.245,2.3061,2.8102,13.8463,11.9704,6.4569,14.8372,10.743,-0.4299,15.9426,13.7257,20.301,12.5579,6.8202,2.7229,12.1354,13.7367,0.8135,-0.9059,5.907,2.8407,-15.2398,10.4407,-2.5731,6.1796,10.6093,-5.9158,8.1723,2.8521,9.1738,0.6665,...,31.8833,5.9684,7.2084,3.8899,-11.0882,17.2502,2.5881,-2.7018,0.5641,5.343,-7.1541,-6.192,18.2366,11.7134,14.7483,8.1013,11.8771,13.9552,-10.4701,5.6961,-3.7546,8.4117,1.8986,7.2601,-0.4639,-0.0498,7.9336,-12.8279,12.4124,1.8489,4.4666,4.7433,0.7178,1.4214,23.0347,-1.2706,-2.9275,10.2922,17.9697,-8.9996
4,9.8369,-1.4834,12.8746,6.6375,12.2772,2.4486,5.9405,19.2514,6.2654,7.6784,-9.4458,-12.1419,13.8481,7.8895,7.7894,15.0553,8.4871,-3.068,6.5263,11.3152,21.4246,18.9608,10.1102,2.7142,14.208,13.5433,3.1736,-3.3423,5.9015,7.9352,-3.1582,9.4668,-0.0083,19.3239,12.4057,0.6329,2.7922,5.8184,19.3038,1.445,...,33.5107,5.6953,5.4663,18.2201,6.5769,21.2607,3.2304,-1.7759,3.1283,5.5518,1.4493,-2.6627,19.8056,2.3705,18.4685,16.3309,-3.3456,13.5261,1.7189,5.1743,-7.6938,9.7685,4.891,12.2198,11.8503,-7.8931,6.4209,5.927,16.0201,-0.2829,-1.4905,9.5214,-0.1508,9.1942,13.2876,-1.5121,3.9267,9.5031,17.9974,-8.8104
5,11.4763,-2.3182,12.608,8.6264,10.9621,3.5609,4.5322,15.2255,3.5855,5.979,0.801,-0.6192,13.638,1.2589,8.1939,14.9894,12.0763,-1.471,6.7341,14.8241,19.7172,11.9882,1.0468,3.8663,4.7252,13.9427,-1.2796,-4.3763,5.1494,0.4124,-5.0732,4.901,1.5459,15.6423,10.7209,15.1886,1.8685,7.7223,5.5317,2.2308,...,16.5552,5.3739,6.4487,11.5631,1.3847,14.9638,2.8455,-9.0953,3.8278,5.9714,-6.1449,-2.0285,18.4106,1.4457,21.8853,9.2654,-6.5247,10.7687,-7.6283,1.0208,7.1968,11.1227,2.2257,6.4056,21.055,-13.6509,4.7691,-8.9114,15.1007,2.4286,-6.3068,6.6025,5.2912,0.4403,14.9452,1.0314,-3.6241,9.767,12.5809,-4.7602
6,11.8091,-0.0832,9.3494,4.2916,11.1355,-8.0198,6.1961,12.0771,-4.3781,7.9232,-5.1288,-7.5271,14.1629,13.3058,7.8412,14.3363,7.5951,11.0922,21.1976,6.2946,15.8877,24.2595,8.1159,3.9769,7.6851,13.36,-0.5156,0.069,5.6452,4.614,-12.389,12.088,-1.529,9.2376,11.151,6.6352,4.8462,7.0202,19.9479,-6.2271,...,39.9599,5.5552,3.3459,9.2661,6.1213,23.7558,3.0298,5.9109,8.1035,6.1887,0.2619,-1.1405,25.1675,2.6965,17.0152,12.7942,-3.0403,8.1735,4.5637,3.8973,-8.1416,10.057,15.7862,3.3593,11.914,-4.287,7.5015,-29.9763,17.2867,1.8539,8.783,6.4521,3.5325,0.1777,18.3314,0.5845,9.1104,9.1143,10.8869,-3.2097
7,13.558,-7.9881,13.8776,7.5985,8.6543,0.831,5.689,22.3262,5.0647,7.1971,1.4532,-6.7033,14.2919,10.9699,6.919,14.2459,9.5376,-0.7226,5.1548,17.1535,13.7326,14.4195,1.2375,3.1711,9.1258,13.325,3.3883,-0.4418,5.4501,7.9894,-0.9976,14.5609,-2.0712,16.9717,11.5257,-0.499,2.8303,7.5772,9.0294,1.2659,...,23.7765,5.4098,5.1402,10.7013,-8.2583,26.3286,2.6085,-10.9163,8.7362,5.2273,8.9519,-2.3522,6.1335,0.0876,19.5642,13.2008,-11.1786,17.3041,-0.6535,0.0592,5.114,10.5478,6.9736,6.9724,24.0369,-4.822,8.4947,-5.9076,18.8663,1.9731,13.17,6.5491,3.9906,5.8061,23.1407,-0.3776,4.2178,9.4237,8.6624,3.4806
8,16.1071,2.4426,13.9307,5.6327,8.8014,6.163,4.4514,10.1854,-3.1882,9.0827,0.9501,1.7982,14.0654,-3.0572,11.1642,14.8757,10.0075,-8.9472,3.8349,0.856,10.6958,6.3738,6.558,2.6182,13.2506,13.7929,-14.4918,-2.5407,5.9575,2.4882,-11.1344,10.5106,-1.0573,19.329,12.2897,-2.816,6.9208,5.1498,14.7272,3.7692,...,30.674,5.7888,4.118,9.1486,-5.2618,14.4422,2.6893,-9.5251,1.7455,5.9018,3.1838,-1.7865,4.9105,3.5803,32.9149,13.0201,-2.4845,11.0988,7.4609,-2.1408,-3.9172,7.7291,-11.4027,2.0696,-1.7937,-0.003,11.5024,-18.3172,13.1403,0.7014,1.4298,14.751,1.6395,1.4181,14.837,-1.994,-1.0733,8.1975,19.5114,4.8453
9,12.5088,1.9743,8.896,5.4508,13.6043,-16.2859,6.0637,16.841,0.1287,7.9682,0.8787,3.0537,13.9639,0.8071,9.924,15.2659,11.39,1.5367,5.4649,13.6196,23.7806,4.4221,6.1695,3.2978,4.5923,13.3778,-3.22,-2.3302,6.112,-0.0289,-13.1141,9.127,2.258,19.845,10.9237,2.4796,7.2948,5.1347,11.5906,-0.5429,...,13.7379,5.4536,6.2403,17.1668,-5.3527,14.378,2.4139,-9.1925,2.6859,5.854,-3.0868,-1.2558,24.2683,-4.5382,18.2209,7.5652,6.3377,14.6223,-13.896,2.391,2.7878,11.3457,-9.6774,10.3382,19.0645,-7.6785,6.758,-21.607,20.8112,-0.1873,0.5543,6.316,1.0371,3.6885,14.8344,0.4467,14.1287,7.9133,16.2375,14.2514


In [5]:
santander_data_test.head(10)

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,...,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
0,11.0656,7.7798,12.9536,9.4292,11.4327,-2.3805,5.8493,18.2675,2.1337,8.81,-2.0248,-4.3554,13.9696,0.3458,7.5408,14.5001,7.7028,-19.0919,15.5806,16.1763,3.7088,18.8064,1.5899,3.0654,6.4509,14.1192,-9.4902,-2.1917,5.7107,3.7864,-1.7981,9.2645,2.0657,12.7753,11.3334,8.1462,-0.061,3.5331,9.7804,8.7625,...,5.9232,5.4113,3.8302,5.738,-8.6105,22.953,2.5531,-0.2836,4.3416,5.1855,4.2603,1.6779,29.0849,8.4685,18.1317,12.2818,-0.6912,10.2226,-5.5579,2.2926,-4.5358,10.3903,-15.4937,3.9697,31.3521,-1.1651,9.2874,-23.5705,13.2643,1.6591,-2.1556,11.8495,-1.43,2.4508,13.7112,2.4669,4.3654,10.72,15.4722,-8.7197
1,8.5304,1.2543,11.3047,5.1858,9.1974,-4.0117,6.0196,18.6316,-4.4131,5.9739,-1.3809,-0.331,14.1129,2.5667,5.4988,14.1853,7.0196,4.6564,29.1609,0.091,12.1469,3.1389,5.2578,2.4228,16.2064,13.5023,-5.2341,-3.6648,5.708,2.9965,-10.472,11.4938,-0.966,15.3445,10.6361,0.8966,6.7428,2.3421,12.8678,-1.5536,...,30.9641,5.6723,3.6873,13.0429,-10.6572,15.5134,3.2185,9.0535,7.0535,5.3924,-0.772,-8.1783,29.9227,-5.6274,10.5018,9.6083,-0.4935,8.1696,-4.3605,5.211,0.4087,12.003,-10.3812,5.8496,25.1958,-8.8468,11.8263,-8.7112,15.9072,0.9812,10.6165,8.8349,0.9403,10.1282,15.5765,0.4773,-1.4852,9.8714,19.1293,-20.976
2,5.4827,-10.3581,10.1407,7.0479,10.2628,9.8052,4.895,20.2537,1.5233,8.3442,-4.7057,-3.0422,13.6751,3.8183,10.8535,14.2126,9.8837,2.6541,21.2181,20.8163,12.4666,12.3696,4.7473,2.7936,5.2189,13.567,-15.4246,-0.1655,7.2633,3.431,-9.1508,9.732,3.1062,22.3076,11.9593,9.9255,4.0702,4.9934,8.0667,0.8804,...,39.3654,5.5228,3.3159,4.3324,-0.5382,13.3009,3.1243,-4.1731,1.233,6.1513,-0.0391,1.495,16.8874,-2.9787,27.4035,15.8819,-10.966,15.6415,-9.4056,4.4611,-3.0835,8.5549,-2.8517,13.477,24.4721,-3.4824,4.9178,-2.072,11.539,1.1821,-0.7484,10.9935,1.9803,2.18,12.9813,2.1281,-7.1086,7.0618,19.8956,-23.1794
3,8.5374,-1.3222,12.022,6.5749,8.8458,3.1744,4.9397,20.566,3.3755,7.4578,0.0095,-5.0659,14.0526,13.501,8.766,14.7352,10.0383,-15.3508,2.1273,21.4797,14.5372,12.5527,2.9707,4.2398,13.7796,14.1408,1.0061,-1.3479,5.257,6.5911,6.2161,9.554,2.3628,10.2124,10.8047,-2.5588,6.072,3.2613,16.5632,8.8336,...,19.7251,5.3882,3.6775,7.4753,-11.078,24.8712,2.6415,2.2673,7.2788,5.6406,7.2048,3.4504,2.413,11.1674,14.5499,10.6151,-5.7922,13.9407,7.1078,1.1019,9.459,9.8243,5.9917,5.1634,8.1154,3.6638,3.3102,-19.7819,13.4499,1.3104,9.5702,9.0766,1.658,3.5813,15.1874,3.1656,3.9567,9.2295,13.0168,-4.2108
4,11.7058,-0.1327,14.1295,7.7506,9.1035,-8.5848,6.8595,10.6048,2.989,7.1437,5.1025,-3.2827,14.1013,8.9672,4.7276,14.5811,11.8615,3.148,18.0126,13.8006,1.6026,16.3059,6.7954,3.6015,13.6569,13.8807,8.6228,-2.2654,5.2255,7.0165,-15.6961,10.6239,-4.7674,17.5447,11.8668,3.0154,4.2546,6.7601,5.9613,0.3695,...,22.87,5.6688,6.1159,13.2433,-11.9785,26.204,3.2348,-5.5775,5.7036,6.1717,-1.6039,-2.4866,17.2728,2.364,14.0037,12.9165,-12.0311,10.1161,-8.7562,6.0889,-1.362,10.3559,-7.4915,9.4588,3.9829,5.858,8.3635,-24.8254,11.4928,1.6321,4.2259,9.1723,1.2835,3.3778,19.5542,-0.286,-5.1612,7.2882,13.926,-9.1846
5,5.9862,-2.2913,8.6058,7.0685,14.2465,-8.6761,4.2467,14.7632,1.879,7.2842,-4.9194,-9.1869,14.0581,11.4403,4.295,13.8808,10.9588,-7.0124,15.1829,18.271,9.7086,22.9103,6.508,2.3607,1.7899,13.9555,-7.2154,-2.4084,5.1616,6.9291,0.1141,9.7667,1.2339,17.7117,12.3433,-0.1787,-1.2858,5.522,13.0498,-5.3328,...,10.4064,5.6731,5.2368,5.2973,-13.2809,26.6861,3.0711,-5.841,3.2268,5.9344,0.6292,-4.0885,15.8299,1.9671,27.9116,12.1392,-5.8638,7.9274,3.2828,1.6738,-6.687,10.8213,-9.0377,13.9614,5.7416,3.6823,5.108,-7.5139,15.2017,2.6198,-2.1115,7.1178,-0.4249,8.8781,14.9438,-2.2151,-6.0233,9.8117,17.1127,10.824
6,8.4624,-6.1065,7.3603,8.2627,12.0104,-7.2073,4.167,13.0809,-4.3004,6.3181,3.3959,-2.0205,13.7682,12.0534,7.2186,14.6552,10.706,-8.0046,16.5411,-1.9177,23.4575,20.4773,3.9038,2.4832,18.0843,13.2727,-15.1986,-0.4427,5.1813,6.4509,1.1644,8.1706,-0.3277,18.6378,10.6855,11.9737,5.862,1.6195,16.8667,-6.1878,...,16.6887,5.5245,4.1781,16.6762,-6.2154,23.8006,2.8187,-16.7911,4.1165,5.7013,8.8525,0.2582,17.731,-10.3297,20.0649,11.6405,-4.2624,12.0176,10.9073,2.2162,-4.0787,8.1047,0.3254,13.1701,16.1184,-3.1572,10.4842,-10.6925,12.8029,2.1781,12.3609,6.8661,4.0971,8.8484,17.501,0.0295,7.7443,9.1509,18.4736,5.1499
7,17.3035,-2.4212,13.3989,8.3998,11.0777,9.6449,5.9596,17.8477,-4.8068,7.4643,4.0355,1.6185,14.1455,9.2276,10.3202,15.1863,10.4344,-16.2784,20.4167,16.3993,26.0324,18.2594,5.8071,4.0316,19.0425,13.2969,-6.3679,0.1439,5.1571,4.2024,-7.6541,11.0271,-4.0223,14.8908,11.2465,12.2744,2.6117,3.7225,11.9105,7.4879,...,33.2387,5.5475,5.0035,13.2442,-3.8513,9.9877,3.1809,-12.016,0.7334,5.7269,2.5807,-1.2435,25.3933,6.051,15.6766,12.1939,-1.3424,13.5346,-13.1117,2.0752,-0.3896,9.4374,-3.2351,9.3452,20.7098,0.9508,9.6602,-11.1847,15.7181,-1.0241,4.4676,4.4214,0.9303,1.4994,15.2648,-1.7931,6.5316,10.4855,23.4631,0.7283
8,6.9856,0.8402,13.7161,4.7749,8.6784,-13.7607,4.3386,14.5843,2.5883,7.2215,9.375,8.4046,14.3322,17.1366,7.9301,14.2254,6.722,-8.7087,-1.3541,19.2714,9.3437,18.6532,7.7823,2.6009,6.7642,13.6464,-6.3747,1.1001,6.3812,7.6421,4.3898,9.0597,1.204,7.1039,12.3777,-2.9641,7.0173,7.4202,19.6818,-4.4557,...,6.578,5.505,5.7826,3.0612,1.3582,21.0711,2.7477,-5.5203,5.7496,5.1234,-4.5422,-1.0625,13.1249,-0.7765,18.7814,12.293,-11.3608,17.1317,-0.645,3.0689,6.4179,7.8819,1.66,10.0574,6.2029,1.5205,10.0629,-13.7457,13.591,-0.1388,-3.4657,7.8754,2.4698,-0.0362,16.7144,0.1221,-1.4328,9.9207,16.9865,-3.3304
9,10.3811,-6.9348,14.669,9.0941,11.9058,-10.8018,3.4508,20.2816,-1.4112,6.7401,0.3727,-4.1918,14.0862,0.7104,8.1878,13.7374,11.3068,-12.0935,7.5325,10.5814,23.2748,12.6576,0.6735,3.5861,14.11,13.4447,-11.4772,-2.614,6.024,9.8656,-6.5061,12.4767,2.5978,18.235,11.01,0.0708,3.4443,6.4965,13.2953,1.8353,...,13.6283,5.688,6.0106,18.1349,-9.8056,15.6642,2.7786,-0.7066,4.8203,5.5257,-0.848,-12.2675,9.241,-2.8859,19.9355,12.2314,-6.6672,11.6131,-6.1622,-1.2921,-4.5264,10.0482,2.4023,14.5056,34.7555,-7.9347,3.707,10.0144,15.0549,2.0603,1.8052,11.0723,0.8907,4.768,15.1425,0.6075,-4.4447,9.5788,15.8146,9.3457


In [6]:
santander_data.describe()

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,...,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199
count,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,...,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0,200000.0
mean,10.679914,-1.627622,10.715192,6.796529,11.078333,-5.065317,5.408949,16.54585,0.284162,7.567236,0.39434,-3.245596,14.023978,8.530232,7.537606,14.573126,9.333264,-5.696731,15.244013,12.438567,13.290894,17.257883,4.30543,3.01954,10.5844,13.667496,-4.055133,-1.137908,5.53298,5.053874,-7.68774,10.393046,-0.512886,14.774147,11.43425,3.842499,2.18723,5.868899,10.642131,0.662956,...,24.2593,5.633293,5.362896,11.00217,-2.871906,19.315753,2.963335,-4.151155,4.937124,5.636008,-0.004962,-0.831777,19.817094,-0.677967,20.210677,11.640613,-2.799585,11.882933,-1.014064,2.591444,-2.741666,10.085518,0.719109,8.769088,12.756676,-3.983261,8.970274,-10.335043,15.377174,0.746072,3.23444,7.438408,1.927839,3.331774,17.993784,-0.142088,2.303335,8.908158,15.87072,-3.326537
std,3.040051,4.050044,2.640894,2.043319,1.62315,7.863267,0.866607,3.418076,3.332634,1.23507,5.500793,5.970253,0.190059,4.639536,2.247908,0.411711,2.557421,6.712612,7.85137,7.996694,5.876254,8.196564,2.847958,0.526893,3.777245,0.285535,5.92221,1.523714,0.783367,2.615942,7.965198,2.159891,2.58783,4.322325,0.541614,5.179559,3.119978,2.24973,4.278903,4.068845,...,10.880263,0.217938,1.419612,5.262056,5.457784,5.024182,0.369684,7.79802,3.105986,0.369437,4.424621,5.378008,8.674171,5.966674,7.136427,2.892167,7.513939,2.628895,8.57981,2.798956,5.261243,1.371862,8.963434,4.474924,9.31828,4.725167,3.189759,11.574708,3.944604,0.976348,4.559922,3.023272,1.478423,3.99203,3.135162,1.429372,5.454369,0.921625,3.010945,10.438015
min,0.4084,-15.0434,2.1171,-0.0402,5.0748,-32.5626,2.3473,5.3497,-10.5055,3.9705,-20.7313,-26.095,13.4346,-6.0111,1.0133,13.0769,0.6351,-33.3802,-10.6642,-12.4025,-5.4322,-10.089,-5.3225,1.2098,-0.6784,12.72,-24.2431,-6.1668,2.0896,-4.7872,-34.7984,2.1406,-8.9861,1.5085,9.8169,-16.5136,-8.0951,-1.1834,-6.3371,-14.5457,...,-7.4522,4.8526,0.6231,-6.5317,-19.9977,3.8167,1.8512,-35.9695,-5.2502,4.2588,-14.506,-22.4793,-11.4533,-22.7487,-2.9953,3.2415,-29.1165,4.9521,-29.2734,-7.8561,-22.0374,5.4165,-26.0011,-4.8082,-18.4897,-22.5833,-3.0223,-47.7536,4.4123,-2.5543,-14.0933,-2.6917,-3.8145,-11.7834,8.6944,-5.261,-14.2096,5.9606,6.2993,-38.8528
25%,8.45385,-4.740025,8.722475,5.254075,9.883175,-11.20035,4.7677,13.9438,-2.3178,6.6188,-3.59495,-7.5106,13.894,5.0728,5.781875,14.2628,7.452275,-10.476225,9.17795,6.276475,8.6278,11.551,2.1824,2.6341,7.613,13.4564,-8.321725,-2.3079,4.9921,3.1717,-13.766175,8.87,-2.500875,11.4563,11.0323,0.116975,-0.007125,4.125475,7.59105,-2.1995,...,15.696125,5.4705,4.3261,7.0296,-7.094025,15.74455,2.699,-9.6431,2.7032,5.3746,-3.2585,-4.72035,13.731775,-5.009525,15.0646,9.3716,-8.3865,9.808675,-7.3957,0.625575,-6.6739,9.0847,-6.064425,5.4231,5.6633,-7.36,6.7152,-19.205125,12.50155,0.0149,-0.058825,5.1574,0.889775,0.5846,15.6298,-1.1707,-1.946925,8.2528,13.8297,-11.208475
50%,10.52475,-1.60805,10.58,6.825,11.10825,-4.83315,5.3851,16.4568,0.3937,7.6296,0.4873,-3.28695,14.0255,8.60425,7.5203,14.5741,9.23205,-5.66635,15.19625,12.4539,13.1968,17.23425,4.27515,3.00865,10.38035,13.6625,-4.1969,-1.1321,5.53485,4.9502,-7.41175,10.36565,-0.49765,14.576,11.4352,3.91775,2.198,5.90065,10.5627,0.6723,...,23.8645,5.6335,5.3597,10.7887,-2.6378,19.2708,2.9602,-4.0116,4.7616,5.6343,0.0028,-0.80735,19.748,-0.56975,20.2061,11.6798,-2.53845,11.73725,-0.94205,2.5123,-2.6888,10.03605,0.7202,8.6,12.521,-3.94695,8.90215,-10.20975,15.23945,0.7426,3.2036,7.34775,1.9013,3.39635,17.95795,-0.1727,2.4089,8.8882,15.93405,-2.81955
75%,12.7582,1.358625,12.5167,8.3241,12.261125,0.9248,6.003,19.1029,2.9379,8.584425,4.382925,0.852825,14.1642,12.274775,9.270425,14.8745,11.0559,-0.810775,21.013325,18.4333,17.8794,23.08905,6.2932,3.4038,13.4796,13.8637,-0.0902,0.015625,6.0937,6.798925,-1.44345,11.885,1.4691,18.097125,11.8444,7.487725,4.4604,7.5424,13.598925,3.637825,...,32.62285,5.792,6.3712,14.6239,1.3236,23.024025,3.2415,1.318725,7.020025,5.9054,3.0964,2.9568,25.907725,3.6199,25.641225,13.7455,2.7044,13.9313,5.33875,4.391125,0.9962,11.0113,7.499175,12.127425,19.45615,-0.59065,11.1938,-1.466,18.345225,1.4829,6.4062,9.512525,2.9495,6.2058,20.396525,0.8296,6.556725,9.5933,18.064725,4.8368
max,20.315,10.3768,19.353,13.1883,16.6714,17.2516,8.4477,27.6918,10.1513,11.1506,18.6702,17.1887,14.6545,22.3315,14.9377,15.8633,17.9506,19.0259,41.748,35.183,31.2859,49.0443,14.5945,4.8752,25.446,14.6546,15.6751,3.2431,8.7874,13.1431,15.6515,20.1719,6.7871,29.5466,13.2878,21.5289,14.2456,11.8638,29.8235,15.3223,...,58.3942,6.3099,10.1344,27.5648,12.1193,38.3322,4.2204,21.2766,14.8861,7.089,16.7319,17.9173,53.5919,18.8554,43.5468,20.8548,20.2452,20.5965,29.8413,13.4487,12.7505,14.3939,29.2487,23.7049,44.3634,12.9975,21.7392,22.7861,29.3303,4.0341,18.4409,16.7165,8.4024,18.2818,27.9288,4.2729,18.3215,12.0004,26.0791,28.5007


In [7]:
santander_data[santander_data.isnull().any(axis=1)]

Unnamed: 0,var_0,var_1,var_2,var_3,var_4,var_5,var_6,var_7,var_8,var_9,var_10,var_11,var_12,var_13,var_14,var_15,var_16,var_17,var_18,var_19,var_20,var_21,var_22,var_23,var_24,var_25,var_26,var_27,var_28,var_29,var_30,var_31,var_32,var_33,var_34,var_35,var_36,var_37,var_38,var_39,...,var_160,var_161,var_162,var_163,var_164,var_165,var_166,var_167,var_168,var_169,var_170,var_171,var_172,var_173,var_174,var_175,var_176,var_177,var_178,var_179,var_180,var_181,var_182,var_183,var_184,var_185,var_186,var_187,var_188,var_189,var_190,var_191,var_192,var_193,var_194,var_195,var_196,var_197,var_198,var_199


In [8]:
santander_data.select_dtypes(exclude=np.number).columns

Index([], dtype='object')

In [9]:
len_train = len(santander_data)
len_train

200000

In [10]:
"""#Merge test and train
merged = pd.concat([santander_data, santander_data_test])
#Saving the list of original features in a new list `original_features`.
original_features = merged.columns
merged.shape"""

'#Merge test and train\nmerged = pd.concat([santander_data, santander_data_test])\n#Saving the list of original features in a new list `original_features`.\noriginal_features = merged.columns\nmerged.shape'

In [11]:
"""idx = features = merged.columns.values[0:200]
for df in [merged]:
    df['sum'] = df[idx].sum(axis=1)  
    df['min'] = df[idx].min(axis=1)
    df['max'] = df[idx].max(axis=1)
    df['mean'] = df[idx].mean(axis=1)
    df['std'] = df[idx].std(axis=1)
    df['skew'] = df[idx].skew(axis=1)
    df['kurt'] = df[idx].kurtosis(axis=1)
    df['med'] = df[idx].median(axis=1)"""

"idx = features = merged.columns.values[0:200]\nfor df in [merged]:\n    df['sum'] = df[idx].sum(axis=1)  \n    df['min'] = df[idx].min(axis=1)\n    df['max'] = df[idx].max(axis=1)\n    df['mean'] = df[idx].mean(axis=1)\n    df['std'] = df[idx].std(axis=1)\n    df['skew'] = df[idx].skew(axis=1)\n    df['kurt'] = df[idx].kurtosis(axis=1)\n    df['med'] = df[idx].median(axis=1)"

In [12]:
"""print("Total number of features: ",merged.shape[1])"""

'print("Total number of features: ",merged.shape[1])'

In [13]:
"""train_df = merged.iloc[:len_train]
train_df.head()"""

'train_df = merged.iloc[:len_train]\ntrain_df.head()'

In [14]:
"""X_test = merged.iloc[len_train:]
X_test.head()"""

'X_test = merged.iloc[len_train:]\nX_test.head()'

**Data Augment**
Augmentation is a method to increase the amount of training data by randomly shuffle/transform the features in a certain way. It improves accuracy by letting the model see more cases of both "1" and "0" samples in training so the model can generalize better to new data.

Thanks to Jiwei Lu for teaching this new concept . *https://www.kaggle.com/jiweiliu/lgb-2-leaves-augment*

In [15]:
def augment(x,y,t=2):
    xs,xn = [],[]
    for i in range(t):
        mask = y>0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xs.append(x1)

    for i in range(t//2):
        mask = y==0
        x1 = x[mask].copy()
        ids = np.arange(x1.shape[0])
        for c in range(x1.shape[1]):
            np.random.shuffle(ids)
            x1[:,c] = x1[ids][:,c]
        xn.append(x1)

    xs = np.vstack(xs)
    xn = np.vstack(xn)
    ys = np.ones(xs.shape[0])
    yn = np.zeros(xn.shape[0])
    x = np.vstack([x,xs,xn])
    y = np.concatenate([y,ys,yn])
    return x,y

In [16]:
train_df = santander_data
X_test = santander_data_test
del santander_data
del santander_data_test
gc.collect()

78

In [17]:
skf_three= StratifiedKFold(n_splits=5, shuffle=False, random_state=2319)

In [18]:
"""param = {
    'bagging_freq': 5,
    'bagging_fraction': 0.331,
    'boost_from_average':'false',
    'boost': 'gbdt',
    'feature_fraction': 0.0405,
    'learning_rate': 0.0083,
    'max_depth': -1,
    'metric':'auc',
    'min_data_in_leaf': 80,
    'min_sum_hessian_in_leaf': 10.0,
    'num_leaves': 13,
    'num_threads': 8,
    'tree_learner': 'serial',
    'objective': 'binary',
    'verbosity': 1
}"""

"param = {\n    'bagging_freq': 5,\n    'bagging_fraction': 0.331,\n    'boost_from_average':'false',\n    'boost': 'gbdt',\n    'feature_fraction': 0.0405,\n    'learning_rate': 0.0083,\n    'max_depth': -1,\n    'metric':'auc',\n    'min_data_in_leaf': 80,\n    'min_sum_hessian_in_leaf': 10.0,\n    'num_leaves': 13,\n    'num_threads': 8,\n    'tree_learner': 'serial',\n    'objective': 'binary',\n    'verbosity': 1\n}"

In [19]:
param = {
    'bagging_freq': 5,
    'bagging_fraction': 0.335,
    'boost_from_average':'false',
    'boost': 'gbdt',
    'feature_fraction': 0.041,
    'learning_rate': 0.0083,
    'max_depth': -1,
    'metric':'auc',
    'min_data_in_leaf': 80,
    'min_sum_hessian_in_leaf': 10.0,
    'num_leaves': 13,
    'num_threads': 8,
    'tree_learner': 'serial',
    'objective': 'binary', 
    'verbosity': -1
}

In [20]:
   # Create arrays and dataframes to store results
oof_preds = np.zeros(train_df.shape[0])
sub_preds = np.zeros(len(X_test))
feats = [f for f in train_df.columns]
    
for n_fold, (train_idx, valid_idx) in enumerate(skf_three.split(train_df[feats], label_df)):
    X_train, y_train = train_df.iloc[train_idx][feats], label_df.iloc[train_idx]
    X_valid, y_valid = train_df.iloc[valid_idx][feats], label_df.iloc[valid_idx]
    
    X_tr, y_tr = augment(X_train.values, y_train.values)
    X_tr = pd.DataFrame(X_tr)
    
    print("Fold idx:{}".format(n_fold + 1))
    trn_data = lgb.Dataset(X_tr, label=y_tr)
    val_data = lgb.Dataset(X_valid, label=y_valid)
        
    clf = lgb.train(param, trn_data,1000000, valid_sets = [trn_data, val_data], verbose_eval=1000, early_stopping_rounds = 3000)
        

    oof_preds[valid_idx] = clf.predict(train_df.iloc[valid_idx][feats], num_iteration=clf.best_iteration)
    sub_preds += clf.predict(X_test[feats], num_iteration=clf.best_iteration) / 5


print('Full AUC score %.6f' % roc_auc_score(label_df, oof_preds))

pred3=sub_preds




Fold idx:1
Training until validation scores don't improve for 3000 rounds.
[1000]	training's auc: 0.893769	valid_1's auc: 0.88167
[2000]	training's auc: 0.901669	valid_1's auc: 0.888047
[3000]	training's auc: 0.906613	valid_1's auc: 0.891692
[4000]	training's auc: 0.910472	valid_1's auc: 0.894432
[5000]	training's auc: 0.913489	valid_1's auc: 0.896173
[6000]	training's auc: 0.916036	valid_1's auc: 0.897416
[7000]	training's auc: 0.918351	valid_1's auc: 0.898359
[8000]	training's auc: 0.920415	valid_1's auc: 0.898999
[9000]	training's auc: 0.922348	valid_1's auc: 0.89945
[10000]	training's auc: 0.924175	valid_1's auc: 0.899734
[11000]	training's auc: 0.92595	valid_1's auc: 0.899857
[12000]	training's auc: 0.927623	valid_1's auc: 0.899969
[13000]	training's auc: 0.929296	valid_1's auc: 0.900137
[14000]	training's auc: 0.930909	valid_1's auc: 0.90026
[15000]	training's auc: 0.932531	valid_1's auc: 0.900222
[16000]	training's auc: 0.934108	valid_1's auc: 0.900171
[17000]	training's auc: 0.

In [21]:
sample_submission = pd.read_csv('../input/sample_submission.csv')
sample_submission['target'] = pred3
sample_submission.to_csv('submission.csv', index=False)