In [3]:

import warnings

import pandas as pd
from sklearn import set_config
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_selection import SelectFromModel, SelectKBest, f_classif
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import FeatureUnion, make_pipeline
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder

from functions import reduce_mem_usage


In [4]:
%matplotlib inline

# Silence every Python warning for the rest of the session.
# NOTE(review): this is a blanket filter, so warnings raised by later cells
# (pandas, scikit-learn) are hidden as well — consider narrowing it.
warnings.simplefilter("ignore")
# Let pandas show up to 999 columns before truncating wide frames.
pd.set_option("display.max_columns", 999)

In [5]:
# Shared seed; passed as random_state to the LogisticRegression used for feature selection.
RANDOM_STATE = 42

In [6]:
# Relative paths of the CSV files that are read into `train` and `test`.
TRAIN_PATH = "data/train_merge.csv"
TEST_PATH = "data/test_merge.csv"

In [7]:
# Read both CSV files and pass each frame through the project helper
# reduce_mem_usage (imported from `functions`). Judging by the recorded output
# it reports memory usage before and after; presumably it downcasts dtypes —
# TODO confirm against the helper's source.
train = reduce_mem_usage(pd.read_csv(TRAIN_PATH))
test = reduce_mem_usage(pd.read_csv(TEST_PATH))

Memory usage of the dataframe is 1614.34 MB
Memory usage after optimization is: 807.17 MB
Decreased by 50.0%
Memory usage of the dataframe is 141.63 MB
Memory usage after optimization is: 70.82 MB
Decreased by 50.0%


In [8]:
# Preview the first 20 rows of the training frame (all columns are shown
# because display.max_columns was raised to 999).
train.head(20)

Unnamed: 0,id,vas_id,buy_time_x,target,buy_time_y,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,76,77,78,79,80,82,83,84,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252
0,540968,8.0,1537131600,0.0,1541970000,-31.559971,327.360901,-45.500786,274.753204,-50.918247,253.648209,49.15382,195.883896,-1.835267,-12.831245,-2.736328,56.284081,-2.558594,-66.1875,-77.5,-0.000725,-0.016434,-0.107056,-5.417969,-3.177734,-13.9375,54.5,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,2.648438,0.785645,0.788574,-0.001884,-2.3e-05,-3e-05,62.59375,-0.065613,-0.700684,-192.026962,239.787094,-1005.230652,1245.017822,-89.504288,-119.724358,-3.511719,33.25,-36.75,-7.894531,-7.997875,-10.697456,-28.551693,-17.125,-16.09375,-26.171875,-5.332031,1.041016,-3.746094,-1.670898,-0.001656,9.320312,15.117188,0.174805,-0.108154,0.028351,-0.017578,-0.165649,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,-2.824219,-10.703125,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,-2.291016,-3.447266,-0.040039,-9.40625,-0.212158,-11.953125,-1.019531,-1.473633,0.37915,-1006.426575,1242.881104,0.032013,0.196167,236.454514,848.953552,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,0.031921,-0.030457,0.104858,1.381836,5.316406,1.426758,-0.468506,-0.017288,-190.625,5.855469,7.277344,2.173828,5.0625,-1.276367,-0.020142,-0.042633,-11.476562,-59.375,34.03125,-17.890625,0.13855,-0.317139,-0.007023,-0.143311,-14.414062,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,-0.061493,-0.1521,-0.002596,1.322266,-0.014542,-0.009506,-8.179688,35.75,-14.320312,-40.8125,-5.960938,33.1875,-12.617188,-26.671875,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,-15.976562,-36.8125,-9.351562,-25.328125,-0.034576,-0.163208,-109.0625,0.533203,-1.928711,-0.623535,-0.228149,-0.251953,-0.000567,0.566406,-0.000708,-0.029205,-0.104675,-0.001357,0.0
39215,0.665527,-0.008995,-11953.712891,-45175.257812,-0.623047,-30.716053,-61790.15625,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,-0.623047,-4e-05,-0.620605,-0.01226,-0.107849,969818880.0,-49.21875,-440560400.0,-120441800.0,-8.632812,-1.464844,-33.30238,810.871033,270.25,-0.364746,-0.133789,-0.209473,269.75,-109.884567,364.30899,41.625,939.0,-108.4375,-504.437317,-53.617977,-12.296875,-845.37384,-613.770813,-20.996269,-37.625,-28.747725,4.167111,7.304688,-12.175933,21.546875,0.0
1,1454121,4.0,1531688400,0.0,1531083600,547.27002,238.430893,533.329224,274.803192,527.911743,253.698212,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,578.0,566.5,-0.000725,-0.016434,-0.107056,-5.417969,-3.177734,-13.9375,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,-0.700684,-192.026962,-2764.330078,-1102.746338,-1661.58374,-89.504288,-119.724358,-119.6875,-54.59375,-65.0625,49.78125,-7.997875,-10.717958,-28.571102,-29.125,-34.09375,-56.15625,-17.328125,-4.957031,-8.742188,-1.670898,-0.001656,-16.6875,-2.882812,-0.265137,-0.408203,-0.091675,-0.237549,-0.295654,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,-2.824219,-10.703125,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,-2.291016,-3.447266,-0.040039,-9.40625,-0.212158,-11.953125,-1.019531,-2.472656,-5.621094,-1103.942139,-1663.720459,0.291992,-0.073853,-2767.662598,-2298.725098,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.060455,-0.115112,-5.617188,-14.679688,-3.574219,-0.468506,-0.417236,537.5,-1.143555,-2.722656,-0.826172,-1.935547,-1.276367,-0.020142,-0.042633,-29.796875,-89.4375,-8.960938,-34.875,-3.861328,-0.317139,-0.007023,-0.143311,-43.40625,-0.212646,-0.019562,-4.4e-05,-0.000379,14.453125,-0.26123,-0.536133,-0.061493,-0.1521,-0.002596,-4.679688,-0.014542,0.260498,-21.1875,-44.375,-25.3125,-52.0,-25.96875,-54.65625,-13.617188,-30.828125,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,28.75,30.828125,0.067261,19.0625,-30.984375,-65.125,-16.359375,-35.3125,-0.034576,-0.163208,69.125,0.533203,3.070312,-0.623535,-0.228149,0.748047,-0.000567,0.566406,-0.000708,-0.029205,0.895
508,-0.001357,0.039215,0.665527,-0.008995,10798.220703,-45175.257812,-0.623047,-30.716053,-61790.15625,0.756836,-37911.167969,-9239.707031,-2.107422,-8.3e-05,-0.623047,-4e-05,-0.620605,-0.01226,-0.107849,958586880.0,585.5,1072212000.0,-120441800.0,3.328125,-1.464844,-33.30238,-206.128983,-12.773438,-0.364746,-0.133789,-0.209473,-8.359375,-109.884567,-876.69104,-5.367188,-247.125,-108.4375,-512.437317,-80.617981,-17.296875,-972.37384,-613.770813,-25.996269,-19.625,-278.747711,-24.832888,-0.694336,-11.175933,-0.456055,0.0
2,2458816,1.0,1534107600,0.0,1543179600,-92.139969,-95.469109,-106.080788,-139.596802,-111.498245,-142.901794,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,-66.1875,-75.0,-0.000725,-0.016434,-0.107056,-5.417969,-1.058594,-13.9375,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,-0.700684,-92.026962,-2761.896484,-1112.468994,-1649.42749,-89.504288,-119.724358,-101.375,-39.5625,-61.8125,-7.894531,-7.858951,-10.717958,-28.571102,-20.125,-24.09375,-37.15625,-13.328125,-3.958984,-7.746094,-1.670898,-0.001656,-13.679688,-3.882812,-0.005234,-0.178223,-0.031647,0.402344,0.034332,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,-0.704102,-2.226562,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,0.24939,-1.327148,-0.040039,-6.867188,-0.212158,-9.414062,-1.019531,-1.473633,2.378906,-1113.664795,-1651.564209,-0.36792,-0.093872,-2765.229004,-2118.181152,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.060455,0.414795,-4.617188,-5.683594,1.426758,0.531738,0.58252,-190.625,-1.143555,-2.722656,-0.826172,-1.935547,-1.276367,-0.020142,-0.042633,-29.796875,-101.1875,13.039062,-8.890625,-2.861328,-0.317139,-0.007023,-0.143311,-17.421875,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,-0.061493,-0.1521,-0.002596,-3.677734,-0.014542,0.210449,-4.183594,-29.34375,-17.3125,-48.75,-8.960938,-39.625,3.384766,-15.789062,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,-22.984375,-61.84375,-8.351562,-32.0625,-0.034576,-0.163208,-97.0625,0.533203,-1.928711,0.376221,0.771973,0.748047,-0.000567,0.566406,-0.000708,-0.
029205,-0.104675,-0.001357,0.039215,0.665527,-0.008995,4972.062012,-45175.257812,0.377197,-30.716053,-61790.15625,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,0.377197,-4e-05,0.37915,-0.01226,-0.107849,-572669504.0,-58.53125,-440560400.0,-120441800.0,-6.316406,-1.464844,-33.30238,-266.128998,-39.78125,-0.364746,-0.133789,-0.209473,-35.34375,-109.884567,-703.69104,-5.367188,-247.125,-108.4375,-339.437317,-106.617981,-17.296875,-977.37384,-613.770813,-25.996269,-37.625,-304.747711,-25.832888,-0.694336,-12.175933,-0.456055,0.0
3,3535012,5.0,1535922000,0.0,1533502800,54.880028,12.970888,54.079212,-9.116798,48.661755,-30.22179,60.17382,87.453903,-6.78366,-30.006538,73.5,-4.007526,-2.558594,-9.828125,-2.097656,-0.000725,-0.016434,-0.107056,-5.417969,-3.177734,-0.800781,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,-0.700684,57.973042,-2406.325195,-650.650635,-1755.674561,-89.504288,-119.724358,-33.5,-54.59375,21.109375,-7.894531,-7.122595,-10.575712,-28.537996,20.875,20.90625,9.828125,11.671875,0.04129,-1.745117,1.329102,-0.001656,-8.679688,-2.882812,-0.265137,0.061829,0.108337,-0.237549,-0.055664,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,73.5,-2.607422,70.875,72.25,-2.824219,-10.703125,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,5.328125,-3.447266,-0.040039,14.71875,-0.212158,12.171875,-1.019531,0.526367,-1.621094,-651.846497,-1757.811279,-0.36792,0.476074,-2409.657715,-1762.609863,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.030457,0.324951,-0.618164,-7.683594,1.426758,0.531738,0.182739,-190.625,-1.143555,-2.722656,-0.826172,-1.935547,-1.276367,-0.020142,-0.042633,-13.882812,-81.0625,20.03125,32.125,1.138672,0.432861,-0.007023,-0.143311,23.578125,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,3.462891,-0.061493,0.177856,-0.002596,0.321777,-0.014542,0.130493,-21.1875,-44.375,13.679688,34.1875,-25.96875,-54.65625,-13.617188,-30.828125,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,8.023438,21.0625,13.648438,34.9375,-0.034576,-0.163208,1757.0,0.533203,-1.928711,-0.623535,-0.228149,-0.251953,-0.000567,0.566406,-0.000708,-0.029205,0.895508,-0.001357,0.039215,0.665527
,-0.008995,-11953.712891,-9900.257812,1.376953,-18.006052,-61786.945312,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,1.376953,-4e-05,1.378906,-0.01226,-0.107849,-572669504.0,-58.53125,1026649000.0,-120441800.0,-11.140625,-1.464844,-33.30238,-107.128983,-38.78125,-0.364746,-0.133789,0.790527,-35.34375,-109.884567,-574.69104,-5.367188,-244.125,2.589844,-324.437317,-90.617981,-17.296875,-977.37384,-613.770813,-25.996269,-18.625,-133.747726,-14.832889,-0.694336,-1.175933,-0.456055,0.0
4,1693214,1.0,1535922000,0.0,1543179600,45.16003,295.240875,64.679214,344.283203,59.261753,323.178223,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,-66.1875,58.09375,-0.000725,-0.016434,-0.107056,-5.417969,3.181641,19.515625,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,1.418945,107.973038,-1472.512695,-1093.75708,-378.755554,-89.504288,-119.724358,101.875,-41.6875,143.625,-7.894531,-7.385824,-10.717958,-28.571102,33.875,31.90625,118.8125,14.671875,-1.958984,-6.746094,-0.671387,-0.001656,52.3125,0.11731,-0.265137,-0.128174,0.05835,-0.237549,-0.105652,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,3.535156,6.253906,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,-2.291016,2.912109,-0.040039,194.0,-0.212158,191.5,-1.019531,1.526367,0.37915,-1094.953003,-380.892273,-0.097961,0.336182,-1475.845337,-828.797424,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.010452,0.374756,5.382812,8.320312,1.426758,-0.468506,-0.217285,-190.625,-1.143555,-2.722656,-0.826172,-1.935547,-1.276367,-0.020142,-0.042633,123.125,399.25,-18.96875,45.125,-0.861328,0.122864,-0.007023,0.296631,36.59375,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,-0.061493,-0.1521,-0.002596,-1.677734,-0.014542,0.120483,-10.179688,-31.453125,64.6875,156.625,-14.960938,-41.75,-10.617188,-23.96875,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,59.03125,143.5,34.65625,20.375,-0.034576,-0.163208,-109.0625,0.533203,-1.928711,0.376221,0.771973,0.748047,-0.000567,0.566406,-0.000708,-0.029205,-0.104675,-0.001357,0.039215,0.665527,-0.
008995,3468.745117,-35712.257812,1.376953,-10.559386,-61771.832031,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,1.376953,-4e-05,1.378906,-0.01226,-0.107849,970855680.0,-57.28125,-440560400.0,-120441800.0,-10.539062,-1.464844,-33.30238,-87.128983,9.226562,-0.364746,-0.133789,0.790527,11.640625,-109.884567,-52.691021,6.632812,270.0,145.625,-471.437317,-80.617981,229.75,-965.37384,-612.770813,-22.996269,-32.625,-127.747726,-4.832889,-0.694336,-12.175933,-0.456055,0.0
5,3173665,8.0,1532898000,0.0,1537736400,-67.149971,-372.599121,-81.090782,-425.206787,-86.508247,-446.311798,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,-66.1875,-47.875,-0.000725,-0.016434,-0.107056,-5.417969,-3.177734,-13.9375,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,-0.799805,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,-0.700684,-192.026962,-2942.44043,-1186.765869,-1755.674561,-89.504288,-119.724358,-77.9375,-53.9375,-24.0,-7.894531,-7.287412,-10.717958,-28.571102,-45.125,-49.09375,-63.15625,-27.328125,-3.958984,-8.742188,-1.670898,-0.001656,-25.6875,-2.882812,-0.265137,0.081848,-0.061646,-0.237549,-0.045654,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,-2.824219,-10.703125,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,9.570312,-3.447266,-0.040039,20.234375,-0.212158,17.6875,-1.019531,-0.473389,-3.621094,-1187.96167,-1757.811279,-0.36792,-0.393799,-2945.772949,-2298.725098,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.060455,-0.575195,-0.618164,-7.683594,1.426758,-0.468506,-0.017288,-190.625,-1.143555,-2.722656,-0.826172,-1.935547,-1.276367,-0.020142,-0.042633,-8.265625,-93.3125,35.03125,-33.875,-2.861328,-0.317139,-0.007023,-0.143311,-42.40625,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,-0.061493,-0.1521,-0.002596,-3.677734,-0.014542,-0.349609,-20.1875,-43.71875,-6.320312,-10.921875,-24.96875,-54.0,-13.617188,-30.828125,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,-11.976562,-24.03125,-5.355469,-15.773438,-0.034576,-0.163208,-109.0625,-0.466797,-2.929688,-0.623535,-0.228149,-0.251953,-0.000567,-0.433838,-0.
000708,-0.029205,-0.104675,-0.001357,0.039215,-0.334473,-0.008995,-11953.712891,-45175.257812,-0.623047,-30.716053,-61790.15625,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,-0.623047,-4e-05,-0.620605,-0.01226,-0.107849,-572669504.0,-58.53125,-440560400.0,-120441800.0,-17.09375,-1.464844,-33.30238,-266.128998,-39.78125,-0.364746,-0.133789,-0.209473,-35.34375,-109.884567,-876.69104,-5.367188,-247.125,-108.4375,-512.437317,-106.617981,-17.296875,-977.37384,-613.770813,-25.996269,-37.625,-306.747711,-25.832888,-0.694336,-12.175933,-0.456055,0.0
6,2611143,2.0,1544994000,0.0,1533502800,-96.799973,-408.179108,-110.740784,-460.786804,-116.158249,-481.891785,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,-66.1875,-77.5,-0.000725,-0.016434,-0.107056,-5.417969,-3.177734,-13.9375,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,-0.799805,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,-0.700684,-192.026962,-2942.44043,-1186.765869,-1755.674561,-89.504288,-119.724358,-119.6875,-54.59375,-65.0625,-7.894531,-7.997875,-10.717958,-28.571102,-46.125,-51.09375,-77.1875,-29.328125,-4.957031,-9.742188,-1.670898,-0.001656,-32.6875,-4.882812,-0.265137,-0.408203,-0.091675,-0.237549,-0.295654,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,-2.824219,-10.703125,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,-2.291016,-3.447266,-0.040039,-9.40625,-0.212158,-11.953125,-1.019531,-2.472656,-5.621094,-1187.96167,-1757.811279,-0.36792,-0.393799,-2945.772949,-2298.725098,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.060455,-0.575195,-5.617188,-14.679688,-3.574219,-0.468506,-0.417236,-190.625,-1.143555,-2.722656,-0.826172,-1.935547,-1.276367,-0.020142,-0.042633,-29.796875,-116.0,-42.96875,-34.875,-3.861328,-0.317139,-0.007023,-0.143311,-43.40625,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,-0.061493,-0.1521,-0.002596,-4.679688,-0.014542,-0.679688,-21.1875,-44.375,-25.3125,-52.0,-25.96875,-54.65625,-13.617188,-30.828125,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,-30.984375,-65.125,-16.359375,-35.3125,-0.034576,-0.163208,-96.0625,-0.466797,-2.929688,-0.623535,-0.228149,-0.251953,-0.000567,-0.433838,-
0.000708,-0.029205,-0.104675,-0.001357,0.039215,-0.334473,-0.008995,-11953.712891,-45175.257812,-0.623047,-30.716053,-61790.15625,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,-0.623047,-4e-05,-0.620605,-0.01226,-0.107849,-572669504.0,-58.53125,-440560400.0,-120441800.0,-17.09375,-1.464844,-33.30238,-266.128998,-39.78125,-0.364746,-0.133789,-0.209473,-35.34375,-109.884567,-876.69104,-5.367188,-247.125,-108.4375,-512.437317,-106.617981,-17.296875,-977.37384,-613.770813,-25.996269,-37.625,-306.747711,-25.832888,-0.694336,-12.175933,-0.456055,0.0
7,3577737,5.0,1545598800,0.0,1531083600,-96.799973,-10.719112,-110.740784,-63.326797,-116.158249,-84.431793,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,-66.1875,-77.5,-0.000725,-0.016434,-0.107056,-5.417969,-3.177734,-13.9375,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,-0.700684,-192.026962,-2375.806641,-1033.467041,-1342.3396,-89.504288,-119.724358,-10.296875,6.128906,-16.421875,-7.894531,-7.997875,-10.717958,-28.571102,36.875,31.90625,5.828125,15.671875,-4.957031,-8.742188,-1.670898,-0.001656,26.3125,2.117188,-0.245239,-0.108154,0.038361,-0.127563,0.104309,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,-2.824219,-10.703125,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,-2.291016,-3.447266,-0.040039,-9.40625,-0.212158,-11.953125,-1.019531,-0.473389,2.378906,-1034.662842,-1344.476318,-0.167969,0.196167,-2379.13916,-2127.632324,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.048065,0.299561,0.304932,-0.618164,12.320312,1.426758,0.531738,0.182739,-190.625,5.855469,27.28125,3.173828,21.0625,-1.276367,-0.020142,-0.042633,-2.246094,-29.703125,23.03125,-34.875,-3.861328,0.182861,-0.007023,-0.143311,39.59375,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,-0.061493,-0.1521,-0.002596,-4.679688,-0.014542,0.130493,-17.1875,-42.96875,-22.3125,-50.28125,8.039062,6.070312,4.386719,11.242188,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,6.023438,-16.46875,-4.355469,-14.21875,-0.034576,-0.163208,-109.0625,0.533203,-1.928711,-0.623535,0.771973,0.748047,-0.000567,0.566406,-0.000708,-0.029205,0.8955
08,-0.001357,0.039215,0.665527,-0.008995,-6581.077148,-45175.257812,-0.623047,-30.716053,-61790.15625,0.756836,-41261.167969,-9239.707031,-2.107422,-8.3e-05,-0.623047,-4e-05,-0.620605,-0.01226,-0.107849,-572669504.0,-58.53125,1062347000.0,-120441800.0,1.274414,2.535156,-33.30238,-235.128983,-6.773438,-0.364746,-0.133789,-0.209473,-2.357422,-109.884567,-590.69104,-5.367188,-247.125,62.59375,-397.437317,-106.617981,-17.296875,-949.37384,-613.770813,-25.996269,-35.625,-274.747711,106.167114,-0.694336,119.824066,-0.456055,1.0
8,2000856,1.0,1534712400,0.0,1531688400,-21.36997,108.780891,178.619217,770.86322,173.201752,749.75824,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,-66.1875,-42.34375,-0.000725,-0.016434,-0.107056,-5.417969,37.09375,200.0,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,3.539062,-192.026962,-1464.904297,290.770264,-1755.674561,-89.504288,-119.724358,164.0,8.578125,155.375,-7.894531,-7.873892,-10.717958,-28.571102,151.875,165.875,146.875,95.6875,14.039062,9.257812,0.328613,-0.001656,18.3125,-0.882812,0.354736,0.221802,-0.021637,0.192383,0.044342,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,37.46875,47.375,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,-2.291016,36.84375,-0.040039,25.765625,-0.212158,23.21875,-1.019531,9.523438,22.375,289.574371,-1757.811279,-0.147949,0.276123,-1468.236938,-821.189026,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.060455,0.334961,10.382812,30.3125,1.426758,0.531738,-0.017288,-190.625,-1.143555,-2.722656,-0.826172,-1.935547,81.75,-0.020142,-0.042633,11.273438,12.3125,-39.96875,163.125,15.140625,0.092834,-0.007023,0.196777,154.625,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,0.818359,0.728027,-0.002596,14.320312,-0.014542,0.080505,33.8125,18.8125,100.6875,168.5,29.03125,8.523438,26.390625,14.09375,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,95.0,155.375,78.625,144.125,-0.034576,-0.163208,-109.0625,0.533203,-1.928711,0.376221,0.771973,0.748047,-0.000567,0.566406,-0.000708,-0.029205,0.895508,-0.001357,0.039215,0.665527,-0.008995,47313.636719,
-4927.257812,-0.623047,267.703949,-61494.238281,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,-0.623047,-4e-05,-0.620605,-0.01226,-0.107849,959537280.0,-56.4375,-440560400.0,-120441800.0,-13.125,-1.464844,-33.30238,-266.128998,-39.78125,-0.364746,-0.133789,-0.209473,-35.34375,-109.884567,-876.69104,-5.367188,-247.125,-108.4375,-512.437317,-106.617981,-17.296875,-977.37384,-613.770813,-25.996269,-37.625,-306.747711,-25.832888,-0.694336,-12.175933,-0.456055,0.0
9,2000856,1.0,1534712400,0.0,1531083600,-92.559967,48.600887,50.239216,609.86322,44.821754,588.75824,-16.08618,-65.076096,-6.78366,-30.006538,-2.736328,-4.007526,-2.558594,-66.1875,-77.5,-0.000725,-0.016434,-0.107056,-5.417969,1.061523,142.75,-10.742188,-0.094238,-0.001733,-0.009331,-2.082031,0.200195,-0.009094,-0.351807,-0.214355,-0.211548,-0.001884,-2.3e-05,-3e-05,-2.660156,-0.065613,3.539062,132.973038,1074.174805,2829.849365,-1755.674561,-89.504288,-119.724358,122.5,32.90625,89.625,-7.894531,-7.997875,-10.717958,-28.571102,82.875,79.9375,146.875,95.6875,-2.958984,2.253906,0.328613,-0.001656,2.318359,-3.882812,0.634766,0.081848,-0.021637,0.062439,-0.075684,-0.028458,-0.044464,-0.301025,-0.554688,-0.036835,-0.130005,-2.783203,-2.607422,-5.390625,-4.023438,1.416016,22.328125,-1.2015,-0.998268,-0.203247,-0.248779,-0.2229,-0.134033,-0.030533,-0.125854,-0.096985,-0.679688,-0.626953,-0.691895,-0.506836,-0.185303,-0.598633,-0.000115,-0.250244,-0.348877,-0.828613,-42.275913,-3.949219,-0.25293,-0.318115,-2.291016,0.79248,-0.040039,-9.40625,-0.212158,-11.953125,-1.019531,12.523438,16.375,2828.653564,-1757.811279,0.471924,-0.093872,1070.842163,1717.890015,-0.343506,-0.089722,-0.278809,-0.433105,-0.024048,-89.211945,-119.674408,-208.886353,-0.058075,-0.060455,0.304932,13.382812,19.3125,1.426758,0.531738,0.58252,-190.625,-1.143555,-2.722656,-0.826172,-1.935547,69.875,-0.020142,-0.042633,-15.367188,52.375,-39.96875,94.125,-1.861328,0.012833,0.052979,-0.083252,85.5625,-0.212646,-0.019562,-4.4e-05,-0.000379,-2.548828,-0.26123,-0.536133,-0.061493,0.3479,-0.002596,-2.677734,-0.014542,0.020493,28.8125,43.125,56.6875,102.6875,24.03125,32.84375,16.390625,36.28125,-0.028854,-0.063232,-0.019196,-0.033783,-0.003149,-0.005184,-0.001431,-0.00189,-1.257812,-2.792969,-1.932617,-5.007812,51.03125,89.5625,49.65625,104.9375,-0.034576,-0.163208,-109.0625,0.533203,-1.928711,0.376221,0.771973,0.748047,-0.000567,0.566406,-0.000708,-0.029205,0.895508,-0.001357,0.039215,0.665527,-0.008995,47313.6367
19,4796.742188,2.376953,178.203949,-61584.070312,-0.243164,-42051.167969,-9239.707031,-2.107422,-8.3e-05,2.376953,-4e-05,2.378906,-0.01226,-0.107849,958846080.0,-56.4375,-440560400.0,-120441800.0,-13.148438,-1.464844,-33.30238,-266.128998,-39.78125,-0.364746,-0.133789,-0.209473,-35.34375,-109.884567,-876.69104,-5.367188,-247.125,-108.4375,-512.437317,-106.617981,-17.296875,-977.37384,-613.770813,-25.996269,-37.625,-306.747711,-25.832888,-0.694336,-12.175933,-0.456055,0.0


In [10]:
# Candidate model inputs: every column of `train` except the label ('target')
# and the 'id' column.
# NOTE(review): this keeps 'Unnamed: 0', which looks like a row index written
# by an earlier to_csv — confirm whether it should be a feature.
features = [name for name in train.columns if name not in {'target', 'id'}]
len(features)

251

Разделим данные на X и y

In [11]:
# Label vector and feature frame taken from the training data.
y = train['target']
X = train.drop(columns='target')
# The second file is kept under a separate name; this is an alias, not a copy.
X_valid = test

In [12]:
# Hold out 20% of the labelled rows for evaluation. The seed comes from the
# shared RANDOM_STATE constant so the split and the estimators stay in sync.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RANDOM_STATE)

Разделим признаки на бинарные, категориальные и вещественные.

In [13]:
# Bucket every feature by its number of distinct values in X:
#   exactly 2 -> boolean, any other count up to 10 -> categorical, more -> numeric.
# NaN counts as a distinct value (dropna=False), which matches len(unique()).
# Columns with 0 or 1 distinct values therefore land in the categorical bucket.
# The loop walks the column names directly; building X[features].fillna(0) just
# to iterate its labels copied the whole frame without using the filled values.
boolean_features = []
categorical_features = []
numeric_features = []
for col in features:
    val_count = X[col].nunique(dropna=False)
    if val_count == 2:
        boolean_features.append(col)
    elif val_count <= 10:
        categorical_features.append(col)
    else:
        numeric_features.append(col)


Селектор колонок

In [14]:
class ColumnSelector(BaseEstimator, TransformerMixin):
    """Pipeline step that keeps only a fixed set of DataFrame columns.

    Parameters
    ----------
    columns : label or list of labels
        Column(s) passed to ``X[...]`` in ``transform``.
    """

    def __init__(self, columns):
        self.columns = columns

    def fit(self, X, y=None):
        """Learn nothing; present only to satisfy the transformer API. Returns self."""
        return self

    def transform(self, X):
        """Return ``X[self.columns]``.

        Raises
        ------
        AssertionError
            If ``X`` is not a pandas DataFrame.
        KeyError
            If any requested column is absent; the message lists the missing
            columns in the order they were requested.
        """
        assert isinstance(X, pd.DataFrame)

        try:
            return X[self.columns]
        except KeyError as err:
            # Treat a single label as a one-element list so a string column
            # name is not split into characters when building the report.
            if pd.api.types.is_list_like(self.columns):
                requested = self.columns
            else:
                requested = [self.columns]
            # A list (not a set difference) keeps the message deterministic.
            cols_error = [col for col in requested if col not in X.columns]
            raise KeyError("DataFrame не содержит следующие колонки: %s" % cols_error) from err

Генератор новых фич
Поскольку данные обезличены и их много, этот шаг приведён скорее для примера.
Датасеты содержат две колонки с датами — дату подключения услуги и дату некоего другого действия (покупки сим-карты?). Вероятно, имеет смысл сгенерировать новый признак как разницу между этими значениями — условно, время от подключения сим-карты до подключения услуги. В текущей версии кода генерация этого признака закомментирована, и шаг возвращает данные без изменений.

In [15]:
# new_features_list = ['interval']

In [4]:
class FeaturesGenerator(BaseEstimator, TransformerMixin):
    """Placeholder pipeline step for engineered features.

    At present this is a pass-through: ``transform`` returns its input
    unchanged and ``features_list`` is stored but never consulted.

    The one feature sketched so far, 'interval' (``buy_time_y - buy_time_x``),
    is disabled. When it is enabled, compute it on a copy of ``X`` so the
    caller's frame is not mutated, and report a missing ``buy_time_*`` column
    with a KeyError.

    Parameters
    ----------
    features_list : list of str
        Names of the features to generate (currently unused).
    """

    def __init__(self, features_list):
        self.features_list = features_list

    def fit(self, X, y=None):
        """Learn nothing; present only to satisfy the transformer API. Returns self."""
        return self

    def transform(self, X, y=None):
        """Return ``X`` unchanged (no feature is generated yet)."""
        # No lookup happens here, so no KeyError can be raised; the former
        # try/except around this return was unreachable.
        return X

Обработчик вещественных признаков

In [17]:
# Numeric branch: pick the numeric columns, fill gaps with the column mean,
# then standardise.
num_pipe = Pipeline(
    steps=[
        ('ncs', ColumnSelector(columns=numeric_features)),
        ('nsi', SimpleImputer(strategy="mean")),
        ('nss', StandardScaler()),
    ]
)

Обработчик категориальных признаков

In [18]:
# Categorical branch: pick the categorical columns, fill gaps with the most
# frequent value, then one-hot encode (categories unseen at fit time are ignored).
cat_pipe = Pipeline(
    steps=[
        ('ccs', ColumnSelector(columns=categorical_features)),
        ('csi', SimpleImputer(strategy="most_frequent")),
        ('coe', OneHotEncoder(handle_unknown='ignore')),
    ]
)

Обработчик булевых признаков

In [19]:
# Boolean branch: pick the two-valued columns and fill gaps with the most
# frequent value; no further encoding is applied.
bool_pipe = Pipeline(
    steps=[
        ('bcs', ColumnSelector(columns=boolean_features)),
        ('bsi', SimpleImputer(strategy="most_frequent")),
    ]
)

Собираем в общий пайплайн

In [20]:
# Named branches that the FeatureUnion runs and concatenates column-wise.
transformer_list = [
    ('num_pipe', num_pipe),
    ('cat_pipe', cat_pipe),
    ('bool_pipe', bool_pipe),
]

In [21]:
# Full preprocessing: keep the feature columns, run the feature-generation
# step, then apply the three type-specific branches and join their outputs.
transform_pipe = Pipeline(
    steps=[
        ('cs', ColumnSelector(columns=features)),
        ('fg', FeaturesGenerator(features_list=['interval'])),
        ('fu', FeatureUnion(transformer_list=transformer_list)),
    ]
)


Отберем признаки с помощью SelectKBest и логистической регрессии с регуляризацией L1 (было 298 признаков)

upd: Логистическая регрессия даже с большим порогом (threshold) занулила всего 4 признака, однако сильно увеличила время работы пайплайна, поэтому принял решение её исключить.

In [22]:
# Preprocessing followed by two selection stages: the 50 best features by
# ANOVA F-score, then an L1-penalised logistic regression that drops features
# whose importance falls below the threshold.
# NOTE(review): the markdown above says the logistic-regression selector was
# excluded for being slow, yet it is still part of this pipeline — confirm
# which of the two is intended.
fs_pipe = make_pipeline(
    transform_pipe,
    SelectKBest(score_func=f_classif, k=50),
    SelectFromModel(
        LogisticRegression(penalty='l1', solver='liblinear', random_state=RANDOM_STATE),
        threshold=1e-3,
    ),
)

In [23]:
# Render estimators as HTML diagrams instead of plain-text reprs
# (set_config is imported in the notebook's import cell).
set_config(display='diagram')

# Last expression of the cell: displays the pipeline diagram.
fs_pipe

In [28]:
# Fit preprocessing and both feature selectors on the training split only.
fs_pipe.fit(X_train, y_train)


In [39]:
# Apply the fitted pipeline to the evaluation split. X_test comes straight from
# train_test_split and is never None, so no guard is needed.
X_test_transform = fs_pipe.transform(X_test)

In [35]:
# Apply the fitted pipeline to the training split.
X_train_transform = fs_pipe.transform(X_train)

In [36]:
# Apply the fitted pipeline to the frame loaded from TEST_PATH (X_valid is an alias of `test`).
X_valid_transform = fs_pipe.transform(X_valid)