## Benchmark for measuring performance of engineered features and models

Prepare data for later analysis

In [2]:
import sys
sys.path.insert(0, '../../bosch_helper')
from bosch_helper import *

from sklearn.preprocessing import LabelEncoder

%matplotlib inline

### Load train and test data

In [3]:
# Names of the important numeric features, read from the benchmark_1 output
# (header-less CSV whose first column is used as the index).
important_features = list(
    pd.read_csv(
        '../benchmark_1/important_numeric_features.csv',
        header=None,
        index_col=0,
    ).values.ravel()
)
# 'Id' and 'Response' are appended so the list can be passed as `usecols`
# when the raw numeric files are read.
important_features += ['Id', 'Response']

In [4]:
# Read only the selected columns of the training set, stored as float32.
x_train_numeric = pd.read_csv(
    '../../data/train_numeric.csv.zip',
    usecols=important_features,
    index_col=0,
    dtype=np.float32,
)

# Separate the target column from the feature columns.
y_train = x_train_numeric['Response']
x_train_numeric = x_train_numeric.drop(columns=['Response'])

In [5]:
# Station-flow data for the training rows, produced by benchmark_2
# (header-less file, first column used as the index).
date_train = pd.read_csv(
    '../benchmark_2/train_station_flow.csv.gz',
    header=None,
    index_col=0,
)

In [6]:
# Index-join the station-flow data onto the numeric training features.
x_train = x_train_numeric.join(date_train)

# The joined file has no header, so rename the last column explicitly.
# `columns` is kept because the test frame reuses the same names.
columns = [*x_train.columns[:-1], 'station_flow']
x_train.columns = columns

In [7]:
# Test set: same column selection minus the trailing 'Response' entry.
x_test_numeric = pd.read_csv(
    '../../data/test_numeric.csv.zip',
    usecols=important_features[:-1],
    index_col=0,
    dtype=np.float32,
)

# Station-flow data for the test rows (header-less, first column is the index).
date_test = pd.read_csv(
    '../benchmark_2/test_station_flow.csv.gz',
    header=None,
    index_col=0,
)

# Join and reuse the training column names (assigned by position).
x_test = x_test_numeric.join(date_test)
x_test.columns = columns

In [8]:
# Cast the index of both frames to int64.
# NOTE(review): presumably needed because the blanket float32 dtype used when
# reading the numeric files also affected the Id index - confirm.
x_train.index, x_test.index = (
    x_train.index.astype(np.int64),
    x_test.index.astype(np.int64),
)

In [9]:
# The raw numeric frames are no longer needed after the joins; release them.
del x_train_numeric
del x_test_numeric
gc.collect()

60

### Load benchmark_3 features

In [10]:
# benchmark_3 features; train and test rows are stored in one file.
start_chunk = pd.read_csv('../benchmark_3/start_chunk.csv.gz', index_col=0)

# Rows with Response == -1 go to the test part, all others to the train part;
# the Response column itself is not kept as a feature.
start_chunk_train = start_chunk[start_chunk['Response'] != -1].drop(columns=['Response'])
start_chunk_test = start_chunk[start_chunk['Response'] == -1].drop(columns=['Response'])

In [11]:
# Attach the benchmark_3 start_chunk features to both sets, matching the
# 'Id' of each row against the index of the start_chunk frames.
x_train = x_train.join(start_chunk_train, on='Id')
x_test = x_test.join(start_chunk_test, on='Id')

# Reclaim memory left over from the joins.
gc.collect()

35

### Load benchmark_4 features

In [12]:
# benchmark_4 neighbor features; train and test rows are stored in one file.
n = pd.read_csv('../benchmark_4/benchmark_4_neighbors.csv.gz', index_col=0)

# Rows with Response == -1 go to the test part, all others to the train part.
# A non-inplace drop is used on purpose: dropping in place on a slice of `n`
# raises SettingWithCopyWarning, and this also matches how the benchmark_3
# frames are split.
neighbor_train = n.loc[n.Response != -1].drop(columns=['Response'])
neighbor_test = n.loc[n.Response == -1].drop(columns=['Response'])

print(neighbor_test.shape, neighbor_train.shape)

# Attach the neighbor features, matching on 'Id'.
x_train = x_train.join(neighbor_train, on='Id')
x_test = x_test.join(neighbor_test, on='Id')

gc.collect()

(1183748, 14) (1183747, 14)


28

### Add neighbor numeric features

In [13]:
# Stack train and test under an outer 'train' / 'test' key and order the rows
# by Id, so that shifting by one row reaches the record with the adjacent Id.
x = pd.concat([x_train, x_test], keys=['train', 'test'])
x = x.sort_index(axis=0, level='Id')

# Add the neighbor records: the first 150 columns of the previous row and of
# the next row, with suffixed column names.
x = x.join(x.iloc[:, :150].shift(1), rsuffix='_previous')
x = x.join(x.iloc[:, :150].shift(-1), rsuffix='_next')

### Load benchmark_7 features

In [16]:
# benchmark_7 time features, without the raw time columns and the target.
time_features = pd.read_hdf(
    '../benchmark_7/time_features_diff.hdf', 'time_features'
).drop(columns=['time_start', 'time_end', 'time_duration', 'Response'])

In [20]:
# The last 40 columns of time_features are left out of the join; per the
# original note these are the MeanTimeDiff features. Uncomment the next line
# to list them:
# time_features.columns.tolist()[-40:]

# Join the remaining time features to x (index join).
x = x.join(time_features.iloc[:, :-40])

### Load benchmark_8 features

In [25]:
# Row hashes and duplication counts, stored under the 'hash_numeric' key.
hash_numeric = pd.read_hdf(
    'benchmark_8_numeric_features_2.hdf',
    key='hash_numeric',
)

x = x.join(hash_numeric)

In [30]:
# Per-week z-scores, stored under the 'zscore_per_week' key.
x_zscore_per_week = pd.read_hdf(
    'benchmark_8_numeric_features_2.hdf',
    key='zscore_per_week',
)

In [38]:
# Important feature names only: a copy of the list with the 'Response' and
# 'Id' entries taken out (the original list stays untouched).
tmp = list(important_features)
for label in ('Response', 'Id'):
    tmp.remove(label)

In [40]:
# Keep only the z-score columns of the important features (names in `tmp`).
x_zscore_per_week_if = x_zscore_per_week[tmp]

In [41]:
# Join the z-score columns; names that already exist in x get the '_zscore' suffix.
x = x.join(x_zscore_per_week_if, rsuffix='_zscore')

In [44]:
# The z-score frames have been joined into x; release them.
del x_zscore_per_week
del x_zscore_per_week_if
gc.collect()

21

In [45]:
# Count-encoded features, stored under the 'count_encoded' key.
x_count_encoded = pd.read_hdf(
    'benchmark_8_numeric_features_2.hdf',
    key='count_encoded',
)

In [50]:
# Column names of the count-encoded versions of the important features.
# Built from important_features instead of the previous value of `tmp`, so
# re-running this cell does not prepend 'count_' a second time.
tmp = ['count_' + k for k in important_features if k not in ('Id', 'Response')]

In [52]:
# Keep only the count-encoded columns whose names are listed in `tmp`.
x_count_encoded_if = x_count_encoded[tmp]

In [53]:
# Sanity check: (rows, columns) of the selected count-encoded features.
x_count_encoded_if.shape

(2367495, 149)

In [54]:
# Append the selected count-encoded columns to x (index join).
x = x.join(x_count_encoded_if)

In [59]:
# The count-encoded frames have been joined into x; release them.
del x_count_encoded
del x_count_encoded_if
gc.collect()

123

In [61]:
# Split the stacked frame back into its two parts using the outer
# 'train' / 'test' keys that were set when the frames were concatenated.
x_train = x.loc['train']
x_test = x.loc['test']

In [64]:
# The stacked frame is no longer needed once x_train / x_test are extracted.
del x
gc.collect()

Exception ignored in: <function DMatrix.__del__ at 0x7f3bd4348510>
Traceback (most recent call last):
  File "/c3se/NOBACKUP/users/lyaa/conda_dir/miniconda/envs/kaggle/lib/python3.7/site-packages/xgboost/core.py", line 366, in __del__
    if self.handle is not None:
AttributeError: 'DMatrix' object has no attribute 'handle'


28632

### CV score based on stratified KFold with repeated models

In [None]:
# Cross-validation run.
# Parameters for the 'gbtree' booster.
params = {
    'max_depth': 14,
    'eta': 0.03,
    'silent': 1,
    'objective': 'binary:logistic',
    'nthread': 16,
    'lambda': 4,
    'subsample': 0.9,
    'min_child_weight': 5,
    'booster': 'gbtree',
    'alpha': 0,
    'base_score': 0.0058,
    'colsample_bytree': 0.6,
}

# Alternative parameters for the 'dart' booster (kept for reference):
# params = {'max_depth':14, 'eta':0.03, 'silent':1, 'objective':'binary:logistic', 'nthread':20,
#         'lambda':4, 'subsample':0.9, 'min_child_weight':5, 'booster':'dart', 'alpha':0,
#         'base_score':0.0058, 'nthread':20, 'colsample_bytree':0.6,
#         'sample_type':'uniform', 'normalize_type':'tree', 'rate_drop':0.1, 'skip_drop':0.2, 'one_drop':True}

# cross_val_predict_skf_rm_xgb comes from bosch_helper; presumably it runs
# stratified K-fold CV with repeated models, as the section title says.
cv_results, clfs, running_time = cross_val_predict_skf_rm_xgb(
    params,
    x_train,
    y_train,
    num_boost_round=100,
    n_splits=5,
    n_repeats=3,
    random_state=70864,
    verbose_eval=True,
)

# Persist the models, the per-round scores and the timings together.
results = dict(clfs=clfs, cv_results=cv_results, running_time=running_time)
save_pickle(results, 'results_benchmark_8_cv_1.pickle')

[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.276114	test-MCC:0.265946
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.340044	test-MCC:0.338623
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.346387	test-MCC:0.353783
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.371335	test-MCC:0.382685
[4]	train-error:0.005811	test-error:0.005812	train-MCC:0.380802	test-MCC:0.389524
[5]	train-error:0.005811	test-error:0.005812	train-MCC:0.385607	test-MCC:0.389064
[6]	train-error:0.005811	test-error:0.005812	train-MCC:0.396484	test-MCC:0.393424
[7]	train-error:0.005811	test-error:0.005812	train-MCC:0.403017	test-MCC:0.394805
[8]	train-error:0.005811	test-error:0.005812	train-MCC:0.415024	test-MCC:0.404272
[9]	train-error:0.005811	test-error:0.005812	train-MCC:0.42884	test-MCC:0.414574
[10]	train-error:0.005811	test-error:0.005812	train-MCC:0.432537	test-MCC:0.414227
[11]	train-error:0.005811	test-error:0.005812	train-MCC:0.438036	test-MCC:0.422941
[12]	train-erro

Repeat 0, split 0, test MCC = 0.463, running time = 13.610 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.270624	test-MCC:0.269936
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.316855	test-MCC:0.318785
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.341523	test-MCC:0.353729
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.350434	test-MCC:0.355952
[4]	train-error:0.005811	test-error:0.005812	train-MCC:0.363715	test-MCC:0.362193
[5]	train-error:0.005811	test-error:0.005812	train-MCC:0.376989	test-MCC:0.369106
[6]	train-error:0.005811	test-error:0.005812	train-MCC:0.390496	test-MCC:0.379317
[7]	train-error:0.005811	test-error:0.005812	train-MCC:0.398553	test-MCC:0.387452
[8]	train-error:0.005811	test-error:0.005812	train-MCC:0.408672	test-MCC:0.38681
[9]	train-error:0.005811	test-error:0.005812	train-MCC:0.420724	test-MCC:0.388132
[10]	train-error:0.005811	test-error:0.005812	train-MCC:0.424462	test-MCC:0.39065
[11]	train-error:0.005811	test-error

[99]	train-error:0.004397	test-error:0.00476	train-MCC:0.619839	test-MCC:0.44926
Repeat 0, split 1, test MCC = 0.449, running time = 14.162 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.286845	test-MCC:0.281136
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.316076	test-MCC:0.308784
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.333271	test-MCC:0.325892
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.352129	test-MCC:0.34269
[4]	train-error:0.005811	test-error:0.005812	train-MCC:0.365251	test-MCC:0.345594
[5]	train-error:0.005811	test-error:0.005812	train-MCC:0.378896	test-MCC:0.359648
[6]	train-error:0.005811	test-error:0.005812	train-MCC:0.381073	test-MCC:0.368465
[7]	train-error:0.005811	test-error:0.005812	train-MCC:0.387799	test-MCC:0.370513
[8]	train-error:0.005811	test-error:0.005812	train-MCC:0.395806	test-MCC:0.380384
[9]	train-error:0.005811	test-error:0.005812	train-MCC:0.406147	test-MCC:0.393626
[10]	train-error:0.005811	test-error:

[98]	train-error:0.004356	test-error:0.004684	train-MCC:0.644014	test-MCC:0.45443
[99]	train-error:0.004353	test-error:0.004684	train-MCC:0.646117	test-MCC:0.454942
Repeat 0, split 2, test MCC = 0.455, running time = 13.537 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.279158	test-MCC:0.284718
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.330301	test-MCC:0.336053
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.366782	test-MCC:0.364582
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.372606	test-MCC:0.369124
[4]	train-error:0.005811	test-error:0.005812	train-MCC:0.379202	test-MCC:0.368469
[5]	train-error:0.005811	test-error:0.005812	train-MCC:0.384079	test-MCC:0.372892
[6]	train-error:0.005811	test-error:0.005812	train-MCC:0.390397	test-MCC:0.380461
[7]	train-error:0.005811	test-error:0.005812	train-MCC:0.396981	test-MCC:0.382918
[8]	train-error:0.005811	test-error:0.005812	train-MCC:0.404613	test-MCC:0.386428
[9]	train-error:0.005811	test-erro

[97]	train-error:0.004419	test-error:0.004756	train-MCC:0.63159	test-MCC:0.449895
[98]	train-error:0.004418	test-error:0.004752	train-MCC:0.632569	test-MCC:0.449895
[99]	train-error:0.004409	test-error:0.004748	train-MCC:0.63545	test-MCC:0.449541
Repeat 0, split 3, test MCC = 0.450, running time = 13.532 min
[0]	train-error:0.005812	test-error:0.005808	train-MCC:0.277024	test-MCC:0.272333
[1]	train-error:0.005812	test-error:0.005808	train-MCC:0.347857	test-MCC:0.330106
[2]	train-error:0.005812	test-error:0.005808	train-MCC:0.375342	test-MCC:0.362449
[3]	train-error:0.005812	test-error:0.005808	train-MCC:0.381737	test-MCC:0.369143
[4]	train-error:0.005812	test-error:0.005808	train-MCC:0.384589	test-MCC:0.371033
[5]	train-error:0.005812	test-error:0.005808	train-MCC:0.38618	test-MCC:0.371082
[6]	train-error:0.005812	test-error:0.005808	train-MCC:0.393076	test-MCC:0.372958
[7]	train-error:0.005812	test-error:0.005808	train-MCC:0.401139	test-MCC:0.374536
[8]	train-error:0.005812	test-error

[96]	train-error:0.004453	test-error:0.004722	train-MCC:0.607722	test-MCC:0.44907
[97]	train-error:0.004444	test-error:0.004718	train-MCC:0.6091	test-MCC:0.44907
[98]	train-error:0.00444	test-error:0.004718	train-MCC:0.609692	test-MCC:0.449828
[99]	train-error:0.004434	test-error:0.004714	train-MCC:0.610761	test-MCC:0.448902
Repeat 0, split 4, test MCC = 0.449, running time = 13.510 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.314443	test-MCC:0.311095
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.32891	test-MCC:0.331962
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.363724	test-MCC:0.376077
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.372709	test-MCC:0.378207
[4]	train-error:0.005811	test-error:0.005812	train-MCC:0.39181	test-MCC:0.391262
[5]	train-error:0.005811	test-error:0.005812	train-MCC:0.392766	test-MCC:0.391515
[6]	train-error:0.005811	test-error:0.005812	train-MCC:0.404887	test-MCC:0.397784
[7]	train-error:0.005811	test-error:0.

[95]	train-error:0.004379	test-error:0.00471	train-MCC:0.64826	test-MCC:0.461672
[96]	train-error:0.004369	test-error:0.00471	train-MCC:0.650061	test-MCC:0.460728
[97]	train-error:0.004353	test-error:0.00471	train-MCC:0.652907	test-MCC:0.459983
[98]	train-error:0.004348	test-error:0.004714	train-MCC:0.656311	test-MCC:0.459367
[99]	train-error:0.004342	test-error:0.004705	train-MCC:0.657852	test-MCC:0.459474
Repeat 1, split 0, test MCC = 0.459, running time = 13.683 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.287039	test-MCC:0.28335
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.317235	test-MCC:0.321726
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.339911	test-MCC:0.333447
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.37831	test-MCC:0.366917
[4]	train-error:0.005811	test-error:0.005812	train-MCC:0.384133	test-MCC:0.376579
[5]	train-error:0.005811	test-error:0.005812	train-MCC:0.390242	test-MCC:0.37959
[6]	train-error:0.005811	test-error:0

[94]	train-error:0.004359	test-error:0.004773	train-MCC:0.652332	test-MCC:0.450543
[95]	train-error:0.004353	test-error:0.004769	train-MCC:0.654085	test-MCC:0.450086
[96]	train-error:0.004343	test-error:0.004773	train-MCC:0.656606	test-MCC:0.450755
[97]	train-error:0.004335	test-error:0.004773	train-MCC:0.658385	test-MCC:0.450755
[98]	train-error:0.004333	test-error:0.004777	train-MCC:0.661324	test-MCC:0.450575
[99]	train-error:0.004326	test-error:0.004765	train-MCC:0.663408	test-MCC:0.451196
Repeat 1, split 1, test MCC = 0.451, running time = 13.520 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.277433	test-MCC:0.284046
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.301488	test-MCC:0.309403
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.338415	test-MCC:0.343867
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.355191	test-MCC:0.347346
[4]	train-error:0.005811	test-error:0.005812	train-MCC:0.373037	test-MCC:0.369137
[5]	train-error:0.005811	test

[93]	train-error:0.004402	test-error:0.004714	train-MCC:0.639312	test-MCC:0.453738
[94]	train-error:0.004401	test-error:0.004705	train-MCC:0.641262	test-MCC:0.454276
[95]	train-error:0.004396	test-error:0.004697	train-MCC:0.642646	test-MCC:0.454306
[96]	train-error:0.004391	test-error:0.004701	train-MCC:0.645491	test-MCC:0.453274
[97]	train-error:0.004383	test-error:0.004697	train-MCC:0.648067	test-MCC:0.453065
[98]	train-error:0.004382	test-error:0.004693	train-MCC:0.650287	test-MCC:0.453229
[99]	train-error:0.004376	test-error:0.004688	train-MCC:0.651184	test-MCC:0.453702
Repeat 1, split 2, test MCC = 0.454, running time = 13.780 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.306816	test-MCC:0.293777
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.334709	test-MCC:0.330268
[2]	train-error:0.005811	test-error:0.005812	train-MCC:0.366746	test-MCC:0.350418
[3]	train-error:0.005811	test-error:0.005812	train-MCC:0.369252	test-MCC:0.360459
[4]	train-error:0.005811	tes

[92]	train-error:0.004415	test-error:0.004803	train-MCC:0.636124	test-MCC:0.444733
[93]	train-error:0.004413	test-error:0.004803	train-MCC:0.637019	test-MCC:0.444733
[94]	train-error:0.004412	test-error:0.00479	train-MCC:0.637819	test-MCC:0.445486
[95]	train-error:0.004408	test-error:0.004781	train-MCC:0.639744	test-MCC:0.445224
[96]	train-error:0.004403	test-error:0.004781	train-MCC:0.64182	test-MCC:0.445955
[97]	train-error:0.004394	test-error:0.004777	train-MCC:0.643532	test-MCC:0.446464
[98]	train-error:0.004391	test-error:0.004777	train-MCC:0.644122	test-MCC:0.446464
[99]	train-error:0.004388	test-error:0.004781	train-MCC:0.646809	test-MCC:0.447317
Repeat 1, split 3, test MCC = 0.447, running time = 15.007 min
[0]	train-error:0.005812	test-error:0.005808	train-MCC:0.255422	test-MCC:0.253154
[1]	train-error:0.005812	test-error:0.005808	train-MCC:0.32866	test-MCC:0.320771
[2]	train-error:0.005812	test-error:0.005808	train-MCC:0.348781	test-MCC:0.335598
[3]	train-error:0.005812	test-

[91]	train-error:0.004451	test-error:0.00471	train-MCC:0.604349	test-MCC:0.452056
[92]	train-error:0.004444	test-error:0.004714	train-MCC:0.606622	test-MCC:0.451898
[93]	train-error:0.004435	test-error:0.004714	train-MCC:0.609117	test-MCC:0.450786
[94]	train-error:0.004432	test-error:0.004718	train-MCC:0.610473	test-MCC:0.451597
[95]	train-error:0.004427	test-error:0.004722	train-MCC:0.61347	test-MCC:0.451007
[96]	train-error:0.004422	test-error:0.004727	train-MCC:0.615368	test-MCC:0.453103
[97]	train-error:0.004417	test-error:0.004722	train-MCC:0.617631	test-MCC:0.452352
[98]	train-error:0.00441	test-error:0.004727	train-MCC:0.621102	test-MCC:0.453852
[99]	train-error:0.004402	test-error:0.004727	train-MCC:0.622981	test-MCC:0.453103
Repeat 1, split 4, test MCC = 0.453, running time = 17.662 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.305861	test-MCC:0.303369
[1]	train-error:0.005811	test-error:0.005812	train-MCC:0.316676	test-MCC:0.315015
[2]	train-error:0.005811	test

[90]	train-error:0.004414	test-error:0.004659	train-MCC:0.637185	test-MCC:0.45865
[91]	train-error:0.004407	test-error:0.004659	train-MCC:0.63896	test-MCC:0.45919
[92]	train-error:0.004404	test-error:0.00465	train-MCC:0.639145	test-MCC:0.4592
[93]	train-error:0.004399	test-error:0.004655	train-MCC:0.639851	test-MCC:0.458813
[94]	train-error:0.004392	test-error:0.004655	train-MCC:0.642399	test-MCC:0.458813
[95]	train-error:0.004386	test-error:0.004655	train-MCC:0.644071	test-MCC:0.459164
[96]	train-error:0.004383	test-error:0.00465	train-MCC:0.646758	test-MCC:0.459
[97]	train-error:0.004374	test-error:0.004655	train-MCC:0.649116	test-MCC:0.458895
[98]	train-error:0.004372	test-error:0.00465	train-MCC:0.65091	test-MCC:0.459518
[99]	train-error:0.004371	test-error:0.00465	train-MCC:0.652165	test-MCC:0.459518
Repeat 2, split 0, test MCC = 0.460, running time = 22.574 min
[0]	train-error:0.005811	test-error:0.005812	train-MCC:0.259939	test-MCC:0.257936
[1]	train-error:0.005811	test-error:0.

[89]	train-error:0.004386	test-error:0.00476	train-MCC:0.627655	test-MCC:0.449892
[90]	train-error:0.004385	test-error:0.00476	train-MCC:0.62934	test-MCC:0.448629
[91]	train-error:0.004385	test-error:0.00476	train-MCC:0.632364	test-MCC:0.448186
[92]	train-error:0.004376	test-error:0.00476	train-MCC:0.634759	test-MCC:0.449537
[93]	train-error:0.004374	test-error:0.00476	train-MCC:0.638057	test-MCC:0.449754
[94]	train-error:0.00437	test-error:0.00476	train-MCC:0.638667	test-MCC:0.450105
[95]	train-error:0.004361	test-error:0.004752	train-MCC:0.641794	test-MCC:0.44919
[96]	train-error:0.004358	test-error:0.004752	train-MCC:0.642889	test-MCC:0.449537
[97]	train-error:0.004353	test-error:0.004748	train-MCC:0.645986	test-MCC:0.449537
[98]	train-error:0.004348	test-error:0.004752	train-MCC:0.649576	test-MCC:0.448629
[99]	train-error:0.004344	test-error:0.004748	train-MCC:0.650673	test-MCC:0.448523
Repeat 2, split 1, test MCC = 0.449, running time = 19.730 min


In [None]:
# Row-wise mean and standard deviation of the CV scores, for the train and
# test parts of cv_results.
cv_train_mean, cv_test_mean = (
    cv_results[part].mean(axis=1) for part in ('train', 'test')
)
cv_train_std, cv_test_std = (
    cv_results[part].std(axis=1) for part in ('train', 'test')
)

In [None]:
# Mean CV score with a +/- one standard deviation band, for train and test.
# The x axis is the row position in the CV results (presumably the boosting
# round - confirm against the helper).
fig, ax = plt.subplots(figsize=(14, 7))
rounds = np.arange(len(cv_train_mean))

ax.plot(rounds, cv_train_mean)
ax.fill_between(
    rounds,
    cv_train_mean - cv_train_std,
    cv_train_mean + cv_train_std,
    alpha=0.5,
)

ax.plot(rounds, cv_test_mean)
ax.fill_between(
    np.arange(len(cv_test_mean)),
    cv_test_mean - cv_test_std,
    cv_test_mean + cv_test_std,
    alpha=0.5,
)

ax.legend(['train', 'test'])

In [None]:
# Train the final model on the full training set with a fixed seed.
dtrain = xgb.DMatrix(x_train, label=y_train)
params.update(seed=28537894)
clf = xgb.train(
    params,
    dtrain,
    num_boost_round=60,
    feval=mcc_eval,
    evals=[(dtrain, 'train')],
)

y_train_pred = clf.predict(dtrain)

# Scan 400 candidate thresholds and keep the one with the highest MCC.
# NOTE(review): the threshold is tuned on predictions for the same rows the
# model was trained on, so it may be optimistic - consider out-of-fold
# predictions instead.
thresholds = np.linspace(0.01, 0.99, 400)
mcc = np.array([
    matthews_corrcoef(y_train, y_train_pred > thr)
    for thr in thresholds
])
plt.plot(thresholds, mcc)
best_threshold = thresholds[np.argmax(mcc)]

print('Optimal MCC = {:.3f}'.format(mcc.max()))
print('Optimal threshold = {:.3f}'.format(best_threshold))

### Predict on test data

In [None]:
# Score the test set and binarise with the threshold found on the training set.
dtest = xgb.DMatrix(x_test)
y_test_pred = clf.predict(dtest)
y_test_pred_int = (y_test_pred > best_threshold).astype(int)

sub = pd.read_csv("../../data/sample_submission.csv.zip", index_col=0)

# The predictions are written into the submission by position, so the rows of
# x_test must be in exactly the same Id order as the sample submission.
# Fail loudly instead of silently producing a misaligned submission.
assert np.array_equal(sub.index.values, x_test.index.values), (
    'x_test rows are not in the same Id order as the sample submission'
)
sub["Response"] = y_test_pred_int

# NOTE(review): the file name says 'benchmark_7' although the other artefacts
# of this notebook are named 'benchmark_8' - confirm the intended name.
sub.to_csv("15-benchmark_7_submission_1.csv.gz", compression="gzip")

In [None]:
# Look up the CV test score at selected rows. Only the last expression of a
# cell is displayed, so the four values are collected into one mapping instead
# of four bare expressions (the first three would be computed and discarded).
# NOTE(review): the final model is trained with num_boost_round=60; if the rows
# are 0-based boosting rounds, its last round is row 59 - confirm that row 60
# is the intended one.
{
    'test mean + std @ 60': cv_test_mean[60] + cv_test_std[60],
    'test mean - std @ 60': cv_test_mean[60] - cv_test_std[60],
    'test mean @ 79': cv_test_mean[79],
    'test std @ 60': cv_test_std[60],
}