In [1]:
import datetime
import numpy as np
import os
import pandas as pd
import pickle

def read_from_disk(path, filename):
    """Load and return the object pickled in the file ``filename`` inside ``path``.

    Parameters
    ----------
    path : str
        Directory that contains the pickle file.
    filename : str
        Name of the pickle file within ``path``.

    Returns
    -------
    object
        Whatever ``pickle.load`` reconstructs from the file.

    Notes
    -----
    Unpickling can execute arbitrary code, so only call this on files from
    a trusted source.
    """
    pickle_path = os.path.join(path, filename)
    with open(pickle_path, mode='rb') as source:
        restored = pickle.load(source)
    return restored
    
def save_to_disk(obj, filename):
    """Pickle ``obj`` into the file ``filename`` using the highest pickle protocol.

    Parameters
    ----------
    obj : object
        Any picklable object.
    filename : str
        Destination path; the file is opened in ``'wb'`` mode, so an
        existing file at that path is overwritten.

    Returns
    -------
    None
    """
    newest_protocol = pickle.HIGHEST_PROTOCOL
    with open(filename, mode='wb') as sink:
        pickle.dump(obj, sink, protocol=newest_protocol)

In [2]:
# Raw competition tables: one CSV of transactions and one of identity records.
transaction_csv = '../input/ieee-fraud-detection/train_transaction.csv'
identity_csv = '../input/ieee-fraud-detection/train_identity.csv'

trans_train = pd.read_csv(transaction_csv)
id_train = pd.read_csv(identity_csv)

# Show both shapes as a quick sanity check on what was loaded.
(trans_train.shape, id_train.shape)

((590540, 394), (144233, 41))

In [3]:
# Attach identity columns to every transaction. A left join keeps all rows of
# trans_train in their original order and gives the result a fresh 0..n-1 index.
# validate='many_to_one' makes pandas raise if TransactionID is duplicated in
# id_train, which would otherwise silently duplicate transaction rows and break
# the positional alignment with the prediction arrays used further down.
df_train = trans_train.merge(
    id_train,
    how='left',
    on='TransactionID',
    validate='many_to_one',
)
df_train.shape

(590540, 434)

In [4]:
# Reference date that the relative TransactionDT values are added to.
START_DATE = '2017-11-30'
startdate = datetime.datetime.strptime(START_DATE, '%Y-%m-%d')

# TransactionDT is interpreted as a number of seconds after START_DATE.
# The conversion is vectorised: one to_timedelta call instead of building a
# Python timedelta per row with .apply.
TransactionDT1 = pd.Timestamp(startdate) + pd.to_timedelta(df_train['TransactionDT'], unit='s')

# 'YYYY-MM' label of the calendar month each transaction falls in.
dt_m = TransactionDT1.dt.strftime('%Y-%m')

In [5]:
# List the contents of the predictions dataset to find the pickle's file name.
kirill_dir = '../input/kirill-best-9459'
os.listdir(kirill_dir)

['clean_mean0.9459_median0.9498_std0.0121_all.pickle']

In [6]:
# Load the pickled predictions. This unpickles the file, so the dataset it
# comes from must be trusted.
kirill_preds = read_from_disk(
    path='../input/kirill-best-9459',
    filename='clean_mean0.9459_median0.9498_std0.0121_all.pickle',
)

In [7]:
# Display the loaded object in full. The recorded output shows a dict with the
# keys 'oof', 'prediction', 'prediction_folds' and 'scores'.
kirill_preds

{'oof': array([[0.12443554],
        [0.01030749],
        [0.00466291],
        ...,
        [0.00153626],
        [0.00996601],
        [0.00915086]]), 'prediction': array([[0.00060136],
        [0.00045988],
        [0.00187167],
        ...,
        [0.00476554],
        [0.00534487],
        [0.01044222]]), 'prediction_folds': array([[0.00071043, 0.00059837, 0.00055918, 0.00052857, 0.00083778,
         0.00037384],
        [0.00047077, 0.00057333, 0.00030442, 0.0003718 , 0.00064038,
         0.00039858],
        [0.00191073, 0.00208861, 0.00192571, 0.00135421, 0.00240772,
         0.00154302],
        ...,
        [0.00567823, 0.00356015, 0.00424526, 0.00423881, 0.005166  ,
         0.00570477],
        [0.00674979, 0.00440649, 0.00573602, 0.00550563, 0.00556297,
         0.00410832],
        [0.01168007, 0.00747009, 0.00862406, 0.00955369, 0.01699577,
         0.00832966]]), 'scores': [0.9220145653582257,
  0.9492834835908349,
  0.9503685236944747,
  0.9408870069394141,
  0.95279

In [8]:
# Out-of-fold predictions as a flat vector (the 'oof' array has a single column).
oof_preds = kirill_preds['oof'][:, 0]

# One validation fold per calendar month. np.unique returns the 'YYYY-MM' labels
# sorted, so fold numbers follow chronological order.
for fold_n, month in enumerate(np.unique(dt_m)):
    # Select rows by position with a boolean mask. This avoids copying a slice
    # of the full df_train for every month just to read its index, and it does
    # not depend on the DataFrame index labels being equal to row positions.
    month_mask = (dt_m == month).values
    y_val = oof_preds[month_mask]
    print(y_val.shape)
    save_to_disk(y_val, 'y_pred_valid_fold{}.pkl'.format(fold_n))

(137321,)
(92585,)
(86021,)
(101632,)
(83655,)
(89326,)


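Order of months: 12, 3, 1, 5, 2, 4. This appears to be the month associated with each column of `kirill_preds['prediction_folds']`, in column order; the column permutation `[0, 2, 4, 1, 5, 3]` used below rearranges them into chronological order (12, 1, 2, 3, 4, 5), matching the validation fold numbering above.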

In [9]:
# Check the layout of the per-fold test predictions before splitting them up;
# the recorded output is (506691, 6).
kirill_preds['prediction_folds'].shape

(506691, 6)

In [10]:
y_test = kirill_preds['prediction_folds']

# Column positions of y_test, listed in the order the fold files are numbered.
# NOTE(review): presumably this maps the columns (months 12, 3, 1, 5, 2, 4 per
# the "Order of months" note) onto chronological order — confirm.
column_order = (0, 2, 4, 1, 5, 3)
y_test_array = [y_test[:, column] for column in column_order]

# Write one pickle per fold, numbered by position in y_test_array.
for fold_n, fold_preds in enumerate(y_test_array):
    save_to_disk(fold_preds, 'y_pred_test_fold{}.pkl'.format(fold_n))