In [1]:
import datetime
import numpy as np
import os
import pandas as pd
import pickle

def read_from_disk(path, filename):
    """Unpickle and return the object stored at ``path/filename``.

    Args:
        path: Directory that contains the pickle file.
        filename: Name of the pickle file inside ``path``.

    Returns:
        Whatever object was pickled into the file.

    Note:
        ``pickle.load`` can execute arbitrary code while deserialising, so
        only point this at files from a source you trust.
    """
    full_path = os.path.join(path, filename)
    with open(full_path, 'rb') as source:
        loaded = pickle.load(source)
    return loaded
    
def save_to_disk(obj, filename):
    """Pickle ``obj`` into ``filename`` using the highest available protocol.

    Args:
        obj: Any picklable Python object.
        filename: Destination file path; the file is opened in binary write
            mode, so an existing file is overwritten.
    """
    with open(filename, mode='wb') as sink:
        pickle.dump(obj, sink, pickle.HIGHEST_PROTOCOL)

In [2]:
# Read the two raw training tables (identity and transaction) from the
# competition input directory, then display both shapes as a sanity check.
id_train = pd.read_csv('../input/ieee-fraud-detection/train_identity.csv')
trans_train = pd.read_csv('../input/ieee-fraud-detection/train_transaction.csv')
(trans_train.shape, id_train.shape)

((590540, 394), (144233, 41))

In [3]:
# Left-join the identity table onto the transactions by TransactionID so every
# transaction row is kept, whether or not it has a matching identity record.
df_train = pd.merge(trans_train, id_train, on='TransactionID', how='left')
df_train.shape

(590540, 434)

In [4]:
# Build a 'YYYY-MM' month label for every training row.
# TransactionDT is treated as an offset in seconds from START_DATE.
START_DATE = '2017-11-30'
startdate = datetime.datetime.strptime(START_DATE, '%Y-%m-%d')
# Vectorised timedelta arithmetic instead of a per-row Python lambda: the
# resulting timestamps are the same, without calling Python code per row.
TransactionDT1 = startdate + pd.to_timedelta(df_train['TransactionDT'], unit='s')
# Format straight to 'YYYY-MM' rather than going through date objects and
# slicing their string form.
dt_m = TransactionDT1.dt.strftime('%Y-%m')

In [5]:
# List the contents of the input directory that holds the pickled predictions
# (the file shown here is the one loaded in the next cell).
os.listdir('../input/fraud-kirill-yakovlev-9438')

['yakovlev_mean0.9438_median0.9464_std0.0132_all.pickle']

In [6]:
# Load the pickled prediction bundle. read_from_disk unpickles the file, and
# unpickling can run arbitrary code, so the file must come from a trusted source.
preds_dir = '../input/fraud-kirill-yakovlev-9438'
preds_file = 'yakovlev_mean0.9438_median0.9464_std0.0132_all.pickle'
kirill_preds = read_from_disk(preds_dir, preds_file)

In [7]:
# Echo the loaded object. Per the output below it is a dict with 'oof',
# 'prediction', 'prediction_folds' and 'scores' entries.
kirill_preds

{'oof': array([[0.01104611],
        [0.00654405],
        [0.00262107],
        ...,
        [0.00211418],
        [0.02492638],
        [0.00795693]]), 'prediction': array([[0.0005176 ],
        [0.00332938],
        [0.00165743],
        ...,
        [0.00522938],
        [0.00925344],
        [0.00540673]]), 'prediction_folds': array([[0.00047298, 0.00044827, 0.00051444, 0.00070141, 0.00062471,
         0.00034381],
        [0.00255368, 0.00375722, 0.00259144, 0.00420637, 0.00380816,
         0.00305941],
        [0.00164968, 0.00160534, 0.00165364, 0.0015689 , 0.00187722,
         0.00158978],
        ...,
        [0.00561027, 0.0043187 , 0.00447416, 0.00526921, 0.00551344,
         0.00619049],
        [0.00782586, 0.01071592, 0.00976611, 0.01382718, 0.00712754,
         0.00625805],
        [0.00688522, 0.00411063, 0.00545405, 0.00727351, 0.00546601,
         0.003251  ]]), 'scores': [0.9168432093484126,
  0.9475702867711033,
  0.945206647182937,
  0.9428825376800377,
  0.950581

In [8]:
# Split the out-of-fold predictions into one pickle per calendar month.
# np.unique returns the 'YYYY-MM' labels sorted, so fold_n follows the months
# in ascending (chronological) order.
oof_pred = kirill_preds['oof'][:, 0]
# The OOF vector has to line up row-for-row with df_train; otherwise the
# per-month masks below would silently pick the wrong predictions.
assert len(oof_pred) == len(df_train), 'OOF predictions and df_train differ in length'

for fold_n, month in enumerate(np.unique(dt_m)):
    month_mask = (dt_m == month).values
    X_val = df_train[month_mask]
    # Select by positional boolean mask rather than by X_val.index labels:
    # labels only coincide with array positions for a default 0..n-1 index.
    y_val = oof_pred[month_mask]
    print(y_val.shape)
    save_to_disk(y_val, 'y_pred_valid_fold{}.pkl'.format(fold_n))

(137321,)
(92585,)
(86021,)
(101632,)
(83655,)
(89326,)


Order of months: 12, 3, 1, 5, 2, 4 (used below to put the test-prediction fold columns into calendar order).

In [9]:
# Check the shape of the 'prediction_folds' array (presumably test rows by
# fold columns; the next cell slices it one column at a time).
kirill_preds['prediction_folds'].shape

(506691, 6)

In [10]:
# Write one test-prediction vector per fold, reordering the columns of
# 'prediction_folds' on the way.
# NOTE(review): the column order presumably follows the "Order of months"
# note above this cell (12, 3, 1, 5, 2, 4), so taking columns 0, 2, 4, 1, 5, 3
# yields calendar order Dec, Jan, Feb, Mar, Apr, May. Confirm against the
# model that produced the pickle.
y_test = kirill_preds['prediction_folds']
column_order = [0, 2, 4, 1, 5, 3]
y_test_array = [y_test[:, column] for column in column_order]

for fold_n, fold_pred in enumerate(y_test_array):
    save_to_disk(fold_pred, 'y_pred_test_fold{}.pkl'.format(fold_n))