In [1]:
from functools import partial

import datetime
import lightgbm as lgb
import numpy as np
import os
import pandas as pd
import pickle
import random
import time

from copy import deepcopy
from scipy.stats import rankdata
from sklearn.metrics import roc_auc_score

In [2]:
def seed_everything(seed=13):
    """Seed the random number generators used in this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global generator, and
    stores the seed as a string in the ``PYTHONHASHSEED`` environment variable.

    Args:
        seed: Value handed to every seeder (default 13).

    NOTE(review): the interpreter reads PYTHONHASHSEED at startup, so the
    assignment below presumably only influences child processes - confirm.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)
    
    
def read_from_disk(path, filename):
    """Load and return the pickled object stored in ``path/filename``.

    Args:
        path: Directory containing the pickle file.
        filename: Name of the pickle file inside ``path``.

    Returns:
        The object reconstructed by ``pickle.load``.

    Unpickling can execute arbitrary code, so only use this on trusted files.
    """
    source = os.path.join(path, filename)
    with open(source, 'rb') as stream:
        loaded = pickle.load(stream)
    return loaded
    
    
def save_to_disk(obj, filename):
    """Pickle ``obj`` into the file ``filename``.

    The file is opened in binary write mode (an existing file is overwritten)
    and the object is serialised with ``pickle.HIGHEST_PROTOCOL``.

    Args:
        obj: Any picklable object.
        filename: Destination path of the pickle file.
    """
    with open(filename, 'wb') as stream:
        pickler = pickle.Pickler(stream, protocol=pickle.HIGHEST_PROTOCOL)
        pickler.dump(obj)

def timedelta(seconds):
    """Format a duration in seconds as text without the fractional part.

    Args:
        seconds: Duration in seconds (int or float).

    Returns:
        ``str(datetime.timedelta(seconds=seconds))`` truncated at the first
        ``'.'``, e.g. ``'1:01:01'`` for 3661.5 seconds.
    """
    rendered = str(datetime.timedelta(seconds=seconds))
    # Keep only what precedes the first '.', i.e. drop the microseconds.
    whole, _, _ = rendered.partition('.')
    return whole

In [3]:
# Display the entries of ../input (the directory the cells below read from).
os.listdir('../input')

['fraud-stack-lgbm-26', 'ieee-fraud-detection', 'fraud-kirill-postprocessing']

In [4]:
# Load the pickled test frame that supplies the grouping-key columns, then
# report its dimensions as a quick sanity check.
df_test_postprocessed = read_from_disk(
    path='../input/fraud-kirill-postprocessing',
    filename='df_test_postprocessed.pkl',
)
print(df_test_postprocessed.shape)

(506691, 446)


In [5]:
# Default columns whose joint value defines a group of rows; predictions are
# averaged within each group.
list_cols = ['card1', 'card2', 'card3', 'card4', 'card5', 'card6',
             'day_first_trans', 'first_trans_amt', 'addr1', 'id_01',
             'DeviceInfo', 'P_emaildomain']


def postprocess_y(df, y, cols=None):
    """Replace each prediction by the mean prediction of its group.

    Rows are grouped by the joint value of the key columns and every row
    receives the mean of ``y`` over its group.

    Args:
        df: DataFrame containing the key columns; it is not modified.
        y: One prediction per row of ``df``, in row order. Values are assigned
            positionally, so any index carried by ``y`` is ignored.
        cols: Key columns to group by. Defaults to the module-level
            ``list_cols``.

    Returns:
        A NumPy array with one smoothed prediction per row of ``df``.

    Raises:
        KeyError: If a key column is missing from ``df``.
        ValueError: If ``y`` does not have one value per row of ``df``.
    """
    keys = list(list_cols if cols is None else cols)
    y_orig = np.asarray(y)

    # Only the key columns are needed; avoids copying and merging the whole
    # (potentially very wide) frame.
    frame = df[keys].copy()
    frame['prediction'] = y_orig

    group_means = (
        frame.groupby(keys, as_index=False)
        .agg({'prediction': 'mean'})
        .rename(columns={'prediction': 'prediction_group'})
    )
    # A left merge keeps one output row per input row, in the original order.
    y_new = frame.merge(group_means, how='left', on=keys)['prediction_group'].values

    # groupby drops groups whose key contains NaN, so those rows find no match
    # in the merge and come back as NaN. Keep their original prediction rather
    # than emitting NaN.
    unmatched = pd.isnull(y_new)
    if unmatched.any():
        y_new = np.where(unmatched, y_orig, y_new)
    return y_new

In [6]:
# Read the stacked model's submission file.
sub = pd.read_csv('../input/fraud-stack-lgbm-26/submission.csv')

# Smooth the raw scores with postprocess_y; this relies on the submission rows
# being in the same order as df_test_postprocessed.
y_new = postprocess_y(df=df_test_postprocessed, y=sub['isFraud'].values)

# Overwrite the score column and write the final submission without the index.
sub = sub.assign(isFraud=y_new)
sub.to_csv(path_or_buf='submission.csv', index=False)