In [4]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

import time
import datetime
import dateutil.relativedelta

import gc
import warnings

# Silence every warning category for the rest of the session (this also hides
# warnings raised by pandas itself).
warnings.simplefilter('ignore')

# Display options: no limit on the rows/columns rendered for a frame, width
# left unset (None), and DataFrame.info() lists up to 200 columns.
pd.options.display.width = None
pd.options.display.max_rows = None
pd.options.display.max_columns = None
pd.options.display.max_info_columns = 200

In [38]:
def read_data(input_file):
    """Load a CSV file and parse its ``first_active_month`` column as datetimes.

    Parameters
    ----------
    input_file : anything ``pd.read_csv`` accepts (path or file-like object).

    Returns
    -------
    pd.DataFrame
        The table as read, with ``first_active_month`` converted by
        ``pd.to_datetime`` (missing entries become ``NaT``).
    """
    frame = pd.read_csv(input_file)
    return frame.assign(first_active_month=pd.to_datetime(frame['first_active_month']))

def sub_month_lag(x):
    """Shift a row's first purchase date back by its month lag.

    ``x`` is a row (or any mapping) providing ``purchase_date_first`` and
    ``month_lag_first``. The result is ``purchase_date_first`` minus
    ``month_lag_first`` calendar months, so a negative lag moves the date
    forward.
    """
    lag = dateutil.relativedelta.relativedelta(months=x['month_lag_first'])
    return x['purchase_date_first'] - lag

def get_r(history):
    """Derive one ``reference_date`` per ``card_id`` from a transaction table.

    For each card the first row's ``purchase_date`` and ``month_lag`` (in the
    table's existing row order) are combined: the purchase date is shifted by
    ``-month_lag`` months via ``sub_month_lag`` and then moved to the last day
    of the resulting month. The time of day of that purchase is not reset.

    Returns a DataFrame with the columns ``card_id`` and ``reference_date``.
    """
    first_seen = history.groupby(['card_id']).agg({
        'month_lag': ['first', ],
        'purchase_date': ['first', ],
    })
    # Flatten the (column, aggregation) MultiIndex into 'column_aggregation'.
    first_seen.columns = ['_'.join(col).strip() for col in first_seen.columns.values]
    first_seen = first_seen.reset_index()

    # day=1, +1 month, -1 day lands on the last day of the current month.
    to_month_end = dateutil.relativedelta.relativedelta(day=1, months=+1, days=-1)
    first_seen['reference_date'] = first_seen.apply(sub_month_lag, axis=1)
    first_seen['reference_date'] = first_seen['reference_date'].apply(lambda stamp: stamp + to_month_end)

    return first_seen.drop(columns=['month_lag_first', 'purchase_date_first'])

def binarize(df):
    """Encode ``authorized_flag`` and ``category_1`` as 1/0, in place.

    ``'Y'`` becomes 1 and ``'N'`` becomes 0. Values that are already 1 or 0
    are kept, so running the function (or the notebook cell calling it) a
    second time no longer turns both columns into NaN. Any other value maps
    to NaN, as ``Series.map`` does for keys missing from the mapping.

    Parameters
    ----------
    df : pd.DataFrame containing both columns; it is modified in place.

    Returns
    -------
    pd.DataFrame
        The same ``df`` object, for call chaining.
    """
    flag_codes = {'Y': 1, 'N': 0, 1: 1, 0: 0}
    for col in ['authorized_flag', 'category_1']:
        df[col] = df[col].map(flag_codes)
    return df

def date2date(start, end):
    """Return ``start - end`` in whole calendar days, row by row.

    Only the calendar date of each timestamp counts; the time of day is
    ignored (23:59 on one day to 00:01 on the next is 1 day apart).

    Parameters
    ----------
    start, end : datetime-like pd.Series aligned on the same index.

    Returns
    -------
    pd.Series
        Day differences; rows where either side is NaT come out as NaN.
    """
    def _calendar_day(stamps):
        # Drop a timezone (keeping local wall time) so only the local calendar
        # date matters, then truncate to midnight. This stays in datetime64
        # instead of building one Python date object per row via .dt.date.
        if stamps.dt.tz is not None:
            stamps = stamps.dt.tz_localize(None)
        return stamps.dt.normalize()

    return (_calendar_day(start) - _calendar_day(end)).dt.days

def summary(df):
    """Build a one-row-per-column overview of ``df``.

    Returned columns:
      feature - column name
      unique  - number of distinct non-null values
      missing - percentage of null entries
      mode    - percentage of rows taken by the most frequent value
                (nulls count as a value)
      type    - dtype of the column
    """
    n_rows = df.shape[0]

    def _describe(col):
        series = df[col]
        top_share = series.value_counts(normalize=True, dropna=False).values[0]
        return (col,
                series.nunique(),
                series.isnull().sum() * 100 / n_rows,
                top_share * 100,
                series.dtype)

    return pd.DataFrame([_describe(col) for col in df.columns],
                        columns=['feature', 'unique', 'missing', 'mode', 'type'])

In [6]:
# Load both transaction tables from ./data; `purchase_date` is parsed to
# datetime while reading.
historical_transactions = pd.read_csv('./data/historical_transactions.csv', parse_dates=['purchase_date'])
new_transactions = pd.read_csv('./data/new_merchant_transactions.csv', parse_dates=['purchase_date'])

In [7]:
# Encode the Y/N columns `authorized_flag` and `category_1` as 1/0 in both
# tables (binarize modifies the frame it receives and returns it).
historical_transactions = binarize(historical_transactions)
new_transactions = binarize(new_transactions)

In [8]:
# 1st/5th/50th/95th/99th percentiles of purchase_amount in the historical table.
np.percentile(historical_transactions['purchase_amount'], [1, 5, 50, 95, 99])

array([-0.74324133, -0.73819242, -0.68834948, -0.16861249,  1.22084097])

In [9]:
# Same percentiles of purchase_amount for the new-merchant table.
np.percentile(new_transactions['purchase_amount'], [1, 5, 50, 95, 99])

array([-0.74239984, -0.73638924, -0.67484064, -0.08574128,  1.4628174 ])

In [10]:
# Cap purchase_amount at 1.5 (a little above the 99th percentiles printed in
# the cells above). Series.clip is vectorised, unlike a Python lambda applied
# to every row, and leaves NaN untouched just as min(x, 1.5) did.
historical_transactions['purchase_amount'] = historical_transactions['purchase_amount'].clip(upper=1.5)

In [11]:
# Cap purchase_amount at 1.5 in the new-merchant table as well. Series.clip is
# vectorised, unlike a Python lambda applied to every row, and leaves NaN
# untouched just as min(x, 1.5) did.
new_transactions['purchase_amount'] = new_transactions['purchase_amount'].clip(upper=1.5)

In [12]:
# Historical table: missing category_2 -> -1; category_3 letters A/B/C -> 0/1/2
# with anything unmapped or missing -> -1; missing merchant_id -> the string '-1'.
historical_transactions['category_2'] = historical_transactions['category_2'].fillna(-1)
historical_transactions['category_3'] = (
    historical_transactions['category_3'].map({'A':0, 'B':1, 'C':2}).fillna(-1)
)
historical_transactions['merchant_id'] = historical_transactions['merchant_id'].fillna('-1')

# New-merchant table: identical treatment.
new_transactions['category_2'] = new_transactions['category_2'].fillna(-1)
new_transactions['category_3'] = (
    new_transactions['category_3'].map({'A':0, 'B':1, 'C':2}).fillna(-1)
)
new_transactions['merchant_id'] = new_transactions['merchant_id'].fillna('-1')

In [13]:
# Load the card-level train/test tables (first_active_month parsed to datetime
# by read_data) and keep a handle on the training target column.
train = read_data('./data/train.csv')
test = read_data('./data/test.csv')
target = train['target']

In [14]:
# Inspect the one test card handled specially in the next cell.
test[test['card_id']=='C_ID_c27b4f80f7']

Unnamed: 0,first_active_month,card_id,feature_1,feature_2,feature_3
11578,NaT,C_ID_c27b4f80f7,5,2,1


In [15]:
# The card shown above has no first_active_month (NaT); set it to 2017-03-01.
# One .loc call writes into `test` itself. The chained form
# test[col][mask] = value may assign to a temporary copy and leave `test`
# unchanged, and the warning about it is silenced by the filter set at the top.
test.loc[test['card_id'] == 'C_ID_c27b4f80f7', 'first_active_month'] = pd.Timestamp('2017-03-01')

In [16]:
# card_id -> first_active_month lookups taken from train and test.
train_a = train[['card_id', 'first_active_month']]
test_a = test[['card_id', 'first_active_month']]

In [17]:
# Stack both lookups and drop exact duplicate (card_id, first_active_month) rows.
# NOTE(review): a card_id carrying two different first_active_month values would
# survive twice and duplicate transaction rows in the merge that follows -- confirm
# card_id is unique across train and test.
tmp_a = pd.concat([train_a, test_a]).drop_duplicates().reset_index(drop=True)

In [18]:
# Attach each card's first_active_month to every transaction row; the left join
# keeps transactions whose card_id is absent from the lookup (value left missing).
historical_transactions = historical_transactions.merge(tmp_a, on='card_id', how='left')
new_transactions = new_transactions.merge(tmp_a, on='card_id', how='left')

In [19]:
# Make sure purchase_date is datetime. The loading cell already passes
# parse_dates=['purchase_date'], so this should be a no-op if that parse succeeded.
historical_transactions['purchase_date'] = pd.to_datetime(historical_transactions['purchase_date'])
new_transactions['purchase_date'] = pd.to_datetime(new_transactions['purchase_date'])

In [20]:
# One reference_date per card, derived from the historical transactions only.
hist_r = get_r(historical_transactions)

In [21]:
# Attach the per-card reference_date to both tables. The new-merchant rows reuse
# the date computed from the historical table, so a card without historical
# transactions gets a missing reference_date.
historical_transactions = historical_transactions.merge(hist_r, on='card_id', how='left')
new_transactions = new_transactions.merge(hist_r, on='card_id', how='left')

In [22]:
# Aggregation lists shared by the per-card groupby calls below.
# 0/1 columns: the mean is the share of ones.
binary_func = ['mean', ]

# Categorical columns: number of distinct values.
category_func = ['nunique', ]

# Numeric ("contiguous", i.e. continuous) columns: location and spread.
contiguous_func = ['mean', 'median', 'max', 'min', 'std', ]

In [23]:
# Share of authorized transactions per card (mean of the 0/1 authorized_flag)
# over the historical table, joined onto train and test as authorized_flag_mean.
agg_fun = {'authorized_flag': binary_func}
auth_mean = historical_transactions.groupby(['card_id']).agg(agg_fun)
# Flatten the (column, aggregation) MultiIndex into 'column_aggregation'.
auth_mean.columns = ['_'.join(col).strip() for col in auth_mean.columns.values]
auth_mean = auth_mean.reset_index()

train = train.merge(auth_mean, on='card_id', how='left')
test = test.merge(auth_mean, on='card_id', how='left')

In [24]:
# Row-level features added to both transaction tables.
for df in [historical_transactions, new_transactions]:
    # Calendar-day gaps: activation month -> purchase, and purchase -> reference date.
    df['a2p'] = date2date(df['first_active_month'], df['purchase_date'])
    df['p2r'] = date2date(df['purchase_date'], df['reference_date'])

    # Purchase amount scaled down by the distance (in months) from month_lag 0.
    df['p_vs_m'] = df['purchase_amount'] / (df['month_lag'].abs()+1)

    # -1 and 999 are treated as missing installment counts. The result is
    # assigned back to the column: calling replace(..., inplace=True) on the
    # selected column acts on a temporary under copy-on-write and would leave
    # df unchanged. np.nan is used because the np.NaN alias was removed in
    # NumPy 2.0.
    df['installments'] = df['installments'].replace([-1, 999], np.nan)

    df['p_vs_i'] = df['purchase_amount'] / (df['installments'].abs()+1)

    df['quarter'] = df['purchase_date'].dt.quarter

    df['month'] = df['purchase_date'].dt.month

    # Series.dt.weekofyear was removed in pandas 2.0; use isocalendar() when
    # the installed pandas provides it and fall back to the old attribute
    # otherwise. The cast keeps a plain numeric dtype (float when NaT is present).
    if hasattr(df['purchase_date'].dt, 'isocalendar'):
        iso_week = df['purchase_date'].dt.isocalendar().week
        df['weekofyear'] = iso_week.astype('float64' if iso_week.isna().any() else 'int64')
    else:
        df['weekofyear'] = df['purchase_date'].dt.weekofyear
    df['dayofweek'] = df['purchase_date'].dt.dayofweek
    df['day'] = df['purchase_date'].dt.day
    df['hour'] = df['purchase_date'].dt.hour

    df['is_month_start'] = (df.purchase_date.dt.is_month_start).astype(int)

    # Saturday (5) and Sunday (6).
    df['weekend'] = (df.purchase_date.dt.weekday>=5).astype(int)

In [25]:
def aggregate_transactions_hist(history):
    """Aggregate the historical transaction table to one row per ``card_id``.

    Uses the module-level lists ``binary_func``, ``category_func`` and
    ``contiguous_func`` as aggregation sets and ``date2date`` for day gaps.

    Side effect: for each categorical/id/calendar column a ``<col>_p_mean``
    column is added to ``history`` itself, holding the mean
    ``purchase_amount`` of all rows in the table that share that value.

    Returns a DataFrame with ``card_id``, ``transactions_count``, the
    flattened ``<column>_<aggregation>`` features, the year/quarter/month of
    the first active month and of the reference date, and ``a2r`` (days from
    reference date to first active month, via ``date2date``).
    """
    id_cols = ['state_id', 'city_id', 'subsector_id', 'merchant_category_id', 'merchant_id']
    calendar_cols = ['quarter', 'month', 'weekofyear', 'dayofweek', 'day', 'hour']
    encoded_cols = ['category_2', 'category_3'] + id_cols + calendar_cols

    # Built in this exact order: it determines the output column order.
    agg_func = {name: binary_func for name in ('is_month_start', 'weekend', 'category_1')}
    agg_func.update({name: category_func for name in ('category_2', 'category_3')})
    agg_func.update({name: ['nunique', ] for name in id_cols})
    agg_func.update({name: category_func for name in calendar_cols})
    agg_func.update({name: contiguous_func for name in ('a2p', 'p2r', 'month_lag')})
    agg_func['purchase_amount'] = ['sum', 'mean', 'median', 'max', 'min', 'std', ]
    agg_func.update({name: contiguous_func for name in ('installments', 'p_vs_m', 'p_vs_i')})
    agg_func['purchase_date'] = ['max', 'min']
    agg_func['first_active_month'] = ['first']
    agg_func['reference_date'] = ['first']

    # Mean-encode each column with the table-wide mean purchase amount of its value.
    for col in encoded_cols:
        encoded = col + '_p_mean'
        history[encoded] = history.groupby([col])['purchase_amount'].transform('mean')
        agg_func[encoded] = contiguous_func

    per_card = history.groupby(['card_id']).agg(agg_func)
    # Flatten the (column, aggregation) MultiIndex into 'column_aggregation'.
    per_card.columns = ['_'.join(col).strip() for col in per_card.columns.values]
    per_card = per_card.reset_index()

    activation = per_card['first_active_month_first']
    reference = per_card['reference_date_first']

    per_card['first_year'] = activation.dt.year
    per_card['first_quarter'] = activation.dt.quarter
    per_card['first_month'] = activation.dt.month

    per_card['re_year'] = reference.dt.year
    per_card['re_quarter'] = reference.dt.quarter
    per_card['re_month'] = reference.dt.month

    per_card['a2r'] = date2date(activation, reference)

    per_card = per_card.drop(columns=['first_active_month_first', 'reference_date_first'])

    # Number of transactions per card goes first in the merged result.
    counts = (history.groupby('card_id')
              .size()
              .reset_index(name='transactions_count'))

    return pd.merge(counts, per_card, on='card_id', how='left')

83

In [26]:
%%time
# Per-card aggregates of the historical transactions; every feature column
# (all but card_id) gets the prefix 'hist_'.
history = aggregate_transactions_hist(historical_transactions)
    
history.columns = ['hist_' + c if c != 'card_id' else c for c in history.columns]

# Days between a card's first and last historical purchase.
history['hist_p2p'] = (history['hist_purchase_date_max'] - history['hist_purchase_date_min']).dt.days
# NOTE(review): hist_day_nunique counts distinct day-of-month values (1-31), not
# distinct calendar dates -- confirm this is the intended measure of inactivity.
history['hist_sleep'] = history['hist_p2p'] - history['hist_day_nunique']

### Per-transaction ratios.
history['hist_p2p_vs_count'] = history['hist_p2p']/history['hist_transactions_count']
history['hist_sleep_vs_count'] = history['hist_sleep']/history['hist_transactions_count']

# p2r is purchase date minus reference date in days; its minimum belongs to the
# earliest purchase.
history['hist_p2r_vs_count'] = history['hist_p2r_min'].abs()/history['hist_transactions_count']

### Ratios against the purchase span; the +1 keeps the denominator non-zero.
history['hist_count_vs_p2p'] = history['hist_transactions_count']/(history['hist_p2p'].abs()+1)
history['hist_sleep_vs_p2p'] = history['hist_sleep']/(history['hist_p2p'].abs()+1)
history['hist_p_vs_p2p'] = history['hist_purchase_amount_sum']/(history['hist_p2p'].abs()+1)
### Ratios against the distance of the earliest purchase from the reference date.
history['hist_count_vs_p2r'] = history['hist_transactions_count']/(history['hist_p2r_min'].abs()+1)
history['hist_sleep_vs_p2r'] = history['hist_sleep']/(history['hist_p2r_min'].abs()+1)
history['hist_p_vs_p2r'] = history['hist_purchase_amount_sum']/(history['hist_p2r_min'].abs()+1)
###
###

CPU times: user 9min 21s, sys: 43.7 s, total: 10min 5s
Wall time: 3min 16s


In [27]:
# Preview the per-card historical feature table.
history.head()

Unnamed: 0,card_id,hist_transactions_count,hist_is_month_start_mean,hist_weekend_mean,hist_category_1_mean,hist_category_2_nunique,hist_category_3_nunique,hist_state_id_nunique,hist_city_id_nunique,hist_subsector_id_nunique,hist_merchant_category_id_nunique,hist_merchant_id_nunique,hist_quarter_nunique,hist_month_nunique,hist_weekofyear_nunique,hist_dayofweek_nunique,hist_day_nunique,hist_hour_nunique,hist_a2p_mean,hist_a2p_median,hist_a2p_max,hist_a2p_min,hist_a2p_std,hist_p2r_mean,hist_p2r_median,hist_p2r_max,hist_p2r_min,hist_p2r_std,hist_month_lag_mean,hist_month_lag_median,hist_month_lag_max,hist_month_lag_min,hist_month_lag_std,hist_purchase_amount_sum,hist_purchase_amount_mean,hist_purchase_amount_median,hist_purchase_amount_max,hist_purchase_amount_min,hist_purchase_amount_std,hist_installments_mean,hist_installments_median,hist_installments_max,hist_installments_min,hist_installments_std,hist_p_vs_m_mean,hist_p_vs_m_median,hist_p_vs_m_max,hist_p_vs_m_min,hist_p_vs_m_std,hist_p_vs_i_mean,hist_p_vs_i_median,hist_p_vs_i_max,hist_p_vs_i_min,hist_p_vs_i_std,hist_purchase_date_max,hist_purchase_date_min,hist_category_2_p_mean_mean,hist_category_2_p_mean_median,hist_category_2_p_mean_max,hist_category_2_p_mean_min,hist_category_2_p_mean_std,hist_category_3_p_mean_mean,hist_category_3_p_mean_median,hist_category_3_p_mean_max,hist_category_3_p_mean_min,hist_category_3_p_mean_std,hist_state_id_p_mean_mean,hist_state_id_p_mean_median,hist_state_id_p_mean_max,hist_state_id_p_mean_min,hist_state_id_p_mean_std,hist_city_id_p_mean_mean,hist_city_id_p_mean_median,hist_city_id_p_mean_max,hist_city_id_p_mean_min,hist_city_id_p_mean_std,hist_subsector_id_p_mean_mean,hist_subsector_id_p_mean_median,hist_subsector_id_p_mean_max,hist_subsector_id_p_mean_min,hist_subsector_id_p_mean_std,hist_merchant_category_id_p_mean_mean,hist_merchant_category_id_p_mean_median,hist_merchant_category_id_p_mean_max,hist_merchant_category_id_p_mean_min,hist_merchant_category_id_p_mean_std,hist_me
rchant_id_p_mean_mean,hist_merchant_id_p_mean_median,hist_merchant_id_p_mean_max,hist_merchant_id_p_mean_min,hist_merchant_id_p_mean_std,hist_quarter_p_mean_mean,hist_quarter_p_mean_median,hist_quarter_p_mean_max,hist_quarter_p_mean_min,hist_quarter_p_mean_std,hist_month_p_mean_mean,hist_month_p_mean_median,hist_month_p_mean_max,hist_month_p_mean_min,hist_month_p_mean_std,hist_weekofyear_p_mean_mean,hist_weekofyear_p_mean_median,hist_weekofyear_p_mean_max,hist_weekofyear_p_mean_min,hist_weekofyear_p_mean_std,hist_dayofweek_p_mean_mean,hist_dayofweek_p_mean_median,hist_dayofweek_p_mean_max,hist_dayofweek_p_mean_min,hist_dayofweek_p_mean_std,hist_day_p_mean_mean,hist_day_p_mean_median,hist_day_p_mean_max,hist_day_p_mean_min,hist_day_p_mean_std,hist_hour_p_mean_mean,hist_hour_p_mean_median,hist_hour_p_mean_max,hist_hour_p_mean_min,hist_hour_p_mean_std,hist_first_year,hist_first_quarter,hist_first_month,hist_re_year,hist_re_quarter,hist_re_month,hist_a2r,hist_p2p,hist_sleep,hist_p2p_vs_count,hist_sleep_vs_count,hist_p2r_vs_count,hist_count_vs_p2p,hist_sleep_vs_p2p,hist_p_vs_p2p,hist_count_vs_p2r,hist_sleep_vs_p2r,hist_p_vs_p2r
0,C_ID_00007093c1,149,0.067114,0.167785,0.187919,3,2,3,4,13,18,29,4,12,39,7,28,18,-197.838926,-185.0,-13,-391,104.216256,-194.161074,-207.0,-1,-379,104.216256,-5.852349,-6.0,0,-12,3.453114,-76.85211,-0.515786,-0.587627,1.5,-0.728876,0.297818,1.288591,1.0,6.0,1.0,0.7649,-0.128251,-0.085235,0.214286,-0.721363,0.149473,-0.251977,-0.290808,0.375,-0.364438,0.128706,2018-02-27 05:14:57,2017-02-14 14:00:43,-0.572437,-0.582086,-0.5302,-0.59713,0.020423,-0.538263,-0.61783,-0.123849,-0.61783,0.182199,-0.567946,-0.57653,-0.5302,-0.594657,0.018279,-0.56897,-0.577454,-0.52583,-0.599463,0.021061,-0.587132,-0.619453,-0.107234,-0.683504,0.116425,-0.599078,-0.6202,-0.344769,-0.698723,0.095077,-0.586346,-0.574567,-0.311478,-0.699339,0.095052,-0.596635,-0.598183,-0.592935,-0.598229,0.002129,-0.597112,-0.597791,-0.590924,-0.602054,0.002974,-0.596773,-0.597023,-0.576654,-0.606256,0.004004,-0.592725,-0.589729,-0.588296,-0.628458,0.007549,-0.596183,-0.596729,-0.591216,-0.600465,0.002782,-0.593205,-0.582817,-0.581067,-0.634077,0.014742,2017,1,2,2018,1,2,-392,377,349,2.530201,2.342282,2.543624,0.39418,0.92328,-0.203312,0.392105,0.918421,-0.202242
1,C_ID_0001238066,123,0.00813,0.422764,0.01626,3,3,6,18,17,29,65,3,6,23,7,30,20,-112.243902,-113.0,-27,-179,38.533005,-67.756098,-67.0,-1,-153,38.533005,-1.813008,-2.0,0,-5,1.28898,-72.447201,-0.589002,-0.648184,0.768095,-0.734887,0.190235,1.675,1.0,10.0,1.0,1.444564,-0.276554,-0.216983,0.384048,-0.728681,0.198098,-0.27045,-0.321857,0.109728,-0.367443,0.104074,2018-02-27 16:18:59,2017-09-28 22:25:14,-0.603427,-0.610919,-0.5302,-0.610919,0.020046,-0.47758,-0.61783,-0.123849,-0.61783,0.223305,-0.603607,-0.610125,-0.5302,-0.610881,0.019698,-0.601042,-0.609366,-0.52583,-0.625662,0.021746,-0.605738,-0.632387,0.021631,-0.723084,0.103433,-0.596479,-0.631764,0.241158,-0.723084,0.125818,-0.586243,-0.634686,0.403352,-0.729574,0.164021,-0.595151,-0.592935,-0.592935,-0.598229,0.002606,-0.594661,-0.59477,-0.590924,-0.602054,0.004304,-0.593519,-0.594968,-0.576654,-0.606256,0.007413,-0.599096,-0.591606,-0.588296,-0.628458,0.013492,-0.596096,-0.596729,-0.590528,-0.600465,0.002797,-0.600072,-0.596481,-0.581067,-0.634077,0.015924,2017,3,9,2018,1,2,-180,151,121,1.227642,0.98374,1.243902,0.809211,0.796053,-0.476626,0.798701,0.785714,-0.470436
2,C_ID_0001506ef0,66,0.015152,0.484848,0.0,2,2,2,3,12,19,28,4,11,24,7,25,15,-442.909091,-498.0,-197,-596,128.642107,-164.090909,-109.0,-11,-410,128.642107,-4.833333,-3.0,0,-13,4.2375,-34.601879,-0.524271,-0.703707,1.493545,-0.740491,0.472284,0.015152,0.0,1.0,0.0,0.123091,-0.145832,-0.148056,1.493545,-0.716855,0.290651,-0.518903,-0.701077,1.493545,-0.740491,0.472171,2018-02-17 12:33:56,2017-01-14 16:16:01,-0.58296,-0.582086,-0.582086,-0.610919,0.00498,-0.641534,-0.641898,-0.61783,-0.641898,0.002963,-0.591755,-0.591181,-0.591181,-0.610125,0.003272,-0.615143,-0.616105,-0.566094,-0.616105,0.00624,-0.571516,-0.632387,-0.394989,-0.683504,0.094043,-0.572859,-0.633152,-0.361686,-0.710127,0.098212,-0.611419,-0.616059,-0.352221,-0.724481,0.09889,-0.595857,-0.596871,-0.592935,-0.598229,0.002555,-0.595491,-0.59477,-0.590924,-0.602054,0.00413,-0.594345,-0.59435,-0.585321,-0.606256,0.004958,-0.597743,-0.591606,-0.588296,-0.628458,0.010129,-0.59523,-0.594748,-0.591216,-0.600465,0.00279,-0.590755,-0.585612,-0.581067,-0.628762,0.011073,2016,3,7,2018,1,2,-607,398,373,6.030303,5.651515,6.212121,0.165414,0.934837,-0.086722,0.160584,0.907543,-0.084189
3,C_ID_0001793786,216,0.027778,0.171296,0.009259,4,2,4,10,24,48,119,4,10,33,7,31,21,-186.634259,-203.0,-20,-303,70.951936,-116.365741,-100.0,0,-283,70.951936,-3.328704,-3.0,0,-9,2.306373,-48.620883,-0.225097,-0.487911,1.5,-0.745405,0.625163,0.023148,0.0,1.0,0.0,0.150723,-0.084964,-0.099693,1.421794,-0.724518,0.242263,-0.21724,-0.464485,1.5,-0.737892,0.621397,2017-10-31 20:20:18,2017-01-21 10:15:21,-0.55843,-0.5302,-0.5302,-0.610919,0.03037,-0.641341,-0.641898,-0.61783,-0.641898,0.003628,-0.547193,-0.5302,-0.5302,-0.610125,0.021099,-0.583249,-0.586228,-0.52583,-0.609366,0.012798,-0.577724,-0.601317,-0.185873,-0.683504,0.105281,-0.578815,-0.6202,0.044977,-0.710127,0.122242,-0.538442,-0.611462,1.470374,-0.73767,0.223582,-0.597089,-0.596871,-0.592935,-0.598229,0.001472,-0.597898,-0.597831,-0.59477,-0.602054,0.001392,-0.598033,-0.598342,-0.592771,-0.602569,0.00211,-0.592584,-0.590207,-0.588296,-0.628458,0.006891,-0.596301,-0.596747,-0.590528,-0.600465,0.002621,-0.595432,-0.59424,-0.581067,-0.63263,0.013678,2017,1,1,2017,4,10,-303,283,252,1.310185,1.166667,1.310185,0.760563,0.887324,-0.1712,0.760563,0.887324,-0.1712
4,C_ID_000183fdda,144,0.090278,0.229167,0.027778,5,3,7,9,21,36,73,3,7,27,7,30,19,-90.666667,-98.5,25,-177,55.937403,-89.333333,-81.5,-3,-205,55.937403,-2.451389,-2.0,0,-6,1.895264,-71.073332,-0.493565,-0.661294,1.5,-0.737892,0.445999,1.914286,1.0,10.0,1.0,2.093105,-0.21518,-0.171625,1.205036,-0.731881,0.290545,-0.251202,-0.333213,0.468031,-0.368946,0.157515,2018-02-25 20:57:08,2017-08-07 09:49:14,-0.582195,-0.582086,-0.5302,-0.610919,0.010856,-0.47754,-0.61783,-0.123849,-0.61783,0.223144,-0.583463,-0.583471,-0.5302,-0.610794,0.011009,-0.576372,-0.575026,-0.52583,-0.609366,0.012176,-0.570472,-0.601317,-0.003688,-0.683079,0.11693,-0.568485,-0.6202,0.083997,-0.710127,0.126499,-0.539338,-0.618652,0.535892,-0.721197,0.213232,-0.595759,-0.596871,-0.592935,-0.598229,0.002445,-0.596323,-0.597791,-0.590924,-0.602054,0.003824,-0.595562,-0.59638,-0.576654,-0.606256,0.005093,-0.595184,-0.590207,-0.588296,-0.628458,0.011058,-0.596422,-0.596747,-0.591216,-0.600465,0.002561,-0.601233,-0.596481,-0.581067,-0.628762,0.014728,2017,3,9,2018,1,2,-180,202,172,1.402778,1.194444,1.423611,0.70936,0.847291,-0.350115,0.699029,0.834951,-0.345016


In [28]:
# Join the historical per-card features onto train and test, then release the
# intermediate frame.
train = train.merge(history, on='card_id', how='left')
test = test.merge(history, on='card_id', how='left')

del history
gc.collect()

14

In [29]:
def aggregate_transactions_new(new):
    """Aggregate the new-merchant transaction table to one row per ``card_id``.

    Uses the module-level lists ``binary_func``, ``category_func`` and
    ``contiguous_func`` as aggregation sets.

    Side effect: for each categorical/id/calendar column a ``<col>_p_mean``
    column is added to ``new`` itself, holding the mean ``purchase_amount``
    of all rows in the table that share that value.

    Returns a DataFrame with ``card_id``, ``transactions_count`` and the
    flattened ``<column>_<aggregation>`` features.
    """
    id_cols = ['state_id', 'city_id', 'subsector_id', 'merchant_category_id', 'merchant_id']
    calendar_cols = ['quarter', 'month', 'weekofyear', 'dayofweek', 'day', 'hour']
    encoded_cols = ['category_2', 'category_3'] + id_cols + calendar_cols

    # Built in this exact order: it determines the output column order.
    agg_func = {name: binary_func for name in ('is_month_start', 'weekend', 'category_1')}
    agg_func.update({name: category_func for name in ('category_2', 'category_3')})
    agg_func.update({name: ['nunique', ] for name in id_cols})
    agg_func.update({name: category_func for name in calendar_cols})
    agg_func.update({name: contiguous_func for name in ('a2p', 'p2r', 'month_lag')})
    agg_func['purchase_amount'] = ['sum', 'mean', 'median', 'max', 'min', 'std', ]
    agg_func.update({name: contiguous_func for name in ('installments', 'p_vs_m', 'p_vs_i')})
    agg_func['purchase_date'] = ['max', 'min']

    # Mean-encode each column with the table-wide mean purchase amount of its value.
    for col in encoded_cols:
        encoded = col + '_p_mean'
        new[encoded] = new.groupby([col])['purchase_amount'].transform('mean')
        agg_func[encoded] = contiguous_func

    per_card = new.groupby(['card_id']).agg(agg_func)
    # Flatten the (column, aggregation) MultiIndex into 'column_aggregation'.
    per_card.columns = ['_'.join(col).strip() for col in per_card.columns.values]
    per_card = per_card.reset_index()

    # Number of transactions per card goes first in the merged result.
    counts = (new.groupby('card_id')
              .size()
              .reset_index(name='transactions_count'))

    return pd.merge(counts, per_card, on='card_id', how='left')

0

In [30]:
%%time
# Per-card aggregates of the new-merchant transactions; every feature column
# (all but card_id) gets the prefix 'new_'.
new = aggregate_transactions_new(new_transactions)
    
new.columns = ['new_' + c if c != 'card_id' else c for c in new.columns]
    
# Days between a card's first and last new-merchant purchase.
new['new_p2p'] = (new['new_purchase_date_max'] - new['new_purchase_date_min']).dt.days
# NOTE(review): new_day_nunique counts distinct day-of-month values (1-31), not
# distinct calendar dates -- confirm this is the intended measure of inactivity.
new['new_sleep'] = new['new_p2p'] - new['new_day_nunique']
### Per-transaction ratios.
new['new_p2p_vs_count'] = new['new_p2p']/new['new_transactions_count']
new['new_sleep_vs_count'] = new['new_sleep']/new['new_transactions_count']

# p2r is purchase date minus reference date in days; its maximum belongs to the
# latest purchase (the historical cell uses the minimum instead).
new['new_p2r_vs_count'] = new['new_p2r_max'].abs()/new['new_transactions_count']

### Ratios against the purchase span; the +1 keeps the denominator non-zero.
new['new_count_vs_p2p'] = new['new_transactions_count']/(new['new_p2p'].abs()+1)
new['new_sleep_vs_p2p'] = new['new_sleep']/(new['new_p2p'].abs()+1)
new['new_p_vs_p2p'] = new['new_purchase_amount_sum']/(new['new_p2p'].abs()+1)
### Ratios against the distance of the latest purchase from the reference date.
new['new_count_vs_p2r'] = new['new_transactions_count']/(new['new_p2r_max'].abs()+1)
new['new_sleep_vs_p2r'] = new['new_sleep']/(new['new_p2r_max'].abs()+1)
new['new_p_vs_p2r'] = new['new_purchase_amount_sum']/(new['new_p2r_max'].abs()+1)
###
###

CPU times: user 3min 3s, sys: 10.2 s, total: 3min 13s
Wall time: 21.2 s


In [31]:
# Preview the per-card new-merchant feature table.
new.head()

Unnamed: 0,card_id,new_transactions_count,new_is_month_start_mean,new_weekend_mean,new_category_1_mean,new_category_2_nunique,new_category_3_nunique,new_state_id_nunique,new_city_id_nunique,new_subsector_id_nunique,new_merchant_category_id_nunique,new_merchant_id_nunique,new_quarter_nunique,new_month_nunique,new_weekofyear_nunique,new_dayofweek_nunique,new_day_nunique,new_hour_nunique,new_a2p_mean,new_a2p_median,new_a2p_max,new_a2p_min,new_a2p_std,new_p2r_mean,new_p2r_median,new_p2r_max,new_p2r_min,new_p2r_std,new_month_lag_mean,new_month_lag_median,new_month_lag_max,new_month_lag_min,new_month_lag_std,new_purchase_amount_sum,new_purchase_amount_mean,new_purchase_amount_median,new_purchase_amount_max,new_purchase_amount_min,new_purchase_amount_std,new_installments_mean,new_installments_median,new_installments_max,new_installments_min,new_installments_std,new_p_vs_m_mean,new_p_vs_m_median,new_p_vs_m_max,new_p_vs_m_min,new_p_vs_m_std,new_p_vs_i_mean,new_p_vs_i_median,new_p_vs_i_max,new_p_vs_i_min,new_p_vs_i_std,new_purchase_date_max,new_purchase_date_min,new_category_2_p_mean_mean,new_category_2_p_mean_median,new_category_2_p_mean_max,new_category_2_p_mean_min,new_category_2_p_mean_std,new_category_3_p_mean_mean,new_category_3_p_mean_median,new_category_3_p_mean_max,new_category_3_p_mean_min,new_category_3_p_mean_std,new_state_id_p_mean_mean,new_state_id_p_mean_median,new_state_id_p_mean_max,new_state_id_p_mean_min,new_state_id_p_mean_std,new_city_id_p_mean_mean,new_city_id_p_mean_median,new_city_id_p_mean_max,new_city_id_p_mean_min,new_city_id_p_mean_std,new_subsector_id_p_mean_mean,new_subsector_id_p_mean_median,new_subsector_id_p_mean_max,new_subsector_id_p_mean_min,new_subsector_id_p_mean_std,new_merchant_category_id_p_mean_mean,new_merchant_category_id_p_mean_median,new_merchant_category_id_p_mean_max,new_merchant_category_id_p_mean_min,new_merchant_category_id_p_mean_std,new_merchant_id_p_mean_mean,new_merchant_id_p_mean_median,new_merchant_id_p_mean_max,new_mer
chant_id_p_mean_min,new_merchant_id_p_mean_std,new_quarter_p_mean_mean,new_quarter_p_mean_median,new_quarter_p_mean_max,new_quarter_p_mean_min,new_quarter_p_mean_std,new_month_p_mean_mean,new_month_p_mean_median,new_month_p_mean_max,new_month_p_mean_min,new_month_p_mean_std,new_weekofyear_p_mean_mean,new_weekofyear_p_mean_median,new_weekofyear_p_mean_max,new_weekofyear_p_mean_min,new_weekofyear_p_mean_std,new_dayofweek_p_mean_mean,new_dayofweek_p_mean_median,new_dayofweek_p_mean_max,new_dayofweek_p_mean_min,new_dayofweek_p_mean_std,new_day_p_mean_mean,new_day_p_mean_median,new_day_p_mean_max,new_day_p_mean_min,new_day_p_mean_std,new_hour_p_mean_mean,new_hour_p_mean_median,new_hour_p_mean_max,new_hour_p_mean_min,new_hour_p_mean_std,new_p2p,new_sleep,new_p2p_vs_count,new_sleep_vs_count,new_p2r_vs_count,new_count_vs_p2p,new_sleep_vs_p2p,new_p_vs_p2p,new_count_vs_p2r,new_sleep_vs_p2r,new_p_vs_p2r
0,C_ID_00007093c1,2,0.0,0.0,0.0,2,1,2,2,2,2,2,1,1,2,2,2,2,-429.0,-429.0,-426,-432,4.242641,37.0,37.0,40,34,4.242641,2.0,2.0,2,2,0.0,-1.328524,-0.664262,-0.664262,-0.656749,-0.671775,0.010625,1.0,1.0,1.0,1.0,0.0,-0.221421,-0.221421,-0.218916,-0.223925,0.003542,-0.332131,-0.332131,-0.328374,-0.335888,0.005313,2018-04-09 16:23:59,2018-04-03 11:13:35,-0.57996,-0.57996,-0.571085,-0.588835,0.012551,-0.615461,-0.615461,-0.615461,-0.615461,0.0,-0.579101,-0.579101,-0.571887,-0.586315,0.010202,-0.576606,-0.576606,-0.575068,-0.578145,0.002176,-0.508819,-0.508819,-0.445344,-0.572294,0.089767,-0.500311,-0.500311,-0.459415,-0.541207,0.057836,-0.620281,-0.620281,-0.568788,-0.671775,0.072823,-0.572793,-0.572793,-0.572793,-0.572793,0.0,-0.571095,-0.571095,-0.571095,-0.571095,0.0,-0.571103,-0.571103,-0.569963,-0.572242,0.001612,-0.562547,-0.562547,-0.561972,-0.563122,0.000813,-0.572118,-0.572118,-0.570274,-0.573962,0.002608,-0.558911,-0.558911,-0.555792,-0.562029,0.004411,6,4,3.0,2.0,20.0,0.285714,0.571429,-0.189789,0.04878,0.097561,-0.032403
1,C_ID_0001238066,26,0.076923,0.461538,0.076923,3,3,4,8,9,15,26,2,2,9,6,14,16,-208.961538,-204.0,-181,-241,17.752703,28.961538,24.0,61,1,17.752703,1.346154,1.0,2,1,0.485165,-14.850055,-0.571156,-0.649235,-0.078318,-0.740897,0.173436,1.72,1.0,10.0,1.0,2.051828,-0.252919,-0.241448,-0.039159,-0.370449,0.09616,-0.272389,-0.328374,-0.01958,-0.370449,0.106812,2018-04-30 19:57:30,2018-03-01 16:48:27,-0.570805,-0.588835,-0.449364,-0.588835,0.045055,-0.523379,-0.615461,-0.136057,-0.615461,0.19245,-0.570179,-0.586315,-0.449364,-0.598644,0.04463,-0.571179,-0.584084,-0.433662,-0.606339,0.052652,-0.612073,-0.616939,-0.509771,-0.659385,0.047479,-0.61116,-0.625254,-0.469862,-0.693083,0.047878,-0.573916,-0.593638,-0.227141,-0.714485,0.112194,-0.572302,-0.572042,-0.572042,-0.572793,0.000364,-0.57198,-0.572449,-0.571095,-0.572449,0.000657,-0.572267,-0.570447,-0.568809,-0.578501,0.003226,-0.581007,-0.572497,-0.561972,-0.61039,0.016503,-0.57643,-0.57725,-0.566042,-0.585524,0.007445,-0.582248,-0.581004,-0.555792,-0.608539,0.016535,60,46,2.307692,1.769231,2.346154,0.42623,0.754098,-0.243444,0.419355,0.741935,-0.239517
2,C_ID_0001506ef0,2,0.0,0.0,0.0,1,1,1,1,2,2,2,1,1,2,2,2,2,-626.0,-626.0,-623,-629,4.242641,19.0,19.0,22,16,4.242641,1.0,1.0,1,1,0.0,-1.447354,-0.723677,-0.723677,-0.715352,-0.732001,0.011773,0.0,0.0,0.0,0.0,0.0,-0.361838,-0.361838,-0.357676,-0.366001,0.005886,-0.723677,-0.723677,-0.715352,-0.732001,0.011773,2018-03-22 09:14:30,2018-03-16 22:21:58,-0.571085,-0.571085,-0.571085,-0.571085,0.0,-0.634457,-0.634457,-0.634457,-0.634457,0.0,-0.579098,-0.579098,-0.579098,-0.579098,0.0,-0.598682,-0.598682,-0.598682,-0.598682,0.0,-0.654744,-0.654744,-0.633761,-0.675728,0.029675,-0.641615,-0.641615,-0.636272,-0.646959,0.007557,-0.661027,-0.661027,-0.604686,-0.717368,0.079679,-0.572042,-0.572042,-0.572042,-0.572042,0.0,-0.572449,-0.572449,-0.572449,-0.572449,0.0,-0.571333,-0.571333,-0.569705,-0.572962,0.002303,-0.569623,-0.569623,-0.56675,-0.572497,0.004064,-0.573714,-0.573714,-0.567474,-0.579954,0.008825,-0.586198,-0.586198,-0.572546,-0.599849,0.019306,5,3,2.5,1.5,11.0,0.333333,0.5,-0.241226,0.086957,0.130435,-0.062928
3,C_ID_0001793786,31,0.0,0.451613,0.0,5,1,5,7,14,21,31,1,2,6,6,13,10,-336.290323,-330.0,-318,-364,15.616644,33.290323,27.0,61,15,15.616644,1.322581,1.0,2,1,0.475191,-3.075811,-0.09922,-0.372748,1.5,-0.737892,0.682057,0.0,0.0,0.0,0.0,0.0,-0.039332,-0.14726,0.75,-0.363311,0.308507,-0.09922,-0.372748,1.5,-0.737892,0.682057,2017-12-31 17:35:56,2017-11-15 15:44:20,-0.572314,-0.572875,-0.449364,-0.588835,0.033832,-0.634457,-0.634457,-0.634457,-0.634457,0.0,-0.563927,-0.571493,-0.449364,-0.586315,0.034186,-0.560859,-0.574325,-0.523457,-0.575068,0.02026,-0.598786,-0.616939,-0.421939,-0.659385,0.06772,-0.592967,-0.617693,-0.321116,-0.693083,0.077481,-0.443829,-0.577258,0.755743,-0.737331,0.334013,-0.586289,-0.586289,-0.586289,-0.586289,0.0,-0.585244,-0.586299,-0.583027,-0.586299,0.001555,-0.583609,-0.587997,-0.569705,-0.595483,0.010498,-0.582411,-0.572497,-0.561972,-0.61039,0.020856,-0.573224,-0.568509,-0.563786,-0.590323,0.008946,-0.580219,-0.573315,-0.558676,-0.61315,0.018218,46,33,1.483871,1.064516,1.967742,0.659574,0.702128,-0.065443,0.5,0.532258,-0.04961
4,C_ID_000183fdda,11,0.0,0.181818,0.0,1,3,2,2,6,9,11,2,2,7,6,9,8,-200.181818,-190.0,-182,-241,21.798248,20.181818,10.0,61,2,21.798248,1.272727,1.0,2,1,0.467099,-6.590778,-0.599162,-0.665765,-0.10768,-0.732332,0.182877,1.7,1.0,4.0,1.0,1.05935,-0.277157,-0.315602,-0.035893,-0.366166,0.09668,-0.275101,-0.327661,-0.09192,-0.366166,0.101957,2018-04-30 14:59:53,2018-03-02 12:26:26,-0.571085,-0.571085,-0.571085,-0.571085,0.0,-0.397812,-0.615461,-0.136057,-0.615461,0.250061,-0.570159,-0.570083,-0.570083,-0.570917,0.000251,-0.571745,-0.571781,-0.571381,-0.571781,0.000121,-0.585342,-0.654354,-0.345206,-0.659385,0.099833,-0.595213,-0.649425,-0.393057,-0.693083,0.095353,-0.607778,-0.637794,-0.407699,-0.732332,0.096114,-0.572247,-0.572042,-0.572042,-0.572793,0.000351,-0.57208,-0.572449,-0.571095,-0.572449,0.000633,-0.571967,-0.569837,-0.568809,-0.578501,0.003052,-0.569047,-0.56675,-0.561972,-0.584572,0.008604,-0.57346,-0.572437,-0.563786,-0.585524,0.007349,-0.575049,-0.573315,-0.555792,-0.599849,0.01419,59,50,5.363636,4.545455,5.545455,0.183333,0.833333,-0.109846,0.177419,0.806452,-0.106303


In [32]:
# Join the new-merchant per-card features onto train and test (cards without
# new-merchant transactions get missing values), then release the frame.
train = train.merge(new, on='card_id', how='left')
test = test.merge(new, on='card_id', how='left')

del new
gc.collect()

28

In [33]:
# Flag training rows whose target is below -30 as outliers (1); all others are 0.
train['outliers'] = (train['target'] < -30).astype('int64')
train['outliers'].value_counts()

0    199710
1      2207
Name: outliers, dtype: int64

In [34]:
# Features comparing each card's historical and new-merchant behaviour.
for df in [train, test]:
    ### Differences of the per-transaction ratios (hist minus new).
    df['c_p2p_diff'] = df['hist_p2p_vs_count'] - df['new_p2p_vs_count']
    df['c_sleep_diff'] = df['hist_sleep_vs_count'] - df['new_sleep_vs_count']
    df['c_p_diff'] = df['hist_purchase_amount_mean'] - df['new_purchase_amount_mean']
    ### Differences of the ratios against the purchase span.
    df['p2p_count_diff'] = df['hist_count_vs_p2p'] - df['new_count_vs_p2p']
    df['p2p_sleep_diff'] = df['hist_sleep_vs_p2p'] - df['new_sleep_vs_p2p']
    df['p2p_p_diff'] = df['hist_p_vs_p2p'] - df['new_p_vs_p2p']
    ### Differences of the ratios against the distance to the reference date.
    df['p2r_count_diff'] = df['hist_count_vs_p2r'] - df['new_count_vs_p2r']
    df['p2r_sleep_diff'] = df['hist_sleep_vs_p2r'] - df['new_sleep_vs_p2r']
    df['p2r_p_diff'] = df['hist_p_vs_p2r'] - df['new_p_vs_p2r']
    ### The same differences relative to the historical value. A historical
    ### value of 0 yields inf or NaN here.
    df['c_p2p_diff_vs'] = df['c_p2p_diff'] / df['hist_p2p_vs_count']
    df['c_sleep_diff_vs'] = df['c_sleep_diff'] / df['hist_sleep_vs_count']
    df['c_p_diff_vs']  = df['c_p_diff'] / df['hist_purchase_amount_mean']
    ###
    df['p2p_count_diff_vs'] = df['p2p_count_diff'] / df['hist_count_vs_p2p']
    df['p2p_sleep_diff_vs'] = df['p2p_sleep_diff'] / df['hist_sleep_vs_p2p']
    df['p2p_p_diff_vs']  = df['p2p_p_diff'] / df['hist_p_vs_p2p']
    ###
    df['p2r_count_diff_vs'] = df['p2r_count_diff'] / df['hist_count_vs_p2r']
    df['p2r_sleep_diff_vs'] = df['p2r_sleep_diff'] / df['hist_sleep_vs_p2r']
    df['p2r_p_diff_vs']  = df['p2r_p_diff'] / df['hist_p_vs_p2r']
    ###
    ### Convert the first/last purchase timestamps to seconds since the epoch.
    for f in ['hist_purchase_date_max','hist_purchase_date_min', 
              'new_purchase_date_max', 'new_purchase_date_min']:
        # Skip columns that are already numeric so re-running the cell does
        # not rescale them a second time.
        if pd.api.types.is_datetime64_any_dtype(df[f]):
            # astype(np.int64) turns NaT (cards left unmatched by the left
            # merges) into the int64 minimum, i.e. a bogus timestamp of about
            # -9.2e9 seconds; keep those entries missing instead.
            df[f] = (df[f].astype(np.int64) * 1e-9).where(df[f].notna())

In [35]:
# Preview the first five training rows, including the newly derived columns.
train.head(n=5)

Unnamed: 0,first_active_month,card_id,feature_1,feature_2,feature_3,target,authorized_flag_mean,hist_transactions_count,hist_is_month_start_mean,hist_weekend_mean,hist_category_1_mean,hist_category_2_nunique,hist_category_3_nunique,hist_state_id_nunique,hist_city_id_nunique,hist_subsector_id_nunique,hist_merchant_category_id_nunique,hist_merchant_id_nunique,hist_quarter_nunique,hist_month_nunique,hist_weekofyear_nunique,hist_dayofweek_nunique,hist_day_nunique,hist_hour_nunique,hist_a2p_mean,hist_a2p_median,hist_a2p_max,hist_a2p_min,hist_a2p_std,hist_p2r_mean,hist_p2r_median,hist_p2r_max,hist_p2r_min,hist_p2r_std,hist_month_lag_mean,hist_month_lag_median,hist_month_lag_max,hist_month_lag_min,hist_month_lag_std,hist_purchase_amount_sum,hist_purchase_amount_mean,hist_purchase_amount_median,hist_purchase_amount_max,hist_purchase_amount_min,hist_purchase_amount_std,hist_installments_mean,hist_installments_median,hist_installments_max,hist_installments_min,hist_installments_std,hist_p_vs_m_mean,hist_p_vs_m_median,hist_p_vs_m_max,hist_p_vs_m_min,hist_p_vs_m_std,hist_p_vs_i_mean,hist_p_vs_i_median,hist_p_vs_i_max,hist_p_vs_i_min,hist_p_vs_i_std,hist_purchase_date_max,hist_purchase_date_min,hist_category_2_p_mean_mean,hist_category_2_p_mean_median,hist_category_2_p_mean_max,hist_category_2_p_mean_min,hist_category_2_p_mean_std,hist_category_3_p_mean_mean,hist_category_3_p_mean_median,hist_category_3_p_mean_max,hist_category_3_p_mean_min,hist_category_3_p_mean_std,hist_state_id_p_mean_mean,hist_state_id_p_mean_median,hist_state_id_p_mean_max,hist_state_id_p_mean_min,hist_state_id_p_mean_std,hist_city_id_p_mean_mean,hist_city_id_p_mean_median,hist_city_id_p_mean_max,hist_city_id_p_mean_min,hist_city_id_p_mean_std,hist_subsector_id_p_mean_mean,hist_subsector_id_p_mean_median,hist_subsector_id_p_mean_max,hist_subsector_id_p_mean_min,hist_subsector_id_p_mean_std,hist_merchant_category_id_p_mean_mean,hist_merchant_category_id_p_mean_median,hist_merchant_category_id_p_mean_max,hist
_merchant_category_id_p_mean_min,hist_merchant_category_id_p_mean_std,hist_merchant_id_p_mean_mean,hist_merchant_id_p_mean_median,hist_merchant_id_p_mean_max,hist_merchant_id_p_mean_min,hist_merchant_id_p_mean_std,hist_quarter_p_mean_mean,hist_quarter_p_mean_median,hist_quarter_p_mean_max,hist_quarter_p_mean_min,hist_quarter_p_mean_std,hist_month_p_mean_mean,hist_month_p_mean_median,hist_month_p_mean_max,hist_month_p_mean_min,hist_month_p_mean_std,hist_weekofyear_p_mean_mean,hist_weekofyear_p_mean_median,hist_weekofyear_p_mean_max,hist_weekofyear_p_mean_min,hist_weekofyear_p_mean_std,hist_dayofweek_p_mean_mean,hist_dayofweek_p_mean_median,hist_dayofweek_p_mean_max,hist_dayofweek_p_mean_min,hist_dayofweek_p_mean_std,hist_day_p_mean_mean,hist_day_p_mean_median,hist_day_p_mean_max,hist_day_p_mean_min,hist_day_p_mean_std,hist_hour_p_mean_mean,hist_hour_p_mean_median,hist_hour_p_mean_max,hist_hour_p_mean_min,hist_hour_p_mean_std,hist_first_year,hist_first_quarter,hist_first_month,hist_re_year,hist_re_quarter,hist_re_month,hist_a2r,hist_p2p,hist_sleep,hist_p2p_vs_count,hist_sleep_vs_count,hist_p2r_vs_count,hist_count_vs_p2p,hist_sleep_vs_p2p,hist_p_vs_p2p,hist_count_vs_p2r,hist_sleep_vs_p2r,hist_p_vs_p2r,new_transactions_count,new_is_month_start_mean,new_weekend_mean,new_category_1_mean,new_category_2_nunique,new_category_3_nunique,new_state_id_nunique,new_city_id_nunique,new_subsector_id_nunique,new_merchant_category_id_nunique,new_merchant_id_nunique,new_quarter_nunique,new_month_nunique,new_weekofyear_nunique,new_dayofweek_nunique,new_day_nunique,new_hour_nunique,new_a2p_mean,new_a2p_median,new_a2p_max,new_a2p_min,new_a2p_std,new_p2r_mean,new_p2r_median,new_p2r_max,new_p2r_min,new_p2r_std,new_month_lag_mean,new_month_lag_median,new_month_lag_max,new_month_lag_min,new_month_lag_std,new_purchase_amount_sum,new_purchase_amount_mean,new_purchase_amount_median,new_purchase_amount_max,new_purchase_amount_min,new_purchase_amount_std,new_installments_mean,new_installments_medi
an,new_installments_max,new_installments_min,new_installments_std,new_p_vs_m_mean,new_p_vs_m_median,new_p_vs_m_max,new_p_vs_m_min,new_p_vs_m_std,new_p_vs_i_mean,new_p_vs_i_median,new_p_vs_i_max,new_p_vs_i_min,new_p_vs_i_std,new_purchase_date_max,new_purchase_date_min,new_category_2_p_mean_mean,new_category_2_p_mean_median,new_category_2_p_mean_max,new_category_2_p_mean_min,new_category_2_p_mean_std,new_category_3_p_mean_mean,new_category_3_p_mean_median,new_category_3_p_mean_max,new_category_3_p_mean_min,new_category_3_p_mean_std,new_state_id_p_mean_mean,new_state_id_p_mean_median,new_state_id_p_mean_max,new_state_id_p_mean_min,new_state_id_p_mean_std,new_city_id_p_mean_mean,new_city_id_p_mean_median,new_city_id_p_mean_max,new_city_id_p_mean_min,new_city_id_p_mean_std,new_subsector_id_p_mean_mean,new_subsector_id_p_mean_median,new_subsector_id_p_mean_max,new_subsector_id_p_mean_min,new_subsector_id_p_mean_std,new_merchant_category_id_p_mean_mean,new_merchant_category_id_p_mean_median,new_merchant_category_id_p_mean_max,new_merchant_category_id_p_mean_min,new_merchant_category_id_p_mean_std,new_merchant_id_p_mean_mean,new_merchant_id_p_mean_median,new_merchant_id_p_mean_max,new_merchant_id_p_mean_min,new_merchant_id_p_mean_std,new_quarter_p_mean_mean,new_quarter_p_mean_median,new_quarter_p_mean_max,new_quarter_p_mean_min,new_quarter_p_mean_std,new_month_p_mean_mean,new_month_p_mean_median,new_month_p_mean_max,new_month_p_mean_min,new_month_p_mean_std,new_weekofyear_p_mean_mean,new_weekofyear_p_mean_median,new_weekofyear_p_mean_max,new_weekofyear_p_mean_min,new_weekofyear_p_mean_std,new_dayofweek_p_mean_mean,new_dayofweek_p_mean_median,new_dayofweek_p_mean_max,new_dayofweek_p_mean_min,new_dayofweek_p_mean_std,new_day_p_mean_mean,new_day_p_mean_median,new_day_p_mean_max,new_day_p_mean_min,new_day_p_mean_std,new_hour_p_mean_mean,new_hour_p_mean_median,new_hour_p_mean_max,new_hour_p_mean_min,new_hour_p_mean_std,new_p2p,new_sleep,new_p2p_vs_count,new_sleep_vs_count,new_p2
r_vs_count,new_count_vs_p2p,new_sleep_vs_p2p,new_p_vs_p2p,new_count_vs_p2r,new_sleep_vs_p2r,new_p_vs_p2r,outliers,c_p2p_diff,c_sleep_diff,c_p_diff,p2p_count_diff,p2p_sleep_diff,p2p_p_diff,p2r_count_diff,p2r_sleep_diff,p2r_p_diff,c_p2p_diff_vs,c_sleep_diff_vs,c_p_diff_vs,p2p_count_diff_vs,p2p_sleep_diff_vs,p2p_p_diff_vs,p2r_count_diff_vs,p2r_sleep_diff_vs,p2r_p_diff_vs
0,2017-06-01,C_ID_92a2005557,5,2,1,-0.820283,0.95,260,0.026923,0.346154,0.0,2,2,3,7,21,41,95,4,9,35,7,31,23,-139.323077,-137.5,-26,-269,74.281861,-132.676923,-134.5,-3,-246,74.281861,-3.911538,-4.0,0,-8,2.397687,-166.727134,-0.641258,-0.698042,1.5,-0.739395,0.173928,0.015385,0.0,1.0,0.0,0.123314,-0.191616,-0.122982,0.5,-0.738944,0.173792,-0.636097,-0.696997,1.5,-0.739395,0.177854,1519551000.0,1498573000.0,-0.61076,-0.610919,-0.59713,-0.610919,0.001475,-0.641528,-0.641898,-0.61783,-0.641898,0.002968,-0.609978,-0.610125,-0.594651,-0.610125,0.001428,-0.608593,-0.609366,-0.566469,-0.612304,0.005248,-0.646859,-0.675323,-0.003688,-0.723084,0.096471,-0.650081,-0.68186,0.241158,-0.723084,0.11356,-0.616642,-0.691779,0.497951,-0.729832,0.157344,-0.595602,-0.596871,-0.592935,-0.598229,0.002168,-0.595381,-0.596318,-0.590924,-0.602054,0.003354,-0.595483,-0.595849,-0.576654,-0.606256,0.00534,-0.598331,-0.590207,-0.588296,-0.628458,0.013844,-0.596457,-0.597384,-0.590528,-0.600465,0.002668,-0.593275,-0.582817,-0.581067,-0.634077,0.014161,2017,2,6,2018,1,2,-272,242,211,0.930769,0.811538,0.946154,1.069959,0.868313,-0.68612,1.052632,0.854251,-0.675009,23.0,0.0,0.26087,0.0,1.0,1.0,1.0,3.0,10.0,14.0,23.0,2.0,2.0,7.0,7.0,17.0,8.0,-303.26087,-303.0,-277.0,-332.0,16.591941,31.26087,31.0,60.0,5.0,16.591941,1.478261,1.0,2.0,1.0,0.510754,-13.244202,-0.575835,-0.58118,-0.296112,-0.724368,0.135812,0.0,0.0,0.0,0.0,0.0,-0.240423,-0.238952,-0.102586,-0.362184,0.072041,-0.575835,-0.58118,-0.296112,-0.724368,0.135812,1525001000.0,1520259000.0,-0.588835,-0.588835,-0.588835,-0.588835,0.0,-0.634457,-0.634457,-0.634457,-0.634457,0.0,-0.586315,-0.586315,-0.586315,-0.586315,0.0,-0.5792,-0.575068,-0.575068,-0.60963,0.009585,-0.631336,-0.659385,-0.345206,-0.720933,0.079077,-0.615909,-0.632814,-0.265009,-0.720933,0.098304,-0.570283,-0.569305,-0.229462,-0.710157,0.117976,-0.572401,-0.572042,-0.572042,-0.572793,0.000384,-0.571802,-0.572449,-0.571095,-0.572449,0.000692,-0.572123,-0.570447,-0.568809,-0.578501,0.
003578,-0.574779,-0.572497,-0.561972,-0.61039,0.015854,-0.575048,-0.57231,-0.560548,-0.590323,0.007958,-0.563981,-0.562217,-0.555792,-0.57458,0.007045,54.0,37.0,2.347826,1.608696,2.608696,0.418182,0.672727,-0.240804,0.377049,0.606557,-0.217118,0,-1.417057,-0.797157,-0.065423,0.651777,0.195585,-0.445316,0.675582,0.247694,-0.457891,-1.522458,-0.982279,0.102023,0.609161,0.225248,0.649036,0.641803,0.289954,0.678348
1,2017-01-01,C_ID_3d0044924f,4,1,0,0.392913,0.968571,350,0.04,0.377143,0.088571,2,3,3,9,24,57,142,4,12,50,7,31,24,-226.942857,-213.5,-5,-395,116.976167,-168.057143,-181.5,0,-390,116.976167,-5.031429,-5.0,0,-12,3.804934,-213.579474,-0.610227,-0.70859,1.5,-0.7424,0.276506,1.566092,1.0,10.0,1.0,1.50262,-0.210091,-0.104141,0.3,-0.737892,0.225517,-0.294727,-0.35437,0.5,-0.3712,0.121929,1517438000.0,1483720000.0,-0.60377,-0.610919,-0.5302,-0.610919,0.022967,-0.513462,-0.61783,-0.123849,-0.61783,0.201852,-0.603068,-0.610125,-0.5302,-0.610881,0.022748,-0.601927,-0.609366,-0.52583,-0.615565,0.023799,-0.595975,-0.619453,-0.003688,-0.723084,0.120314,-0.602571,-0.644898,0.083997,-0.723084,0.14131,-0.594211,-0.654646,0.737681,-0.731393,0.198402,-0.596536,-0.596871,-0.592935,-0.598229,0.002176,-0.596096,-0.596382,-0.590924,-0.602054,0.00305,-0.596681,-0.597726,-0.576654,-0.606256,0.004987,-0.598642,-0.590907,-0.588296,-0.628458,0.013716,-0.596492,-0.597384,-0.590528,-0.600465,0.002857,-0.598432,-0.596481,-0.581067,-0.634077,0.014345,2017,1,1,2018,1,1,-395,390,359,1.114286,1.025714,1.114286,0.895141,0.918159,-0.546239,0.895141,0.918159,-0.546239,6.0,0.166667,0.0,0.0,1.0,1.0,1.0,1.0,4.0,5.0,6.0,1.0,2.0,4.0,4.0,4.0,5.0,-422.5,-421.5,-396.0,-453.0,26.402651,27.5,26.5,58.0,1.0,26.402651,1.5,1.5,2.0,1.0,0.547723,-4.355735,-0.725956,-0.732633,-0.701858,-0.73941,0.014326,1.0,1.0,1.0,1.0,0.0,-0.303226,-0.305276,-0.233953,-0.369705,0.070442,-0.362978,-0.366316,-0.350929,-0.369705,0.007163,1522393000.0,1517505000.0,-0.588835,-0.588835,-0.588835,-0.588835,0.0,-0.615461,-0.615461,-0.615461,-0.615461,0.0,-0.586315,-0.586315,-0.586315,-0.586315,0.0,-0.575068,-0.575068,-0.575068,-0.575068,0.0,-0.566986,-0.62535,-0.215509,-0.659385,0.17324,-0.557115,-0.625254,-0.145134,-0.693083,0.203741,-0.588241,-0.643117,-0.186088,-0.719306,0.200993,-0.572042,-0.572042,-0.572042,-0.572042,0.0,-0.57031,-0.57031,-0.56817,-0.572449,0.002344,-0.571877,-0.57149,-0.5668,-0.578501,0.003973,-0.564906,-0.563122,-0.5619
72,-0.572497,0.004113,-0.571457,-0.569403,-0.560548,-0.585524,0.010816,-0.571593,-0.571328,-0.567973,-0.575455,0.003085,56.0,52.0,9.333333,8.666667,9.666667,0.105263,0.912281,-0.076416,0.101695,0.881356,-0.073826,0,-8.219048,-7.640952,0.115729,0.789878,0.005878,-0.469823,0.793446,0.036803,-0.472413,-7.376068,-7.449396,-0.189649,0.882406,0.006402,0.860104,0.886392,0.040083,0.864847
2,2016-08-01,C_ID_d639edf6cd,2,2,0,0.688056,0.953488,43,0.0,0.255814,0.0,2,1,2,5,7,8,13,4,10,22,7,19,14,-304.302326,-264.0,-163,-575,117.06338,-271.697674,-312.0,-1,-413,117.06338,-8.604651,-10.0,0,-13,3.842987,-29.167391,-0.678311,-0.698868,-0.145847,-0.730138,0.08738,0.0,0.0,0.0,0.0,0.0,-0.098675,-0.063768,-0.0452,-0.661287,0.103856,-0.678311,-0.698868,-0.145847,-0.730138,0.08738,1519759000.0,1484123000.0,-0.598413,-0.59713,-0.59713,-0.610919,0.004053,-0.641898,-0.641898,-0.641898,-0.641898,0.0,-0.59609,-0.594651,-0.594651,-0.610125,0.004548,-0.602135,-0.600521,-0.600521,-0.634245,0.005914,-0.621766,-0.632387,-0.259431,-0.675323,0.065696,-0.624373,-0.633152,-0.29531,-0.699478,0.063245,-0.597087,-0.589081,-0.40403,-0.70821,0.043167,-0.597197,-0.598183,-0.592935,-0.598229,0.00195,-0.597835,-0.597831,-0.590999,-0.602054,0.002637,-0.599072,-0.598838,-0.589821,-0.606256,0.003864,-0.59462,-0.590207,-0.588296,-0.628458,0.009211,-0.597148,-0.597824,-0.591321,-0.600465,0.002655,-0.599273,-0.603807,-0.581067,-0.618956,0.013842,2016,3,8,2018,1,2,-576,412,393,9.581395,9.139535,9.604651,0.104116,0.951574,-0.070623,0.103865,0.949275,-0.070453,1.0,0.0,1.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,-635.0,-635.0,-635.0,-635.0,,59.0,59.0,59.0,59.0,,2.0,2.0,2.0,2.0,,-0.700326,-0.700326,-0.700326,-0.700326,-0.700326,,0.0,0.0,0.0,0.0,,-0.233442,-0.233442,-0.233442,-0.233442,,-0.700326,-0.700326,-0.700326,-0.700326,,1524937000.0,1524937000.0,-0.572046,-0.572046,-0.572046,-0.572046,,-0.634457,-0.634457,-0.634457,-0.634457,,-0.571493,-0.571493,-0.571493,-0.571493,,-0.574325,-0.574325,-0.574325,-0.574325,,-0.565526,-0.565526,-0.565526,-0.565526,,-0.690098,-0.690098,-0.690098,-0.690098,,-0.693672,-0.693672,-0.693672,-0.693672,,-0.572793,-0.572793,-0.572793,-0.572793,,-0.571095,-0.571095,-0.571095,-0.571095,,-0.568809,-0.568809,-0.568809,-0.568809,,-0.584572,-0.584572,-0.584572,-0.584572,,-0.57231,-0.57231,-0.57231,-0.57231,,-0.569341,-0.569341,-0.569341,-0.569341,,0.0,-1.0,0.
0,-1.0,59.0,1.0,-1.0,-0.700326,0.016667,-0.016667,-0.011672,0,9.581395,10.139535,0.022014,-0.895884,1.951574,0.629702,0.087198,0.965942,-0.058781,1.0,1.109415,-0.032454,-8.604651,2.050891,-8.916364,0.839535,1.017557,0.834327
3,2017-09-01,C_ID_186d6a6901,4,3,0,0.142495,1.0,77,0.0,0.142857,0.155844,3,3,5,7,13,25,50,3,6,20,7,25,16,-81.883117,-55.0,-25,-180,53.380209,-98.116883,-125.0,0,-155,53.380209,-2.831169,-4.0,0,-5,1.802065,-49.491364,-0.642745,-0.707989,1.445596,-0.740897,0.261624,1.146667,1.0,3.0,1.0,0.484722,-0.259738,-0.145412,0.361399,-0.740897,0.233968,-0.315587,-0.354671,0.722798,-0.370449,0.140188,1519818000.0,1506443000.0,-0.592124,-0.60189,-0.5302,-0.610919,0.026977,-0.560426,-0.61783,-0.123849,-0.61783,0.158834,-0.600389,-0.614372,-0.5302,-0.615237,0.030525,-0.599914,-0.614372,-0.52583,-0.624886,0.032384,-0.59679,-0.632387,0.021631,-0.723084,0.11457,-0.589,-0.644898,-0.019754,-0.723084,0.12519,-0.577397,-0.647246,0.525537,-0.737291,0.240898,-0.59501,-0.592935,-0.592935,-0.598229,0.002448,-0.597304,-0.597831,-0.590924,-0.602054,0.003506,-0.596512,-0.597726,-0.576654,-0.606256,0.005691,-0.593391,-0.590207,-0.588296,-0.628458,0.009981,-0.596092,-0.596729,-0.590528,-0.60021,0.002878,-0.591511,-0.582806,-0.581067,-0.620001,0.013338,2017,3,9,2018,1,2,-180,154,129,2.0,1.675325,2.012987,0.496774,0.832258,-0.319299,0.49359,0.826923,-0.317252,7.0,0.0,0.428571,0.142857,2.0,2.0,2.0,2.0,5.0,6.0,7.0,2.0,2.0,5.0,4.0,7.0,5.0,-215.285714,-219.0,-187.0,-229.0,15.195551,35.285714,39.0,49.0,7.0,15.195551,1.714286,2.0,2.0,1.0,0.48795,-4.654372,-0.66491,-0.69161,-0.56674,-0.734135,0.065882,1.0,1.0,1.0,1.0,0.0,-0.252163,-0.238952,-0.188913,-0.350238,0.052891,-0.340636,-0.348022,-0.290823,-0.367068,0.027202,1524049000.0,1520424000.0,-0.559231,-0.577542,-0.449364,-0.577542,0.048447,-0.547387,-0.615461,-0.138941,-0.615461,0.180108,-0.565933,-0.585361,-0.449364,-0.585361,0.051402,-0.56369,-0.585361,-0.433662,-0.585361,0.057337,-0.615777,-0.659385,-0.509771,-0.672033,0.062464,-0.617348,-0.632814,-0.498108,-0.693083,0.079721,-0.609265,-0.641744,-0.436335,-0.724446,0.097901,-0.572579,-0.572793,-0.572042,-0.572793,0.000367,-0.571482,-0.571095,-0.571095,-0.572449,0.000661,-0.571163,-0.570447,-0.569837,-0.
572962,0.001277,-0.575881,-0.563122,-0.562838,-0.61039,0.018326,-0.577433,-0.576908,-0.571521,-0.582996,0.004886,-0.571474,-0.573315,-0.555792,-0.598063,0.016881,41.0,34.0,5.857143,4.857143,7.0,0.166667,0.809524,-0.110818,0.14,0.68,-0.093087,0,-3.857143,-3.181818,0.022165,0.330108,0.022734,-0.208481,0.35359,0.146923,-0.224165,-1.928571,-1.899225,-0.034485,0.664502,0.027316,0.652932,0.716364,0.177674,0.706582
4,2017-11-01,C_ID_cdbd2c0db2,1,3,0,-0.159749,0.962406,133,0.037594,0.315789,0.112782,4,2,6,6,17,26,67,2,4,17,7,30,22,-67.398496,-68.0,-11,-119,32.095487,-51.601504,-51.0,0,-108,32.095487,-1.285714,-1.0,0,-3,1.0267,-71.745931,-0.539443,-0.689807,1.5,-0.746156,0.440657,1.368421,1.0,12.0,1.0,1.896862,-0.271601,-0.287412,1.5,-0.737892,0.375344,-0.291205,-0.344904,0.375,-0.373078,0.127318,1519850000.0,1510445000.0,-0.594448,-0.60189,-0.5302,-0.610919,0.023121,-0.591831,-0.61783,-0.123849,-0.61783,0.110721,-0.603126,-0.614372,-0.5302,-0.615237,0.026781,-0.603299,-0.614372,-0.52583,-0.624886,0.028026,-0.629542,-0.675323,0.021631,-0.723084,0.099153,-0.61572,-0.656989,0.083997,-0.723084,0.121151,-0.614575,-0.681575,0.282571,-0.737291,0.161918,-0.596159,-0.598229,-0.592935,-0.598229,0.002593,-0.595201,-0.59477,-0.590924,-0.602054,0.004424,-0.594894,-0.594968,-0.576654,-0.606256,0.006636,-0.597432,-0.590207,-0.588296,-0.628458,0.013302,-0.596633,-0.597384,-0.590528,-0.600465,0.002844,-0.595498,-0.59424,-0.581067,-0.634077,0.014292,2017,4,11,2018,1,2,-119,108,78,0.81203,0.586466,0.81203,1.220183,0.715596,-0.65822,1.220183,0.715596,-0.65822,36.0,0.055556,0.333333,0.055556,4.0,3.0,5.0,5.0,10.0,17.0,36.0,2.0,2.0,8.0,7.0,22.0,14.0,-150.805556,-151.5,-121.0,-178.0,16.98204,31.805556,32.5,59.0,2.0,16.98204,1.555556,2.0,2.0,1.0,0.503953,-19.926237,-0.553507,-0.607447,0.450886,-0.739395,0.223821,1.028571,1.0,2.0,1.0,0.169031,-0.228974,-0.232778,0.150295,-0.369697,0.103839,-0.280579,-0.305835,0.225443,-0.369697,0.110812,1524941000.0,1519992000.0,-0.569793,-0.577542,-0.449364,-0.588835,0.029878,-0.588908,-0.615461,-0.136057,-0.615461,0.111036,-0.57311,-0.585361,-0.449364,-0.586315,0.031291,-0.57088,-0.585361,-0.433662,-0.585361,0.035165,-0.585562,-0.659385,0.090939,-0.675728,0.173477,-0.571234,-0.632814,0.342761,-0.701197,0.192152,-0.553405,-0.629304,0.353666,-0.728822,0.223065,-0.572459,-0.572793,-0.572042,-0.572793,0.000379,-0.571697,-0.571095,-0.571095,-0.572449,0.000683,-0.572222,-0.
570447,-0.568809,-0.578501,0.003274,-0.575276,-0.572497,-0.561972,-0.61039,0.015107,-0.574142,-0.57231,-0.566042,-0.590323,0.00709,-0.574394,-0.57458,-0.555792,-0.598063,0.014969,57.0,35.0,1.583333,0.972222,1.638889,0.62069,0.603448,-0.343556,0.6,0.583333,-0.332104,0,-0.771303,-0.385756,0.014063,0.599494,0.112148,-0.314664,0.620183,0.132263,-0.326116,-0.949846,-0.657764,-0.02607,0.491314,0.15672,0.478053,0.508271,0.184829,0.495451


In [36]:
# Preview the first five test rows, including the newly derived columns.
test.head(n=5)

Unnamed: 0,first_active_month,card_id,feature_1,feature_2,feature_3,authorized_flag_mean,hist_transactions_count,hist_is_month_start_mean,hist_weekend_mean,hist_category_1_mean,hist_category_2_nunique,hist_category_3_nunique,hist_state_id_nunique,hist_city_id_nunique,hist_subsector_id_nunique,hist_merchant_category_id_nunique,hist_merchant_id_nunique,hist_quarter_nunique,hist_month_nunique,hist_weekofyear_nunique,hist_dayofweek_nunique,hist_day_nunique,hist_hour_nunique,hist_a2p_mean,hist_a2p_median,hist_a2p_max,hist_a2p_min,hist_a2p_std,hist_p2r_mean,hist_p2r_median,hist_p2r_max,hist_p2r_min,hist_p2r_std,hist_month_lag_mean,hist_month_lag_median,hist_month_lag_max,hist_month_lag_min,hist_month_lag_std,hist_purchase_amount_sum,hist_purchase_amount_mean,hist_purchase_amount_median,hist_purchase_amount_max,hist_purchase_amount_min,hist_purchase_amount_std,hist_installments_mean,hist_installments_median,hist_installments_max,hist_installments_min,hist_installments_std,hist_p_vs_m_mean,hist_p_vs_m_median,hist_p_vs_m_max,hist_p_vs_m_min,hist_p_vs_m_std,hist_p_vs_i_mean,hist_p_vs_i_median,hist_p_vs_i_max,hist_p_vs_i_min,hist_p_vs_i_std,hist_purchase_date_max,hist_purchase_date_min,hist_category_2_p_mean_mean,hist_category_2_p_mean_median,hist_category_2_p_mean_max,hist_category_2_p_mean_min,hist_category_2_p_mean_std,hist_category_3_p_mean_mean,hist_category_3_p_mean_median,hist_category_3_p_mean_max,hist_category_3_p_mean_min,hist_category_3_p_mean_std,hist_state_id_p_mean_mean,hist_state_id_p_mean_median,hist_state_id_p_mean_max,hist_state_id_p_mean_min,hist_state_id_p_mean_std,hist_city_id_p_mean_mean,hist_city_id_p_mean_median,hist_city_id_p_mean_max,hist_city_id_p_mean_min,hist_city_id_p_mean_std,hist_subsector_id_p_mean_mean,hist_subsector_id_p_mean_median,hist_subsector_id_p_mean_max,hist_subsector_id_p_mean_min,hist_subsector_id_p_mean_std,hist_merchant_category_id_p_mean_mean,hist_merchant_category_id_p_mean_median,hist_merchant_category_id_p_mean_max,hist_mercha
nt_category_id_p_mean_min,hist_merchant_category_id_p_mean_std,hist_merchant_id_p_mean_mean,hist_merchant_id_p_mean_median,hist_merchant_id_p_mean_max,hist_merchant_id_p_mean_min,hist_merchant_id_p_mean_std,hist_quarter_p_mean_mean,hist_quarter_p_mean_median,hist_quarter_p_mean_max,hist_quarter_p_mean_min,hist_quarter_p_mean_std,hist_month_p_mean_mean,hist_month_p_mean_median,hist_month_p_mean_max,hist_month_p_mean_min,hist_month_p_mean_std,hist_weekofyear_p_mean_mean,hist_weekofyear_p_mean_median,hist_weekofyear_p_mean_max,hist_weekofyear_p_mean_min,hist_weekofyear_p_mean_std,hist_dayofweek_p_mean_mean,hist_dayofweek_p_mean_median,hist_dayofweek_p_mean_max,hist_dayofweek_p_mean_min,hist_dayofweek_p_mean_std,hist_day_p_mean_mean,hist_day_p_mean_median,hist_day_p_mean_max,hist_day_p_mean_min,hist_day_p_mean_std,hist_hour_p_mean_mean,hist_hour_p_mean_median,hist_hour_p_mean_max,hist_hour_p_mean_min,hist_hour_p_mean_std,hist_first_year,hist_first_quarter,hist_first_month,hist_re_year,hist_re_quarter,hist_re_month,hist_a2r,hist_p2p,hist_sleep,hist_p2p_vs_count,hist_sleep_vs_count,hist_p2r_vs_count,hist_count_vs_p2p,hist_sleep_vs_p2p,hist_p_vs_p2p,hist_count_vs_p2r,hist_sleep_vs_p2r,hist_p_vs_p2r,new_transactions_count,new_is_month_start_mean,new_weekend_mean,new_category_1_mean,new_category_2_nunique,new_category_3_nunique,new_state_id_nunique,new_city_id_nunique,new_subsector_id_nunique,new_merchant_category_id_nunique,new_merchant_id_nunique,new_quarter_nunique,new_month_nunique,new_weekofyear_nunique,new_dayofweek_nunique,new_day_nunique,new_hour_nunique,new_a2p_mean,new_a2p_median,new_a2p_max,new_a2p_min,new_a2p_std,new_p2r_mean,new_p2r_median,new_p2r_max,new_p2r_min,new_p2r_std,new_month_lag_mean,new_month_lag_median,new_month_lag_max,new_month_lag_min,new_month_lag_std,new_purchase_amount_sum,new_purchase_amount_mean,new_purchase_amount_median,new_purchase_amount_max,new_purchase_amount_min,new_purchase_amount_std,new_installments_mean,new_installments_median,new_
installments_max,new_installments_min,new_installments_std,new_p_vs_m_mean,new_p_vs_m_median,new_p_vs_m_max,new_p_vs_m_min,new_p_vs_m_std,new_p_vs_i_mean,new_p_vs_i_median,new_p_vs_i_max,new_p_vs_i_min,new_p_vs_i_std,new_purchase_date_max,new_purchase_date_min,new_category_2_p_mean_mean,new_category_2_p_mean_median,new_category_2_p_mean_max,new_category_2_p_mean_min,new_category_2_p_mean_std,new_category_3_p_mean_mean,new_category_3_p_mean_median,new_category_3_p_mean_max,new_category_3_p_mean_min,new_category_3_p_mean_std,new_state_id_p_mean_mean,new_state_id_p_mean_median,new_state_id_p_mean_max,new_state_id_p_mean_min,new_state_id_p_mean_std,new_city_id_p_mean_mean,new_city_id_p_mean_median,new_city_id_p_mean_max,new_city_id_p_mean_min,new_city_id_p_mean_std,new_subsector_id_p_mean_mean,new_subsector_id_p_mean_median,new_subsector_id_p_mean_max,new_subsector_id_p_mean_min,new_subsector_id_p_mean_std,new_merchant_category_id_p_mean_mean,new_merchant_category_id_p_mean_median,new_merchant_category_id_p_mean_max,new_merchant_category_id_p_mean_min,new_merchant_category_id_p_mean_std,new_merchant_id_p_mean_mean,new_merchant_id_p_mean_median,new_merchant_id_p_mean_max,new_merchant_id_p_mean_min,new_merchant_id_p_mean_std,new_quarter_p_mean_mean,new_quarter_p_mean_median,new_quarter_p_mean_max,new_quarter_p_mean_min,new_quarter_p_mean_std,new_month_p_mean_mean,new_month_p_mean_median,new_month_p_mean_max,new_month_p_mean_min,new_month_p_mean_std,new_weekofyear_p_mean_mean,new_weekofyear_p_mean_median,new_weekofyear_p_mean_max,new_weekofyear_p_mean_min,new_weekofyear_p_mean_std,new_dayofweek_p_mean_mean,new_dayofweek_p_mean_median,new_dayofweek_p_mean_max,new_dayofweek_p_mean_min,new_dayofweek_p_mean_std,new_day_p_mean_mean,new_day_p_mean_median,new_day_p_mean_max,new_day_p_mean_min,new_day_p_mean_std,new_hour_p_mean_mean,new_hour_p_mean_median,new_hour_p_mean_max,new_hour_p_mean_min,new_hour_p_mean_std,new_p2p,new_sleep,new_p2p_vs_count,new_sleep_vs_count,new_p2r_vs_co
unt,new_count_vs_p2p,new_sleep_vs_p2p,new_p_vs_p2p,new_count_vs_p2r,new_sleep_vs_p2r,new_p_vs_p2r,c_p2p_diff,c_sleep_diff,c_p_diff,p2p_count_diff,p2p_sleep_diff,p2p_p_diff,p2r_count_diff,p2r_sleep_diff,p2r_p_diff,c_p2p_diff_vs,c_sleep_diff_vs,c_p_diff_vs,p2p_count_diff_vs,p2p_sleep_diff_vs,p2p_p_diff_vs,p2r_count_diff_vs,p2r_sleep_diff_vs,p2r_p_diff_vs
0,2017-04-01,C_ID_0ab67a22ab,3,3,1,0.647059,68,0.044118,0.176471,0.338235,2,2,3,7,12,16,24,3,9,24,7,24,17,-148.602941,-155.0,-3,-272,74.718005,-125.397059,-119.0,-2,-271,74.718005,-3.632353,-3.5,0,-8,2.454994,-40.733733,-0.599025,-0.689206,0.235676,-0.743902,0.192268,2.073529,1.0,12.0,1.0,2.061127,-0.212196,-0.144573,0.026186,-0.739395,0.194647,-0.265675,-0.344603,0.078559,-0.371951,0.127292,1514510000.0,1491330000.0,-0.583617,-0.610919,-0.5302,-0.610919,0.038473,-0.458013,-0.61783,-0.123849,-0.61783,0.232814,-0.582328,-0.608888,-0.5302,-0.610125,0.037545,-0.581039,-0.615685,-0.52583,-0.61573,0.042622,-0.565759,-0.601317,-0.107234,-0.683079,0.122212,-0.569138,-0.582748,-0.263942,-0.699478,0.1139,-0.545406,-0.621481,-0.154367,-0.709401,0.14309,-0.595829,-0.596871,-0.592935,-0.598183,0.002284,-0.596241,-0.597791,-0.590924,-0.599696,0.002884,-0.596864,-0.597333,-0.585321,-0.601837,0.004477,-0.592535,-0.590207,-0.588296,-0.628458,0.006652,-0.596376,-0.596729,-0.590528,-0.60021,0.002724,-0.603363,-0.603807,-0.581067,-0.634077,0.017528,2017,2,4,2017,4,12,-274,268,244,3.941176,3.588235,3.985294,0.252788,0.907063,-0.151427,0.25,0.897059,-0.149756,3.0,0.0,0.333333,0.0,1.0,2.0,1.0,3.0,3.0,3.0,3.0,1.0,1.0,3.0,3.0,3.0,3.0,-320.666667,-321.0,-308.0,-333.0,12.503333,46.666667,47.0,59.0,34.0,12.503333,2.0,2.0,2.0,2.0,0.0,-1.777156,-0.592385,-0.671775,-0.383266,-0.722114,0.182843,1.666667,1.0,3.0,1.0,1.154701,-0.197462,-0.223925,-0.127755,-0.240705,0.060948,-0.264254,-0.335888,-0.095817,-0.361057,0.146413,1519845000.0,1517651000.0,-0.588835,-0.588835,-0.588835,-0.588835,0.0,-0.45566,-0.615461,-0.136057,-0.615461,0.276784,-0.587646,-0.587646,-0.587646,-0.587646,0.0,-0.60483,-0.602884,-0.594901,-0.616704,0.011031,-0.573889,-0.616939,-0.445344,-0.659385,0.113329,-0.588331,-0.617693,-0.454217,-0.693083,0.12211,-0.637906,-0.641898,-0.566239,-0.70558,0.069756,-0.572042,-0.572042,-0.572042,-0.572042,0.0,-0.56817,-0.56817,-0.56817,-0.56817,0.0,-0.574248,-0.574353,-0.5668,-0.581592,0.007396
,-0.573302,-0.572497,-0.562838,-0.584572,0.010889,-0.571249,-0.57231,-0.567474,-0.573962,0.003372,-0.581336,-0.57458,-0.572546,-0.596881,0.013501,25.0,22.0,8.333333,7.333333,19.666667,0.115385,0.846154,-0.068352,0.05,0.366667,-0.029619,-4.392157,-3.745098,-0.00664,0.137403,0.060909,-0.083074,0.2,0.530392,-0.120137,-1.114428,-1.043716,0.011085,0.543552,0.06715,0.548612,0.8,0.591257,0.802217
1,2017-01-01,C_ID_130fd0cbdd,2,3,0,0.987179,78,0.0,0.217949,0.025641,3,2,3,4,12,16,27,2,5,20,7,27,18,-93.320513,-97.5,-12,-413,64.35383,-329.679487,-325.5,-10,-411,64.35383,-10.410256,-10.0,0,-13,2.164866,-49.136513,-0.629955,-0.679288,0.318817,-0.731881,0.154999,1.064103,1.0,4.0,1.0,0.405794,-0.069541,-0.057727,0.028983,-0.676283,0.094028,-0.311694,-0.339644,0.159409,-0.365941,0.083459,1518989000.0,1484321000.0,-0.599798,-0.60189,-0.5302,-0.60189,0.011582,-0.605164,-0.61783,-0.123849,-0.61783,0.078585,-0.600331,-0.602512,-0.5302,-0.602512,0.011809,-0.592689,-0.594612,-0.52583,-0.615568,0.011802,-0.622253,-0.632387,-0.437499,-0.683504,0.048544,-0.613358,-0.633152,-0.35901,-0.710127,0.073392,-0.618896,-0.646826,-0.210804,-0.715759,0.08074,-0.598205,-0.598183,-0.598183,-0.598229,2.3e-05,-0.598339,-0.598178,-0.59477,-0.602054,0.00241,-0.599289,-0.599219,-0.591699,-0.606256,0.003292,-0.59422,-0.590207,-0.588296,-0.628458,0.009366,-0.596863,-0.597384,-0.590528,-0.600465,0.002698,-0.592107,-0.591323,-0.581067,-0.619704,0.012028,2017,1,1,2018,1,2,-423,401,374,5.141026,4.794872,5.269231,0.19403,0.930348,-0.12223,0.18932,0.907767,-0.119263,9.0,0.0,0.333333,0.222222,2.0,2.0,2.0,2.0,6.0,8.0,9.0,2.0,2.0,6.0,6.0,7.0,8.0,-448.111111,-435.0,-426.0,-474.0,20.781268,25.111111,12.0,51.0,3.0,20.781268,1.444444,1.0,2.0,1.0,0.527046,-5.944698,-0.660522,-0.656749,-0.506484,-0.740897,0.071147,1.222222,1.0,3.0,1.0,0.666667,-0.282974,-0.30591,-0.168828,-0.358435,0.071383,-0.316192,-0.328374,-0.126621,-0.370449,0.074061,1524247000.0,1520080000.0,-0.549058,-0.577542,-0.449364,-0.577542,0.056521,-0.562194,-0.615461,-0.136057,-0.615461,0.159802,-0.551749,-0.581002,-0.449364,-0.581002,0.058047,-0.542182,-0.573187,-0.433662,-0.573187,0.061525,-0.569676,-0.572294,-0.445344,-0.659385,0.075369,-0.599339,-0.630178,-0.508339,-0.657243,0.058302,-0.661882,-0.660355,-0.568788,-0.741099,0.046546,-0.572376,-0.572042,-0.572042,-0.572793,0.000396,-0.571847,-0.572449,-0.571095,-0.572449,0.000714,-0.570726,-0.
569963,-0.569705,-0.574353,0.001575,-0.574937,-0.572497,-0.561972,-0.61039,0.016111,-0.571782,-0.569732,-0.566755,-0.581595,0.004917,-0.576416,-0.572546,-0.555792,-0.597572,0.014545,48.0,41.0,5.333333,4.555556,5.666667,0.183673,0.836735,-0.12132,0.173077,0.788462,-0.114321,-0.192308,0.239316,0.030567,0.010356,0.093614,-0.00091,0.016243,0.119305,-0.004942,-0.037406,0.049911,-0.048522,0.053375,0.100622,0.007443,0.085799,0.131427,0.04144
2,2017-08-01,C_ID_b709037bc5,5,1,1,0.692308,13,0.0,0.0,0.076923,3,3,4,4,6,8,9,3,6,7,4,7,7,-127.615385,-126.0,-24,-185,49.157126,-83.384615,-85.0,-26,-187,49.157126,-2.076923,-2.0,0,-6,1.754116,3.495905,0.268916,0.214624,1.5,-0.536537,0.72734,4.7,4.0,10.0,1.0,3.12872,0.23344,0.091151,1.175825,-0.134134,0.416047,-0.024961,-0.018447,0.136364,-0.268268,0.135424,1517598000.0,1503673000.0,-0.593042,-0.59713,-0.5302,-0.610919,0.019262,-0.164817,-0.123849,-0.123849,-0.61783,0.136228,-0.591506,-0.594651,-0.5302,-0.610125,0.018996,-0.594581,-0.600521,-0.52583,-0.600521,0.020663,-0.42263,-0.437465,-0.230301,-0.601317,0.108714,-0.431809,-0.46925,-0.270607,-0.582748,0.099393,-0.150918,-0.298796,0.548182,-0.611614,0.373543,-0.595274,-0.592935,-0.592935,-0.598229,0.002654,-0.595048,-0.59477,-0.590924,-0.602054,0.004627,-0.592049,-0.593562,-0.576654,-0.601536,0.005582,-0.589979,-0.590207,-0.588296,-0.591606,0.001185,-0.594627,-0.593843,-0.591216,-0.600465,0.003165,-0.593896,-0.59424,-0.581596,-0.609127,0.011767,2017,3,8,2018,1,2,-211,161,154,12.384615,11.846154,14.384615,0.080247,0.950617,0.02158,0.069149,0.819149,0.018595,2.0,0.5,0.0,0.5,2.0,2.0,2.0,2.0,2.0,2.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,-218.0,-218.0,-212.0,-224.0,8.485281,7.0,7.0,13.0,1.0,8.485281,1.0,1.0,1.0,1.0,0.0,0.180138,0.090069,0.090069,0.904506,-0.724368,1.151788,5.5,5.5,10.0,1.0,6.363961,0.045034,0.045034,0.452253,-0.362184,0.575894,-0.139978,-0.139978,0.082228,-0.362184,0.314247,1520947000.0,1519916000.0,-0.510705,-0.510705,-0.449364,-0.572046,0.086749,-0.375759,-0.375759,-0.136057,-0.615461,0.33899,-0.50649,-0.50649,-0.449364,-0.563615,0.080787,-0.503969,-0.503969,-0.433662,-0.574277,0.09943,-0.533128,-0.533128,-0.394224,-0.672033,0.196441,-0.492315,-0.492315,-0.283798,-0.700832,0.294888,-0.454068,-0.454068,-0.207134,-0.701001,0.349217,-0.572042,-0.572042,-0.572042,-0.572042,0.0,-0.572449,-0.572449,-0.572449,-0.572449,0.0,-0.572029,-0.572029,-0.569705,-0.574353,0.003286,-0.564936,-0.564936,-0.563122,-0.56675,0.002565
,-0.576261,-0.576261,-0.56921,-0.583313,0.009972,-0.568398,-0.568398,-0.562217,-0.57458,0.008742,11.0,9.0,5.5,4.5,6.5,0.166667,0.75,0.015011,0.142857,0.642857,0.012867,6.884615,7.346154,0.178847,-0.08642,0.200617,0.006568,-0.073708,0.176292,0.005728,0.555901,0.62013,0.665066,-1.076923,0.211039,0.304369,-1.065934,0.215213,0.308049
3,2017-12-01,C_ID_d27d835a9f,2,1,0,1.0,26,0.0,0.269231,0.0,1,3,1,1,11,18,23,2,3,6,7,11,9,-34.269231,-12.5,-3,-81,33.270477,-54.730769,-76.5,-8,-86,33.270477,-1.230769,-2.0,0,-2,0.951113,-13.690715,-0.526566,-0.587492,0.087965,-0.731881,0.219162,1.666667,1.0,6.0,1.0,1.34056,-0.328406,-0.21947,0.029322,-0.731881,0.251145,-0.251455,-0.293746,-0.020331,-0.365941,0.105024,1519127000.0,1512392000.0,-0.610919,-0.610919,-0.610919,-0.610919,0.0,-0.447826,-0.61783,-0.123849,-0.61783,0.238297,-0.610125,-0.610125,-0.610125,-0.610125,0.0,-0.609366,-0.609366,-0.609366,-0.609366,0.0,-0.571867,-0.601317,-0.363228,-0.683079,0.090907,-0.572843,-0.594228,-0.389234,-0.710127,0.097049,-0.508716,-0.60419,0.241313,-0.716833,0.222445,-0.595175,-0.592935,-0.592935,-0.598229,0.002667,-0.595116,-0.590999,-0.590999,-0.602054,0.005245,-0.594511,-0.589821,-0.585989,-0.606256,0.006948,-0.597723,-0.58953,-0.588296,-0.628458,0.015758,-0.593921,-0.593159,-0.590528,-0.598652,0.002658,-0.605534,-0.609127,-0.581067,-0.618956,0.011412,2017,4,12,2018,1,2,-89,77,66,2.961538,2.538462,3.307692,0.333333,0.846154,-0.175522,0.298851,0.758621,-0.157365,10.0,0.0,0.3,0.1,3.0,2.0,3.0,3.0,8.0,10.0,10.0,2.0,2.0,7.0,5.0,8.0,7.0,-111.9,-111.0,-93.0,-137.0,15.828596,22.9,22.0,48.0,4.0,15.828596,1.3,1.0,2.0,1.0,0.483046,-5.743674,-0.574367,-0.581391,-0.44788,-0.671775,0.073166,2.9,2.0,12.0,1.0,3.3483,-0.25779,-0.2645,-0.149293,-0.317104,0.051744,-0.210945,-0.209654,-0.034452,-0.335888,0.102593,1524000000.0,1520162000.0,-0.573209,-0.588835,-0.449364,-0.588835,0.043833,-0.375759,-0.375759,-0.136057,-0.615461,0.252668,-0.571138,-0.586315,-0.449364,-0.586315,0.04304,-0.560853,-0.575068,-0.433662,-0.575068,0.044691,-0.541613,-0.572294,-0.345206,-0.675728,0.093659,-0.527685,-0.524773,-0.365877,-0.662075,0.096323,-0.538693,-0.543843,-0.380392,-0.643995,0.091843,-0.572267,-0.572042,-0.572042,-0.572793,0.000363,-0.572043,-0.572449,-0.571095,-0.572449,0.000654,-0.572094,-0.571345,-0.569837,-0.578501,0.002793,-0.574165,-0.56675,-0
.563122,-0.61039,0.015138,-0.577928,-0.578808,-0.569403,-0.590323,0.006676,-0.585417,-0.597226,-0.555792,-0.598063,0.017906,44.0,36.0,4.4,3.6,4.8,0.222222,0.8,-0.127637,0.204082,0.734694,-0.117218,-1.438462,-1.061538,0.047801,0.111111,0.046154,-0.047885,0.094769,0.023927,-0.040147,-0.485714,-0.418182,-0.09078,0.333333,0.054545,0.272814,0.317111,0.03154,0.255119
4,2015-12-01,C_ID_2b5e3df5c2,5,1,1,0.790909,110,0.018182,0.190909,0.0,3,2,4,5,15,31,47,4,12,34,7,27,21,-614.609091,-615.5,-399,-819,141.926011,-205.390909,-204.5,-1,-421,141.926011,-6.227273,-6.5,0,-13,4.530547,-37.923336,-0.344758,-0.671775,1.5,-0.746758,0.690151,1.090909,1.0,4.0,1.0,0.43988,-0.12451,-0.06296,1.5,-0.726622,0.326032,-0.17662,-0.335888,0.75,-0.373379,0.33424,1519728000.0,1483444000.0,-0.601318,-0.60189,-0.582086,-0.610919,0.004544,-0.590886,-0.61783,-0.123849,-0.61783,0.112692,-0.601614,-0.602512,-0.578258,-0.610125,0.005374,-0.614128,-0.615568,-0.58444,-0.615568,0.004935,-0.561734,-0.619453,-0.107234,-0.683079,0.120429,-0.552752,-0.6202,-0.06246,-0.699478,0.121425,-0.561582,-0.589783,-0.20571,-0.710944,0.118706,-0.597204,-0.598229,-0.592935,-0.598229,0.001842,-0.597963,-0.597831,-0.590924,-0.602054,0.002922,-0.59843,-0.598476,-0.589821,-0.606256,0.003887,-0.594356,-0.589729,-0.588296,-0.628458,0.011588,-0.595731,-0.596059,-0.590528,-0.600465,0.002537,-0.598528,-0.595762,-0.581067,-0.627459,0.015324,2015,4,12,2018,1,2,-820,419,392,3.809091,3.563636,3.827273,0.261905,0.933333,-0.090294,0.260664,0.92891,-0.089866,6.0,0.0,0.333333,0.0,2.0,3.0,2.0,2.0,4.0,5.0,6.0,2.0,2.0,3.0,4.0,5.0,5.0,-832.5,-827.5,-824.0,-863.0,15.083103,12.5,7.5,43.0,4.0,15.083103,1.166667,1.0,2.0,1.0,0.408248,-0.714607,-0.119101,-0.654495,1.5,-0.704082,0.921179,1.2,1.0,2.0,1.0,0.447214,-0.073239,-0.327247,0.75,-0.352041,0.450795,-0.170739,-0.335888,0.5,-0.352041,0.375175,1523535000.0,1520132000.0,-0.576466,-0.577542,-0.571085,-0.577542,0.002636,-0.456141,-0.615461,-0.136057,-0.615461,0.246821,-0.578878,-0.581002,-0.568259,-0.581002,0.005202,-0.587267,-0.58497,-0.58497,-0.598754,0.005627,-0.576796,-0.659385,-0.345206,-0.675728,0.144313,-0.559456,-0.632814,-0.365429,-0.646959,0.12568,-0.201846,-0.523168,1.5,-0.710562,0.855592,-0.572167,-0.572042,-0.572042,-0.572793,0.000307,-0.572224,-0.572449,-0.571095,-0.572449,0.000553,-0.571363,-0.5699,-0.569837,-0.574353,0.002316,-0.581602,-0.569
623,-0.562838,-0.61039,0.022511,-0.576597,-0.579251,-0.566755,-0.582026,0.006646,-0.585891,-0.581619,-0.562029,-0.61646,0.020219,39.0,34.0,6.5,5.666667,7.166667,0.15,0.85,-0.017865,0.136364,0.772727,-0.016241,-2.690909,-2.10303,-0.225656,0.111905,0.083333,-0.072428,0.1243,0.156183,-0.073625,-0.706444,-0.590136,0.654536,0.427273,0.089286,0.802144,0.47686,0.168135,0.819274


In [39]:
# Report (rows, columns) of both feature frames as a quick sanity check
# before profiling them.
print(f"Train Shape: {train.shape}")
print(f"Test Shape: {test.shape}")

Train Shape: (201917, 295)
Test Shape: (123623, 293)


In [40]:
# Profile every column of both frames with `summary`: number of distinct
# values, percentage of missing values, percentage share of the most
# frequent value, and dtype. `train` is profiled first, then `test`.
train_summary, test_summary = (summary(frame) for frame in (train, test))

In [41]:
# Show the train column profile ordered from highest to lowest cardinality
# (the sorted copy is only displayed; `train_summary` itself is unchanged).
train_summary.sort_values(by='unique', ascending=False)

Unnamed: 0,feature,unique,missing,mode,type
1,card_id,201917,0.0,0.000495,object
144,hist_p_vs_p2r,201897,0.0,0.000991,float64
111,hist_weekofyear_p_mean_std,201890,0.0,0.000991,float64
126,hist_hour_p_mean_std,201866,0.0,0.007429,float64
141,hist_p_vs_p2p,201832,0.0,0.003962,float64
54,hist_p_vs_m_std,201822,0.0,0.005448,float64
50,hist_p_vs_m_mean,201821,0.0,0.003962,float64
122,hist_hour_p_mean_mean,201778,0.0,0.001486,float64
107,hist_weekofyear_p_mean_mean,201736,0.0,0.002972,float64
55,hist_p_vs_i_mean,201668,0.0,0.011886,float64


In [42]:
# Show the test column profile ordered from highest to lowest cardinality
# (the sorted copy is only displayed; `test_summary` itself is unchanged).
test_summary.sort_values(by='unique', ascending=False)

Unnamed: 0,feature,unique,missing,mode,type
1,card_id,123623,0.0,0.000809,object
110,hist_weekofyear_p_mean_std,123614,0.0,0.002427,float64
143,hist_p_vs_p2r,123613,0.0,0.001618,float64
125,hist_hour_p_mean_std,123605,0.0,0.004853,float64
140,hist_p_vs_p2p,123587,0.0,0.004045,float64
49,hist_p_vs_m_mean,123585,0.0,0.004045,float64
53,hist_p_vs_m_std,123583,0.0,0.003236,float64
121,hist_hour_p_mean_mean,123557,0.0,0.002427,float64
106,hist_weekofyear_p_mean_mean,123554,0.0,0.003236,float64
54,hist_p_vs_i_mean,123497,0.0,0.016178,float64


In [65]:
# Persist the train feature frame as CSV.
# NOTE(review): with the default index=True the row index is written as a
# leading unnamed column -- confirm downstream readers expect that
# (e.g. read it back with index_col=0).
train.to_csv(path_or_buf="./data/pre_train_clip.csv")

In [66]:
# Persist the test feature frame as CSV.
# NOTE(review): with the default index=True the row index is written as a
# leading unnamed column -- confirm downstream readers expect that
# (e.g. read it back with index_col=0).
test.to_csv(path_or_buf="./data/pre_test_clip.csv")