# 사전작업

## 모듈 임포트

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import datetime
import seaborn as sns
import gc

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
pd.set_option('display.max_columns', 400)

In [4]:
path = './data/'

## Feature Engineering

### util functions

In [5]:
# reduce memory
def reduce_mem_usage(df, verbose=True):
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    start_mem = df.memory_usage().sum() / 1024**2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))

    return df

In [6]:
from scipy import stats
def mode(x):
    return stats.mode(x)[0][0]

In [7]:
def null_cnt(x):
    return np.sum(x.isna() * 1)

In [8]:
def over_550(data):
    return len(data[data > 550])

In [None]:
def ref_date(data):
    

### train test

In [6]:
# preprocessing train & test
def train_test(num_rows=None):

    # load csv
    train = pd.read_csv(path + 'train.csv', index_col=['card_id'])
    test = pd.read_csv(path + 'test.csv', index_col=['card_id'])
    
    # outlier
    train['outliers'] = 0
    train.loc[train['target'] < -30, 'outliers'] = 1

    # set target as nan
    test['target'] = np.nan

    # merge
    df = train.append(test)

    del train, test
    gc.collect()

    # first_active_month
    df['first_active'] = pd.to_datetime(df['first_active_month'])
    df['first_active_year'] = df['first_active'].dt.year
    df['first_active_month'] = df['first_active'].dt.month
    df['first_active_quarter'] = df['first_active'].dt.quarter
    df['first_active_weekofyear'] = df['first_active'].dt.weekofyear
    df['first_active_dayofweek'] = df['first_active'].dt.dayofweek
    # 모든 데이터의 마지막 거래날짜가 2018년 4월 30일 23시 59분 59초
    df['first_active_elapsed_time_from_trade'] = (datetime.datetime(2018, 4, 30, 23, 59, 59) - df['first_active']).dt.days
    df['first_active_elapsed_time_from_today'] = (datetime.datetime.today() - df['first_active']).dt.days

    df['days_feature1'] = df['feature_1'] * df['first_active_elapsed_time_from_trade']
    df['days_feature2'] = df['feature_2'] * df['first_active_elapsed_time_from_trade'] 
    df['days_feature3'] = df['feature_3'] * df['first_active_elapsed_time_from_trade'] 

    df['days_feature1_ratio'] = df['feature_1'] / df['first_active_elapsed_time_from_trade']
    df['days_feature2_ratio'] = df['feature_2'] / df['first_active_elapsed_time_from_trade']
    df['days_feature3_ratio'] = df['feature_3'] / df['first_active_elapsed_time_from_trade']
    
    for f in ['feature_1','feature_2','feature_3']:
        order_label = df.groupby([f])['outliers'].mean()
        df[f] = df[f].map(order_label)

    df['feature_sum'] = df['feature_1'] + df['feature_2'] + df['feature_3']
    df['feature_mean'] = df['feature_sum'] / 3
    df['feature_max'] = df[['feature_1', 'feature_2', 'feature_3']].max(axis=1)
    df['feature_min'] = df[['feature_1', 'feature_2', 'feature_3']].min(axis=1)
    df['feature_var'] = df[['feature_1', 'feature_2', 'feature_3']].std(axis=1)

    return df

### historical_transactions

In [16]:
# preprocessing historical transactions
def historical_transactions():
    
    # load csv
    hist_df = pd.read_csv(path + 'historical_transactions.csv')

    # fillna
    hist_df['category_2'].fillna(1.0,inplace=True)
    hist_df['category_3'].fillna('A',inplace=True)
    hist_df['merchant_id'].fillna(np.nan,inplace=True)
    hist_df['installments'].replace(-1, np.nan,inplace=True)
    hist_df['installments'].replace(999, np.nan,inplace=True)

    # trim
    hist_df['purchase_amount_trim'] = hist_df['purchase_amount'].apply(lambda x: min(x, 0.8))

    # Y/N to 1/0
    hist_df['authorized_flag'] = hist_df['authorized_flag'].map({'Y': 1, 'N': 0}).astype(int)
    hist_df['category_1'] = hist_df['category_1'].map({'Y': 1, 'N': 0}).astype(int)
    hist_df['category_3'] = hist_df['category_3'].map({'A':0, 'B':1, 'C':2})

    # purchase date
    hist_df['purchase_date'] = pd.to_datetime(hist_df['purchase_date'])
    hist_df['purchase_year'] = hist_df['purchase_date'].dt.year
    hist_df['purchase_month'] = hist_df['purchase_date'].dt.month
    hist_df['purchase_day'] = hist_df['purchase_date'].dt.day
    hist_df['purchase_hour'] = hist_df['purchase_date'].dt.hour
    hist_df['purchase_dayofweek'] = hist_df['purchase_date'].dt.dayofweek
    hist_df['purchase_weekofyear'] = hist_df['purchase_date'].dt.weekofyear
    hist_df['purchase_weekend'] = (hist_df['purchase_date'].dt.weekday >=5).astype(int)

    # additional features
    hist_df['price'] = hist_df['purchase_amount'] / hist_df['installments']
    
    hist_df['month_diff'] = ((datetime.datetime(2018, 4, 30, 23, 59, 59) - hist_df['purchase_date']).dt.days)//30
    hist_df['month_diff'] += hist_df['month_lag']
    
    hist_df['Christmas_Day_2017']=(pd.to_datetime('2017-12-25')-hist_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    hist_df['Mothers_Day_2017']=(pd.to_datetime('2017-06-04')-hist_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    hist_df['fathers_day_2017']=(pd.to_datetime('2017-08-13')-hist_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    hist_df['Children_day_2017']=(pd.to_datetime('2017-10-12')-hist_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    hist_df['Valentine_Day_2017']=(pd.to_datetime('2017-06-12')-hist_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    hist_df['Black_Friday_2017']=(pd.to_datetime('2017-11-24') - hist_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    hist_df['Mothers_Day_2018']=(pd.to_datetime('2018-05-13')-hist_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)

    hist_df['duration'] = hist_df['purchase_amount'] * hist_df['month_diff']
    hist_df['amount_month_ratio'] = hist_df['purchase_amount'] / hist_df['month_diff']

    # reduce memory usage
    hist_df = reduce_mem_usage(hist_df)
    
    aggs = {}
    aggs['authorized_flag'] = [mode, 'sum', 'mean']
    aggs['card_id'] = ['size']
    aggs['city_id'] = [mode, 'nunique']
    aggs['category_1'] = [mode, 'sum', 'mean']
    aggs['installments'] = [mode, 'sum', 'mean', 'var', 'max', 'min', null_cnt]
    aggs['category_3'] = [mode, 'mean']
    aggs['merchant_category_id'] = [mode, 'nunique']
    aggs['merchant_id'] = ['nunique']
    aggs['month_lag'] = [mode, 'sum', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_date'] = ['max','min']
    aggs['purchase_amount'] = ['sum', 'mean', 'var', 'max', 'min', 'skew', over_550]
    aggs['category_2'] = [mode, 'mean']
    aggs['state_id'] = [mode, 'nunique']
    aggs['subsector_id'] = [mode, 'nunique']
    aggs['purchase_amount_trim'] = ['sum', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_year'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_month'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_day'] = [mode, 'nunique', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_hour'] = [mode, 'nunique', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_dayofweek'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_weekofyear'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_weekend'] = [mode, 'sum', 'mean']
    aggs['price'] = ['sum','mean','max','min','var']
    aggs['month_diff'] = ['max','min','mean','var','skew']
    aggs['Christmas_Day_2017'] = ['mean']
    aggs['Mothers_Day_2017'] = ['mean']
    aggs['fathers_day_2017'] = ['mean']
    aggs['Children_day_2017'] = ['mean']
    aggs['Valentine_Day_2017'] = ['mean']
    aggs['Black_Friday_2017'] = ['mean']
    aggs['Mothers_Day_2018'] = ['mean']
    aggs['duration']=['mean','min','max','var','skew']
    aggs['amount_month_ratio']=['mean','min','max','var','skew']
    
    for col in ['category_2','category_3']:
        hist_df[col+'_mean'] = hist_df.groupby([col])['purchase_amount'].transform('mean')
        hist_df[col+'_min'] = hist_df.groupby([col])['purchase_amount'].transform('min')
        hist_df[col+'_max'] = hist_df.groupby([col])['purchase_amount'].transform('max')
        hist_df[col+'_sum'] = hist_df.groupby([col])['purchase_amount'].transform('sum')
        aggs[col+'_mean'] = ['mean']
        aggs[col+'_min'] = ['mean']
        aggs[col+'_max'] = ['mean']
        aggs[col+'_sum'] = ['mean']
    
    hist_df = hist_df.groupby('card_id').agg(aggs)
    
    # change column name
    hist_df.columns = pd.Index([e[0] + "_" + e[1] for e in hist_df.columns.tolist()])
    hist_df.columns = ['hist_'+ c for c in hist_df.columns]

    hist_df['hist_purchase_date_diff'] = (hist_df['hist_purchase_date_max']-hist_df['hist_purchase_date_min']).dt.days
    hist_df['hist_purchase_date_average'] = hist_df['hist_purchase_date_diff']/hist_df['hist_card_id_size']
    hist_df['hist_purchase_date_uptonow'] = (datetime.datetime.today()-hist_df['hist_purchase_date_max']).dt.days
    hist_df['hist_purchase_date_uptomin'] = (datetime.datetime.today()-hist_df['hist_purchase_date_min']).dt.days

    # reduce memory usage
    hist_df = reduce_mem_usage(hist_df)

    return hist_df

### new_merchant_transactions

In [17]:
# preprocessing new_merchant_transactions
def new_merchant_transactions():
    
    # load csv
    new_merchant_df = pd.read_csv(path + 'new_merchant_transactions.csv')

    # fillna
    new_merchant_df['category_2'].fillna(1.0,inplace=True)
    new_merchant_df['category_3'].fillna('A',inplace=True)
    new_merchant_df['merchant_id'].fillna(np.nan,inplace=True)
    new_merchant_df['installments'].replace(-1, np.nan,inplace=True)
    new_merchant_df['installments'].replace(999, np.nan,inplace=True)

    # trim
    new_merchant_df['purchase_amount_trim'] = new_merchant_df['purchase_amount'].apply(lambda x: min(x, 0.8))

    # Y/N to 1/0
    new_merchant_df['authorized_flag'] = new_merchant_df['authorized_flag'].map({'Y': 1, 'N': 0}).astype(int)
    new_merchant_df['category_1'] = new_merchant_df['category_1'].map({'Y': 1, 'N': 0}).astype(int)
    new_merchant_df['category_3'] = new_merchant_df['category_3'].map({'A':0, 'B':1, 'C':2})

    # purchase date
    new_merchant_df['purchase_date'] = pd.to_datetime(new_merchant_df['purchase_date'])
    new_merchant_df['purchase_year'] = new_merchant_df['purchase_date'].dt.year
    new_merchant_df['purchase_month'] = new_merchant_df['purchase_date'].dt.month
    new_merchant_df['purchase_day'] = new_merchant_df['purchase_date'].dt.day
    new_merchant_df['purchase_hour'] = new_merchant_df['purchase_date'].dt.hour
    new_merchant_df['purchase_dayofweek'] = new_merchant_df['purchase_date'].dt.dayofweek
    new_merchant_df['purchase_weekofyear'] = new_merchant_df['purchase_date'].dt.weekofyear
    new_merchant_df['purchase_weekend'] = (new_merchant_df['purchase_date'].dt.weekday >=5).astype(int)

    # additional features
    new_merchant_df['price'] = new_merchant_df['purchase_amount'] / new_merchant_df['installments']
    
    new_merchant_df['month_diff'] = ((datetime.datetime(2018, 4, 30, 23, 59, 59) - new_merchant_df['purchase_date']).dt.days)//30
    new_merchant_df['month_diff'] += new_merchant_df['month_lag']

    new_merchant_df['Christmas_Day_2017']=(pd.to_datetime('2017-12-25')-new_merchant_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    new_merchant_df['Children_day_2017']=(pd.to_datetime('2017-10-12')-new_merchant_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    new_merchant_df['Black_Friday_2017']=(pd.to_datetime('2017-11-24') - new_merchant_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
    new_merchant_df['Mothers_Day_2018']=(pd.to_datetime('2018-05-13')-new_merchant_df['purchase_date']).dt.days.apply(lambda x: x if x > 0 and x < 100 else 0)
                                     
    new_merchant_df['duration'] = new_merchant_df['purchase_amount'] * new_merchant_df['month_diff']
    new_merchant_df['amount_month_ratio'] = new_merchant_df['purchase_amount'] / new_merchant_df['month_diff']

    # reduce memory usage
    new_merchant_df = reduce_mem_usage(new_merchant_df)
    
    aggs = {}
    aggs['authorized_flag'] = [mode, 'sum', 'mean']
    aggs['card_id'] = ['size']
    aggs['city_id'] = [mode, 'nunique']
    aggs['category_1'] = [mode, 'sum', 'mean']
    aggs['installments'] = [mode, 'sum', 'mean', 'var', 'max', 'min', null_cnt]
    aggs['category_3'] = [mode, 'mean']
    aggs['merchant_category_id'] = [mode, 'nunique']
    aggs['merchant_id'] = ['nunique']
    aggs['month_lag'] = [mode, 'sum', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_date'] = ['max','min']
    aggs['purchase_amount'] = ['sum', 'mean', 'var', 'max', 'min', 'skew', over_550]
    aggs['category_2'] = [mode, 'mean']
    aggs['state_id'] = [mode, 'nunique']
    aggs['subsector_id'] = [mode, 'nunique']
    aggs['purchase_amount_trim'] = ['sum', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_year'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_month'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_day'] = [mode, 'nunique', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_hour'] = [mode, 'nunique', 'mean', 'var', 'max', 'min', 'skew']
    aggs['purchase_dayofweek'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_weekofyear'] = [mode, 'nunique', 'mean', 'max', 'min']
    aggs['purchase_weekend'] = [mode, 'sum', 'mean']
    aggs['price'] = ['sum','mean','max','min','var']
    aggs['month_diff'] = ['max','min','mean','var','skew']
    aggs['Christmas_Day_2017'] = ['mean']
    aggs['Children_day_2017'] = ['mean']
    aggs['Black_Friday_2017'] = ['mean']
    aggs['Mothers_Day_2018'] = ['mean']
    aggs['duration']=['mean','min','max','var','skew']
    aggs['amount_month_ratio']=['mean','min','max','var','skew']

    for col in ['category_2','category_3']:
        new_merchant_df[col+'_mean'] = new_merchant_df.groupby([col])['purchase_amount'].transform('mean')
        new_merchant_df[col+'_min'] = new_merchant_df.groupby([col])['purchase_amount'].transform('min')
        new_merchant_df[col+'_max'] = new_merchant_df.groupby([col])['purchase_amount'].transform('max')
        new_merchant_df[col+'_sum'] = new_merchant_df.groupby([col])['purchase_amount'].transform('sum')
        aggs[col+'_mean'] = ['mean']

    new_merchant_df = new_merchant_df.groupby('card_id').agg(aggs)

    # change column name
    new_merchant_df.columns = pd.Index([e[0] + "_" + e[1] for e in new_merchant_df.columns.tolist()])
    new_merchant_df.columns = ['new_'+ c for c in new_merchant_df.columns]

    new_merchant_df['new_purchase_date_diff'] = (new_merchant_df['new_purchase_date_max']-new_merchant_df['new_purchase_date_min']).dt.days
    new_merchant_df['new_purchase_date_average'] = new_merchant_df['new_purchase_date_diff']/new_merchant_df['new_card_id_size']
    new_merchant_df['new_purchase_date_uptonow'] = (datetime.datetime.today()-new_merchant_df['new_purchase_date_max']).dt.days
    new_merchant_df['new_purchase_date_uptomin'] = (datetime.datetime.today()-new_merchant_df['new_purchase_date_min']).dt.days

    # reduce memory usage
    new_merchant_df = reduce_mem_usage(new_merchant_df)

    return new_merchant_df

### additional_features

In [27]:
result.head(3)

Unnamed: 0_level_0,feature_1,feature_2,feature_3,first_active_month,outliers,target,first_active,first_active_year,first_active_quarter,first_active_weekofyear,first_active_dayofweek,first_active_elapsed_time_from_trade,first_active_elapsed_time_from_today,days_feature1,days_feature2,days_feature3,days_feature1_ratio,days_feature2_ratio,days_feature3_ratio,feature_sum,feature_mean,feature_max,feature_min,feature_var,hist_authorized_flag_mode,hist_authorized_flag_sum,hist_authorized_flag_mean,hist_card_id_size,hist_city_id_mode,hist_city_id_nunique,hist_category_1_mode,hist_category_1_sum,hist_category_1_mean,hist_installments_mode,hist_installments_sum,hist_installments_mean,hist_installments_var,hist_installments_max,hist_installments_min,hist_installments_null_cnt,hist_category_3_mode,hist_category_3_mean,hist_merchant_category_id_mode,hist_merchant_category_id_nunique,hist_merchant_id_nunique,hist_month_lag_mode,hist_month_lag_sum,hist_month_lag_mean,hist_month_lag_var,hist_month_lag_max,hist_month_lag_min,hist_month_lag_skew,hist_purchase_date_max,hist_purchase_date_min,hist_purchase_amount_sum,hist_purchase_amount_mean,hist_purchase_amount_var,hist_purchase_amount_max,hist_purchase_amount_min,hist_purchase_amount_skew,hist_purchase_amount_over_550,hist_category_2_mode,hist_category_2_mean,hist_state_id_mode,hist_state_id_nunique,hist_subsector_id_mode,hist_subsector_id_nunique,hist_purchase_amount_trim_sum,hist_purchase_amount_trim_mean,hist_purchase_amount_trim_var,hist_purchase_amount_trim_max,hist_purchase_amount_trim_min,hist_purchase_amount_trim_skew,hist_purchase_year_mode,hist_purchase_year_nunique,hist_purchase_year_mean,hist_purchase_year_max,hist_purchase_year_min,hist_purchase_month_mode,hist_purchase_month_nunique,hist_purchase_month_mean,hist_purchase_month_max,hist_purchase_month_min,hist_purchase_day_mode,hist_purchase_day_nunique,hist_purchase_day_mean,hist_purchase_day_var,hist_purchase_day_max,hist_purchase_day_min,hist_purchase_day_skew,hist_purchase_hour_mode,hist_purchase_hour_nunique,hist_purchase_hour_mean,hist_purchase_hour_var,hist_purchase_hour_max,hist_purchase_hour_min,hist_purchase_hour_skew,hist_purchase_dayofweek_mode,hist_purchase_dayofweek_nunique,hist_purchase_dayofweek_mean,hist_purchase_dayofweek_max,hist_purchase_dayofweek_min,hist_purchase_weekofyear_mode,hist_purchase_weekofyear_nunique,hist_purchase_weekofyear_mean,hist_purchase_weekofyear_max,hist_purchase_weekofyear_min,hist_purchase_weekend_mode,hist_purchase_weekend_sum,hist_purchase_weekend_mean,hist_price_sum,hist_price_mean,hist_price_max,hist_price_min,hist_price_var,hist_month_diff_max,hist_month_diff_min,hist_month_diff_mean,hist_month_diff_var,hist_month_diff_skew,hist_Christmas_Day_2017_mean,hist_Mothers_Day_2017_mean,hist_fathers_day_2017_mean,hist_Children_day_2017_mean,hist_Valentine_Day_2017_mean,hist_Black_Friday_2017_mean,hist_Mothers_Day_2018_mean,hist_duration_mean,hist_duration_min,hist_duration_max,hist_duration_var,hist_duration_skew,hist_amount_month_ratio_mean,hist_amount_month_ratio_min,hist_amount_month_ratio_max,hist_amount_month_ratio_var,hist_amount_month_ratio_skew,hist_category_2_mean_mean,hist_category_2_min_mean,hist_category_2_max_mean,hist_category_2_sum_mean,hist_category_3_mean_mean,hist_category_3_min_mean,hist_category_3_max_mean,hist_category_3_sum_mean,hist_purchase_date_diff,hist_purchase_date_average,hist_purchase_date_uptonow,hist_purchase_date_uptomin,new_authorized_flag_mode,new_authorized_flag_sum,new_authorized_flag_mean,new_card_id_size,new_city_id_mode,new_city_id_nunique,new_category_1_mode,new_category_1_sum,new_category_1_mean,new_installments_mode,new_installments_sum,new_installments_mean,new_installments_var,new_installments_max,new_installments_min,new_installments_null_cnt,new_category_3_mode,new_category_3_mean,new_merchant_category_id_mode,new_merchant_category_id_nunique,new_merchant_id_nunique,new_month_lag_mode,new_month_lag_sum,new_month_lag_mean,new_month_lag_var,new_month_lag_max,new_month_lag_min,new_month_lag_skew,new_purchase_date_max,new_purchase_date_min,new_purchase_amount_sum,new_purchase_amount_mean,new_purchase_amount_var,new_purchase_amount_max,new_purchase_amount_min,new_purchase_amount_skew,new_purchase_amount_over_550,new_category_2_mode,new_category_2_mean,new_state_id_mode,new_state_id_nunique,new_subsector_id_mode,new_subsector_id_nunique,new_purchase_amount_trim_sum,new_purchase_amount_trim_mean,new_purchase_amount_trim_var,new_purchase_amount_trim_max,new_purchase_amount_trim_min,new_purchase_amount_trim_skew,new_purchase_year_mode,new_purchase_year_nunique,new_purchase_year_mean,new_purchase_year_max,new_purchase_year_min,new_purchase_month_mode,new_purchase_month_nunique,new_purchase_month_mean,new_purchase_month_max,new_purchase_month_min,new_purchase_day_mode,new_purchase_day_nunique,new_purchase_day_mean,new_purchase_day_var,new_purchase_day_max,new_purchase_day_min,new_purchase_day_skew,new_purchase_hour_mode,new_purchase_hour_nunique,new_purchase_hour_mean,new_purchase_hour_var,new_purchase_hour_max,new_purchase_hour_min,new_purchase_hour_skew,new_purchase_dayofweek_mode,new_purchase_dayofweek_nunique,new_purchase_dayofweek_mean,new_purchase_dayofweek_max,new_purchase_dayofweek_min,new_purchase_weekofyear_mode,new_purchase_weekofyear_nunique,new_purchase_weekofyear_mean,new_purchase_weekofyear_max,new_purchase_weekofyear_min,new_purchase_weekend_mode,new_purchase_weekend_sum,new_purchase_weekend_mean,new_price_sum,new_price_mean,new_price_max,new_price_min,new_price_var,new_month_diff_max,new_month_diff_min,new_month_diff_mean,new_month_diff_var,new_month_diff_skew,new_Christmas_Day_2017_mean,new_Children_day_2017_mean,new_Black_Friday_2017_mean,new_Mothers_Day_2018_mean,new_duration_mean,new_duration_min,new_duration_max,new_duration_var,new_duration_skew,new_amount_month_ratio_mean,new_amount_month_ratio_min,new_amount_month_ratio_max,new_amount_month_ratio_var,new_amount_month_ratio_skew,new_category_2_mean_mean,new_category_3_mean_mean,new_purchase_date_diff,new_purchase_date_average,new_purchase_date_uptonow,new_purchase_date_uptomin
card_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 100_level_1,Unnamed: 101_level_1,Unnamed: 102_level_1,Unnamed: 103_level_1,Unnamed: 104_level_1,Unnamed: 105_level_1,Unnamed: 106_level_1,Unnamed: 107_level_1,Unnamed: 108_level_1,Unnamed: 109_level_1,Unnamed: 110_level_1,Unnamed: 111_level_1,Unnamed: 112_level_1,Unnamed: 113_level_1,Unnamed: 114_level_1,Unnamed: 115_level_1,Unnamed: 116_level_1,Unnamed: 117_level_1,Unnamed: 118_level_1,Unnamed: 119_level_1,Unnamed: 120_level_1,Unnamed: 121_level_1,Unnamed: 122_level_1,Unnamed: 123_level_1,Unnamed: 124_level_1,Unnamed: 125_level_1,Unnamed: 126_level_1,Unnamed: 127_level_1,Unnamed: 128_level_1,Unnamed: 129_level_1,Unnamed: 130_level_1,Unnamed: 131_level_1,Unnamed: 132_level_1,Unnamed: 133_level_1,Unnamed: 134_level_1,Unnamed: 135_level_1,Unnamed: 136_level_1,Unnamed: 137_level_1,Unnamed: 138_level_1,Unnamed: 139_level_1,Unnamed: 140_level_1,Unnamed: 141_level_1,Unnamed: 142_level_1,Unnamed: 143_level_1,Unnamed: 144_level_1,Unnamed: 145_level_1,Unnamed: 146_level_1,Unnamed: 147_level_1,Unnamed: 148_level_1,Unnamed: 149_level_1,Unnamed: 150_level_1,Unnamed: 151_level_1,Unnamed: 152_level_1,Unnamed: 153_level_1,Unnamed: 154_level_1,Unnamed: 155_level_1,Unnamed: 156_level_1,Unnamed: 157_level_1,Unnamed: 158_level_1,Unnamed: 159_level_1,Unnamed: 160_level_1,Unnamed: 161_level_1,Unnamed: 162_level_1,Unnamed: 163_level_1,Unnamed: 164_level_1,Unnamed: 165_level_1,Unnamed: 166_level_1,Unnamed: 167_level_1,Unnamed: 168_level_1,Unnamed: 169_level_1,Unnamed: 170_level_1,Unnamed: 171_level_1,Unnamed: 172_level_1,Unnamed: 173_level_1,Unnamed: 174_level_1,Unnamed: 175_level_1,Unnamed: 176_level_1,Unnamed: 177_level_1,Unnamed: 178_level_1,Unnamed: 179_level_1,Unnamed: 180_level_1,Unnamed: 181_level_1,Unnamed: 182_level_1,Unnamed: 183_level_1,Unnamed: 184_level_1,Unnamed: 185_level_1,Unnamed: 186_level_1,Unnamed: 187_level_1,Unnamed: 188_level_1,Unnamed: 189_level_1,Unnamed: 190_level_1,Unnamed: 191_level_1,Unnamed: 192_level_1,Unnamed: 193_level_1,Unnamed: 194_level_1,Unnamed: 195_level_1,Unnamed: 196_level_1,Unnamed: 197_level_1,Unnamed: 198_level_1,Unnamed: 199_level_1,Unnamed: 200_level_1,Unnamed: 201_level_1,Unnamed: 202_level_1,Unnamed: 203_level_1,Unnamed: 204_level_1,Unnamed: 205_level_1,Unnamed: 206_level_1,Unnamed: 207_level_1,Unnamed: 208_level_1,Unnamed: 209_level_1,Unnamed: 210_level_1,Unnamed: 211_level_1,Unnamed: 212_level_1,Unnamed: 213_level_1,Unnamed: 214_level_1,Unnamed: 215_level_1,Unnamed: 216_level_1,Unnamed: 217_level_1,Unnamed: 218_level_1,Unnamed: 219_level_1,Unnamed: 220_level_1,Unnamed: 221_level_1,Unnamed: 222_level_1,Unnamed: 223_level_1,Unnamed: 224_level_1,Unnamed: 225_level_1,Unnamed: 226_level_1,Unnamed: 227_level_1,Unnamed: 228_level_1,Unnamed: 229_level_1,Unnamed: 230_level_1,Unnamed: 231_level_1,Unnamed: 232_level_1,Unnamed: 233_level_1,Unnamed: 234_level_1,Unnamed: 235_level_1,Unnamed: 236_level_1,Unnamed: 237_level_1,Unnamed: 238_level_1,Unnamed: 239_level_1,Unnamed: 240_level_1,Unnamed: 241_level_1,Unnamed: 242_level_1,Unnamed: 243_level_1,Unnamed: 244_level_1,Unnamed: 245_level_1,Unnamed: 246_level_1,Unnamed: 247_level_1,Unnamed: 248_level_1,Unnamed: 249_level_1,Unnamed: 250_level_1,Unnamed: 251_level_1,Unnamed: 252_level_1,Unnamed: 253_level_1,Unnamed: 254_level_1,Unnamed: 255_level_1,Unnamed: 256_level_1,Unnamed: 257_level_1,Unnamed: 258_level_1,Unnamed: 259_level_1,Unnamed: 260_level_1,Unnamed: 261_level_1,Unnamed: 262_level_1,Unnamed: 263_level_1,Unnamed: 264_level_1,Unnamed: 265_level_1
C_ID_92a2005557,0.013145,0.008752,0.011428,6.0,0.0,-0.820283,2017-06-01,2017.0,2.0,22.0,3.0,333.0,613.0,1665.0,666.0,333.0,0.015015,0.006006,0.003003,0.033324,0.011108,0.013145,0.008752,0.002214,1,247.0,0.950195,260,69,7,0,0.0,0.0,0.0,4.0,0.015381,0.015205,1.0,0.0,0.0,0,0.015381,560,41,94,-2,-1017.0,-3.912109,5.75,0,-8,0.065979,2018-02-25 09:31:15,2017-06-27 14:18:08,-165.968735,-0.638341,0.045003,2.258394,-0.739258,10.242188,0.0,1.0,1.045898,9,3,34,21,-167.375,-0.644043,0.020569,0.799805,-0.739258,5.132812,2017,2,2017.0,2018,2017,12,9,8.054688,12,1,11,31,15.507812,76.875,31,1,0.102356,14,23,13.3125,24.6875,23,0,-0.887207,5,7,3.210938,6,0,50,35,33.0625,52,1,0,90.0,0.346191,,,inf,-inf,,3,2,2.072266,0.067993,3.298828,13.125,0.0,6.265625,26.765625,0.0,16.46875,7.753906,-1.325042,-2.201172,4.516789,0.216691,7.734375,-0.311035,-0.369629,1.129197,0.011823,9.351562,0.07251,-0.74707,5942464.0,1309719.0,0.34668,-0.74707,5920398.0,5429670.5,242,0.930664,343,586,1.0,23.0,1.0,23.0,69.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,278.0,14.0,23.0,1.0,34.0,1.478516,0.260986,2.0,1.0,0.093262,2018-04-29 11:23:05,2018-03-05 14:04:36,-13.242188,-0.575684,0.018433,-0.296143,-0.724609,0.895996,0.0,1.0,1.0,9.0,1.0,37.0,10.0,-13.242188,-0.575684,0.018433,-0.296143,-0.724609,0.895996,2018.0,1.0,2018.0,2018.0,2018.0,3.0,2.0,3.478516,4.0,3.0,6.0,17.0,16.4375,88.8125,31.0,5.0,0.338867,13.0,8.0,12.867188,4.210938,16.0,8.0,-0.603516,4.0,7.0,3.130859,6.0,0.0,13.0,7.0,13.304688,17.0,10.0,0.0,6.0,0.260986,-inf,-inf,-inf,-inf,,2.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,41.75,-1.151367,-1.449219,-0.592285,0.07373,0.895996,-0.287842,-0.362305,-0.148071,0.004608,0.895996,-0.550293,-0.592773,54.0,2.347656,280.0,335.0
C_ID_3d0044924f,0.010712,0.011385,0.010283,1.0,0.0,0.392913,2017-01-01,2017.0,1.0,52.0,6.0,484.0,764.0,1936.0,484.0,0.0,0.008264,0.002066,0.0,0.032379,0.010793,0.011385,0.010283,0.000555,1,339.0,0.96875,350,69,9,0,31.0,0.088562,1.0,545.0,1.566406,2.257812,10.0,1.0,2.0,1,1.200195,307,57,142,0,-1761.0,-5.03125,14.476562,0,-12,-0.258057,2018-01-31 22:31:09,2017-01-06 16:29:42,-210.006332,-0.600018,0.1482,4.6303,-0.742188,8.8125,0.0,1.0,1.0,9,3,34,24,-215.375,-0.615234,0.058594,0.799805,-0.742188,3.744141,2017,2,2017.0,2018,2017,1,12,6.21875,12,1,19,31,16.671875,77.375,31,1,-0.235718,12,24,14.71875,31.15625,23,0,-0.893555,5,7,3.363281,6,0,3,50,25.21875,52,1,0,132.0,0.377197,-200.113283,-0.575038,2.31515,-0.7424,0.082442,4,2,3.091797,0.100525,2.0,10.648438,8.734375,12.554688,13.789062,6.570312,9.835938,0.0,-1.853766,-2.951172,13.890898,1.405803,8.25,-0.196045,-0.366699,1.543433,0.016649,8.554688,0.074585,-0.74707,6010604.0,1329550.0,-0.295166,-0.74707,156963.1,-3653208.0,390,1.114258,368,758,1.0,6.0,1.0,6.0,69.0,1.0,0.0,0.0,0.0,1.0,6.0,1.0,0.0,1.0,1.0,0.0,1.0,1.0,307.0,5.0,6.0,1.0,9.0,1.5,0.300049,2.0,1.0,0.0,2018-03-30 06:48:26,2018-02-01 17:07:54,-4.355469,-0.726074,0.000207,-0.70166,-0.739258,1.255859,0.0,1.0,1.0,9.0,1.0,19.0,4.0,-4.355469,-0.726074,0.000207,-0.70166,-0.739258,1.255859,2018.0,1.0,2018.0,2018.0,2018.0,2.0,2.0,2.5,3.0,2.0,5.0,4.0,13.5,131.5,30.0,1.0,0.370117,17.0,5.0,11.164062,24.5625,17.0,6.0,0.383301,0.0,4.0,1.5,4.0,0.0,6.0,4.0,9.0,13.0,5.0,0.0,0.0,0.0,-4.355735,-0.725956,-0.701858,-0.73941,0.000205,3.0,3.0,3.0,0.0,0.0,0.0,0.0,0.0,56.84375,-2.177734,-2.21875,-2.105469,0.001841,1.180664,-0.241943,-0.24646,-0.234009,2.3e-05,1.151367,-0.550293,-0.606445,56.0,9.335938,310.0,367.0
C_ID_d639edf6cd,0.01061,0.008752,0.010283,8.0,0.0,0.688056,2016-08-01,2016.0,3.0,31.0,0.0,637.0,917.0,1274.0,1274.0,0.0,0.00314,0.00314,0.0,0.029645,0.009882,0.01061,0.008752,0.000992,1,41.0,0.953613,43,143,5,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.0,705,8,13,-10,-370.0,-8.601562,14.765625,0,-13,0.725586,2018-02-27 19:08:25,2017-01-11 08:21:22,-29.167391,-0.678311,0.007635,-0.145847,-0.72998,5.625,0.0,5.0,4.628906,5,2,33,7,-29.171875,-0.678223,0.007637,-0.145874,-0.72998,5.621094,2017,2,2017.0,2018,2017,1,10,4.558594,12,1,21,19,19.328125,62.3125,30,2,-0.756348,19,14,17.90625,12.375,23,8,-0.886719,4,7,3.302734,6,0,4,22,18.375,49,2,0,11.0,0.255859,-inf,-inf,-inf,-inf,,3,2,2.070312,0.066467,3.5,9.671875,17.0,8.90625,7.511719,17.65625,8.90625,1.720703,-1.406243,-2.181641,-0.291695,0.070888,0.3125,-0.330811,-0.36499,-0.072924,0.002562,3.71875,-0.08783,-0.74707,654527.7,-229313.4,0.358398,-0.74707,6010604.0,5588325.0,412,9.578125,341,753,1.0,1.0,1.0,1.0,143.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,528.0,1.0,1.0,2.0,2.0,2.0,,2.0,2.0,,2018-04-28 17:43:11,2018-04-28 17:43:11,-0.700195,-0.700195,,-0.700195,-0.700195,,0.0,5.0,5.0,5.0,1.0,25.0,1.0,-0.700195,-0.700195,,-0.700195,-0.700195,,2018.0,1.0,2018.0,2018.0,2018.0,4.0,1.0,4.0,4.0,4.0,28.0,1.0,28.0,,28.0,28.0,,17.0,1.0,17.0,,17.0,17.0,,5.0,1.0,5.0,5.0,5.0,17.0,1.0,17.0,17.0,17.0,1.0,1.0,1.0,-inf,-inf,-inf,-inf,,2.0,2.0,2.0,,,0.0,0.0,0.0,14.0,-1.400391,-1.400391,-1.400391,,,-0.350098,-0.350098,-0.350098,,,-0.548828,-0.592773,0.0,0.0,281.0,281.0


In [49]:
# additional features
def additional_features(df):
    df['hist_first_buy'] = (pd.to_datetime(df['hist_purchase_date_min']) - pd.to_datetime(df['first_active'])).dt.days
    df['hist_last_buy'] = (pd.to_datetime(df['hist_purchase_date_max']) - pd.to_datetime(df['first_active'])).dt.days
    df['new_first_buy'] = (pd.to_datetime(df['new_purchase_date_min']) - pd.to_datetime(df['first_active'])).dt.days
    df['new_last_buy'] = (pd.to_datetime(df['new_purchase_date_max']) - pd.to_datetime(df['first_active'])).dt.days

    date_features=['hist_purchase_date_max','hist_purchase_date_min', 'new_purchase_date_max', 'new_purchase_date_min']

    for f in date_features:
        df[f] = pd.to_datetime(df[f]).astype(np.int64) * 1e-9

    df['card_id_total_size'] = df['new_card_id_size']+df['hist_card_id_size']
    df['card_id_size_ratio'] = df['new_card_id_size']/df['hist_card_id_size']
    df['purchase_amount_total'] = df['new_purchase_amount_sum']+df['hist_purchase_amount_sum']
    df['purchase_amount_mean'] = df['new_purchase_amount_mean']+df['hist_purchase_amount_mean']
    df['purchase_amount_max'] = df['new_purchase_amount_max']+df['hist_purchase_amount_max']
    df['purchase_amount_min'] = df['new_purchase_amount_min']+df['hist_purchase_amount_min']
    df['purchase_amount_ratio'] = df['new_purchase_amount_sum']/df['hist_purchase_amount_sum']
    df['month_diff_mean'] = df['new_month_diff_mean']+df['hist_month_diff_mean']
    df['month_diff_ratio'] = df['new_month_diff_mean']/df['hist_month_diff_mean']
    df['month_lag_mean'] = df['new_month_lag_mean']+df['hist_month_lag_mean']
    df['month_lag_max'] = df['new_month_lag_max']+df['hist_month_lag_max']
    df['month_lag_min'] = df['new_month_lag_min']+df['hist_month_lag_min']
    df['category_1_mean'] = df['new_category_1_mean']+df['hist_category_1_mean']
    df['installments_total'] = df['new_installments_sum']+df['hist_installments_sum']
    df['installments_mean'] = df['new_installments_mean']+df['hist_installments_mean']
    df['installments_max'] = df['new_installments_max']+df['hist_installments_max']
    df['installments_ratio'] = df['new_installments_sum']/df['hist_installments_sum']
    df['price_total'] = df['purchase_amount_total'] / df['installments_total']
    df['price_mean'] = df['purchase_amount_mean'] / df['installments_mean']
    df['price_max'] = df['purchase_amount_max'] / df['installments_max']
    df['duration_mean'] = df['new_duration_mean']+df['hist_duration_mean']
    df['duration_min'] = df['new_duration_min']+df['hist_duration_min']
    df['duration_max'] = df['new_duration_max']+df['hist_duration_max']
    df['amount_month_ratio_mean']=df['new_amount_month_ratio_mean']+df['hist_amount_month_ratio_mean']
    df['amount_month_ratio_min']=df['new_amount_month_ratio_min']+df['hist_amount_month_ratio_min']
    df['amount_month_ratio_max']=df['new_amount_month_ratio_max']+df['hist_amount_month_ratio_max']
    df['new_CLV'] = df['new_card_id_size'] * df['new_purchase_amount_sum'] / df['new_month_diff_mean']
    df['hist_CLV'] = df['hist_card_id_size'] * df['hist_purchase_amount_sum'] / df['hist_month_diff_mean']
    df['CLV_ratio'] = df['new_CLV'] / df['hist_CLV']

    return df

In [53]:
additional_features(result)

Unnamed: 0,card_id,feature_1,feature_2,feature_3,first_active_month,outliers,target,first_active,first_active_year,first_active_quarter,first_active_weekofyear,first_active_dayofweek,first_active_elapsed_time_from_trade,first_active_elapsed_time_from_today,days_feature1,days_feature2,days_feature3,days_feature1_ratio,days_feature2_ratio,days_feature3_ratio,feature_sum,feature_mean,feature_max,feature_min,feature_var,hist_authorized_flag_mode,hist_authorized_flag_sum,hist_authorized_flag_mean,hist_card_id_size,hist_city_id_mode,hist_city_id_nunique,hist_category_1_mode,hist_category_1_sum,hist_category_1_mean,hist_installments_mode,hist_installments_sum,hist_installments_mean,hist_installments_var,hist_installments_max,hist_installments_min,hist_installments_null_cnt,hist_category_3_mode,hist_category_3_mean,hist_merchant_category_id_mode,hist_merchant_category_id_nunique,hist_merchant_id_nunique,hist_month_lag_mode,hist_month_lag_sum,hist_month_lag_mean,hist_month_lag_var,hist_month_lag_max,hist_month_lag_min,hist_month_lag_skew,hist_purchase_date_max,hist_purchase_date_min,hist_purchase_amount_sum,hist_purchase_amount_mean,hist_purchase_amount_var,hist_purchase_amount_max,hist_purchase_amount_min,hist_purchase_amount_skew,hist_purchase_amount_over_550,hist_category_2_mode,hist_category_2_mean,hist_state_id_mode,hist_state_id_nunique,hist_subsector_id_mode,hist_subsector_id_nunique,hist_purchase_amount_trim_sum,hist_purchase_amount_trim_mean,hist_purchase_amount_trim_var,hist_purchase_amount_trim_max,hist_purchase_amount_trim_min,hist_purchase_amount_trim_skew,hist_purchase_year_mode,hist_purchase_year_nunique,hist_purchase_year_mean,hist_purchase_year_max,hist_purchase_year_min,hist_purchase_month_mode,hist_purchase_month_nunique,hist_purchase_month_mean,hist_purchase_month_max,hist_purchase_month_min,hist_purchase_day_mode,hist_purchase_day_nunique,hist_purchase_day_mean,hist_purchase_day_var,hist_purchase_day_max,hist_purchase_day_min,hist_purchase_day_skew,hist_purchase_hour_mode,hist_purchase_hour_nunique,hist_purchase_hour_mean,hist_purchase_hour_var,hist_purchase_hour_max,hist_purchase_hour_min,hist_purchase_hour_skew,hist_purchase_dayofweek_mode,hist_purchase_dayofweek_nunique,hist_purchase_dayofweek_mean,hist_purchase_dayofweek_max,hist_purchase_dayofweek_min,hist_purchase_weekofyear_mode,hist_purchase_weekofyear_nunique,hist_purchase_weekofyear_mean,hist_purchase_weekofyear_max,hist_purchase_weekofyear_min,hist_purchase_weekend_mode,hist_purchase_weekend_sum,hist_purchase_weekend_mean,hist_price_sum,hist_price_mean,hist_price_max,hist_price_min,hist_price_var,hist_month_diff_max,hist_month_diff_min,hist_month_diff_mean,hist_month_diff_var,hist_month_diff_skew,hist_Christmas_Day_2017_mean,hist_Mothers_Day_2017_mean,hist_fathers_day_2017_mean,hist_Children_day_2017_mean,hist_Valentine_Day_2017_mean,hist_Black_Friday_2017_mean,hist_Mothers_Day_2018_mean,hist_duration_mean,hist_duration_min,hist_duration_max,hist_duration_var,hist_duration_skew,hist_amount_month_ratio_mean,hist_amount_month_ratio_min,hist_amount_month_ratio_max,hist_amount_month_ratio_var,hist_amount_month_ratio_skew,hist_category_2_mean_mean,hist_category_2_min_mean,hist_category_2_max_mean,hist_category_2_sum_mean,hist_category_3_mean_mean,hist_category_3_min_mean,hist_category_3_max_mean,hist_category_3_sum_mean,hist_purchase_date_diff,hist_purchase_date_average,hist_purchase_date_uptonow,hist_purchase_date_uptomin,new_authorized_flag_mode,new_authorized_flag_sum,new_authorized_flag_mean,new_card_id_size,new_city_id_mode,new_city_id_nunique,new_category_1_mode,new_category_1_sum,new_category_1_mean,new_installments_mode,new_installments_sum,new_installments_mean,new_installments_var,new_installments_max,new_installments_min,new_installments_null_cnt,new_category_3_mode,new_category_3_mean,new_merchant_category_id_mode,new_merchant_category_id_nunique,new_merchant_id_nunique,new_month_lag_mode,new_month_lag_sum,new_month_lag_mean,new_month_lag_var,new_month_lag_max,new_month_lag_min,new_month_lag_skew,new_purchase_date_max,new_purchase_date_min,new_purchase_amount_sum,new_purchase_amount_mean,new_purchase_amount_var,new_purchase_amount_max,new_purchase_amount_min,new_purchase_amount_skew,new_purchase_amount_over_550,new_category_2_mode,new_category_2_mean,new_state_id_mode,new_state_id_nunique,new_subsector_id_mode,new_subsector_id_nunique,new_purchase_amount_trim_sum,new_purchase_amount_trim_mean,new_purchase_amount_trim_var,new_purchase_amount_trim_max,new_purchase_amount_trim_min,new_purchase_amount_trim_skew,new_purchase_year_mode,new_purchase_year_nunique,new_purchase_year_mean,new_purchase_year_max,new_purchase_year_min,new_purchase_month_mode,new_purchase_month_nunique,new_purchase_month_mean,new_purchase_month_max,new_purchase_month_min,new_purchase_day_mode,new_purchase_day_nunique,new_purchase_day_mean,new_purchase_day_var,new_purchase_day_max,new_purchase_day_min,new_purchase_day_skew,new_purchase_hour_mode,new_purchase_hour_nunique,new_purchase_hour_mean,new_purchase_hour_var,new_purchase_hour_max,new_purchase_hour_min,new_purchase_hour_skew,new_purchase_dayofweek_mode,new_purchase_dayofweek_nunique,new_purchase_dayofweek_mean,new_purchase_dayofweek_max,new_purchase_dayofweek_min,new_purchase_weekofyear_mode,new_purchase_weekofyear_nunique,new_purchase_weekofyear_mean,new_purchase_weekofyear_max,new_purchase_weekofyear_min,new_purchase_weekend_mode,new_purchase_weekend_sum,new_purchase_weekend_mean,new_price_sum,new_price_mean,new_price_max,new_price_min,new_price_var,new_month_diff_max,new_month_diff_min,new_month_diff_mean,new_month_diff_var,new_month_diff_skew,new_Christmas_Day_2017_mean,new_Children_day_2017_mean,new_Black_Friday_2017_mean,new_Mothers_Day_2018_mean,new_duration_mean,new_duration_min,new_duration_max,new_duration_var,new_duration_skew,new_amount_month_ratio_mean,new_amount_month_ratio_min,new_amount_month_ratio_max,new_amount_month_ratio_var,new_amount_month_ratio_skew,new_category_2_mean_mean,new_category_3_mean_mean,new_purchase_date_diff,new_purchase_date_average,new_purchase_date_uptonow,new_purchase_date_uptomin,hist_first_buy,hist_last_buy,new_first_buy,new_last_buy,card_id_total_size,card_id_size_ratio,purchase_amount_total,purchase_amount_mean,purchase_amount_max,purchase_amount_min,purchase_amount_ratio,month_diff_mean,month_diff_ratio,month_lag_mean,month_lag_max,month_lag_min,category_1_mean,installments_total,installments_mean,installments_max,installments_ratio,price_total,price_mean,price_max,duration_mean,duration_min,duration_max,amount_month_ratio_mean,amount_month_ratio_min,amount_month_ratio_max,new_CLV,hist_CLV,CLV_ratio
0,C_ID_92a2005557,0.013145,0.008752,0.011428,6.0,0.0,-0.820283,2017-06-01 00:00:00,2017.0,2.0,22.0,3.0,333.0,613.0,1665.0,666.0,333.0,0.015015,0.006006,0.003003,0.033324,0.011108,0.013145,0.008752,0.002214,1,247.0,0.9500,260,69,7,0,0.0,0.00000,0.0,4.0,0.015380,0.015205,1.0,0.0,0.0,0,0.015380,560,41,94,-2,-1017.0,-3.9120,5.7500,0,-8,0.066000,1.000000e-09,1.000000e-09,-165.968730,-0.638341,0.045003,2.258395,-0.7393,10.2400,0.0,1.0,1.046,9,3,34,21,-167.400,-0.6440,0.020570,0.80000,-0.7393,5.1330,2017,2,2017.0,2018,2017,12,9,8.055,12,1,11,31,15.510,76.90,31,1,0.10236,14,23,13.310,24.690,23,0,-0.88700,5,7,3.2100,6,0,50,35,33.06,52,1,0,90.0,0.34620,,,inf,-inf,,3,2,2.072,0.06800,3.2990,13.125,0.00000,6.2660,26.7700,0.0000,16.4700,7.7540,-1.325042,-2.201,4.516789,0.216691,7.7340,-0.31100,-0.36960,1.129197,0.011823,9.3500,0.07250,-0.747,5942464.500,1309718.600,0.346700,-0.747,5920398.500,5429670.50,242,0.9307,343,586,1.0,23.0,1.0,23.0,69.0,3.0,0.0,0.0,0.00000,0.0,0.0,0.000,0.00000,0.0,0.0,0.0,0.0,0.0000,278.0,14.0,23.0,1.0,34.0,1.479,0.2610,2.0,1.0,0.09326,1.000000e-09,1.000000e-09,-13.2400,-0.5757,0.018430,-0.29610,-0.7246,0.895996,0.0,1.0,1.000,9.0,1.0,37.0,10.0,-13.2400,-0.5757,0.018430,-0.29610,-0.7246,0.89600,2018.0,1.0,2018.0,2018.0,2018.0,3.0,2.0,3.479,4.0,3.0,6.0,17.0,16.440,88.800,31.0,5.0,0.33890,13.0,8.0,12.870,4.210,16.0,8.0,-0.60350,4.0,7.0,3.130,6.0,0.0,13.0,7.0,13.305,17.0,10.0,0.0,6.0,0.2610,-inf,-inf,-inf,-inf,,2.0,2.0,2.000,0.0000,0.0000,0.000,0.0,0.000,41.75,-1.1510,-1.4490,-0.592300,0.073700,0.895996,-0.28780,-0.36230,-0.14810,4.610000e-03,0.895996,-0.5503,-0.59300,54.0,2.348,280.0,335.0,-17318.0,-17318.0,-17318.0,-17318.0,283.0,0.088462,-179.208730,-1.214041,1.962295,-1.4639,0.079774,4.072,0.965251,-2.4330,2.0,-7.0,0.00000,4.0,0.015380,1.0,0.000000,-44.802183,-78.936365,1.962295,-2.476042,-3.6500,3.924489,-0.59880,-0.73190,0.981097,-152.260000,-20826.191988,0.007311
1,C_ID_3d0044924f,0.010712,0.011385,0.010283,1.0,0.0,0.392913,2017-01-01 00:00:00,2017.0,1.0,52.0,6.0,484.0,764.0,1936.0,484.0,0.0,0.008264,0.002066,0.000000,0.032379,0.010793,0.011385,0.010283,0.000555,1,339.0,0.9688,350,69,9,0,31.0,0.08856,1.0,545.0,1.566000,2.258000,10.0,1.0,2.0,1,1.200000,307,57,142,0,-1761.0,-5.0300,14.4800,0,-12,-0.258000,1.000000e-09,1.000000e-09,-210.006330,-0.600018,0.148200,4.630300,-0.7420,8.8100,0.0,1.0,1.000,9,3,34,24,-215.400,-0.6150,0.058600,0.80000,-0.7420,3.7440,2017,2,2017.0,2018,2017,1,12,6.220,12,1,19,31,16.670,77.40,31,1,-0.23570,12,24,14.720,31.160,23,0,-0.89360,5,7,3.3630,6,0,3,50,25.22,52,1,0,132.0,0.37720,-200.113283,-0.575038,2.315150,-0.742400,0.082442,4,2,3.092,0.10050,2.0000,10.650,8.73400,12.5550,13.7900,6.5700,9.8360,0.0000,-1.853766,-2.951,13.890898,1.405803,8.2500,-0.19600,-0.36670,1.543433,0.016649,8.5550,0.07460,-0.747,6010604.000,1329550.400,-0.295200,-0.747,156963.110,-3653208.00,390,1.1140,368,758,1.0,6.0,1.0,6.0,69.0,1.0,0.0,0.0,0.00000,1.0,6.0,1.000,0.00000,1.0,1.0,0.0,1.0,1.0000,307.0,5.0,6.0,1.0,9.0,1.500,0.3000,2.0,1.0,0.00000,1.000000e-09,1.000000e-09,-4.3550,-0.7260,0.000207,-0.70170,-0.7393,1.255859,0.0,1.0,1.000,9.0,1.0,19.0,4.0,-4.3550,-0.7260,0.000207,-0.70170,-0.7393,1.25600,2018.0,1.0,2018.0,2018.0,2018.0,2.0,2.0,2.500,3.0,2.0,5.0,4.0,13.500,131.500,30.0,1.0,0.37000,17.0,5.0,11.164,24.560,17.0,6.0,0.38330,0.0,4.0,1.500,4.0,0.0,6.0,4.0,9.000,13.0,5.0,0.0,0.0,0.0000,-4.355735,-0.725956,-0.701858,-0.739410,0.000205,3.0,3.0,3.000,0.0000,0.0000,0.000,0.0,0.000,56.84,-2.1780,-2.2190,-2.105000,0.001841,1.180664,-0.24200,-0.24650,-0.23400,2.277000e-05,1.151367,-0.5503,-0.60640,56.0,9.336,310.0,367.0,-17167.0,-17167.0,-17167.0,-17167.0,356.0,0.017143,-214.361330,-1.326018,3.928600,-1.4813,0.020737,6.092,0.970246,-3.5300,2.0,-11.0,0.08856,551.0,2.566000,11.0,0.011009,-0.389041,-0.516765,0.357145,-4.031766,-5.1700,11.785898,-0.43800,-0.61320,1.309433,-8.710000,-23771.738519,0.000366
2,C_ID_d639edf6cd,0.010610,0.008752,0.010283,8.0,0.0,0.688056,2016-08-01 00:00:00,2016.0,3.0,31.0,0.0,637.0,917.0,1274.0,1274.0,0.0,0.003140,0.003140,0.000000,0.029645,0.009882,0.010610,0.008752,0.000992,1,41.0,0.9536,43,143,5,0,0.0,0.00000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0,0.000000,705,8,13,-10,-370.0,-8.6000,14.7660,0,-13,0.725600,1.000000e-09,1.000000e-09,-29.167390,-0.678311,0.007635,-0.145847,-0.7300,5.6250,0.0,5.0,4.630,5,2,33,7,-29.170,-0.6780,0.007637,-0.14590,-0.7300,5.6200,2017,2,2017.0,2018,2017,1,10,4.560,12,1,21,19,19.330,62.30,30,2,-0.75630,19,14,17.900,12.375,23,8,-0.88670,4,7,3.3030,6,0,4,22,18.38,49,2,0,11.0,0.25590,-inf,-inf,-inf,-inf,,3,2,2.070,0.06647,3.5000,9.670,17.00000,8.9100,7.5100,17.6600,8.9100,1.7210,-1.406243,-2.182,-0.291695,0.070888,0.3125,-0.33080,-0.36500,-0.072924,0.002562,3.7190,-0.08780,-0.747,654527.700,-229313.400,0.358400,-0.747,6010604.000,5588325.00,412,9.5800,341,753,1.0,1.0,1.0,1.0,143.0,1.0,0.0,0.0,0.00000,0.0,0.0,0.000,,0.0,0.0,0.0,0.0,0.0000,528.0,1.0,1.0,2.0,2.0,2.000,,2.0,2.0,,1.000000e-09,1.000000e-09,-0.7000,-0.7000,,-0.70000,-0.7000,,0.0,5.0,5.000,5.0,1.0,25.0,1.0,-0.7000,-0.7000,,-0.70000,-0.7000,,2018.0,1.0,2018.0,2018.0,2018.0,4.0,1.0,4.000,4.0,4.0,28.0,1.0,28.000,,28.0,28.0,,17.0,1.0,17.000,,17.0,17.0,,5.0,1.0,5.000,5.0,5.0,17.0,1.0,17.000,17.0,17.0,1.0,1.0,1.0000,-inf,-inf,-inf,-inf,,2.0,2.0,2.000,,,0.000,0.0,0.000,14.00,-1.4000,-1.4000,-1.400000,,,-0.35000,-0.35000,-0.35000,,,-0.5490,-0.59300,0.0,0.000,281.0,281.0,-17014.0,-17014.0,-17014.0,-17014.0,44.0,0.023256,-29.867390,-1.378311,-0.845847,-1.4300,0.023999,4.070,0.966184,-6.6000,2.0,-11.0,0.00000,0.0,0.000000,0.0,,-inf,-inf,-inf,-2.806243,-3.5820,-1.691695,-0.68080,-0.71500,-0.422924,-0.350000,-605.892643,0.000578
3,C_ID_186d6a6901,0.010712,0.014166,0.010283,9.0,0.0,0.142495,2017-09-01 00:00:00,2017.0,3.0,35.0,4.0,241.0,521.0,964.0,723.0,0.0,0.016598,0.012448,0.000000,0.035161,0.011720,0.014166,0.010283,0.002129,1,77.0,1.0000,77,17,7,0,12.0,0.15590,1.0,86.0,1.146000,0.235000,3.0,1.0,2.0,1,1.064000,278,25,50,-4,-218.0,-2.8320,3.2480,0,-5,0.557600,1.000000e-09,1.000000e-09,-49.491364,-0.642745,0.068447,1.445596,-0.7407,6.9700,0.0,4.0,3.064,22,5,37,13,-50.120,-0.6514,0.038360,0.80000,-0.7407,5.7580,2017,2,2017.0,2018,2017,10,6,7.742,12,1,7,25,16.880,81.50,31,2,-0.14140,16,16,14.445,26.220,23,0,-1.43200,2,7,2.7930,6,0,42,20,32.00,52,1,0,11.0,0.14280,-46.828849,-0.624385,1.445596,-0.740897,0.083902,3,1,2.014,0.03930,1.5310,44.660,0.00000,0.0000,2.6230,0.0000,24.1900,17.9000,-1.295112,-2.210,2.891191,0.298258,6.0800,-0.32280,-0.71700,0.722798,0.019262,5.5600,-0.08620,-0.747,1955977.000,127970.164,-0.338400,-0.747,289036.530,-4009099.50,154,2.0000,340,495,1.0,7.0,1.0,7.0,17.0,2.0,0.0,1.0,0.14280,1.0,6.0,1.000,0.00000,1.0,1.0,1.0,1.0,0.8570,80.0,6.0,7.0,2.0,12.0,1.714,0.2380,2.0,1.0,-1.22950,1.000000e-09,1.000000e-09,-4.6560,-0.6650,0.004345,-0.56700,-0.7344,0.808594,0.0,4.0,3.572,22.0,2.0,37.0,5.0,-4.6560,-0.6650,0.004345,-0.56700,-0.7344,0.80860,2018.0,1.0,2018.0,2018.0,2018.0,4.0,2.0,3.715,4.0,3.0,4.0,7.0,13.140,50.800,24.0,4.0,0.20640,11.0,5.0,13.000,23.000,21.0,7.0,0.83800,2.0,4.0,3.285,6.0,1.0,14.0,5.0,13.860,16.0,10.0,0.0,3.0,0.4285,-4.087632,-0.681272,-0.581646,-0.734135,0.002960,2.0,2.0,2.000,0.0000,0.0000,0.000,0.0,0.000,37.72,-1.3300,-1.4690,-1.134000,0.017380,0.808594,-0.33250,-0.36720,-0.28340,1.086000e-03,0.808594,-0.5566,-0.60450,41.0,5.855,291.0,333.0,-17410.0,-17410.0,-17410.0,-17410.0,84.0,0.090909,-54.147364,-1.307745,0.878595,-1.4751,0.094077,4.014,0.993049,-1.1180,2.0,-4.0,0.29870,92.0,2.146000,4.0,0.069767,-0.588558,-0.609387,0.219649,-2.625112,-3.6790,1.757191,-0.65530,-1.08420,0.439398,-16.296000,-1892.172308,0.008612
4,C_ID_cdbd2c0db2,0.008058,0.014166,0.010283,11.0,0.0,-0.159749,2017-11-01 00:00:00,2017.0,4.0,44.0,2.0,180.0,460.0,180.0,540.0,0.0,0.005556,0.016667,0.000000,0.032508,0.010836,0.014166,0.008058,0.003091,1,128.0,0.9624,133,17,6,0,15.0,0.11280,1.0,182.0,1.368000,3.598000,12.0,1.0,0.0,1,1.053000,278,26,66,-1,-171.0,-1.2860,1.0540,0,-3,-0.296000,1.000000e-09,1.000000e-09,-48.687656,-0.366073,1.828160,7.193041,-0.7460,5.2100,0.0,4.0,3.443,22,6,37,17,-75.250,-0.5660,0.103900,0.80000,-0.7460,3.2680,2018,2,2018.0,2018,2017,1,4,5.406,12,1,7,30,16.100,79.60,31,1,-0.09530,10,22,13.050,32.620,23,0,-0.54150,4,7,3.2400,6,0,1,17,21.78,52,1,0,42.0,0.31570,-75.971922,-0.571217,0.728986,-0.746156,0.086111,3,1,2.008,0.02267,2.1020,7.790,0.00000,0.0000,0.0000,0.0000,0.6616,20.6600,-0.736037,-2.146,14.386082,7.324718,5.2000,-0.18420,-0.70560,3.596520,0.458645,5.1840,-0.11460,-0.747,1227080.200,-87801.440,-0.377700,-0.747,141170.810,-4465276.00,108,0.8120,340,449,1.0,36.0,1.0,36.0,17.0,5.0,0.0,2.0,0.05554,1.0,36.0,1.028,0.02856,2.0,1.0,1.0,1.0,1.0000,278.0,17.0,36.0,2.0,56.0,1.556,0.2540,2.0,1.0,-0.23340,1.000000e-09,1.000000e-09,-19.9200,-0.5537,0.050100,0.45100,-0.7393,2.892578,0.0,4.0,3.473,22.0,5.0,37.0,10.0,-19.9200,-0.5537,0.050100,0.45100,-0.7393,2.89300,2018.0,1.0,2018.0,2018.0,2018.0,4.0,2.0,3.555,4.0,3.0,2.0,22.0,14.586,82.700,31.0,1.0,0.21600,13.0,14.0,14.720,14.780,23.0,5.0,0.03610,5.0,7.0,3.277,6.0,0.0,11.0,8.0,13.360,17.0,9.0,0.0,12.0,0.3333,-19.585593,-0.559588,0.450886,-0.739395,0.050300,2.0,2.0,2.000,0.0000,0.0000,0.000,0.0,0.000,41.20,-1.1070,-1.4790,0.902000,0.200400,2.892578,-0.27690,-0.36960,0.22550,1.253000e-02,2.892578,-0.5557,-0.58840,57.0,1.583,281.0,338.0,-17471.0,-17471.0,-17471.0,-17471.0,169.0,0.270677,-68.607656,-0.919773,7.644041,-1.4853,0.409139,4.008,0.996016,0.2700,2.0,-2.0,0.16834,218.0,2.396000,14.0,0.197802,-0.314714,-0.383878,0.546003,-1.843037,-3.6250,15.288082,-0.46110,-1.07520,3.822020,-358.560000,-3224.829805,0.111187
5,C_ID_0894217f2f,0.010712,0.008752,0.010283,9.0,0.0,0.871585,2016-09-01 00:00:00,2016.0,3.0,35.0,3.0,606.0,886.0,2424.0,1212.0,0.0,0.006601,0.003300,0.000000,0.029747,0.009916,0.010712,0.008752,0.001030,1,32.0,0.9697,33,69,4,0,1.0,0.03030,3.0,99.0,3.000000,3.875000,10.0,1.0,0.0,2,1.758000,884,14,22,-1,-84.0,-2.5450,4.1300,0,-6,-0.263000,1.000000e-09,1.000000e-09,-13.776231,-0.417462,0.033105,0.033719,-0.6790,0.7964,0.0,1.0,1.121,9,3,27,8,-13.770,-0.4175,0.033100,0.03372,-0.6790,0.7964,2017,1,2017.0,2017,2017,6,7,4.453,7,1,25,17,18.030,58.66,29,4,-0.41500,14,13,14.240,25.810,23,0,-0.97700,5,6,2.7870,5,0,24,16,17.75,30,2,0,10.0,0.30300,-7.696680,-0.233233,0.011240,-0.679288,0.049340,10,9,9.060,0.05872,3.8610,0.000,25.00000,21.7800,14.6300,22.0000,0.0000,0.0000,-3.776298,-6.113,0.303475,2.705428,0.7860,-0.04617,-0.07550,0.003747,0.000408,0.7900,0.06915,-0.747,5831652.000,1277467.100,-0.017760,-0.747,59604.830,-997078.10,199,6.0300,554,754,1.0,4.0,1.0,4.0,69.0,1.0,0.0,0.0,0.00000,1.0,11.0,2.750,2.91600,5.0,1.0,0.0,2.0,1.7500,69.0,4.0,4.0,2.0,7.0,1.750,0.2500,2.0,1.0,-2.00000,1.000000e-09,1.000000e-09,-1.3990,-0.3499,0.078550,-0.00009,-0.6543,0.400146,0.0,1.0,1.000,9.0,1.0,2.0,4.0,-1.3990,-0.3499,0.078550,-0.00009,-0.6543,0.40010,2017.0,1.0,2017.0,2017.0,2017.0,9.0,2.0,8.750,9.0,8.0,15.0,3.0,12.750,10.914,15.0,8.0,-1.56000,10.0,4.0,17.000,23.330,21.0,10.0,-1.59700,4.0,3.0,2.750,4.0,1.0,37.0,3.0,35.750,37.0,33.0,0.0,0.0,0.0000,-0.981630,-0.245407,-0.000018,-0.654495,0.083800,9.0,9.0,9.000,0.0000,0.0000,0.000,36.0,54.000,0.00,-3.1500,-5.8900,-0.000812,6.367000,0.401611,-0.03888,-0.07270,-0.00001,9.700000e-04,0.401123,-0.5503,-0.12330,31.0,7.750,506.0,537.0,-17045.0,-17045.0,-17045.0,-17045.0,37.0,0.121212,-15.175231,-0.767362,0.033629,-1.3333,0.101552,18.060,0.993377,-0.7950,2.0,-5.0,0.03030,110.0,5.750000,15.0,0.111111,-0.137957,-0.133454,0.002242,-6.926298,-12.0030,0.302664,-0.08505,-0.14820,0.003737,-0.621778,-50.178325,0.012391
6,C_ID_7e63323c00,0.010479,0.008752,0.011428,12.0,0.0,0.230129,2016-12-01 00:00:00,2016.0,4.0,48.0,3.0,515.0,795.0,1545.0,1030.0,515.0,0.005825,0.003883,0.001942,0.030659,0.010220,0.011428,0.008752,0.001356,1,243.0,0.9346,260,160,6,0,0.0,0.00000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0,0.000000,705,23,63,-2,-1728.0,-6.6450,17.8800,0,-13,-0.012560,1.000000e-09,1.000000e-09,-185.421590,-0.713160,0.000918,-0.550722,-0.7420,2.4040,0.0,5.0,4.984,21,4,33,12,-185.400,-0.7134,0.000917,-0.55100,-0.7420,2.4260,2017,2,2017.0,2018,2017,2,12,6.016,12,1,17,31,13.060,66.40,31,1,0.38090,12,16,14.380,10.990,21,0,-0.38000,4,7,2.5680,6,0,6,48,23.95,51,1,0,19.0,0.07306,-inf,-inf,-inf,-inf,,3,2,2.139,0.11975,2.1050,10.680,9.29000,14.2500,7.5470,9.6800,8.4000,4.3800,-1.523690,-2.225,-1.167560,0.060245,-2.0140,-0.34030,-0.37100,-0.183574,0.002002,1.8840,-0.10376,-0.747,127899.664,-382586.300,0.358400,-0.747,6010604.000,5588325.00,414,1.5930,345,759,1.0,5.0,1.0,5.0,160.0,1.0,0.0,0.0,0.00000,0.0,0.0,0.000,0.00000,0.0,0.0,0.0,0.0,0.0000,19.0,5.0,5.0,1.0,6.0,1.200,0.2000,2.0,1.0,2.23600,1.000000e-09,1.000000e-09,-3.6020,-0.7207,0.000384,-0.68850,-0.7420,1.302734,0.0,5.0,5.000,21.0,1.0,27.0,4.0,-3.6020,-0.7207,0.000384,-0.68850,-0.7420,1.30300,2018.0,1.0,2018.0,2018.0,2018.0,3.0,2.0,3.200,4.0,3.0,6.0,3.0,10.200,20.700,15.0,6.0,0.31540,12.0,3.0,14.400,6.300,17.0,12.0,0.19600,3.0,3.0,3.000,4.0,1.0,10.0,3.0,11.200,14.0,10.0,0.0,0.0,0.0000,-inf,-inf,-inf,-inf,,2.0,2.0,2.000,0.0000,0.0000,0.000,0.0,0.000,56.60,-1.4410,-1.4840,-1.377000,0.001534,1.298828,-0.36040,-0.37100,-0.34420,9.590000e-05,1.295898,-0.5490,-0.59300,31.0,6.200,303.0,334.0,-17136.0,-17136.0,-17136.0,-17136.0,265.0,0.019231,-189.023590,-1.433860,-1.239222,-1.4840,0.019426,4.139,0.935016,-5.4450,2.0,-12.0,0.00000,0.0,0.000000,0.0,,-inf,-inf,-inf,-2.964690,-3.7090,-2.544560,-0.70070,-0.74200,-0.527774,-9.005000,-22538.388686,0.000400
7,C_ID_dfa21fc124,0.010479,0.008752,0.011428,9.0,0.0,2.135850,2017-09-01 00:00:00,2017.0,3.0,35.0,4.0,241.0,521.0,723.0,482.0,241.0,0.012448,0.008299,0.004149,0.030659,0.010220,0.011428,0.008752,0.001356,1,19.0,0.8640,22,320,2,0,0.0,0.00000,0.0,0.0,0.000000,0.000000,0.0,0.0,0.0,0,0.000000,705,7,9,-2,-38.0,-1.7280,0.9697,0,-3,0.378200,1.000000e-09,1.000000e-09,-13.565845,-0.616629,0.033053,-0.055688,-0.7456,2.2050,0.0,4.0,3.182,13,2,33,6,-13.570,-0.6167,0.033080,-0.05570,-0.7456,2.2030,2017,1,2017.0,2017,2017,10,4,10.270,12,9,29,10,19.900,93.60,29,4,-0.72500,23,9,16.400,53.780,23,0,-1.54600,4,7,3.5450,6,0,39,8,43.28,51,39,0,4.0,0.18180,-inf,-inf,-inf,-inf,,4,4,4.000,0.00000,0.0000,56.530,0.00000,0.0000,3.4550,0.0000,29.4000,0.0000,-2.466517,-2.982,-0.222753,0.528846,2.2050,-0.15420,-0.18640,-0.013922,0.002066,2.2050,-0.09530,-0.747,1726469.900,59956.190,0.358400,-0.747,6010604.000,5588325.00,83,3.7730,409,493,1.0,3.0,1.0,3.0,320.0,2.0,0.0,0.0,0.00000,0.0,0.0,0.000,0.00000,0.0,0.0,0.0,0.0,0.0000,307.0,3.0,3.0,2.0,6.0,2.000,0.0000,2.0,2.0,0.00000,1.000000e-09,1.000000e-09,-2.0980,-0.6990,0.000005,-0.69730,-0.7017,0.000000,0.0,4.0,3.000,13.0,2.0,7.0,3.0,-2.0980,-0.6990,0.000005,-0.69730,-0.7017,0.00000,2018.0,1.0,2018.0,2018.0,2018.0,2.0,1.0,2.000,2.0,2.0,6.0,3.0,13.664,44.340,18.0,6.0,-1.68800,7.0,3.0,13.000,43.000,20.0,7.0,0.67040,1.0,3.0,4.000,6.0,1.0,7.0,2.0,6.668,7.0,6.0,1.0,2.0,0.6665,-inf,-inf,-inf,-inf,,4.0,4.0,4.000,0.0000,0.0000,0.000,0.0,0.000,87.30,-2.7970,-2.8070,-2.790000,0.000078,-1.034180,-0.17480,-0.17540,-0.17430,3.000000e-07,0.000000,-0.5550,-0.59300,12.0,4.000,350.0,362.0,-17410.0,-17410.0,-17410.0,-17410.0,25.0,0.136364,-15.663845,-1.315629,-0.752988,-1.4473,0.154653,8.000,1.000000,0.2720,2.0,-1.0,0.00000,0.0,0.000000,0.0,,-inf,-inf,-inf,-5.263517,-5.7890,-3.012753,-0.32900,-0.36180,-0.188222,-1.573500,-74.612148,0.021089
8,C_ID_fe0fdac8ea,0.010610,0.011385,0.010283,8.0,0.0,-0.065406,2017-08-01 00:00:00,2017.0,3.0,31.0,1.0,272.0,552.0,544.0,272.0,0.0,0.007353,0.003676,0.000000,0.032277,0.010759,0.011385,0.010283,0.000566,1,11.0,0.7334,15,173,2,0,4.0,0.26660,1.0,16.0,1.066000,0.066650,2.0,1.0,0.0,1,1.066000,705,4,8,-5,-63.0,-4.2000,1.6000,-2,-6,0.674300,1.000000e-09,1.000000e-09,-8.708374,-0.580558,0.025819,-0.251424,-0.7460,0.7617,0.0,1.0,1.000,9,2,33,4,-8.710,-0.5806,0.025800,-0.25150,-0.7460,0.7627,2017,1,2017.0,2017,2017,9,5,9.800,12,8,4,8,5.934,14.49,18,2,2.51400,4,8,13.664,44.100,22,4,-0.54350,0,5,1.4670,5,0,36,6,39.53,49,33,0,2.0,0.13330,-8.582663,-0.572178,-0.125712,-0.746156,0.032784,3,2,2.066,0.06665,3.8730,17.000,0.00000,0.0000,23.8600,0.0000,55.7200,0.0000,-1.182989,-1.492,-0.502847,0.086786,0.8490,-0.28660,-0.37300,-0.109363,0.007640,0.9297,0.07460,-0.747,6010604.000,1329550.400,-0.370600,-0.747,139546.880,-4396226.00,112,7.4650,421,534,1.0,2.0,1.0,2.0,173.0,1.0,0.0,0.0,0.00000,1.0,2.0,1.000,0.00000,1.0,1.0,0.0,1.0,1.0000,80.0,2.0,2.0,1.0,2.0,1.000,0.0000,1.0,1.0,,1.000000e-09,1.000000e-09,-1.2640,-0.6320,0.005207,-0.58060,-0.6826,,0.0,1.0,1.000,9.0,1.0,19.0,2.0,-1.2640,-0.6320,0.005207,-0.58060,-0.6826,,2018.0,1.0,2018.0,2018.0,2018.0,3.0,1.0,3.000,3.0,3.0,17.0,2.0,18.500,4.500,20.0,17.0,,14.0,2.0,17.500,24.500,21.0,14.0,,1.0,2.0,3.000,5.0,1.0,11.0,2.0,11.500,12.0,11.0,0.0,1.0,0.5000,-1.263159,-0.631579,-0.580715,-0.682444,0.005173,2.0,2.0,2.000,0.0000,,0.000,0.0,0.000,54.50,-1.2640,-1.3650,-1.161000,0.020830,,-0.31600,-0.34130,-0.29030,1.302000e-03,,-0.5503,-0.60640,3.0,1.500,320.0,323.0,-17379.0,-17379.0,-17379.0,-17379.0,17.0,0.133333,-9.972374,-1.212558,-0.832024,-1.4286,0.145148,4.066,0.968054,-3.2000,-1.0,-5.0,0.26660,18.0,2.066000,3.0,0.125000,-0.554021,-0.586911,-0.277341,-2.446989,-2.8570,-1.663847,-0.60260,-0.71430,-0.399663,-1.264000,-63.226336,0.019992
9,C_ID_bf62c0b49d,0.010610,0.008752,0.010283,8.0,0.0,0.300062,2016-08-01 00:00:00,2016.0,3.0,31.0,0.0,637.0,917.0,1274.0,1274.0,0.0,0.003140,0.003140,0.000000,0.029645,0.009882,0.010610,0.008752,0.000992,1,99.0,0.8760,113,88,6,0,0.0,0.00000,0.0,3.0,0.026550,0.026080,1.0,0.0,0.0,0,0.026550,705,29,59,-1,-664.0,-5.8750,17.4700,0,-13,-0.186600,1.000000e-09,1.000000e-09,-76.997120,-0.681390,0.007153,-0.371335,-0.7430,2.2070,0.0,1.0,1.035,16,3,37,13,-77.000,-0.6810,0.007150,-0.37130,-0.7430,2.2000,2017,2,2017.0,2018,2017,1,12,5.258,12,1,7,25,12.560,67.94,31,1,0.87100,16,14,14.690,14.790,20,0,-1.83100,2,7,2.3180,6,0,32,33,20.64,51,1,0,13.0,0.11505,-inf,-inf,-0.657906,-inf,,3,2,2.105,0.09576,2.5920,5.504,6.73400,11.1250,13.5400,5.4770,5.6300,4.4900,-1.434084,-2.220,-0.742670,0.071674,-0.6550,-0.32890,-0.37160,-0.164335,0.003030,1.4670,0.07300,-0.747,5958343.500,1314340.200,0.338100,-0.747,5854940.000,5314541.00,415,3.6720,340,755,1.0,3.0,1.0,3.0,88.0,1.0,0.0,0.0,0.00000,0.0,0.0,0.000,0.00000,0.0,0.0,0.0,0.0,0.0000,80.0,3.0,3.0,2.0,6.0,2.000,0.0000,2.0,2.0,0.00000,1.000000e-09,1.000000e-09,-2.1400,-0.7134,0.001704,-0.66600,-0.7417,1.633789,0.0,1.0,1.000,16.0,1.0,37.0,2.0,-2.1400,-0.7134,0.001704,-0.66600,-0.7417,1.63400,2018.0,1.0,2018.0,2018.0,2018.0,4.0,1.0,4.000,4.0,4.0,1.0,2.0,3.334,16.330,8.0,1.0,1.73200,9.0,3.0,12.000,7.000,14.0,9.0,-1.45800,6.0,1.0,6.000,6.0,6.0,13.0,2.0,13.336,14.0,13.0,1.0,3.0,1.0000,-inf,-inf,-inf,-inf,,2.0,2.0,2.000,0.0000,0.0000,0.000,0.0,0.000,38.66,-1.4270,-1.4830,-1.332000,0.006817,1.633789,-0.35670,-0.37080,-0.33300,4.260000e-04,1.637695,-0.5503,-0.59300,7.0,2.334,301.0,308.0,-17014.0,-17014.0,-17014.0,-17014.0,116.0,0.026549,-79.137120,-1.394790,-1.037335,-1.4847,0.027793,4.105,0.950119,-3.8750,2.0,-11.0,0.00000,3.0,0.026550,1.0,0.000000,-26.379040,-52.534481,-1.037335,-2.861084,-3.7030,-2.074670,-0.68560,-0.74240,-0.497335,-3.210000,-4133.337083,0.000777


In [19]:
df = train_test()

In [20]:
hist = historical_transactions()

Memory usage after optimization is: 2110.04 MB
Decreased by 69.8%
Memory usage after optimization is: 88.79 MB
Decreased by 54.2%


In [21]:
new_hist = new_merchant_transactions()

Memory usage after optimization is: 125.43 MB
Decreased by 70.6%
Memory usage after optimization is: 69.42 MB
Decreased by 51.4%


In [22]:
result = pd.merge(df, hist, on='card_id', how='outer')
result = pd.merge(result, new_hist, on='card_id', how='outer')

In [54]:
result = additional_features(result)

In [60]:
train_df = result[result['target'].notnull()]
test_df = result[result['target'].isnull()]

In [63]:
train_df.set_index('card_id').to_csv('./data/train_v2.csv')
test_df.set_index('card_id').to_csv('./data/test_v2.csv')