[View in Colaboratory](https://colab.research.google.com/github/Hoiy/kaggle-home-credit-default-risk/blob/master/prep.ipynb)

In [0]:
import pandas as pd
import numpy as np
import os
import dotenv
import gc
from tqdm import tqdm 
from sklearn.feature_extraction.text import CountVectorizer

dotenv.load_dotenv('.env')

True

In [0]:
types = ['train', 'test']
dfs = {t:pd.read_csv('./raw/application_%s.csv'%t) for t in types}
bureau = pd.read_csv('./raw/bureau.csv')
bureau_balance = pd.read_csv('./raw/bureau_balance.csv')
prev_app = pd.read_csv('./raw/previous_application.csv')
pos_cash = pd.read_csv('./raw/POS_CASH_balance.csv')
credit_card_balance = pd.read_csv('./raw/credit_card_balance.csv')
previous_application = pd.read_csv('./raw/previous_application.csv')
installments_payments = pd.read_csv('./raw/installments_payments.csv')

In [0]:
for t in types:
  df = dfs[t]
  docs = [_f for _f in df.columns if 'FLAG_DOC' in _f]
  live = [_f for _f in df.columns if ('FLAG_' in _f) & ('FLAG_DOC' not in _f) & ('_FLAG_' not in _f)]
    
  df['NEW_CREDIT_TO_ANNUITY_RATIO'] = df['AMT_CREDIT'] / df['AMT_ANNUITY']
  df['NEW_CREDIT_TO_GOODS_RATIO'] = df['AMT_CREDIT'] / df['AMT_GOODS_PRICE']
  
  inc_by_org = df[['AMT_INCOME_TOTAL', 'ORGANIZATION_TYPE']].groupby('ORGANIZATION_TYPE').median()['AMT_INCOME_TOTAL']
  
  df['NEW_DOC_IND_AVG'] = df[docs].mean(axis=1)
  df['NEW_DOC_IND_STD'] = df[docs].std(axis=1)
  df['NEW_DOC_IND_KURT'] = df[docs].kurtosis(axis=1)
  df['NEW_LIVE_IND_SUM'] = df[live].sum(axis=1)
  df['NEW_LIVE_IND_STD'] = df[live].std(axis=1)
  df['NEW_LIVE_IND_KURT'] = df[live].kurtosis(axis=1)
  
  df['NEW_INC_PER_CHLD'] = df['AMT_INCOME_TOTAL'] / (1 + df['CNT_CHILDREN'])
  df['NEW_EMPLOY_TO_BIRTH_RATIO'] = df['DAYS_EMPLOYED'] / df['DAYS_BIRTH']
  df['NEW_ANNUITY_TO_INCOME_RATIO'] = df['AMT_ANNUITY'] / (1 + df['AMT_INCOME_TOTAL'])
  df['NEW_SOURCES_PROD'] = df['EXT_SOURCE_1'] * df['EXT_SOURCE_2'] * df['EXT_SOURCE_3']
  df['NEW_EXT_SOURCES_MEAN'] = df[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].mean(axis=1)
  df['NEW_SCORES_STD'] = df[['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']].std(axis=1)
  df['NEW_SCORES_STD'] = df['NEW_SCORES_STD'].fillna(df['NEW_SCORES_STD'].mean())
  df['NEW_CAR_TO_BIRTH_RATIO'] = df['OWN_CAR_AGE'] / df['DAYS_BIRTH']
  df['NEW_CAR_TO_EMPLOY_RATIO'] = df['OWN_CAR_AGE'] / df['DAYS_EMPLOYED']
  df['NEW_PHONE_TO_BIRTH_RATIO'] = df['DAYS_LAST_PHONE_CHANGE'] / df['DAYS_BIRTH']
  df['NEW_PHONE_TO_EMPLOY_RATIO'] = df['DAYS_LAST_PHONE_CHANGE'] / df['DAYS_EMPLOYED']
  df['NEW_CREDIT_TO_INCOME_RATIO'] = df['AMT_CREDIT'] / df['AMT_INCOME_TOTAL']

In [0]:
def process_log_df(df, col_prefix, group_col, sort_col, cat_cols, cont_cols):
  def second(ser):
    return ser.iloc[1] if len(ser) >= 2 else None

  def third(ser):
    return ser.iloc[2] if len(ser) >= 3 else None

  def second_last(ser):
    return ser.iloc[-1] if len(ser) >= 2 else None

  def third_last(ser):
    return ser.iloc[-2] if len(ser) >= 3 else None  
  
  
  FILE_PATH='./prep/processed_%s.snappy.parquet'%col_prefix
  
#   import os
#   if os.path.exists(FILE_PATH):
#     return pd.read_parquet(FILE_PATH)

#   cat_cols = cat_cols[:1]
#   cont_cols = cont_cols[:1]
  
  from tqdm import tqdm
  
  grouped = df.sort_values([group_col, sort_col]).groupby(group_col)
  df = grouped.size().to_frame()
  df.columns=['SIZE']
#   display(df.head())
  
  print("Processing categorical columns...")
  for col in tqdm(cat_cols):
    agg = grouped.agg({col: ['count', 'first', second, second_last, 'last']})
    agg.columns = [v[0] + '__' + v[1] + '__CATEGORY' if v[1] != 'count' else v[0] + '__' + v[1] for v in agg.columns.values]
    df = pd.concat([df, agg], axis=1)
    df = pd.concat([df, grouped[col].value_counts().unstack().fillna(0).astype(int).rename(lambda x: col + '__' + x, axis=1)], axis=1)    
#     display(df.head())
  
  print("Processing continous columns...")
  for col in tqdm(cont_cols):
    agg = grouped.agg({col: ['min', 'max', 'median', 'var', 'count', 'sum', 'first', second, second_last, 'last']})
    agg.columns = [v[0] + '__' + v[1] for v in agg.columns.values]
    df = pd.concat([df, agg], axis=1)
#     display(df.head())
  
  df.rename(lambda x: col_prefix + '__' + x, inplace=True, axis=1)  
#   display(df.head())
  df.to_parquet(FILE_PATH)
  return df

In [0]:
prev_app['APP_CREDIT_PERC'] = prev_app['AMT_APPLICATION'] / prev_app['AMT_CREDIT']

prev_app_cat_cols = [
    'NAME_CONTRACT_TYPE', 
    'WEEKDAY_APPR_PROCESS_START', 
    'FLAG_LAST_APPL_PER_CONTRACT', 
    'NAME_CASH_LOAN_PURPOSE', 
    'NAME_CONTRACT_STATUS', 
    'NAME_PAYMENT_TYPE', 
    'CODE_REJECT_REASON',
    'NAME_TYPE_SUITE',
    'NAME_CLIENT_TYPE',
    'NAME_GOODS_CATEGORY',
    'NAME_PORTFOLIO',
    'NAME_PRODUCT_TYPE',
    'CHANNEL_TYPE',
    'NAME_SELLER_INDUSTRY',
    'NAME_YIELD_GROUP',
    'PRODUCT_COMBINATION'
]
prev_app_cont_cols = prev_app.columns.drop(prev_app_cat_cols + ['SK_ID_CURR'])

processed_prev_app = process_log_df(
    prev_app, 
    col_prefix='PREV_APP', 
    group_col='SK_ID_CURR', 
    sort_col='DAYS_DECISION',
    cat_cols=prev_app_cat_cols,
    cont_cols=prev_app_cont_cols
)

for t in types:
  dfs[t] = pd.merge(dfs[t], processed_prev_app, left_on='SK_ID_CURR', right_index=True, how='left')

  0%|          | 0/16 [00:00<?, ?it/s]

Processing categorical columns...


100%|██████████| 16/16 [04:28<00:00, 16.75s/it]
  0%|          | 0/21 [00:00<?, ?it/s]

Processing continous columns...


100%|██████████| 21/21 [06:12<00:00, 17.72s/it]


In [0]:
bureau_cat_cols = ['CREDIT_ACTIVE', 'CREDIT_CURRENCY', 'CREDIT_TYPE']
bureau_cont_cols = bureau.columns.drop(bureau_cat_cols + ['SK_ID_CURR'])

processed_bureau = process_log_df(
    bureau, 
    col_prefix='BUREAU', 
    group_col='SK_ID_CURR',
    sort_col='DAYS_CREDIT',
    cat_cols=bureau_cat_cols,
    cont_cols=bureau_cont_cols
)

for t in types:
  dfs[t] = pd.merge(dfs[t], processed_bureau, left_on='SK_ID_CURR', right_index=True, how='left')

  0%|          | 0/3 [00:00<?, ?it/s]

Processing categorical columns...


100%|██████████| 3/3 [00:42<00:00, 14.15s/it]
  0%|          | 0/13 [00:00<?, ?it/s]

Processing continous columns...


100%|██████████| 13/13 [03:02<00:00, 14.06s/it]


In [0]:
pd.set_option('display.max_columns', None)
dfs['train'].head()

Unnamed: 0,SK_ID_CURR,TARGET,NAME_CONTRACT_TYPE,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,AMT_CREDIT,AMT_ANNUITY,AMT_GOODS_PRICE,NAME_TYPE_SUITE,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,REGION_POPULATION_RELATIVE,DAYS_BIRTH,DAYS_EMPLOYED,DAYS_REGISTRATION,DAYS_ID_PUBLISH,OWN_CAR_AGE,FLAG_MOBIL,FLAG_EMP_PHONE,FLAG_WORK_PHONE,FLAG_CONT_MOBILE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,REGION_RATING_CLIENT,REGION_RATING_CLIENT_W_CITY,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,REG_REGION_NOT_LIVE_REGION,REG_REGION_NOT_WORK_REGION,LIVE_REGION_NOT_WORK_REGION,REG_CITY_NOT_LIVE_CITY,REG_CITY_NOT_WORK_CITY,LIVE_CITY_NOT_WORK_CITY,ORGANIZATION_TYPE,EXT_SOURCE_1,EXT_SOURCE_2,EXT_SOURCE_3,APARTMENTS_AVG,BASEMENTAREA_AVG,YEARS_BEGINEXPLUATATION_AVG,YEARS_BUILD_AVG,COMMONAREA_AVG,ELEVATORS_AVG,ENTRANCES_AVG,FLOORSMAX_AVG,FLOORSMIN_AVG,LANDAREA_AVG,LIVINGAPARTMENTS_AVG,LIVINGAREA_AVG,NONLIVINGAPARTMENTS_AVG,NONLIVINGAREA_AVG,APARTMENTS_MODE,BASEMENTAREA_MODE,YEARS_BEGINEXPLUATATION_MODE,YEARS_BUILD_MODE,COMMONAREA_MODE,ELEVATORS_MODE,ENTRANCES_MODE,FLOORSMAX_MODE,FLOORSMIN_MODE,LANDAREA_MODE,LIVINGAPARTMENTS_MODE,LIVINGAREA_MODE,NONLIVINGAPARTMENTS_MODE,NONLIVINGAREA_MODE,APARTMENTS_MEDI,BASEMENTAREA_MEDI,YEARS_BEGINEXPLUATATION_MEDI,YEARS_BUILD_MEDI,COMMONAREA_MEDI,ELEVATORS_MEDI,ENTRANCES_MEDI,FLOORSMAX_MEDI,FLOORSMIN_MEDI,LANDAREA_MEDI,LIVINGAPARTMENTS_MEDI,LIVINGAREA_MEDI,NONLIVINGAPARTMENTS_MEDI,NONLIVINGAREA_MEDI,FONDKAPREMONT_MODE,HOUSETYPE_MODE,TOTALAREA_MODE,WALLSMATERIAL_MODE,EMERGENCYSTATE_MODE,OBS_30_CNT_SOCIAL_CIRCLE,DEF_30_CNT_SOCIAL_CIRCLE,OBS_60_CNT_SOCIAL_CIRCLE,DEF_60_CNT_SOCIAL_CIRCLE,DAYS_LAST_PHONE_CHANGE,FLAG_DOCUMENT_2,FLAG_DOCUMENT_3,FLAG_DOCUMENT_4,FLAG_DOCUMENT_5,FLAG_DOCUMENT_6,FLAG_DOCUMENT_7,FLAG_DOCUMENT_8,FLAG_DOCUMENT_9,FLAG_DOCUMENT_10,FLAG_DOCUMENT_11,FLAG_DOCUMENT_12,FLAG_DOCUMENT_13,FLAG_DOCUMENT_14,FLAG_DOCUMENT_15,FLAG_DOCUMENT_16,FLAG_DOCUMENT_17,FLAG_DOCUMENT_18,FLAG_DOCUMENT_19,FLAG_DOCUMENT_20,FLAG_DOCUMENT_21,AMT_REQ_CREDIT_BUREAU_HOUR,AMT_REQ_CREDIT_BUREAU_DAY,AMT_REQ_CREDIT_BUREAU_WEEK,AMT_REQ_CREDIT_BUREAU_MON,AMT_REQ_CREDIT_BUREAU_QRT,AMT_REQ_CREDIT_BUREAU_YEAR,PREV_APP__SIZE,PREV_APP__NAME_CONTRACT_TYPE__count,PREV_APP__NAME_CONTRACT_TYPE__first__CATEGORY,PREV_APP__NAME_CONTRACT_TYPE__second__CATEGORY,PREV_APP__NAME_CONTRACT_TYPE__second_last__CATEGORY,PREV_APP__NAME_CONTRACT_TYPE__last__CATEGORY,PREV_APP__NAME_CONTRACT_TYPE__Cash loans,PREV_APP__NAME_CONTRACT_TYPE__Consumer loans,PREV_APP__NAME_CONTRACT_TYPE__Revolving loans,PREV_APP__NAME_CONTRACT_TYPE__XNA,PREV_APP__WEEKDAY_APPR_PROCESS_START__count,PREV_APP__WEEKDAY_APPR_PROCESS_START__first__CATEGORY,PREV_APP__WEEKDAY_APPR_PROCESS_START__second__CATEGORY,PREV_APP__WEEKDAY_APPR_PROCESS_START__second_last__CATEGORY,PREV_APP__WEEKDAY_APPR_PROCESS_START__last__CATEGORY,PREV_APP__WEEKDAY_APPR_PROCESS_START__FRIDAY,PREV_APP__WEEKDAY_APPR_PROCESS_START__MONDAY,PREV_APP__WEEKDAY_APPR_PROCESS_START__SATURDAY,PREV_APP__WEEKDAY_APPR_PROCESS_START__SUNDAY,PREV_APP__WEEKDAY_APPR_PROCESS_START__THURSDAY,PREV_APP__WEEKDAY_APPR_PROCESS_START__TUESDAY,PREV_APP__WEEKDAY_APPR_PROCESS_START__WEDNESDAY,PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__count,PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__first__CATEGORY,PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__second__CATEGORY,PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__second_last__CATEGORY,PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__last__CATEGORY,PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__N,PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__Y,PREV_APP__NAME_CASH_LOAN_PURPOSE__count,PREV_APP__NAME_CASH_LOAN_PURPOSE__first__CATEGORY,PREV_APP__NAME_CASH_LOAN_PURPOSE__second__CATEGORY,PREV_APP__NAME_CASH_LOAN_PURPOSE__second_last__CATEGORY,PREV_APP__NAME_CASH_LOAN_PURPOSE__last__CATEGORY,PREV_APP__NAME_CASH_LOAN_PURPOSE__Building a house or an annex,PREV_APP__NAME_CASH_LOAN_PURPOSE__Business development,PREV_APP__NAME_CASH_LOAN_PURPOSE__Buying a garage,PREV_APP__NAME_CASH_LOAN_PURPOSE__Buying a holiday home / land,PREV_APP__NAME_CASH_LOAN_PURPOSE__Buying a home,PREV_APP__NAME_CASH_LOAN_PURPOSE__Buying a new car,PREV_APP__NAME_CASH_LOAN_PURPOSE__Buying a used car,PREV_APP__NAME_CASH_LOAN_PURPOSE__Car repairs,PREV_APP__NAME_CASH_LOAN_PURPOSE__Education,PREV_APP__NAME_CASH_LOAN_PURPOSE__Everyday expenses,PREV_APP__NAME_CASH_LOAN_PURPOSE__Furniture,PREV_APP__NAME_CASH_LOAN_PURPOSE__Gasification / water supply,PREV_APP__NAME_CASH_LOAN_PURPOSE__Hobby,PREV_APP__NAME_CASH_LOAN_PURPOSE__Journey,PREV_APP__NAME_CASH_LOAN_PURPOSE__Medicine,PREV_APP__NAME_CASH_LOAN_PURPOSE__Money for a third person,PREV_APP__NAME_CASH_LOAN_PURPOSE__Other,PREV_APP__NAME_CASH_LOAN_PURPOSE__Payments on other loans,PREV_APP__NAME_CASH_LOAN_PURPOSE__Purchase of electronic equipment,PREV_APP__NAME_CASH_LOAN_PURPOSE__Refusal to name the goal,PREV_APP__NAME_CASH_LOAN_PURPOSE__Repairs,PREV_APP__NAME_CASH_LOAN_PURPOSE__Urgent needs,PREV_APP__NAME_CASH_LOAN_PURPOSE__Wedding / gift / holiday,PREV_APP__NAME_CASH_LOAN_PURPOSE__XAP,PREV_APP__NAME_CASH_LOAN_PURPOSE__XNA,PREV_APP__NAME_CONTRACT_STATUS__count,PREV_APP__NAME_CONTRACT_STATUS__first__CATEGORY,PREV_APP__NAME_CONTRACT_STATUS__second__CATEGORY,PREV_APP__NAME_CONTRACT_STATUS__second_last__CATEGORY,PREV_APP__NAME_CONTRACT_STATUS__last__CATEGORY,PREV_APP__NAME_CONTRACT_STATUS__Approved,PREV_APP__NAME_CONTRACT_STATUS__Canceled,PREV_APP__NAME_CONTRACT_STATUS__Refused,PREV_APP__NAME_CONTRACT_STATUS__Unused offer,PREV_APP__NAME_PAYMENT_TYPE__count,PREV_APP__NAME_PAYMENT_TYPE__first__CATEGORY,PREV_APP__NAME_PAYMENT_TYPE__second__CATEGORY,PREV_APP__NAME_PAYMENT_TYPE__second_last__CATEGORY,PREV_APP__NAME_PAYMENT_TYPE__last__CATEGORY,PREV_APP__NAME_PAYMENT_TYPE__Cash through the bank,PREV_APP__NAME_PAYMENT_TYPE__Cashless from the account of the employer,PREV_APP__NAME_PAYMENT_TYPE__Non-cash from your account,PREV_APP__NAME_PAYMENT_TYPE__XNA,PREV_APP__CODE_REJECT_REASON__count,PREV_APP__CODE_REJECT_REASON__first__CATEGORY,PREV_APP__CODE_REJECT_REASON__second__CATEGORY,PREV_APP__CODE_REJECT_REASON__second_last__CATEGORY,PREV_APP__CODE_REJECT_REASON__last__CATEGORY,PREV_APP__CODE_REJECT_REASON__CLIENT,PREV_APP__CODE_REJECT_REASON__HC,PREV_APP__CODE_REJECT_REASON__LIMIT,PREV_APP__CODE_REJECT_REASON__SCO,PREV_APP__CODE_REJECT_REASON__SCOFR,PREV_APP__CODE_REJECT_REASON__SYSTEM,PREV_APP__CODE_REJECT_REASON__VERIF,PREV_APP__CODE_REJECT_REASON__XAP,PREV_APP__CODE_REJECT_REASON__XNA,PREV_APP__NAME_TYPE_SUITE__count,PREV_APP__NAME_TYPE_SUITE__first__CATEGORY,PREV_APP__NAME_TYPE_SUITE__second__CATEGORY,PREV_APP__NAME_TYPE_SUITE__second_last__CATEGORY,PREV_APP__NAME_TYPE_SUITE__last__CATEGORY,PREV_APP__NAME_TYPE_SUITE__Children,PREV_APP__NAME_TYPE_SUITE__Family,PREV_APP__NAME_TYPE_SUITE__Group of people,PREV_APP__NAME_TYPE_SUITE__Other_A,PREV_APP__NAME_TYPE_SUITE__Other_B,"PREV_APP__NAME_TYPE_SUITE__Spouse, partner",PREV_APP__NAME_TYPE_SUITE__Unaccompanied,PREV_APP__NAME_CLIENT_TYPE__count,PREV_APP__NAME_CLIENT_TYPE__first__CATEGORY,PREV_APP__NAME_CLIENT_TYPE__second__CATEGORY,PREV_APP__NAME_CLIENT_TYPE__second_last__CATEGORY,PREV_APP__NAME_CLIENT_TYPE__last__CATEGORY,PREV_APP__NAME_CLIENT_TYPE__New,PREV_APP__NAME_CLIENT_TYPE__Refreshed,PREV_APP__NAME_CLIENT_TYPE__Repeater,PREV_APP__NAME_CLIENT_TYPE__XNA,PREV_APP__NAME_GOODS_CATEGORY__count,PREV_APP__NAME_GOODS_CATEGORY__first__CATEGORY,PREV_APP__NAME_GOODS_CATEGORY__second__CATEGORY,PREV_APP__NAME_GOODS_CATEGORY__second_last__CATEGORY,PREV_APP__NAME_GOODS_CATEGORY__last__CATEGORY,PREV_APP__NAME_GOODS_CATEGORY__Additional Service,PREV_APP__NAME_GOODS_CATEGORY__Animals,PREV_APP__NAME_GOODS_CATEGORY__Audio/Video,PREV_APP__NAME_GOODS_CATEGORY__Auto Accessories,PREV_APP__NAME_GOODS_CATEGORY__Clothing and Accessories,PREV_APP__NAME_GOODS_CATEGORY__Computers,PREV_APP__NAME_GOODS_CATEGORY__Construction Materials,PREV_APP__NAME_GOODS_CATEGORY__Consumer Electronics,PREV_APP__NAME_GOODS_CATEGORY__Direct Sales,PREV_APP__NAME_GOODS_CATEGORY__Education,PREV_APP__NAME_GOODS_CATEGORY__Fitness,PREV_APP__NAME_GOODS_CATEGORY__Furniture,PREV_APP__NAME_GOODS_CATEGORY__Gardening,PREV_APP__NAME_GOODS_CATEGORY__Homewares,PREV_APP__NAME_GOODS_CATEGORY__House Construction,PREV_APP__NAME_GOODS_CATEGORY__Insurance,PREV_APP__NAME_GOODS_CATEGORY__Jewelry,PREV_APP__NAME_GOODS_CATEGORY__Medical Supplies,PREV_APP__NAME_GOODS_CATEGORY__Medicine,PREV_APP__NAME_GOODS_CATEGORY__Mobile,PREV_APP__NAME_GOODS_CATEGORY__Office Appliances,PREV_APP__NAME_GOODS_CATEGORY__Other,PREV_APP__NAME_GOODS_CATEGORY__Photo / Cinema Equipment,PREV_APP__NAME_GOODS_CATEGORY__Sport and Leisure,PREV_APP__NAME_GOODS_CATEGORY__Tourism,PREV_APP__NAME_GOODS_CATEGORY__Vehicles,PREV_APP__NAME_GOODS_CATEGORY__Weapon,PREV_APP__NAME_GOODS_CATEGORY__XNA,PREV_APP__NAME_PORTFOLIO__count,PREV_APP__NAME_PORTFOLIO__first__CATEGORY,PREV_APP__NAME_PORTFOLIO__second__CATEGORY,PREV_APP__NAME_PORTFOLIO__second_last__CATEGORY,PREV_APP__NAME_PORTFOLIO__last__CATEGORY,PREV_APP__NAME_PORTFOLIO__Cards,PREV_APP__NAME_PORTFOLIO__Cars,PREV_APP__NAME_PORTFOLIO__Cash,PREV_APP__NAME_PORTFOLIO__POS,PREV_APP__NAME_PORTFOLIO__XNA,PREV_APP__NAME_PRODUCT_TYPE__count,PREV_APP__NAME_PRODUCT_TYPE__first__CATEGORY,PREV_APP__NAME_PRODUCT_TYPE__second__CATEGORY,PREV_APP__NAME_PRODUCT_TYPE__second_last__CATEGORY,PREV_APP__NAME_PRODUCT_TYPE__last__CATEGORY,PREV_APP__NAME_PRODUCT_TYPE__XNA,PREV_APP__NAME_PRODUCT_TYPE__walk-in,PREV_APP__NAME_PRODUCT_TYPE__x-sell,PREV_APP__CHANNEL_TYPE__count,PREV_APP__CHANNEL_TYPE__first__CATEGORY,PREV_APP__CHANNEL_TYPE__second__CATEGORY,PREV_APP__CHANNEL_TYPE__second_last__CATEGORY,PREV_APP__CHANNEL_TYPE__last__CATEGORY,PREV_APP__CHANNEL_TYPE__AP+ (Cash loan),PREV_APP__CHANNEL_TYPE__Car dealer,PREV_APP__CHANNEL_TYPE__Channel of corporate sales,PREV_APP__CHANNEL_TYPE__Contact center,PREV_APP__CHANNEL_TYPE__Country-wide,PREV_APP__CHANNEL_TYPE__Credit and cash offices,PREV_APP__CHANNEL_TYPE__Regional / Local,PREV_APP__CHANNEL_TYPE__Stone,PREV_APP__NAME_SELLER_INDUSTRY__count,PREV_APP__NAME_SELLER_INDUSTRY__first__CATEGORY,PREV_APP__NAME_SELLER_INDUSTRY__second__CATEGORY,PREV_APP__NAME_SELLER_INDUSTRY__second_last__CATEGORY,PREV_APP__NAME_SELLER_INDUSTRY__last__CATEGORY,PREV_APP__NAME_SELLER_INDUSTRY__Auto technology,PREV_APP__NAME_SELLER_INDUSTRY__Clothing,PREV_APP__NAME_SELLER_INDUSTRY__Connectivity,PREV_APP__NAME_SELLER_INDUSTRY__Construction,PREV_APP__NAME_SELLER_INDUSTRY__Consumer electronics,PREV_APP__NAME_SELLER_INDUSTRY__Furniture,PREV_APP__NAME_SELLER_INDUSTRY__Industry,PREV_APP__NAME_SELLER_INDUSTRY__Jewelry,PREV_APP__NAME_SELLER_INDUSTRY__MLM partners,PREV_APP__NAME_SELLER_INDUSTRY__Tourism,PREV_APP__NAME_SELLER_INDUSTRY__XNA,PREV_APP__NAME_YIELD_GROUP__count,PREV_APP__NAME_YIELD_GROUP__first__CATEGORY,PREV_APP__NAME_YIELD_GROUP__second__CATEGORY,PREV_APP__NAME_YIELD_GROUP__second_last__CATEGORY,PREV_APP__NAME_YIELD_GROUP__last__CATEGORY,PREV_APP__NAME_YIELD_GROUP__XNA,PREV_APP__NAME_YIELD_GROUP__high,PREV_APP__NAME_YIELD_GROUP__low_action,PREV_APP__NAME_YIELD_GROUP__low_normal,PREV_APP__NAME_YIELD_GROUP__middle,PREV_APP__PRODUCT_COMBINATION__count,PREV_APP__PRODUCT_COMBINATION__first__CATEGORY,PREV_APP__PRODUCT_COMBINATION__second__CATEGORY,PREV_APP__PRODUCT_COMBINATION__second_last__CATEGORY,PREV_APP__PRODUCT_COMBINATION__last__CATEGORY,PREV_APP__PRODUCT_COMBINATION__Card Street,PREV_APP__PRODUCT_COMBINATION__Card X-Sell,PREV_APP__PRODUCT_COMBINATION__Cash,PREV_APP__PRODUCT_COMBINATION__Cash Street: high,PREV_APP__PRODUCT_COMBINATION__Cash Street: low,PREV_APP__PRODUCT_COMBINATION__Cash Street: middle,PREV_APP__PRODUCT_COMBINATION__Cash X-Sell: high,PREV_APP__PRODUCT_COMBINATION__Cash X-Sell: low,PREV_APP__PRODUCT_COMBINATION__Cash X-Sell: middle,PREV_APP__PRODUCT_COMBINATION__POS household with interest,PREV_APP__PRODUCT_COMBINATION__POS household without interest,PREV_APP__PRODUCT_COMBINATION__POS industry with interest,PREV_APP__PRODUCT_COMBINATION__POS industry without interest,PREV_APP__PRODUCT_COMBINATION__POS mobile with interest,PREV_APP__PRODUCT_COMBINATION__POS mobile without interest,PREV_APP__PRODUCT_COMBINATION__POS other with interest,PREV_APP__PRODUCT_COMBINATION__POS others without interest,PREV_APP__SK_ID_PREV__min,PREV_APP__SK_ID_PREV__max,PREV_APP__SK_ID_PREV__median,PREV_APP__SK_ID_PREV__var,PREV_APP__SK_ID_PREV__count,PREV_APP__SK_ID_PREV__sum,PREV_APP__SK_ID_PREV__first,PREV_APP__SK_ID_PREV__second,PREV_APP__SK_ID_PREV__second_last,PREV_APP__SK_ID_PREV__last,PREV_APP__AMT_ANNUITY__min,PREV_APP__AMT_ANNUITY__max,PREV_APP__AMT_ANNUITY__median,PREV_APP__AMT_ANNUITY__var,PREV_APP__AMT_ANNUITY__count,PREV_APP__AMT_ANNUITY__sum,PREV_APP__AMT_ANNUITY__first,PREV_APP__AMT_ANNUITY__second,PREV_APP__AMT_ANNUITY__second_last,PREV_APP__AMT_ANNUITY__last,PREV_APP__AMT_APPLICATION__min,PREV_APP__AMT_APPLICATION__max,PREV_APP__AMT_APPLICATION__median,PREV_APP__AMT_APPLICATION__var,PREV_APP__AMT_APPLICATION__count,PREV_APP__AMT_APPLICATION__sum,PREV_APP__AMT_APPLICATION__first,PREV_APP__AMT_APPLICATION__second,PREV_APP__AMT_APPLICATION__second_last,PREV_APP__AMT_APPLICATION__last,PREV_APP__AMT_CREDIT__min,PREV_APP__AMT_CREDIT__max,PREV_APP__AMT_CREDIT__median,PREV_APP__AMT_CREDIT__var,PREV_APP__AMT_CREDIT__count,PREV_APP__AMT_CREDIT__sum,PREV_APP__AMT_CREDIT__first,PREV_APP__AMT_CREDIT__second,PREV_APP__AMT_CREDIT__second_last,PREV_APP__AMT_CREDIT__last,PREV_APP__AMT_DOWN_PAYMENT__min,PREV_APP__AMT_DOWN_PAYMENT__max,PREV_APP__AMT_DOWN_PAYMENT__median,PREV_APP__AMT_DOWN_PAYMENT__var,PREV_APP__AMT_DOWN_PAYMENT__count,PREV_APP__AMT_DOWN_PAYMENT__sum,PREV_APP__AMT_DOWN_PAYMENT__first,PREV_APP__AMT_DOWN_PAYMENT__second,PREV_APP__AMT_DOWN_PAYMENT__second_last,PREV_APP__AMT_DOWN_PAYMENT__last,PREV_APP__AMT_GOODS_PRICE__min,PREV_APP__AMT_GOODS_PRICE__max,PREV_APP__AMT_GOODS_PRICE__median,PREV_APP__AMT_GOODS_PRICE__var,PREV_APP__AMT_GOODS_PRICE__count,PREV_APP__AMT_GOODS_PRICE__sum,PREV_APP__AMT_GOODS_PRICE__first,PREV_APP__AMT_GOODS_PRICE__second,PREV_APP__AMT_GOODS_PRICE__second_last,PREV_APP__AMT_GOODS_PRICE__last,PREV_APP__HOUR_APPR_PROCESS_START__min,PREV_APP__HOUR_APPR_PROCESS_START__max,PREV_APP__HOUR_APPR_PROCESS_START__median,PREV_APP__HOUR_APPR_PROCESS_START__var,PREV_APP__HOUR_APPR_PROCESS_START__count,PREV_APP__HOUR_APPR_PROCESS_START__sum,PREV_APP__HOUR_APPR_PROCESS_START__first,PREV_APP__HOUR_APPR_PROCESS_START__second,PREV_APP__HOUR_APPR_PROCESS_START__second_last,PREV_APP__HOUR_APPR_PROCESS_START__last,PREV_APP__NFLAG_LAST_APPL_IN_DAY__min,PREV_APP__NFLAG_LAST_APPL_IN_DAY__max,PREV_APP__NFLAG_LAST_APPL_IN_DAY__median,PREV_APP__NFLAG_LAST_APPL_IN_DAY__var,PREV_APP__NFLAG_LAST_APPL_IN_DAY__count,PREV_APP__NFLAG_LAST_APPL_IN_DAY__sum,PREV_APP__NFLAG_LAST_APPL_IN_DAY__first,PREV_APP__NFLAG_LAST_APPL_IN_DAY__second,PREV_APP__NFLAG_LAST_APPL_IN_DAY__second_last,PREV_APP__NFLAG_LAST_APPL_IN_DAY__last,PREV_APP__RATE_DOWN_PAYMENT__min,PREV_APP__RATE_DOWN_PAYMENT__max,PREV_APP__RATE_DOWN_PAYMENT__median,PREV_APP__RATE_DOWN_PAYMENT__var,PREV_APP__RATE_DOWN_PAYMENT__count,PREV_APP__RATE_DOWN_PAYMENT__sum,PREV_APP__RATE_DOWN_PAYMENT__first,PREV_APP__RATE_DOWN_PAYMENT__second,PREV_APP__RATE_DOWN_PAYMENT__second_last,PREV_APP__RATE_DOWN_PAYMENT__last,PREV_APP__RATE_INTEREST_PRIMARY__min,PREV_APP__RATE_INTEREST_PRIMARY__max,PREV_APP__RATE_INTEREST_PRIMARY__median,PREV_APP__RATE_INTEREST_PRIMARY__var,PREV_APP__RATE_INTEREST_PRIMARY__count,PREV_APP__RATE_INTEREST_PRIMARY__sum,PREV_APP__RATE_INTEREST_PRIMARY__first,PREV_APP__RATE_INTEREST_PRIMARY__second,PREV_APP__RATE_INTEREST_PRIMARY__second_last,PREV_APP__RATE_INTEREST_PRIMARY__last,PREV_APP__RATE_INTEREST_PRIVILEGED__min,PREV_APP__RATE_INTEREST_PRIVILEGED__max,PREV_APP__RATE_INTEREST_PRIVILEGED__median,PREV_APP__RATE_INTEREST_PRIVILEGED__var,PREV_APP__RATE_INTEREST_PRIVILEGED__count,PREV_APP__RATE_INTEREST_PRIVILEGED__sum,PREV_APP__RATE_INTEREST_PRIVILEGED__first,PREV_APP__RATE_INTEREST_PRIVILEGED__second,PREV_APP__RATE_INTEREST_PRIVILEGED__second_last,PREV_APP__RATE_INTEREST_PRIVILEGED__last,PREV_APP__DAYS_DECISION__min,PREV_APP__DAYS_DECISION__max,PREV_APP__DAYS_DECISION__median,PREV_APP__DAYS_DECISION__var,PREV_APP__DAYS_DECISION__count,PREV_APP__DAYS_DECISION__sum,PREV_APP__DAYS_DECISION__first,PREV_APP__DAYS_DECISION__second,PREV_APP__DAYS_DECISION__second_last,PREV_APP__DAYS_DECISION__last,PREV_APP__SELLERPLACE_AREA__min,PREV_APP__SELLERPLACE_AREA__max,PREV_APP__SELLERPLACE_AREA__median,PREV_APP__SELLERPLACE_AREA__var,PREV_APP__SELLERPLACE_AREA__count,PREV_APP__SELLERPLACE_AREA__sum,PREV_APP__SELLERPLACE_AREA__first,PREV_APP__SELLERPLACE_AREA__second,PREV_APP__SELLERPLACE_AREA__second_last,PREV_APP__SELLERPLACE_AREA__last,PREV_APP__CNT_PAYMENT__min,PREV_APP__CNT_PAYMENT__max,PREV_APP__CNT_PAYMENT__median,PREV_APP__CNT_PAYMENT__var,PREV_APP__CNT_PAYMENT__count,PREV_APP__CNT_PAYMENT__sum,PREV_APP__CNT_PAYMENT__first,PREV_APP__CNT_PAYMENT__second,PREV_APP__CNT_PAYMENT__second_last,PREV_APP__CNT_PAYMENT__last,PREV_APP__DAYS_FIRST_DRAWING__min,PREV_APP__DAYS_FIRST_DRAWING__max,PREV_APP__DAYS_FIRST_DRAWING__median,PREV_APP__DAYS_FIRST_DRAWING__var,PREV_APP__DAYS_FIRST_DRAWING__count,PREV_APP__DAYS_FIRST_DRAWING__sum,PREV_APP__DAYS_FIRST_DRAWING__first,PREV_APP__DAYS_FIRST_DRAWING__second,PREV_APP__DAYS_FIRST_DRAWING__second_last,PREV_APP__DAYS_FIRST_DRAWING__last,PREV_APP__DAYS_FIRST_DUE__min,PREV_APP__DAYS_FIRST_DUE__max,PREV_APP__DAYS_FIRST_DUE__median,PREV_APP__DAYS_FIRST_DUE__var,PREV_APP__DAYS_FIRST_DUE__count,PREV_APP__DAYS_FIRST_DUE__sum,PREV_APP__DAYS_FIRST_DUE__first,PREV_APP__DAYS_FIRST_DUE__second,PREV_APP__DAYS_FIRST_DUE__second_last,PREV_APP__DAYS_FIRST_DUE__last,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__min,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__max,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__median,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__var,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__count,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__sum,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__first,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__second,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__second_last,PREV_APP__DAYS_LAST_DUE_1ST_VERSION__last,PREV_APP__DAYS_LAST_DUE__min,PREV_APP__DAYS_LAST_DUE__max,PREV_APP__DAYS_LAST_DUE__median,PREV_APP__DAYS_LAST_DUE__var,PREV_APP__DAYS_LAST_DUE__count,PREV_APP__DAYS_LAST_DUE__sum,PREV_APP__DAYS_LAST_DUE__first,PREV_APP__DAYS_LAST_DUE__second,PREV_APP__DAYS_LAST_DUE__second_last,PREV_APP__DAYS_LAST_DUE__last,PREV_APP__DAYS_TERMINATION__min,PREV_APP__DAYS_TERMINATION__max,PREV_APP__DAYS_TERMINATION__median,PREV_APP__DAYS_TERMINATION__var,PREV_APP__DAYS_TERMINATION__count,PREV_APP__DAYS_TERMINATION__sum,PREV_APP__DAYS_TERMINATION__first,PREV_APP__DAYS_TERMINATION__second,PREV_APP__DAYS_TERMINATION__second_last,PREV_APP__DAYS_TERMINATION__last,PREV_APP__NFLAG_INSURED_ON_APPROVAL__min,PREV_APP__NFLAG_INSURED_ON_APPROVAL__max,PREV_APP__NFLAG_INSURED_ON_APPROVAL__median,PREV_APP__NFLAG_INSURED_ON_APPROVAL__var,PREV_APP__NFLAG_INSURED_ON_APPROVAL__count,PREV_APP__NFLAG_INSURED_ON_APPROVAL__sum,PREV_APP__NFLAG_INSURED_ON_APPROVAL__first,PREV_APP__NFLAG_INSURED_ON_APPROVAL__second,PREV_APP__NFLAG_INSURED_ON_APPROVAL__second_last,PREV_APP__NFLAG_INSURED_ON_APPROVAL__last,BUREAU__SIZE,BUREAU__CREDIT_ACTIVE__count,BUREAU__CREDIT_ACTIVE__first__CATEGORY,BUREAU__CREDIT_ACTIVE__second__CATEGORY,BUREAU__CREDIT_ACTIVE__second_last__CATEGORY,BUREAU__CREDIT_ACTIVE__last__CATEGORY,BUREAU__CREDIT_ACTIVE__Active,BUREAU__CREDIT_ACTIVE__Bad debt,BUREAU__CREDIT_ACTIVE__Closed,BUREAU__CREDIT_ACTIVE__Sold,BUREAU__CREDIT_CURRENCY__count,BUREAU__CREDIT_CURRENCY__first__CATEGORY,BUREAU__CREDIT_CURRENCY__second__CATEGORY,BUREAU__CREDIT_CURRENCY__second_last__CATEGORY,BUREAU__CREDIT_CURRENCY__last__CATEGORY,BUREAU__CREDIT_CURRENCY__currency 1,BUREAU__CREDIT_CURRENCY__currency 2,BUREAU__CREDIT_CURRENCY__currency 3,BUREAU__CREDIT_CURRENCY__currency 4,BUREAU__CREDIT_TYPE__count,BUREAU__CREDIT_TYPE__first__CATEGORY,BUREAU__CREDIT_TYPE__second__CATEGORY,BUREAU__CREDIT_TYPE__second_last__CATEGORY,BUREAU__CREDIT_TYPE__last__CATEGORY,BUREAU__CREDIT_TYPE__Another type of loan,BUREAU__CREDIT_TYPE__Car loan,BUREAU__CREDIT_TYPE__Cash loan (non-earmarked),BUREAU__CREDIT_TYPE__Consumer credit,BUREAU__CREDIT_TYPE__Credit card,BUREAU__CREDIT_TYPE__Interbank credit,BUREAU__CREDIT_TYPE__Loan for business development,BUREAU__CREDIT_TYPE__Loan for purchase of shares (margin lending),BUREAU__CREDIT_TYPE__Loan for the purchase of equipment,BUREAU__CREDIT_TYPE__Loan for working capital replenishment,BUREAU__CREDIT_TYPE__Microloan,BUREAU__CREDIT_TYPE__Mobile operator loan,BUREAU__CREDIT_TYPE__Mortgage,BUREAU__CREDIT_TYPE__Real estate loan,BUREAU__CREDIT_TYPE__Unknown type of loan,BUREAU__SK_ID_BUREAU__min,BUREAU__SK_ID_BUREAU__max,BUREAU__SK_ID_BUREAU__median,BUREAU__SK_ID_BUREAU__var,BUREAU__SK_ID_BUREAU__count,BUREAU__SK_ID_BUREAU__sum,BUREAU__SK_ID_BUREAU__first,BUREAU__SK_ID_BUREAU__second,BUREAU__SK_ID_BUREAU__second_last,BUREAU__SK_ID_BUREAU__last,BUREAU__DAYS_CREDIT__min,BUREAU__DAYS_CREDIT__max,BUREAU__DAYS_CREDIT__median,BUREAU__DAYS_CREDIT__var,BUREAU__DAYS_CREDIT__count,BUREAU__DAYS_CREDIT__sum,BUREAU__DAYS_CREDIT__first,BUREAU__DAYS_CREDIT__second,BUREAU__DAYS_CREDIT__second_last,BUREAU__DAYS_CREDIT__last,BUREAU__CREDIT_DAY_OVERDUE__min,BUREAU__CREDIT_DAY_OVERDUE__max,BUREAU__CREDIT_DAY_OVERDUE__median,BUREAU__CREDIT_DAY_OVERDUE__var,BUREAU__CREDIT_DAY_OVERDUE__count,BUREAU__CREDIT_DAY_OVERDUE__sum,BUREAU__CREDIT_DAY_OVERDUE__first,BUREAU__CREDIT_DAY_OVERDUE__second,BUREAU__CREDIT_DAY_OVERDUE__second_last,BUREAU__CREDIT_DAY_OVERDUE__last,BUREAU__DAYS_CREDIT_ENDDATE__min,BUREAU__DAYS_CREDIT_ENDDATE__max,BUREAU__DAYS_CREDIT_ENDDATE__median,BUREAU__DAYS_CREDIT_ENDDATE__var,BUREAU__DAYS_CREDIT_ENDDATE__count,BUREAU__DAYS_CREDIT_ENDDATE__sum,BUREAU__DAYS_CREDIT_ENDDATE__first,BUREAU__DAYS_CREDIT_ENDDATE__second,BUREAU__DAYS_CREDIT_ENDDATE__second_last,BUREAU__DAYS_CREDIT_ENDDATE__last,BUREAU__DAYS_ENDDATE_FACT__min,BUREAU__DAYS_ENDDATE_FACT__max,BUREAU__DAYS_ENDDATE_FACT__median,BUREAU__DAYS_ENDDATE_FACT__var,BUREAU__DAYS_ENDDATE_FACT__count,BUREAU__DAYS_ENDDATE_FACT__sum,BUREAU__DAYS_ENDDATE_FACT__first,BUREAU__DAYS_ENDDATE_FACT__second,BUREAU__DAYS_ENDDATE_FACT__second_last,BUREAU__DAYS_ENDDATE_FACT__last,BUREAU__AMT_CREDIT_MAX_OVERDUE__min,BUREAU__AMT_CREDIT_MAX_OVERDUE__max,BUREAU__AMT_CREDIT_MAX_OVERDUE__median,BUREAU__AMT_CREDIT_MAX_OVERDUE__var,BUREAU__AMT_CREDIT_MAX_OVERDUE__count,BUREAU__AMT_CREDIT_MAX_OVERDUE__sum,BUREAU__AMT_CREDIT_MAX_OVERDUE__first,BUREAU__AMT_CREDIT_MAX_OVERDUE__second,BUREAU__AMT_CREDIT_MAX_OVERDUE__second_last,BUREAU__AMT_CREDIT_MAX_OVERDUE__last,BUREAU__CNT_CREDIT_PROLONG__min,BUREAU__CNT_CREDIT_PROLONG__max,BUREAU__CNT_CREDIT_PROLONG__median,BUREAU__CNT_CREDIT_PROLONG__var,BUREAU__CNT_CREDIT_PROLONG__count,BUREAU__CNT_CREDIT_PROLONG__sum,BUREAU__CNT_CREDIT_PROLONG__first,BUREAU__CNT_CREDIT_PROLONG__second,BUREAU__CNT_CREDIT_PROLONG__second_last,BUREAU__CNT_CREDIT_PROLONG__last,BUREAU__AMT_CREDIT_SUM__min,BUREAU__AMT_CREDIT_SUM__max,BUREAU__AMT_CREDIT_SUM__median,BUREAU__AMT_CREDIT_SUM__var,BUREAU__AMT_CREDIT_SUM__count,BUREAU__AMT_CREDIT_SUM__sum,BUREAU__AMT_CREDIT_SUM__first,BUREAU__AMT_CREDIT_SUM__second,BUREAU__AMT_CREDIT_SUM__second_last,BUREAU__AMT_CREDIT_SUM__last,BUREAU__AMT_CREDIT_SUM_DEBT__min,BUREAU__AMT_CREDIT_SUM_DEBT__max,BUREAU__AMT_CREDIT_SUM_DEBT__median,BUREAU__AMT_CREDIT_SUM_DEBT__var,BUREAU__AMT_CREDIT_SUM_DEBT__count,BUREAU__AMT_CREDIT_SUM_DEBT__sum,BUREAU__AMT_CREDIT_SUM_DEBT__first,BUREAU__AMT_CREDIT_SUM_DEBT__second,BUREAU__AMT_CREDIT_SUM_DEBT__second_last,BUREAU__AMT_CREDIT_SUM_DEBT__last,BUREAU__AMT_CREDIT_SUM_LIMIT__min,BUREAU__AMT_CREDIT_SUM_LIMIT__max,BUREAU__AMT_CREDIT_SUM_LIMIT__median,BUREAU__AMT_CREDIT_SUM_LIMIT__var,BUREAU__AMT_CREDIT_SUM_LIMIT__count,BUREAU__AMT_CREDIT_SUM_LIMIT__sum,BUREAU__AMT_CREDIT_SUM_LIMIT__first,BUREAU__AMT_CREDIT_SUM_LIMIT__second,BUREAU__AMT_CREDIT_SUM_LIMIT__second_last,BUREAU__AMT_CREDIT_SUM_LIMIT__last,BUREAU__AMT_CREDIT_SUM_OVERDUE__min,BUREAU__AMT_CREDIT_SUM_OVERDUE__max,BUREAU__AMT_CREDIT_SUM_OVERDUE__median,BUREAU__AMT_CREDIT_SUM_OVERDUE__var,BUREAU__AMT_CREDIT_SUM_OVERDUE__count,BUREAU__AMT_CREDIT_SUM_OVERDUE__sum,BUREAU__AMT_CREDIT_SUM_OVERDUE__first,BUREAU__AMT_CREDIT_SUM_OVERDUE__second,BUREAU__AMT_CREDIT_SUM_OVERDUE__second_last,BUREAU__AMT_CREDIT_SUM_OVERDUE__last,BUREAU__DAYS_CREDIT_UPDATE__min,BUREAU__DAYS_CREDIT_UPDATE__max,BUREAU__DAYS_CREDIT_UPDATE__median,BUREAU__DAYS_CREDIT_UPDATE__var,BUREAU__DAYS_CREDIT_UPDATE__count,BUREAU__DAYS_CREDIT_UPDATE__sum,BUREAU__DAYS_CREDIT_UPDATE__first,BUREAU__DAYS_CREDIT_UPDATE__second,BUREAU__DAYS_CREDIT_UPDATE__second_last,BUREAU__DAYS_CREDIT_UPDATE__last,BUREAU__AMT_ANNUITY__min,BUREAU__AMT_ANNUITY__max,BUREAU__AMT_ANNUITY__median,BUREAU__AMT_ANNUITY__var,BUREAU__AMT_ANNUITY__count,BUREAU__AMT_ANNUITY__sum,BUREAU__AMT_ANNUITY__first,BUREAU__AMT_ANNUITY__second,BUREAU__AMT_ANNUITY__second_last,BUREAU__AMT_ANNUITY__last
0,100002,1,Cash loans,M,N,Y,0,202500.0,406597.5,24700.5,351000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.018801,-9461,-637,-3648.0,-2120,,1,1,0,1,1,0,Laborers,1.0,2,2,WEDNESDAY,10,0,0,0,0,0,0,Business Entity Type 3,0.083037,0.262949,0.139376,0.0247,0.0369,0.9722,0.6192,0.0143,0.0,0.069,0.0833,0.125,0.0369,0.0202,0.019,0.0,0.0,0.0252,0.0383,0.9722,0.6341,0.0144,0.0,0.069,0.0833,0.125,0.0377,0.022,0.0198,0.0,0.0,0.025,0.0369,0.9722,0.6243,0.0144,0.0,0.069,0.0833,0.125,0.0375,0.0205,0.0193,0.0,0.0,reg oper account,block of flats,0.0149,"Stone, brick",No,2.0,2.0,2.0,2.0,-1134.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,Consumer loans,,,Consumer loans,0.0,1.0,0.0,0.0,1.0,SATURDAY,,,SATURDAY,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,Y,,,Y,0.0,1.0,1.0,XAP,,,XAP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,Approved,,,Approved,1.0,0.0,0.0,0.0,1.0,XNA,,,XNA,0.0,0.0,0.0,1.0,1.0,XAP,,,XAP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,,,1.0,New,,,New,1.0,0.0,0.0,0.0,1.0,Vehicles,,,Vehicles,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,POS,,,POS,0.0,0.0,0.0,1.0,0.0,1.0,XNA,,,XNA,1.0,0.0,0.0,1.0,Stone,,,Stone,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,Auto technology,,,Auto technology,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,low_normal,,,low_normal,0.0,0.0,0.0,1.0,0.0,1.0,POS other with interest,,,POS other with interest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1038818.0,1038818.0,1038818.0,,1.0,1038818.0,1038818.0,,,1038818.0,9251.775,9251.775,9251.775,,1.0,9251.775,9251.775,,,9251.775,179055.0,179055.0,179055.0,,1.0,179055.0,179055.0,,,179055.0,179055.0,179055.0,179055.0,,1.0,179055.0,179055.0,,,179055.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,179055.0,179055.0,179055.0,,1.0,179055.0,179055.0,,,179055.0,9.0,9.0,9.0,,1.0,9.0,9.0,,,9.0,1.0,1.0,1.0,,1.0,1.0,1.0,,,1.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,,,,,0.0,0.0,,,,,,,,,0.0,0.0,,,,,-606.0,-606.0,-606.0,,1.0,-606.0,-606.0,,,-606.0,500.0,500.0,500.0,,1.0,500.0,500.0,,,500.0,24.0,24.0,24.0,,1.0,24.0,24.0,,,24.0,365243.0,365243.0,365243.0,,1.0,365243.0,365243.0,,,365243.0,-565.0,-565.0,-565.0,,1.0,-565.0,-565.0,,,-565.0,125.0,125.0,125.0,,1.0,125.0,125.0,,,125.0,-25.0,-25.0,-25.0,,1.0,-25.0,-25.0,,,-25.0,-17.0,-17.0,-17.0,,1.0,-17.0,-17.0,,,-17.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,8.0,8.0,Closed,Closed,Active,Active,2.0,0.0,6.0,0.0,8.0,currency 1,currency 1,currency 1,currency 1,8.0,0.0,0.0,0.0,8.0,Consumer credit,Credit card,Credit card,Credit card,0.0,0.0,0.0,4.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6113835.0,6158909.0,6158905.5,253924400.0,8.0,49226177.0,6158906.0,6158904.0,6158909.0,6158909.0,-1437.0,-103.0,-1042.5,186150.0,8.0,-6992.0,-1437.0,-1125.0,-103.0,-103.0,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,-1072.0,780.0,-424.5,589042.4,6.0,-2094.0,-1072.0,-1038.0,,85.0,-1185.0,-36.0,-939.0,266248.3,6.0,-4185.0,-1185.0,-1038.0,,-48.0,0.0,5043.645,40.5,5584936.0,5.0,8405.145,0.0,,40.5,40.5,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,450000.0,54130.5,21338070000.0,8.0,865055.565,135000.0,40761.0,31988.565,31988.565,0.0,245781.0,0.0,12081660000.0,5.0,245781.0,0.0,,0.0,0.0,0.0,31988.565,0.0,255817100.0,4.0,31988.565,0.0,,31988.565,31988.565,0.0,0.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,-1185.0,-7.0,-402.5,268865.553571,8.0,-3999.0,-1185.0,-1038.0,-24.0,-24.0,0.0,0.0,0.0,0.0,7.0,0.0,0.0,0.0,0.0,0.0
1,100003,0,Cash loans,F,N,N,0,270000.0,1293502.5,35698.5,1129500.0,Family,State servant,Higher education,Married,House / apartment,0.003541,-16765,-1188,-1186.0,-291,,1,1,0,1,1,0,Core staff,2.0,1,1,MONDAY,11,0,0,0,0,0,0,School,0.311267,0.622246,,0.0959,0.0529,0.9851,0.796,0.0605,0.08,0.0345,0.2917,0.3333,0.013,0.0773,0.0549,0.0039,0.0098,0.0924,0.0538,0.9851,0.804,0.0497,0.0806,0.0345,0.2917,0.3333,0.0128,0.079,0.0554,0.0,0.0,0.0968,0.0529,0.9851,0.7987,0.0608,0.08,0.0345,0.2917,0.3333,0.0132,0.0787,0.0558,0.0039,0.01,reg oper account,block of flats,0.0714,Block,No,1.0,0.0,1.0,0.0,-828.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,3.0,Consumer loans,Consumer loans,Cash loans,Cash loans,1.0,2.0,0.0,0.0,3.0,SATURDAY,SUNDAY,FRIDAY,FRIDAY,1.0,0.0,1.0,1.0,0.0,0.0,0.0,3.0,Y,Y,Y,Y,0.0,3.0,3.0,XAP,XAP,XNA,XNA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,1.0,3.0,Approved,Approved,Approved,Approved,3.0,0.0,0.0,0.0,3.0,Cash through the bank,Cash through the bank,XNA,XNA,2.0,0.0,0.0,1.0,3.0,XAP,XAP,XAP,XAP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,3.0,Family,Family,Unaccompanied,Unaccompanied,0.0,2.0,0.0,0.0,0.0,0.0,1.0,3.0,Refreshed,Refreshed,Repeater,Repeater,0.0,2.0,1.0,0.0,3.0,Consumer Electronics,Furniture,XNA,XNA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,3.0,POS,POS,Cash,Cash,0.0,0.0,1.0,2.0,0.0,3.0,XNA,XNA,x-sell,x-sell,2.0,0.0,1.0,3.0,Country-wide,Stone,Credit and cash offices,Credit and cash offices,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,3.0,Consumer electronics,Furniture,XNA,XNA,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,3.0,middle,middle,low_normal,low_normal,0.0,0.0,0.0,1.0,2.0,3.0,POS household with interest,POS industry with interest,Cash X-Sell: low,Cash X-Sell: low,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1810518.0,2636178.0,2396755.0,180451900000.0,3.0,6843451.0,2396755.0,2636178.0,1810518.0,1810518.0,6737.31,98356.995,64567.665,2146706000.0,3.0,169661.97,6737.31,64567.665,98356.995,98356.995,68809.5,900000.0,337500.0,179913100000.0,3.0,1306309.5,68809.5,337500.0,900000.0,900000.0,68053.5,1035882.0,348637.5,247954100000.0,3.0,1452573.0,68053.5,348637.5,1035882.0,1035882.0,0.0,6885.0,3442.5,23701610.0,2.0,6885.0,6885.0,0.0,,0.0,68809.5,900000.0,337500.0,179913100000.0,3.0,1306309.5,68809.5,337500.0,900000.0,900000.0,12.0,17.0,15.0,6.333333,3.0,44.0,15.0,17.0,12.0,12.0,1.0,1.0,1.0,0.0,3.0,3.0,1.0,1.0,1.0,1.0,0.0,0.100061,0.05003,0.005006,2.0,0.100061,0.100061,0.0,,0.0,,,,,0.0,0.0,,,,,,,,,0.0,0.0,,,,,-2341.0,-746.0,-828.0,806653.0,3.0,-3915.0,-2341.0,-828.0,-746.0,-746.0,-1.0,1400.0,200.0,573867.0,3.0,1599.0,200.0,1400.0,-1.0,-1.0,6.0,12.0,12.0,12.0,3.0,30.0,12.0,6.0,12.0,12.0,365243.0,365243.0,365243.0,0.0,3.0,1095729.0,365243.0,365243.0,365243.0,365243.0,-2310.0,-716.0,-797.0,806094.3,3.0,-3823.0,-2310.0,-797.0,-716.0,-716.0,-1980.0,-386.0,-647.0,730974.3,3.0,-3013.0,-1980.0,-647.0,-386.0,-386.0,-1980.0,-536.0,-647.0,645724.3,3.0,-3163.0,-1980.0,-647.0,-536.0,-536.0,-1976.0,-527.0,-639.0,649952.3,3.0,-3142.0,-1976.0,-639.0,-527.0,-527.0,0.0,1.0,1.0,0.333333,3.0,2.0,1.0,0.0,1.0,1.0,4.0,4.0,Closed,Closed,Active,Active,1.0,0.0,3.0,0.0,4.0,currency 1,currency 1,currency 1,currency 1,4.0,0.0,0.0,0.0,4.0,Consumer credit,Credit card,Credit card,Credit card,0.0,0.0,0.0,2.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5885877.0,5885880.0,5885878.5,1.666667,4.0,23543514.0,5885877.0,5885878.0,5885880.0,5885880.0,-2586.0,-606.0,-1205.5,827783.583333,4.0,-5603.0,-2586.0,-1636.0,-606.0,-606.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,-2434.0,1216.0,-480.0,2228364.0,4.0,-2178.0,-2434.0,-540.0,1216.0,1216.0,-2131.0,-540.0,-621.0,802990.333333,3.0,-3292.0,-2131.0,-540.0,,-621.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,22248.0,810000.0,92576.25,138584600000.0,4.0,1017400.5,22248.0,112500.0,810000.0,810000.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,810000.0,0.0,164025000000.0,4.0,810000.0,0.0,0.0,810000.0,810000.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,-2131.0,-43.0,-545.0,824562.0,4.0,-3264.0,-2131.0,-540.0,-43.0,-43.0,,,,,0.0,0.0,,,,
2,100004,0,Revolving loans,M,Y,Y,0,67500.0,135000.0,6750.0,135000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.010032,-19046,-225,-4260.0,-2531,26.0,1,1,1,1,1,0,Laborers,1.0,2,2,MONDAY,9,0,0,0,0,0,0,Government,,0.555912,0.729567,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-815.0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,Consumer loans,,,Consumer loans,0.0,1.0,0.0,0.0,1.0,FRIDAY,,,FRIDAY,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,Y,,,Y,0.0,1.0,1.0,XAP,,,XAP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,Approved,,,Approved,1.0,0.0,0.0,0.0,1.0,Cash through the bank,,,Cash through the bank,1.0,0.0,0.0,0.0,1.0,XAP,,,XAP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,Unaccompanied,,,Unaccompanied,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,New,,,New,1.0,0.0,0.0,0.0,1.0,Mobile,,,Mobile,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,POS,,,POS,0.0,0.0,0.0,1.0,0.0,1.0,XNA,,,XNA,1.0,0.0,0.0,1.0,Regional / Local,,,Regional / Local,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,Connectivity,,,Connectivity,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,middle,,,middle,0.0,0.0,0.0,0.0,1.0,1.0,POS mobile without interest,,,POS mobile without interest,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1564014.0,1564014.0,1564014.0,,1.0,1564014.0,1564014.0,,,1564014.0,5357.25,5357.25,5357.25,,1.0,5357.25,5357.25,,,5357.25,24282.0,24282.0,24282.0,,1.0,24282.0,24282.0,,,24282.0,20106.0,20106.0,20106.0,,1.0,20106.0,20106.0,,,20106.0,4860.0,4860.0,4860.0,,1.0,4860.0,4860.0,,,4860.0,24282.0,24282.0,24282.0,,1.0,24282.0,24282.0,,,24282.0,5.0,5.0,5.0,,1.0,5.0,5.0,,,5.0,1.0,1.0,1.0,,1.0,1.0,1.0,,,1.0,0.212008,0.212008,0.212008,,1.0,0.212008,0.212008,,,0.212008,,,,,0.0,0.0,,,,,,,,,0.0,0.0,,,,,-815.0,-815.0,-815.0,,1.0,-815.0,-815.0,,,-815.0,30.0,30.0,30.0,,1.0,30.0,30.0,,,30.0,4.0,4.0,4.0,,1.0,4.0,4.0,,,4.0,365243.0,365243.0,365243.0,,1.0,365243.0,365243.0,,,365243.0,-784.0,-784.0,-784.0,,1.0,-784.0,-784.0,,,-784.0,-694.0,-694.0,-694.0,,1.0,-694.0,-694.0,,,-694.0,-724.0,-724.0,-724.0,,1.0,-724.0,-724.0,,,-724.0,-714.0,-714.0,-714.0,,1.0,-714.0,-714.0,,,-714.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,2.0,2.0,Closed,Closed,Closed,Closed,0.0,0.0,2.0,0.0,2.0,currency 1,currency 1,currency 1,currency 1,2.0,0.0,0.0,0.0,2.0,Consumer credit,Consumer credit,Consumer credit,Consumer credit,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6829133.0,6829134.0,6829133.5,0.5,2.0,13658267.0,6829133.0,6829134.0,6829134.0,6829134.0,-1326.0,-408.0,-867.0,421362.0,2.0,-1734.0,-1326.0,-408.0,-408.0,-408.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,-595.0,-382.0,-488.5,22684.5,2.0,-977.0,-595.0,-382.0,-382.0,-382.0,-683.0,-382.0,-532.5,45300.5,2.0,-1065.0,-683.0,-382.0,-382.0,-382.0,0.0,0.0,0.0,,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,94500.0,94537.8,94518.9,714.42,2.0,189037.8,94500.0,94537.8,94537.8,94537.8,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,-682.0,-382.0,-532.0,45000.0,2.0,-1064.0,-682.0,-382.0,-382.0,-382.0,,,,,0.0,0.0,,,,
3,100006,0,Cash loans,F,N,Y,0,135000.0,312682.5,29686.5,297000.0,Unaccompanied,Working,Secondary / secondary special,Civil marriage,House / apartment,0.008019,-19005,-3039,-9833.0,-2437,,1,1,0,1,0,0,Laborers,2.0,2,2,WEDNESDAY,17,0,0,0,0,0,0,Business Entity Type 3,,0.650442,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,2.0,0.0,2.0,0.0,-617.0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,,,,,,9.0,9.0,Consumer loans,Cash loans,Cash loans,Cash loans,5.0,2.0,2.0,0.0,9.0,TUESDAY,SATURDAY,THURSDAY,THURSDAY,0.0,0.0,1.0,1.0,6.0,1.0,0.0,9.0,Y,Y,Y,Y,0.0,9.0,9.0,XAP,XNA,XNA,XNA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,5.0,9.0,Approved,Approved,Refused,Refused,5.0,3.0,1.0,0.0,9.0,XNA,Cash through the bank,Cash through the bank,Cash through the bank,4.0,0.0,0.0,5.0,9.0,XAP,XAP,LIMIT,LIMIT,0.0,0.0,1.0,0.0,0.0,0.0,0.0,8.0,0.0,4.0,Family,,Unaccompanied,Unaccompanied,0.0,1.0,0.0,0.0,0.0,0.0,3.0,9.0,New,Repeater,Repeater,Repeater,1.0,0.0,8.0,0.0,9.0,Construction Materials,XNA,XNA,XNA,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,9.0,POS,Cash,Cash,Cash,1.0,0.0,3.0,2.0,3.0,9.0,XNA,x-sell,x-sell,x-sell,5.0,0.0,4.0,9.0,Stone,Credit and cash offices,Credit and cash offices,Credit and cash offices,0.0,0.0,0.0,0.0,1.0,7.0,0.0,1.0,9.0,Construction,XNA,XNA,XNA,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,7.0,9.0,middle,high,low_normal,low_normal,4.0,2.0,0.0,2.0,1.0,9.0,POS industry with interest,Cash X-Sell: high,Cash X-Sell: low,Cash X-Sell: low,1.0,1.0,2.0,0.0,0.0,0.0,1.0,2.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1020698.0,2827850.0,2078043.0,369724100000.0,9.0,17392159.0,2299329.0,1020698.0,1697039.0,1697039.0,2482.92,39954.51,26636.76,185601900.0,6.0,141907.05,2482.92,39954.51,32696.1,32696.1,0.0,688500.0,270000.0,81896200000.0,9.0,2449829.34,26912.34,454500.0,688500.0,688500.0,0.0,906615.0,267930.0,111113800000.0,9.0,2625259.5,24219.0,481495.5,906615.0,906615.0,2693.34,66987.0,34840.17,2066837000.0,2.0,69680.34,2693.34,,,66987.0,26912.34,688500.0,394708.5,64348770000.0,6.0,2449829.34,26912.34,454500.0,688500.0,688500.0,12.0,15.0,15.0,1.0,9.0,132.0,15.0,12.0,15.0,15.0,1.0,1.0,1.0,0.0,9.0,9.0,1.0,1.0,1.0,1.0,0.108994,0.21783,0.163412,0.005923,2.0,0.326824,0.108994,,,0.21783,,,,,0.0,0.0,,,,,,,,,0.0,0.0,,,,,-617.0,-181.0,-181.0,24723.277778,9.0,-2452.0,-617.0,-438.0,-181.0,-181.0,-1.0,8025.0,-1.0,7150604.0,9.0,8048.0,30.0,-1.0,-1.0,-1.0,0.0,48.0,15.0,409.2,6.0,138.0,12.0,18.0,48.0,48.0,365243.0,365243.0,365243.0,0.0,4.0,1460972.0,365243.0,,,365243.0,-545.0,365243.0,-216.0,33410140000.0,4.0,364266.0,-545.0,,,365243.0,-215.0,365243.0,654.0,33284520000.0,4.0,366336.0,-215.0,,,365243.0,-425.0,365243.0,182546.0,44537650000.0,4.0,729910.0,-425.0,,,365243.0,-416.0,365243.0,182550.0,44535580000.0,4.0,729927.0,-416.0,,,365243.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,100007,0,Cash loans,M,N,Y,0,121500.0,513000.0,21865.5,513000.0,Unaccompanied,Working,Secondary / secondary special,Single / not married,House / apartment,0.028663,-19932,-3038,-4311.0,-3458,,1,1,0,1,0,0,Core staff,1.0,2,2,THURSDAY,11,0,0,0,0,1,1,Religion,,0.322738,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,0.0,0.0,0.0,-1106.0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,6.0,Consumer loans,Consumer loans,Cash loans,Cash loans,4.0,2.0,0.0,0.0,6.0,SATURDAY,SUNDAY,MONDAY,MONDAY,1.0,1.0,1.0,2.0,1.0,0.0,0.0,6.0,Y,Y,Y,Y,0.0,6.0,6.0,XAP,XAP,XNA,XNA,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,4.0,6.0,Approved,Approved,Approved,Approved,6.0,0.0,0.0,0.0,6.0,Cash through the bank,Cash through the bank,Cash through the bank,Cash through the bank,5.0,0.0,0.0,1.0,6.0,XAP,XAP,XAP,XAP,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,4.0,Family,Unaccompanied,,Unaccompanied,0.0,2.0,0.0,0.0,0.0,0.0,2.0,6.0,New,Repeater,Repeater,Repeater,1.0,0.0,5.0,0.0,6.0,Audio/Video,Audio/Video,XNA,XNA,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,6.0,POS,POS,Cash,Cash,0.0,0.0,4.0,2.0,0.0,6.0,XNA,XNA,x-sell,x-sell,2.0,1.0,3.0,6.0,Country-wide,Country-wide,Credit and cash offices,Credit and cash offices,1.0,0.0,0.0,0.0,3.0,1.0,1.0,0.0,6.0,Consumer electronics,Connectivity,XNA,XNA,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,6.0,high,high,middle,middle,0.0,3.0,0.0,0.0,3.0,6.0,POS household with interest,POS mobile with interest,Cash X-Sell: middle,Cash X-Sell: middle,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1692033.0,2730157.0,2060607.5,142116900000.0,6.0,12946871.0,2119973.0,2462742.0,1692033.0,1692033.0,1834.29,22678.785,14524.3125,65021430.0,6.0,73672.83,1834.29,3601.53,16037.64,16037.64,17176.5,247500.0,191250.0,10117540000.0,6.0,903181.5,17176.5,31005.0,247500.0,247500.0,14616.0,284400.0,197932.5,13931650000.0,6.0,999832.5,14616.0,30663.0,274288.5,274288.5,3105.0,3676.5,3390.75,163306.1,2.0,6781.5,3676.5,3105.0,,3105.0,17176.5,247500.0,191250.0,10117540000.0,6.0,903181.5,17176.5,31005.0,247500.0,247500.0,8.0,15.0,13.0,6.266667,6.0,74.0,13.0,15.0,11.0,11.0,1.0,1.0,1.0,0.0,6.0,6.0,1.0,1.0,1.0,1.0,0.100143,0.21889,0.159516,0.00705,2.0,0.319033,0.21889,0.100143,,0.100143,,,,,0.0,0.0,,,,,,,,,0.0,0.0,,,,,-2357.0,-374.0,-986.5,514474.166667,6.0,-7337.0,-2357.0,-1768.0,-374.0,-374.0,-1.0,1200.0,28.5,375664.6,6.0,2455.0,1200.0,53.0,-1.0,-1.0,10.0,48.0,15.0,205.866667,6.0,124.0,10.0,12.0,24.0,24.0,365243.0,365243.0,365243.0,0.0,5.0,1826215.0,365243.0,365243.0,365243.0,365243.0,-2326.0,-344.0,-1076.0,604317.2,5.0,-6316.0,-2326.0,-1736.0,-344.0,-344.0,-2056.0,346.0,-746.0,870165.2,5.0,-4186.0,-2056.0,-1406.0,346.0,346.0,-2056.0,365243.0,-746.0,26847790000.0,5.0,360681.0,-2056.0,-1406.0,365243.0,365243.0,-2041.0,365243.0,-739.0,26846400000.0,5.0,360719.0,-2041.0,-1397.0,365243.0,365243.0,0.0,1.0,1.0,0.3,5.0,3.0,1.0,0.0,1.0,1.0,1.0,1.0,Closed,,,Closed,0.0,0.0,1.0,0.0,1.0,currency 1,,,currency 1,1.0,0.0,0.0,0.0,1.0,Consumer credit,,,Consumer credit,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5987200.0,5987200.0,5987200.0,,1.0,5987200.0,5987200.0,,,5987200.0,-1149.0,-1149.0,-1149.0,,1.0,-1149.0,-1149.0,,,-1149.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,-783.0,-783.0,-783.0,,1.0,-783.0,-783.0,,,-783.0,-783.0,-783.0,-783.0,,1.0,-783.0,-783.0,,,-783.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,146250.0,146250.0,146250.0,,1.0,146250.0,146250.0,,,146250.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,0.0,0.0,0.0,,1.0,0.0,0.0,,,0.0,-783.0,-783.0,-783.0,,1.0,-783.0,-783.0,,,-783.0,,,,,0.0,0.0,,,,


In [0]:
bureau_balance_cat_cols = ['STATUS']
bureau_balance_cont_cols = bureau_balance.columns.drop(bureau_balance_cat_cols + ['SK_ID_BUREAU'])
processed_bureau_balance = process_log_df(
    bureau_balance, 
    'BUREAU_BALANCE', 
    'SK_ID_BUREAU', 
    'MONTHS_BALANCE', 
    cat_cols=bureau_balance_cat_cols, 
    cont_cols=bureau_balance_cont_cols
)
for t in types:
  dfs[t] = pd.merge(dfs[t], processed_bureau_balance, left_on='BUREAU__SK_ID_BUREAU__last', right_index=True, how='left')

  0%|          | 0/1 [00:00<?, ?it/s]

Processing categorical columns...


100%|██████████| 1/1 [00:54<00:00, 54.43s/it]
  0%|          | 0/1 [00:00<?, ?it/s]

Processing continous columns...


100%|██████████| 1/1 [00:42<00:00, 42.00s/it]


In [0]:
pos_cash_cat_cols = ['NAME_CONTRACT_STATUS']
pos_cash_cont_cols = pos_cash.columns.drop(pos_cash_cat_cols + ['SK_ID_CURR'])
processed_pos_cash = process_log_df(
    pos_cash, 
    'POS_CASH', 
    'SK_ID_CURR', 
    'MONTHS_BALANCE', 
    cat_cols=pos_cash_cat_cols, 
    cont_cols=pos_cash_cont_cols
)

for t in types:
  dfs[t] = pd.merge(dfs[t], processed_pos_cash, left_on='SK_ID_CURR', right_index=True, how='left')

  0%|          | 0/1 [00:00<?, ?it/s]

Processing categorical columns...


100%|██████████| 1/1 [00:20<00:00, 20.30s/it]
  0%|          | 0/6 [00:00<?, ?it/s]

Processing continous columns...


100%|██████████| 6/6 [01:39<00:00, 16.63s/it]


In [0]:
credit_card_balance_cat_cols = ['NAME_CONTRACT_STATUS']
credit_card_balance_cont_cols = credit_card_balance.columns.drop(credit_card_balance_cat_cols + ['SK_ID_CURR'])
processed_credit_card_balance = process_log_df(
    credit_card_balance, 
    'CREDIT_CARD_BALANCE', 
    'SK_ID_CURR', 
    'MONTHS_BALANCE', 
    cat_cols=credit_card_balance_cat_cols, 
    cont_cols=credit_card_balance_cont_cols
)

for t in types:
  dfs[t] = pd.merge(dfs[t], processed_credit_card_balance, left_on='SK_ID_CURR', right_index=True, how='left')

  0%|          | 0/1 [00:00<?, ?it/s]

Processing categorical columns...


100%|██████████| 1/1 [00:07<00:00,  7.60s/it]
  0%|          | 0/21 [00:00<?, ?it/s]

Processing continous columns...


100%|██████████| 21/21 [02:08<00:00,  6.13s/it]


In [0]:
previous_application_cat_cols = [
    'NAME_CONTRACT_TYPE', 
    'WEEKDAY_APPR_PROCESS_START', 
    'FLAG_LAST_APPL_PER_CONTRACT', 
    'NAME_CONTRACT_STATUS', 
    'NAME_PAYMENT_TYPE', 
    'CODE_REJECT_REASON', 
    'NAME_TYPE_SUITE', 
    'NAME_CLIENT_TYPE', 
    'NAME_GOODS_CATEGORY', 
    'NAME_PORTFOLIO', 
    'NAME_PRODUCT_TYPE', 
    'CHANNEL_TYPE', 
    'NAME_SELLER_INDUSTRY', 
    'NAME_YIELD_GROUP', 
    'PRODUCT_COMBINATION',
    'NAME_CASH_LOAN_PURPOSE']
previous_application_cont_cols = previous_application.columns.drop(previous_application_cat_cols + ['SK_ID_CURR'])
processed_previous_application = process_log_df(
    previous_application, 
    'PREVIOUS_APPLICATION', 
    'SK_ID_CURR', 
    'DAYS_DECISION', 
    cat_cols=previous_application_cat_cols, 
    cont_cols=previous_application_cont_cols
)

for t in types:
  dfs[t] = pd.merge(dfs[t], processed_previous_application, left_on='SK_ID_CURR', right_index=True, how='left')

  0%|          | 0/16 [00:00<?, ?it/s]

Processing categorical columns...


100%|██████████| 16/16 [04:53<00:00, 18.36s/it]
  0%|          | 0/20 [00:00<?, ?it/s]

Processing continous columns...


100%|██████████| 20/20 [06:22<00:00, 19.14s/it]


In [0]:
installments_payments['PAYMENT_PERC'] = installments_payments['AMT_PAYMENT'] / installments_payments['AMT_INSTALMENT']
installments_payments['PAYMENT_DIFF'] = installments_payments['AMT_INSTALMENT'] - installments_payments['AMT_PAYMENT']
installments_payments['DPD'] = installments_payments['DAYS_ENTRY_PAYMENT'] - installments_payments['DAYS_INSTALMENT']
installments_payments['DBD'] = installments_payments['DAYS_INSTALMENT'] - installments_payments['DAYS_ENTRY_PAYMENT']
installments_payments['DPD'] = installments_payments['DPD'].apply(lambda x: x if x > 0 else 0)
installments_payments['DBD'] = installments_payments['DBD'].apply(lambda x: x if x > 0 else 0)

installments_payments_cat_cols = []
installments_payments_cont_cols = installments_payments.columns.drop(installments_payments_cat_cols + ['SK_ID_CURR'])
processed_installments_payments = process_log_df(
    installments_payments, 
    'INSTALLMENTS_PAYMENTS', 
    'SK_ID_CURR', 
    'DAYS_INSTALMENT', 
    cat_cols=installments_payments_cat_cols, 
    cont_cols=installments_payments_cont_cols
)

for t in types:
  dfs[t] = pd.merge(dfs[t], processed_installments_payments, left_on='SK_ID_CURR', right_index=True, how='left')

0it [00:00, ?it/s]
  0%|          | 0/11 [00:00<?, ?it/s]

Processing categorical columns...
Processing continous columns...


100%|██████████| 11/11 [03:44<00:00, 20.45s/it]


In [0]:
ext_source_cols = ['EXT_SOURCE_1', 'EXT_SOURCE_2', 'EXT_SOURCE_3']
impute_ext_source_cols = ['IMPUTE_EXT_SOURCE_1', 'IMPUTE_EXT_SOURCE_2', 'IMPUTE_EXT_SOURCE_3']

for t in types:
  dfs[t]['EXT_SOURCE_count'] = dfs[t][ext_source_cols].count(axis=1)
  dfs[t]['EXT_SOURCE_max'] = dfs[t][ext_source_cols].max(axis=1)
  dfs[t]['EXT_SOURCE_min'] = dfs[t][ext_source_cols].min(axis=1)
  dfs[t]['EXT_SOURCE_median'] = dfs[t][ext_source_cols].median(axis=1)
  dfs[t]['EXT_SOURCE_diff'] = dfs[t]['EXT_SOURCE_max'] - dfs[t]['EXT_SOURCE_min']
  dfs[t][impute_ext_source_cols] = dfs[t][ext_source_cols]

  r = func(a, **kwargs)


In [0]:
# imputer for handling missing values
from sklearn.preprocessing import Imputer
imputer = Imputer(strategy = 'median')

# Need to impute missing values
dfs['train'][impute_ext_source_cols] = imputer.fit_transform(dfs['train'][impute_ext_source_cols])
dfs['test'][impute_ext_source_cols] = imputer.transform(dfs['test'][impute_ext_source_cols])

from sklearn.preprocessing import PolynomialFeatures
                                  
# Create the polynomial object with specified degree
poly_transformer = PolynomialFeatures(degree = 3)
poly_transformer.fit(dfs['train'][impute_ext_source_cols])
feats_name = ['IMPUTE_POLY_FEATS_'+col for col in poly_transformer.get_feature_names(input_features = impute_ext_source_cols)]

for t in types:
  dfs[t][feats_name] = pd.DataFrame(poly_transformer.transform(dfs[t][impute_ext_source_cols]), columns = feats_name)

In [0]:
bool_keywords = ['FLAG', 'NOT']

for keyword in bool_keywords:
  doc_cols = []
  for col in dfs['train']:
    if keyword in col:
      doc_cols.append(col)

  print(doc_cols)
  for t in types:
    dfs[t][keyword+'_total'] = dfs[t][doc_cols].sum(axis=1)
#     dfs[t].drop(columns=doc_cols, inplace=True)

['FLAG_OWN_CAR', 'FLAG_OWN_REALTY', 'FLAG_MOBIL', 'FLAG_EMP_PHONE', 'FLAG_WORK_PHONE', 'FLAG_CONT_MOBILE', 'FLAG_PHONE', 'FLAG_EMAIL', 'FLAG_DOCUMENT_2', 'FLAG_DOCUMENT_3', 'FLAG_DOCUMENT_4', 'FLAG_DOCUMENT_5', 'FLAG_DOCUMENT_6', 'FLAG_DOCUMENT_7', 'FLAG_DOCUMENT_8', 'FLAG_DOCUMENT_9', 'FLAG_DOCUMENT_10', 'FLAG_DOCUMENT_11', 'FLAG_DOCUMENT_12', 'FLAG_DOCUMENT_13', 'FLAG_DOCUMENT_14', 'FLAG_DOCUMENT_15', 'FLAG_DOCUMENT_16', 'FLAG_DOCUMENT_17', 'FLAG_DOCUMENT_18', 'FLAG_DOCUMENT_19', 'FLAG_DOCUMENT_20', 'FLAG_DOCUMENT_21', 'PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__count', 'PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__first__CATEGORY', 'PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__second__CATEGORY', 'PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__second_last__CATEGORY', 'PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__last__CATEGORY', 'PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__N', 'PREV_APP__FLAG_LAST_APPL_PER_CONTRACT__Y', 'PREV_APP__NFLAG_LAST_APPL_IN_DAY__min', 'PREV_APP__NFLAG_LAST_APPL_IN_DAY__max', 'PREV_APP__NFLAG_

In [0]:
for t in types:
  dfs[t].to_parquet('./prep/application_%s.snappy.parquet'%t)
  
!gsutil rsync ./prep gs://{os.environ['GCP_BUCKET']}/prep

Building synchronization state...
Starting synchronization...
Copying file://./prep/application_test.snappy.parquet [Content-Type=application/octet-stream]...
Copying file://./prep/application_train.snappy.parquet [Content-Type=application/octet-stream]...
==> NOTE: You are uploading one or more large file(s), which would run
significantly faster if you enable parallel composite uploads. This
feature can be enabled by editing the
"parallel_composite_upload_threshold" value in your .boto
configuration file. However, note that if you do this large files will
be uploaded as `composite objects
<https://cloud.google.com/storage/docs/composite-objects>`_,which
means that any user who downloads such objects will need to have a
compiled crcmod installed (see "gsutil help crcmod"). This is because
without a compiled crcmod, computing checksums on composite objects is
so slow that gsutil disables downloads of composite objects.

Copying file://./prep/processed_BUREAU.snappy.parquet [Content-Type