# Home Credit Ad Hoc Analysis

In [50]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import iqr, randint, uniform
import xgboost as xgb
import lightgbm as lgb
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegressionCV
from sklearn.preprocessing import Imputer, StandardScaler, RobustScaler
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, train_test_split, cross_val_score
from sklearn.metrics import roc_auc_score
from skopt import gp_minimize
from skopt.plots import plot_convergence, plot_objective
from IPython.display import display
import pickle
import gc

pd.options.display.max_columns = None

gc.enable()
np.random.seed(0)

path = "/Users/danielsaxton/home_credit_default_risk/"

In [33]:
application_test = pd.read_csv(path + "application_test.csv")

In [None]:
application_train = pd.read_csv(path + "application_train.csv")

In [4]:
bureau_balance = pd.read_csv(path + "bureau_balance.csv")
bureau = pd.read_csv(path + "bureau.csv")
credit_card = pd.read_csv(path + "credit_card_balance.csv")
installments = pd.read_csv(path + "installments_payments.csv")
pos_cash = pd.read_csv(path + "POS_CASH_balance.csv")
previous_application = pd.read_csv(path + "previous_application.csv")

In [None]:
df = pd.read_csv(path + "preprocessed_train.csv")

In [82]:
n = 700

(previous_application.
     join(application_train.loc[application_train["TARGET"] == 1, ["SK_ID_CURR", "TARGET"]], how="inner", on="SK_ID_CURR", rsuffix="_app").
     sort_values(["SK_ID_CURR", "SK_ID_CURR"]).
     where(lambda x: x["DAYS_DECISION"] >= -360).
     dropna(how="all").
     head(n).
     tail(20).
     pipe(display))

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL,SK_ID_CURR_app,TARGET
1612849,1580760.0,105489.0,Cash loans,83097.0,3150000.0,3150000.0,,3150000.0,MONDAY,18.0,Y,1.0,,,,Repairs,Refused,-253.0,Cash through the bank,HC,Unaccompanied,Repeater,XNA,Cash,walk-in,Credit and cash offices,-1.0,XNA,60.0,low_action,Cash Street: low,,,,,,,222407.0,1.0
912328,2659131.0,105520.0,Consumer loans,21787.515,152595.0,152595.0,0.0,152595.0,SATURDAY,11.0,Y,1.0,0.0,,,XAP,Approved,-200.0,XNA,XAP,Unaccompanied,Repeater,Audio/Video,POS,XNA,Stone,65.0,Consumer electronics,8.0,middle,POS household with interest,365243.0,-164.0,46.0,365243.0,365243.0,0.0,222443.0,1.0
690043,1817363.0,105523.0,Consumer loans,4286.025,31671.0,31671.0,0.0,31671.0,FRIDAY,8.0,Y,1.0,0.0,,,XAP,Approved,-89.0,Cash through the bank,XAP,,Repeater,Mobile,POS,XNA,Country-wide,30.0,Connectivity,10.0,high,POS mobile with interest,365243.0,-40.0,230.0,365243.0,365243.0,0.0,222446.0,1.0
1067824,1792146.0,105523.0,Cash loans,29526.795,675000.0,744498.0,,675000.0,SATURDAY,12.0,Y,1.0,,,,XNA,Approved,-177.0,Cash through the bank,XAP,Unaccompanied,Refreshed,XNA,Cash,x-sell,Contact center,-1.0,XNA,36.0,low_normal,Cash X-Sell: low,365243.0,-147.0,903.0,365243.0,365243.0,1.0,222446.0,1.0
795743,2630758.0,105564.0,Consumer loans,22785.39,141430.5,111726.0,29704.5,141430.5,FRIDAY,15.0,Y,1.0,0.228741,,,XAP,Approved,-227.0,XNA,XAP,,New,Mobile,POS,XNA,Country-wide,15.0,Connectivity,6.0,high,POS mobile with interest,365243.0,-159.0,-9.0,-39.0,-36.0,0.0,222494.0,1.0
613593,1648778.0,105576.0,Consumer loans,16556.805,159597.0,155961.0,15961.5,159597.0,THURSDAY,14.0,Y,1.0,0.101113,,,XAP,Approved,-242.0,Cash through the bank,XAP,Unaccompanied,New,Consumer Electronics,POS,XNA,Country-wide,3205.0,Consumer electronics,12.0,middle,POS household with interest,365243.0,-211.0,119.0,365243.0,365243.0,0.0,222508.0,1.0
1347062,1672377.0,105576.0,Revolving loans,2250.0,45000.0,45000.0,,45000.0,THURSDAY,14.0,Y,1.0,,,,XAP,Approved,-242.0,XNA,XAP,Unaccompanied,New,XNA,Cards,walk-in,Country-wide,3205.0,Consumer electronics,0.0,XNA,Card Street,365243.0,365243.0,365243.0,365243.0,365243.0,0.0,222508.0,1.0
656744,2200482.0,105595.0,Consumer loans,10109.385,123588.0,98869.5,24718.5,123588.0,FRIDAY,18.0,Y,1.0,0.217826,,,XAP,Approved,-301.0,Cash through the bank,XAP,Unaccompanied,New,Construction Materials,POS,XNA,Regional / Local,70.0,Construction,12.0,middle,POS industry with interest,365243.0,-271.0,59.0,-211.0,-208.0,0.0,222528.0,1.0
1036987,1970257.0,105595.0,Consumer loans,18619.02,170995.5,102595.5,68400.0,170995.5,TUESDAY,18.0,Y,1.0,0.435648,,,XAP,Approved,-143.0,XNA,XAP,,Repeater,Computers,POS,XNA,Country-wide,36.0,Consumer electronics,6.0,low_normal,POS household with interest,365243.0,-106.0,44.0,365243.0,365243.0,0.0,222528.0,1.0
633744,1091916.0,105633.0,Consumer loans,6807.42,72931.5,72931.5,0.0,72931.5,TUESDAY,11.0,Y,1.0,0.0,,,XAP,Approved,-303.0,XNA,XAP,,Repeater,Construction Materials,POS,XNA,Stone,30.0,Construction,12.0,low_normal,POS industry with interest,365243.0,-272.0,58.0,365243.0,365243.0,0.0,222569.0,1.0


In [83]:
n = 700

(previous_application.
     join(application_train.loc[application_train["TARGET"] == 0, ["SK_ID_CURR", "TARGET"]], how="inner", on="SK_ID_CURR", rsuffix="_app").
     sort_values(["SK_ID_CURR", "SK_ID_CURR"]).
     where(lambda x: x["DAYS_DECISION"] >= -360).
     dropna(how="all").
     head(n).
     tail(20).
     pipe(display))

Unnamed: 0,SK_ID_PREV,SK_ID_CURR,NAME_CONTRACT_TYPE,AMT_ANNUITY,AMT_APPLICATION,AMT_CREDIT,AMT_DOWN_PAYMENT,AMT_GOODS_PRICE,WEEKDAY_APPR_PROCESS_START,HOUR_APPR_PROCESS_START,FLAG_LAST_APPL_PER_CONTRACT,NFLAG_LAST_APPL_IN_DAY,RATE_DOWN_PAYMENT,RATE_INTEREST_PRIMARY,RATE_INTEREST_PRIVILEGED,NAME_CASH_LOAN_PURPOSE,NAME_CONTRACT_STATUS,DAYS_DECISION,NAME_PAYMENT_TYPE,CODE_REJECT_REASON,NAME_TYPE_SUITE,NAME_CLIENT_TYPE,NAME_GOODS_CATEGORY,NAME_PORTFOLIO,NAME_PRODUCT_TYPE,CHANNEL_TYPE,SELLERPLACE_AREA,NAME_SELLER_INDUSTRY,CNT_PAYMENT,NAME_YIELD_GROUP,PRODUCT_COMBINATION,DAYS_FIRST_DRAWING,DAYS_FIRST_DUE,DAYS_LAST_DUE_1ST_VERSION,DAYS_LAST_DUE,DAYS_TERMINATION,NFLAG_INSURED_ON_APPROVAL,SK_ID_CURR_app,TARGET
1012582,1314359.0,100442.0,Revolving loans,6750.0,135000.0,135000.0,,135000.0,TUESDAY,12.0,Y,1.0,,,,XAP,Approved,-93.0,XNA,XAP,Family,Repeater,XNA,Cards,x-sell,Credit and cash offices,-1.0,XNA,0.0,XNA,Card X-Sell,365243.0,365243.0,365243.0,365243.0,365243.0,0.0,216604.0,0.0
470137,1098796.0,100445.0,Cash loans,,0.0,0.0,,,TUESDAY,8.0,Y,1.0,,,,XNA,Canceled,-289.0,XNA,XAP,,Repeater,XNA,XNA,XNA,Credit and cash offices,-1.0,XNA,,XNA,Cash,,,,,,,216610.0,0.0
664186,2402372.0,100450.0,Cash loans,,0.0,0.0,,,MONDAY,9.0,Y,1.0,,,,XNA,Refused,-150.0,XNA,HC,,Repeater,XNA,XNA,XNA,Credit and cash offices,-1.0,XNA,,XNA,Cash,,,,,,,216615.0,0.0
46274,1066121.0,100451.0,Revolving loans,,0.0,0.0,,,SUNDAY,11.0,Y,1.0,,,,XAP,Canceled,-307.0,XNA,XAP,,Repeater,XNA,XNA,XNA,Credit and cash offices,-1.0,XNA,,XNA,Card Street,,,,,,,216616.0,0.0
600415,2666731.0,100451.0,Cash loans,,0.0,0.0,,,MONDAY,11.0,Y,1.0,,,,XNA,Canceled,-47.0,XNA,XAP,,Repeater,XNA,XNA,XNA,Credit and cash offices,-1.0,XNA,,XNA,Cash,,,,,,,216616.0,0.0
600416,2012168.0,100451.0,Cash loans,,0.0,0.0,,,THURSDAY,14.0,Y,1.0,,,,XNA,Canceled,-156.0,XNA,XAP,,Repeater,XNA,XNA,XNA,Credit and cash offices,-1.0,XNA,,XNA,Cash,,,,,,,216616.0,0.0
830082,2203402.0,100451.0,Revolving loans,2250.0,45000.0,45000.0,,45000.0,SUNDAY,11.0,Y,1.0,,,,XAP,Approved,-307.0,XNA,XAP,Unaccompanied,Repeater,XNA,Cards,walk-in,Credit and cash offices,-1.0,XNA,0.0,XNA,Card Street,365243.0,365243.0,365243.0,365243.0,365243.0,0.0,216616.0,0.0
1134387,1272202.0,100451.0,Cash loans,55425.69,1282500.0,1394230.5,,1282500.0,THURSDAY,14.0,Y,1.0,,,,XNA,Refused,-156.0,Cash through the bank,HC,Unaccompanied,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1.0,XNA,36.0,low_normal,Cash X-Sell: low,,,,,,,216616.0,0.0
1439419,1063506.0,100451.0,Cash loans,47440.8,1282500.0,1431477.0,,1282500.0,THURSDAY,14.0,Y,1.0,,,,XNA,Refused,-156.0,Cash through the bank,HC,Unaccompanied,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1.0,XNA,48.0,low_normal,Cash X-Sell: low,,,,,,,216616.0,0.0
1664010,2742836.0,100451.0,Cash loans,49217.085,1138500.0,1237684.5,,1138500.0,THURSDAY,14.0,Y,1.0,,,,XNA,Refused,-156.0,Cash through the bank,HC,Unaccompanied,Repeater,XNA,Cash,x-sell,Credit and cash offices,-1.0,XNA,36.0,low_normal,Cash X-Sell: low,,,,,,,216616.0,0.0
