In [None]:
# Stretch the notebook container to the full browser width.
# `display` and `HTML` come from the public `IPython.display` module; importing
# them from `IPython.core.display` is deprecated.
from IPython.display import display, HTML

display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Enable autoreload in mode 2 so edits to the imported project modules are
# picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
# Load the blackcellmagic extension (code formatting via a cell magic).
%load_ext blackcellmagic

In [None]:
import warnings
# Silence every warning category for the rest of the session.
# NOTE(review): a blanket filter also hides pandas diagnostics (for example
# SettingWithCopyWarning) that may be relevant to the column assignments made
# on `evaluation` later in this notebook — consider narrowing it.
warnings.filterwarnings('ignore')

In [None]:
import pandas as pd

In [None]:
import extract
import preprocess as prep
import feature_engineering as fe
import imputations as imp
import evaluate as eval_

In [None]:
import sys

# Put the shared utilities directory at the front of the module search path.
# NOTE(review): this cell sits after the project imports (extract, preprocess,
# ...). If any of those modules, or their own top-level imports, live in this
# directory, a fresh-kernel "Run All" would fail before reaching this line —
# confirm, and move this cell above the imports if so.
SHARED_UTILS_DIR = '/home/shared/utils'
sys.path.insert(0, SHARED_UTILS_DIR)

## Training

In [None]:
loans = extract.fetch_funded_mature_loans("'2018-01-01'", "'2020-04-19'")

In [None]:
loans = prep.preprocess_iloans(loans)

In [None]:
loans = fe.fe_iloans(loans)

In [None]:
access_count = extract.get_esign_time_diff("'2018-01-01'", "'2020-04-19'")

In [None]:
access_count = prep.preprocess_esign(access_count)

In [None]:
bank_reports = extract.fetch_bank_reports("'2018-01-01'", "'2020-04-19'")

In [None]:
bank_reports = prep.preprocess_bank_reports(bank_reports)

In [None]:
bank_reports = fe.fe_bank_reports(bank_reports)

In [None]:
bank_app = extract.fetch_bank_app_loans("'2018-01-01'", "'2020-04-19'")

In [None]:
bank_app = prep.preprocess_bank_app(bank_app)

In [None]:
# Lender-related column names plus the `loan_id` key.
# NOTE(review): nothing in the visible notebook reads this list — the merge
# below joins the full `bank_reports` frame. Confirm whether it was meant to
# restrict that merge, or whether it can be removed.
bank_reports_cols = [
    'loan_id',
    'LenderAmountDeb',
    'LenderCountCred',
    'LenderAmountCred30',
    'LenderCountDeb',
    'LenderAmountDeb30',
    'LenderCountCred30',
    'LenderCountDeb30',
    'LenderAmountCred',
    'UniqLenderCount',
]

In [None]:
learning = pd.merge(loans, access_count, how = 'left',on = 'loan_id')

In [None]:
learning = pd.merge(learning, bank_reports, how = 'left',on = 'loan_id')

In [None]:
learning = pd.merge(learning, bank_app, how = 'left', on = 'loan_id')

In [None]:
BV_status_list = ['Bank Validation Uncertain', 'Bank Validation Approved']

In [None]:
learning = learning[learning['bank_app_decision'].isin(BV_status_list)]

In [None]:
learning = learning.loc[learning['primary_account'].notnull(), :]

In [None]:
training = learning[(learning['OriginationDate'] >= '2018-01-01') & (learning['OriginationDate'] <= '2019-12-31')]

In [None]:
evaluation = learning[(learning['OriginationDate'] >= '2020-01-01') & (learning['OriginationDate'] <= '2020-04-19')]

In [None]:
# `impute_learning` returns the (imputed) training frame together with three
# imputer objects, one each for AccessCount, dti and pay_day_test_result_amount
# (per the names they are unpacked into; presumably fitted on the training
# data — confirm in `imputations`).
training, imp_acc_count, imp_dti, imp_pay_day = imp.impute_learning(training)

In [None]:
# Apply the imputers returned above to the evaluation set, one column each.
# The double brackets pass a one-column DataFrame rather than a Series
# (presumably the 2-D input the imputers expect — confirm).
evaluation[['AccessCount']] = imp_acc_count.transform(evaluation[['AccessCount']])

In [None]:
evaluation[['dti']] = imp_dti.transform(evaluation[['dti']])

In [None]:
evaluation[['pay_day_test_result_amount']] = imp_pay_day.transform(evaluation[['pay_day_test_result_amount']])

In [None]:
# Missing lead providers are labelled 'freedom'.
# NOTE(review): presumably mirrors what `impute_learning` does for the training
# set — confirm that both sets use the same fill value.
evaluation['LeadProvider'] = evaluation['LeadProvider'].fillna('freedom')

In [None]:
df_loans = training[['MonthlyGrossIncome', 'Age', 'Reloan', 'LeadProvider', 'LenderCountCred30', 
                     'UniqLenderCount', 'LenderAmountDeb', 'LenderAmountCred', 'LenderAmountDeb30',
                     'LenderAmountCred30', 'LenderCountDeb', 'LenderCountCred', 'LenderCountDeb30',
                     'median_weekly_credit', 'dev_weekly_credit_count', 'median_daily_debit', 'AccessCount', 
                     'IsFirstDefault', 'dti', 'pay_day_test_result_amount', 'diff_pos_neg_days', 'median_daily_balance']]

In [None]:
df_loans = prep.changing_bool_dtypes_to_str(df_loans)

## PyCaret

In [None]:
from pycaret.classification import *

In [None]:
# Initialise the PyCaret classification experiment on the training frame.
# - target: `IsFirstDefault`.
# - train_size=0.99: almost every row is used for fitting; scoring is done on
#   the separate `evaluation` frame further down.
# - session_id: fixed so runs are repeatable.
# - numeric_features: every input except 'Reloan' and 'LeadProvider' is
#   declared numeric explicitly.
clf = setup(
    data=df_loans,
    target='IsFirstDefault',
    train_size=0.99,
    session_id=69,
    normalize=True,
    transformation=True,
    remove_outliers=True,
    numeric_features=[
        'MonthlyGrossIncome',
        'Age',
        'LenderCountCred30',
        'UniqLenderCount',
        'LenderAmountDeb',
        'LenderAmountCred',
        'LenderAmountDeb30',
        'LenderAmountCred30',
        'LenderCountDeb',
        'LenderCountCred',
        'LenderCountDeb30',
        'median_weekly_credit',
        'dev_weekly_credit_count',
        'median_daily_debit',
        'AccessCount',
        'dti',
        'pay_day_test_result_amount',
        'diff_pos_neg_days',
        'median_daily_balance',
    ],
)

In [None]:
# Tune the 'lr' estimator (PyCaret's id for logistic regression), optimising F1.
# NOTE(review): passing the estimator id as a string matches the PyCaret 1.x
# API; later releases expect a model object here — confirm the pinned version.
tuned_lr = tune_model('lr', optimize = 'F1')

In [None]:
# Wrap the tuned model in a bagging ensemble.
bagged_lr = ensemble_model(tuned_lr, method = 'Bagging')

In [None]:
# Finalize the ensemble for use outside the experiment.
final_model = finalize_model(bagged_lr)

In [None]:
# Persist the finalized model under the name 'new_model'; the Evaluation
# section loads it back by the same name.
save_model(final_model, 'new_model')

## Evaluation

In [None]:
df_eval = evaluation[['MonthlyGrossIncome', 'Age', 'Reloan', 'LeadProvider', 'LenderCountCred30', 
                     'UniqLenderCount', 'LenderAmountDeb', 'LenderAmountCred', 'LenderAmountDeb30',
                     'LenderAmountCred30', 'LenderCountDeb', 'LenderCountCred', 'LenderCountDeb30',
                     'median_weekly_credit', 'dev_weekly_credit_count', 'median_daily_debit', 'AccessCount', 
                     'IsFirstDefault', 'dti', 'pay_day_test_result_amount', 'diff_pos_neg_days', 'median_daily_balance']]

In [None]:
df_eval = prep.changing_bool_dtypes_to_str(df_eval)

In [None]:
# Reload the model saved under 'new_model' in the PyCaret section, so scoring
# uses the persisted artefact rather than the in-memory object.
new_model = load_model('new_model')

In [None]:
# Score the evaluation frame with the loaded model.
predictions_pycaret = predict_model(new_model, data = df_eval)

In [None]:
# KS statistic from the project's `evaluate` module, given the true label
# column 'IsFirstDefault' and the 'Score' column (presumably added by
# `predict_model` — confirm its meaning for the installed PyCaret version).
eval_.get_KS(predictions_pycaret, 'IsFirstDefault', 'Score')