In [None]:
# Import from the public IPython.display module: importing `display` from the
# internal IPython.core.display path is deprecated.
from IPython.display import display, HTML

# Inject CSS so the notebook container spans the full browser width.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell execution and edits to the project's .py files
# are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2

In [None]:
# Load the blackcellmagic extension (presumably for formatting cells with
# black via its cell magic; it is not invoked anywhere in the visible cells).
%load_ext blackcellmagic

In [None]:
import warnings
# NOTE(review): this suppresses every warning for the rest of the session,
# including deprecation/future warnings raised by pandas and seaborn below.
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import pymysql
import pymssql
import sys
import os
from functools import reduce
import seaborn as sns
from matplotlib import pyplot as plt

In [None]:
# Use seaborn's "darkgrid" theme for every figure drawn in this notebook.
sns.set(style="darkgrid")

In [None]:
# Prepend the shared utilities directory and then the current working
# directory to the import path (presumably where the project modules imported
# in the next cell live). The shared directory ends up first.
sys.path[:0] = ['/home/shared/utils', os.getcwd()]

In [None]:
import query as q
import extract
import preprocess as prep
import utilities as util
import model_compare as mod_com
from sklearn.metrics import confusion_matrix as cm
import EDA as eda

bank_app - loanid, entered_date, bank validation (BV) decisions

prediction_model - loan_id, entered_date, count of scored applicants, number of positive predictions

loans - loanid, lead_time_added, number of lender-approved loans

## In a Nutshell

### Extracting Data

In [None]:
# Pull the raw bank-application records through the project's extract module.
# The argument is a date string that carries its own single quotes
# (presumably spliced verbatim into a SQL filter; confirm in extract.py).
bank_app = extract.extract_bankapp("'2020-07-09'")

In [None]:
# Pull the raw model-score records. Unlike the other two extracts, no date
# argument is passed here.
model = extract.extract_model()

In [None]:
# Pull the raw loan-history records, using the same quoted date string as the
# bank-application extract.
loan_history = extract.extract_loan_history("'2020-07-09'")

### Modifying Data

In [None]:
# Run the project's bank-application preprocessing on the raw extract. The
# raw frame is kept under its own name (bank_app).
modified_bank_app = prep.preprocess_bankapp_db(bank_app)

In [None]:
# Run the project's model-score preprocessing on the raw model extract.
modified_model_scores = prep.preprocess_model_db(model)

In [None]:
# Run the project's loan-history preprocessing on the raw loan-history extract.
modified_loan_history = prep.preprocess_loan_history_db(loan_history)

In [None]:
# Keep only the applications whose agent decision is one of the two
# bank-validation outcomes listed here. Every other decision is dropped.
bv_decisions_to_keep = ['Bank Validation Uncertain', 'Bank Validation Approved']
is_BV_uncertain_approved = modified_bank_app['agent_decision'].isin(bv_decisions_to_keep)
modified_bank_app = modified_bank_app.loc[is_BV_uncertain_approved]

### Analysing loan IDs that our model approved but the lender did not

In [None]:
def _left_join_on_loan_id(left, right):
    """Left-join `right` onto `left` using the shared 'LoanId' column."""
    return pd.merge(left, right, on='LoanId', how='left')


# Fold the three frames together from left to right. Because every join is a
# left join, the filtered bank-application frame determines which loans are kept.
dfs = [modified_bank_app, modified_model_scores, modified_loan_history]
merged_db = reduce(_left_join_on_loan_id, dfs)

In [None]:
# Replace missing 'Decision' values with the sentinel -1 (presumably loans that
# found no model score in the left joins above; confirm).
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the column selection: that chained in-place form is
# deprecated in pandas 2.x and leaves merged_db unchanged under Copy-on-Write.
merged_db['Decision'] = merged_db['Decision'].fillna(-1)

In [None]:
# Names of the rejection sub-category columns handed to the preprocessing
# helper in the next cell, one per line.
sub_cats = [
    # "len_" prefix (presumably lender-side reasons)
    'len_to_many_open_loans_101',
    'len_return_found_102',
    'len_missing_payments_103',
    # "bk_verif_" prefix (presumably bank-verification reasons)
    'bk_verif_bal_on_payday_130',
    'bk_verif_high_negative_curr_bal_131',
    'bk_verif_high_pct_negative_bal_132',
    'bk_verif_bad_acc_type_133',
    'bk_verif_savings_acc_134',
    'bk_verif_business_acc_135',
    'bk_verif_pre_debit_card_acc_136',
    'bk_verif_stop_payment_or_revoked_137',
]

In [None]:
# Pass the merged frame and the sub-category column names to the project's
# preprocessing helper and rebind merged_db to its result (presumably this is
# where the 'sub_category' column used further down is derived; confirm in
# preprocess.py).
# NOTE(review): because merged_db is overwritten with its own processed
# version, re-running this cell feeds already-processed data back in.
merged_db = prep.preprocess_lender_reject_sub_categories(merged_db, sub_cats)

In [None]:
# Print the merged frame's column names, dtypes and non-null counts as a
# quick sanity check after the joins and preprocessing.
merged_db.info()

In [None]:
# Rows where Decision is 1 and LenderApproved is 0, i.e. (per the section
# heading) loans the model approved but the lender did not.
model_approved = merged_db['Decision'].eq(1)
lender_rejected = merged_db['LenderApproved'].eq(0)
lender_reject_model_approved = merged_db.loc[model_approved & lender_rejected]

In [None]:
# LoanIds of the Decision == 1 / LenderApproved == 0 rows that have no
# underwriting final decision recorded.
is_model_approved_lender_rejected = merged_db['Decision'].eq(1) & merged_db['LenderApproved'].eq(0)
has_no_final_decision = merged_db['underwriting_final_decision'].isna()
missing_decisions_loanids = merged_db.loc[
    is_model_approved_lender_rejected & has_no_final_decision, 'LoanId'
].values

In [None]:
# Narrow the model-approved / lender-rejected loans to those whose underwriting
# final decision is one of the two "No Sale" outcomes analysed below.
no_sale_flags = ['No Sale Lender', 'No Sale Bank Verification']
is_no_sale_lender_bank_verification = (
    lender_reject_model_approved['underwriting_final_decision'].isin(no_sale_flags)
)
no_sale_lender_bank_verification = lender_reject_model_approved.loc[
    is_no_sale_lender_bank_verification
]

In [None]:
# Display the confusion matrix produced by the project's utilities helper for
# the merged frame. norm='index' presumably normalises each row to sum to 1
# (as pandas.crosstab's normalize='index' does); confirm in utilities.py.
util.get_confusion_matrix(merged_db, norm = 'index')

In [None]:
# Summarise the rejection reasons for the model-approved / lender-rejected loans.
# NOTE(review): the helper's name says "lender approved, model disapproved",
# which is the opposite of the frame passed in. Confirm that the helper is
# direction-agnostic or that this is the intended call.
util.get_lender_approved_model_disapproved_reasons(lender_reject_model_approved)

In [None]:
# Subset of the "No Sale" loans whose final decision is exactly 'No Sale Lender'.
is_no_sale_lender = no_sale_lender_bank_verification['underwriting_final_decision'].eq('No Sale Lender')
no_sale_lender = no_sale_lender_bank_verification.loc[is_no_sale_lender]

In [None]:
# Subset of the "No Sale" loans whose final decision is exactly
# 'No Sale Bank Verification'.
is_no_sale_bank_verification = no_sale_lender_bank_verification['underwriting_final_decision'].eq(
    'No Sale Bank Verification'
)
no_sale_bank_verification = no_sale_lender_bank_verification.loc[is_no_sale_bank_verification]

In [None]:
# Frequency of each sub_category among the 'No Sale Lender' loans, most common first.
no_sale_lender['sub_category'].value_counts()

In [None]:
# Horizontal count plot of sub_category for the 'No Sale Lender' loans, with
# bars ordered from most to least frequent.
lender_category_order = no_sale_lender['sub_category'].value_counts().index
plt.figure(figsize=(8, 8))
ax = sns.countplot(data=no_sale_lender, y='sub_category', order=lender_category_order)

# Label each bar with its share of all 'No Sale Lender' rows, placed just past
# the end of the bar and vertically centred on it.
total = len(no_sale_lender)
for bar in ax.patches:
    bar_length = bar.get_width()
    label_position = (bar.get_x() + bar_length + 0.02, bar.get_y() + bar.get_height() / 2)
    ax.annotate('{:.1f}%'.format(100 * bar_length / total), label_position)

plt.title("No Sale Lender")
plt.show()

In [None]:
# Frequency of each sub_category among the 'No Sale Bank Verification' loans,
# most common first.
no_sale_bank_verification['sub_category'].value_counts()

In [None]:
# Horizontal count plot of sub_category for the 'No Sale Bank Verification'
# loans, with bars ordered from most to least frequent.
plt.figure(figsize = (8, 8))
ax = sns.countplot(data = no_sale_bank_verification, y = 'sub_category',
                   order = no_sale_bank_verification['sub_category'].value_counts().index)
# The labels are shares of the bank-verification subset, so the denominator
# must be this frame's own row count rather than no_sale_lender's. Using the
# lender subset's size makes every percentage wrong.
total = no_sale_bank_verification.shape[0]
for p in ax.patches:
    # Bar length is the category count; convert it to a percentage of the subset.
    percentage = '{:.1f}%'.format(100 * p.get_width()/total)
    # Place the label just past the end of the bar, vertically centred on it.
    x = p.get_x() + p.get_width() + 0.02
    y = p.get_y() + p.get_height()/2
    ax.annotate(percentage, (x, y))
plt.title("No Sale Bank Verification")
plt.show()