In [5]:
# @hidden
from IPython.display import display 
import numpy as np 
import pandas as pd
from io import BytesIO
import random
import pickle
import requests
import joblib
from compress_pickle import dump, load
import string
import warnings
from scipy.stats import randint as sp_randint
from scipy.stats import uniform as sp_uniform
from scipy.stats import uniform as sp_randFloat
from scipy.stats import randint as sp_randInt
# Silence every warning raised for the rest of the session.
warnings.filterwarnings("ignore")

# Let pandas render up to 100 columns and 100 rows before truncating output.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

#-----------------Plots
from matplotlib import pyplot as plt
from matplotlib import rcParams
import seaborn as sns
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import auc
import pylab as pl
large = 22; med = 16; small = 12
params = {'axes.titlesize': large,
          'legend.fontsize': med,
          'figure.figsize': (16, 10),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.rcParams.update(params)
plt.style.use('seaborn-whitegrid')
sns.set_style("white")
%matplotlib inline
%config InlineBackend.figure_format='retina'

#-----------------Pre-processing
import category_encoders as ce
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from imblearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.feature_selection import RFE
from yellowbrick.model_selection import RFECV

#-----------------Feature Importance
from sklearn.feature_selection import SelectKBest, f_classif ,mutual_info_classif
from sklearn.feature_selection import SelectFromModel
import shap

#-----------------Modelling
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from lightgbm import LGBMClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

#-----------------Scores and Metrics
from sklearn import metrics
from sklearn.model_selection import cross_val_score as cv_score
from sklearn.metrics import f1_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score 
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, fbeta_score
from sklearn.metrics import roc_curve
from sklearn.calibration import calibration_curve
from sklearn.metrics import brier_score_loss

#-----------------GUI
from ipywidgets import *
from IPython.display import display
import ipywidgets as widgets
from IPython import get_ipython

In [6]:
# @hidden
# Location of the raw credit-risk CSV.
url = 'https://raw.githubusercontent.com/ThomasD96/University_Repo/master/Nova_SBE/Master_thesis/credit_risk_dataset.csv'

# Raw column name -> shorter name used throughout the notebook.
# Columns that are not listed here keep their original name.
column_rename = dict(
    person_age='age',
    person_income='income',
    person_home_ownership='home_ownership',
    person_emp_length='emp_length',
    loan_int_rate='interest_rate',
    loan_status='status',
    loan_amnt='amount',
    loan_grade='grade',
    cb_person_default_on_file='historical_default',
    cb_person_cred_hist_length='cred_history_length',
)

# Read the CSV and apply the renaming in one pass.
df = pd.read_csv(url, low_memory=True).rename(columns=column_rename)

# Independent copy whose column statistics seed the form widgets below.
df_data = df.copy()
# Layout shared by every form row: a horizontal flex box with the label and
# the input pushed to opposite ends.
form_item_layout = widgets.Layout(display='flex', flex_flow='row', justify_content='space-between')

# Free-text field for the applicant's name.
text = widgets.Text(placeholder='Type in your <NAME> here', disabled=False)

# Button whose click handler runs the credit check.
button = widgets.Button(description="Check credibility")

# Label whose value is set to the verdict text by the click handler.
# NOTE(review): `visibility` and `disabled` are forwarded to Label as-is;
# confirm the installed ipywidgets version actually honours them.
resultLabel = widgets.Label(value="", visibility='hidden', disabled=True)

# --- Applicant details -------------------------------------------------------

# Age: integer input limited to 18-100, starting at the dataset median.
age = widgets.BoundedIntText(value=df_data.age.median(), min=18, max=100, step=1, disabled=False)

# Income: from 0 up to the largest income in the dataset, in steps of 100.
income = widgets.BoundedFloatText(value=df_data.income.median(), min=0, max=df_data.income.max(),
                                  step=100, readout_format=".2f", disabled=False)

# Home ownership: each displayed label maps to an integer code.
home_ownership = widgets.Dropdown(value=1, options=[('RENT', 1), ('OWN', 2), ('MORTGAGE', 3)])

# Employment length: integer input limited to 1-50, starting at the dataset median.
emp_length = widgets.BoundedIntText(value=df_data.emp_length.median(), min=1, max=50, step=1, disabled=False)

# --- Loan details ------------------------------------------------------------

# Loan purpose: each displayed label maps to an integer code.
loan_intent = widgets.Dropdown(
    value=1,
    options=[('EDUCATION', 1), ('MEDICAL', 2), ('VENTURE', 3), ('HOME IMPROVEMENT', 4), ('PERSONAL', 5), ('DEBT CONSOLIDATION', 6)],
)

# Loan grade A-G, mapped to the integer codes 1-7.
grade = widgets.Dropdown(
    value=1,
    options=[('A', 1), ('B', 2), ('C', 3), ('D', 4), ('E', 5), ('F', 6), ('G', 7)],
)

# Loan amount: 0-100000 in steps of 500, starting at the dataset median.
amount = widgets.BoundedFloatText(value=df_data.amount.median(), min=0, max=100000, step=500,
                                  readout_format=".2f", disabled=False)

# Interest rate: 0-20 in steps of 0.1, starting at 0.
interest_rate = widgets.BoundedFloatText(value=0, min=0, max=20, step=0.1,
                                         readout_format=".2f", disabled=False)

# Loan-to-income ratio: 0-1 in steps of 0.05, starting at the dataset median.
loan_percent_income = widgets.BoundedFloatText(value=df_data.loan_percent_income.median(), min=0, max=1,
                                               step=0.05, readout_format=".2f", disabled=False)

# Historical-default flag: 'Y' maps to code 1 and 'N' to code 2.
# The name is bound once, to this dropdown. An earlier free-text widget bound
# to the same name was overwritten before it could be displayed or read, so it
# has been dropped.
historical_default = widgets.Dropdown(
    options=[('Y', 1), ('N', 2)],
    value=1
)

# Credit-history length: 0-50 in steps of 0.5, starting at the dataset median.
cred_history_length = widgets.BoundedFloatText(value=df_data.cred_history_length.median(), min=0, max=50,
                                               step=0.5, readout_format=".2f", disabled=False)

# Which serialized model the click handler should use for the prediction.
algos = widgets.Dropdown(options=['XGB (with RFE)', 'XGB (with PCA)'])

# One labelled row per input, in display order, followed by the action button
# and the row that shows the result.
form_items = [
    Box([Label(value=caption), control], layout=form_item_layout)
    for caption, control in [
        ('Please, enter the name:', text),
        ('Age:', age),
        ('Income:', income),
        ('Home Ownership:', home_ownership),
        ('Employment Length (in years):', emp_length),
        ('Loan Intent:', loan_intent),
        ('Grade:', grade),
        ('Interest Rate:', interest_rate),
        ('Loan to income ratio:', loan_percent_income),
        ('Historical Default:', historical_default),
        ('Credit History Length:', cred_history_length),
        ('Loan Amount:', amount),
        ('Algorithm:', algos),
    ]
] + [
    button,
    Box([Label(value='Result:'), resultLabel], layout=form_item_layout),
]

# Stack the rows vertically inside a dashed frame taking 70% of the cell width.
form = Box(
    form_items,
    layout=Layout(display='flex', flex_flow='column', border='dashed 2px', align_items='stretch', width='70%'),
)

display(form)


# Download locations of the two serialized models offered in the `algos` dropdown.
_XGB_RFE_URL = 'https://github.com/ThomasD96/University_Repo/blob/master/Nova_SBE/Master_thesis/pickle/finalized_xgb_model_rfe?raw=true'
_XGB_PCA_URL = 'https://github.com/ThomasD96/University_Repo/blob/master/Nova_SBE/Master_thesis/pickle/finalized_xgb_model_pca.sav?raw=true'

# Column names of the single-row frame handed to the model, in the order the
# form values are packed.
_FEATURE_COLUMNS = ['age', 'income', 'home_ownership', 'emp_length', 'loan_intent', 'grade', 'amount', 'interest_rate', 'loan_percent_income', 'historical_default', 'cred_history_length']


def _download(url):
    """Return the body served at `url` as an in-memory binary stream.

    Raises `requests.HTTPError` on a non-success status, so an error page is
    never handed to the deserializer.
    """
    response = requests.get(url)
    response.raise_for_status()
    return BytesIO(response.content)


def _load_model(algo_name):
    """Download and deserialize the model for the selected algorithm label.

    'XGB (with PCA)' selects the PCA model; 'XGB (with RFE)' and any
    unrecognised label select the RFE model.
    """
    # NOTE(security): both loaders unpickle data fetched over the network, and
    # unpickling can execute arbitrary code. Only keep these URLs pointed at a
    # repository you control and trust.
    if algo_name == 'XGB (with PCA)':
        return pickle.load(_download(_XGB_PCA_URL))
    return load(_download(_XGB_RFE_URL), compression="lzma", set_default_extension=False)


# Click handler for the "Check credibility" button.
def on_button_clicked(b):
    """Predict from the current form values and write the verdict to `resultLabel`.

    Parameters
    ----------
    b
        Argument supplied by `button.on_click`; not used.

    A prediction of 0 is reported as "can be provided with the loan"; any
    other prediction is reported as "should not be provided with the loan".
    """
    applicant_name = text.value

    # Read `.value` from every widget (never the widget object itself), in
    # exactly the order of _FEATURE_COLUMNS.
    feature_values = [
        age.value,
        income.value,
        home_ownership.value,
        emp_length.value,
        loan_intent.value,
        grade.value,
        amount.value,
        interest_rate.value,
        loan_percent_income.value,
        historical_default.value,
        cred_history_length.value,
    ]
    testARR = pd.DataFrame(np.array(feature_values).reshape(1, -1), columns=_FEATURE_COLUMNS)

    yGuess = _load_model(algos.value).predict(testARR)

    if yGuess[0] == 0:
        resultLabel.value = applicant_name + ' can be provided with the loan.'
    else:
        resultLabel.value = applicant_name + ' should not be provided with the loan.'


button.on_click(on_button_clicked)
plt.show()

Box(children=(Box(children=(Label(value='Please, enter the name:'), Text(value='', placeholder='Type in your <…