In [9]:
import pandas as pd
import joblib

# Input locations -- edit these two constants to point at a different
# model artefact or a different batch of applicants.
MODEL_PATH = 'LogisticRegression.joblib'
NEW_APPLICANT_CSV = 'NewApplicant.csv'

# Restore the previously trained classifier from disk.
# NOTE: joblib files are pickle-based, so only load artefacts from a trusted source.
model = joblib.load(MODEL_PATH)

# Read the applicants to be scored into a DataFrame.
new_applicant = pd.read_csv(NEW_APPLICANT_CSV)

In [10]:

#######Preprocess NewApplicant data #######

# LoanID is not used as a feature (presumably a row identifier -- confirm
# against the training notebook), so it is removed when present.
if 'LoanID' not in new_applicant.columns:
    # Nothing to drop: tell the user so they can double-check the input file.
    print('LoanID column is not present or has removed, check the columns')
else:
    new_applicant = new_applicant.drop(columns='LoanID')

# Integer codes for every categorical column, kept in one table so the
# encoding can be read (and compared with the training notebook) at a glance.
yes_no_codes = {'No': 0, 'Yes': 1}
category_codes = {
    'Education': {'High School': 1, "Bachelor's": 2, "Master's": 3, 'PhD': 4},
    'EmploymentType': {'Unemployed': 1, 'Part-time': 2, 'Full-time': 3, 'Self-employed': 4},
    'MaritalStatus': {'Single': 1, 'Married': 2, 'Divorced': 3},
    'HasMortgage': yes_no_codes,
    'HasDependents': yes_no_codes,
    'LoanPurpose': {'Education': 1, 'Home': 2, 'Business': 3, 'Auto': 4, 'Other': 5},
    'HasCoSigner': yes_no_codes,
}

for column_name, codes in category_codes.items():
    encoded = new_applicant[column_name].map(codes)
    # Series.map yields NaN for any value that is not a key of `codes`
    # (a misspelt label, a missing value, or an already-encoded column when
    # this cell is run twice). Fail with a message that names the column and
    # the offending values instead of an opaque integer-cast error.
    unmapped = encoded.isna()
    if unmapped.any():
        unexpected = new_applicant.loc[unmapped, column_name].unique().tolist()
        raise ValueError(
            "Column {!r} contains values with no known code: {}. Expected one of {}.".format(
                column_name, unexpected, list(codes)
            )
        )
    new_applicant[column_name] = encoded.astype(int)

# Divide LoanTerm by 12 and truncate to an integer
# (presumably months -> whole years; confirm against the training notebook).
new_applicant['LoanTerm'] = (new_applicant['LoanTerm'] / 12).astype(int)



from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

# Numeric columns that receive Z-score normalisation.
new_applicant_independent_variables = [
    'Age', 'Income', 'LoanAmount', 'CreditScore', 'MonthsEmployed',
    'NumCreditLines', 'InterestRate', 'LoanTerm', 'DTIRatio',
]

# NOTE(review): the scaler is re-fitted on the applicants being scored, so each
# value is standardised relative to this file rather than to the training data.
# If the model was trained on features scaled with training-set statistics, the
# fitted training scaler should be persisted and applied with .transform()
# instead -- confirm against the training notebook.
for column in new_applicant_independent_variables:
    # Fit on this single column and replace it with its standardised values.
    standardized = scaler.fit_transform(new_applicant[[column]])
    new_applicant[column] = standardized
    # Keep four decimal places.
    new_applicant[column] = new_applicant[column].apply(lambda value: round(value, 4))


# MaritalStatus is removed here because the training data no longer contains it
# (it was dropped there for having VIF > 3).
new_applicant = new_applicant.drop(columns='MaritalStatus')

# Categorical columns that are one-hot encoded (same list as for the train data).
columns_encode = ['Education', 'EmploymentType', 'LoanPurpose']
new_applicant = pd.get_dummies(new_applicant, columns=columns_encode)
new_applicant_columns = new_applicant.columns

# Boolean columns (such as boolean indicator columns from get_dummies) are
# converted to integers; every other column keeps its dtype.
bool_to_int = {
    name: int
    for name in new_applicant_columns
    if new_applicant[name].dtype == bool
}
new_applicant = new_applicant.astype(bool_to_int)




# Split the new applicant data into features and the target variable (Default).
# A file of genuinely new applicants may not carry a Default column, so the
# target is optional: Y_new_applicant is None when the column is absent.
if 'Default' in new_applicant.columns:
    Y_new_applicant = new_applicant['Default']
    X_new_applicant = new_applicant.drop(columns='Default')
else:
    Y_new_applicant = None
    X_new_applicant = new_applicant.copy()

# get_dummies only creates indicator columns for categories that occur in this
# file, so the columns can differ from what the model was fitted on. When the
# estimator recorded its training feature names, align to them (names and order).
expected_features = getattr(model, 'feature_names_in_', None)
if expected_features is not None:
    expected_features = list(expected_features)
    one_hot_prefixes = tuple(name + '_' for name in columns_encode)

    missing_features = [name for name in expected_features if name not in X_new_applicant.columns]
    # Only absent one-hot indicators can safely be filled with 0; any other
    # missing feature means the input file itself is incomplete.
    missing_non_dummy = [name for name in missing_features if not name.startswith(one_hot_prefixes)]
    if missing_non_dummy:
        raise ValueError(
            'New applicant data is missing features the model was trained on: {}'.format(missing_non_dummy)
        )

    unexpected_features = [name for name in X_new_applicant.columns if name not in expected_features]
    if unexpected_features:
        print('Dropping columns the model was not trained on: {}'.format(unexpected_features))

    X_new_applicant = X_new_applicant.reindex(columns=expected_features, fill_value=0)


import numpy as np

# Class-membership probabilities for every applicant: one row per applicant,
# one column per class, printed under a small two-column header.
y_new_applicant_proba = model.predict_proba(X_new_applicant)
header = " Default[0] | Default[1]"
print(header)
print('-' * 30)
print(y_new_applicant_proba)

# First probability column, treated here as P(no default)
# (assumes model.classes_ is ordered [0, 1] -- confirm).
no_default_proba = y_new_applicant_proba[:, 0]

# Label an applicant 0 (no default) when that probability is higher than the
# 0.5 threshold; everything else is labelled 1 (default).
result = np.where(no_default_proba > 0.5, 0, 1)

print(result)


 Default[0] | Default[1]
------------------------------
[[0.97215858 0.02784142]
 [0.93025153 0.06974847]
 [0.61638182 0.38361818]
 [0.47088917 0.52911083]
 [0.7760443  0.2239557 ]
 [0.42456998 0.57543002]
 [0.7781638  0.2218362 ]
 [0.95025445 0.04974555]
 [0.54008388 0.45991612]
 [0.89025176 0.10974824]
 [0.93686058 0.06313942]
 [0.95638283 0.04361717]
 [0.69854793 0.30145207]
 [0.78951611 0.21048389]
 [0.91004674 0.08995326]
 [0.20692712 0.79307288]
 [0.26306275 0.73693725]
 [0.95929584 0.04070416]
 [0.7922884  0.2077116 ]
 [0.96883617 0.03116383]]
[0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0]
