In [9]:
#imports

import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, log_loss
from xgboost import XGBClassifier




 

In [10]:
file_path = '3_and_4_Loan_Data.csv'
df = pd.read_csv(file_path)
df['loan_to_income'] = df['loan_amt_outstanding'] / df['income']
df['debt_to_income'] = df['total_debt_outstanding'] / df['income']
df['nonloan_debt'] = df['total_debt_outstanding'] - df['loan_amt_outstanding']

features = [
    'credit_lines_outstanding',
    'loan_amt_outstanding',
    'total_debt_outstanding',
    'income',
    'years_employed',
    'fico_score',
    'loan_to_income',
    'debt_to_income',
    'nonloan_debt']

X = df[features]
y = df['default']

X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size = 0.3, random_state=0, stratify=y)

X_valid, X_test, y_valid, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=0, stratify = y_temp)

model = XGBClassifier(
    n_estimators = 1000,
    learning_rate = 0.05,
    max_depth = 4,
    random_state = 0,
    use_label_encoder = False,
    eval_metric = "logloss")

model.fit(
    X_train, y_train, 
    eval_set = [(X_valid, y_valid)],
    early_stopping_rounds = 20,
    verbose = True
)

y_proba = model.predict_proba(X_test)[:, 1]
print("AUC:", roc_auc_score(y_test, y_proba))
print("Log Loss:", log_loss(y_test, y_proba))



[0]	validation_0-logloss:0.64648
[1]	validation_0-logloss:0.60411
[2]	validation_0-logloss:0.56565
[3]	validation_0-logloss:0.53033
[4]	validation_0-logloss:0.49813
[5]	validation_0-logloss:0.46852
[6]	validation_0-logloss:0.44123
[7]	validation_0-logloss:0.41605
[8]	validation_0-logloss:0.39256
[9]	validation_0-logloss:0.37083
[10]	validation_0-logloss:0.35060
[11]	validation_0-logloss:0.33171
[12]	validation_0-logloss:0.31413
[13]	validation_0-logloss:0.29782
[14]	validation_0-logloss:0.28222
[15]	validation_0-logloss:0.26794
[16]	validation_0-logloss:0.25391
[17]	validation_0-logloss:0.24074
[18]	validation_0-logloss:0.22833
[19]	validation_0-logloss:0.21676
[20]	validation_0-logloss:0.20591
[21]	validation_0-logloss:0.19550
[22]	validation_0-logloss:0.18571
[23]	validation_0-logloss:0.17649
[24]	validation_0-logloss:0.16814
[25]	validation_0-logloss:0.16034
[26]	validation_0-logloss:0.15246
[27]	validation_0-logloss:0.14551
[28]	validation_0-logloss:0.13847
[29]	validation_0-loglos



[35]	validation_0-logloss:0.10002
[36]	validation_0-logloss:0.09594
[37]	validation_0-logloss:0.09166
[38]	validation_0-logloss:0.08793
[39]	validation_0-logloss:0.08406
[40]	validation_0-logloss:0.08071
[41]	validation_0-logloss:0.07714
[42]	validation_0-logloss:0.07389
[43]	validation_0-logloss:0.07104
[44]	validation_0-logloss:0.06796
[45]	validation_0-logloss:0.06542
[46]	validation_0-logloss:0.06277
[47]	validation_0-logloss:0.06057
[48]	validation_0-logloss:0.05802
[49]	validation_0-logloss:0.05574
[50]	validation_0-logloss:0.05362
[51]	validation_0-logloss:0.05162
[52]	validation_0-logloss:0.04975
[53]	validation_0-logloss:0.04799
[54]	validation_0-logloss:0.04607
[55]	validation_0-logloss:0.04437
[56]	validation_0-logloss:0.04280
[57]	validation_0-logloss:0.04131
[58]	validation_0-logloss:0.03989
[59]	validation_0-logloss:0.03854
[60]	validation_0-logloss:0.03726
[61]	validation_0-logloss:0.03589
[62]	validation_0-logloss:0.03459
[63]	validation_0-logloss:0.03337
[64]	validatio

In [11]:
def expected_loss(input_features: dict, model):
    row = pd.DataFrame([input_features])
    
    row['loan_to_income'] = row['loan_amt_outstanding'] / row['income']
    row['debt_to_income'] = row['total_debt_outstanding'] / row['income']
    row['nonloan_debt'] = row['total_debt_outstanding'] - row['loan_amt_outstanding']  
    
    features = [
    'credit_lines_outstanding',
    'loan_amt_outstanding',
    'total_debt_outstanding',
    'income',
    'years_employed',
    'fico_score',
    'loan_to_income',
    'debt_to_income',
    'nonloan_debt']
    
    X_input = row[features]
    pd_hat = model.predict_proba(X_input)[0][1]
    
    LGD = 0.9
    EAD = row['loan_amt_outstanding']
    
    return float(pd_hat * LGD * EAD)



In [15]:
borrower = {
    'credit_lines_outstanding': 3,
    'loan_amt_outstanding': 2000,
    'total_debt_outstanding': 12000,
    'income': 95000,
    'years_employed': 4,
    'fico_score': 615
}

loss = expected_loss(borrower, model)
print("Expected Loss: £", round(loss, 2))


Expected Loss: £ 0.59


  return float(pd_hat * LGD * EAD)
