In [17]:
# import the necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

In [4]:
# read in the data
data = pd.read_csv('Task 3 and 4_Loan_Data.csv')  

In [21]:
# checking the data
data.head()

Unnamed: 0,customer_id,credit_lines_outstanding,loan_amt_outstanding,total_debt_outstanding,income,years_employed,fico_score,default
0,8153374,0,5221.545193,3915.471226,78039.38546,5,605,0
1,7442532,5,1958.928726,8228.75252,26648.43525,2,572,1
2,2256073,0,3363.009259,2027.83085,65866.71246,4,602,0
3,4885975,0,4766.648001,2501.730397,74356.88347,5,612,0
4,4700614,1,1345.827718,1768.826187,23448.32631,6,631,0


In [6]:
# prepare the data
X = data.drop(['customer_id', 'default'], axis=1)
y = data['default']

In [7]:
# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
# train a logistic regression model
logreg_model = LogisticRegression()
logreg_model.fit(X_train, y_train)

In [9]:
# train a decision tree classifier
tree_model = DecisionTreeClassifier()
tree_model.fit(X_train, y_train)

In [10]:
# evaluate models
logreg_accuracy = accuracy_score(y_test, logreg_model.predict(X_test))
tree_accuracy = accuracy_score(y_test, tree_model.predict(X_test))

In [11]:
# checking accuracies
print(f"Logistic Regression Accuracy: {logreg_accuracy:.2f}")
print(f"Decision Tree Accuracy: {tree_accuracy:.2f}")

Logistic Regression Accuracy: 0.99
Decision Tree Accuracy: 0.99


In [12]:
# calculate probabilities of default using the logistic regression model
def calculate_default_probability(model, features):
    probabilities = model.predict_proba([features])[0]
    default_probability = probabilities[1]
    return default_probability

In [13]:
# calculate expected loss
def calculate_expected_loss(default_probability, loan_amount, recovery_rate):
    return default_probability * (1 - recovery_rate) * loan_amount

In [27]:
# example loan properties
loan_features = {
    'credit_lines_outstanding': 2,
    'loan_amt_outstanding': 10000,
    'total_debt_outstanding': 15000,
    'income': 60000,
    'years_employed': 3,
    'fico_score': 700
}

In [28]:
# set the recovery rate
recovery_rate = 0.1

In [29]:
# calculate default probability and expected loss using logistic regression model
default_prob_logreg = calculate_default_probability(logreg_model, list(loan_features.values()))
expected_loss_logreg = calculate_expected_loss(default_prob_logreg, loan_features['loan_amt_outstanding'], recovery_rate)

print(f"Loan Default Probability (Logistic Regression): {default_prob_logreg:.2f}")
print(f"Expected Loss (Logistic Regression): ${expected_loss_logreg:.2f}")

Loan Default Probability (Logistic Regression): 1.00
Expected Loss (Logistic Regression): $8993.94




In [30]:
# calculate default probability and expected loss using decision tree model
default_prob_tree = calculate_default_probability(tree_model, list(loan_features.values()))
expected_loss_tree = calculate_expected_loss(default_prob_tree, loan_features['loan_amt_outstanding'], recovery_rate)

print(f"Loan Default Probability (Decision Tree): {default_prob_tree:.2f}")
print(f"Expected Loss (Decision Tree): ${expected_loss_tree:.2f}")

Loan Default Probability (Decision Tree): 0.00
Expected Loss (Decision Tree): $0.00




We got some interesting results. According to the logistic regression model, the hypothetical person would defualt and the expected loss was around 9000. But the decision tree model said the person wouldn't defualt and there'd be no losss.

In [32]:
# example 2 loan properties
loan_features_ex2 = {
    'credit_lines_outstanding': 0,
    'loan_amt_outstanding': 10000,
    'total_debt_outstanding': 10000,
    'income': 100000,
    'years_employed': 7,
    'fico_score': 740
}

default_prob_logreg = calculate_default_probability(logreg_model, list(loan_features_ex2.values()))
expected_loss_logreg = calculate_expected_loss(default_prob_logreg, loan_features_ex2['loan_amt_outstanding'], recovery_rate)

print(f"Loan Default Probability (Logistic Regression): {default_prob_logreg:.2f}")
print(f"Expected Loss (Logistic Regression): ${expected_loss_logreg:.2f}")


default_prob_tree = calculate_default_probability(tree_model, list(loan_features_ex2.values()))
expected_loss_tree = calculate_expected_loss(default_prob_tree, loan_features_ex2['loan_amt_outstanding'], recovery_rate)

print(f"Loan Default Probability (Decision Tree): {default_prob_tree:.2f}")
print(f"Expected Loss (Decision Tree): ${expected_loss_tree:.2f}")

Loan Default Probability (Logistic Regression): 0.00
Expected Loss (Logistic Regression): $0.00
Loan Default Probability (Decision Tree): 0.00
Expected Loss (Decision Tree): $0.00




In [33]:
# example 3 loan properties
loan_features_ex3 = {
    'credit_lines_outstanding': 1,
    'loan_amt_outstanding': 100000,
    'total_debt_outstanding': 150000,
    'income': 120000,
    'years_employed': 5,
    'fico_score': 690
}

default_prob_logreg = calculate_default_probability(logreg_model, list(loan_features_ex3.values()))
expected_loss_logreg = calculate_expected_loss(default_prob_logreg, loan_features_ex3['loan_amt_outstanding'], recovery_rate)

print(f"Loan Default Probability (Logistic Regression): {default_prob_logreg:.2f}")
print(f"Expected Loss (Logistic Regression): ${expected_loss_logreg:.2f}")


default_prob_tree = calculate_default_probability(tree_model, list(loan_features_ex3.values()))
expected_loss_tree = calculate_expected_loss(default_prob_tree, loan_features_ex3['loan_amt_outstanding'], recovery_rate)

print(f"Loan Default Probability (Decision Tree): {default_prob_tree:.2f}")
print(f"Expected Loss (Decision Tree): ${expected_loss_tree:.2f}")

Loan Default Probability (Logistic Regression): 1.00
Expected Loss (Logistic Regression): $90000.00
Loan Default Probability (Decision Tree): 0.00
Expected Loss (Decision Tree): $0.00


