In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the loan data set from disk.
df = pd.read_csv('Task 3 and 4_Loan_Data.csv')
# NOTE: this preview is not rendered, because only a cell's last expression is displayed.
df.head()

# Predictor columns handed to the classifier; the target is the `default` column.
features = [
    'credit_lines_outstanding',
    'loan_amt_outstanding',
    'total_debt_outstanding',
    'income',
    'years_employed',
    'fico_score',
]
X, y = df[features], df['default']

# Hold out 20% of the rows as unseen data for evaluation; the remaining 80%
# is what the tree learns the feature/target relationship from.
# The fixed random_state keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# A depth cap of 5 keeps the tree shallow to limit overfitting.
# fit() returns the fitted estimator, so construction and training can be chained.
dt_model = DecisionTreeClassifier(max_depth=5, random_state=42).fit(X_train, y_train)

# Score the fitted tree on the held-out rows.
y_pred = dt_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"Decision Tree Accuracy on Test Set: {accuracy:.4f}")
            

Decision Tree Accuracy on Test Set: 0.9945


In [8]:
def expected_loss(loan_amt, credit_lines_out, loan_amt_out, total_debt_out, income, yrs_employed, fico_score,
                  recovery_rate=0.1, model=None, feature_names=None):
    """
    Predict the Probability of Default (PD) for one borrower and compute the
    Expected Loss as ``loan_amt * PD * (1 - recovery_rate)``.

    Parameters
    ----------
    loan_amt : float
        Amount of the loan the expected loss is computed on.
    credit_lines_out, loan_amt_out, total_debt_out, income, yrs_employed, fico_score
        Borrower characteristics. They are placed, in this order, into a
        single-row DataFrame whose columns are ``feature_names``.
    recovery_rate : float, default 0.1
        Fraction of the loan recovered if the borrower defaults. Must lie in
        [0, 1]. The loss rate applied is ``1 - recovery_rate``.
    model : object with ``predict_proba``, optional
        Fitted classifier. When omitted, the notebook-level ``dt_model`` is
        used, so the training cell must have been run first.
    feature_names : list of str, optional
        Column names for the model input. When omitted, the notebook-level
        ``features`` list is used.

    Returns
    -------
    tuple
        ``(expected_loss_value, pd_estimate)``.

    Raises
    ------
    ValueError
        If ``recovery_rate`` is outside [0, 1] (this also rejects NaN).
    """

    # Written as a negated chained comparison so that NaN is rejected as well.
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError(f"recovery_rate must be between 0 and 1, got {recovery_rate!r}")

    # Resolve the notebook globals lazily, at call time, so explicit arguments
    # let the function run without the training cell's state.
    if model is None:
        model = dt_model
    if feature_names is None:
        feature_names = features

    # Derive the loss rate from the recovery rate so the two cannot drift apart.
    loss_rate = 1 - recovery_rate

    # Single-row data frame with the same column names the model was fitted on.
    input_data = pd.DataFrame(
        [[credit_lines_out, loan_amt_out, total_debt_out, income, yrs_employed, fico_score]],
        columns=feature_names,
    )

    # Predict Probability of Default (PD): first (only) row, second column of
    # predict_proba, which the original code treats as class 1 (default).
    pd_estimate = model.predict_proba(input_data)[0][1]

    expected_loss_amt = loan_amt * pd_estimate * loss_rate

    return expected_loss_amt, pd_estimate

In [20]:
# --- 3. TEST EXECUTION (relies on the model and function defined in earlier cells) ---

# Scenario the author labels "high-risk customer": keyword arguments for expected_loss().
loan_amount = 5000.00
test_case_data = dict(
    loan_amt=loan_amount,
    credit_lines_out=5,
    loan_amt_out=8000.00,
    total_debt_out=15000.00,
    income=35000.00,
    yrs_employed=2,
    fico_score=580,
)

# Unpack the scenario straight into the function's parameters.
el_amt, pd_est = expected_loss(**test_case_data)

# Emit the four report lines in one call; sep="\n" reproduces separate print() calls.
print(
    "\n--- Expected Loss Calculation ---",
    f"Loan Amount: ${loan_amount:,.2f}",
    f"Predicted Probability of Default (PD): {pd_est:.4f}",
    f"Expected Loss (EL): ${el_amt:,.2f}",
    sep="\n",
)


--- Expected Loss Calculation ---
Loan Amount: $5,000.00
Predicted Probability of Default (PD): 1.0000
Expected Loss (EL): $4,500.00
