In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report


In [2]:
# Load the loan dataset from 'Loan_Data.csv' (path is relative to the
# notebook's working directory) into the `data` DataFrame used by later cells.
data = pd.read_csv('Loan_Data.csv')

In [3]:
# Predictor columns (order matters: the fitted scaler/model expect this order)
features = [
    'credit_lines_outstanding',
    'loan_amt_outstanding',
    'total_debt_outstanding',
    'income',
    'years_employed',
    'fico_score',
]
X = data[features]
y = data['default']

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize with statistics learned from the training split only, so no
# information from the held-out rows leaks into the scaler
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the classifier on the standardized training features
model = LogisticRegression(random_state=42)
model.fit(X_train_scaled, y_train)

# Score the held-out split and print the summary metrics
y_pred = model.predict(X_test_scaled)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")
print(classification_report(y_test, y_pred))

Model Accuracy: 1.00
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1652
           1       0.99      0.98      0.99       348

    accuracy                           1.00      2000
   macro avg       0.99      0.99      0.99      2000
weighted avg       1.00      1.00      1.00      2000



In [5]:
def calculate_expected_loss(credit_lines_outstanding, loan_amt_outstanding, total_debt_outstanding, 
                            income, years_employed, fico_score, loan_amount, recovery_rate=0.1):
    """Estimate the expected loss on a loan for a single borrower.

    The result is ``PD * LGD * EAD`` where PD is taken from the notebook-level
    ``model`` (after scaling the inputs with the notebook-level ``scaler``),
    ``LGD = 1 - recovery_rate`` and ``EAD = loan_amount``.

    Parameters
    ----------
    credit_lines_outstanding, loan_amt_outstanding, total_debt_outstanding,
    income, years_employed, fico_score : numeric
        Borrower attributes, passed to the scaler in exactly this order (the
        same order as the ``features`` list used for training).
    loan_amount : numeric
        Exposure at default used in the loss calculation.
    recovery_rate : float, default 0.1
        Fraction of the exposure assumed to be recovered after a default.
        Must lie in the closed interval [0, 1].

    Returns
    -------
    tuple
        ``(expected_loss, probability_of_default)``.

    Raises
    ------
    ValueError
        If ``recovery_rate`` is outside [0, 1] (including NaN).
    """
    # Reject meaningless recovery rates up front: outside [0, 1] the loss given
    # default would be negative or exceed 100% of the exposure.
    if not 0 <= recovery_rate <= 1:
        raise ValueError(
            f"recovery_rate must be between 0 and 1, got {recovery_rate!r}"
        )

    # Single-row 2D array; column order must match the training features
    input_data = np.array([[credit_lines_outstanding, loan_amt_outstanding, total_debt_outstanding,
                            income, years_employed, fico_score]])
    input_data_scaled = scaler.transform(input_data)

    # Second column of predict_proba is the probability of the second class
    # (presumably label 1 = default — confirm against model.classes_).
    # Named `prob_default` rather than `pd` so the pandas alias is not shadowed.
    prob_default = model.predict_proba(input_data_scaled)[0][1]

    # Expected loss = PD * LGD * EAD
    lgd = 1 - recovery_rate
    ead = loan_amount
    expected_loss = prob_default * lgd * ead

    return expected_loss, prob_default

# Example usage
credit_lines_outstanding = 2
loan_amt_outstanding = 5000
total_debt_outstanding = 10000
income = 75000
years_employed = 5
fico_score = 650
loan_amount = 20000

# Bind the probability to `prob_default`, not `pd`: assigning to `pd` at cell
# level would replace the pandas module alias and break any later (or re-run)
# cell that calls pd.read_csv / pd.DataFrame.
expected_loss, prob_default = calculate_expected_loss(
    credit_lines_outstanding,
    loan_amt_outstanding,
    total_debt_outstanding,
    income,
    years_employed,
    fico_score,
    loan_amount,
)

print(f"Probability of Default: {prob_default:.2%}")
print(f"Expected Loss: ${expected_loss:.2f}")

Probability of Default: 0.00%
Expected Loss: $0.20


