In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, mean_squared_error
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [3]:
# Load the data
data = pd.read_csv('loan_data.csv')

# Prepare the features and target
X = data.drop(['customer_id', 'default'], axis=1)
y = data['default']

In [4]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
# Scale the features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [6]:
# Create the ANN model
model = Sequential()
model.add(Dense(128, activation='relu', input_dim=X_train_scaled.shape[1]))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [7]:
model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [8]:
# Train the model
history = model.fit(X_train_scaled, y_train, epochs=100, batch_size=32, validation_split=0.2, verbose=0)

In [9]:
# Evaluate the model
train_loss, train_accuracy = model.evaluate(X_train_scaled, y_train, verbose=0)
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)

In [10]:
print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

Train Accuracy: 0.9994
Test Accuracy: 0.9970


In [11]:
# Function to calculate expected loss
def calculate_expected_loss(loan_details, recovery_rate=0.1):
    # Scale the input
    loan_details_scaled = scaler.transform(loan_details.reshape(1, -1))
    
    # Predict probability of default
    pd = model.predict(loan_details_scaled)[0][0]
    
    # Calculate expected loss
    loan_amount = loan_details[2]  # Assuming loan_amt_outstanding is at index 2
    expected_loss = pd * (1 - recovery_rate) * loan_amount
    
    return pd, expected_loss

In [12]:
# Example usage
new_loan = np.array([3, 50000, 100000, 40000, 2, 400])  # Example loan details
pd, expected_loss = calculate_expected_loss(new_loan)
print(f"Probability of Default: {pd:.4f}")
print(f"Expected Loss: ${expected_loss:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 95ms/step
Probability of Default: 1.0000
Expected Loss: $90000.00




In [13]:
from scipy.special import logsumexp

In [14]:
def optimize_fico_buckets(fico_scores, defaults, num_buckets):
    N = len(fico_scores)
    sorted_indices = np.argsort(fico_scores)
    sorted_fico = fico_scores[sorted_indices]
    sorted_defaults = defaults[sorted_indices]

    def bucket_likelihood(start, end):
        k = np.sum(sorted_defaults[start:end])
        n = end - start
        p = k / n if n > 0 else 0
        return k * np.log(p) + (n - k) * np.log(1 - p) if 0 < p < 1 else 0

    DP = np.zeros((num_buckets + 1, N + 1))
    split_points = np.zeros((num_buckets + 1, N + 1), dtype=int)

    for i in range(1, num_buckets + 1):
        for j in range(i, N + 1):
            max_ll = -np.inf
            best_split = i - 1
            for k in range(i - 1, j):
                ll = DP[i-1][k] + bucket_likelihood(k, j)
                if ll > max_ll:
                    max_ll = ll
                    best_split = k
            DP[i][j] = max_ll
            split_points[i][j] = best_split

    # Backtrack to find optimal bucket boundaries
    boundaries = [N]
    for i in range(num_buckets, 0, -1):
        boundaries.append(split_points[i][boundaries[-1]])
    boundaries = boundaries[::-1]

    return sorted_fico[boundaries[1:-1]]

In [15]:
# Example usage:
fico_scores = np.array(data['fico_score']) # Your FICO scores here
defaults = np.array(data['default']) # Corresponding default indicators (0 or 1)
num_buckets = 10

optimal_boundaries = optimize_fico_buckets(fico_scores, defaults, num_buckets)
print("Optimal bucket boundaries:", optimal_boundaries)

Optimal bucket boundaries: [755 765 765 774 774 788 789 810 810]
