In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

import d3rlpy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [2]:
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
d3rlpy.seed(SEED)

In [3]:
DATA_PATH = '../data/processed_loans.csv'
df = pd.read_csv(DATA_PATH)

In [None]:
# Re-calculating rewards
def calculate_reward(row):
    if row['target'] == 0: # Fully Paid
        return row['loan_amnt'] * (row['int_rate'] / 100.0)
    else: # Default
        return -1.0 * row['loan_amnt']

df['reward'] = df.apply(calculate_reward, axis=1)

In [6]:
# Feature/Target Split
feature_cols = df.drop(columns=['target', 'reward']).columns
X = df[feature_cols].values
y = df['target'].values
rewards = df['reward'].values

In [7]:
# Split (Same seed as before to match Phase 2)
X_train, X_test, y_train, y_test, r_train, r_test = train_test_split(
    X, y, rewards, test_size=0.30, random_state=SEED, stratify=y
)

In [8]:
# Scale (Crucial: Fit on Train, Transform Test)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [9]:
# Convert Test Data to Tensors 
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Loading DL & RL models for evaluation

In [10]:
class LoanDefaultModel(nn.Module):
    def __init__(self, input_size):
        super(LoanDefaultModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)
        self.layer2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)
        self.layer3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.3)
        self.output = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.dropout1(self.relu1(self.layer1(x)))
        x = self.dropout2(self.relu2(self.layer2(x)))
        x = self.dropout3(self.relu3(self.layer3(x)))
        return self.sigmoid(self.output(x))

In [11]:
# Restoring DL Model
dl_model = LoanDefaultModel(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(dl_model.parameters(), lr=0.001)

In [12]:
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

In [13]:
dl_model.train()
for epoch in range(50):
    optimizer.zero_grad()
    y_pred = dl_model(X_train_tensor)
    loss = criterion(y_pred, y_train_tensor)
    loss.backward()
    optimizer.step()

In [16]:
#Loading RL Agent
cql = d3rlpy.algos.DiscreteCQLConfig().create(device="cpu") 

dummy_actions = np.zeros(100, dtype=int)
dummy_actions[0] = 1  

cql.build_with_dataset(d3rlpy.dataset.MDPDataset(
    observations=np.array(X_train_scaled[:100]), 
    actions=dummy_actions, 
    rewards=np.zeros(100),
    terminals=np.ones(100)
))

cql.load_model('../models/cql_loan_agent.pt')

2025-12-09 14:17.04 [info     ] Signatures have been automatically determined. action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]) observation_signature=Signature(dtype=[dtype('float64')], shape=[(79,)]) reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)])
2025-12-09 14:17.04 [info     ] Action-space has been automatically determined. action_space=<ActionSpace.DISCRETE: 2>
2025-12-09 14:17.04 [info     ] Action size has been automatically determined. action_size=2


# Divergent Analysis

In [17]:
# A. Get Predictions on TEST set
dl_model.eval()
with torch.no_grad():
    dl_probs = dl_model(X_test_tensor).numpy().flatten()
    # DL Decisions (Using your Optimized Threshold 0.25)
    dl_decisions = (dl_probs > 0.25).astype(int) 
    dl_actions = 1 - dl_decisions # 1=Approve, 0=Deny

# RL Decisions (1=Approve, 0=Deny)
rl_actions = cql.predict(X_test_scaled)

In [18]:
# B. Compare
agreement = (dl_actions == rl_actions)
print(f"Models Agree on: {np.mean(agreement):.1%} of cases")

Models Agree on: 70.3% of cases


In [19]:
# C. Find Divergence: DL Approves (1), RL Denies (0)
mask_dl_yes_rl_no = (dl_actions == 1) & (rl_actions == 0)
divergent_indices = np.where(mask_dl_yes_rl_no)[0]

print(f"Cases where DL says APPROVE but RL says DENY: {len(divergent_indices)}")

Cases where DL says APPROVE but RL says DENY: 6


In [22]:
# D. Inspect a specific Divergent Case (for the Report)
if len(divergent_indices) > 0:
    idx = divergent_indices[0]
    
    print("Divergent Case Study")
    print(f"Applicant Index: {idx}")
    print(f"DL Probability of Default: {dl_probs[idx]:.4f} (Safe < 0.25)")
    print(f"RL Action: {rl_actions[idx]} (Deny)")
    print(f"Actual Outcome (Target): {y_test[idx]} (1=Default, 0=Paid)")
    print(f"Financial Consequence: ${r_test[idx]:.2f}")

Divergent Case Study
Applicant Index: 687
DL Probability of Default: 0.0471 (Safe < 0.25)
RL Action: 0 (Deny)
Actual Outcome (Target): 0 (1=Default, 0=Paid)
Financial Consequence: $6149.50


# Estimated Policy Value (EPV) Calculation

In [23]:
# Calculate Rewards for DL Policy
dl_rewards = []
for i in range(len(X_test)):
    if dl_actions[i] == 1: # DL Approved
        dl_rewards.append(r_test[i])
    else:
        dl_rewards.append(0) # DL Denied 

In [24]:
# Calculate Rewards for RL Policy
rl_rewards = []
for i in range(len(X_test)):
    if rl_actions[i] == 1: # RL Approved
        rl_rewards.append(r_test[i])
    else:
        rl_rewards.append(0) # RL Denied 

In [25]:
# Metrics
epv_dl = np.mean(dl_rewards)
epv_rl = np.mean(rl_rewards)

In [26]:
comparison_df = pd.DataFrame({
    'Metric': ['Test Set AUC', 'Test Set F1', 'Est. Policy Value (Avg Reward)'],
    'Deep Learning (Model 1)': [0.741, 0.456, f"${epv_dl:.2f}"],
    'RL Agent (Model 2)': ['N/A (Policy)', 'N/A', f"${epv_rl:.2f}"]
})

print("Final Metrics Comparison")
display(comparison_df)

Final Metrics Comparison


Unnamed: 0,Metric,Deep Learning (Model 1),RL Agent (Model 2)
0,Test Set AUC,0.741,N/A (Policy)
1,Test Set F1,0.456,
2,Est. Policy Value (Avg Reward),$-273.19,$-1339.02
