In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim

import d3rlpy

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, classification_report

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [2]:
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)
d3rlpy.seed(SEED)

In [3]:
DATA_PATH = '../data/processed_loans.csv'
df = pd.read_csv(DATA_PATH)

In [4]:
# Re-calculating rewards
def calculate_reward(row):
    if row['target'] == 0: # Fully Paid
        return row['loan_amnt'] * (row['int_rate'] / 100.0)
    else: # Default
        return -1.0 * row['loan_amnt']

df['reward'] = df.apply(calculate_reward, axis=1)

In [5]:
# Feature/Target Split
feature_cols = df.drop(columns=['target', 'reward']).columns
X = df[feature_cols].values
y = df['target'].values
rewards = df['reward'].values

In [6]:
# Split (Same seed as before to match Phase 2)
X_train, X_test, y_train, y_test, r_train, r_test = train_test_split(
    X, y, rewards, test_size=0.30, random_state=SEED, stratify=y
)

In [7]:
# Scale (Crucial: Fit on Train, Transform Test)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [8]:
# Convert Test Data to Tensors 
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)

# Loading DL & RL models for evaluation

In [9]:
class LoanDefaultModel(nn.Module):
    def __init__(self, input_size):
        super(LoanDefaultModel, self).__init__()
        self.layer1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)
        self.layer2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.dropout2 = nn.Dropout(0.3)
        self.layer3 = nn.Linear(128, 64)
        self.relu3 = nn.ReLU()
        self.dropout3 = nn.Dropout(0.3)
        self.output = nn.Linear(64, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.dropout1(self.relu1(self.layer1(x)))
        x = self.dropout2(self.relu2(self.layer2(x)))
        x = self.dropout3(self.relu3(self.layer3(x)))
        return self.sigmoid(self.output(x))

In [10]:
# Restoring DL Model
dl_model = LoanDefaultModel(X_train.shape[1])
criterion = nn.BCELoss()
optimizer = optim.Adam(dl_model.parameters(), lr=0.001)

In [11]:
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)

In [12]:
dl_model.train()
for epoch in range(50):
    optimizer.zero_grad()
    y_pred = dl_model(X_train_tensor)
    loss = criterion(y_pred, y_train_tensor)
    loss.backward()
    optimizer.step()

In [13]:
#Loading RL Agent
cql = d3rlpy.algos.DiscreteCQLConfig().create(device="cpu") 

dummy_actions = np.zeros(100, dtype=int)
dummy_actions[0] = 1  

cql.build_with_dataset(d3rlpy.dataset.MDPDataset(
    observations=np.array(X_train_scaled[:100]), 
    actions=dummy_actions, 
    rewards=np.zeros(100),
    terminals=np.ones(100)
))

cql.load_model('../models/cql_loan_agent.pt')

2025-12-10 20:13.58 [info     ] Signatures have been automatically determined. action_signature=Signature(dtype=[dtype('int64')], shape=[(1,)]) observation_signature=Signature(dtype=[dtype('float64')], shape=[(79,)]) reward_signature=Signature(dtype=[dtype('float64')], shape=[(1,)])
2025-12-10 20:13.58 [info     ] Action-space has been automatically determined. action_space=<ActionSpace.DISCRETE: 2>
2025-12-10 20:13.58 [info     ] Action size has been automatically determined. action_size=2


# Divergent Analysis

In [14]:
# A. Get Predictions on TEST set
dl_model.eval()
with torch.no_grad():
    dl_probs = dl_model(X_test_tensor).numpy().flatten()
    # DL Decisions (Using your Optimized Threshold 0.25)
    dl_decisions = (dl_probs > 0.25).astype(int) 
    dl_actions = 1 - dl_decisions # 1=Approve, 0=Deny

# RL Decisions (1=Approve, 0=Deny)
rl_actions = cql.predict(X_test_scaled)

In [15]:
# B. Compare
agreement = (dl_actions == rl_actions)
print(f"Models Agree on: {np.mean(agreement):.1%} of cases")

Models Agree on: 70.3% of cases


In [16]:
# C. Find Divergence: DL Approves (1), RL Denies (0)
mask_dl_yes_rl_no = (dl_actions == 1) & (rl_actions == 0)
divergent_indices = np.where(mask_dl_yes_rl_no)[0]

print(f"Cases where DL says APPROVE but RL says DENY: {len(divergent_indices)}")

Cases where DL says APPROVE but RL says DENY: 6


In [17]:
# D. Inspect a specific Divergent Case where DL accepts (for the Report)
if len(divergent_indices) > 0:
    idx = divergent_indices[0]
    
    print("Divergent Case Study")
    print(f"Applicant Index: {idx}")
    print(f"DL Probability of Default: {dl_probs[idx]:.4f} (Safe < 0.25)")
    print(f"RL Action: {rl_actions[idx]} (Deny)")
    print(f"Actual Outcome (Target): {y_test[idx]} (1=Default, 0=Paid)")
    print(f"Financial Consequence: ${r_test[idx]:.2f}")

Divergent Case Study
Applicant Index: 687
DL Probability of Default: 0.0471 (Safe < 0.25)
RL Action: 0 (Deny)
Actual Outcome (Target): 0 (1=Default, 0=Paid)
Financial Consequence: $6149.50


In [18]:
# E. Inspect a specific Divergent Case where DL Denies (for the Report)

toxic_mask = (dl_actions == 0) & (rl_actions == 1) & (y_test == 1)
toxic_indices = np.where(toxic_mask)[0]

print(f"Toxic Loans (DL Saved / RL Lost): {len(toxic_indices)}")

if len(toxic_indices) > 0:
    idx = toxic_indices[0] # Just grab the first one
    loss_amount = r_test[idx] # The money we lost
    
    print(f"Applicant Index: {idx}")
    print(f"DL Probability: {dl_probs[idx]:.4f} (High Risk)")
    print(f"RL Decision: {rl_actions[idx]} (Approved - Oops)")
    print(f"Real Outcome: Defaulted")
    print(f"Financial Loss: ${loss_amount:.2f}")

Toxic Loans (DL Saved / RL Lost): 4964
Applicant Index: 45
DL Probability: 0.3029 (High Risk)
RL Decision: 1 (Approved - Oops)
Real Outcome: Defaulted
Financial Loss: $-6000.00


# Estimated Policy Value (EPV) Calculation

In [19]:
# Calculate Rewards for DL Policy
dl_rewards = []
for i in range(len(X_test)):
    if dl_actions[i] == 1: # DL Approved
        dl_rewards.append(r_test[i])
    else:
        dl_rewards.append(0) # DL Denied 

In [20]:
# Calculate Rewards for RL Policy
rl_rewards = []
for i in range(len(X_test)):
    if rl_actions[i] == 1: # RL Approved
        rl_rewards.append(r_test[i])
    else:
        rl_rewards.append(0) # RL Denied 

In [21]:
# Metrics
epv_dl = np.mean(dl_rewards)
epv_rl = np.mean(rl_rewards)

In [22]:
comparison_df = pd.DataFrame({
    'Metric': ['Test Set AUC', 'Test Set F1', 'Est. Policy Value (Avg Reward)'],
    'Deep Learning (Model 1)': [0.741, 0.456, f"${epv_dl:.2f}"],
    'RL Agent (Model 2)': ['N/A (Policy)', 'N/A', f"${epv_rl:.2f}"]
})

print("Final Metrics Comparison")
display(comparison_df)

Final Metrics Comparison


Unnamed: 0,Metric,Deep Learning (Model 1),RL Agent (Model 2)
0,Test Set AUC,0.741,N/A (Policy)
1,Test Set F1,0.456,
2,Est. Policy Value (Avg Reward),$-273.19,$-1339.02


In [23]:
# Calculate Policy Statistics
print("=" * 60)
print("POLICY BEHAVIOR COMPARISON")
print("=" * 60)

dl_approve_rate = np.mean(dl_actions == 1)
rl_approve_rate = np.mean(rl_actions == 1)

print(f"\n1. APPROVAL RATES:")
print(f"   Baseline (Historical): 100.0%")
print(f"   DL Model:              {dl_approve_rate:.1%}")
print(f"   RL Agent:              {rl_approve_rate:.1%}")

# Risk profile of approved loans
dl_approved_mask = (dl_actions == 1)
rl_approved_mask = (rl_actions == 1)

if np.sum(dl_approved_mask) > 0:
    dl_default_rate = np.mean(y_test[dl_approved_mask])
    dl_avg_reward_if_approved = np.mean(r_test[dl_approved_mask])
else:
    dl_default_rate = 0
    dl_avg_reward_if_approved = 0

if np.sum(rl_approved_mask) > 0:
    rl_default_rate = np.mean(y_test[rl_approved_mask])
    rl_avg_reward_if_approved = np.mean(r_test[rl_approved_mask])
else:
    rl_default_rate = 0
    rl_avg_reward_if_approved = 0

print(f"\n2. RISK PROFILE OF APPROVED LOANS:")
print(f"   DL Approved Loans: {np.sum(dl_approved_mask)} loans")
print(f"     → Default Rate: {dl_default_rate:.1%}")
print(f"     → Avg Reward per Approved Loan: ${dl_avg_reward_if_approved:.2f}")
print(f"\n   RL Approved Loans: {np.sum(rl_approved_mask)} loans")
print(f"     → Default Rate: {rl_default_rate:.1%}")
print(f"     → Avg Reward per Approved Loan: ${rl_avg_reward_if_approved:.2f}")

# Disagreement Analysis
print(f"\n3. DECISION DISAGREEMENTS:")
print(f"   Total Agreement: {np.mean(dl_actions == rl_actions):.1%}")

dl_yes_rl_no = np.sum((dl_actions == 1) & (rl_actions == 0))
dl_no_rl_yes = np.sum((dl_actions == 0) & (rl_actions == 1))

print(f"   DL Approves / RL Denies: {dl_yes_rl_no} cases")
print(f"   DL Denies / RL Approves: {dl_no_rl_yes} cases")

# Financial impact of disagreements
if dl_no_rl_yes > 0:
    mask_risky = (dl_actions == 0) & (rl_actions == 1)
    risky_outcomes = r_test[mask_risky]
    print(f"\n   Impact of RL's 'Risky' Approvals:")
    print(f"     → These {dl_no_rl_yes} loans had avg reward: ${np.mean(risky_outcomes):.2f}")
    print(f"     → Default rate: {np.mean(y_test[mask_risky]):.1%}")

POLICY BEHAVIOR COMPARISON

1. APPROVAL RATES:
   Baseline (Historical): 100.0%
   DL Model:              65.6%
   RL Agent:              95.2%

2. RISK PROFILE OF APPROVED LOANS:
   DL Approved Loans: 34640 loans
     → Default Rate: 11.7%
     → Avg Reward per Approved Loan: $-416.61

   RL Approved Loans: 50306 loans
     → Default Rate: 17.9%
     → Avg Reward per Approved Loan: $-1406.07

3. DECISION DISAGREEMENTS:
   Total Agreement: 70.3%
   DL Approves / RL Denies: 6 cases
   DL Denies / RL Approves: 15672 cases

   Impact of RL's 'Risky' Approvals:
     → These 15672 loans had avg reward: $-3594.77
     → Default rate: 31.7%
