In [None]:
import numpy as np
import torch
import torch.nn as nn
from google.colab import drive
import os
import joblib

# --- 0. Setup ---

# Run on the first CUDA GPU when one is visible to PyTorch, else on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda:0"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")

# --- 1. Define Model Architectures ---


# --- Model 1 (Task 2): DL Classifier ---
class LoanDefaultClassifier(nn.Module):
    """Fully connected classifier with two or three hidden layers.

    Every hidden layer is ``Linear -> ReLU -> Dropout``. The final ``Linear``
    has no activation, so ``forward`` returns raw scores (the prediction code
    in this file applies ``torch.sigmoid`` to them itself).

    Args:
        input_dim: Number of input features.
        hidden_dim_1: Width of the first hidden layer.
        hidden_dim_2: Width of the second hidden layer.
        hidden_dim_3: Width of the optional third hidden layer; a value
            ``<= 0`` omits that layer entirely.
        output_dim: Width of the output layer.
        dropout_rate: Dropout probability used after every hidden layer.
    """

    def __init__(self, input_dim, hidden_dim_1, hidden_dim_2, hidden_dim_3, output_dim, dropout_rate):
        super().__init__()

        hidden_widths = [hidden_dim_1, hidden_dim_2]
        if hidden_dim_3 > 0:
            hidden_widths.append(hidden_dim_3)

        # Modules are created in network order so the nn.Sequential indices
        # (and therefore the state_dict keys) stay 0, 3, 6, ...
        stack = []
        fan_in = input_dim
        for width in hidden_widths:
            stack.append(nn.Linear(fan_in, width))
            stack.append(nn.ReLU())
            stack.append(nn.Dropout(dropout_rate))
            fan_in = width
        stack.append(nn.Linear(fan_in, output_dim))

        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Return the output-layer scores for the batch ``x``."""
        return self.network(x)

# --- Model 2 (Task 3): RL Q-Network ---
class QPolicyNet(nn.Module):
    """Two-hidden-layer network that outputs one Q-value per action.

    Each hidden layer is ``Linear -> ReLU -> Dropout``; the output layer is a
    plain ``Linear`` with no activation.

    Args:
        input_dim: Number of input features.
        hidden_dim_1: Width of the first hidden layer.
        hidden_dim_2: Width of the second hidden layer.
        output_dim: Number of outputs (one per action).
        dropout_rate: Dropout probability after each hidden layer. Defaults
            to 0.4, the value that used to be hard-coded, so existing
            four-argument calls build exactly the same network. Dropout has
            no parameters, so checkpoint (state_dict) keys are unaffected.
    """

    def __init__(self, input_dim, hidden_dim_1, hidden_dim_2, output_dim, dropout_rate=0.4):
        super(QPolicyNet, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim_1),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim_1, hidden_dim_2),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim_2, output_dim),
        )

    def forward(self, x):
        """Return the Q-values for the batch ``x``."""
        return self.network(x)

# --- 2. Load Data and Models ---
# Mounts Google Drive, then loads the test split, the fitted preprocessor and
# both trained networks from the project folder. Any failure is reported and
# re-raised, because nothing later in the script can run without these objects.
#
# NOTE(review): joblib.load and torch.load both unpickle their input, which can
# execute arbitrary code. Only point these paths at files you created yourself.
print("Mounting Google Drive...")
try:
    drive.mount('/content/drive', force_remount=True)

    DRIVE_MOUNT_POINT = '/content/drive/MyDrive/'
    PROJECT_DIR = os.path.join(DRIVE_MOUNT_POINT, 'shodhAI')

    # Load Test Data
    TEST_DATA_PATH = os.path.join(PROJECT_DIR, 'processed_data_test.npz')
    with np.load(TEST_DATA_PATH) as data:
        X_test = data['X'].astype(np.float32)  # feature matrix
        y_test = data['y'].astype(np.float32)  # used later as 1=Default, 0=Paid
        r_test = data['r'].astype(np.float32)  # used later as the true reward

    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(DEVICE)
    # The number of feature columns fixes the input width of both networks.
    INPUT_DIM = X_test.shape[1]

    # The fitted preprocessor is loaded only to recover the feature names.
    PREPROCESSOR_PATH = os.path.join(PROJECT_DIR, 'preprocessor.joblib')
    preprocessor = joblib.load(PREPROCESSOR_PATH)

    feature_names = preprocessor.get_feature_names_out()

    print("Test data loaded.")

    # --- Load Model 1 (Task 2) ---
    print("Loading Task 2 (DL) model...")

    # These values must describe the architecture the saved weights were
    # trained with; otherwise load_state_dict below fails on mismatched
    # keys or shapes. 'hidden_3': 0 means "no third hidden layer".
    DL_BEST_PARAMS = {
        'hidden_1': 256,
        'hidden_2': 128,
        'hidden_3': 0,
        'dropout': 0.4
    }

    DL_MODEL_PATH = os.path.join(PROJECT_DIR, 'dl_model_weights.pth')

    dl_model = LoanDefaultClassifier(
        INPUT_DIM,
        DL_BEST_PARAMS['hidden_1'],
        DL_BEST_PARAMS['hidden_2'],
        DL_BEST_PARAMS['hidden_3'],
        1,
        DL_BEST_PARAMS['dropout']
    ).to(DEVICE)
    dl_model.load_state_dict(torch.load(DL_MODEL_PATH, map_location=DEVICE))
    dl_model.eval()  # inference mode: disables dropout
    print("Task 2 (DL) model loaded.")

    # --- Load Model 2 (Task 3) ---
    print("Loading Task 3 (RL) model...")
    RL_MODEL_PATH = os.path.join(PROJECT_DIR, 'rl_q_model.pth')

    # Two outputs: index 0 is read as "Deny" and index 1 as "Approve" below.
    rl_model = QPolicyNet(INPUT_DIM, 256, 128, 2).to(DEVICE)
    rl_model.load_state_dict(torch.load(RL_MODEL_PATH, map_location=DEVICE))
    rl_model.eval()  # inference mode: disables dropout
    print("Task 3 (RL) model loaded.")

except Exception as e:
    print(f"Error loading models or data: {e}")
    # Bare raise re-raises the active exception with its original traceback.
    raise

# --- 3. Get Predictions from Both Models ---
# Inference only, so gradient tracking is switched off for both forward passes.
with torch.no_grad():
    # NOTE: keep this equal to the "Optimal Threshold" selected for the DL
    # model (presumably reported by the Task 2 tuning run -- confirm there).
    DL_THRESHOLD = 0.5160

    # Model 1 (DL): logits -> sigmoid -> flat array of P(default).
    dl_logits = dl_model(X_test_tensor)
    dl_probs = torch.sigmoid(dl_logits).cpu().numpy().flatten()

    # 1 = Predict Default, 0 = Predict Paid
    dl_decisions = (dl_probs >= DL_THRESHOLD).astype(int)
    # Invert into an approval policy so both models share one encoding:
    # 0 = Deny, 1 = Approve.
    dl_policy = 1 - dl_decisions

    # Model 2 (RL): choose the action with the larger Q-value per applicant
    # (0 = Deny, 1 = Approve).
    rl_q_values = rl_model(X_test_tensor).cpu().numpy()
    rl_policy = rl_q_values.argmax(axis=1)

# Headline comparison: how many test-set loans each policy would approve.
print("\n--- Model Policy Comparison ---")
dl_approves = (dl_policy == 1).sum()
rl_approves = (rl_policy == 1).sum()

print(f"Task 2 (DL) Policy: Approves {dl_approves}/{len(dl_policy)} loans ({dl_approves/len(dl_policy)*100:.2f}%)")
print(f"Task 3 (RL) Policy: Approves {rl_approves}/{len(rl_policy)} loans ({rl_approves/len(rl_policy)*100:.2f}%)")


# --- 4. Find Disagreements (Key Analysis for Report) ---
print("\n--- Finding Policy Disagreements ---")

# Boolean mask over the test set: True where the DL policy approves (1) and
# the RL policy denies (0). Only this direction of disagreement is examined.
dl_approves_rl_denies = np.logical_and(dl_policy == 1, rl_policy == 0)
# Positions in the test set where the mask is True.
disagreement_indices = np.flatnonzero(dl_approves_rl_denies)

print(f"Found {len(disagreement_indices)} loans where the DL model (Task 2) approves but the RL model (Task 3) denies.")

# --- 5. Analyze the Disagreements ---
# Prints a side-by-side breakdown for up to five loans that the DL policy
# approves but the RL policy denies.
print("\n--- Analyzing 5 Example Disagreements ---")

# Test arrays restricted to the disagreement cases: position i in these arrays
# corresponds to test-set row disagreement_indices[i].
disagreement_X = X_test[disagreement_indices]
disagreement_y_true = y_test[disagreement_indices]  # 1=Default, 0=Paid
disagreement_r_true = r_test[disagreement_indices]  # True *unscaled* profit/loss

# Slicing caps the loop at five examples and handles fewer (or zero) cleanly.
for i, idx_in_test_set in enumerate(disagreement_indices[:5]):
    print(f"\n--- Example {i+1} (Test Set Index: {idx_in_test_set}) ---")

    # Model outputs are indexed by test-set position; the disagreement_*
    # arrays are indexed by position within the disagreement subset (i).
    prob_default = dl_probs[idx_in_test_set]
    q_deny = rl_q_values[idx_in_test_set, 0]
    q_approve = rl_q_values[idx_in_test_set, 1]

    print(f"  > True Outcome: {'DEFAULTED' if disagreement_y_true[i] == 1 else 'Fully Paid'}")
    print(f"  > True Reward:  ${disagreement_r_true[i]:.2f}")

    print("\n  Task 2 (DL) Model:")
    print(f"    Predicted P(Default): {prob_default:.4f}")
    print(f"    Decision: APPROVE (because {prob_default:.4f} < {DL_THRESHOLD})")

    print("\n  Task 3 (RL) Model:")
    print(f"    Q-Value (Deny):    {q_deny:.4f}")
    print(f"    Q-Value (Approve): {q_approve:.4f}")
    print(f"    Decision: DENY (because {q_deny:.4f} > {q_approve:.4f})")

    print("\n  Analysis:")
    # The percentage is derived from DL_THRESHOLD rather than hard-coded, so
    # this message stays correct when the threshold is retuned.
    print(f"    This is an applicant the DL model thinks is 'safe enough' (prob < {DL_THRESHOLD:.1%}),")
    print("    but the RL model, trained on profit, learned that the risk-vs-reward")
    print("    for this type of applicant is a *bad financial bet*.")

    # You could also print the top 5 features for this applicant
    # print("\n  Applicant's (Scaled) Features:")
    # top_features_idx = np.argsort(disagreement_X[i])[-5:]
    # for feat_idx in top_features_idx:
    #     print(f"    {feature_names[feat_idx]}: {disagreement_X[i, feat_idx]:.2f}")

print("\n--- Task 4 (Analysis Script) Complete ---")



Using device: cpu
Mounting Google Drive...
Mounted at /content/drive
Test data loaded.
Loading Task 2 (DL) model...
Task 2 (DL) model loaded.
Loading Task 3 (RL) model...
Task 3 (RL) model loaded.

--- Model Policy Comparison ---
Task 2 (DL) Policy: Approves 21870/35217 loans (62.10%)
Task 3 (RL) Policy: Approves 2385/35217 loans (6.77%)

--- Finding Policy Disagreements ---
Found 19485 loans where the DL model (Task 2) approves but the RL model (Task 3) denies.

--- Analyzing 5 Example Disagreements ---

--- Example 1 (Test Set Index: 0) ---
  > True Outcome: Fully Paid
  > True Reward:  $0.10

  Task 2 (DL) Model:
    Predicted P(Default): 0.3900
    Decision: APPROVE (because 0.3900 < 0.516)

  Task 3 (RL) Model:
    Q-Value (Deny):    0.7597
    Q-Value (Approve): 0.7136
    Decision: DENY (because 0.7597 > 0.7136)

  Analysis:
    This is an applicant the DL model thinks is 'safe enough' (prob < 51.6%),
    but the RL model, trained on profit, learned that the risk-vs-reward
    f