<a href="https://colab.research.google.com/github/ShauryaDamathia/ShodhAI_Policy_Optimization/blob/main/Policy_Optimization_for_Financial_Decision_Making.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **1_EDA_and_Preprocessing.ipynb**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Read only the first 500,000 rows of the accepted-loans CSV into `df`.
df = pd.read_csv(
    'accepted_2007_to_2018Q4.csv',
    nrows=500000,
)

In [None]:
# Quick look at the raw frame: column schema first, then numeric summary stats.
print("--- Initial Data Overview ---")
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print("\n--- Summary Statistics ---")
print(df.describe())

In [None]:
# Horizontal bar chart counting the rows for each raw `loan_status` value.
fig, ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=df, y='loan_status', ax=ax)
ax.set_title('Distribution of Loan Status')
plt.show()

In [None]:
# Columns kept for modelling. The last entry, 'loan_status', is the target,
# which is why the feature count printed below subtracts one.
selected_features = [
    'loan_amnt',
    'int_rate',
    'installment',
    'grade',
    'sub_grade',
    'emp_length',
    'home_ownership',
    'annual_inc',
    'verification_status',
    'purpose',
    'dti',
    'delinq_2yrs',
    'fico_range_high',
    'inq_last_6mths',
    'pub_rec',
    'revol_bal',
    'revol_util',
    'total_acc',
    'loan_status',
]
df = df.loc[:, selected_features]

n_selected = df.shape[1] - 1
print(f"\n--- Working with {n_selected} selected features ---")

In [None]:
# Keep only loans with one of the two final outcomes and encode the outcome
# as a binary target: 0 = 'Fully Paid', 1 = 'Charged Off'.
status_to_label = {'Fully Paid': 0, 'Charged Off': 1}

# .copy() detaches the filtered rows from the frame they were sliced from, so
# the column assignment below writes to a real frame rather than a possible
# view (the original pattern triggers pandas' SettingWithCopyWarning).
df = df[df['loan_status'].isin(list(status_to_label))].copy()
df['loan_status'] = df['loan_status'].map(status_to_label).astype('int64')

print("\n--- Target Variable Distribution (0: Paid, 1: Default) ---")
print(df['loan_status'].value_counts(normalize=True))

In [None]:
# Impute missing numeric values with each column's median, then drop any row
# that still has a missing value (i.e. a gap in a non-numeric column).
numeric_cols = df.select_dtypes(include=np.number).columns.tolist()

# fillna() with a Series fills column-by-column using the matching index
# label and returns a new frame. This replaces `df[col].fillna(...,
# inplace=True)`, which operates on a temporary Series: it is deprecated and,
# under pandas copy-on-write, leaves `df` untouched so the dropna() below
# would silently discard every row that should have been imputed.
df = df.fillna(df[numeric_cols].median())

# Rows with gaps in the remaining (non-numeric) columns are removed here; this
# runs before `emp_length` is converted to a number in the next cell.
df = df.dropna()

In [None]:
# Convert the textual employment-length column to a number of years.
# NOTE(review): assumes raw labels of the form '< 1 year', '3 years',
# '10+ years' -- confirm against the CSV. If no label matches the first
# pattern, that replace is simply a no-op.
# Map "less than one year" to 0 explicitly; stripping non-digits alone would
# turn it into 1 and make it indistinguishable from '1 year'.
emp_length_text = df['emp_length'].str.replace(r'<\s*1\s*year', '0', regex=True)
emp_length_text = emp_length_text.str.replace(r'\D', '', regex=True)

# Anything that still cannot be parsed becomes NaN and is then set to 0.
# The result is assigned back in one step instead of using
# `df['emp_length'].fillna(0, inplace=True)`, which acts on a temporary
# Series and is not guaranteed to update `df`.
df['emp_length'] = pd.to_numeric(emp_length_text, errors='coerce').fillna(0)

In [None]:
# One-hot encode every remaining object-dtype column, dropping the first
# level of each so the dummy columns are not perfectly collinear.
categorical_cols = list(df.select_dtypes(include=['object']).columns)
df_encoded = pd.get_dummies(
    data=df,
    columns=categorical_cols,
    drop_first=True,
)

In [None]:
# Separate the binary target column from the feature matrix.
y = df_encoded['loan_status']
X = df_encoded.drop(columns='loan_status')

In [None]:
# 80/20 split, stratified on the target so both parts keep the same class
# balance; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardise features. The scaler is fitted on the training split only and
# the same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print("\n--- Data Preprocessing Complete ---")
for split_label, split_array in (("Training", X_train), ("Test", X_test)):
    print(f"{split_label} data shape: {split_array.shape}")

In [None]:
# Export the test split in original (unscaled) units together with its labels.
#
# The rows are taken straight from the unscaled feature frame `X` using the
# index labels that `y_test` kept from the split, instead of undoing the
# scaling with `scaler.inverse_transform`. That round trip goes through
# floating-point arithmetic and can return values that differ slightly from
# the originals (e.g. loan amounts or 0/1 dummy columns).
# The cast to float64 keeps every column numeric, as the inverse-transformed
# array was.
X_test_unscaled = X.loc[y_test.index].astype('float64')
df_test = X_test_unscaled.reset_index(drop=True)
df_test['loan_status'] = y_test.values

# Row order matches the scaled test array, because both come from the same split.
df_test.to_csv('preprocessed_test_data.csv', index=False)

In [None]:
# Persist the scaled feature matrices and the label vectors as .npy files.
arrays_to_save = [
    ('X_train.npy', X_train),
    ('X_test.npy', X_test),
    ('y_train.npy', y_train),
    ('y_test.npy', y_test),
]
for npy_path, npy_array in arrays_to_save:
    np.save(npy_path, npy_array)

# **2_Deep_Learning_Model.ipynb**

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import roc_auc_score, f1_score, roc_curve, auc
import matplotlib.pyplot as plt

In [None]:
# Load the four .npy arrays (features and labels for train and test).
X_train, X_test, y_train, y_test = (
    np.load(npy_path)
    for npy_path in ('X_train.npy', 'X_test.npy', 'y_train.npy', 'y_test.npy')
)
print("--- Data Loaded Successfully ---")
print(f"Training data shape: {X_train.shape}")

In [None]:
# Feed-forward binary classifier: three ReLU hidden layers (128 -> 64 -> 32),
# dropout after the first two, and a single sigmoid output unit.
n_features = X_train.shape[1]

model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(n_features,)))
model.add(Dropout(0.3))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# The metric is named 'auc' so the training history exposes 'auc'/'val_auc'.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[tf.keras.metrics.AUC(name='auc')],
)

print("\n--- Model Summary ---")
model.summary()

In [None]:
# Stop once validation loss has not improved for 10 epochs and roll the
# weights back to the best epoch seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# Train with 20% of the training data held out for validation.
fit_options = dict(
    validation_split=0.2,
    epochs=100,
    batch_size=256,
    callbacks=[early_stopping],
    verbose=1,
)
history = model.fit(X_train, y_train, **fit_options)

# Flatten the model output to 1-D scores, then threshold at 0.5 to get
# hard 0/1 class predictions.
y_pred_proba = model.predict(X_test).ravel()
y_pred_class = np.greater(y_pred_proba, 0.5).astype(int)

In [None]:
# Test-set metrics: AUC is computed from the scores, F1 from the thresholded
# class predictions.
auc_score = roc_auc_score(y_test, y_pred_proba)
f1 = f1_score(y_test, y_pred_class)

report_lines = [
    "\n--- Model Evaluation on Test Set ---",
    f"AUC Score: {auc_score:.4f}",
    f"F1-Score: {f1:.4f}",
]
print("\n".join(report_lines))

In [None]:
# Write the trained network to disk; the comparison cell in the RL section
# loads it back from this same file name.
saved_model_path = 'loan_default_model.h5'
model.save(saved_model_path)
print("\n--- Model saved successfully as 'loan_default_model.h5' ---")

In [None]:
# ROC curve of the classifier on the test set, with the diagonal drawn as a
# dashed reference line.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_proba)
roc_auc = auc(fpr, tpr)

fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC) Curve')
ax.legend(loc="lower right")
plt.show()

In [None]:
# Side-by-side training curves: loss on the left, AUC on the right, each
# showing the training and validation series recorded by `model.fit`.
fig, (loss_ax, auc_ax) = plt.subplots(1, 2, figsize=(12, 5))

loss_ax.plot(history.history['loss'], label='Training Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_title('Loss Over Epochs')
loss_ax.legend()

auc_ax.plot(history.history['auc'], label='Training AUC')
auc_ax.plot(history.history['val_auc'], label='Validation AUC')
auc_ax.set_title('AUC Over Epochs')
auc_ax.legend()

plt.show()

# **3_Offline_RL_Agent.ipynb**

In [None]:
!pip install d3rlpy

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import d3rlpy
from d3rlpy.algos import DiscreteCQL, DiscreteCQLConfig
from d3rlpy.dataset import MDPDataset
from sklearn.model_selection import train_test_split
import os

In [None]:
# Load the artefacts written by the preprocessing notebook: the unscaled test
# frame (with labels) and the matching scaled feature matrix.
try:
    df_test = pd.read_csv('preprocessed_test_data.csv')
    X_test_scaled = np.load('X_test.npy')
except FileNotFoundError as e:
    # Fail here with an actionable message. Printing and carrying on would
    # leave `df_test` / `X_test_scaled` undefined and surface as an unrelated
    # NameError on the very next statement.
    raise FileNotFoundError(
        f"{e}. Make sure you have run the first notebook (1_EDA_and_Preprocessing.ipynb) successfully."
    ) from e

# The two files must describe the same rows; everything downstream pairs
# them up by position.
if len(df_test) != len(X_test_scaled):
    raise ValueError("Mismatch between the length of the dataframe and the scaled features array.")

# Report success only after both the load and the consistency check passed.
print("--- Test Data Loaded ---")

In [None]:
# Assemble the arrays for the offline-RL dataset, one single-step record per loan.
num_samples = len(df_test)
observations = X_test_scaled.astype('float32')

# Historical action was always 'Approve' (1).
actions = np.ones(num_samples, dtype=int)

# Reward: interest earned (amount * rate / 100) when the loan was fully paid
# (loan_status == 0); otherwise the negative of the loan amount.
fully_paid = df_test['loan_status'] == 0
interest_earned = df_test['loan_amnt'] * (df_test['int_rate'] / 100)
principal_lost = -df_test['loan_amnt']
rewards = np.where(fully_paid, interest_earned, principal_lost)
rewards = rewards.astype('float32').reshape(-1, 1)  # 2-D column vector

# Every record is flagged as terminal.
terminals = np.ones(num_samples)


In [None]:
# Hold out 20% of the records for evaluating the learned policy; the split is
# applied consistently across all four arrays.
split_parts = train_test_split(
    observations,
    actions,
    rewards,
    terminals,
    test_size=0.2,
    random_state=42,
)
(train_obs, test_obs,
 train_act, test_act,
 train_rew, test_rew,
 train_term, test_term) = split_parts

# Create an MDPDataset for training ONLY
train_dataset = MDPDataset(
    observations=train_obs,
    actions=train_act,
    rewards=train_rew,
    terminals=train_term,
)

In [None]:
print("\n--- Configuring and Training the RL Agent ---")

# Discrete CQL with its default configuration, running on CPU with
# distributed data parallel disabled.
config = DiscreteCQLConfig()
cql = DiscreteCQL(config=config, device="cpu", enable_ddp=False)

# Fit on the training dataset only, with no evaluation hooks attached.
# 50,000 steps is a demonstration-sized budget.
n_training_steps = 50000
cql.fit(dataset=train_dataset, n_steps=n_training_steps)

print("--- Training Complete ---")

In [None]:
print("\n--- Manually Evaluating the Trained Policy ---")

# Greedy action chosen by the trained agent for each held-out observation.
policy_actions_on_test_set = cql.predict(test_obs)

# Positions of the loans the policy approves (action == 1) and the rewards
# recorded for them.
approved_indices = np.flatnonzero(policy_actions_on_test_set == 1)
rewards_of_approved_loans = test_rew[approved_indices]

# Mean recorded reward over the approved loans only; 0.0 if nothing was approved.
estimated_policy_value = (
    rewards_of_approved_loans.mean() if len(rewards_of_approved_loans) > 0 else 0.0
)

print("\n--- RL Agent Evaluation Results ---")
print(f"Policy approved {len(approved_indices)} out of {len(test_obs)} loans in the test set.")
print(f"Estimated Policy Value (mean return): ${estimated_policy_value:.2f}")
print("This value represents the average expected profit for each loan the policy chooses to approve.")

In [32]:
print("\n--- Comparing DL and RL Policies ---")
# Score every observation (not just the RL hold-out) so each row of df_test
# gets an RL decision.
rl_actions_full = cql.predict(observations)
df_test['rl_decision'] = rl_actions_full

model_path = 'loan_default_model.h5'
if not os.path.exists(model_path):
    print(f"\nCould not find '{model_path}'.")
    print("Please run Notebook 2 (2_Deep_Learning_Model.ipynb) first to create the model file.")
else:
    dl_model = tf.keras.models.load_model(model_path)
    dl_pred_proba = dl_model.predict(X_test_scaled).ravel()

    # DL policy: approve (1) only when the predicted default probability is
    # below the threshold; otherwise deny (0).
    dl_threshold = 0.20
    df_test['dl_decision'] = (dl_pred_proba < dl_threshold).astype(int)
    df_test['dl_default_prob'] = dl_pred_proba

    # Rows where the two policies made different decisions.
    policies_differ = df_test['rl_decision'] != df_test['dl_decision']
    disagreements = df_test[policies_differ]
    print(f"\nFound {len(disagreements)} cases where DL and RL policies disagree.")

    # Among the disagreements, RL == 1 implies DL == 0; show the first five.
    rl_approves_dl_denies = disagreements[disagreements['rl_decision'] == 1].head()
    if rl_approves_dl_denies.empty:
        print("\nFound no examples where the RL agent approves and the DL model denies in the first few disagreements.")
    else:
        print("\n--- Example: RL Approves, DL Denies ---")
        print("The DL model sees high risk (>20% default prob), but the RL agent approves.")
        print("This happens when the potential interest profit (reward) outweighs the default risk.")

        display_cols = [
            'loan_amnt', 'int_rate', 'annual_inc', 'fico_range_high', 'loan_status',
            'dl_default_prob', 'rl_decision', 'dl_decision',
        ]
        print(rl_approves_dl_denies[display_cols].round(2))


--- Comparing DL and RL Policies ---




[1m37/37[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step

Found 455 cases where DL and RL policies disagree.

--- Example: RL Approves, DL Denies ---
The DL model sees high risk (>20% default prob), but the RL agent approves.
This happens when the potential interest profit (reward) outweighs the default risk.
   loan_amnt  int_rate  annual_inc  fico_range_high  loan_status  \
0    11475.0     21.48     35000.0            664.0            0   
2    22950.0     17.97    115000.0            694.0            0   
4    12000.0     17.97     40000.0            679.0            0   
7    27275.0     21.48     62000.0            674.0            1   
8     8000.0     16.59     27000.0            664.0            1   

   dl_default_prob  rl_decision  dl_decision  
0             0.61            1            0  
2             0.45            1            0  
4             0.40            1            0  
7             0.61            1            0  
8             0.47           