# Hybrid Credit Risk Prediction with Reinforcement Learning and LSTM

In [1]:
# %pip install -q pandas 
# %pip install -q numpy 
# %pip install -q matplotlib 
# %pip install -q seaborn
# %pip install -q scikit-learn
# %pip install -q tensorflow
# %pip install -q gym
# %pip install -q stable-baselines3[extra]

In [2]:
import zipfile

import numpy as np                                                                  # type: ignore
import pandas as pd                                                                 # type: ignore

from sklearn.model_selection import train_test_split                                # type: ignore
from sklearn.preprocessing import StandardScaler                                    # type: ignore
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score # type: ignore

In [3]:
import tensorflow as tf                                     # type: ignore
from tensorflow.keras.models import Sequential              # type: ignore
from tensorflow.keras.layers import LSTM, Dense, Dropout    # type: ignore

2024-09-19 19:08:11.418605: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-09-19 19:08:15.015573: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-09-19 19:08:16.349979: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-09-19 19:08:16.643192: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-09-19 19:08:18.838302: I tensorflow/core/platform/cpu_feature_guar

In [4]:
import gym                          # type: ignore
from gym import spaces              # type: ignore

from stable_baselines3 import DQN   # type: ignore

## Step 1: Data Loading and Preprocessing

### Data Loading

In [5]:
zf = zipfile.ZipFile('../data/master_data/GiveMeSomeCredit.zip')

df = pd.read_csv(zf.open('cs-training.csv'))
df_test = pd.read_csv(zf.open('cs-test.csv'))
df_sample_entry = pd.read_csv(zf.open('sampleEntry.csv'))

In [6]:
print('Size of training dataset: ', len(df))
df.head()

Size of training dataset:  150000


Unnamed: 0.1,Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
0,1,1,0.766127,45,2,0.802982,9120.0,13,0,6,0,2.0
1,2,0,0.957151,40,0,0.121876,2600.0,4,0,0,0,1.0
2,3,0,0.65818,38,1,0.085113,3042.0,2,1,0,0,0.0
3,4,0,0.23381,30,0,0.03605,3300.0,5,0,0,0,0.0
4,5,0,0.907239,49,1,0.024926,63588.0,7,0,1,0,0.0


In [7]:
df.tail(10)

Unnamed: 0.1,Unnamed: 0,SeriousDlqin2yrs,RevolvingUtilizationOfUnsecuredLines,age,NumberOfTime30-59DaysPastDueNotWorse,DebtRatio,MonthlyIncome,NumberOfOpenCreditLinesAndLoans,NumberOfTimes90DaysLate,NumberRealEstateLoansOrLines,NumberOfTime60-89DaysPastDueNotWorse,NumberOfDependents
149990,149991,0,0.055518,46,0,0.609779,4335.0,7,0,1,0,2.0
149991,149992,0,0.104112,59,0,0.477658,10316.0,10,0,2,0,0.0
149992,149993,0,0.871976,50,0,4132.0,,11,0,1,0,3.0
149993,149994,0,1.0,22,0,0.0,820.0,1,0,0,0,0.0
149994,149995,0,0.385742,50,0,0.404293,3400.0,7,0,0,0,0.0
149995,149996,0,0.040674,74,0,0.225131,2100.0,4,0,1,0,0.0
149996,149997,0,0.299745,44,0,0.716562,5584.0,4,0,1,0,2.0
149997,149998,0,0.246044,58,0,3870.0,,18,0,1,0,0.0
149998,149999,0,0.0,30,0,0.0,5716.0,4,0,0,0,0.0
149999,150000,0,0.850283,64,0,0.249908,8158.0,8,0,2,0,0.0


### Preprocessing

#### Handling Missing Values

In [8]:
# get the count of missing values per column
missing_values_count = df.isnull().sum()
print(missing_values_count)

Unnamed: 0                                  0
SeriousDlqin2yrs                            0
RevolvingUtilizationOfUnsecuredLines        0
age                                         0
NumberOfTime30-59DaysPastDueNotWorse        0
DebtRatio                                   0
MonthlyIncome                           29731
NumberOfOpenCreditLinesAndLoans             0
NumberOfTimes90DaysLate                     0
NumberRealEstateLoansOrLines                0
NumberOfTime60-89DaysPastDueNotWorse        0
NumberOfDependents                       3924
dtype: int64


In [9]:
# MonthlyIncome

# filter rows where MonthlyIncome is NaN
missing_income_df = df[df['MonthlyIncome'].isna()]

# describe the age distribution for rows with missing MonthlyIncome
age_distribution = missing_income_df['age'].describe()
print(age_distribution)

count    29731.000000
mean        56.362349
std         15.438786
min         21.000000
25%         46.000000
50%         57.000000
75%         67.000000
max        109.000000
Name: age, dtype: float64


In [10]:
# define age bins and labels
age_bins = [0, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120]
age_labels = ['0-10', '11-20', '21-30', '31-40', '41-50', '51-60', '61-70', '71-80', '81-90', '91-100', '101-110', '111-120']

# create age groups
missing_income_df['age_group'] = pd.cut(missing_income_df['age'], bins=age_bins, labels=age_labels, right=True)

# count the number of NaN MonthlyIncome for each age group
age_group_counts = missing_income_df['age_group'].value_counts(sort=False)

print(age_group_counts)

age_group
0-10          0
11-20         0
21-30      1705
31-40      3308
41-50      5468
51-60      6904
61-70      6949
71-80      3569
81-90      1594
91-100      227
101-110       7
111-120       0
Name: count, dtype: int64


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  missing_income_df['age_group'] = pd.cut(missing_income_df['age'], bins=age_bins, labels=age_labels, right=True)


In [11]:
# we can set the `MonthlyIncome` to 0, where there're `NaN`
# set MonthlyIncome to 0 where it is NaN
df['MonthlyIncome'].fillna(0, inplace=True)

# verify the result
print(df['MonthlyIncome'].isna().sum())

0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['MonthlyIncome'].fillna(0, inplace=True)


In [12]:
# set `NumberOfDependents` NaN to 0
df['NumberOfDependents'].fillna(0, inplace=True)

# verify the result
print(df['NumberOfDependents'].isna().sum())

0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['NumberOfDependents'].fillna(0, inplace=True)


#### Drop unnecessary column

In [13]:
# drop the ID column
df = df.drop(columns=['Unnamed: 0'])
df.columns

Index(['SeriousDlqin2yrs', 'RevolvingUtilizationOfUnsecuredLines', 'age',
       'NumberOfTime30-59DaysPastDueNotWorse', 'DebtRatio', 'MonthlyIncome',
       'NumberOfOpenCreditLinesAndLoans', 'NumberOfTimes90DaysLate',
       'NumberRealEstateLoansOrLines', 'NumberOfTime60-89DaysPastDueNotWorse',
       'NumberOfDependents'],
      dtype='object')

#### Train-Test Split

In [14]:
# define features (X) and target (y)
X = df.drop(columns=['SeriousDlqin2yrs'])
y = df['SeriousDlqin2yrs']

# split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

#### Feature scaling 

In [15]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [16]:
X_train_scaled.shape[0], X_train_scaled.shape[1], X_test_scaled.shape[0], X_test_scaled.shape[1]

(120000, 10, 30000, 10)

In [17]:
# reshape for LSTM (LSTM expects 3D input: [samples, time steps, features])
X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

## Step 2: Building the LSTM Model

In [18]:
# initialize the LSTM model
lstm_model = Sequential()

# add LSTM layer
lstm_model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])))
lstm_model.add(Dropout(0.2))

# add another LSTM layer
lstm_model.add(LSTM(units=50, return_sequences=False))
lstm_model.add(Dropout(0.2))

# add the output layer
lstm_model.add(Dense(units=1, activation='sigmoid'))

# compile the model
lstm_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# train the model
lstm_model.fit(X_train_scaled, y_train, epochs=10, batch_size=64, validation_data=(X_test_scaled, y_test))

# evaluate the model
accuracy = lstm_model.evaluate(X_test_scaled, y_test)
print(f"LSTM Model Accuracy: {accuracy[1]:.4f}")

I0000 00:00:1726753185.820713  223043 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2024-09-19 19:09:45.897940: W tensorflow/core/common_runtime/gpu/gpu_device.cc:2343] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...
  super().__init__(**kwargs)


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - accuracy: 0.9276 - loss: 0.2739 - val_accuracy: 0.9376 - val_loss: 0.1871
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - accuracy: 0.9350 - loss: 0.1934 - val_accuracy: 0.9368 - val_loss: 0.1864
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9368 - loss: 0.1883 - val_accuracy: 0.9368 - val_loss: 0.1857
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9361 - loss: 0.1884 - val_accuracy: 0.9372 - val_loss: 0.1857
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9358 - loss: 0.1898 - val_accuracy: 0.9371 - val_loss: 0.1863
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 1ms/step - accuracy: 0.9350 - loss: 0.1913 - val_accuracy: 0.9374 - val_loss: 0.1856
Epoch 7/10
[1m1

## Step 3: Reinforcement Learning for Loan Decision-Making

Implement a Reinforcement Learning (RL) environment for loan approval decision-making, where the agent will learn based on the LSTM model’s predictions.

### Create the RL Environment

In [19]:
# # define a custom environment
# class CreditRiskEnv(gym.Env):
#     def __init__(self, lstm_model, X, y):
#         super(CreditRiskEnv, self).__init__()

#         self.lstm_model = lstm_model
#         self.X = X
#         self.y = y
        
#         # define action and observation space
#         # 2 discrete actions: approve or reject
#         # assuming actions: 0 -> reject loan 
#         #                   1 -> approve loan
#         self.action_space = spaces.Discrete(2)

#         # observation space: shape based on input features (e.g., the shape of a single data point)
#         self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(X.shape[1],), dtype=np.float32)

#         # initialize the environment state
#         self.current_step = 0
    
#     def reset(self):
#         """
#         Reset the environment to an initial state.
#         Returns the first observation.
#         """
#         # Typically, you select a random or first data point from the dataset to start
#         self.current_step = 0
#         observation = self.X[self.current_step]
#         return observation
    
#     def step(self, action):
#         """
#         Execute one step in the environment.
#         Action is either 'approve' or 'reject'.
#         """

#         done = False
#         reward = 0
        
#         # simulate loan decision-making
#         if action == 1:  # approve loan
#             prediction = self.lstm_model.predict(np.array([self.X[self.current_step]]))
#             if prediction == 0:  # if the model predicts the borrower will repay (class 0)
#                 reward = 1  # reward for correct decision
#             else:
#                 reward = -1  # penalty for incorrect decision

#         self.current_step += 1
#         if self.current_step >= len(self.X):
#             done = True  # end episode if all data points are used

#         # return the next observation (next data point), reward, done flag, and info
#         observation = self.X[self.current_step] if not done else None
#         info = {}  # extra info if needed

#         return observation, reward, done, info

#     def render(self, mode='human'):
#         """
#         Render the environment. Not needed for this example.
#         """
#         pass

### Train the RL Agent

Can use Stable-Baselines3 to implement an RL agent (like DQN or PPO) and train it in this environment.

In [20]:
# # initialize the environment
# env = CreditRiskEnv(lstm_model=lstm_model, X=X_train_scaled, y=y_train)

# # initialize the DQN agent
# dqn_agent = DQN('MlpPolicy', env, verbose=1)

# # train the agent
# dqn_agent.learn(total_timesteps=10000)

# # evaluate the agent
# obs = env.reset()
# for _ in range(len(X_test_scaled)):
#     action, _states = dqn_agent.predict(obs)
#     obs, reward, done, info = env.step(action)
#     if done:
#         obs = env.reset()

In [21]:
### TEST

from gymnasium import spaces  # Use gymnasium.spaces to define the space
import gymnasium as gym
import numpy as np
from stable_baselines3 import DQN
from stable_baselines3.common.env_checker import check_env
from stable_baselines3.common.vec_env import DummyVecEnv

# define a custom environment
class CreditRiskEnv(gym.Env):
    def __init__(self, lstm_model, X, y):
        super(CreditRiskEnv, self).__init__()

        self.lstm_model = lstm_model
        self.X = X
        self.y = y
        
        # define action and observation space
        # 2 discrete actions: approve or reject
        # actions: 0 -> reject loan, 1 -> approve loan
        self.action_space = spaces.Discrete(2)

        # observation space: shape based on the number of features in each data point
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(self.X.shape[1],), dtype=np.float32)

        # initialize the environment state
        self.current_step = 0
    
    def reset(self, seed=None, options=None):
        """
        Reset the environment to an initial state.
        Returns the first observation.
        """
        # Set the seed if it's passed (for reproducibility)
        if seed is not None:
            np.random.seed(seed)

        self.current_step = 0
        observation = self.X[self.current_step].reshape(1, -1)  # Reshaping to (1, 10)
        return observation, {}  # returning observation and info (empty dict for now)
    
    def step(self, action):
        """
        Execute one step in the environment.
        Action is either 'approve' or 'reject'.
        """
        done = False
        reward = 0
        
        # simulate loan decision-making
        if action == 1:  # approve loan
            prediction = self.lstm_model.predict(np.array([self.X[self.current_step]]))
            if prediction == 0:  # if the model predicts the borrower will repay (class 0)
                reward = 1  # reward for correct decision
            else:
                reward = -1  # penalty for incorrect decision

        self.current_step += 1
        if self.current_step >= len(self.X):
            done = True  # end episode if all data points are used

        # return the next observation (next data point), reward, done flag, and info
        if not done:
            observation = self.X[self.current_step].reshape(1, -1)  # Reshaping to (1, 10)
        else:
            observation = None
            
        info = {}  # extra info if needed

        return observation, reward, done, info

    def render(self, mode='human'):
        """
        Render the environment. Not needed for this example.
        """
        pass

# initialize the environment
env = CreditRiskEnv(lstm_model=lstm_model, X=X_train_scaled, y=y_train)

# Wrap the environment with DummyVecEnv
vec_env = DummyVecEnv([lambda: env])

# initialize the DQN agent
dqn_agent = DQN('MlpPolicy', vec_env, verbose=1)

# train the agent
dqn_agent.learn(total_timesteps=10000)

Using cuda device


ValueError: could not broadcast input array from shape (10,) into shape (1,)

## Step 4: Model Evaluation

After training both the LSTM and RL models, evaluate them on test data.

### LSTM Model Evaluation

In [22]:
# predict on test data
y_pred = lstm_model.predict(X_test_scaled)
y_pred = (y_pred > 0.5).astype(int)

print(f"Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"Precision: {precision_score(y_test, y_pred):.4f}")
print(f"Recall: {recall_score(y_test, y_pred):.4f}")
print(f"F1 Score: {f1_score(y_test, y_pred):.4f}")

[1m938/938[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step
Accuracy: 0.9376
Precision: 0.5747
Recall: 0.1651
F1 Score: 0.2566


### RL Model Evaluation

In [None]:
total_reward = 0
obs = env.reset()
for _ in range(len(X_test_scaled)):
    action, _ = dqn_agent.predict(obs)
    obs, reward, done, _ = env.step(action)
    total_reward += reward
    if done:
        obs = env.reset()

print(f"Total Reward: {total_reward}")