<a href="https://colab.research.google.com/github/Anower120/AI-900-AIFundamentals/blob/main/RL_loan_approval_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
# The code sets up a RL loan approval system using Gym toolkit for developing and comparing RL algorithms.
# This environment simulates the process of approving or denying loan applications based on borrower data.

#Import the libraries
# Importing Required Libraries
import enum
import numpy as np
import gym
from gym.utils import seeding
from sklearn.preprocessing import KBinsDiscretizer, MinMaxScaler
import random

# Constants
DEFAULT_LOAN_TERMS = 36  # Loan term in months
DEFAULT_INTEREST_RATE = 0.05  # Interest rate

# Defining Actions for the RL Environment
class Actions(enum.Enum):
    Approve = 0
    Deny = 1

# Loan State Class
class LoanState:
    def __init__(self, interest_rate, loan_terms):
        self.interest_rate = interest_rate
        self.loan_terms = loan_terms

    def reset(self, borrower_info):
        self.borrower_info = np.array(borrower_info)
        self.loan_approved = False
        return self.encode()  # Return the encoded state

    @property
    def shape(self):
        return self.borrower_info.shape

    def encode(self):
        return self.borrower_info

    def step(self, action):
        if action == Actions.Approve:
            self.loan_approved = True
            return self.calculate_reward(), True
        self.loan_approved = False
        return 0, True

    def calculate_reward(self):
        credit_score, income, loan_amount, employment_years = self.borrower_info
        risk_factor = loan_amount / income
        return 1.0 if credit_score > 700 and risk_factor < 0.5 and employment_years > 5 else -1.0

# Loan Environment Class
class LoanEnv(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, borrower_data):
        self.borrower_data = borrower_data
        self._state = LoanState(DEFAULT_INTEREST_RATE, DEFAULT_LOAN_TERMS)
        self.action_space = gym.spaces.Discrete(n=len(Actions))
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(4,), dtype=np.float32)
        self.seed()

    def reset(self):
        borrower_info = self.np_random.choice(self.borrower_data)
        return self._state.reset(borrower_info)  # Return the encoded state from LoanState

    def step(self, action_idx):
        action = Actions(action_idx)
        reward, done = self._state.step(action)
        next_state = self._state.encode()  # Encode the current state as the next state
        return next_state, reward, done

    def render(self, mode='human', close=False):
        # Rendering logic (if applicable)
        pass

    def close(self):
        # Close environment
        pass

    def seed(self, seed=None):
        self.np_random, seed1 = seeding.np_random(seed)
        seed2 = seeding.hash_seed(seed1 + 1) % 2 ** 31
        return [seed1, seed2]

# Sample Borrower Data (Normalization Required)
borrower_data = [
    (750, 60000, 10000, 10),
    (650, 40000, 5000, 5),
    (500, 30000, 15000, 2)
]

# Normalize the Data
scaler = MinMaxScaler()
borrower_data_normalized = scaler.fit_transform(borrower_data)

# Q-Learning Model Setup

# Discretize the State Space
n_bins = 5
est = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
est.fit(borrower_data_normalized)

# Convert Multi-dimensional State into a Single Integer Index
def discretize_state(state):
    # Reshape the state array to 2D (1, -1) for a single sample
    discretized = est.transform(state.reshape(1, -1))[0]
    # Convert the multi-dimensional state into a single integer index
    return sum([discretized[i] * (n_bins ** i) for i in range(len(discretized))]).astype(int)


# Initialize Q-table
state_size = n_bins ** len(borrower_data_normalized[0])
action_size = len(Actions)
q_table = np.zeros((state_size, action_size))

# Action Selection Function
def choose_action(state_index):
    return env.action_space.sample() if random.uniform(0, 1) < epsilon else np.argmax(q_table[state_index])

# Q-table Update Function
def update_q_table(state_index, action, reward, next_state_index):
    old_value = q_table[state_index, action]
    next_max = np.max(q_table[next_state_index])
    q_table[state_index, action] = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)

# Training the Model
alpha, gamma, epsilon, epsilon_decay = 0.1, 0.6, 0.1, 0.99
num_episodes = 1000
env = LoanEnv(borrower_data_normalized)

for i in range(num_episodes):
    state = env.reset()
    state_index = discretize_state(state)
    done = False
    total_reward = 0

    while not done:
        action = choose_action(state_index)
        next_state, reward, done = env.step(action)
        next_state_index = discretize_state(next_state)
        update_q_table(state_index, action, reward, next_state_index)
        state_index = next_state_index
        total_reward += reward
        epsilon = max(epsilon * epsilon_decay, 0.01)  # Decay epsilon

    if i % 100 == 0:
        print(f"Episode: {i}, Total Reward: {total_reward}, Epsilon: {epsilon:.4f}")

# Model Evaluation
num_test_episodes = 100
total_rewards, correct_approvals, correct_denials = 0, 0, 0

for i in range(num_test_episodes):
    state = env.reset()  # Get the initial state from the environment
    state_index = discretize_state(state)  # Discretize the state
    done = False
    episode_reward = 0

    while not done:
        action = np.argmax(q_table[state_index])
        next_state, reward, done = env.step(action)
        next_state_index = discretize_state(next_state)
        episode_reward += reward
        if action == Actions.Approve.value and reward > 0:
            correct_approvals += 1
        elif action == Actions.Deny.value and reward >= 0:
            correct_denials += 1
        state_index = next_state_index

    total_rewards += episode_reward

# Output Results
average_reward = total_rewards / num_test_episodes
print(f"Average Reward: {average_reward}")
print(f"Correct Approvals: {correct_approvals}")
print(f"Correct Denials: {correct_denials}")


  deprecation(
  deprecation(
  risk_factor = loan_amount / income
  risk_factor = loan_amount / income


Episode: 0, Total Reward: -1.0, Epsilon: 0.0990
Episode: 100, Total Reward: 0, Epsilon: 0.0362
Episode: 200, Total Reward: 0, Epsilon: 0.0133
Episode: 300, Total Reward: 0, Epsilon: 0.0100


  risk_factor = loan_amount / income


Episode: 400, Total Reward: 0, Epsilon: 0.0100
Episode: 500, Total Reward: 0, Epsilon: 0.0100
Episode: 600, Total Reward: 0, Epsilon: 0.0100
Episode: 700, Total Reward: 0, Epsilon: 0.0100
Episode: 800, Total Reward: 0, Epsilon: 0.0100
Episode: 900, Total Reward: 0, Epsilon: 0.0100
Average Reward: 0.0
Correct Approvals: 0
Correct Denials: 100
