<a href="https://colab.research.google.com/github/Anower120/AI-900-AIFundamentals/blob/main/RL_Q-learning_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# This code sets up a reinforcement-learning (RL) loan approval system using Gym, a toolkit for developing and comparing RL algorithms.
# The environment simulates approving or denying loan applications based on borrower data.

#Import the libraries
import enum  # enumeration used to work with a number of constants
import numpy as np
import gym
from gym.utils import seeding

# Constants for the environment
# LoanEnv.__init__ hands both values to LoanState, which stores them; nothing
# in this file reads them back when computing a reward.
DEFAULT_LOAN_TERMS = 36  # Example loan term in months
DEFAULT_INTEREST_RATE = 0.05  # Example interest rate

class Actions(enum.Enum):
    """Decisions the agent can take on a loan application.

    The member values double as the integer action indices: LoanEnv sizes its
    Discrete action space with ``len(Actions)`` and converts an index back
    with ``Actions(action_idx)``.
    """
    Approve = 0  # grant the loan; the reward comes from LoanState.calculate_reward
    Deny = 1  # reject the loan; LoanState.step gives this a reward of 0

class LoanState:
    """State of a single loan application inside the environment.

    Holds one borrower record, ordered as
    ``(credit_score, income, loan_amount, employment_years)``, and scores the
    approve/deny decision taken on it.
    """

    def __init__(self, interest_rate, loan_terms):
        """Store the loan parameters.

        Args:
            interest_rate: Interest rate of the loan product.
            loan_terms: Loan term (the module constant is given in months).

        Both values are only stored; the reward function does not read them.
        """
        self.interest_rate = interest_rate
        self.loan_terms = loan_terms

    def reset(self, borrower_info):
        """Load a new borrower record and clear the previous decision.

        Args:
            borrower_info: Sequence of four numbers in the order
                ``(credit_score, income, loan_amount, employment_years)``.
        """
        self.borrower_info = np.array(borrower_info)
        self.loan_approved = False

    @property
    def shape(self):
        """Shape of the current borrower record array."""
        return self.borrower_info.shape

    def encode(self):
        """Return the current borrower record as the observation array."""
        return self.borrower_info

    def step(self, action):
        """Apply the decision and return ``(reward, done)``.

        Args:
            action: An ``Actions`` member or its integer value.

        Returns:
            ``(reward, done)``. Approving yields ``calculate_reward()``;
            denying yields 0. ``done`` is always True because each
            application is a one-step episode.

        Raises:
            ValueError: If ``action`` is neither an ``Actions`` member nor a
                valid action value.
        """
        # Normalise first: a bare index such as 0 never compares equal to
        # Actions.Approve and would otherwise be treated as a denial.
        action = Actions(action)
        self.loan_approved = action is Actions.Approve
        reward = self.calculate_reward() if self.loan_approved else 0
        return reward, True

    def calculate_reward(self):
        """Score an approval of the current borrower.

        Returns:
            1.0 when the credit score is above 700, the loan-to-income ratio
            is below 0.5 and employment exceeds 5 years; otherwise -1.0.
        """
        credit_score, income, loan_amount, employment_years = self.borrower_info
        # A non-positive income cannot support a loan. Without this guard a
        # zero income divides by zero and a negative income produces a
        # negative ratio that would wrongly pass the "< 0.5" test.
        if income <= 0:
            return -1.0
        risk_factor = loan_amount / income
        if credit_score > 700 and risk_factor < 0.5 and employment_years > 5:
            return 1.0
        return -1.0

class LoanEnv(gym.Env):
    """Gym environment in which each episode is one loan decision.

    ``reset`` draws a borrower record at random and ``step`` applies an
    approve/deny action to it, which always ends the episode. The methods
    follow the older Gym API: ``reset`` returns only the observation and
    ``step`` returns a 4-tuple.
    """

    metadata = {'render.modes': ['human']}

    def __init__(self, borrower_data):
        """Create the environment.

        Args:
            borrower_data: Non-empty, indexable sequence of borrower records,
                each ``(credit_score, income, loan_amount, employment_years)``.

        Raises:
            ValueError: If ``borrower_data`` is empty.
        """
        # Fail here rather than on the first reset(), where the cause would
        # be much less obvious.
        if len(borrower_data) == 0:
            raise ValueError("borrower_data must contain at least one record")
        self.borrower_data = borrower_data
        self._state = LoanState(DEFAULT_INTEREST_RATE, DEFAULT_LOAN_TERMS)
        self.action_space = gym.spaces.Discrete(n=len(Actions))
        self.observation_space = gym.spaces.Box(low=0, high=np.inf, shape=(4,), dtype=np.float32)
        self.seed()

    def reset(self):
        """Start a new episode with a randomly drawn borrower.

        Returns:
            The borrower record as an observation array.
        """
        # Draw a row index instead of passing the records to choice():
        # RandomState.choice only accepts 1-D input, so choosing directly from
        # a list of tuples works only when np_random is a numpy Generator.
        row = self.np_random.choice(len(self.borrower_data))
        self._state.reset(self.borrower_data[row])
        return self._state.encode()

    def step(self, action_idx):
        """Apply an action to the current application.

        Args:
            action_idx: Integer action index (a value of ``Actions``).

        Returns:
            ``(obs, reward, done, info)`` where ``info["loan_approved"]``
            reports the decision that was taken.

        Raises:
            ValueError: If ``action_idx`` is not a valid ``Actions`` value.
        """
        action = Actions(action_idx)
        reward, done = self._state.step(action)
        obs = self._state.encode()
        info = {"loan_approved": self._state.loan_approved}
        return obs, reward, done, info

    def render(self, mode='human', close=False):
        """No-op; this environment has nothing to draw."""
        pass

    def close(self):
        """No-op; this environment holds no resources."""
        pass

    def seed(self, seed=None):
        """Seed the environment's random number generator.

        Args:
            seed: Seed passed to ``seeding.np_random``; None asks for a
                fresh one.

        Returns:
            ``[seed1, seed2]``: the seed actually used and a second seed
            derived from it.
        """
        self.np_random, seed1 = seeding.np_random(seed)
        # NOTE(review): hash_seed emits a deprecation warning in some gym
        # releases and appears to be absent from newer ones -- confirm against
        # the installed version. Fall back to a plain offset so seeding still
        # works when it is missing.
        hash_seed = getattr(seeding, "hash_seed", None)
        if hash_seed is not None:
            seed2 = hash_seed(seed1 + 1) % 2 ** 31
        else:
            seed2 = (seed1 + 1) % 2 ** 31
        return [seed1, seed2]

# Sample borrower data.
# Each record is (credit_score, income, loan_amount, employment_years), the
# order in which LoanState.calculate_reward unpacks it.
borrower_data = [
    (750, 60000, 10000, 10),  # the only record that meets all three approval thresholds
    (650, 40000, 5000, 5),
    (500, 30000, 15000, 2)
]

#  Improved Q-Learning Model with State Discretization
import random
from sklearn.preprocessing import KBinsDiscretizer

# Learning parameters
alpha = 0.1  # learning rate: weight given to the new estimate in each Q update
gamma = 0.6  # discount factor applied to the next state's best Q-value
epsilon = 0.1  # probability of taking a random (exploratory) action
epsilon_decay = 0.99  # epsilon is multiplied by this after each episode (floored at 0.01)

# Discretize the state space
n_bins = 5  # bins per feature; also the base used by discretize_state
est = KBinsDiscretizer(n_bins=n_bins, encode='ordinal', strategy='uniform')
# borrower_data is a list of tuples here (not a numpy array); the bin edges
# are therefore learned from the sample records themselves.
est.fit(borrower_data)

def discretize_state(state):
    """Map a raw observation to a single integer state index.

    Each feature is binned by the fitted discretizer ``est``; the bin numbers
    are then read as the digits of a base-``n_bins`` number, with the first
    feature as the least significant digit.
    """
    bins = est.transform([state])[0]
    index = sum(digit * n_bins ** position for position, digit in enumerate(bins))
    return index.astype(int)


def choose_action(state_index):
    """Pick an action epsilon-greedily for the given state index.

    With probability ``epsilon`` a random action is sampled from the
    environment's action space; otherwise the action with the highest
    Q-value for the state is returned.
    """
    explore = random.random() < epsilon
    if explore:
        return env.action_space.sample()
    return np.argmax(q_table[state_index])


def update_q_table(state_index, action, reward, next_state_index, done=False):
    """Apply one tabular Q-learning update to the global ``q_table``.

    Args:
        state_index: Discretized index of the state the action was taken in.
        action: Integer action index.
        reward: Reward received for the transition.
        next_state_index: Discretized index of the resulting state.
        done: True when the transition ended the episode. A terminal
            transition has no future return, so the bootstrap term is
            dropped. Defaults to False, which keeps the earlier
            always-bootstrap behaviour for callers that do not pass it.
    """
    old_value = q_table[state_index, action]
    next_max = 0.0 if done else np.max(q_table[next_state_index])
    new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
    q_table[state_index, action] = new_value


# Train the model
num_episodes = 1000
env = LoanEnv(borrower_data)

# The Q-table has one row per discretized state index and one column per
# action. discretize_state packs one base-n_bins digit per observation
# feature, so the indices span n_bins ** (number of features).
n_states = n_bins ** env.observation_space.shape[0]
q_table = np.zeros((n_states, env.action_space.n))

for i in range(num_episodes):
    state = discretize_state(env.reset())
    done = False
    total_reward = 0

    while not done:
        action = choose_action(state)
        next_state, reward, done, info = env.step(action)
        next_state = discretize_state(next_state)

        # Pass done so the terminal step is not bootstrapped from the
        # (unchanged) observation returned after the decision.
        update_q_table(state, action, reward, next_state, done)

        state = next_state
        total_reward += reward

    # Decay exploration once per finished episode, never below 0.01.
    epsilon = max(epsilon * epsilon_decay, 0.01)

    if i % 100 == 0:
        print(f"Episode: {i}, Total Reward: {total_reward}")



# Evaluate the performance of the Q-learning model by running the greedy
# policy (no exploration) and counting the decisions that were right.
# Evaluation parameters
num_test_episodes = 100
total_rewards = 0
correct_approvals = 0
correct_denials = 0

for i in range(num_test_episodes):
    state = discretize_state(env.reset())
    done = False
    episode_reward = 0

    while not done:
        action = np.argmax(q_table[state])  # Choose action based on Q-table

        # Ground truth for this applicant: what approving would have earned.
        # A denial always yields reward 0, so the reward from step() cannot
        # tell a justified denial from a wrongly rejected good applicant.
        approval_reward = env._state.calculate_reward()

        next_state, reward, done, info = env.step(action)
        next_state = discretize_state(next_state)

        episode_reward += reward

        if action == Actions.Approve.value and reward > 0:
            correct_approvals += 1
        elif action == Actions.Deny.value and approval_reward < 0:
            correct_denials += 1

        state = next_state

    total_rewards += episode_reward

# Calculate average reward
average_reward = total_rewards / num_test_episodes

print(f"Average Reward: {average_reward}")
print(f"Correct Approvals: {correct_approvals}")
print(f"Correct Denials: {correct_denials}")


  deprecation(
  deprecation(


Episode: 0, Total Reward: -1.0
Episode: 100, Total Reward: 0
Episode: 200, Total Reward: 0
Episode: 300, Total Reward: 0
Episode: 400, Total Reward: 1.0
Episode: 500, Total Reward: 0
Episode: 600, Total Reward: 0
Episode: 700, Total Reward: 1.0
Episode: 800, Total Reward: 1.0
Episode: 900, Total Reward: 1.0
Average Reward: 0.33
Correct Approvals: 33
Correct Denials: 67
