<a href="https://colab.research.google.com/github/Anower120/AI-900-AIFundamentals/blob/main/RL_DQN_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# The code sets up a RL loan approval system using Gym toolkit for developing and comparing RL algorithms.
# This environment simulates the process of approving or denying loan applications based on borrower data.
###
#Import the libraries
# Importing Required Libraries
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
import numpy as np
import random
from collections import deque
from sklearn.preprocessing import MinMaxScaler
import gym
from gym import spaces

class LoanEnv(gym.Env):
    """Custom Environment that follows gym interface"""
    metadata = {'render.modes': ['human']}

    def __init__(self, borrower_data):
        super(LoanEnv, self).__init__()
        self.borrower_data = borrower_data
        # Define action and observation space
        # They must be gym.spaces objects
        self.action_space = spaces.Discrete(2)  # Approve or Deny
        self.observation_space = spaces.Box(low=0, high=1, shape=(len(borrower_data[0]),), dtype=np.float32)

        # Example for internal state
        self.state = None

    def reset(self):
        # Reset the state of the environment to an initial state
        self.state = self.borrower_data[0]  # Example of resetting to the first borrower
        return np.array(self.state, dtype=np.float32)

    def step(self, action):
        # Execute one time step within the environment
        # Simplified reward logic
        reward = 0
        done = False
        info = {}

        # Example logic for moving to the next borrower in the dataset
        next_state = self.state # Placeholder for actual logic
        reward = 1 if action == 0 else -1  # Placeholder for actual logic

        # Check if we're done with the dataset
        done = True  # Placeholder for actual logic

        return np.array(next_state, dtype=np.float32), reward, done, info

    def render(self, mode='human'):
        # Render the environment to the screen
        pass

    def close(self):
        pass

# Sample Borrower Data
borrower_data = np.array([
    [750, 60000, 10000, 10],
    [650, 40000, 5000, 5],
    [500, 30000, 15000, 2]
])

# Normalize the Data
scaler = MinMaxScaler()
borrower_data_normalized = scaler.fit_transform(borrower_data)

# Instantiate the LoanEnv with normalized borrower data
env = LoanEnv(borrower_data_normalized)


# Define the DQN Agent
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95    # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        # Neural Net for Deep-Q learning Model
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_size, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)



# Initialize DQN Agent
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = DQNAgent(state_size, action_size)


# Training the DQN Agent
# This step involves running episodes of interactions with the environment,
# storing experiences in a replay buffer, and periodically training the agent on a batch of experiences.
num_episodes = 100  # Total episodes for training
batch_size = 32  # Batch size for training the agent

# Training the DQN Agent
for e in range(num_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    total_reward = 0

    for time in range(500):  # Maximum time steps per episode
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward
        if done:
            print(f"Episode: {e+1}/{num_episodes}, Reward: {total_reward}, Epsilon: {agent.epsilon:.2f}")
            break
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)


# Save the model after training
agent.save("loan_dqn_model.h5")

# Load the model for evaluation or further training
agent.load("loan_dqn_model.h5")


# Evaluating the DQN Model
num_test_episodes = 100  # Adjusted for demonstration
total_rewards = 0

for e in range(num_test_episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])
    episode_reward = 0

    while True:
        action = np.argmax(agent.model.predict(state)[0])
        next_state, reward, done, _ = env.step(action)
        state = np.reshape(next_state, [1, state_size])
        episode_reward += reward
        if done:
            break

    total_rewards += episode_reward
    print(f"Test Episode: {e+1}, Reward: {episode_reward}")

average_reward = total_rewards / num_test_episodes
print(f"Average Reward Over {num_test_episodes} Test Episodes: {average_reward}")




Episode: 1/100, Reward: 1, Epsilon: 1.00
Episode: 2/100, Reward: -1, Epsilon: 1.00
Episode: 3/100, Reward: 1, Epsilon: 1.00
Episode: 4/100, Reward: -1, Epsilon: 1.00
Episode: 5/100, Reward: 1, Epsilon: 1.00
Episode: 6/100, Reward: -1, Epsilon: 1.00
Episode: 7/100, Reward: 1, Epsilon: 1.00
Episode: 8/100, Reward: 1, Epsilon: 1.00
Episode: 9/100, Reward: -1, Epsilon: 1.00
Episode: 10/100, Reward: -1, Epsilon: 1.00
Episode: 11/100, Reward: -1, Epsilon: 1.00
Episode: 12/100, Reward: -1, Epsilon: 1.00
Episode: 13/100, Reward: -1, Epsilon: 1.00
Episode: 14/100, Reward: -1, Epsilon: 1.00
Episode: 15/100, Reward: 1, Epsilon: 1.00
Episode: 16/100, Reward: -1, Epsilon: 1.00
Episode: 17/100, Reward: -1, Epsilon: 1.00
Episode: 18/100, Reward: -1, Epsilon: 1.00
Episode: 19/100, Reward: 1, Epsilon: 1.00
Episode: 20/100, Reward: -1, Epsilon: 1.00
Episode: 21/100, Reward: -1, Epsilon: 1.00
Episode: 22/100, Reward: -1, Epsilon: 1.00
Episode: 23/100, Reward: -1, Epsilon: 1.00
Episode: 24/100, Reward: -1