<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStardust/blob/main/Example_Deep_Q_Learning_with_OpenAI_Gym.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import gym
import random
from collections import deque

def discretize_state(state, bins):
    """Map a continuous state onto a tuple of per-dimension bin indices.

    Args:
        state: Iterable of numeric values, one per state dimension.
        bins: Sequence of 1-D bin-edge sequences (expected to be increasing);
            ``bins[i]`` holds the edges used for ``state[i]``.

    Returns:
        A tuple of ints with one index per state dimension. Each index lies
        in ``[0, len(bins[i]) - 1]``, so it is always a valid non-negative
        index into an axis of that length.

    Raises:
        IndexError: If ``state`` has more dimensions than ``bins``.
    """
    indices = []
    for i, value in enumerate(state):
        edges = bins[i]
        # np.digitize returns 0 for values below the first edge and
        # len(edges) for values at/above the last edge.
        raw_index = int(np.digitize(value, edges)) - 1
        # Clamp: without this, an underflow yields -1, which NumPy indexing
        # silently wraps to the *last* bin, aliasing very low and very high
        # values onto the same Q-table cell.
        indices.append(min(max(raw_index, 0), len(edges) - 1))
    return tuple(indices)

# Instantiate the CartPole-v1 environment that the agent is trained on
env = gym.make("CartPole-v1")

# Hyperparameters
episodes = 1000        # Number of training episodes
gamma = 0.99           # Discount factor applied to future rewards
epsilon = 1.0          # Initial exploration rate (probability of a random action)
epsilon_min = 0.01     # Threshold below which epsilon stops decaying
epsilon_decay = 0.995  # Multiplicative epsilon decay applied after each episode
learning_rate = 0.001  # Step size of the Q-value update

# Bin edges used to discretize the state: one array of 10 evenly spaced edges
# per state dimension, each spanning the symmetric range [-limit, +limit].
bins = [np.linspace(-limit, limit, 10) for limit in (4.8, 4, .418, 4)]

# Initialize the Q-table: one axis per state dimension, sized from the bin
# edges used for discretization (so the table cannot drift out of sync with
# `bins`), plus a final axis indexed by action.
state_size = [len(edges) for edges in bins]
action_size = env.action_space.n
q_table = np.zeros(state_size + [action_size])

# Training the agent with tabular, epsilon-greedy Q-learning.
# try/finally guarantees the environment is closed even if training raises.
try:
    for episode in range(episodes):
        # Gym >= 0.26 returns (observation, info) from reset(); older
        # releases return only the observation array.
        reset_result = env.reset()
        if isinstance(reset_result, tuple):
            observation = reset_result[0]
        else:
            observation = reset_result
        state = discretize_state(observation, bins)
        done = False

        while not done:
            if np.random.rand() <= epsilon:
                action = env.action_space.sample()  # Explore
            else:
                action = int(np.argmax(q_table[state]))  # Exploit

            # Gym >= 0.26 returns (obs, reward, terminated, truncated, info);
            # older releases return (obs, reward, done, info).
            step_result = env.step(action)
            if len(step_result) == 5:
                next_observation, reward, terminated, truncated, _ = step_result
            else:
                # Legacy API: `done` does not distinguish termination from
                # time-limit truncation, so it is treated as termination.
                next_observation, reward, terminated, _ = step_result
                truncated = False
            done = terminated or truncated
            next_state = discretize_state(next_observation, bins)

            # Q-learning update. A terminal transition has no future return,
            # so the target must not bootstrap from the next state's values.
            if terminated:
                future_value = 0.0
            else:
                future_value = np.max(q_table[next_state])
            td_target = reward + gamma * future_value
            q_table[state][action] += learning_rate * (td_target - q_table[state][action])

            state = next_state

        # Decay exploration once per episode, never dropping below the floor.
        epsilon = max(epsilon_min, epsilon * epsilon_decay)
finally:
    env.close()