<a href="https://colab.research.google.com/github/OneFineStarstuff/OneFineStardust/blob/main/Reinforcement_Learning_with_Deep_Q_Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import gym
import random
from collections import deque
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Number of stored transitions sampled for each replay update
batch_size = 32

# Instantiate the CartPole task the agent is trained on
env = gym.make('CartPole-v1')

# Network dimensions are read from the environment's spaces:
# one output per discrete action, one input per observation component
action_size = env.action_space.n
state_size = env.observation_space.shape[0]

# Construct and compile the Q-value network
def build_model():
    """Return a compiled fully-connected network for the DQN agent.

    The network takes ``state_size`` inputs, passes them through two
    24-unit ReLU layers and emits ``action_size`` linear outputs (one value
    per action). It is compiled with mean-squared-error loss and the Adam
    optimizer at a learning rate of 0.001.
    """
    layers = [
        Dense(24, input_dim=state_size, activation='relu'),
        Dense(24, activation='relu'),
        Dense(action_size, activation='linear'),
    ]
    network = Sequential(layers)
    network.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    return network

# Training hyperparameters
num_episodes = 1000  # number of episodes to train for
epsilon = 0.1        # probability of taking a random (exploratory) action
gamma = 0.95         # discount applied to the bootstrapped next-state value

model = build_model()
memory = deque(maxlen=2000)  # replay buffer; oldest transitions drop off first


def _reset_env(environment):
    """Reset ``environment`` and return only the initial observation.

    Gym >= 0.26 (and Gymnasium) return ``(observation, info)`` from
    ``reset()``, while older Gym releases return the bare observation.
    Both forms are accepted so the script runs on either API.
    """
    result = environment.reset()
    return result[0] if isinstance(result, tuple) else result


def _step_env(environment, action):
    """Take one step and return ``(next_state, reward, terminal, done)``.

    ``done`` says whether the episode loop must stop. ``terminal`` says
    whether the next state has no future value and is what gets stored in
    the replay buffer. With the 5-tuple step API a time-limit truncation
    ends the episode without being terminal; with the legacy 4-tuple API
    the two flags cannot be told apart here, so both equal ``done``.
    """
    result = environment.step(action)
    if len(result) == 5:
        next_state, reward, terminated, truncated, _ = result
        return next_state, reward, terminated, terminated or truncated
    next_state, reward, done, _ = result
    return next_state, reward, done, done


def _train_on_minibatch(network, minibatch, discount):
    """Fit ``network`` once on a batch of replayed transitions.

    Each transition is ``(state, action, reward, next_state, terminal)``.
    The regression target equals the network's current prediction, except
    at the taken action, where it is
    ``reward + discount * max_a Q(next_state, a)`` (the bootstrap term is
    dropped for terminal transitions). All predictions and the gradient
    update are done on the whole batch at once rather than per sample.
    """
    states, actions, rewards, next_states, terminals = (
        np.asarray(column) for column in zip(*minibatch)
    )
    states = states.reshape(-1, state_size)
    next_states = next_states.reshape(-1, state_size)

    next_q = network.predict(next_states, verbose=0)
    targets = network.predict(states, verbose=0)

    # 1.0 where the episode continues, 0.0 where the next state is terminal.
    not_terminal = 1.0 - terminals.astype(np.float32)
    rows = np.arange(len(minibatch))
    targets[rows, actions] = rewards + discount * not_terminal * next_q.max(axis=1)

    network.fit(states, targets, batch_size=len(minibatch), epochs=1, verbose=0)


# Training the DQN
try:
    for e in range(num_episodes):
        state = _reset_env(env)
        done = False
        episode_score = 0.0  # sum of rewards collected during this episode

        while not done:
            # Choose action based on epsilon-greedy policy
            if np.random.rand() <= epsilon:  # Exploration
                action = random.randrange(action_size)
            else:  # Exploitation
                q_values = model.predict(state.reshape(1, state_size), verbose=0)
                action = int(np.argmax(q_values[0]))

            next_state, reward, terminal, done = _step_env(env, action)
            episode_score += reward
            memory.append((state, action, reward, next_state, terminal))

            # Start learning once the buffer holds more than one batch.
            if len(memory) > batch_size:
                _train_on_minibatch(model, random.sample(memory, batch_size), gamma)

            state = next_state

        print(f"Episode: {e+1}, Score: {episode_score}")
finally:
    # Release the environment even if training is interrupted or fails.
    env.close()