### Lab 29: Deep Q-Network (DQN) for CartPole

Objective: Implement a DQN to solve the CartPole-v1 environment



In [None]:

!pip install gymnasium tensorflow --quiet

import gymnasium as gym
import numpy as np
import tensorflow as tf
from collections import deque
import random
import matplotlib.pyplot as plt

# 1. Create Environment
env = gym.make("CartPole-v1")

# Network input width: size of the first axis of the observation space shape.
observation_shape = env.observation_space.shape
state_size = observation_shape[0]

# Network output width: number of discrete actions the action space reports.
action_size = env.action_space.n

# 2. DQN Model
def build_model(state_size, action_size, hidden_units=24, learning_rate=0.001):
    """Build and compile the fully connected Q-network.

    The network maps a state vector to one Q-value per action: two ReLU
    hidden layers followed by a linear output layer, trained with Adam on
    mean-squared error.

    Args:
        state_size: Length of the state vector fed to the network.
        action_size: Number of actions, i.e. number of output units.
        hidden_units: Width of each of the two hidden layers.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        A compiled ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential([
        # Declare the input explicitly; passing `input_shape=` to the first
        # Dense layer is deprecated in current Keras and emits a warning.
        tf.keras.Input(shape=(state_size,)),
        tf.keras.layers.Dense(hidden_units, activation='relu'),
        tf.keras.layers.Dense(hidden_units, activation='relu'),
        # Linear output: Q-values are unbounded regression targets.
        tf.keras.layers.Dense(action_size, activation='linear'),
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
    )
    return model

# Build the online network first and the target network second, then make
# the target start out as an exact copy of the online network's weights.
model, target_model = (
    build_model(state_size, action_size) for _ in range(2)
)
initial_weights = model.get_weights()
target_model.set_weights(initial_weights)

# 3. Hyperparameters
# -- schedule --
episodes = 200              # number of training episodes
update_target_every = 10    # episodes between target-network weight syncs
batch_size = 32             # transitions sampled from memory per replay call

# -- Q-learning --
gamma = 0.95                # discount rate

# -- epsilon-greedy exploration: start value, floor, multiplicative decay --
epsilon, epsilon_min, epsilon_decay = 1.0, 0.01, 0.995

# Bounded replay buffer: once 2000 transitions are stored, the oldest are dropped.
memory = deque(maxlen=2000)
# Total reward collected in each finished episode, in order.
rewards_list = list()

# 4. Helper Function: Choose Action
def act(state, epsilon):
    """Choose an action for ``state`` using an epsilon-greedy rule.

    A uniform random number is drawn; when it is at most ``epsilon`` a
    random action index in ``range(action_size)`` is returned. Otherwise
    the global ``model`` predicts Q-values for the state and the index of
    the largest one is returned.
    """
    should_explore = np.random.rand() <= epsilon
    if not should_explore:
        # The model expects a batch, so wrap the single state in one.
        single_state_batch = np.array([state])
        predicted = model.predict(single_state_batch, verbose=0)
        return np.argmax(predicted[0])
    return random.randrange(action_size)

# 5. Replay Function
def replay():
    """Train the online network on one random minibatch from replay memory.

    Returns immediately while ``memory`` holds fewer than ``batch_size``
    transitions. Otherwise it samples ``batch_size`` transitions, computes
    the Q-learning target for each taken action

        reward                                   if the transition is done
        reward + gamma * max_a Q_target(s', a)   otherwise

    and fits ``model`` on the whole minibatch in a single call. Finally the
    global ``epsilon`` is multiplied by ``epsilon_decay`` while it is still
    above ``epsilon_min``.
    """
    global epsilon
    if len(memory) < batch_size:
        return

    minibatch = random.sample(memory, batch_size)
    states, actions, rewards, next_states, dones = zip(*minibatch)
    states = np.array(states)
    next_states = np.array(next_states)
    actions = np.array(actions)
    rewards = np.array(rewards, dtype=np.float32)
    dones = np.array(dones, dtype=bool)

    # Two batched forward passes and one batched fit: calling predict/fit
    # once per transition costs far more in call overhead than in compute.
    targets = model.predict(states, verbose=0)
    next_q = target_model.predict(next_states, verbose=0)

    # Only the Q-value of the action actually taken is overwritten, so the
    # other outputs contribute zero error to the MSE loss.
    bootstrapped = rewards + gamma * np.amax(next_q, axis=1)
    targets[np.arange(batch_size), actions] = np.where(dones, rewards, bootstrapped)

    model.fit(states, targets, batch_size=batch_size, epochs=1, verbose=0)

    if epsilon > epsilon_min:
        epsilon *= epsilon_decay

# 6. Main Training Loop
for e in range(episodes):
    state, _ = env.reset()
    total_reward = 0
    done = False
    while not done:
        action = act(state, epsilon)
        # Gymnasium's step() returns two separate end-of-episode flags:
        # `terminated` (a terminal state was reached) and `truncated`
        # (the episode was cut short, e.g. by a step limit).
        next_state, reward, terminated, truncated, _ = env.step(action)
        # Store only `terminated` as the done flag, so that replay() still
        # bootstraps from next_state when the episode was merely truncated.
        memory.append((state, action, reward, next_state, terminated))
        state = next_state
        total_reward += reward
        replay()
        # End the episode on either flag; ignoring `truncated` would keep
        # stepping an environment that has already signalled a reset.
        done = terminated or truncated
    rewards_list.append(total_reward)

    # Update target model (every `update_target_every` episodes, starting
    # with the very first one since 0 % n == 0).
    if e % update_target_every == 0:
        target_model.set_weights(model.get_weights())

    print(f"Episode {e+1}/{episodes}, Score: {total_reward}, Epsilon: {epsilon:.2f}")

# 7. Plot Rewards
# One point per episode: the total reward collected in that episode.
plt.plot(rewards_list)
for apply_text, text in (
    (plt.xlabel, 'Episode'),
    (plt.ylabel, 'Total Reward'),
    (plt.title, 'DQN Training Performance on CartPole'),
):
    apply_text(text)
plt.show()
