### **Install Dependencies**

We need `gymnasium` (the standard RL library) and tools to render the video.

In [1]:
# Install system dependencies for rendering in Colab (Crucial!)
!apt-get install -y xvfb python-opengl ffmpeg > /dev/null 2>&1
!pip install -q gymnasium[classic_control] imageio imageio-ffmpeg pyvirtualdisplay

# Start a virtual display
from pyvirtualdisplay import Display
display = Display(visible=0, size=(1400, 900))
display.start()

import gymnasium as gym
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from collections import deque
import random
import imageio
from IPython.display import HTML
from base64 import b64encode

print("System Ready! You can now render videos.")

System Ready! You can now render videos.


### **Define the DQN Agent**

This is the "Brain" of the AI. It uses a Deep Neural Network to decide whether to push the cart **Left** or **Right.**

In [2]:
# Build the CartPole environment; 'rgb_array' rendering lets env.render()
# hand back frames that can later be written to a video file.
env = gym.make("CartPole-v1", render_mode="rgb_array")

# Sizes used to build the agent's network: length of one observation vector
# and the number of discrete actions.
state_size, action_size = env.observation_space.shape[0], env.action_space.n

# Optimized Agent
class DQNAgent:
    """Deep Q-Network agent with an epsilon-greedy policy and experience replay.

    Attributes:
        state_size: Length of one observation vector.
        action_size: Number of discrete actions.
        memory: Bounded replay buffer (oldest transitions are dropped first).
        gamma: Discount factor applied to the bootstrapped next-state value.
        epsilon: Current probability of picking a random action.
        epsilon_min: Threshold below which epsilon is no longer decayed.
        epsilon_decay: Multiplicative decay applied once per `replay` call.
        learning_rate: Adam learning rate.
        model: Keras network mapping a state to one Q-value per action.
    """

    def __init__(self, state_size, action_size):
        """Store hyperparameters and build the Q-network."""
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        """Return a compiled MLP: state -> 24 -> 24 -> Q-value per action."""
        model = tf.keras.Sequential([
            # An explicit Input layer replaces `input_dim=` on the first Dense
            # layer, which Keras reports as discouraged (UserWarning).
            layers.Input(shape=(self.state_size,)),
            layers.Dense(24, activation='relu'),
            layers.Dense(24, activation='relu'),
            layers.Dense(self.action_size, activation='linear')
        ])
        model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
        return model

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action for `state` (shape (1, state_size)) epsilon-greedily.

        Returns:
            The action index as a plain Python int.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return int(np.argmax(act_values[0]))

    def replay(self, batch_size):
        """Train on `batch_size` randomly sampled transitions, then decay epsilon.

        Does nothing when the buffer holds fewer than `batch_size` transitions
        (or when `batch_size` is not positive).

        The Q-values of all sampled states and next states are predicted in two
        batched calls instead of two calls per transition, which removes most
        of the per-call Keras overhead. Targets are therefore computed from the
        network as it is before this replay's updates. The fit still uses
        batch_size=1, so the number of optimizer steps per replay is unchanged.
        """
        if batch_size < 1 or len(self.memory) < batch_size:
            return
        minibatch = random.sample(self.memory, batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)

        # Stack the per-transition states into (batch, state_size) matrices.
        states = np.vstack([np.reshape(s, (1, -1)) for s in states])
        next_states = np.vstack([np.reshape(s, (1, -1)) for s in next_states])
        actions = np.asarray(actions, dtype=int)
        rewards = np.asarray(rewards, dtype=float)
        not_done = ~np.asarray(dones, dtype=bool)

        next_q = self.model.predict(next_states, verbose=0)
        # Copy so the targets can be edited regardless of how predict()
        # allocates its result.
        targets = np.array(self.model.predict(states, verbose=0))

        # Bellman target: reward, plus the discounted best next-state value
        # unless the transition ended the episode. Only the Q-value of the
        # action actually taken is changed; the others keep their prediction
        # and so contribute zero error.
        best_next = np.amax(next_q, axis=1)
        targets[np.arange(len(minibatch)), actions] = rewards + self.gamma * best_next * not_done

        self.model.fit(states, targets, batch_size=1, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay



### **Train the Agent**

This will take about **5-10 minutes.** The agent plays 200 episodes (set by `EPISODES` in the cell below). Watch the "Score" (the number of steps the agent survived in that episode) go up!

In [3]:
# Train
agent = DQNAgent(state_size, action_size)
EPISODES = 200  # Number of training episodes; lower it for a quicker (but weaker) run
batch_size = 32

print("Training Agent (Please wait)...")
for e in range(EPISODES):
    state, _ = env.reset()
    state = np.reshape(state, [1, state_size])
    time = 0  # steps survived this episode (reported as the "Score")
    done = False
    while not done:
        action = agent.act(state)
        # Gymnasium reports two separate end-of-episode flags: `terminated`
        # (the pole fell / cart left the track) and `truncated` (time limit).
        # Both must end the episode; previously `truncated` was discarded.
        next_state, reward, terminated, truncated, _ = env.step(action)
        done = terminated or truncated
        # Penalise only a real failure, not merely running out of time.
        reward = reward if not terminated else -10
        next_state = np.reshape(next_state, [1, state_size])
        # Store `terminated` as the done flag so the agent still bootstraps
        # from the next state when the episode was only cut off by the limit.
        agent.remember(state, action, reward, next_state, terminated)
        state = next_state
        time += 1

    # One replay (and one epsilon decay) per episode.
    # NOTE(review): many DQN examples replay after every step instead; that
    # learns faster per episode but is far more expensive - confirm intent.
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)

    # Print progress every 5 episodes
    if e % 5 == 0:
        print(f"Episode: {e}/{EPISODES}, Score: {time} (Epsilon: {agent.epsilon:.2f})")

print("Training Complete!")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Training Agent (Please wait)...
Episode: 0/200, Score: 16 (Epsilon: 1.00)
Episode: 5/200, Score: 23 (Epsilon: 0.98)
Episode: 10/200, Score: 14 (Epsilon: 0.96)
Episode: 15/200, Score: 15 (Epsilon: 0.93)
Episode: 20/200, Score: 16 (Epsilon: 0.91)
Episode: 25/200, Score: 21 (Epsilon: 0.89)
Episode: 30/200, Score: 28 (Epsilon: 0.86)
Episode: 35/200, Score: 45 (Epsilon: 0.84)
Episode: 40/200, Score: 16 (Epsilon: 0.82)
Episode: 45/200, Score: 30 (Epsilon: 0.80)
Episode: 50/200, Score: 40 (Epsilon: 0.78)
Episode: 55/200, Score: 12 (Epsilon: 0.76)
Episode: 60/200, Score: 10 (Epsilon: 0.74)
Episode: 65/200, Score: 17 (Epsilon: 0.73)
Episode: 70/200, Score: 11 (Epsilon: 0.71)
Episode: 75/200, Score: 14 (Epsilon: 0.69)
Episode: 80/200, Score: 16 (Epsilon: 0.67)
Episode: 85/200, Score: 18 (Epsilon: 0.66)
Episode: 90/200, Score: 17 (Epsilon: 0.64)
Episode: 95/200, Score: 14 (Epsilon: 0.62)
Episode: 100/200, Score: 17 (Epsilon: 0.61)
Episode: 105/200, Score: 13 (Epsilon: 0.59)
Episode: 110/200, Scor

### **Generate the Video (The "Presentation")**

This code creates a video file (`rl_cartpole.mp4`) of your smart agent playing the game.

In [4]:
def record_video(agent, filename='rl_cartpole.mp4'):
    """Record one greedy CartPole episode to an MP4 and return an HTML player.

    Args:
        agent: Object exposing `model.predict(state, verbose=0)` that returns
            one row of Q-values per input state.
        filename: Path of the video file to write.

    Returns:
        An IPython `HTML` object embedding the video as a base64 data URL.
    """
    env = gym.make("CartPole-v1", render_mode='rgb_array')
    try:
        writer = imageio.get_writer(filename, fps=30)
        try:
            state, _ = env.reset()
            # -1 lets NumPy infer the observation length, so this function
            # does not depend on a notebook-level `state_size` variable.
            state = np.reshape(state, [1, -1])
            done = False

            print("Recording... (Agent is playing)")

            max_steps = 500 # Force stop if it plays too long
            step = 0

            while not done and step < max_steps:
                writer.append_data(env.render())

                # ACT (Exploit only)
                action = int(np.argmax(agent.model.predict(state, verbose=0)[0]))

                next_state, _, terminated, truncated, _ = env.step(action)
                # Stop on failure as well as on the environment's time limit.
                done = terminated or truncated
                state = np.reshape(next_state, [1, -1])
                step += 1
        finally:
            # Always finalise the file, even if rendering or prediction fails.
            writer.close()
    finally:
        env.close()
    print(f"Video saved as {filename}")

    # Display in Notebook
    with open(filename, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML("""
    <video width=400 controls>
          <source src="%s" type="video/mp4">
    </video>
    """ % data_url)

# Run the recorder. The call is the last expression of the cell, so the HTML
# video player it returns is rendered as the cell's output.
record_video(agent)

Recording... (Agent is playing)




Video saved as rl_cartpole.mp4
