In [None]:
!pip install gymnasium
!pip install swig
!pip install gymnasium[classic-control]
!pip install moviepy

Collecting gymnasium
  Downloading gymnasium-1.0.0-py3-none-any.whl.metadata (9.5 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Downloading gymnasium-1.0.0-py3-none-any.whl (958 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/958.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━[0m [32m583.7/958.1 kB[0m [31m17.9 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m19.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Installing collected packages: farama-notifications, gymnasium
Successfully installed farama-notifications-0.0.4 gymnasium-1.0.0
Collecting swig
  Downloading swig-4.3.0-py2.py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.whl.metadata (3.5 kB)
Downloading swig-4.3.0-py2.py3-none-m

## Pendulum Q-learning agent

In [None]:
from collections import defaultdict
import gymnasium as gym
import numpy as np
from tqdm import tqdm
import time
import matplotlib.pyplot as plt

class PendulumAgent:
    """Tabular epsilon-greedy Q-learning agent over a discretised state/action grid.

    Observations are converted to a tuple of bin indices (``discretize``) and the
    continuous action range of ``env.action_space`` is replaced by ``n_action_bins``
    evenly spaced values (``action_bins``). Q-values are stored in a ``defaultdict``
    keyed by the discrete state, so a state seen for the first time starts with an
    all-zero row of action values.

    The agent only ever emits actions that are exactly one of ``action_bins``, so the
    action that is executed in the environment is always the action whose Q-value is
    updated afterwards.
    """

    def __init__(
        self,
        env: "gym.Env",
        learning_rate: float,
        initial_epsilon: float,
        epsilon_decay: float,
        final_epsilon: float,
        discount_factor: float = 0.95,
        n_bins: int = 10,
        n_action_bins: int = 10,
    ):
        """Set up the Q-table, the hyper-parameters and both discretisation grids.

        Args:
            env: Environment whose ``action_space.low[0]`` / ``action_space.high[0]``
                delimit the one-dimensional action range.
            learning_rate: Step size applied to the temporal-difference error.
            initial_epsilon: Starting probability of taking a random action.
            epsilon_decay: Amount subtracted from epsilon by each ``decay_epsilon`` call.
            final_epsilon: Lower bound that epsilon never drops below.
            discount_factor: Weight of the best next-state value in the TD target.
            n_bins: Controls the number of bin edges per observation component
                (``n_bins - 1`` edges, see ``create_bins``).
            n_action_bins: Number of discrete actions (length of every Q-table row).
        """
        self.env = env
        self.q_values = defaultdict(lambda: np.zeros(n_action_bins))
        self.lr = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon
        self.n_bins = n_bins
        self.n_action_bins = n_action_bins
        # create_bins() reads self.n_bins, so it has to run after the assignment above.
        self.bins = self.create_bins()
        self.action_bins = np.linspace(env.action_space.low[0], env.action_space.high[0], n_action_bins)
        self.training_error = []

    def create_bins(self):
        """Return one array of ``n_bins - 1`` evenly spaced bin edges per observation component."""
        bin_limits = [
            [-1.0, 1.0],  # cos(theta)
            [-1.0, 1.0],  # sin(theta)
            [-8.0, 8.0],  # angular velocity
        ]
        bins = [np.linspace(limits[0], limits[1], self.n_bins - 1) for limits in bin_limits]
        return bins

    def discretize(self, obs):
        """Map an observation to a hashable tuple of integer bin indices.

        The observation is flattened first, so both shape ``(3,)`` and column-shaped
        ``(3, 1)`` inputs produce the same key. Each index comes from ``np.digitize``
        against the matching edge array in ``self.bins``.
        """
        flat_obs = np.asarray(obs).reshape(-1)
        return tuple(int(np.digitize(value, self.bins[i])) for i, value in enumerate(flat_obs))

    def get_discrete_action(self, action):
        """Return the index of the action bin that ``action`` falls into.

        ``action`` may be a scalar or a single-element array. The result is clamped to
        ``[0, n_action_bins - 1]`` so an out-of-range value can never produce ``-1``
        (which would silently address the last bin through negative indexing).
        """
        scalar_action = float(np.asarray(action).reshape(-1)[0])
        index = int(np.digitize(scalar_action, self.action_bins)) - 1
        return min(max(index, 0), len(self.action_bins) - 1)

    def get_continuous_action(self, discrete_action):
        """Return the continuous action value represented by bin ``discrete_action``."""
        index = int(np.asarray(discrete_action).reshape(-1)[0])
        return float(self.action_bins[index])

    def get_action(self, obs: np.ndarray) -> float:
        """Choose an action epsilon-greedily and return it as a plain Python float.

        With probability ``self.epsilon`` a uniformly random action bin is chosen;
        otherwise the bin with the highest Q-value for the current state is used.
        Both branches return one of ``self.action_bins``.
        """
        if np.random.random() < self.epsilon:
            # Explore on the same discrete grid the Q-table is defined on.
            discrete_action = int(np.random.randint(len(self.action_bins)))
        else:
            discrete_state = self.discretize(obs)
            discrete_action = int(np.argmax(self.q_values[discrete_state]))  # exploit
        return self.get_continuous_action(discrete_action)

    def update(self, obs: np.ndarray, action: float, reward: float, terminated: bool, next_obs: np.ndarray):
        """Apply one Q-learning update for the transition ``(obs, action, reward, next_obs)``.

        ``action`` and ``reward`` may be scalars or single-element arrays; both are
        reduced to scalars so the Q-table and ``training_error`` only hold plain numbers.
        The bootstrap term is dropped when ``terminated`` is true.
        """
        discrete_state = self.discretize(obs)
        discrete_action = self.get_discrete_action(action)
        discrete_next_state = self.discretize(next_obs)
        scalar_reward = float(np.asarray(reward).reshape(-1)[0])

        future_q_value = (not terminated) * np.max(self.q_values[discrete_next_state])
        q_row = self.q_values[discrete_state]
        temporal_difference = (
            scalar_reward + self.discount_factor * future_q_value - q_row[discrete_action]
        )
        q_row[discrete_action] += self.lr * temporal_difference
        self.training_error.append(float(temporal_difference))

    def decay_epsilon(self):
        """Lower epsilon by ``epsilon_decay``, never going below ``final_epsilon``."""
        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)


import moviepy.editor as mpy
import time
import os

env_name = "Pendulum-v1"
# Directory that receives the recorded episode videos. exist_ok=True makes the cell
# safely re-runnable and avoids the check-then-create race of exists() + mkdir().
os.makedirs(f"videos_{env_name}", exist_ok=True)

def record_video(frames, episode_num, env_name, fps=30):
    """Encode a list of frames as ``videos_{env_name}/episode_{episode_num}.mp4``.

    Args:
        frames: Sequence of frames handed to ``mpy.ImageSequenceClip``.
        episode_num: Episode index used in the output file name.
        env_name: Environment name used in the output directory name.
        fps: Frame rate of the written video (defaults to the previous fixed value, 30).

    Nothing is written when ``frames`` is empty. The output directory is created on
    demand, and the clip is closed even if encoding fails.
    """
    if len(frames) == 0:
        # Nothing to encode; building a clip from zero frames would only raise.
        return

    output_dir = f"videos_{env_name}"
    os.makedirs(output_dir, exist_ok=True)

    clip = mpy.ImageSequenceClip(frames, fps=fps)
    try:
        # logger=None silences moviepy's per-frame progress bar, which otherwise floods
        # the cell output and interleaves with the training progress bar.
        clip.write_videofile(f"{output_dir}/episode_{episode_num}.mp4", codec="libx264", logger=None)
    finally:
        clip.close()


# Parameters
SEED = 42  # seeds numpy, the action space and the environment so a run can be repeated
learning_rate = 0.05
n_episodes = 250000
start_epsilon = 1.0
# Epsilon is lowered once per episode, so it hits its floor about halfway through training.
epsilon_decay = start_epsilon / (n_episodes / 2)
final_epsilon = 0.01
video_interval = 5000  # Record every 5000 episodes
nr_bins = 50  # used for both the observation bins and the action bins

np.random.seed(SEED)

# Create environment
env = gym.make("Pendulum-v1", render_mode="rgb_array", g=9.81)
env.action_space.seed(SEED)
# Seeding happens through an initial reset; later env.reset() calls continue this RNG stream.
env.reset(seed=SEED)

agent = PendulumAgent(
    env,
    learning_rate=learning_rate,
    initial_epsilon=start_epsilon,
    epsilon_decay=epsilon_decay,
    final_epsilon=final_epsilon,
    n_bins=nr_bins,
    n_action_bins=nr_bins,
)

episode_rewards = []

# Training loop. Runs n_episodes + 1 episodes so that episode number n_episodes itself
# is included (and recorded when it is a multiple of video_interval).
try:
    for episode in tqdm(range(n_episodes + 1)):
        obs, info = env.reset()
        record_episode = episode % video_interval == 0  # decided once per episode
        frames = []  # To store frames for video
        total_reward = 0.0
        done = False

        while not done:
            action = agent.get_action(obs)
            # The agent may hand back a scalar or a single-element array. The environment
            # is always given a flat shape-(1,) float32 action so that its internal
            # state, observations and rewards keep their scalar/flat shapes.
            env_action = np.asarray(action, dtype=np.float32).reshape(1)
            next_obs, reward, terminated, truncated, info = env.step(env_action)
            next_obs = np.squeeze(next_obs)
            reward = float(np.asarray(reward).reshape(-1)[0])

            # Record the frame if it's a video episode
            if record_episode:
                frames.append(env.render())

            agent.update(obs, action, reward, terminated, next_obs)
            total_reward += reward
            done = terminated or truncated
            obs = next_obs

        episode_rewards.append(total_reward)
        agent.decay_epsilon()

        # Save the video for this episode if needed
        if record_episode:
            record_video(frames, episode, env_name)
finally:
    # Release the environment even when the (very long) run is interrupted or fails.
    env.close()

# An episode total may be a plain number or a single-element array; reduce every entry
# to a Python float. (An isinstance(..., float) test would send int or np.float32
# scalars down the indexing path, where indexing a scalar raises.)
episode_rewards_flat = [
    float(np.asarray(episode_reward).reshape(-1)[0]) for episode_reward in episode_rewards
]

# Plot the episode rewards
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(episode_rewards_flat, label="Episode Reward")
ax.set_xlabel("Episode")
ax.set_ylabel("Total Reward")
ax.set_title("Training Progress: Episode Rewards Over Time (Pendulum-v1)")
ax.legend()
ax.grid()
plt.show()



  0%|          | 0/250001 [00:01<?, ?it/s]

Moviepy - Building video videos_Pendulum-v1/episode_0.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_0.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 36/200 [00:00<00:00, 352.31it/s, now=None][A
t:  36%|███▌      | 72/200 [00:00<00:00, 230.06it/s, now=None][A
t:  49%|████▉     | 98/200 [00:00<00:00, 203.48it/s, now=None][A
t:  60%|██████    | 120/200 [00:00<00:00, 198.03it/s, now=None][A
t:  70%|███████   | 141/200 [00:00<00:00, 188.58it/s, now=None][A
t:  80%|████████  | 161/200 [00:00<00:00, 173.21it/s, now=None][A
t:  90%|█████████ | 181/200 [00:00<00:00, 178.28it/s, now=None][A
t: 100%|██████████| 200/200 [00:01<00:00, 180.54it/s, now=None][A
  0%|          | 3/250001 [00:02<44:05:11,  1.58it/s] 

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_0.mp4


  self.q_values[discrete_state][discrete_action] += self.lr * temporal_difference

  2%|▏         | 4999/250001 [04:47<5:29:55, 12.38it/s]

Moviepy - Building video videos_Pendulum-v1/episode_5000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_5000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▋        | 33/200 [00:00<00:00, 326.96it/s, now=None][A
t:  33%|███▎      | 66/200 [00:00<00:00, 217.73it/s, now=None][A
t:  45%|████▌     | 90/200 [00:00<00:00, 203.85it/s, now=None][A
t:  56%|█████▌    | 112/200 [00:00<00:00, 190.07it/s, now=None][A
t:  66%|██████▌   | 132/200 [00:00<00:00, 181.68it/s, now=None][A
t:  76%|███████▌  | 151/200 [00:00<00:00, 183.37it/s, now=None][A
t:  85%|████████▌ | 170/200 [00:00<00:00, 183.44it/s, now=None][A
t:  94%|█████████▍| 189/200 [00:00<00:00, 181.87it/s, now=None][A
  2%|▏         | 5003/250001 [04:48<23:34:52,  2.89it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_5000.mp4


  4%|▍         | 9999/250001 [09:30<3:38:51, 18.28it/s]

Moviepy - Building video videos_Pendulum-v1/episode_10000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_10000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  19%|█▉        | 38/200 [00:00<00:00, 371.86it/s, now=None][A
t:  38%|███▊      | 76/200 [00:00<00:00, 222.02it/s, now=None][A
t:  51%|█████     | 102/200 [00:00<00:00, 203.77it/s, now=None][A
t:  62%|██████▏   | 124/200 [00:00<00:00, 192.78it/s, now=None][A
t:  72%|███████▎  | 145/200 [00:00<00:00, 187.69it/s, now=None][A
t:  82%|████████▎ | 165/200 [00:00<00:00, 183.79it/s, now=None][A
t:  92%|█████████▏| 184/200 [00:00<00:00, 176.95it/s, now=None][A
  4%|▍         | 10003/250001 [09:31<18:12:23,  3.66it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_10000.mp4


  6%|▌         | 14999/250001 [14:09<4:47:10, 13.64it/s]

Moviepy - Building video videos_Pendulum-v1/episode_15000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_15000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  12%|█▏        | 24/200 [00:00<00:00, 234.03it/s, now=None][A
t:  24%|██▍       | 48/200 [00:00<00:00, 174.39it/s, now=None][A
t:  34%|███▎      | 67/200 [00:00<00:00, 172.44it/s, now=None][A
t:  42%|████▎     | 85/200 [00:00<00:00, 173.86it/s, now=None][A
t:  52%|█████▏    | 103/200 [00:00<00:00, 155.18it/s, now=None][A
t:  62%|██████▏   | 123/200 [00:00<00:00, 168.12it/s, now=None][A
t:  72%|███████▏  | 143/200 [00:00<00:00, 173.17it/s, now=None][A
t:  82%|████████▏ | 163/200 [00:00<00:00, 178.22it/s, now=None][A
t:  91%|█████████ | 182/200 [00:01<00:00, 180.97it/s, now=None][A
  6%|▌         | 15003/250001 [14:10<23:20:41,  2.80it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_15000.mp4


  8%|▊         | 19998/250001 [18:45<3:10:57, 20.07it/s]

Moviepy - Building video videos_Pendulum-v1/episode_20000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_20000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 36/200 [00:00<00:00, 351.10it/s, now=None][A
t:  36%|███▌      | 72/200 [00:00<00:00, 185.05it/s, now=None][A
t:  48%|████▊     | 95/200 [00:00<00:00, 152.76it/s, now=None][A
t:  56%|█████▋    | 113/200 [00:00<00:00, 137.75it/s, now=None][A
t:  64%|██████▍   | 128/200 [00:00<00:00, 127.57it/s, now=None][A
t:  71%|███████   | 142/200 [00:01<00:00, 116.02it/s, now=None][A
t:  77%|███████▋  | 154/200 [00:01<00:00, 109.78it/s, now=None][A
t:  83%|████████▎ | 166/200 [00:01<00:00, 105.16it/s, now=None][A
t:  88%|████████▊ | 177/200 [00:01<00:00, 96.50it/s, now=None] [A
t:  94%|█████████▎| 187/200 [00:01<00:00, 97.16it/s, now=None][A
t: 100%|██████████| 200/200 [00:01<00:00, 102.48it/s, now=None][A
  8%|▊         | 20003/250001 [18:47<18:01:56,  3.54it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_20000.mp4


 10%|▉         | 24998/250001 [23:21<2:56:13, 21.28it/s]

Moviepy - Building video videos_Pendulum-v1/episode_25000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_25000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▋        | 33/200 [00:00<00:00, 322.99it/s, now=None][A
t:  33%|███▎      | 66/200 [00:00<00:00, 228.44it/s, now=None][A
t:  46%|████▌     | 91/200 [00:00<00:00, 209.30it/s, now=None][A
t:  56%|█████▋    | 113/200 [00:00<00:00, 194.71it/s, now=None][A
t:  66%|██████▋   | 133/200 [00:00<00:00, 191.07it/s, now=None][A
t:  76%|███████▋  | 153/200 [00:00<00:00, 188.58it/s, now=None][A
t:  86%|████████▋ | 173/200 [00:00<00:00, 189.53it/s, now=None][A
t:  96%|█████████▋| 193/200 [00:00<00:00, 182.75it/s, now=None][A
 10%|█         | 25003/250001 [23:23<13:55:42,  4.49it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_25000.mp4


 12%|█▏        | 30000/250001 [27:51<2:54:39, 20.99it/s]

Moviepy - Building video videos_Pendulum-v1/episode_30000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_30000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 35/200 [00:00<00:00, 345.13it/s, now=None][A
t:  35%|███▌      | 70/200 [00:00<00:00, 224.80it/s, now=None][A
t:  48%|████▊     | 95/200 [00:00<00:00, 211.18it/s, now=None][A
t:  59%|█████▉    | 118/200 [00:00<00:00, 192.22it/s, now=None][A
t:  69%|██████▉   | 138/200 [00:00<00:00, 184.16it/s, now=None][A
t:  78%|███████▊  | 157/200 [00:00<00:00, 179.18it/s, now=None][A
t:  88%|████████▊ | 176/200 [00:00<00:00, 180.51it/s, now=None][A
t:  98%|█████████▊| 195/200 [00:01<00:00, 181.63it/s, now=None][A
 12%|█▏        | 30003/250001 [27:52<16:25:27,  3.72it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_30000.mp4


 14%|█▍        | 34998/250001 [32:18<2:49:47, 21.10it/s]

Moviepy - Building video videos_Pendulum-v1/episode_35000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_35000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 36/200 [00:00<00:00, 358.20it/s, now=None][A
t:  36%|███▌      | 72/200 [00:00<00:00, 216.43it/s, now=None][A
t:  48%|████▊     | 97/200 [00:00<00:00, 212.14it/s, now=None][A
t:  60%|██████    | 120/200 [00:00<00:00, 208.40it/s, now=None][A
t:  71%|███████   | 142/200 [00:00<00:00, 202.18it/s, now=None][A
t:  82%|████████▏ | 163/200 [00:00<00:00, 196.21it/s, now=None][A
t:  92%|█████████▏| 183/200 [00:00<00:00, 181.81it/s, now=None][A
 14%|█▍        | 35003/250001 [32:20<13:29:04,  4.43it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_35000.mp4


 16%|█▌        | 39998/250001 [36:43<2:56:47, 19.80it/s]

Moviepy - Building video videos_Pendulum-v1/episode_40000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_40000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  17%|█▋        | 34/200 [00:00<00:00, 332.08it/s, now=None][A
t:  34%|███▍      | 68/200 [00:00<00:00, 213.30it/s, now=None][A
t:  46%|████▌     | 92/200 [00:00<00:00, 205.72it/s, now=None][A
t:  57%|█████▋    | 114/200 [00:00<00:00, 201.89it/s, now=None][A
t:  68%|██████▊   | 135/200 [00:00<00:00, 195.27it/s, now=None][A
t:  78%|███████▊  | 155/200 [00:00<00:00, 179.80it/s, now=None][A
t:  87%|████████▋ | 174/200 [00:00<00:00, 180.46it/s, now=None][A
t:  96%|█████████▋| 193/200 [00:00<00:00, 182.92it/s, now=None][A
 16%|█▌        | 40003/250001 [36:44<13:12:37,  4.42it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_40000.mp4


 18%|█▊        | 44999/250001 [41:08<4:10:38, 13.63it/s]

Moviepy - Building video videos_Pendulum-v1/episode_45000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_45000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 32/200 [00:00<00:00, 314.33it/s, now=None][A
t:  32%|███▏      | 64/200 [00:00<00:00, 207.20it/s, now=None][A
t:  44%|████▎     | 87/200 [00:00<00:00, 203.34it/s, now=None][A
t:  55%|█████▍    | 109/200 [00:00<00:00, 196.63it/s, now=None][A
t:  65%|██████▌   | 130/200 [00:00<00:00, 185.44it/s, now=None][A
t:  74%|███████▍  | 149/200 [00:00<00:00, 183.96it/s, now=None][A
t:  84%|████████▍ | 169/200 [00:00<00:00, 184.20it/s, now=None][A
t:  94%|█████████▍| 189/200 [00:00<00:00, 184.68it/s, now=None][A
 18%|█▊        | 45003/250001 [41:10<20:16:52,  2.81it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_45000.mp4


 20%|█▉        | 49999/250001 [45:32<2:32:06, 21.91it/s]

Moviepy - Building video videos_Pendulum-v1/episode_50000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_50000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  11%|█         | 22/200 [00:00<00:00, 217.89it/s, now=None][A
t:  23%|██▎       | 46/200 [00:00<00:00, 226.91it/s, now=None][A
t:  34%|███▍      | 69/200 [00:00<00:00, 175.00it/s, now=None][A
t:  44%|████▍     | 88/200 [00:00<00:00, 174.04it/s, now=None][A
t:  54%|█████▎    | 107/200 [00:00<00:00, 172.28it/s, now=None][A
t:  62%|██████▎   | 125/200 [00:00<00:00, 166.54it/s, now=None][A
t:  72%|███████▎  | 145/200 [00:00<00:00, 173.97it/s, now=None][A
t:  82%|████████▏ | 163/200 [00:00<00:00, 152.58it/s, now=None][A
t:  90%|████████▉ | 179/200 [00:01<00:00, 131.10it/s, now=None][A
t:  96%|█████████▋| 193/200 [00:01<00:00, 131.52it/s, now=None][A
 20%|██        | 50002/250001 [45:33<17:21:26,  3.20it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_50000.mp4


 22%|██▏       | 54998/250001 [49:53<2:39:36, 20.36it/s]

Moviepy - Building video videos_Pendulum-v1/episode_55000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_55000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   8%|▊         | 15/200 [00:00<00:01, 149.61it/s, now=None][A
t:  16%|█▋        | 33/200 [00:00<00:01, 165.05it/s, now=None][A
t:  25%|██▌       | 50/200 [00:00<00:01, 120.84it/s, now=None][A
t:  32%|███▏      | 64/200 [00:00<00:01, 106.39it/s, now=None][A
t:  38%|███▊      | 76/200 [00:00<00:01, 105.80it/s, now=None][A
t:  44%|████▍     | 88/200 [00:00<00:01, 107.28it/s, now=None][A
t:  50%|█████     | 100/200 [00:00<00:00, 102.95it/s, now=None][A
t:  56%|█████▌    | 111/200 [00:01<00:00, 101.92it/s, now=None][A
t:  61%|██████    | 122/200 [00:01<00:00, 99.84it/s, now=None] [A
t:  66%|██████▋   | 133/200 [00:01<00:00, 101.59it/s, now=None][A
t:  72%|███████▏  | 144/200 [00:01<00:00, 98.67it/s, now=None] [A
t:  78%|███████▊  | 155/200 [00:01<00:00, 100.20it/s, now=None][A
t:  83%|████████▎ | 166/200 [00:01<00:00, 102.28it/s, now=None][A
t:  88%|████████▊ | 177/200 [00:01<00:00, 97.81it/s, now=None] [A
t:  94%|███

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_55000.mp4


 24%|██▍       | 59999/250001 [54:12<2:18:20, 22.89it/s]

Moviepy - Building video videos_Pendulum-v1/episode_60000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_60000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  12%|█▏        | 24/200 [00:00<00:00, 232.54it/s, now=None][A
t:  24%|██▍       | 48/200 [00:00<00:00, 199.00it/s, now=None][A
t:  34%|███▍      | 69/200 [00:00<00:00, 182.36it/s, now=None][A
t:  44%|████▍     | 89/200 [00:00<00:00, 186.44it/s, now=None][A
t:  55%|█████▍    | 109/200 [00:00<00:00, 188.03it/s, now=None][A
t:  64%|██████▍   | 129/200 [00:00<00:00, 189.66it/s, now=None][A
t:  74%|███████▍  | 149/200 [00:00<00:00, 175.80it/s, now=None][A
t:  84%|████████▎ | 167/200 [00:00<00:00, 169.07it/s, now=None][A
t:  92%|█████████▎| 185/200 [00:01<00:00, 171.18it/s, now=None][A
 24%|██▍       | 60005/250001 [54:13<11:34:32,  4.56it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_60000.mp4


 26%|██▌       | 64999/250001 [58:27<2:20:18, 21.98it/s]

Moviepy - Building video videos_Pendulum-v1/episode_65000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_65000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  12%|█▏        | 24/200 [00:00<00:00, 230.09it/s, now=None][A
t:  24%|██▍       | 48/200 [00:00<00:00, 154.05it/s, now=None][A
t:  32%|███▎      | 65/200 [00:00<00:01, 120.22it/s, now=None][A
t:  40%|███▉      | 79/200 [00:00<00:01, 94.85it/s, now=None] [A
t:  45%|████▌     | 90/200 [00:00<00:01, 90.74it/s, now=None][A
t:  50%|█████     | 101/200 [00:00<00:01, 93.73it/s, now=None][A
t:  56%|█████▌    | 111/200 [00:01<00:00, 93.25it/s, now=None][A
t:  60%|██████    | 121/200 [00:01<00:00, 93.28it/s, now=None][A
t:  66%|██████▌   | 131/200 [00:01<00:00, 94.26it/s, now=None][A
t:  76%|███████▌  | 151/200 [00:01<00:00, 122.11it/s, now=None][A
t:  86%|████████▌ | 171/200 [00:01<00:00, 143.00it/s, now=None][A
t:  95%|█████████▌| 190/200 [00:01<00:00, 154.11it/s, now=None][A
 26%|██▌       | 65002/250001 [58:29<17:33:18,  2.93it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_65000.mp4


 28%|██▊       | 69998/250001 [1:02:43<2:16:37, 21.96it/s]

Moviepy - Building video videos_Pendulum-v1/episode_70000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_70000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 36/200 [00:00<00:00, 355.45it/s, now=None][A
t:  36%|███▌      | 72/200 [00:00<00:00, 195.10it/s, now=None][A
t:  48%|████▊     | 96/200 [00:00<00:00, 192.67it/s, now=None][A
t:  59%|█████▉    | 118/200 [00:00<00:00, 190.84it/s, now=None][A
t:  70%|██████▉   | 139/200 [00:00<00:00, 182.81it/s, now=None][A
t:  79%|███████▉  | 158/200 [00:00<00:00, 173.02it/s, now=None][A
t:  88%|████████▊ | 177/200 [00:00<00:00, 174.38it/s, now=None][A
t:  98%|█████████▊| 195/200 [00:01<00:00, 171.88it/s, now=None][A
 28%|██▊       | 70003/250001 [1:02:44<11:21:32,  4.40it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_70000.mp4


 30%|██▉       | 74998/250001 [1:06:53<2:24:57, 20.12it/s]

Moviepy - Building video videos_Pendulum-v1/episode_75000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_75000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 31/200 [00:00<00:00, 307.78it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:00, 203.68it/s, now=None][A
t:  42%|████▎     | 85/200 [00:00<00:00, 204.94it/s, now=None][A
t:  54%|█████▎    | 107/200 [00:00<00:00, 200.75it/s, now=None][A
t:  64%|██████▍   | 128/200 [00:00<00:00, 195.00it/s, now=None][A
t:  74%|███████▍  | 149/200 [00:00<00:00, 197.53it/s, now=None][A
t:  85%|████████▌ | 170/200 [00:00<00:00, 195.01it/s, now=None][A
t:  95%|█████████▌| 190/200 [00:00<00:00, 187.83it/s, now=None][A
 30%|███       | 75003/250001 [1:06:55<11:16:20,  4.31it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_75000.mp4


 32%|███▏      | 80000/250001 [1:11:01<2:01:23, 23.34it/s]

Moviepy - Building video videos_Pendulum-v1/episode_80000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_80000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  18%|█▊        | 35/200 [00:00<00:00, 343.62it/s, now=None][A
t:  35%|███▌      | 70/200 [00:00<00:00, 224.28it/s, now=None][A
t:  48%|████▊     | 95/200 [00:00<00:00, 162.44it/s, now=None][A
t:  57%|█████▋    | 114/200 [00:00<00:00, 137.29it/s, now=None][A
t:  65%|██████▌   | 130/200 [00:00<00:00, 125.92it/s, now=None][A
t:  72%|███████▏  | 144/200 [00:00<00:00, 122.97it/s, now=None][A
t:  80%|████████  | 161/200 [00:01<00:00, 131.75it/s, now=None][A
t:  88%|████████▊ | 176/200 [00:01<00:00, 136.22it/s, now=None][A
t:  97%|█████████▋| 194/200 [00:01<00:00, 146.43it/s, now=None][A
 32%|███▏      | 80003/250001 [1:11:03<13:59:57,  3.37it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_80000.mp4


 34%|███▍      | 84998/250001 [1:15:05<1:59:05, 23.09it/s]

Moviepy - Building video videos_Pendulum-v1/episode_85000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_85000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  15%|█▌        | 30/200 [00:00<00:00, 299.00it/s, now=None][A
t:  30%|███       | 60/200 [00:00<00:00, 206.24it/s, now=None][A
t:  42%|████▏     | 83/200 [00:00<00:00, 176.78it/s, now=None][A
t:  52%|█████▏    | 103/200 [00:00<00:00, 183.62it/s, now=None][A
t:  62%|██████▏   | 123/200 [00:00<00:00, 187.43it/s, now=None][A
t:  72%|███████▏  | 143/200 [00:00<00:00, 184.27it/s, now=None][A
t:  81%|████████  | 162/200 [00:00<00:00, 164.65it/s, now=None][A
t:  90%|████████▉ | 179/200 [00:01<00:00, 149.56it/s, now=None][A
t:  98%|█████████▊| 195/200 [00:01<00:00, 133.89it/s, now=None][A
 34%|███▍      | 85003/250001 [1:15:07<11:24:48,  4.02it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_85000.mp4


 36%|███▌      | 89999/250001 [1:19:06<1:47:44, 24.75it/s]

Moviepy - Building video videos_Pendulum-v1/episode_90000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_90000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 31/200 [00:00<00:00, 306.05it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:00, 226.55it/s, now=None][A
t:  43%|████▎     | 86/200 [00:00<00:00, 208.21it/s, now=None][A
t:  54%|█████▍    | 108/200 [00:00<00:00, 197.48it/s, now=None][A
t:  64%|██████▍   | 129/200 [00:00<00:00, 198.49it/s, now=None][A
t:  75%|███████▌  | 150/200 [00:00<00:00, 196.03it/s, now=None][A
t:  85%|████████▌ | 170/200 [00:00<00:00, 190.72it/s, now=None][A
t:  95%|█████████▌| 190/200 [00:00<00:00, 192.24it/s, now=None][A
 36%|███▌      | 90002/250001 [1:19:07<11:39:56,  3.81it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_90000.mp4


 38%|███▊      | 94998/250001 [1:23:07<1:47:02, 24.14it/s]

Moviepy - Building video videos_Pendulum-v1/episode_95000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_95000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 32/200 [00:00<00:00, 316.89it/s, now=None][A
t:  32%|███▏      | 64/200 [00:00<00:00, 204.29it/s, now=None][A
t:  44%|████▎     | 87/200 [00:00<00:00, 195.59it/s, now=None][A
t:  54%|█████▍    | 108/200 [00:00<00:00, 182.22it/s, now=None][A
t:  64%|██████▎   | 127/200 [00:00<00:00, 182.55it/s, now=None][A
t:  74%|███████▎  | 147/200 [00:00<00:00, 186.09it/s, now=None][A
t:  84%|████████▎ | 167/200 [00:00<00:00, 187.66it/s, now=None][A
t:  93%|█████████▎| 186/200 [00:00<00:00, 183.74it/s, now=None][A
 38%|███▊      | 95003/250001 [1:23:08<9:25:41,  4.57it/s] 

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_95000.mp4


 40%|███▉      | 100000/250001 [1:27:03<2:55:17, 14.26it/s]

Moviepy - Building video videos_Pendulum-v1/episode_100000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_100000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 32/200 [00:00<00:00, 318.99it/s, now=None][A
t:  32%|███▏      | 64/200 [00:00<00:00, 215.72it/s, now=None][A
t:  44%|████▍     | 88/200 [00:00<00:00, 194.80it/s, now=None][A
t:  55%|█████▍    | 109/200 [00:00<00:00, 193.21it/s, now=None][A
t:  64%|██████▍   | 129/200 [00:00<00:00, 189.93it/s, now=None][A
t:  74%|███████▍  | 149/200 [00:00<00:00, 189.14it/s, now=None][A
t:  84%|████████▍ | 169/200 [00:00<00:00, 186.94it/s, now=None][A
t:  94%|█████████▍| 188/200 [00:00<00:00, 186.93it/s, now=None][A
 40%|████      | 100002/250001 [1:27:04<16:59:56,  2.45it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_100000.mp4


 42%|████▏     | 104999/250001 [1:30:55<1:35:06, 25.41it/s]

Moviepy - Building video videos_Pendulum-v1/episode_105000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_105000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 31/200 [00:00<00:00, 308.39it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:00, 215.89it/s, now=None][A
t:  43%|████▎     | 86/200 [00:00<00:00, 200.77it/s, now=None][A
t:  54%|█████▎    | 107/200 [00:00<00:00, 200.06it/s, now=None][A
t:  64%|██████▍   | 128/200 [00:00<00:00, 199.23it/s, now=None][A
t:  74%|███████▍  | 149/200 [00:00<00:00, 194.51it/s, now=None][A
t:  84%|████████▍ | 169/200 [00:00<00:00, 194.87it/s, now=None][A
t:  94%|█████████▍| 189/200 [00:00<00:00, 194.26it/s, now=None][A
 42%|████▏     | 105004/250001 [1:30:56<9:10:09,  4.39it/s] 

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_105000.mp4


 44%|████▍     | 109998/250001 [1:34:43<1:34:22, 24.73it/s]

Moviepy - Building video videos_Pendulum-v1/episode_110000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_110000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 31/200 [00:00<00:00, 307.70it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:00, 213.47it/s, now=None][A
t:  43%|████▎     | 86/200 [00:00<00:00, 207.50it/s, now=None][A
t:  54%|█████▍    | 108/200 [00:00<00:00, 187.80it/s, now=None][A
t:  64%|██████▍   | 129/200 [00:00<00:00, 191.92it/s, now=None][A
t:  74%|███████▍  | 149/200 [00:00<00:00, 193.61it/s, now=None][A
t:  84%|████████▍ | 169/200 [00:00<00:00, 191.67it/s, now=None][A
t:  94%|█████████▍| 189/200 [00:00<00:00, 189.51it/s, now=None][A
 44%|████▍     | 110004/250001 [1:34:44<7:47:12,  4.99it/s] 

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_110000.mp4


 46%|████▌     | 114999/250001 [1:38:27<2:25:46, 15.44it/s]

Moviepy - Building video videos_Pendulum-v1/episode_115000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_115000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 31/200 [00:00<00:00, 305.70it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:00, 200.87it/s, now=None][A
t:  42%|████▎     | 85/200 [00:00<00:00, 197.81it/s, now=None][A
t:  53%|█████▎    | 106/200 [00:00<00:00, 190.96it/s, now=None][A
t:  64%|██████▎   | 127/200 [00:00<00:00, 194.93it/s, now=None][A
t:  74%|███████▎  | 147/200 [00:00<00:00, 194.25it/s, now=None][A
t:  84%|████████▎ | 167/200 [00:00<00:00, 191.23it/s, now=None][A
t:  94%|█████████▎| 187/200 [00:00<00:00, 176.60it/s, now=None][A
 46%|████▌     | 115004/250001 [1:38:29<10:11:08,  3.68it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_115000.mp4


 48%|████▊     | 119998/250001 [1:42:06<1:26:57, 24.91it/s]

Moviepy - Building video videos_Pendulum-v1/episode_120000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_120000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  12%|█▎        | 25/200 [00:00<00:00, 245.05it/s, now=None][A
t:  25%|██▌       | 50/200 [00:00<00:00, 218.86it/s, now=None][A
t:  36%|███▋      | 73/200 [00:00<00:00, 198.83it/s, now=None][A
t:  47%|████▋     | 94/200 [00:00<00:00, 190.35it/s, now=None][A
t:  57%|█████▋    | 114/200 [00:00<00:00, 176.94it/s, now=None][A
t:  66%|██████▌   | 132/200 [00:00<00:00, 165.12it/s, now=None][A
t:  74%|███████▍  | 149/200 [00:00<00:00, 160.48it/s, now=None][A
t:  84%|████████▍ | 168/200 [00:00<00:00, 166.74it/s, now=None][A
t:  94%|█████████▎| 187/200 [00:01<00:00, 173.13it/s, now=None][A
 48%|████▊     | 120004/250001 [1:42:07<7:31:51,  4.79it/s] 

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_120000.mp4


 50%|████▉     | 124998/250001 [1:45:28<1:10:01, 29.75it/s]

Moviepy - Building video videos_Pendulum-v1/episode_125000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_125000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   8%|▊         | 17/200 [00:00<00:01, 162.78it/s, now=None][A
t:  18%|█▊        | 36/200 [00:00<00:00, 175.81it/s, now=None][A
t:  27%|██▋       | 54/200 [00:00<00:00, 152.87it/s, now=None][A
t:  36%|███▌      | 71/200 [00:00<00:00, 158.80it/s, now=None][A
t:  44%|████▍     | 88/200 [00:00<00:00, 162.06it/s, now=None][A
t:  53%|█████▎    | 106/200 [00:00<00:00, 165.79it/s, now=None][A
t:  62%|██████▎   | 125/200 [00:00<00:00, 170.42it/s, now=None][A
t:  72%|███████▏  | 143/200 [00:00<00:00, 172.02it/s, now=None][A
t:  80%|████████  | 161/200 [00:00<00:00, 157.89it/s, now=None][A
t:  89%|████████▉ | 178/200 [00:01<00:00, 148.71it/s, now=None][A
t:  97%|█████████▋| 194/200 [00:01<00:00, 141.59it/s, now=None][A
 50%|█████     | 125004/250001 [1:45:30<7:36:53,  4.56it/s] 

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_125000.mp4


 52%|█████▏    | 129998/250001 [1:48:39<1:05:22, 30.59it/s]

Moviepy - Building video videos_Pendulum-v1/episode_130000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_130000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   8%|▊         | 16/200 [00:00<00:01, 152.75it/s, now=None][A
t:  17%|█▋        | 34/200 [00:00<00:01, 164.08it/s, now=None][A
t:  26%|██▌       | 51/200 [00:00<00:01, 130.26it/s, now=None][A
t:  32%|███▎      | 65/200 [00:00<00:01, 105.16it/s, now=None][A
t:  38%|███▊      | 77/200 [00:00<00:01, 93.05it/s, now=None] [A
t:  44%|████▎     | 87/200 [00:00<00:01, 93.53it/s, now=None][A
t:  50%|████▉     | 99/200 [00:00<00:01, 99.26it/s, now=None][A
t:  55%|█████▌    | 110/200 [00:01<00:00, 99.36it/s, now=None][A
t:  60%|██████    | 121/200 [00:01<00:00, 100.15it/s, now=None][A
t:  66%|██████▌   | 132/200 [00:01<00:00, 102.80it/s, now=None][A
t:  72%|███████▏  | 144/200 [00:01<00:00, 107.34it/s, now=None][A
t:  78%|███████▊  | 155/200 [00:01<00:00, 103.14it/s, now=None][A
t:  84%|████████▎ | 167/200 [00:01<00:00, 106.75it/s, now=None][A
t:  90%|████████▉ | 179/200 [00:01<00:00, 107.85it/s, now=None][A
t:  96%|███████

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_130000.mp4


 54%|█████▍    | 134999/250001 [1:51:51<1:08:27, 28.00it/s]

Moviepy - Building video videos_Pendulum-v1/episode_135000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_135000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  10%|▉         | 19/200 [00:00<00:00, 183.74it/s, now=None][A
t:  22%|██▏       | 43/200 [00:00<00:00, 212.45it/s, now=None][A
t:  32%|███▎      | 65/200 [00:00<00:00, 168.16it/s, now=None][A
t:  42%|████▏     | 84/200 [00:00<00:00, 175.18it/s, now=None][A
t:  52%|█████▏    | 103/200 [00:00<00:00, 168.32it/s, now=None][A
t:  61%|██████    | 122/200 [00:00<00:00, 173.61it/s, now=None][A
t:  70%|███████   | 141/200 [00:00<00:00, 178.20it/s, now=None][A
t:  80%|████████  | 160/200 [00:00<00:00, 176.64it/s, now=None][A
t:  89%|████████▉ | 178/200 [00:01<00:00, 162.66it/s, now=None][A
t:  98%|█████████▊| 195/200 [00:01<00:00, 157.30it/s, now=None][A
 54%|█████▍    | 135006/250001 [1:51:53<6:17:04,  5.08it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_135000.mp4


 56%|█████▌    | 139999/250001 [1:55:02<1:04:39, 28.36it/s]

Moviepy - Building video videos_Pendulum-v1/episode_140000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_140000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   8%|▊         | 17/200 [00:00<00:01, 161.36it/s, now=None][A
t:  17%|█▋        | 34/200 [00:00<00:01, 165.40it/s, now=None][A
t:  26%|██▌       | 51/200 [00:00<00:01, 126.45it/s, now=None][A
t:  32%|███▎      | 65/200 [00:00<00:01, 112.81it/s, now=None][A
t:  38%|███▊      | 77/200 [00:00<00:01, 109.61it/s, now=None][A
t:  44%|████▍     | 89/200 [00:00<00:01, 110.92it/s, now=None][A
t:  50%|█████     | 101/200 [00:00<00:00, 108.90it/s, now=None][A
t:  56%|█████▋    | 113/200 [00:00<00:00, 108.50it/s, now=None][A
t:  62%|██████▏   | 124/200 [00:01<00:00, 105.42it/s, now=None][A
t:  68%|██████▊   | 135/200 [00:01<00:00, 105.07it/s, now=None][A
t:  74%|███████▎  | 147/200 [00:01<00:00, 106.06it/s, now=None][A
t:  79%|███████▉  | 158/200 [00:01<00:00, 104.91it/s, now=None][A
t:  84%|████████▍ | 169/200 [00:01<00:00, 102.79it/s, now=None][A
t:  90%|█████████ | 180/200 [00:01<00:00, 98.58it/s, now=None] [A
t:  96%|███

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_140000.mp4


 58%|█████▊    | 144998/250001 [1:58:13<1:08:12, 25.66it/s]

Moviepy - Building video videos_Pendulum-v1/episode_145000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_145000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  14%|█▍        | 28/200 [00:00<00:00, 274.06it/s, now=None][A
t:  28%|██▊       | 56/200 [00:00<00:00, 175.85it/s, now=None][A
t:  38%|███▊      | 76/200 [00:00<00:00, 176.69it/s, now=None][A
t:  48%|████▊     | 95/200 [00:00<00:00, 159.50it/s, now=None][A
t:  56%|█████▌    | 112/200 [00:00<00:00, 144.49it/s, now=None][A
t:  64%|██████▎   | 127/200 [00:00<00:00, 132.61it/s, now=None][A
t:  70%|███████   | 141/200 [00:00<00:00, 122.86it/s, now=None][A
t:  77%|███████▋  | 154/200 [00:01<00:00, 106.53it/s, now=None][A
t:  82%|████████▎ | 165/200 [00:01<00:00, 99.60it/s, now=None] [A
t:  90%|█████████ | 181/200 [00:01<00:00, 112.35it/s, now=None][A
t: 100%|██████████| 200/200 [00:01<00:00, 130.17it/s, now=None][A
 58%|█████▊    | 145003/250001 [1:58:15<7:29:05,  3.90it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_145000.mp4


 60%|█████▉    | 149998/250001 [2:01:23<56:25, 29.54it/s]

Moviepy - Building video videos_Pendulum-v1/episode_150000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_150000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  14%|█▍        | 28/200 [00:00<00:00, 279.13it/s, now=None][A
t:  28%|██▊       | 56/200 [00:00<00:00, 184.70it/s, now=None][A
t:  38%|███▊      | 77/200 [00:00<00:00, 178.59it/s, now=None][A
t:  48%|████▊     | 96/200 [00:00<00:00, 180.92it/s, now=None][A
t:  57%|█████▊    | 115/200 [00:00<00:00, 177.10it/s, now=None][A
t:  67%|██████▋   | 134/200 [00:00<00:00, 179.10it/s, now=None][A
t:  76%|███████▋  | 153/200 [00:00<00:00, 181.05it/s, now=None][A
t:  86%|████████▌ | 172/200 [00:00<00:00, 180.52it/s, now=None][A
t:  96%|█████████▌| 191/200 [00:01<00:00, 182.95it/s, now=None][A
 60%|██████    | 150004/250001 [2:01:25<5:21:09,  5.19it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_150000.mp4


 62%|██████▏   | 154998/250001 [2:04:32<51:39, 30.65it/s]

Moviepy - Building video videos_Pendulum-v1/episode_155000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_155000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 31/200 [00:00<00:00, 307.04it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:00, 211.76it/s, now=None][A
t:  42%|████▎     | 85/200 [00:00<00:00, 210.60it/s, now=None][A
t:  54%|█████▎    | 107/200 [00:00<00:00, 207.26it/s, now=None][A
t:  64%|██████▍   | 129/200 [00:00<00:00, 202.84it/s, now=None][A
t:  75%|███████▌  | 150/200 [00:00<00:00, 193.87it/s, now=None][A
t:  85%|████████▌ | 170/200 [00:00<00:00, 194.14it/s, now=None][A
t:  95%|█████████▌| 190/200 [00:00<00:00, 175.16it/s, now=None][A
 62%|██████▏   | 155005/250001 [2:04:34<4:31:48,  5.83it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_155000.mp4


 64%|██████▍   | 159998/250001 [2:07:43<1:14:18, 20.19it/s]

Moviepy - Building video videos_Pendulum-v1/episode_160000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_160000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▋        | 33/200 [00:00<00:00, 326.10it/s, now=None][A
t:  33%|███▎      | 66/200 [00:00<00:00, 207.33it/s, now=None][A
t:  45%|████▌     | 90/200 [00:00<00:00, 197.68it/s, now=None][A
t:  56%|█████▌    | 111/200 [00:00<00:00, 193.63it/s, now=None][A
t:  66%|██████▌   | 132/200 [00:00<00:00, 190.27it/s, now=None][A
t:  76%|███████▌  | 152/200 [00:00<00:00, 191.05it/s, now=None][A
t:  88%|████████▊ | 176/200 [00:00<00:00, 202.85it/s, now=None][A
t: 100%|█████████▉| 199/200 [00:00<00:00, 210.13it/s, now=None][A
 64%|██████▍   | 160004/250001 [2:07:44<5:45:28,  4.34it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_160000.mp4


 66%|██████▌   | 164999/250001 [2:10:53<49:20, 28.71it/s]

Moviepy - Building video videos_Pendulum-v1/episode_165000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_165000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   9%|▉         | 18/200 [00:00<00:01, 179.53it/s, now=None][A
t:  24%|██▍       | 48/200 [00:00<00:00, 211.49it/s, now=None][A
t:  34%|███▍      | 69/200 [00:00<00:00, 188.06it/s, now=None][A
t:  44%|████▍     | 88/200 [00:00<00:00, 184.21it/s, now=None][A
t:  54%|█████▎    | 107/200 [00:00<00:00, 184.88it/s, now=None][A
t:  63%|██████▎   | 126/200 [00:00<00:00, 185.25it/s, now=None][A
t:  72%|███████▎  | 145/200 [00:00<00:00, 177.01it/s, now=None][A
t:  82%|████████▏ | 163/200 [00:00<00:00, 161.56it/s, now=None][A
t:  90%|█████████ | 180/200 [00:01<00:00, 159.49it/s, now=None][A
t:  98%|█████████▊| 197/200 [00:01<00:00, 159.41it/s, now=None][A
 66%|██████▌   | 165006/250001 [2:10:55<4:09:53,  5.67it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_165000.mp4


 68%|██████▊   | 170000/250001 [2:14:04<42:27, 31.41it/s]

Moviepy - Building video videos_Pendulum-v1/episode_170000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_170000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  16%|█▌        | 31/200 [00:00<00:00, 302.72it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:00, 199.07it/s, now=None][A
t:  42%|████▏     | 84/200 [00:00<00:00, 173.37it/s, now=None][A
t:  52%|█████▏    | 103/200 [00:00<00:00, 176.29it/s, now=None][A
t:  61%|██████    | 122/200 [00:00<00:00, 175.62it/s, now=None][A
t:  70%|███████   | 141/200 [00:00<00:00, 176.92it/s, now=None][A
t:  80%|████████  | 160/200 [00:00<00:00, 179.24it/s, now=None][A
t:  90%|████████▉ | 179/200 [00:00<00:00, 179.63it/s, now=None][A
t:  99%|█████████▉| 198/200 [00:01<00:00, 176.04it/s, now=None][A
 68%|██████▊   | 170004/250001 [2:14:05<5:06:56,  4.34it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_170000.mp4


 70%|██████▉   | 174998/250001 [2:17:16<43:18, 28.86it/s]

Moviepy - Building video videos_Pendulum-v1/episode_175000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_175000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  14%|█▍        | 29/200 [00:00<00:00, 284.65it/s, now=None][A
t:  29%|██▉       | 58/200 [00:00<00:00, 208.67it/s, now=None][A
t:  40%|████      | 81/200 [00:00<00:00, 196.62it/s, now=None][A
t:  51%|█████     | 102/200 [00:00<00:00, 178.81it/s, now=None][A
t:  60%|██████    | 121/200 [00:00<00:00, 181.64it/s, now=None][A
t:  70%|███████   | 140/200 [00:00<00:00, 178.67it/s, now=None][A
t:  80%|███████▉  | 159/200 [00:00<00:00, 181.09it/s, now=None][A
t:  89%|████████▉ | 178/200 [00:00<00:00, 178.90it/s, now=None][A
t:  98%|█████████▊| 196/200 [00:01<00:00, 172.70it/s, now=None][A
 70%|███████   | 175004/250001 [2:17:18<4:08:51,  5.02it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_175000.mp4


 72%|███████▏  | 180000/250001 [2:20:28<42:26, 27.48it/s]

Moviepy - Building video videos_Pendulum-v1/episode_180000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_180000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:  15%|█▌        | 30/200 [00:00<00:00, 299.73it/s, now=None][A
t:  30%|███       | 60/200 [00:00<00:00, 215.27it/s, now=None][A
t:  42%|████▏     | 83/200 [00:00<00:00, 195.00it/s, now=None][A
t:  52%|█████▏    | 104/200 [00:00<00:00, 187.89it/s, now=None][A
t:  62%|██████▏   | 124/200 [00:00<00:00, 184.21it/s, now=None][A
t:  72%|███████▏  | 143/200 [00:00<00:00, 179.49it/s, now=None][A
t:  81%|████████  | 162/200 [00:00<00:00, 160.51it/s, now=None][A
t:  90%|████████▉ | 179/200 [00:01<00:00, 154.41it/s, now=None][A
t:  99%|█████████▉| 198/200 [00:01<00:00, 162.95it/s, now=None][A
 72%|███████▏  | 180003/250001 [2:20:30<4:52:26,  3.99it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_180000.mp4


 74%|███████▍  | 184998/250001 [2:23:39<35:26, 30.57it/s]

Moviepy - Building video videos_Pendulum-v1/episode_185000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_185000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   9%|▉         | 18/200 [00:00<00:01, 177.61it/s, now=None][A
t:  18%|█▊        | 36/200 [00:00<00:00, 178.73it/s, now=None][A
t:  27%|██▋       | 54/200 [00:00<00:01, 128.56it/s, now=None][A
t:  34%|███▍      | 69/200 [00:00<00:01, 110.78it/s, now=None][A
t:  41%|████      | 82/200 [00:00<00:01, 113.71it/s, now=None][A
t:  48%|████▊     | 95/200 [00:00<00:00, 106.63it/s, now=None][A
t:  54%|█████▍    | 108/200 [00:00<00:00, 111.78it/s, now=None][A
t:  60%|██████    | 120/200 [00:01<00:00, 110.62it/s, now=None][A
t:  66%|██████▌   | 132/200 [00:01<00:00, 107.14it/s, now=None][A
t:  72%|███████▏  | 143/200 [00:01<00:00, 106.39it/s, now=None][A
t:  77%|███████▋  | 154/200 [00:01<00:00, 102.09it/s, now=None][A
t:  82%|████████▎ | 165/200 [00:01<00:00, 103.49it/s, now=None][A
t:  88%|████████▊ | 176/200 [00:01<00:00, 98.64it/s, now=None] [A
t:  93%|█████████▎| 186/200 [00:01<00:00, 93.29it/s, now=None][A
t:  98%|████

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_185000.mp4


 76%|███████▌  | 190000/250001 [2:26:51<38:23, 26.05it/s]

Moviepy - Building video videos_Pendulum-v1/episode_190000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_190000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   6%|▌         | 12/200 [00:00<00:01, 115.78it/s, now=None][A
t:  12%|█▎        | 25/200 [00:00<00:01, 123.86it/s, now=None][A
t:  19%|█▉        | 38/200 [00:00<00:01, 122.17it/s, now=None][A
t:  26%|██▌       | 51/200 [00:00<00:01, 87.53it/s, now=None] [A
t:  30%|███       | 61/200 [00:00<00:01, 81.03it/s, now=None][A
t:  35%|███▌      | 70/200 [00:00<00:01, 82.99it/s, now=None][A
t:  40%|████      | 81/200 [00:00<00:01, 88.04it/s, now=None][A
t:  46%|████▌     | 91/200 [00:00<00:01, 89.38it/s, now=None][A
t:  54%|█████▍    | 108/200 [00:01<00:00, 110.21it/s, now=None][A
t:  63%|██████▎   | 126/200 [00:01<00:00, 129.31it/s, now=None][A
t:  70%|███████   | 141/200 [00:01<00:00, 134.45it/s, now=None][A
t:  80%|███████▉  | 159/200 [00:01<00:00, 144.98it/s, now=None][A
t:  87%|████████▋ | 174/200 [00:01<00:00, 137.64it/s, now=None][A
t:  94%|█████████▍| 189/200 [00:01<00:00, 140.72it/s, now=None][A
 76%|███████▌  | 

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_190000.mp4


 78%|███████▊  | 194999/250001 [2:30:03<33:16, 27.55it/s]

Moviepy - Building video videos_Pendulum-v1/episode_195000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_195000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   8%|▊         | 16/200 [00:00<00:01, 156.30it/s, now=None][A
t:  16%|█▌        | 32/200 [00:00<00:01, 157.32it/s, now=None][A
t:  24%|██▍       | 48/200 [00:00<00:01, 130.05it/s, now=None][A
t:  31%|███       | 62/200 [00:00<00:01, 115.50it/s, now=None][A
t:  40%|████      | 80/200 [00:00<00:00, 134.59it/s, now=None][A
t:  49%|████▉     | 98/200 [00:00<00:00, 146.33it/s, now=None][A
t:  58%|█████▊    | 116/200 [00:00<00:00, 156.06it/s, now=None][A
t:  68%|██████▊   | 136/200 [00:00<00:00, 166.29it/s, now=None][A
t:  76%|███████▋  | 153/200 [00:01<00:00, 167.16it/s, now=None][A
t:  86%|████████▌ | 172/200 [00:01<00:00, 173.00it/s, now=None][A
t:  95%|█████████▌| 190/200 [00:01<00:00, 157.36it/s, now=None][A
 78%|███████▊  | 195006/250001 [2:30:05<3:17:42,  4.64it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_195000.mp4


 80%|███████▉  | 199997/250001 [2:33:17<27:46, 30.01it/s]

Moviepy - Building video videos_Pendulum-v1/episode_200000.mp4.
Moviepy - Writing video videos_Pendulum-v1/episode_200000.mp4




t:   0%|          | 0/200 [00:00<?, ?it/s, now=None][A
t:   7%|▋         | 14/200 [00:00<00:01, 138.75it/s, now=None][A
t:  16%|█▌        | 32/200 [00:00<00:01, 162.78it/s, now=None][A
t:  24%|██▍       | 49/200 [00:00<00:01, 132.81it/s, now=None][A
t:  32%|███▏      | 63/200 [00:00<00:01, 122.44it/s, now=None][A
t:  40%|████      | 81/200 [00:00<00:00, 139.19it/s, now=None][A
t:  50%|████▉     | 99/200 [00:00<00:00, 150.45it/s, now=None][A
t:  57%|█████▊    | 115/200 [00:00<00:00, 151.61it/s, now=None][A
t:  66%|██████▌   | 131/200 [00:00<00:00, 149.55it/s, now=None][A
t:  75%|███████▌  | 150/200 [00:01<00:00, 160.18it/s, now=None][A
t:  86%|████████▋ | 173/200 [00:01<00:00, 179.75it/s, now=None][A
t:  98%|█████████▊| 197/200 [00:01<00:00, 197.02it/s, now=None][A
 80%|████████  | 200004/250001 [2:33:19<2:47:46,  4.97it/s]

Moviepy - Done !
Moviepy - video ready videos_Pendulum-v1/episode_200000.mp4


 81%|████████  | 201257/250001 [2:34:08<33:34, 24.20it/s]

In [None]:
# Persist `episode_rewards_flat` to disk as a .npy file.
# NOTE(review): the "50" and "250000" in the file name are hard-coded —
# presumably run settings; confirm they match the run that produced the data.
np.save("learning_episode_rewards_50_250000.npy", episode_rewards_flat)

In [None]:
def evaluate_pendulum(agent, env, num_episodes=100):
    """Roll out ``agent`` in ``env`` and return the mean summed reward per episode.

    Args:
        agent: object exposing ``get_action(obs)``. The returned action is
            wrapped in a one-element list before being passed to ``env.step``.
        env: environment whose ``reset()`` returns ``(obs, info)`` and whose
            ``step()`` returns ``(obs, reward, terminated, truncated, info)``.
        num_episodes: number of episodes to average over.

    Returns:
        ``np.mean`` of the per-episode reward totals.

    NOTE(review): actions come from ``agent.get_action`` unchanged — if that
    method is epsilon-greedy, evaluation still explores with the agent's
    current epsilon; confirm this is intended.
    """
    total_rewards = []
    for _ in range(num_episodes):
        obs, info = env.reset()
        done = False
        total_reward = 0
        while not done:
            action = agent.get_action(obs)
            obs, reward, terminated, truncated, info = env.step([action])
            total_reward += reward
            done = terminated or truncated
            # Drop singleton axes so the next get_action call sees a flat observation.
            obs = np.squeeze(obs)
        # The accumulated reward may be a plain scalar or a small array
        # (when the env returns array-valued rewards); collapse both cases
        # to a single Python float via its first element.
        total_rewards.append(float(np.ravel(total_reward)[0]))
    return np.mean(total_rewards)

# Evaluate the agent; the episode count lives in one variable so the call
# and the printed message cannot drift apart.
n_eval_episodes = 1000
average_reward_eval = evaluate_pendulum(agent, env, num_episodes=n_eval_episodes)

print(f"Average reward over {n_eval_episodes} episodes: {average_reward_eval}")

## Acrobot Q-learning agent

In [None]:
from collections import defaultdict
import gymnasium as gym
import numpy as np
from tqdm import tqdm
import time
import os
import matplotlib.pyplot as plt
import moviepy.editor as mpy

env_name = "Acrobot-v1"
# Recorded episodes are written under videos_<env_name>/.
# exist_ok=True replaces the exists()/mkdir() pair: no check-then-create race,
# and the cell can be re-run safely.
os.makedirs(f"videos_{env_name}", exist_ok=True)

def record_video(frames, episode_num, env_name, fps=60):
    """Save a list of frames as ``videos_<env_name>/episode_<episode_num>.mp4``.

    Args:
        frames: list of frames passed to ``mpy.ImageSequenceClip``.
        episode_num: episode index used in the output file name.
        env_name: environment name used in the output directory name.
        fps: frame rate given to the clip; defaults to 60, the value that
            was previously fixed.
    """
    output_dir = f"videos_{env_name}"
    # Create the target directory here so the function works for any
    # env_name, not only the one whose directory was created at module level.
    os.makedirs(output_dir, exist_ok=True)
    clip = mpy.ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(f"{output_dir}/episode_{episode_num}.mp4", codec="libx264")


class AcrobotAgent:
    """Tabular Q-learning agent with epsilon-greedy action selection.

    Each of the six observation components is binned with ``np.digitize``;
    the resulting tuple of bin indices is the key into ``q_values``, a
    ``defaultdict`` that maps a discrete state to one Q-value per action.
    """

    def __init__(
        self,
        env: "gym.Env",
        learning_rate: float,
        initial_epsilon: float,
        epsilon_decay: float,
        final_epsilon: float,
        discount_factor: float = 0.95,
        n_bins: int = 10,
    ):
        """Store hyperparameters and build the (initially empty) Q-table.

        Args:
            env: environment; ``env.action_space.n`` sizes each Q-row and
                ``env.action_space.sample()`` supplies exploratory actions.
            learning_rate: step size applied to the temporal-difference error.
            initial_epsilon: starting exploration probability.
            epsilon_decay: amount subtracted from epsilon by ``decay_epsilon``.
            final_epsilon: lower bound for epsilon.
            discount_factor: weight of the bootstrapped next-state value.
            n_bins: number of discrete indices per observation component.
        """
        self.env = env
        # States never seen before start with an all-zero Q-row.
        self.q_values = defaultdict(lambda: np.zeros(env.action_space.n))
        self.lr = learning_rate
        self.discount_factor = discount_factor
        self.epsilon = initial_epsilon
        self.epsilon_decay = epsilon_decay
        self.final_epsilon = final_epsilon
        self.n_bins = n_bins
        self.bins = self.create_bins()
        # Temporal-difference error of every update, in call order.
        self.training_error = []

    def create_bins(self):
        """Return one array of ``n_bins - 1`` evenly spaced edges per observation component.

        With ``n_bins - 1`` edges, ``np.digitize`` yields indices in
        ``0 .. n_bins - 1``; index 0 and index ``n_bins - 1`` are reached
        only by values below the lowest / at-or-above the highest edge.
        """
        # Observation space: [cos(theta1), sin(theta1), cos(theta2), sin(theta2), theta1_dot, theta2_dot]
        # NOTE(review): Gymnasium documents the velocity bounds as about
        # +/-12.57 and +/-28.27; the +/-36 range for theta2_dot may leave the
        # outermost bins unused — confirm before changing.
        bin_limits = [
            [-1.0, 1.0],  # cos(theta1)
            [-1.0, 1.0],  # sin(theta1)
            [-1.0, 1.0],  # cos(theta2)
            [-1.0, 1.0],  # sin(theta2)
            [-12.0, 12.0],  # theta1_dot
            [-36.0, 36.0],  # theta2_dot
        ]
        bins = [np.linspace(limits[0], limits[1], self.n_bins - 1) for limits in bin_limits]
        return bins

    def discretize(self, obs):
        """Map an observation to a tuple of plain-int bin indices (the Q-table key)."""
        # int() turns NumPy integer scalars into Python ints; they hash and
        # compare equal, so the keys stay compatible while being plain tuples.
        return tuple(int(np.digitize(obs[i], self.bins[i])) for i in range(len(obs)))

    def get_action(self, obs: np.ndarray) -> int:
        """Pick an action epsilon-greedily and return it as a Python ``int``.

        With probability ``epsilon`` a random action is sampled from the
        environment's action space; otherwise the arg-max of the Q-row for
        the discretized observation is returned.
        """
        if np.random.random() < self.epsilon:
            # Explore. sample() may return a NumPy integer; convert so the
            # declared ``int`` return type holds on both branches.
            return int(self.env.action_space.sample())
        # Exploit. The observation is discretized only on this branch,
        # because the exploratory branch does not need the state.
        discrete_state = self.discretize(obs)
        return int(np.argmax(self.q_values[discrete_state]))

    def update(self, obs: np.ndarray, action: int, reward: float, terminated: bool, next_obs: np.ndarray):
        """Apply one Q-learning update for the transition ``(obs, action, reward, next_obs)``.

        The target is ``reward + discount_factor * max_a Q(next_state, a)``,
        with the bootstrap term dropped when ``terminated`` is true. The
        temporal-difference error is appended to ``training_error``.
        """
        discrete_state = self.discretize(obs)
        if terminated:
            # No future value after a terminal transition; skipping the lookup
            # also avoids creating a Q-row for the terminal state.
            future_q_value = 0.0
        else:
            future_q_value = np.max(self.q_values[self.discretize(next_obs)])
        temporal_difference = (
            reward + self.discount_factor * future_q_value - self.q_values[discrete_state][action]
        )
        self.q_values[discrete_state][action] += self.lr * temporal_difference
        self.training_error.append(temporal_difference)

    def decay_epsilon(self):
        """Lower epsilon by ``epsilon_decay``, never going below ``final_epsilon``."""
        self.epsilon = max(self.final_epsilon, self.epsilon - self.epsilon_decay)


# Training loop
# Hyperparameters
learning_rate = 0.05
n_episodes = 13000
start_epsilon = 1.0
# decay_epsilon() subtracts this once per episode, so epsilon reaches its
# floor roughly halfway through training.
epsilon_decay = start_epsilon / (n_episodes / 2)
final_epsilon = 0.01
video_interval = 500  # Save a video every 500 episodes

# Create environment
env = gym.make("Acrobot-v1", render_mode="rgb_array")
agent = AcrobotAgent(
    env,
    learning_rate=learning_rate,
    initial_epsilon=start_epsilon,
    epsilon_decay=epsilon_decay,
    final_epsilon=final_epsilon,
)

episode_rewards = []

# try/finally guarantees the environment is closed even when training is
# interrupted (e.g. by KeyboardInterrupt).
try:
    # n_episodes + 1 iterations so that episode index n_episodes (a multiple
    # of video_interval) is also run and recorded.
    for episode in tqdm(range(n_episodes + 1)):
        obs, info = env.reset()
        done = False
        total_reward = 0
        record_episode = episode % video_interval == 0
        frames = []

        while not done:
            # Rendering is only needed for episodes that are turned into a
            # video; skipping it elsewhere avoids a render call on every step.
            if record_episode:
                frames.append(env.render())

            action = agent.get_action(obs)
            next_obs, reward, terminated, truncated, info = env.step(action)
            # Use the environment reward as-is: in Acrobot-v1 `terminated`
            # means the goal height was reached, so it must not be penalized.
            agent.update(obs, action, reward, terminated, next_obs)

            total_reward += reward
            done = terminated or truncated
            obs = next_obs

        episode_rewards.append(total_reward)
        agent.decay_epsilon()

        if record_episode:
            record_video(frames, episode, env_name)
finally:
    env.close()

# Plot the episode rewards
plt.figure(figsize=(12, 6))
plt.plot(episode_rewards, label="Episode Reward")
plt.xlabel("Episode")
plt.ylabel("Total Reward")
plt.title("Training Progress: Episode Rewards Over Time (Acrobot-v1)")
plt.legend()
plt.grid()
plt.show()


  0%|          | 0/13001 [00:02<?, ?it/s]

Moviepy - Building video videos_Acrobot-v1/episode_0.mp4.
Moviepy - Writing video videos_Acrobot-v1/episode_0.mp4




t:   0%|          | 0/500 [00:00<?, ?it/s, now=None][A
t:   7%|▋         | 37/500 [00:00<00:01, 362.69it/s, now=None][A
t:  15%|█▍        | 74/500 [00:00<00:01, 242.69it/s, now=None][A
t:  20%|██        | 101/500 [00:00<00:01, 204.95it/s, now=None][A
t:  25%|██▍       | 123/500 [00:00<00:01, 196.73it/s, now=None][A
t:  29%|██▉       | 144/500 [00:00<00:01, 191.10it/s, now=None][A
t:  33%|███▎      | 164/500 [00:00<00:01, 180.60it/s, now=None][A
t:  37%|███▋      | 183/500 [00:00<00:01, 167.32it/s, now=None][A
t:  40%|████      | 200/500 [00:01<00:01, 167.40it/s, now=None][A
t:  44%|████▎     | 218/500 [00:01<00:01, 167.94it/s, now=None][A
t:  47%|████▋     | 235/500 [00:01<00:01, 166.89it/s, now=None][A
t:  50%|█████     | 252/500 [00:01<00:01, 161.09it/s, now=None][A
t:  54%|█████▍    | 269/500 [00:01<00:01, 163.23it/s, now=None][A
t:  57%|█████▋    | 286/500 [00:01<00:01, 163.76it/s, now=None][A
t:  61%|██████    | 303/500 [00:01<00:01, 162.75it/s, now=None][A
t:  64%

Moviepy - Done !
Moviepy - video ready videos_Acrobot-v1/episode_0.mp4


  0%|          | 26/13001 [00:53<7:21:18,  2.04s/it]


KeyboardInterrupt: 

In [None]:
#!zip -r /content/videos_CartPole-v1/cartpole_combined_videos.zip /content/videos_CartPole-v1/*
#!zip -r /content/videos_Pendulum-v1/pendulum_combined_videos.zip /content/videos_Pendulum-v1/*
#!zip -r /content/videos_Acrobot-v1/acrobot_combined_videos.zip /content/videos_Acrobot-v1/*

  adding: content/videos_CartPole-v1/episode_0.mp4 (deflated 20%)
  adding: content/videos_CartPole-v1/episode_1000.mp4 (deflated 22%)
  adding: content/videos_CartPole-v1/episode_1500.mp4 (deflated 21%)
  adding: content/videos_CartPole-v1/episode_2000.mp4 (deflated 20%)
  adding: content/videos_CartPole-v1/episode_2500.mp4 (deflated 19%)
  adding: content/videos_CartPole-v1/episode_3000.mp4 (deflated 20%)
  adding: content/videos_CartPole-v1/episode_3500.mp4 (deflated 24%)
  adding: content/videos_CartPole-v1/episode_4000.mp4 (deflated 20%)
  adding: content/videos_CartPole-v1/episode_4500.mp4 (deflated 21%)
  adding: content/videos_CartPole-v1/episode_5000.mp4 (deflated 21%)
  adding: content/videos_CartPole-v1/episode_500.mp4 (deflated 21%)
  adding: content/videos_CartPole-v1/episode_5500.mp4 (deflated 21%)
  adding: content/videos_CartPole-v1/episode_6000.mp4 (deflated 18%)
  adding: content/videos_CartPole-v1/episode_6500.mp4 (deflated 23%)
  adding: content/videos_CartPole-v1/e