In [1]:
import gymnasium as gym
import ale_py  # Ensure Atari environments work
import matplotlib.pyplot as plt
from IPython.display import display, clear_output
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.nn.utils.rnn import pad_sequence
import collections
import random
from collections import deque
import torch.nn.functional as F
import cv2
from tqdm import tqdm
import wandb
from functools import partial
import os

from utils import get_env, record_video_oc, get_obj_classes

In [2]:
class DQN_OC(nn.Module):
    """Object-centric Q-network.

    Input is a float tensor of shape ``(batch, num_objects, 6)`` where each
    object row is read as ``[time_idx, class_idx, f0, f1, f2, f3]``:

    * column 0 indexes ``time_embs`` (4 entries),
    * column 1 indexes ``class_embs`` (index 0 is the padding class),
    * columns 2.. are four floats projected by ``xywh_proj``
      (presumably x, y, w, h of the bounding box -- confirm against the
      environment wrapper).

    The three embeddings are summed per object, passed through a GRU that runs
    over the object axis, max-pooled over objects, and mapped to one Q-value
    per action.

    Args:
        num_classes: size of the class-embedding table (including padding index 0).
        hidden_dim: width of the embeddings and of the GRU.
        action_dim: number of discrete actions (output size).
    """

    def __init__(self, num_classes, hidden_dim, action_dim):
        super().__init__()

        self.class_embs = nn.Embedding(num_embeddings=num_classes, embedding_dim=hidden_dim, padding_idx=0)
        self.time_embs = nn.Embedding(num_embeddings=4, embedding_dim=hidden_dim)
        self.xywh_proj = nn.Linear(4, hidden_dim)

        # batch_first=True is required: forward() feeds (batch, objects, hidden)
        # and pools over dim=1. With the default (batch_first=False) the GRU
        # would treat the *batch* axis as the sequence, so samples in a
        # minibatch would leak into each other through the hidden state.
        # The flag does not change parameter names/shapes, so existing
        # state_dicts still load.
        self.encoder = nn.GRU(
            input_size=hidden_dim,
            hidden_size=hidden_dim,
            num_layers=3,
            batch_first=True,
        )

        self.fc_layers = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Linear(hidden_dim // 2, action_dim)  # Output Q-values for each action
        )

    def forward(self, x):
        """Return Q-values of shape ``(batch, action_dim)`` for a batch of object sets."""
        time_emb = self.time_embs(x[:, :, 0].long())
        class_emb = self.class_embs(x[:, :, 1].long())
        xywh_emb = self.xywh_proj(x[:, :, 2:])
        obj_emb = time_emb + class_emb + xywh_emb

        # NOTE(review): all-zero padded rows get a zero class embedding via
        # padding_idx, but still receive time_embs[0] and the xywh_proj bias,
        # so they take part in the GRU and the max-pool below. Consider masking.
        encoded, _ = self.encoder(obj_emb)      # (batch, objects, hidden)
        pooled, _ = torch.max(encoded, dim=1)   # max over the object axis
        return self.fc_layers(pooled)

In [3]:
def select_action(env, model, state_objs, epsilon):
    """Pick an action with an epsilon-greedy policy.

    Args:
        env: environment exposing ``action_space.sample()``.
        model: Q-network; called with a single-sample batch.
        state_objs: observation object exposing ``to_tensor()``.
        epsilon: probability of taking a uniformly random action.

    Returns:
        The sampled action (exploration) or the index of the largest
        Q-value as a Python int (exploitation).
    """
    if random.random() < epsilon:
        return env.action_space.sample()  # Random action (exploration)

    # Use the device the model actually lives on instead of relying on a
    # module-level `device` that is only defined in a later notebook cell.
    first_param = next(model.parameters(), None)

    with torch.no_grad():
        # Add batch dimension
        batch = state_objs.to_tensor().unsqueeze(0)
        if first_param is not None:
            batch = batch.to(first_param.device)
        # argmax() without `dim` indexes the flattened tensor, which for a
        # single-sample batch is the action index.
        return model(batch).argmax().item()

def train(model, target_model, buffer, optimizer, batch_size, gamma, reward_scale=0.01):
    """Run one DQN optimisation step on a minibatch from the replay buffer.

    Args:
        model: online Q-network being optimised.
        target_model: target Q-network used for the bootstrap value
            (assumed to live on the same device as ``model``).
        buffer: replay buffer exposing ``size()`` and ``sample(batch_size)``
            returning ``(states, actions, rewards, next_states, dones)`` tensors.
        optimizer: optimizer over ``model``'s parameters.
        batch_size: number of transitions to sample.
        gamma: discount factor.
        reward_scale: multiplier applied to rewards before building the target
            (defaults to the previously hard-coded 0.01).

    Returns:
        The scalar MSE loss as a float, or ``0`` if the buffer holds fewer
        than ``batch_size`` transitions (no update is performed).
    """
    if buffer.size() < batch_size:
        return 0

    # Sample batch from experience replay
    state_objs, actions, rewards, next_state_objs, dones = buffer.sample(batch_size)

    # Take the device from the model rather than from a notebook-level global.
    dev = next(model.parameters()).device
    state_objs = state_objs.to(dev)
    actions = actions.to(dev)
    rewards = rewards.to(dev)
    dones = dones.to(dev)
    next_state_objs = next_state_objs.to(dev)

    # Q-values of the actions that were actually taken
    q_values = model(state_objs).gather(1, actions.unsqueeze(1)).squeeze(1)

    # The target is a constant w.r.t. the online network, so build it without
    # recording an autograd graph through the target network.
    with torch.no_grad():
        next_q_values = target_model(next_state_objs).max(dim=1).values
        # No bootstrapping from terminal states
        next_q_values = next_q_values.masked_fill(dones.to(torch.bool), 0.0)
        target_q_values = reward_scale * rewards + gamma * next_q_values

    dq_loss = F.mse_loss(q_values, target_q_values)

    # Backpropagation
    optimizer.zero_grad()
    dq_loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Gradient clipping
    optimizer.step()
    return dq_loss.item()


In [4]:
class ReplayBuffer:
    """Fixed-capacity FIFO store of transitions for experience replay.

    Stored states are objects exposing ``to_tensor()``; they are converted to
    tensors and zero-padded to a common length only when a batch is sampled.
    """

    def __init__(self, capacity):
        # deque with maxlen evicts the oldest transition once full
        self.buffer = collections.deque(maxlen=capacity)

    def push(self, state_objs, action, reward, next_state_objs, done):
        """Append one transition, evicting the oldest one if at capacity."""
        self.buffer.append((
            state_objs,
            int(action),
            # Keep rewards as floats: int() would silently truncate
            # fractional rewards (e.g. 0.5 -> 0).
            float(reward),
            next_state_objs,
            bool(done)
        ))

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions uniformly without replacement.

        Returns:
            Tuple ``(states, actions, rewards, next_states, dones)`` where
            states/next_states are zero-padded along the object axis
            (``batch_first=True``), actions is a long tensor, and rewards /
            dones are float tensors (dones is 1.0 for terminal transitions).

        Raises:
            ValueError: if ``batch_size`` exceeds the number of stored transitions.
        """
        batch = random.sample(self.buffer, batch_size)
        state_objs, actions, rewards, next_state_objs, dones = zip(*batch)

        states = [objs.to_tensor() for objs in state_objs]
        next_states = [objs.to_tensor() for objs in next_state_objs]

        return (
            pad_sequence(states, batch_first=True, padding_value=0.0),
            torch.tensor(actions, dtype=torch.long),
            torch.tensor(rewards, dtype=torch.float32),
            pad_sequence(next_states, batch_first=True, padding_value=0.0),
            torch.tensor(dones, dtype=torch.float32)
        )

    def size(self):
        """Number of transitions currently stored."""
        return len(self.buffer)

    def __len__(self):
        return len(self.buffer)

In [5]:
# ---- Hyperparameters -------------------------------------------------------
lr = 0.0001
weight_decay = 1e-5
replay_buffer_size = 10000
num_train_iterations = 1000000
batch_size = 32
gamma = 0.99
epsilon = 1.0
epsilon_min = 0.05
epsilon_decay = 0.999885
target_update_freq = 10000
rewards_list = []
project_name = 'dqn_oc_knn'

# ---- Environment -----------------------------------------------------------
# Object-centric Atari environment built by the project helper.
knn = 3
env = get_env(process=False, oc=True, knn=knn)

# Reset once up front; the returned observation is not used in this cell.
obs, info = env.reset()

action_dim = env.action_space.n
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# ---- Networks, optimizer, replay buffer ------------------------------------
def _make_q_network():
    """Build one DQN_OC (class table = object classes + 1 for padding index 0) on `device`."""
    return DQN_OC(
        num_classes=len(get_obj_classes()) + 1,
        hidden_dim=300,
        action_dim=action_dim,
    ).to(device)


dqn = _make_q_network()
target_dqn = _make_q_network()
# Start the target network as an exact copy of the online network.
target_dqn.load_state_dict(dqn.state_dict())

optimizer = optim.AdamW(dqn.parameters(), lr=lr, weight_decay=weight_decay)
replay_buffer = ReplayBuffer(replay_buffer_size)

# ---- Experiment tracking ---------------------------------------------------
# Hyperparameters and run metadata recorded with the wandb run.
run_config = {
    "lr": lr,
    "weight_decay": weight_decay,
    "batch_size": batch_size,
    "gamma": gamma,
    "epsilon": epsilon,
    "epsilon_min": epsilon_min,
    "epsilon_decay": epsilon_decay,
    "replay_buffer_size": replay_buffer_size,
    "variant": project_name,
    "num_train_iterations": num_train_iterations,
    "target_update_freq": target_update_freq,
}

wandb.require("core")
wandb.login()
# `project` groups runs; `name` is this run's display name.
wandb.init(project="frogger", name=project_name, config=run_config)

A.L.E: Arcade Learning Environment (version 0.10.2+c9d4b19)
[Powered by Stella]
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mkevinxli[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [6]:
os.makedirs(f'{project_name}/train', exist_ok=True)

# The optimizer built in the setup cell is reused here; re-creating it in this
# cell was redundant and would discard AdamW's moment estimates on a re-run.
state, info = env.reset()
total_loss = 0    # sum of per-step losses over the current episode
total_reward = 0  # undiscounted return of the current episode so far

for iteration in tqdm(range(num_train_iterations+1)):
    action = select_action(env, dqn, state, epsilon)
    next_state, reward, terminated, truncated, info = env.step(action)
    total_reward += reward

    # Only `terminated` is stored as the done flag, so transitions that end by
    # truncation still bootstrap from the next state in train().
    replay_buffer.push(state, action, reward, next_state, terminated)

    # Train before the episode bookkeeping so this step's loss is included
    # in the per-episode total that gets logged.
    loss = train(dqn, target_dqn, replay_buffer, optimizer, batch_size, gamma)
    total_loss += loss
    epsilon = max(epsilon_min, epsilon * epsilon_decay)

    # Record the running return *before* any reset below; previously the
    # final return of every episode was overwritten with 0 before being stored.
    rewards_list.append(total_reward)

    if terminated or truncated:
        wandb.log({'train/loss': total_loss, 'train/episode_return': total_reward})
        state, info = env.reset()
        total_loss = 0
        total_reward = 0
    else:
        state = next_state

    if iteration % target_update_freq == 0:
        # Sync the target network and checkpoint the online network.
        target_dqn.load_state_dict(dqn.state_dict())
        torch.save(dqn.state_dict(), f"{project_name}/train/frogger_dqn_iter_{iteration}.pth")
        if iteration > 0:
            # Greedy (epsilon=0) evaluation rollout; distinct names avoid
            # clobbering the loop's per-step `reward`.
            eval_reward, eval_length, eval_time = record_video_oc(
                select_action=partial(select_action, model=dqn, epsilon=0),
                video_folder=f"{project_name}/videos",
                video_name=f"train_iter_{iteration}",
                knn=knn,
            )
            wandb.log({'train/reward': eval_reward, 'train/length': eval_length, 'train/time': eval_time})


  0%|          | 0/1000001 [00:00<?, ?it/s]

  1%|          | 9993/1000001 [02:00<3:16:26, 83.99it/s] 

iteration 0 action 0
iteration 1 action 0
iteration 2 action 0
iteration 3 action 0
iteration 4 action 0
iteration 5 action 0
iteration 6 action 0
iteration 7 action 0
iteration 8 action 0
iteration 9 action 0
iteration 10 action 0
iteration 11 action 0
iteration 12 action 0
iteration 13 action 0
iteration 14 action 0
iteration 15 action 0
iteration 16 action 0
iteration 17 action 0
iteration 18 action 0
iteration 19 action 0
iteration 20 action 0
iteration 21 action 0
iteration 22 action 2
iteration 23 action 2
iteration 24 action 2
iteration 25 action 2
iteration 26 action 2
iteration 27 action 2
iteration 28 action 2
iteration 29 action 2
iteration 30 action 2
iteration 31 action 2
iteration 32 action 2
iteration 33 action 2
iteration 34 action 2
iteration 35 action 2
iteration 36 action 2
iteration 37 action 2
iteration 38 action 2
iteration 39 action 2
iteration 40 action 2
iteration 41 action 2
iteration 42 action 2
iteration 43 action 2
iteration 44 action 2
iteration 45 action 

  1%|          | 9993/1000001 [02:20<3:16:26, 83.99it/s]

iteration 1063 action 2
iteration 1064 action 2
iteration 1065 action 2
iteration 1066 action 2
iteration 1067 action 2
iteration 1068 action 2
iteration 1069 action 2
iteration 1070 action 2
iteration 1071 action 2
iteration 1072 action 2
iteration 1073 action 2
iteration 1074 action 2
iteration 1075 action 2
iteration 1076 action 2
iteration 1077 action 2
iteration 1078 action 2
iteration 1079 action 2
iteration 1080 action 2
iteration 1081 action 2
iteration 1082 action 2
iteration 1083 action 2
iteration 1084 action 2
iteration 1085 action 2
iteration 1086 action 2
iteration 1087 action 2
iteration 1088 action 2
iteration 1089 action 2
iteration 1090 action 2
iteration 1091 action 2
iteration 1092 action 2
iteration 1093 action 2
iteration 1094 action 2
iteration 1095 action 2
iteration 1096 action 2
iteration 1097 action 2
iteration 1098 action 2
iteration 1099 action 2
iteration 1100 action 2
iteration 1101 action 2
iteration 1102 action 2
iteration 1103 action 2
iteration 1104 a

  1%|          | 9993/1000001 [02:36<3:16:26, 83.99it/s]

iteration 1997 action 2
iteration 1998 action 2
iteration 1999 action 2
iteration 2000 action 2
MoviePy - Building video dqn_oc_knn/videos/train_iter_10000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_10000.mp4



  1%|          | 10008/1000001 [03:11<497:21:08,  1.81s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_10000.mp4


  2%|▏         | 19996/1000001 [05:27<3:57:22, 68.81it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 4
iteration 23 action 4
iteration 24 action 4
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

  2%|▏         | 19996/1000001 [05:35<3:57:22, 68.81it/s]

iteration 448 action 1
iteration 449 action 1
iteration 450 action 1
iteration 451 action 1
iteration 452 action 1
iteration 453 action 1
iteration 454 action 1
iteration 455 action 1
iteration 456 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_20000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_20000.mp4



  2%|▏         | 20009/1000001 [05:43<130:44:52,  2.08it/s]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_20000.mp4


  3%|▎         | 29996/1000001 [07:57<3:31:15, 76.53it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

  3%|▎         | 29996/1000001 [08:10<3:31:15, 76.53it/s]

iteration 703 action 1
iteration 704 action 1
iteration 705 action 1
iteration 706 action 1
iteration 707 action 1
iteration 708 action 1
iteration 709 action 1
iteration 710 action 1
iteration 711 action 1
iteration 712 action 1
iteration 713 action 1
iteration 714 action 1
iteration 715 action 1
iteration 716 action 1
iteration 717 action 1
iteration 718 action 1
iteration 719 action 1
iteration 720 action 1
iteration 721 action 1
iteration 722 action 1
iteration 723 action 1
iteration 724 action 1
iteration 725 action 1
iteration 726 action 1
iteration 727 action 1
iteration 728 action 1
iteration 729 action 1
iteration 730 action 1
iteration 731 action 1
iteration 732 action 1
iteration 733 action 1
iteration 734 action 1
iteration 735 action 1
iteration 736 action 1
iteration 737 action 1
iteration 738 action 1
iteration 739 action 1
iteration 740 action 1
iteration 741 action 1
iteration 742 action 1
iteration 743 action 1
iteration 744 action 1
iteration 745 action 1
iteration 7

  3%|▎         | 29996/1000001 [08:32<3:31:15, 76.53it/s]

iteration 1999 action 1
iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_30000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_30000.mp4



  3%|▎         | 30009/1000001 [09:04<507:34:20,  1.88s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_30000.mp4


  4%|▍         | 39998/1000001 [11:23<3:55:19, 67.99it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

  4%|▍         | 39998/1000001 [11:40<3:55:19, 67.99it/s]

iteration 935 action 1
iteration 936 action 1
iteration 937 action 1
iteration 938 action 1
iteration 939 action 1
iteration 940 action 1
iteration 941 action 1
iteration 942 action 1
iteration 943 action 1
iteration 944 action 1
iteration 945 action 1
iteration 946 action 1
iteration 947 action 1
iteration 948 action 1
iteration 949 action 1
iteration 950 action 1
iteration 951 action 1
iteration 952 action 1
iteration 953 action 1
iteration 954 action 1
iteration 955 action 1
iteration 956 action 1
iteration 957 action 1
iteration 958 action 1
iteration 959 action 1
iteration 960 action 1
iteration 961 action 1
iteration 962 action 1
iteration 963 action 1
iteration 964 action 1
iteration 965 action 1
iteration 966 action 1
iteration 967 action 1
iteration 968 action 1
iteration 969 action 1
iteration 970 action 1
iteration 971 action 1
iteration 972 action 1
iteration 973 action 1
iteration 974 action 1
iteration 975 action 1
iteration 976 action 1
iteration 977 action 1
iteration 9

  4%|▍         | 39998/1000001 [11:58<3:55:19, 67.99it/s]

iteration 1995 action 1
iteration 1996 action 1
iteration 1997 action 1
iteration 1998 action 1
iteration 1999 action 1
iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_40000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_40000.mp4



  4%|▍         | 40010/1000001 [12:30<551:18:46,  2.07s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_40000.mp4


  5%|▍         | 49998/1000001 [14:51<4:03:59, 64.89it/s]  

iteration 0 action 0
iteration 1 action 0
iteration 2 action 0
iteration 3 action 0
iteration 4 action 0
iteration 5 action 0
iteration 6 action 0
iteration 7 action 0
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 0
iteration 25 action 0
iteration 26 action 0
iteration 27 action 0
iteration 28 action 0
iteration 29 action 0
iteration 30 action 0
iteration 31 action 0
iteration 32 action 0
iteration 33 action 0
iteration 34 action 0
iteration 35 action 0
iteration 36 action 0
iteration 37 action 0
iteration 38 action 0
iteration 39 action 0
iteration 40 action 0
iteration 41 action 0
iteration 42 action 0
iteration 43 action 0
iteration 44 action 0
iteration 45 action 

  5%|▍         | 49998/1000001 [14:58<4:03:59, 64.89it/s]

iteration 372 action 0
iteration 373 action 0
iteration 374 action 0
iteration 375 action 0
iteration 376 action 0
MoviePy - Building video dqn_oc_knn/videos/train_iter_50000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_50000.mp4



  5%|▌         | 50015/1000001 [15:04<93:13:15,  2.83it/s] 

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_50000.mp4


  6%|▌         | 59996/1000001 [17:23<3:44:02, 69.93it/s] 

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

  6%|▌         | 59996/1000001 [17:40<3:44:02, 69.93it/s]

iteration 948 action 1
iteration 949 action 1
iteration 950 action 1
iteration 951 action 1
iteration 952 action 1
iteration 953 action 1
iteration 954 action 1
iteration 955 action 1
iteration 956 action 1
iteration 957 action 1
iteration 958 action 1
iteration 959 action 1
iteration 960 action 1
iteration 961 action 1
iteration 962 action 1
iteration 963 action 1
iteration 964 action 1
iteration 965 action 1
iteration 966 action 1
iteration 967 action 1
iteration 968 action 1
iteration 969 action 1
iteration 970 action 1
iteration 971 action 1
iteration 972 action 1
iteration 973 action 1
iteration 974 action 1
iteration 975 action 1
iteration 976 action 1
iteration 977 action 1
iteration 978 action 1
iteration 979 action 1
iteration 980 action 1
iteration 981 action 1
iteration 982 action 1
iteration 983 action 1
iteration 984 action 1
iteration 985 action 1
iteration 986 action 1
iteration 987 action 1
iteration 988 action 1
iteration 989 action 1
iteration 990 action 1
iteration 9

  6%|▌         | 59996/1000001 [17:58<3:44:02, 69.93it/s]

iteration 1995 action 1
iteration 1996 action 1
iteration 1997 action 1
iteration 1998 action 1
iteration 1999 action 1
iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_60000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_60000.mp4



  6%|▌         | 60008/1000001 [18:34<554:48:07,  2.12s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_60000.mp4


  7%|▋         | 69997/1000001 [20:48<3:52:27, 66.68it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

  7%|▋         | 69997/1000001 [21:00<3:52:27, 66.68it/s]

iteration 627 action 1
iteration 628 action 1
iteration 629 action 1
iteration 630 action 1
iteration 631 action 1
iteration 632 action 1
iteration 633 action 1
iteration 634 action 1
iteration 635 action 1
iteration 636 action 1
iteration 637 action 1
iteration 638 action 1
iteration 639 action 1
iteration 640 action 1
iteration 641 action 1
iteration 642 action 1
iteration 643 action 1
iteration 644 action 1
iteration 645 action 1
iteration 646 action 1
iteration 647 action 1
iteration 648 action 1
iteration 649 action 1
iteration 650 action 1
iteration 651 action 1
iteration 652 action 1
iteration 653 action 1
iteration 654 action 1
iteration 655 action 1
iteration 656 action 1
iteration 657 action 1
iteration 658 action 1
iteration 659 action 1
iteration 660 action 1
iteration 661 action 1
iteration 662 action 1
iteration 663 action 1
iteration 664 action 1
iteration 665 action 1
iteration 666 action 1
iteration 667 action 1
iteration 668 action 1
iteration 669 action 1
iteration 6

  7%|▋         | 69997/1000001 [21:24<3:52:27, 66.68it/s]

iteration 1993 action 1
iteration 1994 action 1
iteration 1995 action 1
iteration 1996 action 1
iteration 1997 action 1
iteration 1998 action 1
iteration 1999 action 1
iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_70000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_70000.mp4



  7%|▋         | 70007/1000001 [21:56<600:55:53,  2.33s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_70000.mp4


  8%|▊         | 79995/1000001 [24:14<3:34:38, 71.44it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

  8%|▊         | 79995/1000001 [24:30<3:34:38, 71.44it/s]

iteration 850 action 1
iteration 851 action 1
iteration 852 action 1
iteration 853 action 1
iteration 854 action 1
iteration 855 action 1
iteration 856 action 1
iteration 857 action 1
iteration 858 action 1
iteration 859 action 1
iteration 860 action 1
iteration 861 action 1
iteration 862 action 1
iteration 863 action 1
iteration 864 action 1
iteration 865 action 1
iteration 866 action 1
iteration 867 action 1
iteration 868 action 1
iteration 869 action 1
iteration 870 action 1
iteration 871 action 1
iteration 872 action 1
iteration 873 action 1
iteration 874 action 1
iteration 875 action 1
iteration 876 action 1
iteration 877 action 1
iteration 878 action 1
iteration 879 action 1
iteration 880 action 1
iteration 881 action 1
iteration 882 action 1
iteration 883 action 1
iteration 884 action 1
iteration 885 action 1
iteration 886 action 1
iteration 887 action 1
iteration 888 action 1
iteration 889 action 1
iteration 890 action 1
iteration 891 action 1
iteration 892 action 1
iteration 8

  8%|▊         | 79995/1000001 [24:51<3:34:38, 71.44it/s]

iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_80000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_80000.mp4



  8%|▊         | 80011/1000001 [25:28<494:44:19,  1.94s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_80000.mp4


  9%|▉         | 89996/1000001 [27:34<3:35:54, 70.25it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

  9%|▉         | 89996/1000001 [27:50<3:35:54, 70.25it/s]

iteration 878 action 1
iteration 879 action 1
iteration 880 action 1
iteration 881 action 1
iteration 882 action 1
iteration 883 action 1
iteration 884 action 1
iteration 885 action 1
iteration 886 action 1
iteration 887 action 1
iteration 888 action 1
iteration 889 action 1
iteration 890 action 1
iteration 891 action 1
iteration 892 action 1
iteration 893 action 1
iteration 894 action 1
iteration 895 action 1
iteration 896 action 1
iteration 897 action 1
iteration 898 action 1
iteration 899 action 1
iteration 900 action 1
iteration 901 action 1
iteration 902 action 1
iteration 903 action 1
iteration 904 action 1
iteration 905 action 1
iteration 906 action 1
iteration 907 action 1
iteration 908 action 1
iteration 909 action 1
iteration 910 action 1
iteration 911 action 1
iteration 912 action 1
iteration 913 action 1
iteration 914 action 1
iteration 915 action 1
iteration 916 action 1
iteration 917 action 1
iteration 918 action 1
iteration 919 action 1
iteration 920 action 1
iteration 9

  9%|▉         | 89996/1000001 [28:10<3:35:54, 70.25it/s]

MoviePy - Building video dqn_oc_knn/videos/train_iter_90000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_90000.mp4



  9%|▉         | 90008/1000001 [28:42<527:11:51,  2.09s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_90000.mp4


 10%|▉         | 99998/1000001 [31:03<3:33:03, 70.40it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

 10%|▉         | 99998/1000001 [31:20<3:33:03, 70.40it/s]

iteration 914 action 1
iteration 915 action 1
iteration 916 action 1
iteration 917 action 1
iteration 918 action 1
iteration 919 action 1
iteration 920 action 1
iteration 921 action 1
iteration 922 action 1
iteration 923 action 1
iteration 924 action 1
iteration 925 action 1
iteration 926 action 1
iteration 927 action 1
iteration 928 action 1
iteration 929 action 1
iteration 930 action 1
iteration 931 action 1
iteration 932 action 1
iteration 933 action 1
iteration 934 action 1
iteration 935 action 1
iteration 936 action 1
iteration 937 action 1
iteration 938 action 1
iteration 939 action 1
iteration 940 action 1
iteration 941 action 1
iteration 942 action 1
iteration 943 action 1
iteration 944 action 1
iteration 945 action 1
iteration 946 action 1
iteration 947 action 1
iteration 948 action 1
iteration 949 action 1
iteration 950 action 1
iteration 951 action 1
iteration 952 action 1
iteration 953 action 1
iteration 954 action 1
iteration 955 action 1
iteration 956 action 1
iteration 9

 10%|▉         | 99998/1000001 [31:39<3:33:03, 70.40it/s]

iteration 1995 action 1
iteration 1996 action 1
iteration 1997 action 1
iteration 1998 action 1
iteration 1999 action 1
iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_100000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_100000.mp4



 10%|█         | 100009/1000001 [32:10<509:34:29,  2.04s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_100000.mp4


 11%|█         | 109997/1000001 [34:26<3:42:09, 66.77it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

 11%|█         | 109997/1000001 [34:40<3:42:09, 66.77it/s]

iteration 749 action 1
iteration 750 action 1
iteration 751 action 1
iteration 752 action 1
iteration 753 action 1
iteration 754 action 1
iteration 755 action 1
iteration 756 action 1
iteration 757 action 1
iteration 758 action 1
iteration 759 action 1
iteration 760 action 1
iteration 761 action 1
iteration 762 action 1
iteration 763 action 1
iteration 764 action 1
iteration 765 action 1
iteration 766 action 1
iteration 767 action 1
iteration 768 action 1
iteration 769 action 1
iteration 770 action 1
iteration 771 action 1
iteration 772 action 1
iteration 773 action 1
iteration 774 action 1
iteration 775 action 1
iteration 776 action 1
iteration 777 action 1
iteration 778 action 1
iteration 779 action 1
iteration 780 action 1
iteration 781 action 1
iteration 782 action 1
iteration 783 action 1
iteration 784 action 1
iteration 785 action 1
iteration 786 action 1
iteration 787 action 1
iteration 788 action 1
iteration 789 action 1
iteration 790 action 1
iteration 791 action 1
iteration 7

 11%|█         | 109997/1000001 [35:02<3:42:09, 66.77it/s]

iteration 1999 action 1
iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_110000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_110000.mp4



 11%|█         | 110009/1000001 [35:36<535:47:18,  2.17s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_110000.mp4


 12%|█▏        | 119996/1000001 [37:49<3:20:02, 73.32it/s]  

iteration 0 action 1
iteration 1 action 1
iteration 2 action 1
iteration 3 action 1
iteration 4 action 1
iteration 5 action 1
iteration 6 action 1
iteration 7 action 1
iteration 8 action 1
iteration 9 action 1
iteration 10 action 1
iteration 11 action 1
iteration 12 action 1
iteration 13 action 1
iteration 14 action 1
iteration 15 action 1
iteration 16 action 1
iteration 17 action 1
iteration 18 action 1
iteration 19 action 1
iteration 20 action 1
iteration 21 action 1
iteration 22 action 1
iteration 23 action 1
iteration 24 action 1
iteration 25 action 1
iteration 26 action 1
iteration 27 action 1
iteration 28 action 1
iteration 29 action 1
iteration 30 action 1
iteration 31 action 1
iteration 32 action 1
iteration 33 action 1
iteration 34 action 1
iteration 35 action 1
iteration 36 action 1
iteration 37 action 1
iteration 38 action 1
iteration 39 action 1
iteration 40 action 1
iteration 41 action 1
iteration 42 action 1
iteration 43 action 1
iteration 44 action 1
iteration 45 action 

 12%|█▏        | 119996/1000001 [38:00<3:20:02, 73.32it/s]

iteration 613 action 1
iteration 614 action 1
iteration 615 action 1
iteration 616 action 1
iteration 617 action 1
iteration 618 action 1
iteration 619 action 1
iteration 620 action 1
iteration 621 action 1
iteration 622 action 1
iteration 623 action 1
iteration 624 action 1
iteration 625 action 1
iteration 626 action 1
iteration 627 action 1
iteration 628 action 1
iteration 629 action 1
iteration 630 action 1
iteration 631 action 1
iteration 632 action 1
iteration 633 action 1
iteration 634 action 1
iteration 635 action 1
iteration 636 action 1
iteration 637 action 1
iteration 638 action 1
iteration 639 action 1
iteration 640 action 1
iteration 641 action 1
iteration 642 action 1
iteration 643 action 1
iteration 644 action 1
iteration 645 action 1
iteration 646 action 1
iteration 647 action 1
iteration 648 action 1
iteration 649 action 1
iteration 650 action 1
iteration 651 action 1
iteration 652 action 1
iteration 653 action 1
iteration 654 action 1
iteration 655 action 1
iteration 6

 12%|█▏        | 119996/1000001 [38:25<3:20:02, 73.32it/s]

iteration 1998 action 1
iteration 1999 action 1
iteration 2000 action 1
MoviePy - Building video dqn_oc_knn/videos/train_iter_120000.mp4.
MoviePy - Writing video dqn_oc_knn/videos/train_iter_120000.mp4



 12%|█▏        | 120009/1000001 [38:59<486:18:17,  1.99s/it]

MoviePy - Done !
MoviePy - video ready dqn_oc_knn/videos/train_iter_120000.mp4


 13%|█▎        | 129994/1000001 [41:16<3:29:26, 69.23it/s]  

iteration 0 action 3
iteration 1 action 3
iteration 2 action 3
iteration 3 action 3
iteration 4 action 3
iteration 5 action 3
iteration 6 action 3
iteration 7 action 3
iteration 8 action 3
iteration 9 action 3
iteration 10 action 3
iteration 11 action 3
iteration 12 action 3
iteration 13 action 3
iteration 14 action 3
iteration 15 action 3
iteration 16 action 3
iteration 17 action 3
iteration 18 action 3
iteration 19 action 3
iteration 20 action 3
iteration 21 action 3
iteration 22 action 3
iteration 23 action 3
iteration 24 action 3
iteration 25 action 3
iteration 26 action 3
iteration 27 action 3
iteration 28 action 3
iteration 29 action 3
iteration 30 action 3
iteration 31 action 3
iteration 32 action 3
iteration 33 action 3
iteration 34 action 3
iteration 35 action 3
iteration 36 action 3
iteration 37 action 3
iteration 38 action 3
iteration 39 action 3
iteration 40 action 3
iteration 41 action 3
iteration 42 action 3
iteration 43 action 3
iteration 44 action 3
iteration 45 action 

 13%|█▎        | 129994/1000001 [41:30<3:29:26, 69.23it/s]

iteration 719 action 3
iteration 720 action 3
iteration 721 action 3
iteration 722 action 3
iteration 723 action 3
iteration 724 action 3
iteration 725 action 3
iteration 726 action 3
iteration 727 action 3
iteration 728 action 3
iteration 729 action 3
iteration 730 action 3
iteration 731 action 3
iteration 732 action 3
iteration 733 action 3
iteration 734 action 3
iteration 735 action 3
iteration 736 action 3
iteration 737 action 3
iteration 738 action 3
iteration 739 action 3
iteration 740 action 3
iteration 741 action 3
iteration 742 action 3
iteration 743 action 3
iteration 744 action 3
iteration 745 action 3
iteration 746 action 3
iteration 747 action 3
iteration 748 action 3
iteration 749 action 3
iteration 750 action 3
iteration 751 action 3
iteration 752 action 3
iteration 753 action 3
iteration 754 action 3
iteration 755 action 3
iteration 756 action 3
iteration 757 action 3
iteration 758 action 3
iteration 759 action 3
iteration 760 action 3
iteration 761 action 3
iteration 7

 13%|█▎        | 130000/1000001 [41:47<4:39:39, 51.85it/s]


KeyboardInterrupt: 