In [None]:
%load_ext autoreload
%autoreload 2

from agents.dqn_agent import DQNAgent

import time

import gymnasium as gym
import numpy as np
import torch
from infrastructure import pytorch_util as ptu

from scripting_utils import make_logger, make_config

import matplotlib.pyplot as plt
from IPython.display import clear_output

In [None]:
class Args:
  """Plain container of run settings used by this notebook.

  Every setting is stored as an ordinary instance attribute, so it can be
  read (or overridden after construction) as ``args.<name>``.
  """

  def __init__(self):
    # Setting name -> default value; attributes are created in this order.
    defaults = {
      "config_file": "experiments/dqn/car_racing.yaml",
      "metrics_interval": 10000,
      "video_interval": 20000,
      "save_interval": 10000,
      "num_eval_trajectories": 10,
      "num_render_trajectories": 1,
      "seed": 1,
      "no_gpu": False,
      "which_gpu": 0,
      "log_interval": 1000,
    }
    for name, value in defaults.items():
      setattr(self, name, value)

# Instantiate the run settings defined by the Args class.
args = Args()

# Logging is disabled in this notebook: no log directory is created and the
# logger setup below is left commented out.
# logdir_prefix = "hw3_dqn_"  # keep for autograder

# Hard-coded path of an existing run directory; it is used as the base path
# when a saved agent checkpoint is loaded.
logdir_path = "data/hw3_dqn_dqn_CarRacing-v2_d0.99_tu2000_lr0.0001_doubleq_clip10.0_06-05-2024_15-59-12"

# Build the experiment configuration from the YAML file named in args.
config = make_config(args.config_file)
# logger = make_logger(logdir_prefix, config)

In [None]:
# set random seeds
# NumPy's global RNG and PyTorch's RNG are both seeded with args.seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
# Initialise the compute device through the project helper: the GPU is
# requested unless args.no_gpu is set, using the device index args.which_gpu.
ptu.init_gpu(use_gpu=not args.no_gpu, gpu_id=args.which_gpu)

In [None]:
# make the gym environment
# config["make_env"] is called with no arguments and returns the environment.
env = config["make_env"]()

# True when the environment's action space is a gymnasium Discrete space.
discrete = isinstance(env.action_space, gym.spaces.Discrete)

# Fail fast on non-discrete action spaces before building the agent.
assert discrete, "DQN only supports discrete action spaces"

In [None]:
# Build the DQN agent from the environment's observation shape and its number
# of discrete actions; all remaining constructor arguments come from
# config["agent_kwargs"].
agent = DQNAgent(
  env.observation_space.shape,
  env.action_space.n,
  **config["agent_kwargs"],
)

In [None]:
# Step count that selects which checkpoint file to load: the file name is
# "agent_<steps>.pt" inside the run directory logdir_path.
steps = 290000
# Load the saved agent from that checkpoint file
# (presumably restores the trained network weights -- confirm in DQNAgent.load).
agent.load(logdir_path + f"/agent_{steps}.pt")

In [None]:
# Roll out the loaded agent for at most one episode, rendering every frame
# inline in the notebook output.
max_steps = 1000  # upper bound on environment steps for this rollout

try:
    observation, info = env.reset()
    for _ in range(max_steps):
        # Render the environment: replace the previous cell output with the
        # current frame. observation[0] is shown as a grayscale image
        # (presumably the first frame of a stacked observation -- TODO confirm).
        clear_output(wait=True)
        print(observation[0].shape)
        plt.imshow(observation[0], cmap='gray')
        plt.show()

        action = agent.get_action(observation)

        observation, reward, terminated, truncated, info = env.step(action)

        # Stop as soon as the episode ends. No reset is issued here: the
        # environment is closed right after the loop, so resetting it would
        # only be wasted work.
        if terminated or truncated:
            break
finally:
    # Always release the environment, even when the rollout raises or the
    # cell is interrupted from the notebook.
    env.close()