### PPO for Discrete Action Space

In [None]:
import gymnasium as gym
import numpy as np

# Build the parking environment with a discrete action space, rendered in
# "human" mode.
# NOTE(review): "Parking-v0" must already be registered with gymnasium; the
# registration is not visible in this notebook - confirm where it happens.
env = gym.make(
    id="Parking-v0",
    render_mode="human",
    observation_type="rgb",
    action_type="discrete",
)
# Grayscale the RGB observations, then stack 4 frames per observation
# (presumably the same preprocessing the agent was trained with - confirm).
env = gym.wrappers.GrayScaleObservation(env)
env = gym.wrappers.FrameStack(env, num_stack=4)

In [None]:
from parking_ppo import Agent
import torch

# Run inference on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The checkpoint is used as a whole module (.eval()/.to() are called on the
# result), so torch.load unpickles arbitrary objects here: only open
# checkpoints that come from a trusted source.
# map_location remaps the stored tensors onto `device` during loading, so a
# checkpoint that was saved on a GPU still loads on a CPU-only machine.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
# fully pickled modules - pass weights_only=False there if loading fails.
agent = torch.load("ppo.pth", map_location=device).eval().to(device)

In [None]:
# Play one episode with the loaded agent. The reset is seeded so the episode
# starts from the same RNG state on every run.
obs, _ = env.reset(seed=42)

while True:
    # Turn the stacked frames into a float tensor with a leading batch axis.
    batch = torch.Tensor(np.array(obs)).to(device).unsqueeze(0)

    # Inference only: no gradients are needed. get_action_and_value returns
    # four values, of which only the action is used here.
    with torch.no_grad():
        action, _, _, _ = agent.get_action_and_value(batch)

    # The discrete action is a single element; hand it to the env as a scalar.
    action = action.cpu().numpy().item()
    obs, reward, terminated, truncated, info = env.step(action)

    # Stop as soon as the episode ends for either reason.
    if terminated or truncated:
        break

In [None]:
# Shut the environment down now that the episode has finished.
env.close()

### PPO for Multidiscrete Action Space

In [None]:
import gymnasium as gym
import numpy as np

# Build the parking environment with a multidiscrete action space. Frames are
# rendered as "rgb_array" rather than to a window; the RecordVideo wrapper
# below writes its recordings to the videos/ folder.
# NOTE(review): "Parking-v0" must already be registered with gymnasium; the
# registration is not visible in this notebook - confirm where it happens.
env = gym.make(
    id="Parking-v0",
    render_mode="rgb_array",
    observation_type="rgb",
    action_type="multidiscrete",
)
# Record every episode: the trigger returns True for any episode index.
env = gym.wrappers.RecordVideo(
    env,
    "videos/parking_ppo_multidiscrete",
    episode_trigger=lambda episode_id: True,
)
# Grayscale the RGB observations, then stack 4 frames per observation
# (presumably the same preprocessing the agent was trained with - confirm).
env = gym.wrappers.GrayScaleObservation(env)
env = gym.wrappers.FrameStack(env, 4)

In [None]:
from parking_ppo_multidiscrete import Agent
import torch

# Run inference on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The checkpoint is used as a whole module (.eval()/.to() are called on the
# result), so torch.load unpickles arbitrary objects here: only open
# checkpoints that come from a trusted source.
# map_location remaps the stored tensors onto `device` during loading, so a
# checkpoint that was saved on a GPU still loads on a CPU-only machine.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
# fully pickled modules - pass weights_only=False there if loading fails.
agent = torch.load("ppo_multidiscrete.pth", map_location=device).eval().to(device)

In [None]:
# Play one episode with the loaded agent. The reset is seeded so the episode
# starts from the same RNG state on every run.
obs, _ = env.reset(seed=42)

while True:
    # Turn the stacked frames into a float tensor with a leading batch axis.
    batch = torch.Tensor(np.array(obs)).to(device).unsqueeze(0)

    # Inference only: no gradients are needed. get_action_and_value returns
    # four values, of which only the action is used here.
    with torch.no_grad():
        action, _, _, _ = agent.get_action_and_value(batch)

    # Take the first entry along the leading axis (the batch of one added
    # above) so the env receives the action for this single observation.
    action = action.cpu().numpy()[0]
    obs, reward, terminated, truncated, info = env.step(action)

    # Stop as soon as the episode ends for either reason.
    if terminated or truncated:
        break

In [None]:
# Shut the environment down now that the episode has finished.
# NOTE(review): closing presumably also lets the RecordVideo wrapper finish
# writing its recording - confirm the video file is complete afterwards.
env.close()

### PPO for Continuous Action Space

In [None]:
import gymnasium as gym
import numpy as np

# Build the parking environment with a continuous action space. Frames are
# rendered as "rgb_array" rather than to a window; the RecordVideo wrapper
# below writes its recordings to the videos/ folder.
# NOTE(review): "Parking-v0" must already be registered with gymnasium; the
# registration is not visible in this notebook - confirm where it happens.
env = gym.make(
    id="Parking-v0",
    render_mode="rgb_array",
    observation_type="rgb",
    action_type="continuous",
)
# Record every episode: the trigger returns True for any episode index.
env = gym.wrappers.RecordVideo(
    env,
    "videos/parking_ppo_continuous",
    episode_trigger=lambda episode_id: True,
)
# Grayscale the RGB observations, then stack 4 frames per observation
# (presumably the same preprocessing the agent was trained with - confirm).
env = gym.wrappers.GrayScaleObservation(env)
env = gym.wrappers.FrameStack(env, 4)

In [None]:
from parking_ppo_continuous import Agent
import torch

# Run inference on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The checkpoint is used as a whole module (.eval()/.to() are called on the
# result), so torch.load unpickles arbitrary objects here: only open
# checkpoints that come from a trusted source.
# map_location remaps the stored tensors onto `device` during loading, so a
# checkpoint that was saved on a GPU still loads on a CPU-only machine.
# NOTE(review): PyTorch >= 2.6 defaults to weights_only=True, which rejects
# fully pickled modules - pass weights_only=False there if loading fails.
agent = torch.load("ppo_continuous.pth", map_location=device).eval().to(device)

In [None]:
# Play one episode with the loaded agent. The reset is seeded so the episode
# starts from the same RNG state on every run.
obs, _ = env.reset(seed=42)

while True:
    # Turn the stacked frames into a float tensor with a leading batch axis.
    batch = torch.Tensor(np.array(obs)).to(device).unsqueeze(0)

    # Inference only: no gradients are needed. get_action_and_value returns
    # four values, of which only the action is used here.
    with torch.no_grad():
        action, _, _, _ = agent.get_action_and_value(batch)

    # Take the first entry along the leading axis (the batch of one added
    # above) so the env receives the action for this single observation.
    action = action.cpu().numpy()[0]
    obs, reward, terminated, truncated, info = env.step(action)

    # Stop as soon as the episode ends for either reason.
    if terminated or truncated:
        break

In [None]:
# Shut the environment down now that the episode has finished.
# NOTE(review): closing presumably also lets the RecordVideo wrapper finish
# writing its recording - confirm the video file is complete afterwards.
env.close()