In [16]:
import torch
import dataclasses
import mediapy
from huggingface_hub import PyTorchModelHubMixin
from huggingface_hub import ModelCard
from gpudrive.networks.late_fusion import NeuralNet

from gpudrive.env.config import EnvConfig
from gpudrive.env.env_torch import GPUDriveTorchEnv
from gpudrive.visualize.utils import img_from_fig
from gpudrive.env.dataset import SceneDataLoader
from gpudrive.utils.config import load_config
import os
import torch
from pathlib import Path
# Set the working directory to the repository base directory.
BASE_DIR_NAME = 'my_gpudrive'
start_dir = Path.cwd()

# Check the current directory first, then every ancestor. `Path.parents` is a
# finite sequence ending at the filesystem root, so the search always
# terminates -- also when the notebook is started outside the home directory.
working_dir = next(
    (
        candidate
        for candidate in (start_dir, *start_dir.parents)
        if candidate.name == BASE_DIR_NAME
    ),
    None,
)
if working_dir is None:
    raise FileNotFoundError(
        f"Base directory '{BASE_DIR_NAME}' not found at or above {start_dir}"
    )

# Relative paths in the cells below (configs, data, checkpoint) are resolved
# against this directory.
os.chdir(working_dir)
working_dir

PosixPath('/home/mingke.wang/Documents/my_gpudrive')

### Configs

In [17]:
# Load the configuration the model has been trained with.
# Alternative config: load_config("examples/experimental/config/reliable_agents_params")
config = load_config("baselines/ppo/config/ppo_base_puffer")
print(config)

# Notebook-level settings shared by the cells below
device = "cpu"  # cpu just because we're in a notebook
num_envs = 1  # used as the data loader batch size and the number of rendered environments
max_agents = config.environment.max_controlled_agents  # second dimension of the action tensor

{'mode': 'train', 'use_rnn': False, 'eval_model_path': None, 'baseline': False, 'data_dir': 'data/processed/training', 'continue_training': False, 'model_cpt': None, 'environment': {'name': 'gpudrive', 'num_worlds': 75, 'k_unique_scenes': 75, 'max_controlled_agents': 64, 'ego_state': True, 'road_map_obs': True, 'partner_obs': True, 'norm_obs': True, 'remove_non_vehicles': True, 'lidar_obs': False, 'reward_type': 'weighted_combination', 'collision_weight': -0.75, 'off_road_weight': -0.75, 'goal_achieved_weight': 1.0, 'dynamics_model': 'classic', 'collision_behavior': 'ignore', 'dist_to_goal_threshold': 2.0, 'polyline_reduction_threshold': 0.1, 'sampling_seed': 42, 'obs_radius': 50.0, 'action_space_steer_disc': 13, 'action_space_accel_disc': 7, 'use_vbd': False, 'vbd_model_path': 'gpudrive/integrations/vbd/weights/epoch=18.ckpt', 'init_steps': 11, 'vbd_trajectory_weight': 0.1, 'vbd_in_obs': False}, 'wandb': {'entity': '', 'project': 'gpudrive', 'group': 'test', 'mode': 'online', 'tags': 

### Load pre-trained agent via Hugging Face hub


In [18]:
# Load the policy from a local training checkpoint.
# Alternative (Hugging Face hub):
#   sim_agent = NeuralNet.from_pretrained("daphne-cornelisse/policy_S10_000_02_27").to(device)
checkpoint_path = "model_PPO____S_256__06_11_15_50_29_632_210423.pt"

# SECURITY: weights_only=False unpickles arbitrary Python objects, which can
# execute code embedded in the file -- only load checkpoints you trust.
saved_cpt = torch.load(
    f=checkpoint_path,
    map_location=device,
    weights_only=False,
)

# Rebuild the network from the architecture stored in the checkpoint and move
# it to `device` explicitly: map_location only decides where the checkpoint
# tensors are loaded, not where the freshly constructed module lives.
sim_agent = NeuralNet(
    input_dim=saved_cpt["model_arch"]["input_dim"],
    action_dim=saved_cpt["action_dim"],
    hidden_dim=saved_cpt["model_arch"]["hidden_dim"],
    config=config,
).to(device)

# Load the model parameters (kept as the last expression so the key-matching
# report is shown as the cell output)
sim_agent.load_state_dict(saved_cpt["parameters"])

<All keys matched successfully>

In [19]:
# Agent has an action dimension of 91: 13 steering wheel angle discretizations x 7 acceleration discretizations
sim_agent.action_dim

91

In [20]:
# Size of the flattened observation vector
# (presumably the per-agent input size of the policy; it should match the last dimension of the env observations)
sim_agent.obs_dim

2984

In [21]:
# Some other info
# NOTE(review): this card belongs to the Hugging Face hub policy, not to the
# local checkpoint that `sim_agent` was loaded from; fetching it presumably
# requires network access -- confirm this cell is still wanted.
card = ModelCard.load("daphne-cornelisse/policy_S10_000_02_27")
card.data.tags

['ffn', 'model_hub_mixin', 'pytorch_model_hub_mixin']

In [22]:
# Model architecture
# sim_agent

In [23]:
# Weights
# sim_agent.state_dict()

### Make environment

In [24]:
# Data loader for the scene files under 'data/exp'; one batch holds `num_envs` scenes
train_loader = SceneDataLoader(
    root='data/exp',
    batch_size=num_envs,
    dataset_size=1,
    sample_with_replacement=False,
    file_prefix="",
)

env_settings = config.environment

# Discrete action grids; the number of bins is taken from the loaded config
steer_grid = torch.round(
    torch.linspace(-torch.pi, torch.pi, env_settings.action_space_steer_disc),
    decimals=3,
)
accel_grid = torch.round(
    torch.linspace(-4.0, 4.0, env_settings.action_space_accel_disc),
    decimals=3,
)

# Settings copied one-to-one from the loaded config onto the default EnvConfig
mirrored_fields = (
    "ego_state",
    "road_map_obs",
    "partner_obs",
    "reward_type",
    "norm_obs",
    "dynamics_model",
    "collision_behavior",
    "dist_to_goal_threshold",
    "polyline_reduction_threshold",
    "remove_non_vehicles",
    "lidar_obs",
    "obs_radius",
)
env_overrides = {name: getattr(env_settings, name) for name in mirrored_fields}
# The classic observations are disabled exactly when lidar observations are enabled
env_overrides["disable_classic_obs"] = env_settings.lidar_obs
env_overrides["steer_actions"] = steer_grid
env_overrides["accel_actions"] = accel_grid

env_config = dataclasses.replace(EnvConfig(), **env_overrides)

# Build the simulator environment on the selected device
env = GPUDriveTorchEnv(
    config=env_config,
    data_loader=train_loader,
    max_cont_agents=env_settings.max_controlled_agents,
    device=device,
)

In [25]:
# Inspect which scene file(s) the environment currently holds
env.data_batch

['data/exp/new_map.json']

### Use the agent

In [26]:
# Reset the environment; the returned value is used as the first observation batch
next_obs = env.reset()

# Mask used below to index the observations passed to the policy
# (presumably True for agents the policy controls -- TODO confirm)
control_mask = env.cont_agent_mask

# Shape of the observation tensor
next_obs.shape

torch.Size([1, 64, 2984])

In [27]:
# Single forward pass on the observations selected by `control_mask`.
# deterministic=False presumably samples the action rather than taking the
# most likely one -- TODO confirm against NeuralNet.
action, logprob, entropy, value = sim_agent(
    next_obs[control_mask], deterministic=False
)

In [28]:
# Shapes of the four policy outputs
action.shape, logprob.shape, entropy.shape, value.shape

(torch.Size([]), torch.Size([]), torch.Size([]), torch.Size([1, 1]))

In [29]:
# Peek at the first entry of the ego state via a private env helper
# (presumably indexed as [env][agent] -- TODO confirm)
env._get_ego_state()[0][0]

tensor([0.0000, 0.0933, 0.0933, 0.1120, 0.0043, 0.0000])

### Rollout

In [30]:
# Upper bound on the rollout length; the loop exits earlier once `done` is
# set for every entry.
MAX_STEPS = 200

next_obs = env.reset()

control_mask = env.cont_agent_mask

print(next_obs.shape)

# One list of rendered frames per environment
frames = {f"env_{i}": [] for i in range(num_envs)}

for time_step in range(MAX_STEPS):
    print(f"\rStep: {time_step}", end="", flush=True)

    # Predict actions. This is inference only, so no autograd graph is needed.
    with torch.no_grad():
        action, _, _, _ = sim_agent(
            next_obs[control_mask], deterministic=False
        )

    # Write the predicted actions into a full (num_envs, max_agents) tensor;
    # entries outside the control mask keep action index 0.
    action_template = torch.zeros(
        (num_envs, max_agents), dtype=torch.int64, device=device
    )
    action_template[control_mask] = action.to(device)

    # Step
    env.step_dynamics(action_template)

    # Render. All per-environment lists need one entry per environment, so
    # the center agent index is repeated instead of passing a single [0].
    sim_states = env.vis.plot_simulator_state(
        env_indices=list(range(num_envs)),
        time_steps=[time_step] * num_envs,
        zoom_radius=100,
        center_agent_indices=[0] * num_envs,
    )

    for i in range(num_envs):
        frames[f"env_{i}"].append(img_from_fig(sim_states[i]))

    # Refresh observations and bookkeeping for the next step
    next_obs = env.get_obs()
    reward = env.get_rewards()
    done = env.get_dones()
    info = env.get_infos()

    if done.all():
        break

# NOTE: the environment is closed here, so re-running this cell requires
# re-creating `env` first.
env.close()

torch.Size([1, 64, 2984])
Step: 90

In [31]:
# Show the collected frames inline, one GIF per key of `frames`
mediapy.show_videos(frames, fps=15, width=500,
                    height=500, columns=2, codec='gif')

0
env_0


In [32]:


# Save the rollout of a single environment to disk as a GIF
mediapy.write_video(
    "output.gif",
    images=frames['env_0'],  # 'env_0' is the first environment; use another key of `frames` to save a different one
    fps=10,
    codec='gif',
)