Merged
42b8955
Changed counts to max supported by sim, max_controlled, num_created a…
mpragnay Feb 10, 2026
c49b15e
Spawning logic with collision and offroad free spawns, uses 2.0 goal …
mpragnay Feb 11, 2026
8c9376c
Fix mem leaks
mpragnay Feb 11, 2026
87bd891
Apply suggestion from @Copilot
mpragnay Feb 12, 2026
5466e9e
Fixed Agent Counts to align with gigaflow feature set (#288)
mpragnay Feb 12, 2026
7b376fd
Bug fixing demo and renderer, fixed mem leaks in bindings code, chang…
mpragnay Feb 12, 2026
bc5b942
Merge branch '3.0_beta' into pragnay/randomagents
mpragnay Feb 12, 2026
5cbf4c4
Code cleanup, error handling
mpragnay Feb 12, 2026
7237135
merged changes from 3.0_beta
mpragnay Feb 16, 2026
d62e586
Minor fixes
mpragnay Feb 16, 2026
932d727
minor bugs
mpragnay Feb 16, 2026
ef50fd4
Pre-Compute lanes for spawning
mpragnay Feb 17, 2026
2ade546
Previously Working Settings
mpragnay Feb 17, 2026
257d721
Fixed reset to use the same goal
mpragnay Feb 18, 2026
ff6a94f
Separate out goal resets for bug avoidance with other modes
mpragnay Feb 18, 2026
7ceeeb1
Fixed agent collisions for variable dimensions
mpragnay Feb 18, 2026
0aa0a87
Fixed agent collisions for variable dimensions
mpragnay Feb 18, 2026
0c8d21b
working jerk configs
mpragnay Feb 19, 2026
b6c64c9
Merge branch '3.0_beta' into pragnay/randomagents
mpragnay Feb 19, 2026
dc972e5
pre-commit fixes
mpragnay Feb 19, 2026
974b93e
Relative Speed Observation fix
mpragnay Feb 22, 2026
2f5df66
Visualizer Ego POV road lanes added
mpragnay Feb 22, 2026
e94420b
Merge branch '3.0_beta' into pragnay/randomagents
mpragnay Feb 24, 2026
9e37dcf
Minor config changes, for future expts
mpragnay Feb 27, 2026
ca4a3ae
Lane Length Biased Spawning
mpragnay Feb 27, 2026
5ae06e7
Update bin in drive.c
mpragnay Feb 27, 2026
9e5de03
Reverted to donut goals
mpragnay Feb 27, 2026
3433c77
COnfig changes
mpragnay Feb 27, 2026
ff2b769
Merge branch '3.0_beta' into pragnay/randomagents
mpragnay Feb 28, 2026
1044152
Change configs for run
mpragnay Feb 28, 2026
2971422
Fixed max_agents
mpragnay Mar 1, 2026
21f6940
Increased timeout, fixed default bin in viz
mpragnay Mar 1, 2026
1eb3a6b
Config changes
mpragnay Mar 1, 2026
7b5111a
Reset experimental configs
mpragnay Mar 2, 2026
1fe73a9
Added back constants
mpragnay Mar 2, 2026
b8aeed9
Added faulty dimensions check
mpragnay Mar 2, 2026
3a25ac7
Minor Fixes
mpragnay Mar 2, 2026
8fd053f
Addressing concerns with goal_respawn, complete revert of configs, ot…
mpragnay Mar 4, 2026
392905e
Addressing comments
mpragnay Mar 6, 2026
e5436bf
Merge 3.0_beta into randomagents branch
mpragnay Mar 7, 2026
23 changes: 17 additions & 6 deletions pufferlib/config/ocean/drive.ini
@@ -50,16 +50,27 @@ offroad_behavior = 0
episode_length = 300
resample_frequency = 300
termination_mode = 1 # 0 - terminate at episode_length, 1 - terminate after all agents have been reset
-map_dir = "resources/drive/binaries/carla_3D"
-num_maps = 10000
+map_dir = "resources/drive/binaries/carla_2D"
+num_maps = 3
; If True, allows training with fewer maps than requested (warns instead of erroring)
allow_fewer_maps = True
; Determines which step of the trajectory to initialize the agents at upon reset
init_steps = 0
; Options: "control_vehicles", "control_agents", "control_wosac", "control_sdc_only"
control_mode = "control_vehicles"
-; Options: "created_all_valid", "create_only_controlled"
-init_mode = "create_all_valid"
+; Options: "create_all_valid", "create_only_controlled", "init_variable_agent_number" (creates a random number of controlled agents per env)
+init_mode = "init_variable_agent_number"
; Below options only valid for "init_variable_agent_number" init_mode
min_agents_per_env = 1
max_agents_per_env = 128
; Dimension Ranges for agents
spawn_width_min = 1.5
spawn_width_max = 2.5
spawn_length_min = 2.0
spawn_length_max = 5.5
spawn_height = 1.5

; Reward settings
reward_randomization = 1
; Options: 0 - Fixed reward values, 1 - Random reward values
reward_conditioning = 1
@@ -144,11 +155,11 @@ vf_clip_coef = 0.1999999999999999
vf_coef = 2
vtrace_c_clip = 1
vtrace_rho_clip = 1
-checkpoint_interval = 250
+checkpoint_interval = 1000
; Rendering options
render = True
render_async = False # A render interval below 50 may cause process starvation and slow training
-render_interval = 250
+render_interval = 1000
Reviewer: why change this?

Author: Rendering was taking a really long time even with async on, so I just increased the interval.

Author: This only happens when max_agents is very high; previously we were working with just 32, so it was not a problem.

; If True, show exactly what the agent sees in agent observation
obs_only = True
; Show grid lines
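The new spawn-dimension keys in drive.ini can be exercised with a small standalone sketch. The `SpawnRanges` struct and the `frand_range`/`sample_width` helpers below are hypothetical names, and uniform sampling is an assumption about how the sim consumes these bounds:

```c
#include <assert.h>
#include <stdlib.h>

/* Hypothetical mirror of the drive.ini spawn range keys. */
typedef struct {
    float min_w, max_w; /* spawn_width_min  / spawn_width_max  */
    float min_l, max_l; /* spawn_length_min / spawn_length_max */
    float h;            /* spawn_height (fixed, not sampled)   */
} SpawnRanges;

/* Uniform float in [lo, hi]. */
static float frand_range(float lo, float hi) {
    return lo + (hi - lo) * ((float)rand() / (float)RAND_MAX);
}

/* Sample one agent's width; length is sampled the same way. */
static float sample_width(const SpawnRanges *r) {
    return frand_range(r->min_w, r->max_w);
}
```

Every sampled dimension stays inside the configured interval, which is what the later "Fixed agent collisions for variable dimensions" commits rely on.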
64 changes: 63 additions & 1 deletion pufferlib/ocean/drive/binding.c
@@ -127,16 +127,64 @@ static PyObject *my_shared(PyObject *self, PyObject *args, PyObject *kwargs) {
float min_avg_speed_to_consider_goal_attempt = unpack(kwargs, "min_avg_speed_to_consider_goal_attempt");

int use_all_maps = unpack(kwargs, "use_all_maps");
int min_agents_per_env = unpack(kwargs, "min_agents_per_env");
int max_agents_per_env = unpack(kwargs, "max_agents_per_env");

clock_gettime(CLOCK_REALTIME, &ts);
srand(ts.tv_nsec);

int max_envs = use_all_maps ? num_maps : num_agents;

if (init_mode == INIT_VARIABLE_AGENT_NUMBER) {
// Training mode: random agent counts per env
int agent_counts[max_envs];
int remaining = num_agents;
int env_count = 0;

while (remaining > 0) {
int count;
if (remaining <= max_agents_per_env) {
count = remaining;
} else {
// Ensure last env can still meet min_agents_per_env requirement
int upper = (remaining - max_agents_per_env < min_agents_per_env) ? remaining - min_agents_per_env
: max_agents_per_env;
if (upper - min_agents_per_env + 1 == 0) {
count = min_agents_per_env;
} else {
count = min_agents_per_env + rand() % (upper - min_agents_per_env + 1);
}
}
agent_counts[env_count++] = count;
remaining -= count;
}

PyObject *agent_offsets = PyList_New(env_count + 1);
PyObject *map_ids_list = PyList_New(env_count);

int offset = 0;
for (int i = 0; i < env_count; i++) {
PyList_SetItem(agent_offsets, i, PyLong_FromLong(offset));
PyList_SetItem(map_ids_list, i, PyLong_FromLong(rand() % num_maps));
offset += agent_counts[i];
}
PyList_SetItem(agent_offsets, env_count,
PyLong_FromLong(num_agents)); // In random mode, we guarantee num_agents across all envs
PyObject *tuple = PyTuple_New(3);
PyTuple_SetItem(tuple, 0, agent_offsets);
PyTuple_SetItem(tuple, 1, map_ids_list);
PyTuple_SetItem(tuple, 2, PyLong_FromLong(env_count));
return tuple;
}

// For all other modes
int total_agent_count = 0;
int env_count = 0;
int max_envs = use_all_maps ? num_maps : num_agents;
int map_idx = 0;
int maps_checked = 0;
PyObject *agent_offsets = PyList_New(max_envs + 1);
PyObject *map_ids = PyList_New(max_envs);

// getting env count
while (use_all_maps ? map_idx < max_envs : total_agent_count < num_agents && env_count < max_envs) {
int map_id = use_all_maps ? map_idx++ : rand() % num_maps;
@@ -344,8 +392,22 @@ static int my_init(Env *env, PyObject *args, PyObject *kwargs) {
char *map_path = unpack_str(kwargs, "map_path");
int max_agents = unpack(kwargs, "max_agents");
int init_steps = unpack(kwargs, "init_steps");
int max_agents_per_env = unpack(kwargs, "max_agents_per_env");

AgentSpawnSettings spawn_settings = {
.min_w = unpack(kwargs, "spawn_width_min"),
.max_w = unpack(kwargs, "spawn_width_max"),
.min_l = unpack(kwargs, "spawn_length_min"),
.max_l = unpack(kwargs, "spawn_length_max"),
.h = unpack(kwargs, "spawn_height"),
};
env->spawn_settings = spawn_settings;

env->num_agents = max_agents;
if (env->init_mode == INIT_VARIABLE_AGENT_NUMBER) {
env->spawn_settings.max_agents_in_sim =
max_agents_per_env; // INIT_VARIABLE_AGENT_NUMBER only supports controlled agents
}
env->map_name = map_path;
env->init_steps = init_steps;
env->timestep = init_steps;
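The per-env agent-count loop in my_shared() can be isolated as a standalone sketch. `partition_agents` is a hypothetical name; the logic mirrors the diff, except the zero-span guard is widened to `span <= 0` as a defensive assumption:

```c
#include <assert.h>
#include <stdlib.h>

/* Split `total` agents into per-env counts, each in [min_per_env,
 * max_per_env]; mirrors the loop in my_shared(). Returns the number
 * of envs filled. */
static int partition_agents(int total, int min_per_env, int max_per_env,
                            int *counts, int max_envs) {
    int remaining = total;
    int env_count = 0;
    while (remaining > 0 && env_count < max_envs) {
        int count;
        if (remaining <= max_per_env) {
            count = remaining; /* last env takes the remainder */
        } else {
            /* Leave enough agents so the final env can still reach
             * min_per_env. */
            int upper = (remaining - max_per_env < min_per_env)
                            ? remaining - min_per_env
                            : max_per_env;
            int span = upper - min_per_env + 1;
            count = (span <= 0) ? min_per_env
                                : min_per_env + rand() % span;
        }
        counts[env_count++] = count;
        remaining -= count;
    }
    return env_count;
}
```

The key invariants are that counts sum to `total` and every env lands within the configured bounds, which is why the final offset can be set to `num_agents` unconditionally in the binding.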
8 changes: 8 additions & 0 deletions pufferlib/ocean/drive/datatypes.h
@@ -244,6 +244,7 @@ struct RoadMapElement {
float *x;
float *y;
float *z;
float polyline_length; // Total length of the polyline

// Lane specific info
int num_entries;
@@ -281,6 +282,13 @@ void free_agent(struct Agent *agent) {
free(agent->path);
}

void free_agents(struct Agent *agents, int num_agents) {
for (int i = 0; i < num_agents; i++) {
free_agent(&agents[i]);
}
free(agents);
}

void free_road_element(struct RoadMapElement *element) {
free(element->x);
free(element->y);
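The new free_agents helper pairs each per-agent teardown with freeing the array itself, matching the "Fix mem leaks" commits. A minimal sketch of the intended allocation/teardown pairing — the Agent struct is simplified to just the path buffer, and `alloc_agents` is a hypothetical allocator for illustration:

```c
#include <assert.h>
#include <stdlib.h>

/* Simplified stand-in for struct Agent; the real struct has more
 * heap-owned members. */
struct Agent {
    float *path;
};

static void free_agent(struct Agent *agent) { free(agent->path); }

/* Mirrors the helper added in datatypes.h: per-agent teardown, then
 * the array itself. */
static void free_agents(struct Agent *agents, int num_agents) {
    for (int i = 0; i < num_agents; i++) {
        free_agent(&agents[i]);
    }
    free(agents);
}

/* Hypothetical allocator showing the pairing free_agents expects:
 * one heap array, plus one heap path buffer per agent. */
static struct Agent *alloc_agents(int n, int steps) {
    struct Agent *agents = calloc((size_t)n, sizeof *agents);
    for (int i = 0; i < n; i++) {
        agents[i].path = calloc((size_t)steps, sizeof(float));
    }
    return agents;
}
```

Calling free_agents once releases both levels of allocation, so callers must not free individual agents separately afterward.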
24 changes: 21 additions & 3 deletions pufferlib/ocean/drive/drive.c
@@ -42,6 +42,9 @@ void demo() {
exit(1);
}

// Set different seed each time
srand(time(NULL));

// Note: Use below hardcoded settings for 2.0 demo purposes. Since the policy was
// trained with these exact settings, changing them may lead to
// weird behavior.
@@ -68,6 +71,15 @@
// .map_name = "resources/drive/map_town_02_carla.bin",
// };

AgentSpawnSettings spawn_settings = {
.max_agents_in_sim = conf.max_agents_per_env,
.min_w = conf.spawn_width_min,
.max_w = conf.spawn_width_max,
.min_l = conf.spawn_length_min,
.max_l = conf.spawn_length_max,
.h = conf.spawn_height,
};

Drive env = {
.human_agent_idx = 0,
.action_type = 0, // Demo doesn't support continuous action space
@@ -90,13 +102,19 @@
.init_steps = conf.init_steps,
.init_mode = conf.init_mode,
.control_mode = conf.control_mode,
-.map_name = "resources/drive/binaries/carla/carla_3D/map_001.bin",
-.reward_conditioning = 1,
+.spawn_settings = spawn_settings,
+.map_name = "resources/drive/binaries/carla_2D/map_000.bin",
+.reward_conditioning = conf.reward_conditioning,
};

if (conf.init_mode == INIT_VARIABLE_AGENT_NUMBER) {
env.num_agents = conf.min_agents_per_env + rand() % (conf.max_agents_per_env - conf.min_agents_per_env + 1);
}

allocate(&env);
c_reset(&env);
c_render(&env);
-Weights *weights = load_weights("resources/drive/puffer_drive_resampling_speed_lane.bin");
+Weights *weights = load_weights("best_policy_with_reward_conditioning.bin");
Copilot AI (Mar 2, 2026): load_weights("best_policy_with_reward_conditioning.bin") appears to reference a file that isn't in the repo and also drops the resources/drive/ prefix used elsewhere. This breaks the demo by default unless the artifact is committed and the path is corrected.

Reviewer: Is this comment correct? It's true that I don't see this file.

DriveNet *net = init_drivenet(weights, env.active_agent_count, env.dynamics_model, env.reward_conditioning);

int accel_delta = 1;
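The demo's variable agent-count draw uses the usual modulo idiom; a tiny sketch of that draw (`rand_range` is a hypothetical helper name; `rand() % span` carries a slight modulo bias, which is harmless at these ranges):

```c
#include <assert.h>
#include <stdlib.h>

/* Uniform-ish integer in [lo, hi], as the demo computes num_agents
 * from min_agents_per_env / max_agents_per_env. */
static int rand_range(int lo, int hi) {
    return lo + rand() % (hi - lo + 1);
}
```

Seeding with `srand(time(NULL))`, as the demo now does, makes each run draw a different count from this range.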