From 0734d891ef1b532f060c805f8a1c703615d8daea Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Fri, 23 May 2025 23:32:41 -0400 Subject: [PATCH 01/39] changed policy and rename factors to match common names --- pufferlib/config/ocean/boids.ini | 11 ++++++----- pufferlib/ocean/boids/binding.c | 6 +++--- pufferlib/ocean/boids/boids.h | 22 +++++++++++----------- pufferlib/ocean/boids/boids.py | 12 ++++++------ 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index 2f8412d248..0e6c8d33e6 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -1,7 +1,8 @@ [base] package = ocean env_name = puffer_boids -policy_name = Boids +; policy_name = Boids +policy_name = Policy rnn_name = Recurrent ; rnn_name = None @@ -10,10 +11,10 @@ num_envs = 64 num_boids = 64 ; num_envs = 1 ; num_boids = 1 -margin_turn_factor = 0.0 -centering_factor = 0.00 -avoid_factor = 1.00 -matching_factor = 1.00 +margin_turn_factor = 1.0 +cohesion_factor = 0.0 +seperation_factor = 0.0 +alignment_factor = 0.0 [vec] num_workers = 2 diff --git a/pufferlib/ocean/boids/binding.c b/pufferlib/ocean/boids/binding.c index a3483d6520..27b40defb4 100644 --- a/pufferlib/ocean/boids/binding.c +++ b/pufferlib/ocean/boids/binding.c @@ -7,9 +7,9 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) { env->num_boids = unpack(kwargs, "num_boids"); env->report_interval = unpack(kwargs, "report_interval"); env->margin_turn_factor = unpack(kwargs, "margin_turn_factor"); - env->centering_factor = unpack(kwargs, "centering_factor"); - env->avoid_factor = unpack(kwargs, "avoid_factor"); - env->matching_factor = unpack(kwargs, "matching_factor"); + env->cohesion_factor = unpack(kwargs, "cohesion_factor"); + env->seperation_factor = unpack(kwargs, "seperation_factor"); + env->alignment_factor = unpack(kwargs, "alignment_factor"); init(env); return 0; } diff --git a/pufferlib/ocean/boids/boids.h 
b/pufferlib/ocean/boids/boids.h index bf2bf6331e..5828b57169 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -52,9 +52,9 @@ typedef struct { Boid* boids; unsigned int num_boids; float margin_turn_factor; - float centering_factor; - float avoid_factor; - float matching_factor; + float cohesion_factor; + float seperation_factor; + float alignment_factor; unsigned tick; Log log; Log* boid_logs; @@ -134,8 +134,8 @@ void c_step(Boids *env) { current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP); current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP); } else { - current_boid->velocity.x = flclip(current_boid->velocity.x + 2*env->actions[current_indx * 2 + 0], -VELOCITY_CAP, VELOCITY_CAP); - current_boid->velocity.y = flclip(current_boid->velocity.y + 2*env->actions[current_indx * 2 + 1], -VELOCITY_CAP, VELOCITY_CAP); + current_boid->velocity.x = flclip(current_boid->velocity.x + env->actions[current_indx * 2 + 0], -VELOCITY_CAP, VELOCITY_CAP); + current_boid->velocity.y = flclip(current_boid->velocity.y + env->actions[current_indx * 2 + 1], -VELOCITY_CAP, VELOCITY_CAP); } current_boid->x = flclip(current_boid->x + current_boid->velocity.x, 0, WIDTH - BOID_WIDTH); current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT); @@ -161,8 +161,8 @@ void c_step(Boids *env) { } } if (protected_count > 0) { - //current_boid_reward -= fabsf(protected_dist_sum / protected_count) * env->avoid_factor; - current_boid_reward -= flclip(protected_count/5.0, 0.0f, 1.0f) * env->avoid_factor; + //current_boid_reward -= fabsf(protected_dist_sum / protected_count) * env->seperation_factor; + current_boid_reward -= flclip(protected_count/5.0, 0.0f, 1.0f) * env->seperation_factor; } if (visual_count) { vis_x_avg = vis_x_sum / visual_count; @@ -170,10 +170,10 @@ void c_step(Boids *env) { vis_vx_avg = 
vis_vx_sum / visual_count; vis_vy_avg = vis_vy_sum / visual_count; - current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->matching_factor; - current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->matching_factor; - current_boid_reward -= fabsf(vis_x_avg - current_boid->x) * env->centering_factor; - current_boid_reward -= fabsf(vis_y_avg - current_boid->y) * env->centering_factor; + current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor; + current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor; + current_boid_reward -= fabsf(vis_x_avg - current_boid->x) * env->cohesion_factor; + current_boid_reward -= fabsf(vis_y_avg - current_boid->y) * env->cohesion_factor; } if (current_boid->y < TOP_MARGIN || current_boid->y > HEIGHT - BOTTOM_MARGIN) { current_boid_reward -= env->margin_turn_factor; diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py index 329b36dea3..b386449b99 100644 --- a/pufferlib/ocean/boids/boids.py +++ b/pufferlib/ocean/boids/boids.py @@ -19,9 +19,9 @@ def __init__( report_interval=1, num_boids=1, margin_turn_factor=1.0, - centering_factor=0.0, - avoid_factor=0.0, - matching_factor=0.0 + cohesion_factor=0.0, + seperation_factor=0.0, + alignment_factor=0.0 ): ACTION_SPACE_SIZE = 2 self.num_agents = num_envs * num_boids @@ -59,9 +59,9 @@ def __init__( num_boids=num_boids, report_interval=self.report_interval, margin_turn_factor=margin_turn_factor, - centering_factor=centering_factor, - avoid_factor=avoid_factor, - matching_factor=matching_factor, + cohesion_factor=cohesion_factor, + seperation_factor=seperation_factor, + alignment_factor=alignment_factor, )) self.c_envs = binding.vectorize(*c_envs) From e94a4aae43e0568d1e284bea95b0b95da65ecf4a Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sat, 24 May 2025 13:35:15 -0400 Subject: [PATCH 02/39] remove unused log fields --- pufferlib/ocean/boids/binding.c | 3 --- 
pufferlib/ocean/boids/boids.h | 14 +++----------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/pufferlib/ocean/boids/binding.c b/pufferlib/ocean/boids/binding.c index 27b40defb4..89cb389759 100644 --- a/pufferlib/ocean/boids/binding.c +++ b/pufferlib/ocean/boids/binding.c @@ -15,10 +15,7 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) { } static int my_log(PyObject* dict, Log* log) { - assign_to_dict(dict, "perf", log->perf); assign_to_dict(dict, "score", log->score); - assign_to_dict(dict, "episode_return", log->episode_return); - assign_to_dict(dict, "episode_length", log->episode_length); assign_to_dict(dict, "n", log->n); return 0; } diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 5828b57169..1feb6be255 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -22,10 +22,7 @@ #define BOID_TEXTURE_PATH "./resources/puffers_128.png" typedef struct { - float perf; float score; - float episode_return; - float episode_length; float n; } Log; @@ -60,7 +57,6 @@ typedef struct { Log* boid_logs; unsigned report_interval; Client* client; - } Boids; static inline float flmax(float a, float b) { return a > b ? 
a : b; } @@ -191,13 +187,9 @@ void c_step(Boids *env) { env->rewards[current_indx] = current_boid_reward / 2.0f; //log updates - if (env->tick == env->report_interval) { - env->log.score += env->rewards[current_indx]; - env->log.n += 1.0f; - - /* clear per-boid log for next episode */ - // env->boid_logs[boid_indx] = (Log){0}; - env->tick = 0; + if (env->tick % env->report_interval == 0) { + env->log.score += env->rewards[current_indx]; + env->log.n += 1.0f; } } //env->log.score /= env->num_boids; From 817dc14f08bf0d625e574d4235690d6edc1feb98 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sat, 24 May 2025 13:35:29 -0400 Subject: [PATCH 03/39] remove unused variable --- pufferlib/ocean/boids/boids.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 1feb6be255..cc8bfeced7 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -88,8 +88,6 @@ void init(Boids *env) { static void compute_observations(Boids *env) { - unsigned base_indx; - int idx = 0; for (unsigned i=0; inum_boids; i++) { for (unsigned j=0; jnum_boids; j++) { From e232d3e2b1670c1c046e2e5f354071e44f02af0e Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sat, 24 May 2025 16:08:29 -0400 Subject: [PATCH 04/39] remove unused commented code --- pufferlib/ocean/boids/boids.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py index b386449b99..96784b7bd9 100644 --- a/pufferlib/ocean/boids/boids.py +++ b/pufferlib/ocean/boids/boids.py @@ -5,7 +5,6 @@ import numpy as np import gymnasium - import pufferlib from pufferlib.ocean.boids import binding @@ -74,12 +73,6 @@ def reset(self, seed=0): def step(self, actions): # Clip actions to valid range clipped_actions = (actions.astype(np.float32) - 2.0) / 4.0 - #clipped_actions = np.clip(actions, -1.0, 1.0) - - # Copy the clipped actions to our flat actions buffer for C binding - # Flatten from [num_agents, 
num_boids, 2] to a 1D array for C - # TODO: Check if I even need this? its not like I'm using the actions anywhere else - #self.flat_actions[:] = clipped_actions.reshape(-1) # Save the original actions for the experience buffer # TODO: Same thing with this @@ -94,7 +87,6 @@ def step(self, actions): if log_data: info.append(log_data) - # print(f"OBSERVATIONS: {self.observations}") return (self.observations, self.rewards, self.terminals, self.truncations, info) From 4eb410acae0de76ef6c3407115f6079c43f72c7a Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 28 May 2025 23:33:31 -0400 Subject: [PATCH 05/39] remove unused boid_logs and fix logs calculation --- pufferlib/ocean/boids/boids.h | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index cc8bfeced7..f815036114 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -54,7 +54,6 @@ typedef struct { float alignment_factor; unsigned tick; Log log; - Log* boid_logs; unsigned report_interval; Client* client; } Boids; @@ -69,12 +68,10 @@ static void respawn_boid(Boids *env, unsigned int i) { env->boids[i].y = rndf(BOTTOM_MARGIN, HEIGHT - TOP_MARGIN); env->boids[i].velocity.x = 0; env->boids[i].velocity.y = 0; - env->boid_logs[i] = (Log){0}; } void init(Boids *env) { env->boids = (Boid*)calloc(env->num_boids, sizeof(Boid)); - env->boid_logs = (Log*)calloc(env->num_boids, sizeof(Log)); env->log = (Log){0}; env->tick = 0; @@ -117,10 +114,12 @@ void c_step(Boids *env) { bool manual_control = IsKeyDown(KEY_LEFT_SHIFT); float mouse_x = (float)GetMouseX(); float mouse_y = (float)GetMouseY(); + float avg_reward = 0.0f; env->tick++; env->rewards[0] = 0; env->log.score = 0; + env->log.n = 0; for (unsigned current_indx = 0; current_indx < env->num_boids; current_indx++) { // apply action current_boid = &env->boids[current_indx]; @@ -183,14 +182,14 @@ void c_step(Boids *env) { // env->rewards[current_indx] = 
current_boid_reward / 15.0f; // printf("current_boid_reward: %f\n", current_boid_reward); env->rewards[current_indx] = current_boid_reward / 2.0f; - - //log updates - if (env->tick % env->report_interval == 0) { - env->log.score += env->rewards[current_indx]; - env->log.n += 1.0f; - } + avg_reward += env->rewards[current_indx]; + } + //log updates + avg_reward /= env->num_boids; + if (env->tick % env->report_interval == 0) { + env->log.score = avg_reward; + env->log.n = 1; } - //env->log.score /= env->num_boids; compute_observations(env); } @@ -210,7 +209,6 @@ void c_close_client(Client* client) { void c_close(Boids* env) { free(env->boids); - free(env->boid_logs); if (env->client != NULL) { c_close_client(env->client); } From 1d424d7c3b86f0ca45a2d32f9091a6245dcdac27 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sat, 31 May 2025 16:08:37 -0400 Subject: [PATCH 06/39] fix overflow and zero report_interval --- pufferlib/ocean/boids/boids.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/boids/boids.c b/pufferlib/ocean/boids/boids.c index 735f8e8c9e..14de0d5e5a 100644 --- a/pufferlib/ocean/boids/boids.c +++ b/pufferlib/ocean/boids/boids.c @@ -7,6 +7,7 @@ // --- Demo Configuration --- #define NUM_BOIDS_DEMO 20 // Number of boids for the standalone demo +#define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo #define MAX_STEPS_DEMO 500 // Max steps per episode in the demo #define ACTION_SCALE 3.0f // Corresponds to action space [-3.0, 3.0] @@ -27,11 +28,12 @@ void demo() { // Initialize Boids environment struct Boids env = {0}; env.num_boids = NUM_BOIDS_DEMO; + env.report_interval = REPORT_INTERVAL_DEMO; // In the Python binding, these pointers are assigned from NumPy arrays. // Here, we need to allocate them explicitly. 
- size_t obs_size = env.num_boids * 4; // num_boids * (x, y, vx, vy) - size_t act_size = env.num_boids * 2; // num_boids * (dvx, dvy) + size_t obs_size = env.num_boids * env.num_boids * 4; // the 4 = (x, y, vx, vy) + size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy) env.observations = (float*)calloc(obs_size, sizeof(float)); env.actions = (float*)calloc(act_size, sizeof(float)); env.rewards = (float*)calloc(env.num_boids, sizeof(float)); // Env-level reward From 7376334bd58bb5ce389ffae62f8b08c752c97a4e Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sat, 31 May 2025 16:32:35 -0400 Subject: [PATCH 07/39] add above zero checks for num_boids and report_interval --- pufferlib/ocean/boids/boids.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index f815036114..c26ffa56e7 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -71,6 +71,14 @@ static void respawn_boid(Boids *env, unsigned int i) { } void init(Boids *env) { + if(env->num_boids < 1) { + printf("ERROR: num_boids must be bigger than 0\n"); + exit(1); + } + if (env->report_interval < 1) { + printf("ERROR: report_interval must be bigger than 0\n"); + exit(1); + } env->boids = (Boid*)calloc(env->num_boids, sizeof(Boid)); env->log = (Log){0}; env->tick = 0; From ff483f6d08575504e5ad3050e3fa39979dd40111 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Mon, 2 Jun 2025 19:38:43 -0400 Subject: [PATCH 08/39] remove unused commented flat_actions --- pufferlib/ocean/boids/boids.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py index 96784b7bd9..a597b46ca3 100644 --- a/pufferlib/ocean/boids/boids.py +++ b/pufferlib/ocean/boids/boids.py @@ -49,7 +49,6 @@ def __init__( for env_num in range(num_envs): c_envs.append(binding.env_init( self.observations[env_num*num_boids:(env_num+1)*num_boids], - 
#self.flat_actions[env_num*num_boids*ACTION_SPACE_SIZE:(env_num+1)*num_boids*ACTION_SPACE_SIZE], self.actions[env_num*num_boids:(env_num+1)*num_boids], self.rewards[env_num*num_boids:(env_num+1)*num_boids], self.terminals[env_num*num_boids:(env_num+1)*num_boids], From 26bebefe42050d58d7186a6a15677483f7a91ec4 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Mon, 2 Jun 2025 19:50:18 -0400 Subject: [PATCH 09/39] simplify seperation reward and test it --- pufferlib/config/ocean/boids.ini | 8 ++++---- pufferlib/ocean/boids/boids.h | 30 +++++++++++++++++------------- 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index 0e6c8d33e6..691ef6c2a4 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -11,10 +11,10 @@ num_envs = 64 num_boids = 64 ; num_envs = 1 ; num_boids = 1 -margin_turn_factor = 1.0 -cohesion_factor = 0.0 -seperation_factor = 0.0 -alignment_factor = 0.0 +margin_turn_factor = 0.0 +centering_factor = 0.00 +avoid_factor = 1.00 +matching_factor = 1.00 [vec] num_workers = 2 diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index c26ffa56e7..1eb161532d 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -117,7 +117,7 @@ void c_step(Boids *env) { Boid* current_boid; Boid observed_boid; float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg; - float diff_x, diff_y, dist, protected_dist_sum, current_boid_reward; + float diff_x, diff_y, dist, current_boid_reward; unsigned visual_count, protected_count; bool manual_control = IsKeyDown(KEY_LEFT_SHIFT); float mouse_x = (float)GetMouseX(); @@ -142,7 +142,7 @@ void c_step(Boids *env) { current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT); // reward calculation - current_boid_reward = 0.0f, protected_dist_sum = 0.0f, protected_count = 0.0f; + current_boid_reward = 0.0f, 
protected_count = 0.0f; visual_count = 0.0f, vis_vx_sum = 0.0f, vis_vy_sum = 0.0f, vis_x_sum = 0.0f, vis_y_sum = 0.0f; for (unsigned observed_indx = 0; observed_indx < env->num_boids; observed_indx++) { if (current_indx == observed_indx) continue; @@ -151,7 +151,6 @@ void c_step(Boids *env) { diff_y = current_boid->y - observed_boid.y; dist = sqrtf(diff_x*diff_x + diff_y*diff_y); if (dist < PROTECTED_RANGE) { - protected_dist_sum += (PROTECTED_RANGE - dist); protected_count++; } else if (dist < VISUAL_RANGE) { vis_x_sum += observed_boid.x; @@ -162,8 +161,7 @@ void c_step(Boids *env) { } } if (protected_count > 0) { - //current_boid_reward -= fabsf(protected_dist_sum / protected_count) * env->seperation_factor; - current_boid_reward -= flclip(protected_count/5.0, 0.0f, 1.0f) * env->seperation_factor; + current_boid_reward -= flclip(protected_count/env->num_boids * env->avoid_factor, 0.0f, 1.0f); } if (visual_count) { vis_x_avg = vis_x_sum / visual_count; @@ -189,15 +187,21 @@ void c_step(Boids *env) { // Normalization // env->rewards[current_indx] = current_boid_reward / 15.0f; // printf("current_boid_reward: %f\n", current_boid_reward); - env->rewards[current_indx] = current_boid_reward / 2.0f; - avg_reward += env->rewards[current_indx]; - } - //log updates - avg_reward /= env->num_boids; - if (env->tick % env->report_interval == 0) { - env->log.score = avg_reward; - env->log.n = 1; + // env->rewards[current_indx] = current_boid_reward / 2.0f; + env->rewards[current_indx] = current_boid_reward; + + + //log updates + if (env->tick == env->report_interval) { + env->log.score += env->rewards[current_indx]; + env->log.n += 1.0f; + + /* clear per-boid log for next episode */ + // env->boid_logs[boid_indx] = (Log){0}; + env->tick = 0; + } } + //env->log.score /= env->num_boids; compute_observations(env); } From 06878ebc84ba802fdd205ed62d2cd85b45b74572 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Mon, 2 Jun 2025 19:51:30 -0400 Subject: [PATCH 10/39] test out only avoid 
factor --- pufferlib/config/ocean/boids.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index 691ef6c2a4..c9be5ee885 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -14,7 +14,7 @@ num_boids = 64 margin_turn_factor = 0.0 centering_factor = 0.00 avoid_factor = 1.00 -matching_factor = 1.00 +matching_factor = 0.00 [vec] num_workers = 2 From 35de375a2a01fa36827eb9157df7d31a0ca5eb01 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Tue, 3 Jun 2025 16:07:11 -0400 Subject: [PATCH 11/39] remove unused avg_reward and change seperation factor reward --- pufferlib/ocean/boids/boids.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 1eb161532d..f5c9037e62 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -122,7 +122,6 @@ void c_step(Boids *env) { bool manual_control = IsKeyDown(KEY_LEFT_SHIFT); float mouse_x = (float)GetMouseX(); float mouse_y = (float)GetMouseY(); - float avg_reward = 0.0f; env->tick++; env->rewards[0] = 0; @@ -161,7 +160,7 @@ void c_step(Boids *env) { } } if (protected_count > 0) { - current_boid_reward -= flclip(protected_count/env->num_boids * env->avoid_factor, 0.0f, 1.0f); + current_boid_reward -= (float)((env->num_boids - protected_count) - protected_count) / env->num_boids * env->seperation_factor; } if (visual_count) { vis_x_avg = vis_x_sum / visual_count; From 9709a469a071fc11976237256fa7b7bb64065c7f Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 4 Jun 2025 14:17:11 -0400 Subject: [PATCH 12/39] fix factor names --- pufferlib/config/ocean/boids.ini | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index c9be5ee885..0edba1bff0 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -12,9 
+12,9 @@ num_boids = 64 ; num_envs = 1 ; num_boids = 1 margin_turn_factor = 0.0 -centering_factor = 0.00 -avoid_factor = 1.00 -matching_factor = 0.00 +cohesion_factor = 0.00 +seperation_factor = 1.00 +alignment_factor = 0.00 [vec] num_workers = 2 From 1c28c7230a8a89de573563692667bcddd3460c9d Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 4 Jun 2025 15:06:38 -0400 Subject: [PATCH 13/39] remove unused commented code --- pufferlib/ocean/boids/boids.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index f5c9037e62..32ead6670c 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -194,13 +194,9 @@ void c_step(Boids *env) { if (env->tick == env->report_interval) { env->log.score += env->rewards[current_indx]; env->log.n += 1.0f; - - /* clear per-boid log for next episode */ - // env->boid_logs[boid_indx] = (Log){0}; env->tick = 0; } } - //env->log.score /= env->num_boids; compute_observations(env); } From bf8c75f7520d781e52918cc6747f9e5f45001f4e Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 02:29:46 -0400 Subject: [PATCH 14/39] fix seperation factor reward calculation --- pufferlib/ocean/boids/boids.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 32ead6670c..476b461e7e 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -117,7 +117,7 @@ void c_step(Boids *env) { Boid* current_boid; Boid observed_boid; float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg; - float diff_x, diff_y, dist, current_boid_reward; + float diff_x, diff_y, dist, current_boid_reward, protected_range_diff; unsigned visual_count, protected_count; bool manual_control = IsKeyDown(KEY_LEFT_SHIFT); float mouse_x = (float)GetMouseX(); @@ -160,7 +160,8 @@ void c_step(Boids *env) { } } if (protected_count > 0) { - 
current_boid_reward -= (float)((env->num_boids - protected_count) - protected_count) / env->num_boids * env->seperation_factor; + protected_range_diff = (float)(env->num_boids - protected_count) - protected_count; + current_boid_reward += protected_range_diff / env->num_boids * env->seperation_factor; } if (visual_count) { vis_x_avg = vis_x_sum / visual_count; From 23e2399bc0582bded8a47ff1ffe8a418e790ca50 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 03:02:49 -0400 Subject: [PATCH 15/39] remove unused commented params --- pufferlib/config/ocean/boids.ini | 47 +------------------------------- 1 file changed, 1 insertion(+), 46 deletions(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index 0edba1bff0..0b3fe5a6d0 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -25,49 +25,4 @@ batch_size = auto total_timesteps = 100_000_000 gamma = 0.95 learning_rate = 0.025 -minibatch_size = 16384 -; minibatch_size = 1 - -; [sweep] -; method = protein -; metric = episode_length - -; [sweep.train.total_timesteps] -; distribution = log_normal -; min = 1e6 -; max = 1e7 -; mean = 5e6 -; scale = 0.5 - -; [sweep.train.gamma] -; distribution = log_normal -; min = 0.9 -; max = 0.999 -; mean = 0.97 - -; [sweep.train.gae_lambda] -; distribution = log_normal -; min = 0.7 -; max = 0.999 -; mean = 0.95 - -; [sweep.train.learning_rate] -; distribution = log_normal -; min = 0.0001 -; max = 0.001 -; mean = 0.00025 -; scale = 0.5 - -; [sweep.train.batch_size] -; min = 32768 -; max = 131072 -; mean = 65536 -; scale = 0.5 - -; [sweep.train.minibatch_size] -; min = 512 -; max = 2048 -; mean = 1024 -; scale = 0.5 - - +minibatch_size = 16384 \ No newline at end of file From bb162fb424fb6030a5ff21d2762ac1409754f7b8 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 12:58:48 -0400 Subject: [PATCH 16/39] remove normalization from separation factor calculation --- pufferlib/ocean/boids/boids.h | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 476b461e7e..0b0db55b0e 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -161,7 +161,7 @@ void c_step(Boids *env) { } if (protected_count > 0) { protected_range_diff = (float)(env->num_boids - protected_count) - protected_count; - current_boid_reward += protected_range_diff / env->num_boids * env->seperation_factor; + current_boid_reward += protected_range_diff * env->seperation_factor; } if (visual_count) { vis_x_avg = vis_x_sum / visual_count; From 85d589168b6cf3e31119e86ca9da1ffcd69d189c Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 14:21:18 -0400 Subject: [PATCH 17/39] fix visual range --- pufferlib/ocean/boids/boids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 0b0db55b0e..77a98f204f 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -13,7 +13,7 @@ #define LEFT_MARGIN 50 #define RIGHT_MARGIN 50 #define VELOCITY_CAP 5 -#define VISUAL_RANGE 20 +#define VISUAL_RANGE 400 #define PROTECTED_RANGE 100 #define WIDTH 1080 #define HEIGHT 720 From 435ac9eaf9ca9359832c1f0b4fde5953091993d4 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 20:24:34 -0400 Subject: [PATCH 18/39] remove positve margin rewards and remove commented code --- pufferlib/ocean/boids/boids.h | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 77a98f204f..744440cf90 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -10,8 +10,8 @@ #define TOP_MARGIN 50 #define BOTTOM_MARGIN 50 -#define LEFT_MARGIN 50 -#define RIGHT_MARGIN 50 +#define LEFT_MARGIN 100 +#define RIGHT_MARGIN 100 #define VELOCITY_CAP 5 #define VISUAL_RANGE 400 #define PROTECTED_RANGE 100 @@ -176,20 +176,13 @@ void 
c_step(Boids *env) { } if (current_boid->y < TOP_MARGIN || current_boid->y > HEIGHT - BOTTOM_MARGIN) { current_boid_reward -= env->margin_turn_factor; - } else { - current_boid_reward += env->margin_turn_factor; } if (current_boid->x < LEFT_MARGIN || current_boid->x > WIDTH - RIGHT_MARGIN) { current_boid_reward -= env->margin_turn_factor; - } else { - current_boid_reward += env->margin_turn_factor; } - // Normalization - // env->rewards[current_indx] = current_boid_reward / 15.0f; - // printf("current_boid_reward: %f\n", current_boid_reward); - // env->rewards[current_indx] = current_boid_reward / 2.0f; - env->rewards[current_indx] = current_boid_reward; + // Normalization + env->rewards[current_indx] = current_boid_reward / 4.0f; //log updates if (env->tick == env->report_interval) { From 463f60a5394f99b875c878bd62a5cff05a49b9b4 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 20:33:52 -0400 Subject: [PATCH 19/39] add factors to env run with "boids.c" --- pufferlib/ocean/boids/boids.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/pufferlib/ocean/boids/boids.c b/pufferlib/ocean/boids/boids.c index 14de0d5e5a..aca6d35c68 100644 --- a/pufferlib/ocean/boids/boids.c +++ b/pufferlib/ocean/boids/boids.c @@ -10,6 +10,10 @@ #define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo #define MAX_STEPS_DEMO 500 // Max steps per episode in the demo #define ACTION_SCALE 3.0f // Corresponds to action space [-3.0, 3.0] +#define MARGIN_TURN_FACTOR 1.0 +#define COHESION_FACTOR 0.0 +#define SEPERATION_FACTOR 0.0 +#define ALIGNMENT_FACTOR 0.0 // Dummy action generation: random velocity changes for each boid void generate_dummy_actions(Boids* env) { @@ -29,6 +33,10 @@ void demo() { Boids env = {0}; env.num_boids = NUM_BOIDS_DEMO; env.report_interval = REPORT_INTERVAL_DEMO; + env.margin_turn_factor = MARGIN_TURN_FACTOR; + env.cohesion_factor = COHESION_FACTOR; + env.seperation_factor = SEPERATION_FACTOR; + env.alignment_factor = ALIGNMENT_FACTOR; // In 
the Python binding, these pointers are assigned from NumPy arrays. // Here, we need to allocate them explicitly. From 00af443301c3e5745710aaeb6c7566bd29585db9 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 20:44:00 -0400 Subject: [PATCH 20/39] add debug margin lines and adjust reward normalization --- pufferlib/ocean/boids/boids.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 744440cf90..7caef00d68 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -182,7 +182,7 @@ void c_step(Boids *env) { } // Normalization - env->rewards[current_indx] = current_boid_reward / 4.0f; + env->rewards[current_indx] = current_boid_reward / 2.0f; //log updates if (env->tick == env->report_interval) { @@ -257,6 +257,11 @@ void c_render(Boids* env) { BeginDrawing(); ClearBackground((Color){6, 24, 24, 255}); + DrawLine(LEFT_MARGIN, 0, LEFT_MARGIN, HEIGHT, RED); + DrawLine(WIDTH - RIGHT_MARGIN, 0, WIDTH - RIGHT_MARGIN, HEIGHT, RED); + DrawLine(0, TOP_MARGIN, WIDTH, TOP_MARGIN, RED); + DrawLine(0, HEIGHT - BOTTOM_MARGIN, WIDTH, HEIGHT - BOTTOM_MARGIN, RED); + for (unsigned boid_indx = 0; boid_indx < env->num_boids; boid_indx++) { DrawTexturePro( env->client->boid_texture, From fc4e72217b3123e9686c78f37c891caf36ca4d06 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 20:45:09 -0400 Subject: [PATCH 21/39] only turn on margin turn factor and adjust total timesteps --- pufferlib/config/ocean/boids.ini | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index 0b3fe5a6d0..b3732b5126 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -11,10 +11,10 @@ num_envs = 64 num_boids = 64 ; num_envs = 1 ; num_boids = 1 -margin_turn_factor = 0.0 -cohesion_factor = 0.00 -seperation_factor = 1.00 -alignment_factor = 0.00 
+margin_turn_factor = 1.0 +cohesion_factor = 0.0 +seperation_factor = 0.0 +alignment_factor = 0.0 [vec] num_workers = 2 @@ -22,7 +22,8 @@ num_envs = 2 batch_size = auto [train] -total_timesteps = 100_000_000 +total_timesteps = 150_000_000 +; total_timesteps = 15_000_000 gamma = 0.95 learning_rate = 0.025 minibatch_size = 16384 \ No newline at end of file From a413221013fd3656a69354d69d572a7af553654b Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 21:35:23 -0400 Subject: [PATCH 22/39] change top/bottom margins --- pufferlib/ocean/boids/boids.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 7caef00d68..aef336cd1a 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -8,8 +8,8 @@ #include "raylib.h" -#define TOP_MARGIN 50 -#define BOTTOM_MARGIN 50 +#define TOP_MARGIN 100 +#define BOTTOM_MARGIN 100 #define LEFT_MARGIN 100 #define RIGHT_MARGIN 100 #define VELOCITY_CAP 5 From 342d83c80ee65c80037fe6c5cf9250289758b8de Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Thu, 5 Jun 2025 21:39:33 -0400 Subject: [PATCH 23/39] account for boid width and hight in margin reward calculation --- pufferlib/ocean/boids/boids.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index aef336cd1a..54242bd65e 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -174,10 +174,10 @@ void c_step(Boids *env) { current_boid_reward -= fabsf(vis_x_avg - current_boid->x) * env->cohesion_factor; current_boid_reward -= fabsf(vis_y_avg - current_boid->y) * env->cohesion_factor; } - if (current_boid->y < TOP_MARGIN || current_boid->y > HEIGHT - BOTTOM_MARGIN) { + if (current_boid->y < TOP_MARGIN || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) { current_boid_reward -= env->margin_turn_factor; } - if (current_boid->x < LEFT_MARGIN || current_boid->x > WIDTH - 
RIGHT_MARGIN) { + if (current_boid->x < LEFT_MARGIN || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) { current_boid_reward -= env->margin_turn_factor; } From 01a84c0bb0c9398a6dc7fb8b76516657a410a08b Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 11 Jun 2025 00:11:20 -0400 Subject: [PATCH 24/39] increase max steps --- pufferlib/ocean/boids/boids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.c b/pufferlib/ocean/boids/boids.c index aca6d35c68..8718e70065 100644 --- a/pufferlib/ocean/boids/boids.c +++ b/pufferlib/ocean/boids/boids.c @@ -8,7 +8,7 @@ // --- Demo Configuration --- #define NUM_BOIDS_DEMO 20 // Number of boids for the standalone demo #define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo -#define MAX_STEPS_DEMO 500 // Max steps per episode in the demo +#define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo #define ACTION_SCALE 3.0f // Corresponds to action space [-3.0, 3.0] #define MARGIN_TURN_FACTOR 1.0 #define COHESION_FACTOR 0.0 From 117e9b6e610278cdffa4a92669f6763202d7c71e Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 11 Jun 2025 20:17:24 -0400 Subject: [PATCH 25/39] remove debug margin lines --- pufferlib/ocean/boids/boids.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 54242bd65e..6b0311d376 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -257,11 +257,6 @@ void c_render(Boids* env) { BeginDrawing(); ClearBackground((Color){6, 24, 24, 255}); - DrawLine(LEFT_MARGIN, 0, LEFT_MARGIN, HEIGHT, RED); - DrawLine(WIDTH - RIGHT_MARGIN, 0, WIDTH - RIGHT_MARGIN, HEIGHT, RED); - DrawLine(0, TOP_MARGIN, WIDTH, TOP_MARGIN, RED); - DrawLine(0, HEIGHT - BOTTOM_MARGIN, WIDTH, HEIGHT - BOTTOM_MARGIN, RED); - for (unsigned boid_indx = 0; boid_indx < env->num_boids; boid_indx++) { DrawTexturePro( env->client->boid_texture, From 618cb0b4e6cc875f41e276fe76778442bf5cf1bc Mon Sep 
17 00:00:00 2001 From: PLAZMAMA Date: Wed, 11 Jun 2025 20:18:27 -0400 Subject: [PATCH 26/39] fix observations for margin factor --- pufferlib/ocean/boids/boids.h | 20 ++++++++++++++++---- pufferlib/ocean/boids/boids.py | 2 +- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 6b0311d376..b883d72246 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -95,11 +95,23 @@ void init(Boids *env) { static void compute_observations(Boids *env) { int idx = 0; for (unsigned i=0; i<env->num_boids; i++) { + env->observations[idx++] = env->boids[i].x / WIDTH; + env->observations[idx++] = env->boids[i].y / HEIGHT; + env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP; + env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP; + for (unsigned j=0; j<4; j++) { + env->observations[idx++] = 0; + } for (unsigned j=0; j<env->num_boids; j++) { - env->observations[idx++] = (env->boids[j].x - env->boids[i].x) / WIDTH; - env->observations[idx++] = (env->boids[j].y - env->boids[i].y) / HEIGHT; - env->observations[idx++] = (env->boids[j].velocity.x - env->boids[i].velocity.x) / VELOCITY_CAP; - env->observations[idx++] = (env->boids[j].velocity.y - env->boids[i].velocity.y) / VELOCITY_CAP; + if (i == j) continue; + env->observations[idx++] = env->boids[j].x / WIDTH; + env->observations[idx++] = env->boids[j].y / HEIGHT; + env->observations[idx++] = env->boids[j].velocity.x / VELOCITY_CAP; + env->observations[idx++] = env->boids[j].velocity.y / VELOCITY_CAP; + env->observations[idx++] = (env->boids[i].x - env->boids[j].x) / WIDTH; + env->observations[idx++] = (env->boids[i].y - env->boids[j].y) / HEIGHT; + env->observations[idx++] = (env->boids[i].velocity.x - env->boids[j].velocity.x) / VELOCITY_CAP; + env->observations[idx++] = (env->boids[i].velocity.y - env->boids[j].velocity.y) / VELOCITY_CAP; } } } diff --git a/pufferlib/ocean/boids/boids.py 
b/pufferlib/ocean/boids/boids.py index a597b46ca3..5582cec81f 100644 --- a/pufferlib/ocean/boids/boids.py +++ b/pufferlib/ocean/boids/boids.py @@ -27,7 +27,7 @@ def __init__( self.num_boids = num_boids self.single_observation_space = gymnasium.spaces.Box( - -1000.0, 1000.0, shape=(num_boids*4,), dtype=np.float32 + -1000.0, 1000.0, shape=(num_boids*8,), dtype=np.float32 ) #self.single_action_space = gymnasium.spaces.Box( From 7526366e017bf29aa71f547b5cbd950011dcd4d1 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 11 Jun 2025 20:50:23 -0400 Subject: [PATCH 27/39] remove single agent params --- pufferlib/config/ocean/boids.ini | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index b3732b5126..ca757d9cf1 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -9,11 +9,9 @@ rnn_name = Recurrent [env] num_envs = 64 num_boids = 64 -; num_envs = 1 -; num_boids = 1 -margin_turn_factor = 1.0 +margin_turn_factor = 0.0 cohesion_factor = 0.0 -seperation_factor = 0.0 +seperation_factor = 1.0 alignment_factor = 0.0 [vec] @@ -22,8 +20,8 @@ num_envs = 2 batch_size = auto [train] -total_timesteps = 150_000_000 -; total_timesteps = 15_000_000 +; total_timesteps = 150_000_000 +total_timesteps = 50_000_000 gamma = 0.95 learning_rate = 0.025 minibatch_size = 16384 \ No newline at end of file From 2d38d98d1fc72399694d85d6a0717f19d23f9c47 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 11 Jun 2025 21:59:46 -0400 Subject: [PATCH 28/39] update boids.c observations allocation --- pufferlib/ocean/boids/boids.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pufferlib/ocean/boids/boids.c b/pufferlib/ocean/boids/boids.c index 8718e70065..ef522c3df1 100644 --- a/pufferlib/ocean/boids/boids.c +++ b/pufferlib/ocean/boids/boids.c @@ -38,9 +38,7 @@ void demo() { env.seperation_factor = SEPERATION_FACTOR; env.alignment_factor = ALIGNMENT_FACTOR; - 
// In the Python binding, these pointers are assigned from NumPy arrays. - // Here, we need to allocate them explicitly. - size_t obs_size = env.num_boids * env.num_boids * 4; // the 4 = (x, y, vx, vy) + size_t obs_size = env.num_boids * env.num_boids * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy) size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy) env.observations = (float*)calloc(obs_size, sizeof(float)); env.actions = (float*)calloc(act_size, sizeof(float)); From 971732bd471271dbf544a23dc4ffba7c785ca76b Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Wed, 11 Jun 2025 22:00:04 -0400 Subject: [PATCH 29/39] update observations and actions comments --- pufferlib/ocean/boids/boids.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index b883d72246..547b62e4a8 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -39,9 +39,12 @@ typedef struct { typedef struct Client Client; typedef struct { - // an array of shape (num_boids, 4) with the 4 values correspoinding to (x, y, velocity x, velocity y) + // Flat array of shape (num_boids * 8) values: + // - Each boid has 8 values corresponding to (x, y, vx, vy, dx, dy, dvx, dvy) + // - The first 8 values are for the boid itself + // - All the other 8 values for the other boids float* observations; - // an array of shape (num_boids, 2) with the 2 values correspoinding to (velocity x, velocity y) + // an array of shape (num_boids, 2) with the 2 values correspoinding to (dvx, dvy) float* actions; // an array of shape (1) with the summed up reward for all boids float* rewards; From bcecdd22eb0855549692d2e142efea6d2e87a34c Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Fri, 4 Jul 2025 01:39:29 -0400 Subject: [PATCH 30/39] remove commented parameters and update parameters to current best --- pufferlib/config/ocean/boids.ini | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git 
a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index ca757d9cf1..cf03a8aa8d 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -1,18 +1,16 @@ [base] package = ocean env_name = puffer_boids -; policy_name = Boids policy_name = Policy rnn_name = Recurrent -; rnn_name = None [env] num_envs = 64 num_boids = 64 -margin_turn_factor = 0.0 -cohesion_factor = 0.0 -seperation_factor = 1.0 -alignment_factor = 0.0 +margin_turn_factor = 2.0 +cohesion_factor = 0.0048 +separation_factor = 0.0128 +alignment_factor = 0.2 [vec] num_workers = 2 @@ -20,8 +18,7 @@ num_envs = 2 batch_size = auto [train] -; total_timesteps = 150_000_000 -total_timesteps = 50_000_000 +total_timesteps = 100_000_000 gamma = 0.95 learning_rate = 0.025 minibatch_size = 16384 \ No newline at end of file From a9f1b98c88e4524e43e968e29f945a96c059461c Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Fri, 4 Jul 2025 01:41:22 -0400 Subject: [PATCH 31/39] fix to "separation_factor" instead of "seperation_factor" --- pufferlib/ocean/boids/binding.c | 2 +- pufferlib/ocean/boids/boids.c | 4 ++-- pufferlib/ocean/boids/boids.h | 2 +- pufferlib/ocean/boids/boids.py | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pufferlib/ocean/boids/binding.c b/pufferlib/ocean/boids/binding.c index 89cb389759..d8e5820169 100644 --- a/pufferlib/ocean/boids/binding.c +++ b/pufferlib/ocean/boids/binding.c @@ -8,7 +8,7 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) { env->report_interval = unpack(kwargs, "report_interval"); env->margin_turn_factor = unpack(kwargs, "margin_turn_factor"); env->cohesion_factor = unpack(kwargs, "cohesion_factor"); - env->seperation_factor = unpack(kwargs, "seperation_factor"); + env->separation_factor = unpack(kwargs, "separation_factor"); env->alignment_factor = unpack(kwargs, "alignment_factor"); init(env); return 0; diff --git a/pufferlib/ocean/boids/boids.c b/pufferlib/ocean/boids/boids.c index 
ef522c3df1..4e4937666c 100644 --- a/pufferlib/ocean/boids/boids.c +++ b/pufferlib/ocean/boids/boids.c @@ -12,7 +12,7 @@ #define ACTION_SCALE 3.0f // Corresponds to action space [-3.0, 3.0] #define MARGIN_TURN_FACTOR 1.0 #define COHESION_FACTOR 0.0 -#define SEPERATION_FACTOR 0.0 +#define SEPARATION_FACTOR 0.0 #define ALIGNMENT_FACTOR 0.0 // Dummy action generation: random velocity changes for each boid @@ -35,7 +35,7 @@ void demo() { env.report_interval = REPORT_INTERVAL_DEMO; env.margin_turn_factor = MARGIN_TURN_FACTOR; env.cohesion_factor = COHESION_FACTOR; - env.seperation_factor = SEPERATION_FACTOR; + env.separation_factor = SEPARATION_FACTOR; env.alignment_factor = ALIGNMENT_FACTOR; size_t obs_size = env.num_boids * env.num_boids * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 547b62e4a8..8c17f8712f 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -53,7 +53,7 @@ typedef struct { unsigned int num_boids; float margin_turn_factor; float cohesion_factor; - float seperation_factor; + float separation_factor; float alignment_factor; unsigned tick; Log log; diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py index 5582cec81f..6eadcc8627 100644 --- a/pufferlib/ocean/boids/boids.py +++ b/pufferlib/ocean/boids/boids.py @@ -19,7 +19,7 @@ def __init__( num_boids=1, margin_turn_factor=1.0, cohesion_factor=0.0, - seperation_factor=0.0, + separation_factor=0.0, alignment_factor=0.0 ): ACTION_SPACE_SIZE = 2 @@ -58,7 +58,7 @@ def __init__( report_interval=self.report_interval, margin_turn_factor=margin_turn_factor, cohesion_factor=cohesion_factor, - seperation_factor=seperation_factor, + separation_factor=separation_factor, alignment_factor=alignment_factor, )) From 9dc328fe0e704eb7f5a74dec796e60bdd2653b53 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Fri, 4 Jul 2025 01:42:33 -0400 Subject: [PATCH 32/39] update preset env parameters --- 
pufferlib/ocean/boids/boids.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 8c17f8712f..460b4531b9 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -8,13 +8,13 @@ #include "raylib.h" -#define TOP_MARGIN 100 -#define BOTTOM_MARGIN 100 -#define LEFT_MARGIN 100 -#define RIGHT_MARGIN 100 +#define TOP_MARGIN 50 +#define BOTTOM_MARGIN 50 +#define LEFT_MARGIN 50 +#define RIGHT_MARGIN 50 #define VELOCITY_CAP 5 #define VISUAL_RANGE 400 -#define PROTECTED_RANGE 100 +#define PROTECTED_RANGE 60 #define WIDTH 1080 #define HEIGHT 720 #define BOID_WIDTH 32 From 4b339a36eca35a8a174c395232a21d4dbc609b94 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Fri, 4 Jul 2025 01:42:50 -0400 Subject: [PATCH 33/39] condense controlled boid observation loop --- pufferlib/ocean/boids/boids.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 460b4531b9..f84e25b4c6 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -102,9 +102,7 @@ static void compute_observations(Boids *env) { env->observations[idx++] = env->boids[i].y / HEIGHT; env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP; env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP; - for (unsigned j=0; j<4; j++) { - env->observations[idx++] = 0; - } + for (unsigned j=0; j<4; j++) { env->observations[idx++] = 0; } for (unsigned j=0; j<env->num_boids; j++) { if (i == j) continue; env->observations[idx++] = env->boids[j].x / WIDTH; From e56d28f6eb5abb6301d561dc20ed21f43d749f58 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sun, 6 Jul 2025 23:15:44 -0400 Subject: [PATCH 34/39] remove use of protected range diff --- pufferlib/ocean/boids/boids.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 
f84e25b4c6..8b45dd463a 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -130,7 +130,7 @@ void c_step(Boids *env) { Boid* current_boid; Boid observed_boid; float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg; - float diff_x, diff_y, dist, current_boid_reward, protected_range_diff; + float diff_x, diff_y, dist, current_boid_reward; unsigned visual_count, protected_count; bool manual_control = IsKeyDown(KEY_LEFT_SHIFT); float mouse_x = (float)GetMouseX(); @@ -173,8 +173,10 @@ void c_step(Boids *env) { } } if (protected_count > 0) { - protected_range_diff = (float)(env->num_boids - protected_count) - protected_count; - current_boid_reward += protected_range_diff * env->seperation_factor; + // protected_range_diff = (float)(env->num_boids - protected_count) - protected_count; + // current_boid_reward += protected_range_diff * env->seperation_factor; + + current_boid_reward -= protected_count * env->separation_factor; } if (visual_count) { vis_x_avg = vis_x_sum / visual_count; From cc8a397b18870090f90e3f56d5225eb25a8f2e45 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Mon, 7 Jul 2025 00:36:54 -0400 Subject: [PATCH 35/39] change reward normalization number --- pufferlib/ocean/boids/boids.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 8b45dd463a..f7391126d2 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -197,7 +197,10 @@ void c_step(Boids *env) { } // Normalization - env->rewards[current_indx] = current_boid_reward / 2.0f; + // env->rewards[current_indx] = current_boid_reward; + env->rewards[current_indx] = current_boid_reward / 6.0f; + // env->rewards[current_indx] = current_boid_reward / 205.0f; + // env->rewards[current_indx] = current_boid_reward / 10.0f; //log updates if (env->tick == env->report_interval) { From 0eda88952542d85cab5a63549acd0a77299014b0 Mon Sep 17 
00:00:00 2001 From: PLAZMAMA Date: Tue, 8 Jul 2025 19:52:52 -0400 Subject: [PATCH 36/39] update puffer resource path --- pufferlib/ocean/boids/boids.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index f7391126d2..9e9b87db89 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h @@ -19,7 +19,7 @@ #define HEIGHT 720 #define BOID_WIDTH 32 #define BOID_HEIGHT 32 -#define BOID_TEXTURE_PATH "./resources/puffers_128.png" +#define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png" typedef struct { float score; From 6158014a91f38f971308c86b0fe1a54d3748ede8 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sun, 27 Jul 2025 16:30:53 -0400 Subject: [PATCH 37/39] enable all factors --- pufferlib/config/ocean/boids.ini | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pufferlib/config/ocean/boids.ini b/pufferlib/config/ocean/boids.ini index cf03a8aa8d..683d8e9189 100644 --- a/pufferlib/config/ocean/boids.ini +++ b/pufferlib/config/ocean/boids.ini @@ -6,8 +6,8 @@ rnn_name = Recurrent [env] num_envs = 64 -num_boids = 64 -margin_turn_factor = 2.0 +num_boids = 16 +margin_turn_factor = 1.0 cohesion_factor = 0.0048 separation_factor = 0.0128 alignment_factor = 0.2 @@ -19,6 +19,7 @@ batch_size = auto [train] total_timesteps = 100_000_000 +; total_timesteps = 80_000_000 gamma = 0.95 learning_rate = 0.025 minibatch_size = 16384 \ No newline at end of file From e67ad51947ff882c537f9977e132a7460b71b6bf Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sun, 27 Jul 2025 16:31:39 -0400 Subject: [PATCH 38/39] add euclidean distance to observations --- pufferlib/ocean/boids/boids.h | 17 ++++++++++++++--- pufferlib/ocean/boids/boids.py | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pufferlib/ocean/boids/boids.h b/pufferlib/ocean/boids/boids.h index 9e9b87db89..a56f489437 100644 --- a/pufferlib/ocean/boids/boids.h +++ b/pufferlib/ocean/boids/boids.h 
@@ -20,6 +20,7 @@ #define BOID_WIDTH 32 #define BOID_HEIGHT 32 #define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png" +#define MAX_DIST 2000 typedef struct { float score; @@ -97,20 +98,30 @@ void init(Boids *env) { static void compute_observations(Boids *env) { int idx = 0; + float diff_x, diff_y, dist; for (unsigned i=0; i<env->num_boids; i++) { + // observations for the current boid env->observations[idx++] = env->boids[i].x / WIDTH; env->observations[idx++] = env->boids[i].y / HEIGHT; env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP; env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP; - for (unsigned j=0; j<4; j++) { env->observations[idx++] = 0; } + // zeros for relative observations since comparing to itself will always be 0 + for (unsigned j=0; j<5; j++) { env->observations[idx++] = 0; } + + // observations for the other boids compared to the current boid for (unsigned j=0; j<env->num_boids; j++) { if (i == j) continue; + diff_x = env->boids[i].x - env->boids[j].x; + diff_y = env->boids[i].y - env->boids[j].y; + dist = sqrtf(diff_x*diff_x + diff_y*diff_y); + env->observations[idx++] = env->boids[j].x / WIDTH; env->observations[idx++] = env->boids[j].y / HEIGHT; env->observations[idx++] = env->boids[j].velocity.x / VELOCITY_CAP; env->observations[idx++] = env->boids[j].velocity.y / VELOCITY_CAP; - env->observations[idx++] = (env->boids[i].x - env->boids[j].x) / WIDTH; - env->observations[idx++] = (env->boids[i].y - env->boids[j].y) / HEIGHT; + env->observations[idx++] = diff_x / WIDTH; + env->observations[idx++] = diff_y / HEIGHT; + env->observations[idx++] = dist / MAX_DIST; env->observations[idx++] = (env->boids[i].velocity.x - env->boids[j].velocity.x) / VELOCITY_CAP; env->observations[idx++] = (env->boids[i].velocity.y - env->boids[j].velocity.y) / VELOCITY_CAP; } diff --git a/pufferlib/ocean/boids/boids.py b/pufferlib/ocean/boids/boids.py index 6eadcc8627..5e9b522226 100644 --- a/pufferlib/ocean/boids/boids.py +++ 
b/pufferlib/ocean/boids/boids.py @@ -27,7 +27,7 @@ def __init__( self.num_boids = num_boids self.single_observation_space = gymnasium.spaces.Box( - -1000.0, 1000.0, shape=(num_boids*8,), dtype=np.float32 + -1000.0, 1000.0, shape=(num_boids*9,), dtype=np.float32 ) #self.single_action_space = gymnasium.spaces.Box( From 0bf4d7410fda7354cce4d49e344a8e50c3616687 Mon Sep 17 00:00:00 2001 From: PLAZMAMA Date: Sun, 27 Jul 2025 16:34:44 -0400 Subject: [PATCH 39/39] add euclidean distance to local build observations --- pufferlib/ocean/boids/boids.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pufferlib/ocean/boids/boids.c b/pufferlib/ocean/boids/boids.c index 4e4937666c..0573cb9735 100644 --- a/pufferlib/ocean/boids/boids.c +++ b/pufferlib/ocean/boids/boids.c @@ -38,7 +38,7 @@ void demo() { env.separation_factor = SEPARATION_FACTOR; env.alignment_factor = ALIGNMENT_FACTOR; - size_t obs_size = env.num_boids * env.num_boids * 8; // 8 = (x, y, vx, vy, dx, dy, dvx, dvy) + size_t obs_size = env.num_boids * env.num_boids * 9; // 9 = (x, y, vx, vy, dx, dy, dist, dvx, dvy) size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy) env.observations = (float*)calloc(obs_size, sizeof(float)); env.actions = (float*)calloc(act_size, sizeof(float));