Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
0734d89
changed policy and rename factors to match common names
PLAZMAMA May 24, 2025
e94a4aa
remove unused log fields
PLAZMAMA May 24, 2025
817dc14
remove unused variable
PLAZMAMA May 24, 2025
e232d3e
remove unused commented code
PLAZMAMA May 24, 2025
4eb410a
remove unused boid_logs and fix logs calculation
PLAZMAMA May 29, 2025
1d424d7
fix overflow and zero report_interval
PLAZMAMA May 31, 2025
7376334
add above zero checks for num_boids and report_interval
PLAZMAMA May 31, 2025
ff483f6
remove unused commented flat_actions
PLAZMAMA Jun 2, 2025
26bebef
simplify seperation reward and test it
PLAZMAMA Jun 2, 2025
06878eb
test out only avoid factor
PLAZMAMA Jun 2, 2025
35de375
remove unused avg_reward and change seperation factor reward
PLAZMAMA Jun 3, 2025
9709a46
fix factor names
PLAZMAMA Jun 4, 2025
1c28c72
remove unused commented code
PLAZMAMA Jun 4, 2025
bf8c75f
fix seperation factor reward calculation
PLAZMAMA Jun 5, 2025
23e2399
remove unused commented params
PLAZMAMA Jun 5, 2025
bb162fb
remove normalization from separation factor calculation
PLAZMAMA Jun 5, 2025
85d5891
fix visual range
PLAZMAMA Jun 5, 2025
435ac9e
remove positve margin rewards and remove commented code
PLAZMAMA Jun 6, 2025
463f60a
add factors to env run with "boids.c"
PLAZMAMA Jun 6, 2025
00af443
add debug margin lines and adjust reward normalization
PLAZMAMA Jun 6, 2025
fc4e722
only turn on margin turn factor and adjust total timesteps
PLAZMAMA Jun 6, 2025
a413221
change top/bottom margins
PLAZMAMA Jun 6, 2025
342d83c
account for boid width and hight in margin reward calculation
PLAZMAMA Jun 6, 2025
01a84c0
increase max steps
PLAZMAMA Jun 11, 2025
117e9b6
remove debug margin lines
PLAZMAMA Jun 12, 2025
618cb0b
fix observations for margin factor
PLAZMAMA Jun 12, 2025
7526366
remove single agent params
PLAZMAMA Jun 12, 2025
2d38d98
update boids.c observations allocation
PLAZMAMA Jun 12, 2025
971732b
update observations and actions comments
PLAZMAMA Jun 12, 2025
bcecdd2
remove commented parameters and update parameters to current best
PLAZMAMA Jul 4, 2025
a9f1b98
fix to "separation_factor" instead of "seperation_factor"
PLAZMAMA Jul 4, 2025
9dc328f
update preset env parameters
PLAZMAMA Jul 4, 2025
4b339a3
condence controlled boid observation loop
PLAZMAMA Jul 4, 2025
e56d28f
remove use of protected range diff
PLAZMAMA Jul 7, 2025
cc8a397
change reward normalization number
PLAZMAMA Jul 7, 2025
0eda889
update puffer resource path
PLAZMAMA Jul 8, 2025
6158014
enable all factors
PLAZMAMA Jul 27, 2025
e67ad51
add euclidean distance to observations
PLAZMAMA Jul 27, 2025
0bf4d74
add euclidean distance to local build observations
PLAZMAMA Jul 27, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 8 additions & 55 deletions pufferlib/config/ocean/boids.ini
Original file line number Diff line number Diff line change
@@ -1,19 +1,16 @@
[base]
package = ocean
env_name = puffer_boids
policy_name = Boids
policy_name = Policy
rnn_name = Recurrent
; rnn_name = None

[env]
num_envs = 64
num_boids = 64
; num_envs = 1
; num_boids = 1
margin_turn_factor = 0.0
centering_factor = 0.00
avoid_factor = 1.00
matching_factor = 1.00
num_boids = 16
margin_turn_factor = 1.0
cohesion_factor = 0.0048
separation_factor = 0.0128
alignment_factor = 0.2

[vec]
num_workers = 2
Expand All @@ -22,51 +19,7 @@ batch_size = auto

[train]
total_timesteps = 100_000_000
; total_timesteps = 80_000_000
gamma = 0.95
learning_rate = 0.025
minibatch_size = 16384
; minibatch_size = 1

; [sweep]
; method = protein
; metric = episode_length

; [sweep.train.total_timesteps]
; distribution = log_normal
; min = 1e6
; max = 1e7
; mean = 5e6
; scale = 0.5

; [sweep.train.gamma]
; distribution = log_normal
; min = 0.9
; max = 0.999
; mean = 0.97

; [sweep.train.gae_lambda]
; distribution = log_normal
; min = 0.7
; max = 0.999
; mean = 0.95

; [sweep.train.learning_rate]
; distribution = log_normal
; min = 0.0001
; max = 0.001
; mean = 0.00025
; scale = 0.5

; [sweep.train.batch_size]
; min = 32768
; max = 131072
; mean = 65536
; scale = 0.5

; [sweep.train.minibatch_size]
; min = 512
; max = 2048
; mean = 1024
; scale = 0.5


minibatch_size = 16384
9 changes: 3 additions & 6 deletions pufferlib/ocean/boids/binding.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,15 @@ static int my_init(Env* env, PyObject* args, PyObject* kwargs) {
env->num_boids = unpack(kwargs, "num_boids");
env->report_interval = unpack(kwargs, "report_interval");
env->margin_turn_factor = unpack(kwargs, "margin_turn_factor");
env->centering_factor = unpack(kwargs, "centering_factor");
env->avoid_factor = unpack(kwargs, "avoid_factor");
env->matching_factor = unpack(kwargs, "matching_factor");
env->cohesion_factor = unpack(kwargs, "cohesion_factor");
env->separation_factor = unpack(kwargs, "separation_factor");
env->alignment_factor = unpack(kwargs, "alignment_factor");
init(env);
return 0;
}

static int my_log(PyObject* dict, Log* log) {
assign_to_dict(dict, "perf", log->perf);
assign_to_dict(dict, "score", log->score);
assign_to_dict(dict, "episode_return", log->episode_return);
assign_to_dict(dict, "episode_length", log->episode_length);
assign_to_dict(dict, "n", log->n);
return 0;
}
18 changes: 13 additions & 5 deletions pufferlib/ocean/boids/boids.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,13 @@

// --- Demo Configuration ---
#define NUM_BOIDS_DEMO 20 // Number of boids for the standalone demo
#define MAX_STEPS_DEMO 500 // Max steps per episode in the demo
#define REPORT_INTERVAL_DEMO 1000 // Report interval for the demo
#define MAX_STEPS_DEMO 10000 // Max steps per episode in the demo
#define ACTION_SCALE 3.0f // Corresponds to action space [-3.0, 3.0]
#define MARGIN_TURN_FACTOR 1.0
#define COHESION_FACTOR 0.0
#define SEPARATION_FACTOR 0.0
#define ALIGNMENT_FACTOR 0.0

// Dummy action generation: random velocity changes for each boid
void generate_dummy_actions(Boids* env) {
Expand All @@ -27,11 +32,14 @@ void demo() {
// Initialize Boids environment struct
Boids env = {0};
env.num_boids = NUM_BOIDS_DEMO;
env.report_interval = REPORT_INTERVAL_DEMO;
env.margin_turn_factor = MARGIN_TURN_FACTOR;
env.cohesion_factor = COHESION_FACTOR;
env.separation_factor = SEPARATION_FACTOR;
env.alignment_factor = ALIGNMENT_FACTOR;

// In the Python binding, these pointers are assigned from NumPy arrays.
// Here, we need to allocate them explicitly.
size_t obs_size = env.num_boids * 4; // num_boids * (x, y, vx, vy)
size_t act_size = env.num_boids * 2; // num_boids * (dvx, dvy)
size_t obs_size = env.num_boids * env.num_boids * 9; // 9 = (x, y, vx, vy, dx, dy, dist, dvx, dvy)
size_t act_size = env.num_boids * 2; // the 2 = (dvx, dvy)
env.observations = (float*)calloc(obs_size, sizeof(float));
env.actions = (float*)calloc(act_size, sizeof(float));
env.rewards = (float*)calloc(env.num_boids, sizeof(float)); // Env-level reward
Expand Down
110 changes: 64 additions & 46 deletions pufferlib/ocean/boids/boids.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,19 +13,17 @@
#define LEFT_MARGIN 50
#define RIGHT_MARGIN 50
#define VELOCITY_CAP 5
#define VISUAL_RANGE 20
#define PROTECTED_RANGE 100
#define VISUAL_RANGE 400
#define PROTECTED_RANGE 60
#define WIDTH 1080
#define HEIGHT 720
#define BOID_WIDTH 32
#define BOID_HEIGHT 32
#define BOID_TEXTURE_PATH "./resources/puffers_128.png"
#define BOID_TEXTURE_PATH "./resources/shared/puffers_128.png"
#define MAX_DIST 2000

typedef struct {
float perf;
float score;
float episode_return;
float episode_length;
float n;
} Log;

Expand All @@ -42,25 +40,26 @@ typedef struct {

typedef struct Client Client;
typedef struct {
// an array of shape (num_boids, 4) with the 4 values correspoinding to (x, y, velocity x, velocity y)
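// Flat array of (num_boids * num_boids * 9) values, i.e. num_boids entries of 9 values for every boid:
// - Each entry has 9 values corresponding to (x, y, vx, vy, dx, dy, dist, dvx, dvy)
// - The first 9 values are for the boid itself (its dx, dy, dist, dvx, dvy are always 0)
// - All the other entries hold the same 9 values for each of the other boids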
float* observations;
// an array of shape (num_boids, 2) with the 2 values correspoinding to (velocity x, velocity y)
// an array of shape (num_boids, 2) with the 2 values corresponding to (dvx, dvy)
float* actions;
// an array of shape (num_boids) with one reward per boid
float* rewards;
unsigned char* terminals; // Not being used but is required by env_binding.h
Boid* boids;
unsigned int num_boids;
float margin_turn_factor;
float centering_factor;
float avoid_factor;
float matching_factor;
float cohesion_factor;
float separation_factor;
float alignment_factor;
unsigned tick;
Log log;
Log* boid_logs;
unsigned report_interval;
Client* client;

} Boids;

/* Returns the larger of two floats; yields b whenever the comparison a > b is false. */
static inline float flmax(float a, float b) {
    if (a > b) {
        return a;
    }
    return b;
}
Expand All @@ -73,12 +72,18 @@ static void respawn_boid(Boids *env, unsigned int i) {
env->boids[i].y = rndf(BOTTOM_MARGIN, HEIGHT - TOP_MARGIN);
env->boids[i].velocity.x = 0;
env->boids[i].velocity.y = 0;
env->boid_logs[i] = (Log){0};
}

void init(Boids *env) {
if(env->num_boids < 1) {
printf("ERROR: num_boids must be bigger than 0\n");
exit(1);
}
if (env->report_interval < 1) {
printf("ERROR: report_interval must be bigger than 0\n");
exit(1);
}
env->boids = (Boid*)calloc(env->num_boids, sizeof(Boid));
env->boid_logs = (Log*)calloc(env->num_boids, sizeof(Log));
env->log = (Log){0};
env->tick = 0;

Expand All @@ -92,15 +97,33 @@ void init(Boids *env) {


static void compute_observations(Boids *env) {
unsigned base_indx;

int idx = 0;
float diff_x, diff_y, dist;
for (unsigned i=0; i<env->num_boids; i++) {
// observations for the current boid
env->observations[idx++] = env->boids[i].x / WIDTH;
env->observations[idx++] = env->boids[i].y / HEIGHT;
env->observations[idx++] = env->boids[i].velocity.x / VELOCITY_CAP;
env->observations[idx++] = env->boids[i].velocity.y / VELOCITY_CAP;
// zeros for relative observations since comparing to itself will always be 0
for (unsigned j=0; j<5; j++) { env->observations[idx++] = 0; }

// observations for the other boids compared to the current boid
for (unsigned j=0; j<env->num_boids; j++) {
env->observations[idx++] = (env->boids[j].x - env->boids[i].x) / WIDTH;
env->observations[idx++] = (env->boids[j].y - env->boids[i].y) / HEIGHT;
env->observations[idx++] = (env->boids[j].velocity.x - env->boids[i].velocity.x) / VELOCITY_CAP;
env->observations[idx++] = (env->boids[j].velocity.y - env->boids[i].velocity.y) / VELOCITY_CAP;
if (i == j) continue;
diff_x = env->boids[i].x - env->boids[j].x;
diff_y = env->boids[i].y - env->boids[j].y;
dist = sqrtf(diff_x*diff_x + diff_y*diff_y);

env->observations[idx++] = env->boids[j].x / WIDTH;
env->observations[idx++] = env->boids[j].y / HEIGHT;
env->observations[idx++] = env->boids[j].velocity.x / VELOCITY_CAP;
env->observations[idx++] = env->boids[j].velocity.y / VELOCITY_CAP;
env->observations[idx++] = diff_x / WIDTH;
env->observations[idx++] = diff_y / HEIGHT;
env->observations[idx++] = dist / MAX_DIST;
env->observations[idx++] = (env->boids[i].velocity.x - env->boids[j].velocity.x) / VELOCITY_CAP;
env->observations[idx++] = (env->boids[i].velocity.y - env->boids[j].velocity.y) / VELOCITY_CAP;
}
}
}
Expand All @@ -118,7 +141,7 @@ void c_step(Boids *env) {
Boid* current_boid;
Boid observed_boid;
float vis_vx_sum, vis_vy_sum, vis_x_sum, vis_y_sum, vis_x_avg, vis_y_avg, vis_vx_avg, vis_vy_avg;
float diff_x, diff_y, dist, protected_dist_sum, current_boid_reward;
float diff_x, diff_y, dist, current_boid_reward;
unsigned visual_count, protected_count;
bool manual_control = IsKeyDown(KEY_LEFT_SHIFT);
float mouse_x = (float)GetMouseX();
Expand All @@ -127,21 +150,22 @@ void c_step(Boids *env) {
env->tick++;
env->rewards[0] = 0;
env->log.score = 0;
env->log.n = 0;
for (unsigned current_indx = 0; current_indx < env->num_boids; current_indx++) {
// apply action
current_boid = &env->boids[current_indx];
if (manual_control) {
current_boid->velocity.x = flclip(current_boid->velocity.x + (mouse_x - current_boid->x), -VELOCITY_CAP, VELOCITY_CAP);
current_boid->velocity.y = flclip(current_boid->velocity.y + (mouse_y - current_boid->y), -VELOCITY_CAP, VELOCITY_CAP);
} else {
current_boid->velocity.x = flclip(current_boid->velocity.x + 2*env->actions[current_indx * 2 + 0], -VELOCITY_CAP, VELOCITY_CAP);
current_boid->velocity.y = flclip(current_boid->velocity.y + 2*env->actions[current_indx * 2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
current_boid->velocity.x = flclip(current_boid->velocity.x + env->actions[current_indx * 2 + 0], -VELOCITY_CAP, VELOCITY_CAP);
current_boid->velocity.y = flclip(current_boid->velocity.y + env->actions[current_indx * 2 + 1], -VELOCITY_CAP, VELOCITY_CAP);
}
current_boid->x = flclip(current_boid->x + current_boid->velocity.x, 0, WIDTH - BOID_WIDTH);
current_boid->y = flclip(current_boid->y + current_boid->velocity.y, 0, HEIGHT - BOID_HEIGHT);

// reward calculation
current_boid_reward = 0.0f, protected_dist_sum = 0.0f, protected_count = 0.0f;
current_boid_reward = 0.0f, protected_count = 0.0f;
visual_count = 0.0f, vis_vx_sum = 0.0f, vis_vy_sum = 0.0f, vis_x_sum = 0.0f, vis_y_sum = 0.0f;
for (unsigned observed_indx = 0; observed_indx < env->num_boids; observed_indx++) {
if (current_indx == observed_indx) continue;
Expand All @@ -150,7 +174,6 @@ void c_step(Boids *env) {
diff_y = current_boid->y - observed_boid.y;
dist = sqrtf(diff_x*diff_x + diff_y*diff_y);
if (dist < PROTECTED_RANGE) {
protected_dist_sum += (PROTECTED_RANGE - dist);
protected_count++;
} else if (dist < VISUAL_RANGE) {
vis_x_sum += observed_boid.x;
Expand All @@ -161,46 +184,42 @@ void c_step(Boids *env) {
}
}
if (protected_count > 0) {
//current_boid_reward -= fabsf(protected_dist_sum / protected_count) * env->avoid_factor;
current_boid_reward -= flclip(protected_count/5.0, 0.0f, 1.0f) * env->avoid_factor;
// protected_range_diff = (float)(env->num_boids - protected_count) - protected_count;
// current_boid_reward += protected_range_diff * env->seperation_factor;

current_boid_reward -= protected_count * env->separation_factor;
}
if (visual_count) {
vis_x_avg = vis_x_sum / visual_count;
vis_y_avg = vis_y_sum / visual_count;
vis_vx_avg = vis_vx_sum / visual_count;
vis_vy_avg = vis_vy_sum / visual_count;

current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->matching_factor;
current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->matching_factor;
current_boid_reward -= fabsf(vis_x_avg - current_boid->x) * env->centering_factor;
current_boid_reward -= fabsf(vis_y_avg - current_boid->y) * env->centering_factor;
current_boid_reward -= fabsf(vis_vx_avg - current_boid->velocity.x) * env->alignment_factor;
current_boid_reward -= fabsf(vis_vy_avg - current_boid->velocity.y) * env->alignment_factor;
current_boid_reward -= fabsf(vis_x_avg - current_boid->x) * env->cohesion_factor;
current_boid_reward -= fabsf(vis_y_avg - current_boid->y) * env->cohesion_factor;
}
if (current_boid->y < TOP_MARGIN || current_boid->y > HEIGHT - BOTTOM_MARGIN) {
if (current_boid->y < TOP_MARGIN || current_boid->y + BOID_HEIGHT > HEIGHT - BOTTOM_MARGIN) {
current_boid_reward -= env->margin_turn_factor;
} else {
current_boid_reward += env->margin_turn_factor;
}
if (current_boid->x < LEFT_MARGIN || current_boid->x > WIDTH - RIGHT_MARGIN) {
if (current_boid->x < LEFT_MARGIN || current_boid->x + BOID_WIDTH > WIDTH - RIGHT_MARGIN) {
current_boid_reward -= env->margin_turn_factor;
} else {
current_boid_reward += env->margin_turn_factor;
}

// Normalization
// env->rewards[current_indx] = current_boid_reward / 15.0f;
// printf("current_boid_reward: %f\n", current_boid_reward);
env->rewards[current_indx] = current_boid_reward / 2.0f;
// env->rewards[current_indx] = current_boid_reward;
env->rewards[current_indx] = current_boid_reward / 6.0f;
// env->rewards[current_indx] = current_boid_reward / 205.0f;
// env->rewards[current_indx] = current_boid_reward / 10.0f;

//log updates
if (env->tick == env->report_interval) {
env->log.score += env->rewards[current_indx];
env->log.n += 1.0f;

/* clear per-boid log for next episode */
// env->boid_logs[boid_indx] = (Log){0};
env->tick = 0;
}
}
//env->log.score /= env->num_boids;

compute_observations(env);
}
Expand All @@ -220,7 +239,6 @@ void c_close_client(Client* client) {

void c_close(Boids* env) {
free(env->boids);
free(env->boid_logs);
if (env->client != NULL) {
c_close_client(env->client);
}
Expand Down
Loading
Loading