Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
68 commits
Select commit Hold shift + click to select a range
ac10400
Merge demo
jsuarez5341 May 2, 2025
c7991fe
temp
jsuarez5341 May 2, 2025
7b7f048
temp
jsuarez5341 May 2, 2025
530ef57
clean up logging
jsuarez5341 May 2, 2025
d35b9de
Initial proper torch bind for cuda
jsuarez5341 May 2, 2025
94b98a6
Cleaned create function
jsuarez5341 May 2, 2025
a19fd37
more refactor
jsuarez5341 May 2, 2025
e5449b9
few small cleanups
jsuarez5341 May 3, 2025
e666bca
cleanup
jsuarez5341 May 3, 2025
31638d6
compile, ddp, amp
jsuarez5341 May 3, 2025
95e4fb1
More refactor
jsuarez5341 May 3, 2025
16de80c
Cleanup old sweep comments
jsuarez5341 May 5, 2025
6debb2d
Ocean ini files for new sweep
jsuarez5341 May 5, 2025
8a49f99
sota nmmo3
jsuarez5341 May 5, 2025
d3c1f8b
sweep defaults
jsuarez5341 May 5, 2025
b6ffb1d
Fix logit norm bug
jsuarez5341 May 5, 2025
9396f29
Fix build for 5090
jsuarez5341 May 5, 2025
5c4f068
Sweep setup for maze, sota maze at 32 size
jsuarez5341 May 5, 2025
f2c9770
Fix action sampling bug
jsuarez5341 May 5, 2025
9ad5b55
Move a bunch of stuff to a pufferlib.py
jsuarez5341 May 5, 2025
65014b5
Delete unused utils, move rest to pufferlib
jsuarez5341 May 5, 2025
41cbb4c
Robustify nans
jsuarez5341 May 6, 2025
1953f39
pufferlib.py
jsuarez5341 May 6, 2025
23f2f7e
Fix new api
jsuarez5341 May 6, 2025
2cba8e4
Fix cost fn passed to sweep:
jsuarez5341 May 6, 2025
3691201
breakpoint
jsuarez5341 May 6, 2025
15c6e54
Several small fixes
jsuarez5341 May 6, 2025
75d1dd1
Sweep smoke fix
jsuarez5341 May 6, 2025
d3588d2
sweep
jsuarez5341 May 6, 2025
82eb92f
Actually maximize in sweep...
jsuarez5341 May 6, 2025
e0d9b98
defaults for sweep
jsuarez5341 May 7, 2025
f479f3b
clean pufferdrive
l1onh3art88 May 7, 2025
993e22e
car models
l1onh3art88 May 7, 2025
2b6393f
Merge pull request #218 from l1onh3art88/clean_pufferdrive
jsuarez5341 May 7, 2025
2eadd25
driving training
jsuarez5341 May 7, 2025
8fe5229
Fix model size
jsuarez5341 May 7, 2025
d158d45
new binding is in
l1onh3art88 May 7, 2025
d80dee0
small adjustments
l1onh3art88 May 7, 2025
52774c0
Merge pull request #219 from l1onh3art88/release
jsuarez5341 May 7, 2025
3216ba8
binding.c
l1onh3art88 May 7, 2025
39ae42d
Merge pull request #220 from l1onh3art88/release
jsuarez5341 May 7, 2025
78285d7
gpudrive
jsuarez5341 May 8, 2025
cbe9e2e
Initial obs norm fix
jsuarez5341 May 8, 2025
b1b8945
gpudrive fixes
jsuarez5341 May 8, 2025
9627997
Initial main script refactor
jsuarez5341 May 8, 2025
adec1c0
trade sim config
jsuarez5341 May 8, 2025
24b7093
Remove puffer namespace from clean_pufferl
jsuarez5341 May 8, 2025
d037525
neptune/wandb model save/load
jsuarez5341 May 9, 2025
3f934ab
Remove scipy
jsuarez5341 May 9, 2025
ebe5f3c
Clean up model save and load
jsuarez5341 May 9, 2025
b7d2a6c
Clean up train file
jsuarez5341 May 9, 2025
dd0a986
more refactor:
jsuarez5341 May 10, 2025
3c28dd8
cleanup
jsuarez5341 May 10, 2025
1865d6f
temp
jsuarez5341 May 10, 2025
98f6ac6
nmmo3 test config
jsuarez5341 May 10, 2025
547caaa
nmmo3 policy
jsuarez5341 May 10, 2025
6253493
Auto batch size
jsuarez5341 May 10, 2025
2d1b874
speedrun
jsuarez5341 May 10, 2025
8557e4d
minor
jsuarez5341 May 10, 2025
3bbef95
remove namespace
jsuarez5341 May 10, 2025
267ad6d
merge conflict
jsuarez5341 May 10, 2025
223a132
Initial env bind tests
jsuarez5341 May 10, 2025
3bb3076
API fixes
jsuarez5341 May 10, 2025
688f4a1
Continuous atn space flatten
jsuarez5341 May 10, 2025
9d480b0
Initial build sys
jsuarez5341 May 11, 2025
d0f032e
Much cleaner setup.py
jsuarez5341 May 12, 2025
b7ae6d9
env get binding
jsuarez5341 May 12, 2025
16af736
prerelase merge
jsuarez5341 May 12, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,906 changes: 921 additions & 985 deletions clean_pufferl.py

Large diffs are not rendered by default.

164 changes: 62 additions & 102 deletions config/default.ini
Original file line number Diff line number Diff line change
@@ -1,124 +1,100 @@
[base]
package = None
env_name = None
vec = native
policy_name = Policy
rnn_name = None
max_suggestion_cost = 3600

[workspace]
name = pufferai
project = ablations
[vec]
backend = Multiprocessing
num_envs = 2
num_workers = auto
batch_size = auto
zero_copy = True
seed = 42

[env]
[policy]
[rnn]

[train]
seed = 0
name = pufferai
project = ablations
run_id = None
run_tag = None

seed = 42
torch_deterministic = True
cpu_offload = False
device = cuda
optimizer = muon
scheduler = cosine
anneal_lr = True
precision = float32
total_timesteps = 10_000_000
learning_rate = 0.025
gamma = 0.995
gae_lambda = 0.85
update_epochs = 1
norm_adv = True
# Consider raising clip coef to 0.2
clip_coef = 0.1
clip_vloss = True
vf_coef = 2.0
vf_clip_coef = 0.1
max_grad_norm = 0.5
ent_coef = 0.01
target_kl = None
adam_beta1 = 0.9
adam_beta2 = 0.999
adam_eps = 1e-12

num_envs = 2
num_workers = 2
env_batch_size = 1
zero_copy = True
data_dir = experiments
checkpoint_interval = 200
batch_size = 524288
batch_size = auto
minibatch_size = 8192
replay_factor = 0.0

# Accumulate gradients above this size
max_minibatch_size = 32768
bptt_horizon = 64
compile = False
compile_mode = reduce-overhead
compile_mode = max-autotune-no-cudagraphs
compile_fullgraph = True

use_diayn = False
diayn_archive = 256
diayn_loss_coef = 0.000
diayn_coef = 0.0

use_p3o = False
p3o_horizon = 128
puf = 0.0

use_vtrace = False
vtrace_rho_clip = 1.0
vtrace_c_clip = 1.0

use_puff_advantage = True

prio_alpha = 0.6
prio_beta0 = 0.4

[sweep]
method = protein
name = sweep

[sweep.metric]
method = Protein
metric = score
goal = maximize
name = score
min = 0
max = 1

[sweep.env.num_envs]
[sweep.vec.num_envs]
distribution = uniform_pow2
min = 64
max = 4096
mean = 1024
min = 1
max = 8
mean = 2
scale = auto
#scale = 0.5

#[sweep.policy.hidden_size]
#distribution = uniform_pow2
#min = 32
#max = 1024
#mean = 128
#scale = auto

# TODO: Elim from base
[sweep.train.total_timesteps]
distribution = log_normal
min = 5e7
max = 1e10
mean = 1e8
scale = time

[sweep.train.batch_size]
distribution = uniform_pow2
min = 32768
max = 1048576
mean = 262144
[sweep.train.bptt_horizon]
distribution = int_uniform
min = 16
max = 64
mean = 64
scale = auto

[sweep.train.minibatch_size]
distribution = uniform_pow2
min = 1024
max = 32768
mean = 8192
min = 8192
max = 131072
mean = 32768
scale = auto

[sweep.train.learning_rate]
Expand All @@ -141,15 +117,13 @@ min = 0.8
mean = 0.98
max = 0.9999
scale = auto
#scale = 0.5

[sweep.train.gae_lambda]
distribution = logit_normal
min = 0.6
mean = 0.95
max = 0.995
scale = auto
#scale = 0.5

[sweep.train.update_epochs]
distribution = int_uniform
Expand All @@ -158,6 +132,20 @@ max = 4
mean = 1
scale = 1.0

[sweep.train.clip_coef]
distribution = uniform
min = 0.01
max = 1.0
mean = 0.1
scale = auto

[sweep.train.vf_clip_coef]
distribution = uniform
min = 0.01
max = 5.0
mean = 0.1
scale = auto

[sweep.train.vf_coef]
distribution = uniform
min = 0.0
Expand All @@ -172,20 +160,6 @@ mean = 1.0
max = 5.0
scale = auto

[sweep.train.bptt_horizon]
distribution = uniform_pow2
min = 4
max = 128
mean = 16
scale = auto

#[sweep.train.puf]
#distribution = logit_normal
#min = 0.01
#mean = 0.5
#max = 0.99
#scale = auto

[sweep.train.adam_beta1]
distribution = logit_normal
min = 0.5
Expand All @@ -201,36 +175,22 @@ max = 0.99999
scale = auto

[sweep.train.adam_eps]
distribution = uniform
min = 0.00000000000001
mean = 0.00000001
max = 0.001
distribution = log_normal
min = 1e-14
mean = 1e-8
max = 1e-4
scale = auto

#[sweep.train.horizon]
#distribution = uniform_pow2
#min = 4
#max = 128
#mean = 32
#scale = 0.25

#[sweep.train.diayn_archive]
#distribution = uniform_pow2
#min = 2
#max = 64
#mean = 8
#scale = auto

#[sweep.train.diayn_loss_coef]
#distribution = uniform
#min = 0.0
#max = 2.0
#mean = 1.0
#scale = auto

#[sweep.train.diayn_coef]
#distribution = log_normal
#min = 0.0001
#mean = 0.1
#max = 0.99
#scale = auto
[sweep.train.prio_alpha]
distribution = logit_normal
min = 0.1
mean = 0.6
max = 0.99
scale = auto

[sweep.train.prio_beta0]
distribution = logit_normal
min = 0.1
mean = 0.4
max = 0.99
scale = auto
23 changes: 6 additions & 17 deletions config/metta.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,17 +3,17 @@ package = metta
env_name = metta
policy_name = Policy
rnn_name = Recurrent
vec = multiprocessing

[vec]
num_envs = 128
num_workers = 16
batch_size = 64

[env]
render_mode = auto
#num_envs = 128

[train]
total_timesteps = 5_000_000_000
num_envs = 128
num_workers = 16
env_batch_size = 64
total_timesteps = 100_000_000
learning_rate = 0.0013848535655657842
gamma = 0.9959746852829785
gae_lambda = 0.9283720217357007
Expand Down Expand Up @@ -45,17 +45,6 @@ adam_eps = 0.000249501214984291
#minibatch_size = 32768
#compile = False

[sweep]
method = protein
name = sweep

[sweep.metric]
goal = maximize
name = score
min = 0
max = 10
scale = auto

#[sweep.train.total_timesteps]
#distribution = log_normal
#min = 2e7
Expand Down
4 changes: 2 additions & 2 deletions config/ocean/blastar.ini
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ gamma = 0.95
learning_rate = 0.05
minibatch_size = 32768

[sweep.metric]
name = environment/enemy_crossed_screen
[sweep]
metric = environment/enemy_crossed_screen
goal = minimize

[sweep.parameters.train.parameters.batch_size]
Expand Down
56 changes: 35 additions & 21 deletions config/ocean/breakout.ini
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package = ocean
env_name = puffer_breakout
policy_name = Policy
rnn_name = Recurrent
vec = multiprocessing

[vec]
num_envs = 2

[env]
num_envs = 4096
Expand All @@ -16,23 +18,35 @@ input_size = 128
hidden_size = 128

[train]
total_timesteps = 80_000_000
learning_rate = 0.05
minibatch_size = 32768

[sweep]
method = protein
name = sweep

[sweep.metric]
goal = maximize
name = score
min = 0
max = 864

#[sweep.train.total_timesteps]
#distribution = log_normal
#min = 2e7
#max = 1e8
#mean = 5e7
#scale = auto
total_timesteps = 75_000_000

# Highly sensitive
adam_beta1 = 0.99

adam_beta2 = 0.9999
adam_eps = 1e-14
ent_coef = 0.025
gae_lambda = 0.85

# Highly sensitive
gamma = 0.975

learning_rate = 0.01
max_grad_norm = 1.5
minibatch_size = 16384

prio_alpha = 0.0
# Doesn't matter
prio_beta0 = 1.0

# Just can't be low
vf_coef = 1.3

# TODO: Try tuning clip coefs

[sweep.train.total_timesteps]
distribution = log_normal
min = 2e7
max = 5e8
mean = 8e7
scale = auto
Loading
Loading