Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
81 commits
Select commit Hold shift + click to select a range
81f75db
Initial nograd
jsuarez5341 Feb 14, 2026
38ac67e
Allocators
jsuarez5341 Feb 14, 2026
16851fd
Initial static native
jsuarez5341 Feb 14, 2026
4ce0bfa
pass static activ buffers
jsuarez5341 Feb 16, 2026
51e8b8a
A bunch more crappy porting
jsuarez5341 Feb 17, 2026
d7b09b0
Initial no-torch training (total mess)
jsuarez5341 Feb 18, 2026
0a278af
full deterministic training, major cleanups
jsuarez5341 Feb 18, 2026
26f2f5a
refactor + save/load
jsuarez5341 Feb 19, 2026
ddb5797
legacy
jsuarez5341 Feb 19, 2026
c9ae58b
refactor
jsuarez5341 Feb 19, 2026
28ab172
More refactor
jsuarez5341 Feb 19, 2026
2ffd40e
more refactor
jsuarez5341 Feb 19, 2026
202ff14
Initial model refactor
jsuarez5341 Feb 21, 2026
9511c04
refactor
jsuarez5341 Feb 24, 2026
b874f6c
g2048 bind
jsuarez5341 Feb 24, 2026
fcab65b
genial oath config
jsuarez5341 Feb 24, 2026
05330ee
fp32
jsuarez5341 Feb 24, 2026
096c2b9
fix single-buffer training. float32 still nondeterm for >1 buffer
jsuarez5341 Feb 24, 2026
1edc6f6
extend shmem
jsuarez5341 Feb 24, 2026
3f2749b
nmmo3, grid runnable with rng
jsuarez5341 Feb 24, 2026
7d63e72
Initial nmmo net - crappy port, unmaintainable garbage
jsuarez5341 Feb 26, 2026
78409f8
Major bug fix on val update
jsuarez5341 Feb 26, 2026
9f5de82
constellation fixes
jsuarez5341 Feb 26, 2026
eebcfbf
Major fix on zeroing mingru state. 82% on grid with 3 layers
jsuarez5341 Feb 26, 2026
d411b3a
Grid close
jsuarez5341 Feb 27, 2026
516c46c
zero rollout buffer
jsuarez5341 Feb 27, 2026
dde5b0c
initial sweep refactor (bad)
jsuarez5341 Feb 27, 2026
a623c70
spawn subproc in sweep
jsuarez5341 Feb 27, 2026
059b784
disable verbose in sweep, divide num threads
jsuarez5341 Feb 27, 2026
c74d51f
grid stable
jsuarez5341 Feb 28, 2026
2a15af0
highway connect
jsuarez5341 Feb 28, 2026
0abbf07
minor sweep fixes
jsuarez5341 Feb 28, 2026
d435305
merge fix
jsuarez5341 Feb 28, 2026
6a4646f
grid config
jsuarez5341 Feb 28, 2026
0a479a5
Semi-breaking major refactor to clean up pufferl
jsuarez5341 Mar 1, 2026
8229f92
temp
jsuarez5341 Mar 2, 2026
9bbc96a
Backport major g2048 game bug fix
jsuarez5341 Mar 3, 2026
ff2e86a
2048 swept config
jsuarez5341 Mar 3, 2026
1ca4faa
g2048 config'
jsuarez5341 Mar 3, 2026
482887d
sweep/train fixes
jsuarez5341 Mar 3, 2026
e2ab3d1
dash
jsuarez5341 Mar 3, 2026
e7ae8f9
sweep fixes:
jsuarez5341 Mar 3, 2026
95e19b0
sweep fixes
jsuarez5341 Mar 3, 2026
06bfa3a
default config sweep
jsuarez5341 Mar 3, 2026
aa8bc76
fixes
jsuarez5341 Mar 3, 2026
42811e6
more bug
jsuarez5341 Mar 3, 2026
7920c78
kaiming init
jsuarez5341 Mar 3, 2026
dd8f639
delete ortho init. Speculative
jsuarez5341 Mar 3, 2026
b27de98
cleanup muon
jsuarez5341 Mar 4, 2026
fc9f98c
Initial pufferl refactor training
jsuarez5341 Mar 5, 2026
649826a
update log format
jsuarez5341 Mar 6, 2026
2fb2fb7
sweep keys
jsuarez5341 Mar 6, 2026
760c6e4
dtype fix
jsuarez5341 Mar 6, 2026
426d6e8
tsnee
jsuarez5341 Mar 6, 2026
c6e3fc6
Fix rare norm bug
jsuarez5341 Mar 6, 2026
81bdfc6
constellation fixes
jsuarez5341 Mar 7, 2026
fbacc09
prevent tooltip drawing offscreen
jsuarez5341 Mar 7, 2026
0d82a03
pong
jsuarez5341 Mar 7, 2026
30a5a21
Begin refactor constellation
jsuarez5341 Mar 7, 2026
cd0fa4c
clean ui
jsuarez5341 Mar 7, 2026
a3c3edf
UI cleanup
jsuarez5341 Mar 7, 2026
9e6e431
temp fix color scale
jsuarez5341 Mar 8, 2026
fc09814
merge precision_t kernels and prune dead code
jsuarez5341 Mar 8, 2026
069f3df
purge check macros
jsuarez5341 Mar 10, 2026
fd28ef8
delete transpose indirection
jsuarez5341 Mar 10, 2026
4fbbc36
Initial cudnn conv + nmmo encoder
jsuarez5341 Mar 10, 2026
d2bbec6
refactor kernels
jsuarez5341 Mar 12, 2026
2966410
temp determ fix
jsuarez5341 Mar 12, 2026
dd4c184
refactor muon -> simplify, keep more ops in precision_t. Changes nume…
jsuarez5341 Mar 12, 2026
8c4117e
merge grad clip into muon
jsuarez5341 Mar 12, 2026
3063ff1
refactor
jsuarez5341 Mar 13, 2026
403cec0
more refactor
jsuarez5341 Mar 13, 2026
dff4b0e
more refactors
jsuarez5341 Mar 13, 2026
5f7784d
nccl bind
jsuarez5341 Mar 13, 2026
e5a0139
Stable multigpu
jsuarez5341 Mar 13, 2026
770c270
minor refactor
jsuarez5341 Mar 13, 2026
c8be914
:qMerge branch 'static-native' of https://github.com/pufferai/pufferl…
jsuarez5341 Mar 13, 2026
432e4f1
more refactor
jsuarez5341 Mar 14, 2026
90512a8
minor
jsuarez5341 Mar 14, 2026
29d746a
Merge branch '4.0' of https://github.com/pufferai/pufferlib into 4.0
jsuarez5341 Mar 14, 2026
f0f4df6
cursed merge fix
jsuarez5341 Mar 14, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
182 changes: 98 additions & 84 deletions cache_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,20 @@
import glob
import os

import pufferlib


env_names = sorted([
'breakout',
#'impulse_wars',
#'pacman',
#'tetris',
#'g2048',
'g2048',
#'moba',
#'pong',
'pong',
#'tower_climb',
#'grid',
#'nmmo3',
'grid',
'nmmo3',
#'snake',
#'tripletriad'
])
Expand All @@ -36,19 +38,21 @@
'train/eps',
'train/prio_alpha',
'train/prio_beta0',
'train/horizon',
#'train/horizon',
'train/replay_ratio',
'train/minibatch_size',
'policy/hidden_size',
'vec/total_agents',
]

ALL_KEYS = [
METRICS = [
'agent_steps',
'cost',
'environment/score',
'environment/perf'
] + HYPERS
'uptime',
'env/score',
'env/perf',
]

ALL_KEYS = HYPERS + METRICS

def pareto_idx(steps, costs, scores):
idxs = []
Expand All @@ -63,62 +67,77 @@ def pareto_idx(steps, costs, scores):

def load_sweep_data(path):
data = {}
keys = None
sweep_metadata = {}
num_metrics = 0
for fpath in glob.glob(path):
if 'cache.json' in fpath:
continue

with open(fpath, 'r') as f:
exp = json.load(f)

if not data:
for kk in exp.keys():
if kk == 'data':
for k, v in exp[kk][-1].items():
data[k] = []
else:
data[kk] = []

discard = False
for kk in list(data.keys()):
if kk not in exp and kk not in exp['data'][-1]:
discard = True
try:
exp = json.load(f)
except json.decoder.JSONDecodeError:
print(f'Skipping {fpath}')
continue

sweep_metadata = exp.pop('sweep')

data_len = len(exp['metrics']['agent_steps'])
if data_len > 100:
print(f'Skipping {fpath} (len={data_len})')
continue

if num_metrics == 0:
num_metrics = len(exp['metrics'])

skip = False
metrics = exp.pop('metrics')

if len(metrics) != num_metrics:
print(f'Skipping {fpath} (num_metrics={len(metrics)} != {num_metrics})')
continue

n = len(metrics['agent_steps'])
for k, v in metrics.items():
if len(v) != n:
skip = True
break

if k not in data:
data[k] = []

if np.isnan(v).any():
skip = True
break

if discard:
if skip:
print(f'Skipping {fpath} (bad data)')
continue

for kk in list(data.keys()):
if kk in exp:
v = exp[kk]
sweep_key = f'sweep/{kk}/distribution'
if sweep_key in data and exp[sweep_key] == 'logit_normal':
v = 1 - v
elif kk in ('train/vtrace_rho_clip', 'train/vtrace_c_clip'):
v = max(v, 0.1)
for k, v in metrics.items():
data[k].append(v)
if len(data[k]) != len(data['SPS']):
breakpoint()
pass

data[kk].append(v)
else:
data[kk].append(exp['data'][-1][kk])
for k, v in pufferlib.unroll_nested_dict(exp):
if k not in data:
data[k] = []

steps = data['agent_steps']
costs = data['cost']
scores = data['environment/score']
data[k].append([v]*n)

idxs = pareto_idx(steps, costs, scores)
for k, v in data.items():
data[k] = [item for sublist in v for item in sublist]

#steps = data['agent_steps']
#costs = data['uptime']
#scores = data['env/score']
#idxs = pareto_idx(steps, costs, scores)
# Filter to pareto
for k in data:
data[k] = [data[k][i] for i in idxs]

# Monkey patch: Cap performance
data['environment/perf'] = [min(e, 1.0) for e in data['environment/perf']]

# Monkey patch: Adjust steps by frameskip if present
if 'env/frameskip' in data:
skip = data['env/frameskip']
data['agent_steps'] = [n*m for n, m in zip(data['agent_steps'], skip)]

#for k in data:
# data[k] = [data[k][i] for i in idxs]

data['sweep'] = sweep_metadata
return data

def cached_sweep_load(path, env_name):
Expand All @@ -135,31 +154,32 @@ def cached_sweep_load(path, env_name):
return data

def compute_tsne():
data = {name: cached_sweep_load(f'experiments/logs/puffer_{name}', name) for name in env_names}
all_data = {}
normed = {}

flat = []
flat_mmin = []
flat_mmax = []
for env in env_names:
flat.append(np.stack([data[env][hyper] for hyper in HYPERS], axis=1))
flat_mmin.append(np.stack([data[env][f'sweep/{hyper}/min'] for hyper in HYPERS], axis=1))
flat_mmax.append(np.stack([data[env][f'sweep/{hyper}/max'] for hyper in HYPERS], axis=1))
env_data = cached_sweep_load(f'logs/puffer_{env}', env)
sweep_metadata = env_data.pop('sweep')
all_data[env] = env_data

normed_env = []
for key in HYPERS:
prefix, suffix = key.split('/')
mmin = sweep_metadata[prefix][suffix]['min']
mmax = sweep_metadata[prefix][suffix]['max']
dat = np.array(env_data[key])

flat_distribution = [data[env][f'sweep/{hyper}/distribution'] for env in env_names for hyper in HYPERS]
dist = sweep_metadata[prefix][suffix]['distribution']
if 'log' in dist or 'pow2' in dist:
mmin = np.log(mmin)
mmax = np.log(mmax)
dat = np.log(dat)

flat = np.concatenate(flat, axis=0)
flat_mmin = np.concatenate(flat_mmin, axis=0).min(axis=0)
flat_mmax = np.concatenate(flat_mmax, axis=0).max(axis=0)
normed_env.append((dat - mmin) / (mmax - mmin))

normed = flat.copy()
for i in range(len(HYPERS)):
dist = flat_distribution[i]
if 'log' in dist or 'pow2' in dist:
flat_mmin[i] = np.log(flat_mmin[i])
flat_mmax[i] = np.log(flat_mmax[i])
normed[:, i] = np.log(flat[:, i])
normed[env] = np.stack(normed_env, axis=1)

normed[:, i] = (normed[:, i] - flat_mmin[i]) / (flat_mmax[i] - flat_mmin[i])
normed = np.concatenate(list(normed.values()), axis=0)

from sklearn.manifold import TSNE
proj = TSNE(n_components=2)
Expand All @@ -171,25 +191,19 @@ def compute_tsne():

row = 0
for env in env_names:
'''
for i, hyper in enumerate(HYPERS):
sz = len(data[env][hyper])
data[env][hyper] = normed[row:row+sz, i].tolist()
'''
sz = len(data[env]['agent_steps'])

data[env] = {k: v for k, v in data[env].items() if k in ALL_KEYS}
sz = len(all_data[env]['agent_steps'])
#all_data[env] = {k: v for k, v in all_data[env].items()}
if reduced is not None:
data[env]['tsne1'] = reduced[row:row+sz, 0].tolist()
data[env]['tsne2'] = reduced[row:row+sz, 1].tolist()
all_data[env]['tsne1'] = reduced[row:row+sz, 0].tolist()
all_data[env]['tsne2'] = reduced[row:row+sz, 1].tolist()
else:
data[env]['tsne1'] = np.random.rand(sz).tolist()
data[env]['tsne2'] = np.random.rand(sz).tolist()
all_data[env]['tsne1'] = np.random.rand(sz).tolist()
all_data[env]['tsne2'] = np.random.rand(sz).tolist()

row += sz
print(f'Env {env} has {sz} points')

json.dump(data, open('all_cache.json', 'w'))
json.dump(all_data, open('all_cache.json', 'w'))

if __name__ == '__main__':
compute_tsne()
Loading
Loading