In [144]:
import json

import altair as alt
import pandas as pd

# Select the "vegafusion" data transformer for every Altair chart built below.
alt.data_transformers.enable("vegafusion")

DataTransformerRegistry.enable('vegafusion')

In [145]:
# Load the reward log: a JSON Lines file holding one JSON object per line.
# The encoding is pinned to UTF-8 so decoding does not depend on the platform default.
with open("results/vibevolve/rewards.jsonl", "r", encoding="utf-8") as f:
    lines = f.readlines()

# Whitespace-only lines (for example a trailing blank line) are skipped,
# because json.loads would otherwise raise on them.
df = pd.DataFrame([json.loads(line) for line in lines if line.strip()])
df.head()

Unnamed: 0,run_id,completion_id,timestamp,parent_completion_id,reward,results,host
0,db67855a,1747506993.9613993,1747507000.0,,-1.0,"[{'name': 'simple', 'reward': 1.0}, {'name': '...",cortex1:8080
1,db67855a,1747506993.9357371,1747507000.0,,-0.584375,"[{'name': 'tmp_agent', 'reward': -0.5843750238...",cortex2:8081
2,db67855a,1747506993.9614189,1747507000.0,,-0.053125,"[{'name': 'tmp_agent', 'reward': -0.0531250014...",cortex2:8080
3,db67855a,1747507016.9986596,1747507000.0,,-1.0,"[{'name': 'simple', 'reward': 1.0}, {'name': '...",cortex1:8080
4,299e3db9,1747507125.1448984,1747507000.0,,-1.0,"[{'name': 'simple', 'reward': 1.0}, {'name': '...",cortex2:8081


In [146]:
# Earliest timestamp observed in each run; used as that run's time origin.
min_timestamp = df.groupby("run_id")["timestamp"].min().to_dict()

# Flatten the per-completion `results` lists into one record per
# (completion, result entry).
data = []
for _, completion in df.iterrows():
    # Seconds elapsed since the first completion of the same run.
    elapsed = completion["timestamp"] - min_timestamp[completion["run_id"]]
    for match in completion["results"]:
        if match["name"] == "tmp_agent":
            # Entries named "tmp_agent" are relabelled "round_robin" and keep their sign.
            opponent, reward = "round_robin", match["reward"]
        else:
            # Every other entry has its reward negated.
            # NOTE(review): presumably these are reported from the opponent's
            # point of view — confirm against the producer of rewards.jsonl.
            opponent, reward = match["name"], -match["reward"]
        data.append(dict(
            opponent=opponent,
            reward=reward,
            run_id=completion["run_id"],
            timestamp=completion["timestamp"],
            seconds=elapsed,
            # A reward of exactly -1.0 is flagged as an error.
            error=(reward == -1.0),
            completion_id=completion["completion_id"],
            parent_completion_id=completion["parent_completion_id"],
        ))
sub_reward_df = pd.DataFrame(data)
sub_reward_df

Unnamed: 0,opponent,reward,run_id,timestamp,seconds,error,completion_id,parent_completion_id
0,simple,-1.00000,db67855a,1.747507e+09,0.025662,True,1747506993.9613993,
1,vortex_swarm,-1.00000,db67855a,1.747507e+09,0.025662,True,1747506993.9613993,
2,hunter_swarm,-1.00000,db67855a,1.747507e+09,0.025662,True,1747506993.9613993,
3,predator_boid,-1.00000,db67855a,1.747507e+09,0.025662,True,1747506993.9613993,
4,boid,-1.00000,db67855a,1.747507e+09,0.025662,True,1747506993.9613993,
...,...,...,...,...,...,...,...,...
1414,chaser,0.87500,299e3db9,1.747510e+09,2646.858746,False,1747509772.0036442,1747509620.7030475
1415,concave_swarm,0.65625,299e3db9,1.747510e+09,2646.858746,False,1747509772.0036442,1747509620.7030475
1416,center,0.06250,299e3db9,1.747510e+09,2646.858746,False,1747509772.0036442,1747509620.7030475
1417,pairs,0.71875,299e3db9,1.747510e+09,2646.858746,False,1747509772.0036442,1747509620.7030475


In [147]:
# Order rows chronologically inside each run so the running maximum below is
# accumulated in time order. Reassigning (instead of sorting in place) keeps
# this cell safe to re-run.
sub_reward_df = sub_reward_df.sort_values(["run_id", "timestamp"])

# Best reward seen so far, tracked separately for every (run, opponent) pair.
# Grouping by opponent alone would let a later run inherit the maximum of an
# earlier run, because the rows are ordered by run_id first.
sub_reward_df["cummax_reward"] = (
    sub_reward_df.groupby(["run_id", "opponent"])["reward"].cummax()
)

x_col = "seconds"
y_col = "reward"
y_col_cummax = "cummax_reward"
# Each column is listed once; duplicates add nothing to the tooltip.
tooltip_cols = ["run_id", "completion_id", "reward", "seconds", "parent_completion_id"]

# Individual rewards. A filled point mark is used rather than a circle mark
# because the circle mark always draws circles, so the `shape` encoding on
# "error" would have no visible effect.
points = alt.Chart(sub_reward_df).encode(
    x=x_col,
    y=y_col,
    color="run_id",
    shape="error",
    tooltip=tooltip_cols,
).mark_point(
    filled=True,
    opacity=0.2,
)

# Running-maximum line per run. The colour comes from the encoding only;
# a field name is not a valid literal colour for the mark itself.
cummax = alt.Chart(sub_reward_df).encode(
    x=x_col,
    y=y_col_cummax,
    color="run_id",
    tooltip=tooltip_cols,
).mark_line(
    strokeWidth=2,
)

# One small panel per opponent, ordered by descending mean reward.
figure = (
    (cummax + points)
    .properties(width=175, height=100)
    .facet(
        alt.Facet(
            'opponent',
            sort=alt.EncodingSortField(y_col, op='mean', order='descending')
        ),
        columns=4,
    )
)
figure

In [148]:
# Show the completion row with the highest overall reward.
best_position = df["reward"].argmax()
df.iloc[best_position]

run_id                                                           299e3db9
completion_id                                           1747509034.859665
timestamp                                               1747509034.859665
parent_completion_id                                   1747508679.3847902
reward                                                            0.86875
results                 [{'name': 'tmp_agent', 'reward': 0.86875003576...
host                                                         cortex2:8081
Name: 96, dtype: object

In [149]:
from swarm.vibevolve import src_from_history
from difflib import unified_diff

def get_parent(run_id, completion_id):
    """Return the `parent_completion_id` stored for one completion of a run.

    Looks in the module-level `df` for rows whose `run_id` and
    `completion_id` both match and returns the value from the first such row.

    Raises:
        IndexError: if no row matches.
    """
    same_run = df["run_id"] == run_id
    same_completion = df["completion_id"] == completion_id
    matching_rows = df[same_run & same_completion]
    return matching_rows["parent_completion_id"].values[0]

def get_reward(run_id, completion_id):
    """Return the `reward` stored for one completion of a run.

    Looks in the module-level `df` for rows whose `run_id` and
    `completion_id` both match and returns the value from the first such row.

    Raises:
        IndexError: if no row matches.
    """
    row_mask = (df["run_id"] == run_id) & (df["completion_id"] == completion_id)
    rewards = df.loc[row_mask, "reward"]
    return rewards.values[0]

# Completion whose ancestry is printed below, one diff per generation.
target_run_id = "299e3db9"
target_completion_id = "1747509034.859665"

# Walk from the target back to the root of its lineage. The walk stops once a
# completion has no parent, and that missing parent is kept as a trailing None
# so the root itself is later diffed against empty source. This also covers
# the case where the target is the root.
history = [target_completion_id]
while history[-1] is not None:
    parent = get_parent(target_run_id, history[-1])
    # A missing parent may surface as None or as NaN depending on how pandas
    # inferred the column, so both are normalised to None.
    parent = None if pd.isna(parent) else parent
    history.append(parent)
history.reverse()

# history is now [None, root, ..., target]; print one diff per parent/child pair.
for parent_id, child_id in zip(history[:-1], history[1:]):
    if parent_id is not None:
        parent_src = src_from_history(target_run_id, parent_id)
    else:
        # The root has no parent: diff it against empty source.
        parent_src = ""
    child_src = src_from_history(target_run_id, child_id)

    # splitlines() drops line endings, so lineterm="" keeps the "---", "+++"
    # and "@@" control lines newline-free as well; otherwise the join below
    # inserts a blank line after each of them.
    diff_lines = list(
        unified_diff(parent_src.splitlines(), child_src.splitlines(), lineterm="")
    )
    diff = "\n".join(diff_lines)

    print("-" * 120)
    print(parent_id, "->", child_id)
    print(diff)


------------------------------------------------------------------------------------------------------------------------
None -> 1747507161.6284041
--- 

+++ 

@@ -0,0 +1,56 @@

+
+import jax
+import jax.numpy as jnp
+from jax import jit
+
+@jit
+def act(
+    t,
+    key,
+    ally_x,
+    ally_y,
+    ally_vx,
+    ally_vy,
+    ally_health,
+    enemy_x,
+    enemy_y,
+    enemy_vx,
+    enemy_vy,
+    enemy_health,
+):
+    batch_size, num_agents = ally_x.shape
+
+    # Normalize positions to be in [-1, 1] for better gradient stability
+    max_pos = jnp.max(jnp.max(jnp.stack([ally_x, ally_y, enemy_x, enemy_y], axis=0), axis=1))
+    ally_x = ally_x / max_pos
+    ally_y = ally_y / max_pos
+    enemy_x = enemy_x / max_pos
+    enemy_y = enemy_y / max_pos
+
+    # Compute relative positions from each ally to each enemy
+    relative_x = enemy_x[:, None, :] - ally_x[:, :, None]
+    relative_y = enemy_y[:, None, :] - ally_y[:, :, None]
+
+    # Normalize relative positions
+    relat