# Mettabook

## Setup

In [2]:
# Optional: confirm you're set up to connect to the services used in this notebook
#    If the command does not run, run `./install.sh` from your terminal

# !metta status --components=core,system,aws,wandb --non-interactive

In [None]:
# Reload imported modules before each cell executes, so edits to the helper
# modules imported below take effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
from notebooks.utils.metrics import fetch_metrics
from notebooks.utils.metrics import find_training_jobs

# Render matplotlib figures inline and use matplotlib's default style.
%matplotlib inline
plt.style.use("default")

print("Setup complete! Auto-reload enabled.")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Setup complete! Auto-reload enabled.


In [None]:
# wandb tags can be supplied when launching a run, or attached afterwards from
# the run's "Overview" page on wandb.

# If this call times out, retry once or twice; the wandb connection may need a
# moment to warm up.
run_names = find_training_jobs(wandb_tags=["permafrost-experiment"])
print(f"Found {len(run_names)} runs {run_names}")

Found 3 runs ['b.jacke.basic_frost_125', 'b.jacke.basic_simple_update_branch', 'b.jacke.basic_permafrost_update_branch']


In [11]:
# Fetch metric history for every run found above. `samples=500` presumably caps
# the number of history points per run (the recorded output reports 500 data
# points each) -- TODO confirm against fetch_metrics. Per the printed hint, each
# run's data is accessed as `metrics_dfs[<run name>]`.
metrics_dfs = fetch_metrics(run_names, samples=500)

Fetching metrics for b.jacke.basic_frost_125: crashed, 2025-07-01T22:54:34Z
https://wandb.ai/metta-research/metta/runs/b.jacke.basic_frost_125...
  Fetched 500 data points.
  Reward: mean=0.1793, max=0.1915
  Access with `metrics_dfs['b.jacke.basic_frost_125']`

Fetching metrics for b.jacke.basic_simple_update_branch: crashed, 2025-07-01T21:23:37Z
https://wandb.ai/metta-research/metta/runs/b.jacke.basic_simple_update_branch...
  Fetched 500 data points.
  Reward: mean=0.1793, max=0.1946
  Access with `metrics_dfs['b.jacke.basic_simple_update_branch']`

Fetching metrics for b.jacke.basic_permafrost_update_branch: crashed, 2025-07-01T20:08:44Z
https://wandb.ai/metta-research/metta/runs/b.jacke.basic_permafrost_update_branch...
  Fetched 500 data points.
  Reward: mean=0.1799, max=0.1911
  Access with `metrics_dfs['b.jacke.basic_permafrost_update_branch']`



## View how freeze_duration affects how often agents attack

`freeze_duration` is 10, 125, and 255 for the `basic_simple`, `basic_frost`, and `basic_permafrost` runs, respectively. They are all based on the standard mettagrid environment, with a modified freeze (permafrost) length.


In [21]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
from scipy.ndimage import uniform_filter1d

# Chart configurations: one vertically stacked subplot per entry.
# "start_step" drops samples before that training step from the subplot.
charts = [
    {
        "metric": "env_agent/status.frozen.ticks.agent.avg",
        "y_title": "Frozen Ticks (avg)",
        "title": "Average Frozen Ticks",
        "start_step": 0,
    },
    {
        "metric": "overview/reward",
        "y_title": "Reward",
        "title": "Training Reward",
        "start_step": 7e7,  # Start at 70M steps
    },
    {
        "metric": "env_agent/attack.blocked.agent.avg",
        "y_title": "Blocked Attacks (avg)",
        "title": "Average Blocked Attacks",
        "start_step": 0,
    },
]

# Map run names to freeze durations
freeze_map = {
    "b.jacke.basic_simple_update_branch": 10,
    "b.jacke.basic_frost_125": 125,
    "b.jacke.basic_permafrost_update_branch": 255,
}

# Colors for different freeze durations
colors = {10: "#1f77b4", 125: "#ff7f0e", 255: "#2ca02c"}

# Plot tuning knobs
SMOOTHING_WINDOW = 10  # width of the moving average, in samples
MIN_POINTS_TO_SMOOTH = 30  # series with this many points or fewer are plotted raw
Y_PADDING_FRACTION = 0.05  # extra y-range on each side, as a fraction of the data span
GRID_COLOR = "rgba(200,200,200,0.5)"


def _freeze_duration_for(run_name):
    """Return the freeze duration for `run_name`, or None if it is unknown.

    A run matches the first `freeze_map` key that occurs as a substring of
    its name.
    """
    return next((duration for key, duration in freeze_map.items() if key in run_name), None)


def _metric_series(df, metric, start_step):
    """Return `(steps, values)` for `metric`, restricted to usable samples.

    Values are cast to float so that (a) object-dtype columns holding `None`
    do not break the missing-value check and (b) the smoothing filter, which
    keeps its input dtype, never truncates an integer-valued metric.
    Samples before `start_step` and non-finite values (NaN, +/-inf) are dropped;
    an infinite value would otherwise yield an unusable y-axis range.
    """
    steps = df["_step"].to_numpy()
    values = df[metric].to_numpy(dtype=float, na_value=np.nan)
    keep = (steps >= start_step) & np.isfinite(values)
    return steps[keep], values[keep]


def _padded_range(lo, hi):
    """Return `[lo, hi]` widened by `Y_PADDING_FRACTION` of the span on each side.

    When all values are identical the span is zero, so a small padding is used
    instead to keep the axis range non-degenerate.
    """
    padding = (hi - lo) * Y_PADDING_FRACTION
    if padding == 0:
        padding = 0.001 * abs(lo) if lo != 0 else 0.01
    return [lo - padding, hi + padding]


# Create figure with subplots - vertical arrangement
n_charts = len(charts)
fig = make_subplots(
    rows=n_charts,
    cols=1,
    subplot_titles=[c["title"] for c in charts],
    vertical_spacing=0.08,
    shared_yaxes=False,  # Explicitly ensure independent y-axes
)

# Freeze durations that already have a legend entry. Tracking this (rather than
# only showing the legend on the first chart) keeps a run in the legend even if
# it has no data for the first metric.
legend_shown = set()

# Plot each chart
for row, chart in enumerate(charts, start=1):
    metric = chart["metric"]
    y_min, y_max = float("inf"), float("-inf")

    for run_name, df in metrics_dfs.items():
        if metric not in df.columns or "_step" not in df.columns:
            continue

        freeze_duration = _freeze_duration_for(run_name)
        if freeze_duration is None:
            continue

        x, y = _metric_series(df, metric, chart["start_step"])
        if len(y) == 0:
            continue

        # Track min/max of the raw (unsmoothed) values for the axis range
        y_min = min(y_min, np.min(y))
        y_max = max(y_max, np.max(y))

        # Light smoothing
        if len(y) > MIN_POINTS_TO_SMOOTH:
            y = uniform_filter1d(y, size=SMOOTHING_WINDOW, mode="nearest")

        fig.add_trace(
            go.Scatter(
                x=x,
                y=y,
                mode="lines",
                name=f"Freeze={freeze_duration}",
                line=dict(color=colors[freeze_duration], width=2.5),
                showlegend=freeze_duration not in legend_shown,  # One legend entry per duration
                legendgroup=str(freeze_duration),
                hovertemplate=f"Freeze={freeze_duration}<br>Step: %{{x:.0e}}<br>Value: %{{y:.4f}}<extra></extra>",
            ),
            row=row,
            col=1,
        )
        legend_shown.add(freeze_duration)

    # Always label the axis, even if no run had data for this metric
    fig.update_yaxes(title_text=chart["y_title"], row=row, col=1)

    # Fix the y-axis range (with padding) only when something was plotted
    if y_min != float("inf"):
        y_range = _padded_range(y_min, y_max)
        print(f"{chart['title']}: y_range = [{y_range[0]:.6f}, {y_range[1]:.6f}]")

        fig.update_yaxes(
            range=y_range,
            row=row,
            col=1,
            autorange=False,  # Ensure we use our custom range
        )

# Update layout
fig.update_layout(
    height=350 * n_charts,
    width=800,
    title=dict(text="Agent Performance by Freeze Duration", x=0.5, xanchor="center", font=dict(size=18)),
    showlegend=True,
    legend=dict(
        x=0.02,
        y=0.98,
        xanchor="left",
        yanchor="top",
        bgcolor="rgba(255,255,255,0.8)",
        bordercolor="rgba(0,0,0,0.2)",
        borderwidth=1,
    ),
    plot_bgcolor="white",
    font=dict(size=12),
)

# Style the axes. Only the grid color is set on the y-axes here, so the
# per-subplot ranges chosen above are left untouched.
fig.update_xaxes(title_text="Steps", gridcolor=GRID_COLOR, tickformat=".0e")
fig.update_yaxes(gridcolor=GRID_COLOR)

fig.show()


Average Frozen Ticks: y_range = [0.035565, 0.169792]
Training Reward: y_range = [0.152395, 0.196659]
Average Blocked Attacks: y_range = [0.039583, 0.085417]


Agents attack and block most with a moderate freeze duration. Since all agents operate on the same global policy, it's possible they learn to avoid attacking at longer freeze durations because the longer duration also implies a greater incentive to cooperate. **Big caveat:** the experimental results may be invalid because of a messy setup.