# Mettabook

## 1. Setup

### 1.1 Imports

In [1]:
import pandas as pd

# Enable auto-reload of modules
%load_ext autoreload
%autoreload 2

import matplotlib.pyplot as plt
import plotly.graph_objects as go
from IPython.display import display
from mettabook_widgets import (
    JobLauncher,
    JobStatusMonitor,
    MetricsFetcher,
    ReplayViewer,
    TrainingConfigurator,
    WandBConnector,
)
from plotly.subplots import make_subplots
from run_store import RunStore

# Select the inline matplotlib backend for this kernel and apply matplotlib's
# "default" style sheet, then print a confirmation that the setup cell finished.
%matplotlib inline
plt.style.use("default")

print("Setup complete! Auto-reload enabled.")

Setup complete! Auto-reload enabled.


### 1.2 Initialize Components

In [2]:
# A single RunStore is created once and passed to every component that accepts
# a `run_store` argument.
run_store = RunStore()

# Construction order matters: the launcher wraps the configurator, the monitor
# wraps the launcher, and the fetcher / replay viewer both wrap the W&B connector.
config = TrainingConfigurator(run_store=run_store)
launcher = JobLauncher(config, run_store=run_store)
monitor = JobStatusMonitor(launcher, run_store=run_store)
wandb_conn = WandBConnector(run_store=run_store)
fetcher = MetricsFetcher(wandb_conn)
replay_viewer = ReplayViewer(wandb_conn)

# Expose the monitor through module-level names (intended for JavaScript access).
# The registry keyed by id(monitor) is reused if an earlier run of a cell already
# created it, so re-running this cell adds to it instead of resetting it.
_metta_monitor = monitor
_metta_monitors = globals().get("_metta_monitors", {})
_metta_monitors[id(monitor)] = monitor

print("Components initialized with shared RunStore!")

Components initialized with shared RunStore!


### 1.3. Confirm Credential Setup

### 1.4. Confirm Credential Setup

### 1.5 Run Store Overview

The RunStore provides unified tracking of all your training runs across SkyPilot and W&B. It persists data locally and provides a single view of all runs.

**Important**: If you've made changes to the code, restart the kernel (Kernel → Restart) and re-run the setup cells in Section 1 to reload the modules.

View all tracked runs in the table below:

In [3]:
# Simple RunStore table: render the result of show_runs() for the shared
# `run_store` created in the setup section.
# NOTE(review): this import sits outside the top-level imports cell; consider
# moving it there so all dependencies are visible in one place.
from simple_run_table import show_runs

# Display the table
display(show_runs(run_store))

# Usage:
# - To add a run: display(add_run(run_store, "run-id"))
# - To refresh one: display(refresh_run(run_store, "run-id"))
# - To refresh all: display(refresh_all(run_store))
# NOTE(review): add_run, refresh_run and refresh_all are not imported by this
# cell; presumably they live in simple_run_table next to show_runs -- confirm
# and import them before using the snippets above.

Run ID,Status,SkyPilot,W&B,Created
daveey.bbc.server.1x4.3,RUNNING,-,running,2025-07-20 17:32:00
test.run.1,UNKNOWN,-,-,2025-07-20 13:22:07
relh.nav.july20.fff.44,FAILED,-,crashed,2025-07-20 11:46:34
2178 -,SUBMITTED,2178 - (CANCELLED),-,2025-07-20 11:46:34
relh.skypilot.fff.444,RUNNING,-,running,2025-07-20 11:46:34
2090 -,COMPLETED,2090 - (SUCCEEDED),-,2025-07-20 11:46:34
2177 -,SUBMITTED,2177 - (CANCELLED),-,2025-07-20 11:46:34
relh.skypilot.fff.j20.666,FAILED,-,crashed,2025-07-20 11:46:34
1279 -,COMPLETED,1279 - (SUCCEEDED),-,2025-07-20 11:46:34
1355 -,FAILED,1355 - (FAILED),-,2025-07-20 11:46:34


In [7]:
# Add the run "daveey.bbc.server.1x4.3" to the store, then refresh it.
# NOTE(review): add_test_run and refresh_test_run are not defined or imported in
# any cell visible in this notebook, and this cell's execution count (7) is out
# of sequence, so it likely depends on a deleted cell and would raise NameError
# after Restart & Run All. Presumably these should be the add_run / refresh_run
# helpers mentioned in the usage notes above -- confirm and define or import them.
add_test_run("daveey.bbc.server.1x4.3")
refresh_test_run("daveey.bbc.server.1x4.3")

[RunStore] Adding run: daveey.bbc.server.1x4.3
[RunStore] Run 'daveey.bbc.server.1x4.3' already exists, updating...
[RunStore] Saved to /Users/nishadsingh/.metta/run_store.json
[RunStore] Total runs in store: 52
Added daveey.bbc.server.1x4.3
[RunStore] Refreshing run: daveey.bbc.server.1x4.3
[RunStore] Querying SkyPilot for: daveey.bbc.server.1x4.3
[RunStore] No SkyPilot data found for: daveey.bbc.server.1x4.3
[RunStore] Querying W&B for: daveey.bbc.server.1x4.3
[RunStore] Found W&B data: WandBStatus.RUNNING
[RunStore] Updated run daveey.bbc.server.1x4.3 and saved
Refreshed daveey.bbc.server.1x4.3: Updated=True


## 2. Training

This section allows you to launch and monitor a training run. You can skip to the "Analyze a Run" section if you have an existing run.


### 2.1 Specify and Launch Training Job

In [None]:
# Show the training configurator first, then the job launcher that was
# constructed from it in the setup section.
display(config.display())
display(launcher.display())

### 2.2 Monitor Training Job

In [None]:
# Publish the monitor under module-level names (intended for the JavaScript
# integration): the latest monitor, its id, and a registry keyed by that id.
# An existing registry is reused so earlier entries are kept.
_metta_monitor = monitor
_metta_monitor_id = id(monitor)
_metta_monitors = globals().get("_metta_monitors", {})
_metta_monitors[_metta_monitor_id] = monitor

# Render the job status monitor (with its integrated RunStore table), then
# start monitoring.
display(monitor.display())
monitor.start_monitoring()

## 3. Analyze a Run

### 3.1 Pick which run to analyze

In [None]:
# Render the W&B connector's UI; later cells check `wandb_conn.run` to decide
# whether a run has been selected.
display(wandb_conn.display())

### 3.2 Fetch metrics

In [None]:
# Always render the fetcher UI; only trigger the automatic fetch once a run
# has been selected on the W&B connector.
display(fetcher.display())
if not wandb_conn.run:
    print("Run section 3.1 first")
else:
    fetcher.auto_fetch()

### 3.3 Analyze

`fetcher.metrics_df` contains a DataFrame with the sampled metrics fetched in section 3.2.

You can analyze them in any way you like. Below is some boilerplate code that shows `overview/*` metrics over agent step.

In [None]:
# Plot every numeric, non-constant `overview/*` column of the fetched metrics
# against the `_step` column, one subplot per metric, on a grid that is at most
# three columns wide.
metrics_df = fetcher.metrics_df
if metrics_df is None or len(metrics_df) == 0:
    print("No metrics data available. Please fetch metrics first.")
elif "_step" not in metrics_df.columns:
    # Every trace uses `_step` as its x-axis; without it the figure would be
    # an empty grid, so report the problem instead of drawing nothing.
    print("Metrics data has no '_step' column to plot against.")
else:
    include_prefixes = ["overview/"]

    # Keep columns that match a prefix, are numeric, and actually vary.
    # The cheap name check runs first so dtype/nunique are only evaluated
    # for candidate columns.
    plot_cols = [
        col
        for col in metrics_df.columns
        if isinstance(col, str)
        and any(col.startswith(prefix) for prefix in include_prefixes)
        and pd.api.types.is_numeric_dtype(metrics_df[col])
        and metrics_df[col].nunique() > 1
    ]

    if not plot_cols:
        print("No plottable metrics found")
    else:
        # Calculate grid dimensions
        n_metrics = len(plot_cols)
        n_cols = min(3, n_metrics)  # Max 3 columns
        n_rows = (n_metrics + n_cols - 1) // n_cols

        # make_subplots raises ValueError when vertical_spacing exceeds
        # 1 / (rows - 1). Keep the usual 0.08 for small grids and cap the total
        # gap at half the figure height for tall ones.
        vertical_spacing = 0.08 if n_rows == 1 else min(0.08, 0.5 / (n_rows - 1))

        # Create subplots
        fig = make_subplots(
            rows=n_rows,
            cols=n_cols,
            subplot_titles=[col.replace("overview/", "").replace("_", " ") for col in plot_cols],
            vertical_spacing=vertical_spacing,
            horizontal_spacing=0.1,
        )

        # Color palette (cycled when there are more metrics than colors)
        colors = ["blue", "red", "green", "orange", "purple", "brown", "pink", "gray", "olive", "cyan"]

        # Add one trace per metric, filling the grid row by row
        for idx, col in enumerate(plot_cols):
            row = (idx // n_cols) + 1
            col_idx = (idx % n_cols) + 1
            color = colors[idx % len(colors)]

            fig.add_trace(
                go.Scatter(
                    x=metrics_df["_step"],
                    y=metrics_df[col],
                    mode="lines",
                    name=col.replace("overview/", ""),
                    line=dict(color=color, width=2),
                    showlegend=False,
                ),
                row=row,
                col=col_idx,
            )

        # Update layout
        fig.update_layout(height=250 * n_rows, title_text="Overview Metric / Agent Step", showlegend=False)

        # Label the x-axis of the lowest populated subplot in each column.
        # When the last row is only partially filled, that subplot is one row
        # above the bottom of the grid.
        for col_idx in range(1, n_cols + 1):
            last_row = (n_metrics - col_idx) // n_cols + 1
            fig.update_xaxes(title_text="Steps", row=last_row, col=col_idx)

        fig.show()

### 3.4 Replay Viewer

In [None]:
# Render the replay viewer UI, then fetch replays only if a run has been
# selected on the W&B connector.
display(replay_viewer.display())

if not wandb_conn.run:
    print("Select a run first")
else:
    print("Fetching replays...")
    replay_viewer.auto_fetch()

In [None]:
# Call the replay viewer's display_iframe with a 1000x600 size.
# NOTE(review): presumably this needs a replay fetched by the previous cell -- confirm.
replay_viewer.display_iframe(width=1000, height=600)