In [1]:
import numpy as np
import pandas as pd

import plotly.express as px
import plotly.io as pio

pio.renderers.default = "browser"

from tqdm.auto import tqdm
import umap

from game import make_game
from rm import regret_matching
from sym import symmetrise
from metrics import (
    exploitability,
    induced_random_battlefield_marginal,
    tv_distance,
    hart_target_marginal_B553,
    hart_candidate_strategy_B553
)


IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html



In [2]:
# Game instance (both values are forwarded to make_game and S also to the marginal helper).
# NOTE(review): presumably S = total resources and K = battlefields — confirm against game.make_game.
S = 5
K = 3

# Regret-matching parameters
T = 10_000      # passed as T= to regret_matching for every run
N_SEEDS = 200   # number of independent runs; seeds are 0..N_SEEDS-1

# Burn-in averaging
USE_BURNIN_AVG = True
BURNIN_FRAC = 0.2  # discard first 20%
# round() before int(): plain truncation can land one iteration short when
# T * BURNIN_FRAC is not exactly representable in binary (e.g. 100 * 0.29 -> 28.99...).
BURNIN = int(round(T * BURNIN_FRAC))
# Fail fast here instead of producing NaN averages later in the run loop.
assert 0 <= BURNIN < T, "BURNIN_FRAC must leave at least one iteration to average"

# UMAP parameters (shared by every embedding fitted in this notebook)
UMAP_NEIGHBORS = 30
UMAP_MIN_DIST = 0.05
UMAP_RANDOM_STATE = 42

print("BURNIN =", BURNIN, "out of T =", T)


BURNIN = 2000 out of T = 10000


In [3]:
# Build the game for the configured (S, K).
# `actions` and `A` are reused by every later cell; `n` is the row count of A.
game_spec = make_game(S=S, K=K)
actions, idx, A = game_spec
n = A.shape[0]
print("num pure actions =", n)


num pure actions = 21


In [4]:
def burnin_average(hist, burnin):
    """
    Average a strategy history after dropping a burn-in prefix.

    hist: (T,n) array-like, one row per iteration
    burnin: number of leading rows to discard
    returns: mean over t=burnin..T-1, rescaled so the entries sum to 1

    Raises ValueError when no rows remain after the burn-in, or when the
    averaged vector has zero / non-finite total mass and therefore cannot
    be normalised (previously both cases silently produced NaN).
    """
    hist = np.asarray(hist, dtype=float)
    sub = hist[burnin:]
    if sub.shape[0] == 0:
        raise ValueError(
            f"burn-in of {burnin} leaves no iterations to average "
            f"(history has {hist.shape[0]} rows)"
        )

    p = sub.mean(axis=0)
    total = p.sum()
    # Guard the normalisation: dividing by 0 or NaN would poison every downstream metric.
    if not np.isfinite(total) or total == 0:
        raise ValueError("averaged strategy has zero or non-finite mass; cannot normalise")
    return p / total

def sqrt_features(P):
    """Element-wise square root of probabilities, used as UMAP input.

    Euclidean distance between square-rooted probability vectors is
    proportional to the Hellinger distance, so a Euclidean-metric UMAP on
    these features approximates Hellinger geometry.
    """
    return np.sqrt(np.asarray(P, dtype=float))


In [5]:
# Target battlefield marginal that the symmetrised strategies are compared with.
Qhart = hart_target_marginal_B553()

seeds = np.arange(N_SEEDS)

# Each run contributes TWO rows (its p-point, then its q-point) to every table below.
#   plots (1) and (2): both rows carry the run-level exploitability
#   plot (3):          each symmetrised row carries its own TV distance to Hart
P_normal_list, which_normal, run_normal, seed_normal, eps_normal = [], [], [], [], []

P_sym_list, which_sym, run_sym, seed_sym, eps_sym = [], [], [], [], []
tv_sym_list = []  # per-point TV distance, used by plot (3)

for r, sd in enumerate(tqdm(seeds, desc="RM runs")):
    res = regret_matching(A, T=T, seed=int(sd))

    # Either the post-burn-in mean of the iterate history or the solver's own average.
    p, q = (
        (burnin_average(res["p_hist"], BURNIN), burnin_average(res["q_hist"], BURNIN))
        if USE_BURNIN_AVG
        else (res["p_avg"], res["q_avg"])
    )

    # --- raw strategies: one exploitability value per run, shared by both rows ---
    # (the "p_avg"/"q_avg" labels are used whichever averaging mode is active)
    eps_run, epsA_run, epsB_run, v_run = exploitability(p, q, A)

    P_normal_list.extend((p, q))
    which_normal.extend(("p_avg", "q_avg"))
    run_normal.extend((r, r))
    seed_normal.extend((int(sd), int(sd)))
    eps_normal.extend((eps_run, eps_run))

    # --- symmetrised strategies: same bookkeeping ---
    pS = symmetrise(p, actions)
    qS = symmetrise(q, actions)

    epsS_run, epsSA_run, epsSB_run, vS_run = exploitability(pS, qS, A)

    P_sym_list.extend((pS, qS))
    which_sym.extend(("p_sym", "q_sym"))
    run_sym.extend((r, r))
    seed_sym.extend((int(sd), int(sd)))
    eps_sym.extend((epsS_run, epsS_run))

    # --- per-point TV distance between the induced marginal and Hart's target ---
    Pm_p = induced_random_battlefield_marginal(pS, actions, S=S)
    Pm_q = induced_random_battlefield_marginal(qS, actions, S=S)
    tv_sym_list.extend((tv_distance(Pm_p, Qhart), tv_distance(Pm_q, Qhart)))

# Freeze the accumulated lists into arrays for the plotting cells.
P_normal = np.stack(P_normal_list)  # (2*N_SEEDS, n)
P_sym = np.stack(P_sym_list)

eps_normal = np.array(eps_normal)
eps_sym = np.array(eps_sym)
tv_sym = np.array(tv_sym_list)

# One min/mean/max summary line per metric.
for label, values in (
    ("Normal eps: min/mean/max =", eps_normal),
    ("Sym eps:    min/mean/max =", eps_sym),
    ("Sym TV:     min/mean/max =", tv_sym),
):
    print(label, float(values.min()), float(values.mean()), float(values.max()))


RM runs: 100%|██████████| 200/200 [01:10<00:00,  2.85it/s]

Normal eps: min/mean/max = 0.0023988383551255127 0.006020102026920406 0.010386801091262898
Sym eps:    min/mean/max = 0.0 1.1316187152542105e-05 0.00047994667985324334
Sym TV:     min/mean/max = 0.004540225917922497 0.11424616488337885 0.20302141982711036





In [6]:
# =========================
# Plot (1): NORMAL strategies
#   - UMAP 2D embedding (figure optional, see commented snippet below)
#   - UMAP 3D figure coloured by exploitability
#   (legend/click box on the left, colourbar on the right)
# =========================

# 1) UMAP features for the NORMAL strategies.
#    sqrt_features is element-wise, so it can be applied to the whole matrix at once.
X_normal = sqrt_features(P_normal)   # (2*N_SEEDS, n)

# Settings shared by the 2D and 3D embeddings; only n_components differs.
umap_kwargs_normal = dict(
    n_neighbors=UMAP_NEIGHBORS,
    min_dist=UMAP_MIN_DIST,
    random_state=UMAP_RANDOM_STATE,
    metric="euclidean",
)

umap_2d_normal = umap.UMAP(n_components=2, **umap_kwargs_normal)
Z2_normal = umap_2d_normal.fit_transform(X_normal)

umap_3d_normal = umap.UMAP(n_components=3, **umap_kwargs_normal)
Z3_normal = umap_3d_normal.fit_transform(X_normal)

# 2) Dataframes: embedding coordinates followed by the per-point metadata.
point_meta_normal = {
    "which": which_normal,          # "p_avg" / "q_avg"
    "run": run_normal,
    "seed": seed_normal,
    "exploitability": eps_normal,
}

df_normal_2d = pd.DataFrame({
    "x": Z2_normal[:, 0],
    "y": Z2_normal[:, 1],
    **point_meta_normal,
})

df_normal_3d = pd.DataFrame({
    "x": Z3_normal[:, 0],
    "y": Z3_normal[:, 1],
    "z": Z3_normal[:, 2],
    **point_meta_normal,
})

# 3) Styling shared by the 2D and 3D figures.
scatter_style_normal = dict(
    color="exploitability",
    symbol="which",                 # gives the clickable legend box (p_avg/q_avg)
    hover_data=["run", "seed", "which", "exploitability"],
    color_continuous_scale="Plasma",
)

# Legend (click box) on the left, colourbar pushed to the right so both stay readable.
layout_normal = dict(
    margin=dict(l=70, r=160, t=80, b=60),
    legend=dict(
        x=0.01, y=0.99,
        xanchor="left", yanchor="top",
        bgcolor="rgba(255,255,255,0.7)",
        bordercolor="rgba(0,0,0,0.15)",
        borderwidth=1,
        title="which",
    ),
    coloraxis_colorbar=dict(
        title="exploitability",
        x=1.03,
        y=0.5,
        len=0.9,
        thickness=22,
    ),
)

# Optional Plot (1a): UMAP 2D coloured by exploitability. Uncomment to build and show it.
# fig1_2d = px.scatter(
#     df_normal_2d,
#     x="x", y="y",
#     title="(1) Normal strategies: UMAP 2D coloured by exploitability",
#     **scatter_style_normal,
# )
# fig1_2d.update_layout(**layout_normal)
# fig1_2d.show()

# 4) Plot (1b): UMAP 3D coloured by exploitability
fig1_3d = px.scatter_3d(
    df_normal_3d,
    x="x", y="y", z="z",
    title="(1) Normal strategies: UMAP 3D coloured by exploitability",
    **scatter_style_normal,
)
fig1_3d.update_layout(**layout_normal)

# fig1_3d.show()



n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



In [7]:
# =========================
# Plot (2): SYM strategies
#   - UMAP 2D embedding (figure optional, see commented snippet below)
#   - UMAP 3D figure coloured by exploitability
#   (legend/click box on the left, colourbar on the right)
# =========================

# 1) UMAP features for the SYM strategies.
#    sqrt_features is element-wise, so it can be applied to the whole matrix at once.
X_sym = sqrt_features(P_sym)   # (2*N_SEEDS, n)

# Settings shared by the 2D and 3D embeddings; only n_components differs.
umap_kwargs_sym = dict(
    n_neighbors=UMAP_NEIGHBORS,
    min_dist=UMAP_MIN_DIST,
    random_state=UMAP_RANDOM_STATE,
    metric="euclidean",
)

umap_2d_sym = umap.UMAP(n_components=2, **umap_kwargs_sym)
Z2_sym = umap_2d_sym.fit_transform(X_sym)

umap_3d_sym = umap.UMAP(n_components=3, **umap_kwargs_sym)
Z3_sym = umap_3d_sym.fit_transform(X_sym)

# 2) Dataframes: embedding coordinates followed by the per-point metadata.
point_meta_sym = {
    "which": which_sym,             # "p_sym" / "q_sym"
    "run": run_sym,
    "seed": seed_sym,
    "exploitability": eps_sym,
}

df_sym_2d = pd.DataFrame({
    "x": Z2_sym[:, 0],
    "y": Z2_sym[:, 1],
    **point_meta_sym,
})

df_sym_3d = pd.DataFrame({
    "x": Z3_sym[:, 0],
    "y": Z3_sym[:, 1],
    "z": Z3_sym[:, 2],
    **point_meta_sym,
})

# 3) Styling shared by the 2D and 3D figures.
scatter_style_sym = dict(
    color="exploitability",
    symbol="which",                 # gives the clickable legend box (p_sym/q_sym)
    hover_data=["run", "seed", "which", "exploitability"],
    color_continuous_scale="Plasma",
)

# Legend (click box) on the left, colourbar pushed to the right so both stay readable.
layout_sym = dict(
    margin=dict(l=70, r=160, t=80, b=60),
    legend=dict(
        x=0.01, y=0.99,
        xanchor="left", yanchor="top",
        bgcolor="rgba(255,255,255,0.7)",
        bordercolor="rgba(0,0,0,0.15)",
        borderwidth=1,
        title="which",
    ),
    coloraxis_colorbar=dict(
        title="exploitability",
        x=1.03,
        y=0.5,
        len=0.9,
        thickness=22,
    ),
)

# Optional Plot (2a): UMAP 2D coloured by exploitability. Uncomment to build and show it.
# fig2_2d = px.scatter(
#     df_sym_2d,
#     x="x", y="y",
#     title="(2) Sym strategies: UMAP 2D coloured by exploitability",
#     **scatter_style_sym,
# )
# fig2_2d.update_layout(**layout_sym)
# fig2_2d.show()

# 4) Plot (2b): UMAP 3D coloured by exploitability
fig2_3d = px.scatter_3d(
    df_sym_3d,
    x="x", y="y", z="z",
    title="(2) Sym strategies: UMAP 3D coloured by exploitability",
    **scatter_style_sym,
)
fig2_3d.update_layout(**layout_sym)

# fig2_3d.show()



n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



In [8]:
# =========================
# Plot (3): SYM strategies + Hart anchor
#   - UMAP 2D coloured by TV distance to Hart marginal
#   - UMAP 3D coloured by TV distance to Hart marginal
#   - Hart candidate added as one extra point WITHOUT creating a 2nd colourbar
#   (legend/click box on the left, single colourbar on the right)
# =========================

# 0) Hart candidate strategy (used for both players) and its TV / exploitability.
p_hart = hart_candidate_strategy_B553(actions)
q_hart = p_hart.copy()

Pm_hart = induced_random_battlefield_marginal(p_hart, actions, S=S)
tv_hart_point = tv_distance(Pm_hart, Qhart)

eps_hart, _, _, _ = exploitability(p_hart, q_hart, A)

# 1) Fit UMAP on the SYM strategies only, then project the Hart point with the SAME
#    fitted mapper, so the anchor does not influence the embedding itself.
X_sym = sqrt_features(P_sym)                          # (2*N_SEEDS, n)
x_hart = sqrt_features(p_hart).reshape(1, -1)         # (1, n)

umap_kwargs_hart = dict(
    n_neighbors=UMAP_NEIGHBORS,
    min_dist=UMAP_MIN_DIST,
    random_state=UMAP_RANDOM_STATE,
    metric="euclidean",
)

umap_2d = umap.UMAP(n_components=2, **umap_kwargs_hart)
Z2 = umap_2d.fit_transform(X_sym)
Z2_hart = umap_2d.transform(x_hart)[0]

umap_3d = umap.UMAP(n_components=3, **umap_kwargs_hart)
Z3 = umap_3d.fit_transform(X_sym)
Z3_hart = umap_3d.transform(x_hart)[0]

# 2) Dataframes. The Hart row is appended so that ONE px call draws everything.
point_meta_hart = {
    "which": which_sym,      # "p_sym" / "q_sym"
    "run": run_sym,
    "seed": seed_sym,
    "tv_hart": tv_sym,       # per-point TV distance
    "exploitability": eps_sym,
}

df_sym_2d = pd.DataFrame({
    "x": Z2[:, 0],
    "y": Z2[:, 1],
    **point_meta_hart,
})

df_sym_3d = pd.DataFrame({
    "x": Z3[:, 0],
    "y": Z3[:, 1],
    "z": Z3[:, 2],
    **point_meta_hart,
})

# Metadata of the single Hart anchor row; run/seed are -1 because it is not an RM run.
anchor_meta = {
    "which": "hart",
    "run": -1,
    "seed": -1,
    "tv_hart": float(tv_hart_point),
    "exploitability": float(eps_hart),
}

hart_row_2d = pd.DataFrame([{
    "x": float(Z2_hart[0]),
    "y": float(Z2_hart[1]),
    **anchor_meta,
}])

hart_row_3d = pd.DataFrame([{
    "x": float(Z3_hart[0]),
    "y": float(Z3_hart[1]),
    "z": float(Z3_hart[2]),
    **anchor_meta,
}])

df3_2d = pd.concat([df_sym_2d, hart_row_2d], ignore_index=True)
df3_3d = pd.concat([df_sym_3d, hart_row_3d], ignore_index=True)

# 3) ONE plotly express call per figure => ONE colourbar.
#    Colour encodes tv_hart, symbol separates p/q/hart. The colour range is pinned to
#    the RM points only (the Hart marker gets a fixed colour afterwards).
tv_min = float(df_sym_2d["tv_hart"].min())
tv_max = float(df_sym_2d["tv_hart"].max())

hover_cols_hart = ["run", "seed", "which", "exploitability", "tv_hart"]

# Layout shared by both figures: legend on the left, single colourbar on the right.
layout_hart = dict(
    margin=dict(l=70, r=180, t=80, b=60),
    legend=dict(
        x=0.01, y=0.99,
        xanchor="left", yanchor="top",
        bgcolor="rgba(255,255,255,0.75)",
        bordercolor="rgba(0,0,0,0.15)",
        borderwidth=1,
        title="which",
    ),
    coloraxis_colorbar=dict(
        title="TV(P, Hart)",
        x=1.05,
        y=0.5,
        len=0.9,
        thickness=24,
    ),
)

# (3a) UMAP 2D
fig3_2d = px.scatter(
    df3_2d,
    x="x", y="y",
    color="tv_hart",
    symbol="which",
    hover_data=list(hover_cols_hart),
    title="(3) Sym strategies: UMAP 2D coloured by TV distance to Hart marginal (with Hart anchor)",
    color_continuous_scale="Plasma",
    range_color=[tv_min, tv_max],
)

# Restyle only the trace named "hart" as a big green cross; a fixed colour keeps it
# off the continuous colour scale, so no extra colourbar appears.
for trace in fig3_2d.data:
    if str(trace.name) == "hart":
        trace.update(
            marker=dict(
                symbol="x",
                size=20,
                color="limegreen",
                line=dict(width=3, color="black"),
            ),
            opacity=1.0,
        )

fig3_2d.update_layout(**layout_hart)

# fig3_2d.show()

# (3b) UMAP 3D
fig3_3d = px.scatter_3d(
    df3_3d,
    x="x", y="y", z="z",
    color="tv_hart",
    symbol="which",
    hover_data=list(hover_cols_hart),
    title="(3) Sym strategies: UMAP 3D coloured by TV distance to Hart marginal (with Hart anchor)",
    color_continuous_scale="Plasma",
    range_color=[tv_min, tv_max],
)

# Same green-cross treatment for the 3D Hart trace (different size / outline width).
for trace in fig3_3d.data:
    if str(trace.name) == "hart":
        trace.update(
            marker=dict(
                symbol="x",
                size=12,
                color="limegreen",
                line=dict(width=5, color="black"),
            ),
            opacity=1.0,
        )

fig3_3d.update_layout(**layout_hart)

# fig3_3d.show()




n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.


n_jobs value 1 overridden to 1 by setting random_state. Use no seed for parallelism.



In [15]:
import numpy as np
from dash import Dash, dcc, html, Input, Output, Patch

# assumes: fig1_3d, fig2_3d, fig3_3d already built

def lighten_3d(fig, size=5, opacity=0.7):
    """Give every trace the same marker size/opacity and data-scaled 3D axes.

    The figure is modified in place and also returned so calls can be chained
    or re-assigned.
    """
    marker_style = {"size": size, "opacity": opacity}
    fig.update_traces(marker=marker_style)
    # aspectmode="data" is a Plotly scene setting; it is applied to the layout's `scene`.
    fig.update_layout(scene={"aspectmode": "data"})
    return fig

# Apply one common light marker style to all three 3D figures, in order.
fig1_3d, fig2_3d, fig3_3d = [
    lighten_3d(figure, size=3, opacity=0.7)
    for figure in (fig1_3d, fig2_3d, fig3_3d)
]

# --- Make Hart cross MUCH more prominent (after lighten_3d so it doesn't get overridden) ---
def emphasise_hart(fig, hart_name="hart"):
    """Restyle the trace(s) named `hart_name` as an opaque lime-green cross.

    Traces whose name differs are left untouched. The figure is modified in
    place and returned.
    """
    hart_marker = dict(
        symbol="x",                          # cross glyph
        size=4,
        opacity=1.0,                         # fully opaque
        color="limegreen",
        line=dict(width=8, color="black"),   # black outline
    )

    def _restyle_if_hart(trace):
        # px names each trace after its category value (e.g. "hart")
        if str(trace.name) != hart_name:
            return
        trace.update(marker=hart_marker)

    fig.for_each_trace(_restyle_if_hart)
    return fig

# Re-emphasise the Hart marker (the uniform marker styling above also hit its trace).
fig3_3d = emphasise_hart(fig3_3d)


app = Dash(__name__)

# One tab per 3D figure: (tab label, tab value, graph id, figure).
tab_specs = [
    ("(1) Normal (3D)", "tab1", "g1", fig1_3d),
    ("(2) Sym (3D)", "tab2", "g2", fig2_3d),
    ("(3) Sym + Hart (3D)", "tab3", "g3", fig3_3d),
]

app.layout = html.Div(
    [
        dcc.Tabs(
            value="tab1",
            children=[
                dcc.Tab(label=tab_label, value=tab_value, children=[
                    dcc.Graph(id=graph_id, figure=figure, style={"height": "85vh"},
                              config={"scrollZoom": True}),
                ])
                for tab_label, tab_value, graph_id, figure in tab_specs
            ],
        ),
        # Timer whose n_intervals counter drives the camera-rotation callback.
        dcc.Interval(id="rot", interval=160, n_intervals=0),
    ],
    style={"maxWidth": "1400px", "margin": "0 auto"},
)

def camera(theta, r=2.4, z=0.9):
    """Build a Plotly 3D scene-camera dict for an eye orbiting the z-axis.

    theta: orbit angle in radians
    r:     horizontal distance of the eye from the z-axis
    z:     height of the eye
    The camera always looks at the origin with +z as the up direction.
    """
    eye_x = r * np.cos(theta)
    eye_y = r * np.sin(theta)
    return {
        "eye": {"x": eye_x, "y": eye_y, "z": z},
        "center": {"x": 0, "y": 0, "z": 0},
        "up": {"x": 0, "y": 0, "z": 1},
    }

@app.callback(
    Output("g1", "figure"),
    Output("g2", "figure"),
    Output("g3", "figure"),
    Input("rot", "n_intervals"),
)
def rotate_all(n):
    """Advance the orbiting camera of all three graphs on every interval tick.

    n is the interval's tick counter. Each graph receives a Patch that sets
    only layout.scene.camera; the three views are offset by fixed phase angles
    so they do not show the same orientation.
    """
    speed = 0.05                   # radians added per tick
    phase_offsets = (0.0, 1.2, 2.4)   # g1, g2, g3

    patches = []
    for phase in phase_offsets:
        patch = Patch()
        patch["layout"]["scene"]["camera"] = camera(speed * n + phase)
        patches.append(patch)

    return tuple(patches)

# Start the Dash server on port 8050 when this code runs as the main module.
# NOTE(review): host="0.0.0.0" listens on every network interface, not only
# localhost, so the app is reachable from other machines on the network —
# confirm this is intended; use host="127.0.0.1" for local-only access.
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=8050, debug=False)


# Once the server is running, open http://127.0.0.1:8050 in a browser.