In [1]:
import os
import glob
import sys
import numpy as np
import pandas as pd
from PIL import Image

import plotly.graph_objects as go
from plotly.subplots import make_subplots
from plotly.colors import sample_colorscale

import ipywidgets as widgets
from IPython.display import display, clear_output
from pathlib import Path

# Current working directory. This is treated as the folder that contains this
# notebook, which only holds when the kernel is started from that folder.
notebook_dir = os.getcwd()

# Add SELECT_SCENES/code (the sibling "code" directory, one level up) to the
# Python path so the project-local `utils` module below can be imported.
sys.path.append(os.path.join(notebook_dir, "..", "code"))

import utils

# The metrics DataFrame used by the dashboard is loaded in the next cell.

In [2]:
# Metrics table read from parquet. Columns used further down include
# subject, learning_phase, scene_full_name, level_full_name, scene, count,
# cleared, speed, MAD_mean and the delta_*_tot columns.
df = pd.read_parquet('../sourcedata/df_metrics.parquet')
# Bare expression: rich (automatically truncated) display of the frame.
df

Unnamed: 0,subject,learning_phase,scene_full_name,delta_clr_tot,delta_spd_tot,delta_MAD_tot,count,cleared,speed,MAD_mean,level_full_name,scene
0,im_sub-06,sub-06_epoch=0-step=2000,6-3-8,0.000000,-5.114034,,35.0,0.000000,5.113232,,w6l3,8
1,im_sub-06,sub-06_epoch=0-step=2000,6-3-6,-0.633333,-54.833397,,30.0,0.066667,4.888235,,w6l3,6
2,im_sub-06,sub-06_epoch=0-step=2000,7-3-4,0.877193,74.317809,,57.0,0.771930,123.018701,,w7l3,4
3,im_sub-06,sub-06_epoch=0-step=2000,6-1-7,0.346154,48.959562,,26.0,0.692308,107.614981,,w6l1,7
4,im_sub-06,sub-06_epoch=0-step=2000,6-2-13,0.000000,-1.399093,,31.0,0.129032,11.897219,,w6l2,13
...,...,...,...,...,...,...,...,...,...,...,...,...
14495,ppo,ep-8000,2-1-10,0.077922,7.311172,2.292553,154.0,0.610390,32.306431,25.508865,w2l1,10
14496,ppo,ep-8000,1-2-11,0.000000,4.672283,2.767537,64.0,0.984375,32.762943,9.707165,w1l2,11
14497,ppo,ep-8000,2-1-8,0.000000,-11.218767,-9.520000,106.0,1.000000,74.767785,26.983333,w2l1,8
14498,ppo,ep-8000,2-1-11,0.015038,-0.346052,-1.857467,133.0,0.135338,10.119667,16.194656,w2l1,11


In [3]:
# -------------------------- #
# ---------- DATA ---------- #
# -------------------------- #

## Human players (used by the "Num. Clips" panel)

# Human subject ids, in display order.
order_hum = ['sub-01', 'sub-02', 'sub-03', 'sub-05', 'sub-06']
# Learning phases of the human players, in chronological order.
order_2phases = ["Early discovery", "Late discovery", "Early practice", "Late practice"]
# One Viridis colour per human learning phase.
colors_2phases = {
    phase: color
    for phase, color in zip(order_2phases, sample_colorscale("Viridis", [0.00,0.33, 0.66, 1.00]))
}
position_2phase = {"discovery": "bottom left", "practice": "bottom right"}
# Every human subject goes through the same list of phases.
hum_2phases = {sub: order_2phases for sub in order_hum}

## Humans + agents (used by the MAD / clearance / speed panels)

order_4phases = ["Early discovery", "Late discovery", "Early practice", "Late practice"]
# Five checkpoints per agent; the keys fix the agent display order.
_ckpt_by_agent = {
    'im_sub-01': ['sub-01_epoch=0-step=500', 'sub-01_epoch=0-step=2000', 'sub-01_epoch=0-step=3500', 'sub-01_epoch=0-step=5000', 'sub-01_epoch=0-step=6500'],
    'im_sub-02': ['sub-02_epoch=0-step=500', 'sub-02_epoch=0-step=3000', 'sub-02_epoch=0-step=5500', 'sub-02_epoch=0-step=8000', 'sub-02_epoch=0-step=10000'],
    'im_sub-03': ['sub-03_epoch=0-step=500', 'sub-03_epoch=0-step=4000', 'sub-03_epoch=0-step=7500', 'sub-03_epoch=1-step=11408', 'sub-03_epoch=1-step=14908'],
    'im_sub-05': ['sub-05_epoch=0-step=500', 'sub-05_epoch=0-step=1500', 'sub-05_epoch=0-step=3000', 'sub-05_epoch=0-step=4000', 'sub-05_epoch=0-step=5000'],
    'im_sub-06': ['sub-06_epoch=0-step=500', 'sub-06_epoch=0-step=2000', 'sub-06_epoch=0-step=4000', 'sub-06_epoch=0-step=5500', 'sub-06_epoch=0-step=7000'],
    'ppo': ['ep-20', 'ep-2000', 'ep-4000', 'ep-6000', 'ep-8000'],
}
order_ppos = list(_ckpt_by_agent)
ppos_phases = list(_ckpt_by_agent.values())

# Phase list for every plotted subject: humans first, then agents.
phases_everyone = {sub: order_4phases for sub in order_hum}
phases_everyone.update(_ckpt_by_agent)

# Flat list of every phase / checkpoint name, humans first.
order_all_phases = order_4phases + [ckpt for agent_ckpts in ppos_phases for ckpt in agent_ckpts]
color_4phases= sample_colorscale("Viridis", [0.00,0.33, 0.66, 1.00])
color_ckpt = sample_colorscale("magma", [0.2,0.4,0.6,0.8,1.0])
# Human phases get Viridis colours; each agent's five checkpoints reuse the
# same five magma colours.
colors_phase_ckpt = dict(zip(order_all_phases, color_4phases + color_ckpt * len(ppos_phases)))

# ----------------------------- #
# ---------- HELPERS ---------- #
# ----------------------------- #

def scenes_for_level(df, level):
    """Return the sorted, de-duplicated non-null ``scene`` values recorded for ``level``.

    Rows are selected where ``level_full_name`` equals ``level``.
    """
    in_level = df["level_full_name"] == level
    scene_ids = df.loc[in_level, "scene"].dropna().unique().tolist()
    scene_ids.sort()
    return scene_ids

def subjects_for(df, level, scene):
    """Return the sorted, de-duplicated non-null ``subject`` values for one level/scene pair.

    Rows are selected where ``level_full_name`` equals ``level`` and
    ``scene`` equals ``scene``.
    """
    same_level = df["level_full_name"] == level
    same_scene = df["scene"] == scene
    subjects = df.loc[same_level & same_scene, "subject"].dropna().unique().tolist()
    subjects.sort()
    return subjects

def get_subset(df, level, scene):
    """Return a copy of the rows of ``df`` that belong to one scene.

    The level name (``"w<world>l<stage>"``, e.g. ``"w6l3"``) and the scene
    id are turned into the ``"<world>-<stage>-<scene>"`` key that is matched
    against the ``scene_full_name`` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``scene_full_name`` column.
    level : str
        Level name of the form ``"w<world>l<stage>"``. World and stage may
        have any number of characters (``"w10l1"`` is handled).
    scene : Any
        Scene id; it is converted with ``str``.

    Returns
    -------
    pandas.DataFrame
        Independent copy of the matching rows (possibly empty).

    Raises
    ------
    ValueError
        If ``level`` does not contain the ``"l"`` separator after its first
        character.
    """
    # Split on the "l" separator instead of indexing fixed character
    # positions, which only worked for single-digit worlds and stages.
    world, separator, stage = level[1:].partition("l")
    if not separator:
        raise ValueError(f"Cannot parse level name {level!r}; expected 'w<world>l<stage>'")
    wls = f"{world}-{stage}-{scene}"
    return df.loc[df["scene_full_name"] == wls].copy()

def load_scene_image(level, scene):
    """Load the background picture of a scene.

    The file is looked up at
    ``<parent of cwd>/sourcedata/mario_backgrounds/scene_backgrounds/<level>s<scene>.png``.

    Returns
    -------
    tuple
        ``(array, path)`` where ``array`` is the RGB image as a NumPy array,
        or ``None`` when the file does not exist. ``path`` is always the
        path that was looked up.
    """
    backgrounds_dir = os.path.join(
        Path.cwd().parent, 'sourcedata', 'mario_backgrounds', 'scene_backgrounds'
    )
    path = os.path.join(backgrounds_dir, f"{level}s{scene}.png")
    if not os.path.exists(path):
        return None, path
    return np.array(Image.open(path).convert("RGB")), path


def get_traces(level, scene):
    """Load every per-subject trace picture available for a scene.

    Files are globbed under
    ``<parent of cwd>/sourcedata/traces/sub-*/scenes/`` with the name pattern
    ``*scene-<level>s<scene>_traces.png`` and processed in sorted path order.

    Returns
    -------
    tuple
        ``(images, paths)``: a list of RGB NumPy arrays and the matching
        list of sorted file paths. Both are empty when nothing matches.
    """
    pattern = os.path.join(
        Path.cwd().parent, 'sourcedata', 'traces', 'sub-*', 'scenes',
        f'*scene-{level}s{scene}_traces.png',
    )
    paths_traces = sorted(glob.glob(pattern))
    imgs_list = [np.array(Image.open(trace_path).convert("RGB")) for trace_path in paths_traces]
    return imgs_list, paths_traces

def concat_traces(img_scene, imgs_traces):
    """Append cropped trace images to the right of the scene background.

    Each trace image is cropped using its pure-white pixels (channel mean
    equal to 255): rows are kept from the first to the last row containing a
    white pixel, and columns are kept from just after the right-most white
    column to the image's right edge. A 5-pixel-wide black separator is
    inserted before every cropped trace.

    Parameters
    ----------
    img_scene : numpy.ndarray
        Background image of shape ``(H, W, 3)``.
    imgs_traces : list of numpy.ndarray
        Trace images of shape ``(h, w, 3)``. After cropping, each must have
        the same height as ``img_scene``.

    Returns
    -------
    numpy.ndarray
        The horizontally concatenated image, in the dtype of ``img_scene``
        (assuming the traces share that dtype). ``img_scene`` itself is
        returned when ``imgs_traces`` is empty.

    Raises
    ------
    ValueError
        If a trace image has no pure-white pixel to crop against, or if the
        cropped heights do not match.
    """
    img_tot = img_scene
    for img in imgs_traces:
        white_rows, white_cols = np.where(img.mean(axis=2) == 255)
        if white_rows.size == 0:
            raise ValueError("Trace image has no pure-white margin to crop against")
        cropped = img[white_rows.min():white_rows.max() + 1, white_cols.max() + 1:]
        # Build the separator in the image's own dtype: a default float64
        # block would silently promote the whole uint8 picture to float64.
        separator = np.zeros((cropped.shape[0], 5, 3), dtype=img_tot.dtype)
        # np.concatenate is available on every NumPy version, unlike the
        # np.concat alias that was only added in NumPy 2.0.
        img_tot = np.concatenate([img_tot, separator, cropped], axis=1)

    return img_tot


# ----------------------------- #
# ------------ MAIN ----------- #
# ----------------------------- #

def _finite_upper(series, factor, fallback=1.0):
    """Return ``series.max() * factor``, or ``fallback`` when the max is missing or not finite."""
    peak = series.max()
    if pd.isna(peak) or not np.isfinite(peak):
        return fallback
    return peak * factor


def _add_phase_bars(fig, df_wls, value_col, row, col, to_y, to_text):
    """Add one bar per (subject, phase) of ``phases_everyone`` to a subplot.

    ``to_y`` and ``to_text`` map the ``value_col`` Series of the selected
    rows to the bar heights and to the in-bar labels. A (subject, phase)
    pair without data is drawn as a zero-valued bar so grouping stays stable.
    """
    for sub, phases in phases_everyone.items():
        # `slot` is the position of the phase inside the subject's own list.
        # Bars sharing a slot share an offsetgroup, so they line up across
        # subjects (slots 0-3 for humans, 0-4 for agent checkpoints).
        for slot, phase in enumerate(phases):
            mask = (df_wls['subject'] == sub) & (df_wls['learning_phase'] == phase)
            df_phase = df_wls[mask]
            if df_phase.empty:
                df_phase = pd.DataFrame({
                    "subject": [sub],
                    "learning_phase": [phase],
                    value_col: [0.0]
                })

            values = df_phase[value_col]
            fig.add_trace(
                go.Bar(
                    x=df_phase["subject"],
                    y=to_y(values),
                    name=phase,
                    marker_color=colors_phase_ckpt[phase],
                    offsetgroup=f"chk{slot}",
                    text=to_text(values),
                    textposition="inside",
                    cliponaxis=False,
                    hovertemplate="Value: %{y:.2f}<extra></extra>",
                    showlegend=False
                ),
                row=row, col=col
            )


def _add_delta_labels(fig, df_wls, delta_col, y_value, row, col, metric, scale=1):
    """Write each subject's ``delta_col`` value as a text label at height ``y_value``."""
    for sub in df_wls['subject'].sort_values().unique():
        df_sub = df_wls[df_wls['subject'] == sub]
        fig.add_trace(
            go.Scatter(
                x=df_sub['subject'],
                y=[y_value],
                mode='text',
                text=(df_sub[delta_col] * scale).round(2).astype(str) + " d",
                textposition="top center",
                textfont=dict(
                    size=10,
                    color='rgb(68, 1, 84)',
                    family="Arial Black"
                ),
                name=f"Delta Total {metric} for {sub}",
                showlegend=False
            ),
            row=row, col=col
        )


def _add_legend_proxies(fig, row, col):
    """Add empty marker traces whose only purpose is one legend entry per colour."""
    simple_names = order_4phases + ["Checkpoint 1", "Checkpoint 2", "Checkpoint 3", "Checkpoint 4", "Checkpoint 5"]
    for label, color in zip(simple_names, color_4phases + color_ckpt):
        fig.add_trace(
            go.Scatter(
                x=[None], y=[None],
                mode="markers",
                marker=dict(size=12, color=color),
                name=label,
                showlegend=True,
            ),
            row=row, col=col
        )


def make_dashboard(df, level, scene):
    """Build the five-panel Plotly dashboard for one scene.

    Panels: scene background with player traces (top, full width), mean MAD
    and clip counts per learning phase (middle row), clearance rate and
    speed per learning phase (bottom row).

    Parameters
    ----------
    df : pandas.DataFrame
        Metrics table; filtered to the scene with ``get_subset``.
    level : str
        Level name such as ``"w6l3"``.
    scene : Any
        Scene id; it must be convertible with ``int``.

    Returns
    -------
    plotly.graph_objects.Figure
        The assembled figure. When the background image is missing, the top
        panel shows a "No image found" note instead of raising.
    """
    df_wls = get_subset(df, level, scene)

    fig = make_subplots(
        rows=3, cols=2,
        specs=[[{"type": "xy", 'colspan': 2}, None],
               [{"type": "xy"}, {"type": "xy"}],
               [{"type": "xy"}, {"type": "xy"}]],
        column_widths=[0.5, 0.5],
        row_heights=[0.3, 0.35, 0.35],
        horizontal_spacing=0.1,
        subplot_titles=("Scene Background with Players Traces","mean MADs per Learning Phase ",  " Num. Clips per Learning Phase", "Clearance Through Learning-Phases", " Z Speed Through Learning-Phases")
    )

    ########################################
    # Top: non-interactive background      #
    ########################################

    img_arr, img_path = load_scene_image(level, scene)

    if img_arr is not None:
        traces_arr, traces_path = get_traces(level, int(scene))
        img_final = concat_traces(img_arr, traces_arr)
        fig.add_trace(go.Image(z=img_final, hoverinfo="skip"), row=1, col=1)

        # One label for the bare background plus one per trace image found.
        # The subject folder is two levels above the trace file
        # (.../sub-XX/scenes/<file>); it is shown with an underscore.
        x_labels = ["empty"] + [
            Path(p).parent.parent.name.replace("-", "_") for p in traces_path
        ]

        # Width of one panel, assuming all panels have the same width.
        w_single = img_final.shape[1] / len(x_labels)

        # Tick positions centred on each panel.
        tick_positions = (np.arange(len(x_labels)) + 0.5) * w_single

        fig.update_xaxes(
            tickvals=tick_positions,
            ticktext=x_labels,
            title_text="Subjects",
            fixedrange=True,
            visible=True,
            row=1, col=1
        )
    else:
        # No background: nothing can be concatenated or measured, so show a
        # note centred in an empty [0, 1] x [0, 1] panel. The axis references
        # come from row/col, so no explicit xref/yref is passed.
        fig.add_annotation(row=1, col=1, x=0.5, y=0.5,
                           text=f"No image found:<br>{img_path}", showarrow=False)
        fig.update_xaxes(range=[0, 1], visible=False, fixedrange=True, row=1, col=1)
        fig.update_yaxes(range=[0, 1], row=1, col=1)

    fig.update_yaxes(visible=False, fixedrange=True, row=1, col=1)

    #********************#
    # Mid-Left: MAD      #
    #********************#

    _add_phase_bars(
        fig, df_wls, "MAD_mean", row=2, col=1,
        to_y=lambda values: values,
        to_text=lambda values: values.round(2).astype(str),
    )
    fig.update_xaxes(visible=False, fixedrange=True, row=2, col=1)
    # Fall back to a fixed upper bound when no MAD value is available, so the
    # axis range never contains NaN.
    fig.update_yaxes(title_text="Mean MAD", row=2, col=1,
                     range=[-0.1, _finite_upper(df_wls["MAD_mean"], 1.4)])
    _add_delta_labels(fig, df_wls, 'delta_MAD_tot', df_wls["MAD_mean"].max(),
                      row=2, col=1, metric="MAD")

    #*********************************************#
    # Mid-Right: bar (counts) with outside labels #
    #*********************************************#

    for sub, phases in hum_2phases.items():
        for phase in phases:
            mask = (df_wls['subject'] == sub) & (df_wls['learning_phase'] == phase)
            df_phase = df_wls[mask]

            if df_phase.empty:
                df_phase = pd.DataFrame({
                    "subject": [sub],
                    "phase": [phase],
                    "count": [0],
                    "cleared": [0.0]
                })

            fig.add_trace(
                go.Bar(
                    x=df_phase["subject"],
                    y=df_phase["count"],
                    name=phase,
                    marker_color=colors_2phases[phase],
                    textfont=dict(
                        size=13,
                        color='rgb(68, 1, 84)',
                        family="Arial Black"
                    ),
                    text=df_phase["count"],
                    textposition="outside",
                    offsetgroup=phase,
                    cliponaxis=False,
                    # The trace name is exposed to templates as fullData.name.
                    hovertemplate="Phase: %{fullData.name}<br>Value: %{y}<extra></extra>",
                    # These bars never had a legend entry; the shared colour
                    # legend is drawn with the bottom-left panel.
                    showlegend=False
                    ),
                row=2, col=2
            )

    fig.update_layout(barmode="group")
    fig.update_yaxes(title_text="N tries", row=2, col=2,
                     range=[-3, _finite_upper(df_wls[df_wls['subject']!='ppo']["count"], 1.2)])
    fig.update_xaxes(title_text="Learning Phase", row=3, col=1)

    ################################
    # Bottom Left: bar (clearance) #
    ################################

    _add_phase_bars(
        fig, df_wls, "cleared", row=3, col=1,
        to_y=lambda values: (values*100).round(0),
        to_text=lambda values: (values*100).round(0).astype(str) + "%",
    )
    fig.update_yaxes(title_text="Clearance Rate (%)", row=3, col=1, range=[-10, 120])

    # Shared colour legend (the only traces with a legend entry).
    _add_legend_proxies(fig, row=3, col=1)

    _add_delta_labels(fig, df_wls, 'delta_clr_tot', 100,
                      row=3, col=1, metric="Clearance", scale=100)

    ###############################
    # Bottom Right: bar (speed)   #
    ###############################

    _add_phase_bars(
        fig, df_wls, "speed", row=3, col=2,
        to_y=lambda values: values,
        to_text=lambda values: values.round(0).astype(str),
    )
    # Largest absolute speed, so negative values are covered as well.
    fig.update_yaxes(title_text="Average Speed<br>(horizontal pixel<br>traveled per sec.)", row=3, col=2,
                     range=[-0.1, _finite_upper(df_wls["speed"].abs(), 1.4)])

    _add_delta_labels(fig, df_wls, 'delta_spd_tot', df_wls["speed"].max(),
                      row=3, col=2, metric="Speed")

    # Global layout tuned for clean initial render
    scene_fullname = f"{level}s{scene}"
    fig.update_layout(
        height=540,
        autosize=True,
        margin=dict(l=20, r=20, t=70, b=70),
        title_text=f"Level: {level} | Scene: {scene} ({scene_fullname})",
        legend=dict(orientation="h", y=-0.22, x=0.0),
        uniformtext_minsize=10, uniformtext_mode='hide',
        bargap=0.3,
    )
    # Shrink subplot titles, nudge slightly
    for ann in fig.layout.annotations:
        ann.font.size = 12
        ann.yshift = 6

    return fig

# ---------- WIDGETS ----------
# Distinct, non-null level names (e.g. "w1l1") in sorted order.
levels = sorted(df["level_full_name"].dropna().unique())

# Level selector; it starts on the first level of the sorted list.
level_dd = widgets.Dropdown(
    description="Level:",
    options=levels,
    value=levels[0],
    layout=widgets.Layout(width="240px"),
)

# Scene as SelectionSlider with exactly N positions (available scenes for the level)
def build_scene_slider(level):
    """Create the scene slider for ``level``.

    The slider has one position per scene returned by ``scenes_for_level``,
    each labelled with the scene number, and starts on the first scene.

    When the level has no scene, a disabled slider with a single placeholder
    position whose value is ``None`` is returned, so callers can keep testing
    ``slider.value is None``.
    """
    opts = scenes_for_level(df, level)
    if opts:
        options, disabled = [(str(s), s) for s in opts], False
    else:
        # SelectionSlider rejects an empty option list, so an "empty" slider
        # is represented by one disabled placeholder position instead.
        options, disabled = [("n/a", None)], True
    return widgets.SelectionSlider(
        options=options,
        value=options[0][1],
        description="Scene:",
        layout=widgets.Layout(width="500px"),
        continuous_update=False,
        disabled=disabled,
    )

# Initial slider, built for the level the dropdown starts on.
scene_slider = build_scene_slider(level_dd.value)

def current_subject_options():
    """Return the subjects with data for the selected level and scene ([] when no scene is selected)."""
    selected_scene = scene_slider.value
    return [] if selected_scene is None else subjects_for(df, level_dd.value, selected_scene)

# Output area the dashboard is rendered into (no border).
out = widgets.Output(layout=widgets.Layout(border="0px"))

# ---------- CALLBACKS ----------
def on_level_change(change):
    """Observer for the level dropdown: swap in a fresh scene slider and redraw.

    The slider is rebuilt so that it has exactly one position per scene of
    the newly selected level. ``change`` is the traitlets change payload; the
    current dropdown value is read directly instead.
    """
    global scene_slider
    old_slider = scene_slider
    new_slider = build_scene_slider(level_dd.value)

    # Move the value observer from the old slider to the new one.
    old_slider.unobserve(on_scene_change, names="value")
    new_slider.observe(on_scene_change, names="value")

    # Publish the new slider: module-level reference first, then the UI.
    scene_slider = new_slider
    controls.children = (level_dd, new_slider)

    draw()

def on_scene_change(change):
    """Observer for the scene slider: redraw the dashboard for the new scene.

    ``change`` is the traitlets change payload; it is not inspected because
    ``draw`` reads the current widget values itself.
    """
    draw()

def draw():
    """Render the dashboard for the current level/scene selection into ``out``.

    The previous content of the output area is cleared first. If either
    widget has no value, a short message is printed instead of a figure.
    """
    level, scene = level_dd.value, scene_slider.value
    with out:
        clear_output(wait=True)
        if level is None or scene is None:
            print("No data available for the current selection.")
            return
        dashboard = make_dashboard(df, level, scene)
        # Minimal toolbar: no Plotly logo, no selection tools.
        dashboard.show(config={"displaylogo": False, "modeBarButtonsToRemove": ["select", "lasso2d"]})

# Wire up observers: both widgets trigger a redraw when their value changes.
level_dd.observe(on_level_change, names="value")
scene_slider.observe(on_scene_change, names="value")

# ---------- DISPLAY FIRST, THEN DRAW (prevents initial overlap) ----------
# `controls` is also reassigned from on_level_change (its `children`), so it
# must exist before the first level change can fire.
controls = widgets.HBox([level_dd, scene_slider])
display(controls, out)
draw()  # clean initial render



HBox(children=(Dropdown(description='Level:', layout=Layout(width='240px'), options=('w1l1', 'w1l2', 'w1l3', '…

Output(layout=Layout(border_bottom='0px', border_left='0px', border_right='0px', border_top='0px'))