# Latent histogram viewer.

In [1]:
#| label: ae1m-distribution

# Select Matplotlib's interactive "widget" backend for this notebook.
# NOTE(review): presumably requires the `ipympl` package to be installed — confirm.
%matplotlib widget
# Name of the backend selected above; it is not read anywhere else in the visible cell.
BACKEND = 'widget'

import os
import h5py
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as W
from IPython.display import display, Markdown

# Path to the HDF5 file holding the latent vectors (update as needed).
h5_path = r"C:\Users\Work\Desktop\Github_repo\cell_browser\data\nucleus-ae\latents.h5"

# Initial widget state for the histogram.
DEFAULT_BINS = 250           # number of histogram bins
INIT_DIM     = 0             # latent dimension shown first
INIT_PCTS    = (0.0, 100.0)  # (low, high) percentiles


# Checked up front so the user gets a message pointing at `h5_path`.
if not os.path.exists(h5_path):
    raise FileNotFoundError(f"HDF5 file not found: {h5_path}\nPlease update 'h5_path' above.")

# The file is deliberately left open on success: `z_ds` is read lazily, one
# column at a time, by the widget callbacks defined further down.
h5f = h5py.File(h5_path, 'r')
try:
    if 'z' not in h5f:
        raise KeyError("Dataset 'z' not found in HDF5 file.")

    z_ds = h5f['z']      # expected shape = (N, D)
    if len(z_ds.shape) != 2:
        raise ValueError(
            f"Dataset 'z' must be 2-D (samples x latent dims), got shape {z_ds.shape}."
        )
    N, D = z_ds.shape
except Exception:
    # Validation failed: do not leave a dangling open handle on the file.
    h5f.close()
    raise

# Summarize what was loaded. Each entry ends with two spaces followed by a
# newline, which Markdown renders as a hard line break, so the three facts
# appear on separate lines instead of running together.
display(Markdown(
    f"**Loaded**: `{h5_path}`  \n"
    f"**Samples**: {N:,}  \n"
    f"**Latent dims**: {D}"
))

# Container widget the histogram is drawn into (works with both backends)
out = W.Output()

# Controls: which latent dimension, which percentile window, how many bins.
dim_slider = W.IntSlider(
    description='Dimension',
    min=0,
    max=D-1,
    step=1,
    value=INIT_DIM,
    continuous_update=False,
)
pct_slider = W.FloatRangeSlider(
    description='Percentiles',
    min=0.0,
    max=100.0,
    step=0.5,
    value=INIT_PCTS,
    readout_format='.1f',
    continuous_update=False,
)
bins_slider = W.IntSlider(
    description='Bins',
    min=10,
    max=1000,
    step=10,
    value=DEFAULT_BINS,
    continuous_update=False,
)

def compute_percentiles(dim, low_pct, high_pct):
    """Read one latent column and the values at two percentiles of it.

    Args:
        dim: Column index into the module-level dataset ``z_ds``.
        low_pct: Lower percentile, passed to ``np.percentile``.
        high_pct: Upper percentile, passed to ``np.percentile``.

    Returns:
        A tuple ``(column, low_value, high_value)`` where ``column`` is
        ``z_ds[:, dim]`` and the two values are plain Python floats.
    """
    values = z_ds[:, dim]
    bounds = np.percentile(values, (low_pct, high_pct))
    lower = float(bounds[0])
    upper = float(bounds[1])
    return values, lower, upper

def render(*_):
    """Redraw the histogram inside ``out`` from the current slider values.

    Any positional arguments are accepted and ignored, so the function can be
    registered as a widget observer callback and also called directly with no
    arguments for the initial draw.
    """
    with out:
        out.clear_output(wait=True)
        dim = int(dim_slider.value)
        low_pct, high_pct = pct_slider.value
        bins = int(bins_slider.value)

        # Read the data before creating a figure, so a failed read does not
        # leave an empty figure behind.
        col, low_val, high_val = compute_percentiles(dim, low_pct, high_pct)

        # Build a fresh figure each time (works for both backends)
        fig, ax = plt.subplots(figsize=(7, 4))
        try:
            ax.hist(col, bins=bins, range=(low_val, high_val), edgecolor='none')
            # The percentile slider moves in 0.5 steps, so show one decimal;
            # a '.0f' format would display 2.5-97.5 as "2-98".
            ax.set_title(
                f'Latent Dimension #{dim}  |  {low_pct:.1f}–{high_pct:.1f} percentile  |  bins={bins}'
            )
            ax.set_xlabel('Value')
            ax.set_ylabel('Frequency')

            # Show the figure explicitly so it appears reliably
            display(fig)
        finally:
            # Always release the figure: prevents duplicate renders in some
            # environments and avoids accumulating open figures on errors.
            plt.close(fig)

# Redraw whenever the value of any control changes
for _ctrl in (dim_slider, pct_slider, bins_slider):
    _ctrl.observe(render, names='value')

# Layout: the three controls stacked on top, the plot area underneath
ui = W.VBox([dim_slider, pct_slider, bins_slider])
display(W.VBox([ui, out]))

# Draw once up front so a plot is visible before any interaction
render()


**Loaded**: `C:\Users\Work\Desktop\Github_repo\cell_browser\data\nucleus-ae\latents.h5`  **Samples**: 1,061,277  **Latent dims**: 512

VBox(children=(VBox(children=(IntSlider(value=0, continuous_update=False, description='Dimension', max=511), F…