## UMAP embedding of latent space.

In [1]:

# CONFIG — update if needed
# NOTE(review): RESULTS_DIR is a machine-specific absolute path; anyone running
# this notebook elsewhere has to edit it before the cells below can find data.
RESULTS_DIR = r'C:\Users\Work\Desktop\Github_repo\cell_browser\data\nucleus-ae'
# HDF5 file that holds the precomputed UMAP embeddings.
UMAP_H5 = RESULTS_DIR + "/umap_embeddings.h5"

import os, h5py
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from ipywidgets import ToggleButtons, VBox, Output

# Enable interactive Matplotlib (ipympl) in notebooks
%matplotlib widget

plt.rcParams['figure.figsize'] = (7, 6)
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.bbox'] = 'tight'

print("[i] Matplotlib", matplotlib.__version__, "| Backend:", matplotlib.get_backend())
print("[i] Path:", UMAP_H5)


[i] Matplotlib 3.10.5 | Backend: widget
[i] Path: C:\Users\Work\Desktop\Github_repo\cell_browser\data\nucleus-ae/umap_embeddings.h5


In [2]:

def _safe_read(h5_path, dset_name):
    if not os.path.isfile(h5_path):
        return None
    with h5py.File(h5_path, "r") as h5:
        if dset_name in h5:
            return np.asarray(h5[dset_name][...], dtype=np.float32)
        return None

def load_umap_embeddings():
    """Load the UMAP embeddings stored in ``UMAP_H5``.

    Looks for the datasets ``umap_cosine`` and ``umap_euclidean`` and returns
    a dict keyed by the part of the dataset name after the first underscore
    (``"cosine"``, ``"euclidean"``). Datasets that could not be read are
    left out, so the result may be empty.
    """
    embeddings = {}
    for dataset in ("umap_cosine", "umap_euclidean"):
        coords = _safe_read(UMAP_H5, dataset)
        if coords is None:
            continue
        # Drop the "umap_" prefix so the key is just the metric name.
        _, metric = dataset.split("_", 1)
        embeddings[metric] = coords
    return embeddings

def _autosize_point_params(N):
    if N <= 50_000:
        return 6.0, 0.8
    elif N <= 200_000:
        return 2.5, 0.6
    elif N <= 1_000_000:
        return 0.8, 0.5
    else:
        return 0.4, 0.5

def scatter2d(ax, xy, title=None):
    """Draw the first two columns of ``xy`` as a rasterized scatter on ``ax``.

    Parameters
    ----------
    ax : Matplotlib axes to draw on.
    xy : 2-D array; column 0 is plotted on x and column 1 on y.
    title : optional axes title; skipped when falsy.

    Returns
    -------
    The same ``ax``, so calls can be chained.
    """
    # Marker size and transparency scale with the number of rows in `xy`.
    marker_size, opacity = _autosize_point_params(xy.shape[0])
    ax.scatter(xy[:, 0], xy[:, 1], s=marker_size, alpha=opacity, rasterized=True)
    if title:
        ax.set_title(title)
    ax.set_xlabel("Dim 1")
    ax.set_ylabel("Dim 2")
    ax.grid(False)
    return ax

# Load whichever embeddings are present and report the metric names found.
umap_dict = load_umap_embeddings()
print("[i] UMAP keys:", list(umap_dict))


[i] UMAP keys: ['cosine', 'euclidean']


### Plot

In [3]:

#| label: ae1m-umap

# Output area that hosts the rendered figure (or a message when none exists).
umap_out = Output()
# Metric selector; only metrics that were actually loaded are offered.
umap_metric = ToggleButtons(
    description='Metric:',
    options=[name for name in ("cosine", "euclidean") if name in umap_dict],
)

def draw_umap(metric):
    """Render the UMAP embedding for ``metric`` inside ``umap_out``.

    Parameters
    ----------
    metric : key into ``umap_dict`` (for example ``"cosine"``).

    The output area is cleared first. When ``umap_dict`` has no entry for
    ``metric`` a short message is printed there instead of a plot. The figure
    from the previous call is closed before a new one is created, so repeated
    toggling does not accumulate open figures.
    """
    umap_out.clear_output(wait=True)

    # pyplot keeps every figure alive until it is closed explicitly; without
    # this, each toggle would leave one more large scatter figure in memory.
    previous_fig = getattr(draw_umap, "_fig", None)
    if previous_fig is not None:
        plt.close(previous_fig)
        draw_umap._fig = None

    xy = umap_dict.get(metric)
    if xy is None:
        with umap_out:
            print(f"No UMAP embedding for metric='{metric}'")
        return

    fig, ax = plt.subplots()
    # Remember the figure on the function itself so the next call can close it.
    draw_umap._fig = fig
    scatter2d(ax, xy, title=f"UMAP ({metric})")
    with umap_out:
        plt.show()

def _on_umap_change(change):
    if change['name'] == 'value':
        draw_umap(change['new'])

# Draw the first available metric up front so the output area is not empty.
if umap_metric.options:
    draw_umap(umap_metric.options[0])

# Redraw whenever the selected metric changes, then show selector + plot.
umap_metric.observe(_on_umap_change, names='value')
VBox([umap_metric, umap_out])


VBox(children=(ToggleButtons(description='Metric:', options=('cosine', 'euclidean'), value='cosine'), Output()…