## PCA embedding of latent space.

In [1]:

# CONFIG — update if needed
RESULTS_DIR = r'C:\Users\Work\Desktop\Github_repo\cell_browser\data\nucleus-ae'
PCA_H5 = f"{RESULTS_DIR}/pca_embeddings.h5"

import os, h5py, itertools
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from ipywidgets import Dropdown, VBox, Output

# Enable interactive Matplotlib (ipympl) in notebooks
%matplotlib widget

plt.rcParams['figure.figsize'] = (7, 6)
plt.rcParams['figure.dpi'] = 100
plt.rcParams['savefig.bbox'] = 'tight'

print("[i] Matplotlib", matplotlib.__version__, "| Backend:", matplotlib.get_backend())
print("[i] Path:", PCA_H5)


[i] Matplotlib 3.10.5 | Backend: widget
[i] Path: D:/Results/09052025_AE1M_Conv2DTranspose/pca_embeddings.h5


In [2]:

def load_pca_pairs(h5_path=None, n_components=5):
    """Load the pairwise PCA projections stored in an HDF5 file.

    Datasets are looked up by the name ``PC{i}_PC{j}`` for every pair
    ``i < j`` drawn from ``1..n_components``. Names that are absent from
    the file are skipped.

    Parameters
    ----------
    h5_path : str or path-like, optional
        File to read. Defaults to the module-level ``PCA_H5``.
    n_components : int, optional
        Highest component index to look for (default 5, i.e. PC1..PC5).

    Returns
    -------
    list of (str, numpy.ndarray)
        ``(dataset_name, float32 array)`` tuples in
        ``itertools.combinations`` order. The list is empty when the file
        does not exist or contains none of the expected datasets.
    """
    if h5_path is None:
        h5_path = PCA_H5
    if not os.path.isfile(h5_path):
        # Still best-effort (empty result, no exception), but say why so an
        # empty result downstream is not a mystery.
        print("[!] PCA file not found:", h5_path)
        return []

    wanted = [
        f"PC{i}_PC{j}"
        for i, j in itertools.combinations(range(1, n_components + 1), 2)
    ]
    with h5py.File(h5_path, "r") as h5:
        # `[...]` reads the whole dataset into memory before the file closes.
        return [
            (name, np.asarray(h5[name][...], dtype=np.float32))
            for name in wanted
            if name in h5
        ]

def _autosize_point_params(N):
    """Choose a marker size and opacity for a scatter plot of ``N`` points.

    Returns a ``(size, alpha)`` tuple. Both values shrink as ``N`` grows
    past each tier's inclusive upper bound.
    """
    # (inclusive upper bound on N, (marker size, alpha)), in ascending order.
    tiers = (
        (50_000, (6.0, 0.8)),
        (200_000, (2.5, 0.6)),
        (1_000_000, (0.8, 0.5)),
    )
    for upper_bound, params in tiers:
        if N <= upper_bound:
            return params
    # Anything beyond the last tier.
    return 0.4, 0.5

def scatter2d(ax, xy, title=None):
    """Scatter the first two columns of ``xy`` onto ``ax``.

    Marker size and alpha come from ``_autosize_point_params`` based on the
    number of rows. The axes get the generic labels "Dim 1" / "Dim 2", the
    grid is turned off, and ``title`` is applied only when truthy.
    Returns ``ax``.
    """
    marker_size, opacity = _autosize_point_params(xy.shape[0])
    xs, ys = xy[:, 0], xy[:, 1]
    ax.scatter(xs, ys, s=marker_size, alpha=opacity, rasterized=True)
    if title:
        ax.set_title(title)
    ax.set_xlabel("Dim 1")
    ax.set_ylabel("Dim 2")
    ax.grid(False)
    return ax

# Read every available PC-pair projection once; later cells reuse this list.
pca_pairs = load_pca_pairs()
# Show which dataset names were loaded.
print("[i] PCA pairs:", [pair[0] for pair in pca_pairs])


[i] PCA pairs: ['PC1_PC2', 'PC1_PC3', 'PC1_PC4', 'PC1_PC5', 'PC2_PC3', 'PC2_PC4', 'PC2_PC5', 'PC3_PC4', 'PC3_PC5', 'PC4_PC5']


### Plot

In [3]:

#| label: ae1m-pca

# Output area that receives the rendered figure (or a not-found message).
pca_out = Output()

# Dropdown listing the loaded dataset names, in load order.
pca_dd = Dropdown(
    description='PC Pair:',
    options=[pair[0] for pair in pca_pairs],
)

def draw_pca(name):
    """Render the scatter plot for one PC pair into ``pca_out``.

    Parameters
    ----------
    name : str
        Dataset name such as ``"PC1_PC2"``, looked up in ``pca_pairs``.
        The two halves around ``"_"`` become the x and y axis labels.

    The output area is cleared first. If ``name`` is not among the loaded
    pairs, a message is printed into ``pca_out`` and nothing is drawn.
    """
    pca_out.clear_output(wait=True)

    # Every call builds a new figure. Close the one from the previous call so
    # figures do not pile up in pyplot's registry as the user flips through
    # the dropdown.
    stale_fig = getattr(draw_pca, "_current_fig", None)
    if stale_fig is not None:
        plt.close(stale_fig)
        draw_pca._current_fig = None

    xy = next((arr for nm, arr in pca_pairs if nm == name), None)
    if xy is None:
        with pca_out:
            print(f"PCA dataset '{name}' not found.")
        return

    fig, ax = plt.subplots()
    draw_pca._current_fig = fig
    xlab, ylab = name.split("_")
    scatter2d(ax, xy, title=f"PCA — {name}")
    # Replace scatter2d's generic "Dim 1"/"Dim 2" labels with the PC names.
    ax.set_xlabel(xlab); ax.set_ylabel(ylab)
    with pca_out:
        plt.show()

def _on_pca_change(change):
    """Observer callback: redraw when the change is for the ``value`` trait."""
    if change['name'] != 'value':
        return
    draw_pca(change['new'])

# Draw the first available pair up front so the output area is not empty
# before the user touches the dropdown.
if pca_dd.options:
    draw_pca(pca_dd.options[0])

# Redraw on every subsequent selection change.
pca_dd.observe(_on_pca_change, names='value')

# Last expression of the cell: display the dropdown above the plot area.
VBox([pca_dd, pca_out])


VBox(children=(Dropdown(description='PC Pair:', options=('PC1_PC2', 'PC1_PC3', 'PC1_PC4', 'PC1_PC5', 'PC2_PC3'…