# Synthetic Galaxy Catalogue: Multiple Voids + LSS Background
This notebook synthesizes a toy 3D galaxy catalogue with:
- Multiple spherical voids at different locations and radii.
- Shell galaxies around each void (higher density near the void boundary).
- A clustered "large-scale structure" (LSS) background in the space not occupied by void interiors.

We then visualize the catalogue, compute per-galaxy k-NN distance tensors, and persist results as an `.npz` file for downstream experiments.


In [21]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from pathlib import Path
from scipy.spatial import cKDTree
from scipy.spatial.distance import cdist
from typing import Any, Dict, Optional
import torch
from voidx.config import get_config
import builtins
from numpy.lib.format import open_memmap

In [22]:
# Import reusable utilities from the library module for readability
from voidx.preparation import (
    GalaxySyntheticDataPreparer,
    compute_first_neighbors,
    compute_neighbor_distances_and_tensor,
    stack_density_stats,
    plot_density_profiles,
    save_density_profiles_npz,
    select_top_background_by_mean_knn,
    remove_points,
    recompute_first_neighbors,
    build_neighbor_tensor_to_memmap,
    plot_knn_curves_by_class_and_topk,
    compute_knn_features,
)

In [23]:
# Load the configuration object for this dataset name
config = get_config(name='synthetic_void_LSS')

# Mirror the config attributes as plain notebook variables (kept for backward compatibility)
box, hdf, VIDE = config.box, config.hdf, config.VIDE
name, param = config.name, config.param
# Filename-safe tag for `param`; a falsy param is written as 'default'
param_str = 'default' if not param else str(param)

fraction_in_voids = config.fraction_in_voids
N_neighbours = config.N_neighbours
box_size_mpc, void_size = config.box_size_mpc, config.void_size
model_name = config.model_name

# Run on the MPS backend when torch reports it as available, otherwise on the CPU
if torch.backends.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")
seed = config.seed

# Directory layout is taken as-is from the config object
data_dir, param_dir = config.data_dir, config.param_dir
galaxy_info_dir = config.galaxy_info_dir
checkpoint_dir_spec, checkpoint_dir_global = config.checkpoint_dir_spec, config.checkpoint_dir_global
plot_dir, result_dir = config.plot_dir, config.result_dir

# Print a summary of the loaded configuration
config.print_info()

Loading global settings from /Users/boccardpierre/Documents/PhD/Research/Code/VoidX/config/config_global.json
Using device: mps
Notebook Configuration
Dataset: synthetic_void_LSS
Model name: synthetic_void_LSS_
Fraction in voids: 
Box: True, HDF: False, VIDE: False
Device: mps, Seed: 42
N neighbours: 20
Box size (Mpc/h): 60.0
Void size: 3.0
------------------------------------------------------------
Paths:
  data_dir: /Users/boccardpierre/Documents/PhD/Research/Code/VoidX/data/synthetic_void_LSS
  param_dir: /Users/boccardpierre/Documents/PhD/Research/Code/VoidX/data/synthetic_void_LSS/N_neighbours20_boxsize60_voidsize3
  galaxy_info_dir: /Users/boccardpierre/Documents/PhD/Research/Code/VoidX/data/synthetic_void_LSS/N_neighbours20_boxsize60_voidsize3/galaxy_info_files
  checkpoint_dir_spec: /Users/boccardpierre/Documents/PhD/Research/Code/VoidX/data/synthetic_void_LSS/N_neighbours20_boxsize60_voidsize3/checkpoints
  checkpoint_dir_global: /Users/boccardpierre/Documents/PhD/Research/Co

## Configuration
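Set the random seed and the parameters for the voids (shell and interior populations), the LSS background, and the k-NN features. Output paths come from the configuration object loaded above.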

In [24]:
# Reproducibility
# Seed the generator from the configured seed so that Restart & Run All rebuilds the same
# catalogue. To draw a different realisation, change the seed in the config rather than
# sampling one from the unseeded global NumPy state (which leaves no record of the seed used).
seed = config.seed
rng = np.random.default_rng(seed)

# Simulation volume (cubic box centered at 0)
half_box = box_size_mpc / 2.0

# Periodic boundary conditions
use_periodic_boundaries = True  # if True, compute distances with PBC (minimal image)

# Multiple voids configuration
void_radius_min = void_size      # Mpc/h
void_radius_max = void_size + 2  # Mpc/h
shell_thickness = 0.5            # radial spread around the void radius
core_radius_fraction = 1         # galaxies inside void: r < core_radius_fraction * R_void
directional_jitter = 0.0         # optional angular jitter

# Requested number of voids: 1.5 * floor(V_box / V_void) / 3, where V_void is the volume of a
# sphere with the mean of the min/max void radii. The arithmetic order is kept exactly as before
# so the resulting integer does not change.
mean_void_radius = (void_radius_max + void_radius_min) / 2
mean_void_volume = 4/3 * np.pi * mean_void_radius**3
num_voids = int(1.5 * (box_size_mpc**3 // mean_void_volume / 3))
print('Num voids:', num_voids)

# Interior (void) radial bias: beta=0 -> uniform; beta>0 -> fewer near center, denser near edge
void_radial_bias_beta = 2

# Per-void galaxy counts (scale linearly with the configured void size)
per_void_shell_galaxies = int(void_size * 150)
per_void_void_galaxies = int(void_size * 50)

# Large-scale structure (LSS) background via Thomas cluster process
# NOTE(review): these settings are only needed by the Thomas-process sampler; confirm they are
# still used before tuning them.
use_thomas_background = True
background_parent_density = 7.0e-3  # parents per (Mpc/h)^3
background_offspring_mean = 300     # expected galaxies per parent
background_cluster_sigma = 1.6      # Mpc/h (Gaussian cluster std)

# k-NN settings
knn_neighbors = N_neighbours  # N for k-NN distance tensor

Num voids: 402


## Generate synthetic galaxy positions
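We first sample the LSS background, then place non-overlapping spherical voids and remove the background galaxies that fall inside them. For each void we then add two populations: shell galaxies concentrated around the void radius and void galaxies within the inner core. Radial and angular jitter keep the catalogue from being perfectly symmetric.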

In [25]:
# Build the synthetic-data preparer from the box size, boundary mode, neighbour count,
# shared random generator and the two chunk sizes.
preparer_kwargs = dict(
    box_size_mpc=box_size_mpc,
    use_periodic_boundaries=bool(use_periodic_boundaries),
    N_neighbours=N_neighbours,
    rng=rng,
    chunk_size_query=200_000,
    rows_per_chunk_tensor=100_000,
)
synthetic_data = GalaxySyntheticDataPreparer(**preparer_kwargs)


In [26]:
# ---- Build catalogue per requested pipeline ----
# 1) Background first: lognormal + Zel'dovich LSS background.
#    (This replaced an earlier Thomas-process background drawn with
#    synthetic_data.sample_thomas_background(parent_density=..., offspring_mean=..., cluster_sigma=...).)
from voidx.lss_generators import sample_lognormal_zeldovich_background

# Match your previous raw background density (~2.1 galaxies / (Mpc/h)^3)
target_number_density = 2  # tweak to match your desired total count

background_positions = sample_lognormal_zeldovich_background(
    box_size_mpc=box_size_mpc,
    target_number_density=target_number_density,
    ngrid=192,           # 96–192 are reasonable; 128 is a good default for L=100
    R_smooth=1.5,        # filament thickness; try 1.0–2.5
    bias=2,              # tracer bias; higher = more contrast in nodes/filaments
    flow_strength=2.0,   # Zel'dovich amplitude; try 2–8
    Om=0.315, h=0.674, ns=0.965,
    rng=rng,
)
n_bg_raw = background_positions.shape[0]


# 2) Inject voids (non-overlapping, PBC-aware)
void_centers, void_radii = synthetic_data.sample_nonoverlapping_voids(
    n=int(num_voids),
    rmin=float(void_radius_min),
    rmax=float(void_radius_max),
    max_attempts_per_void=50,
)
core_radii = core_radius_fraction * void_radii

# Remove background galaxies inside void cores.
# The galaxy-by-void distance table is evaluated in row chunks: broadcasting the whole
# (n_background, n_voids, 3) float64 array at once needs several GB for a few hundred thousand
# galaxies and a few hundred voids. The resulting mask is identical to the unchunked one.
if background_positions.size and len(core_radii) > 0:
    void_mask_rows_per_chunk = 10_000
    n_background = background_positions.shape[0]
    inside_any = np.zeros(n_background, dtype=bool)
    for start in range(0, n_background, void_mask_rows_per_chunk):
        stop = start + void_mask_rows_per_chunk
        chunk_positions = background_positions[start:stop]
        if use_periodic_boundaries:
            # Minimal-image separation: wrap each component to the nearest periodic copy
            dx = chunk_positions[:, None, :] - void_centers[None, :, :]
            dx -= box_size_mpc * np.round(dx / box_size_mpc)
            dist2 = np.sum(dx * dx, axis=2)
            inside_any[start:stop] = (dist2 < (core_radii[None, :] ** 2)).any(axis=1)
        else:
            # cdist comes from the notebook's import cell
            dmat = cdist(chunk_positions, void_centers)
            inside_any[start:stop] = (dmat < core_radii[None, :]).any(axis=1)
    background_positions = background_positions[~inside_any]
n_bg_kept = background_positions.shape[0]

# 3) Add void interior and shell galaxies around each void.
# Shell and interior draws stay interleaved per void so the sampling call order is unchanged.
shell_list = []
void_list = []
for c, R, Rc in zip(void_centers, void_radii, core_radii):
    shell_list.append(
        synthetic_data.sample_shell_galaxies(
            n=per_void_shell_galaxies,
            center=c,
            radius=float(R),
            thickness=float(shell_thickness),
            jitter_scale=float(directional_jitter),
        )
    )
    void_list.append(
        synthetic_data.sample_void_galaxies(
            n=per_void_void_galaxies,
            center=c,
            core_radius=float(Rc),
            beta=float(void_radial_bias_beta),
        )
    )

# Fall back to empty (0, 3) arrays when no void was placed
shell_positions = np.vstack(shell_list) if shell_list else np.empty((0, 3))
void_positions = np.vstack(void_list) if void_list else np.empty((0, 3))

# Stack all galaxies and labels (background first); labels follow the same row order
positions = np.vstack([background_positions, shell_positions, void_positions])
labels = np.array(
    ["background"] * background_positions.shape[0]
    + ["shell"] * shell_positions.shape[0]
    + ["void"] * void_positions.shape[0]
)

print("Counts:")
print("  voids:", void_radii.shape[0])
print("  background galaxies (raw, kept):", n_bg_raw, n_bg_kept)
print("  shell galaxies:", shell_positions.shape[0])
print("  void interior galaxies:", void_positions.shape[0])
print("  total:", positions.shape[0])

Counts:
  voids: 336
  background galaxies (raw, kept): 412395 278670
  shell galaxies: 151200
  void interior galaxies: 50400
  total: 480270


## Interactive Plotly view
Use Plotly to explore the background, shell and void populations interactively. Each figure is written as a standalone HTML file in `plot_dir`; open it in a browser (drag to rotate, scroll to zoom).

In [20]:
print('Number of voids : ', void_radii.shape[0])

colors = {"shell": "#1f77b4", "void": "#d62728", "background": "#2ca02c"}

fig = go.Figure()

# Subsample the background for plotting with a dedicated generator seeded from `seed`:
# the figure is then the same on every run, and the catalogue's shared `rng` is not advanced.
max_plot_points = 500000
plot_rng = np.random.default_rng(seed)
indices = plot_rng.choice(
    background_positions.shape[0],
    size=min(max_plot_points, background_positions.shape[0]),
    replace=False,
)

if background_positions.size:
    fig.add_trace(
        go.Scatter3d(
            x=background_positions[indices, 0],
            y=background_positions[indices, 1],
            z=background_positions[indices, 2],
            mode="markers",
            name="background (LSS)",
            marker=dict(size=0.4, opacity=0.4, color=colors["background"]),
        )
    )

# Fix every axis to the full box so the view does not depend on the sampled points
fig.update_layout(
    scene=dict(
        xaxis_title="x [Mpc/h]",
        yaxis_title="y [Mpc/h]",
        zaxis_title="z [Mpc/h]",
        aspectmode="data",
        xaxis=dict(range=[-half_box, half_box]),
        yaxis=dict(range=[-half_box, half_box]),
        zaxis=dict(range=[-half_box, half_box]),
    ),
    title="Synthetic Catalogue: Multiple Voids + LSS Background (Interactive)",
    legend=dict(x=0.01, y=0.99),
    width=800,
    height=600,
    margin=dict(l=10, r=10, b=40, t=80),
)
# fig.show()
# Written to disk instead of displayed inline
save_path = plot_dir / 'background.html'
fig.write_html(save_path)
print(f"Saved interactive plot to: {save_path}")


Number of voids :  329
Saved interactive plot to: /Users/boccardpierre/Documents/PhD/Research/Code/VoidX/data/synthetic_void_LSS/N_neighbours20_boxsize60_voidsize3/plot/background.html


In [None]:
print('Number of voids : ', void_radii.shape[0])

colors = {"shell": "#1f77b4", "void": "#d62728", "background": "#2ca02c"}

fig = go.Figure()

# This figure is saved as shell.html, so it must show the shell population
# (it previously re-plotted void_positions under the "void interior" name and colour).
if shell_positions.size:
    fig.add_trace(
        go.Scatter3d(
            x=shell_positions[:, 0],
            y=shell_positions[:, 1],
            z=shell_positions[:, 2],
            mode="markers",
            name="shell",
            marker=dict(size=1, opacity=0.7, color=colors["shell"]),
        )
    )

# Fix every axis to the full box so the view matches the other population figures
fig.update_layout(
    scene=dict(
        xaxis_title="x [Mpc/h]",
        yaxis_title="y [Mpc/h]",
        zaxis_title="z [Mpc/h]",
        aspectmode="data",
        xaxis=dict(range=[-half_box, half_box]),
        yaxis=dict(range=[-half_box, half_box]),
        zaxis=dict(range=[-half_box, half_box]),
    ),
    title="Synthetic Catalogue: Multiple Voids + LSS Background (Interactive)",
    legend=dict(x=0.01, y=0.99),
    width=800,
    height=600,
    margin=dict(l=10, r=10, b=40, t=80),
)
# fig.show()
# Written to disk instead of displayed inline
save_path = plot_dir / 'shell.html'
fig.write_html(save_path)
print(f"Saved interactive plot to: {save_path}")


In [None]:
print('Number of voids : ', void_radii.shape[0])

colors = {"shell": "#1f77b4", "void": "#d62728", "background": "#2ca02c"}

fig = go.Figure()

# One 3D scatter trace holding every void-interior galaxy (skipped when there are none)
if void_positions.size:
    void_trace = go.Scatter3d(
        x=void_positions[:, 0],
        y=void_positions[:, 1],
        z=void_positions[:, 2],
        mode="markers",
        name="void interior",
        marker={"size": 1, "opacity": 0.7, "color": colors["void"]},
    )
    fig.add_trace(void_trace)

# All three axes share the same fixed range: the full box centered on the origin
axis_ranges = {f"{axis}axis": {"range": [-half_box, half_box]} for axis in "xyz"}
scene_layout = dict(
    xaxis_title="x [Mpc/h]",
    yaxis_title="y [Mpc/h]",
    zaxis_title="z [Mpc/h]",
    aspectmode="data",
    **axis_ranges,
)
fig.update_layout(
    scene=scene_layout,
    title="Synthetic Catalogue: Multiple Voids + LSS Background (Interactive)",
    legend={"x": 0.01, "y": 0.99},
    width=800,
    height=600,
    margin={"l": 10, "r": 10, "b": 40, "t": 80},
)
# fig.show()
# Written to disk instead of displayed inline
save_path = plot_dir / 'void.html'
fig.write_html(save_path)
print(f"Saved interactive plot to: {save_path}")


## Compute first-neighbor distances only (pre-removal)
Build per-galaxy first k-NN distances now; defer the neighbor-of-neighbor tensor construction until after background removal to avoid unnecessary compute.

In [None]:
# Neighbour count comes from the k-NN setting; the catalogue must hold more points than that
N_neighbours = int(knn_neighbors)
num_galaxies = positions.shape[0]

if not num_galaxies > N_neighbours:
    raise ValueError(
        f"N={N_neighbours} must be smaller than the number of galaxies ({num_galaxies}). "
        "Reduce knn_neighbors or increase sample size."
    )

# Chunk parameters for neighbor queries
chunk_size = 200_000
rows_per_chunk_tensor = 100_000  # not used in this cell; the tensor is built after removal

# First-N neighbours only: indices and distances for every galaxy in the current catalogue.
# The neighbor-of-neighbor tensor is deferred until after background removal.
first_neighbor_idx, first_neighbor_dist = compute_first_neighbors(
    positions=positions,
    N=N_neighbours,
    box_size_mpc=float(box_size_mpc),
    use_periodic_boundaries=bool(use_periodic_boundaries),
    chunk_size_query=chunk_size,
)

# No tensor (in memory or on disk) exists yet; both placeholders are filled after removal
X_knn, X_knn_memmap_path = None, None

# Binary membership: 1 for void-interior galaxies, 0 for shell and background
is_void = labels == "void"
y_labels = is_void.astype(np.int8)

print("First-level neighbor distances shape:", first_neighbor_dist.shape)
print("Neighbor-of-neighbor tensor: deferred until after background removal.")

# Plots

In [None]:
neighbor_order = np.arange(1, N_neighbours + 1)

colors = {"shell": "#1f77b4", "void": "#d62728", "background": "#2ca02c"}

# Mean distance per neighbour rank for each class; a class with no members gets zeros
class_masks = {cls: labels == cls for cls in ("void", "shell", "background")}
class_means = {}
for cls, mask in class_masks.items():
    if np.any(mask):
        class_means[cls] = first_neighbor_dist[mask].mean(axis=0)
    else:
        class_means[cls] = np.zeros_like(neighbor_order, dtype=float)
void_mean = class_means["void"]
shell_mean = class_means["shell"]
background_mean = class_means["background"]

# Draw one curve per class that is actually present, in the order void, shell, background
plt.figure(figsize=(8, 4.5))
legend_names = {"void": "Void interior", "shell": "Shell", "background": "Background (LSS)"}
for cls, legend_name in legend_names.items():
    if np.any(class_masks[cls]):
        plt.plot(neighbor_order, class_means[cls], label=legend_name, color=colors[cls], linewidth=2)
plt.xlabel("Neighbor index")
plt.ylabel("Mean distance [Mpc/h]")
plt.title("Mean k-NN distance by neighbor rank")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
save_path = plot_dir / "mean_knn_distance_by_rank.png"
plt.savefig(save_path, dpi=300)
print(f"Saved mean k-NN distance plot to: {save_path}")
plt.show()

# Density profiles around each void center: rho(R/Rv) and delta(R/Rv)

In [None]:
# Profile binning: nbins radial bins, computed up to R = rmax_factor * Rv
rmax_factor = 5.0
nbins = 40

# Per-void density and delta profiles from the library method
profile_outputs = synthetic_data.compute_density_profiles(
    positions=positions,
    void_centers=void_centers,
    void_radii=void_radii,
    rmax_factor=rmax_factor,
    nbins=nbins,
)
bin_edges, bin_centers, density_profiles, delta_profiles, nbar = profile_outputs

# Stacked statistics (mean, median, 16th/84th percentiles) for density, then for delta
stacked = stack_density_stats(density_profiles, delta_profiles)
(
    stack_mean_density,
    stack_median_density,
    stack_p16_density,
    stack_p84_density,
    stack_mean_delta,
    stack_median_delta,
    stack_p16_delta,
    stack_p84_delta,
) = stacked

# Arrays passed to both the plotting and the saving helper
profile_arrays = dict(
    bin_centers=bin_centers,
    density_profiles=density_profiles,
    delta_profiles=delta_profiles,
)

# Plot the stacked profiles plus a handful of individual voids
plot_density_profiles(
    **profile_arrays,
    stack_mean_density=stack_mean_density,
    stack_p16_density=stack_p16_density,
    stack_p84_density=stack_p84_density,
    stack_mean_delta=stack_mean_delta,
    stack_p16_delta=stack_p16_delta,
    stack_p84_delta=stack_p84_delta,
    max_voids_to_plot=10,
    save_dir=plot_dir,
)

# Save results
# NOTE(review): this rebinds result_dir, replacing the value taken from config.result_dir in
# the configuration cell — confirm that is intended.
result_dir = param_dir / "result"
out_path = result_dir / "void_density_profiles.npz"
save_density_profiles_npz(
    out_path=out_path,
    bin_edges=bin_edges,
    **profile_arrays,
    stack_mean_density=stack_mean_density,
    stack_median_density=stack_median_density,
    stack_p16_density=stack_p16_density,
    stack_p84_density=stack_p84_density,
    stack_mean_delta=stack_mean_delta,
    stack_median_delta=stack_median_delta,
    stack_p16_delta=stack_p16_delta,
    stack_p84_delta=stack_p84_delta,
    nbar=nbar,
    rmax_factor=rmax_factor,
    nbins=nbins,
)

# Remove 10% of background galaxies with largest distance to first neighbors

In [None]:
# Compute mean distance over first K neighbors per galaxy and select background subset
bg_mask = (labels == "background")
bg_indices = np.flatnonzero(bg_mask)
if bg_indices.size == 0:
    raise ValueError("No background galaxies found.")

# Explicitly use the first 20 neighbors (or fewer if not available)
K_MEAN = 20
k_cols = builtins.min(K_MEAN, first_neighbor_dist.shape[1])
knn_dists = first_neighbor_dist  # distances to neighbors 1..N (misleading name retained for compatibility)
mean_over_k = knn_dists[:, :k_cols].mean(axis=1)

n_remove = n_bg_kept // 10  # number of background galaxies to highlight (10% of the kept background)

# Pick top-k within background by mean over first K neighbors
k = int(builtins.min(n_remove, bg_indices.size))
if k > 0:
    # argpartition places the k largest means in the last k slots (in arbitrary order)
    top_in_bg = np.argpartition(mean_over_k[bg_indices], -k)[-k:]
else:
    # With k == 0 the slice [-0:] would return every index, i.e. select all background
    # galaxies instead of none, so the empty selection is built explicitly.
    top_in_bg = np.empty(0, dtype=np.intp)
top_global = bg_indices[top_in_bg]
# Sort descending by mean distance
top_global = top_global[np.argsort(mean_over_k[top_global])[::-1]]

# Positions and mean distances of the selected galaxies, in the same descending order
pts = positions[top_global]
vals = mean_over_k[top_global]

In [None]:
plt.figure(figsize=(8, 4.5))
neighbor_order = np.arange(1, N_neighbours + 1)

# Per-galaxy mean over every stored neighbour column.
# NOTE(review): this ranks all galaxies (not only background) and averages all columns.
# If n_remove is 0, the [-0:] slice below selects every galaxy. Confirm both are intended.
mean_knn = first_neighbor_dist.mean(axis=1)

k_top = builtins.min(n_remove, first_neighbor_dist.shape[0])
# Partition first, then order only the selected rows from largest to smallest mean
unsorted_top = np.argpartition(mean_knn, -k_top)[-k_top:]
descending = np.argsort(mean_knn[unsorted_top])[::-1]
top_idx = unsorted_top[descending]

for j, gi in enumerate(top_idx):
    # Only the first curve carries the legend label, and it is drawn slightly bolder
    line_style = (
        dict(alpha=0.6, lw=1.5, label=f"Top-{k_top} galaxies")
        if j == 0
        else dict(alpha=0.3, lw=1.0)
    )
    plt.plot(neighbor_order, first_neighbor_dist[gi], color="tab:red", **line_style)

# Average curve of the selected galaxies, drawn on top in black
top_mean_curve = first_neighbor_dist[top_idx].mean(axis=0)
plt.plot(neighbor_order, top_mean_curve, color="black", lw=3, label=f"Top-{k_top} mean")

plt.xlabel("Neighbor Order")
plt.ylabel("Distance")
plt.title(f"First Neighbor Distances: Top-{k_top} by mean k-NN distance")
plt.legend()
plt.grid(alpha=0.3)
plt.tight_layout()
save_path = plot_dir / f"top_{k_top}_knn_distance_profiles.png"
plt.savefig(save_path, dpi=300)
print(f"Saved top-{k_top} k-NN distance profiles plot to: {save_path}")
plt.show()

# Compute k-NN distance tensors after background galaxies removal


In [None]:
# Purpose: drop the selected background galaxies from the catalogue, then rebuild every
# per-galaxy product (membership labels, first-neighbour distances, neighbour tensor) so that
# all arrays stay row-aligned with the reduced `positions`.
# NOTE(review): this cell rebinds `positions`, `labels` and `first_neighbor_dist`, so running
# it a second time selects and removes a further `n_remove` galaxies from the already reduced
# catalogue. Run it once per fresh kernel.

# 1) Select background galaxies to remove by mean over first K neighbors
#    (`n_remove` is taken from the earlier selection cell)
K_MEAN = 20
rm_indices, mean_over_k, k_cols = select_top_background_by_mean_knn(
    first_neighbor_dist=first_neighbor_dist,
    labels=labels,
    n_remove=n_remove,
    K_MEAN=K_MEAN,
    background_label="background",
)

# Preserve pre-removal distances for the removed background galaxies.
# The full pre-removal array is kept under its own name because `first_neighbor_dist`
# is rebound in step 3.
prev_first_neighbor_dist = first_neighbor_dist.copy()
removed_bg_dist_pre = prev_first_neighbor_dist[rm_indices]

# 2) Remove selected galaxies from positions and labels
positions, labels, keep_mask = remove_points(
    positions=positions,
    labels=labels,
    rm_indices=rm_indices,
)

print(f"Removed {rm_indices.size} background galaxies with highest mean over first {k_cols} neighbors.")
print(f"New totals: positions={positions.shape[0]}, background={np.sum(labels=='background')}, shell={np.sum(labels=='shell')}, void={np.sum(labels=='void')}")

# IMPORTANT: Recompute membership after removal so it stays aligned with positions
y_labels = (labels == "void").astype(np.int8)

# Invalidate any pre-removal in-memory tensor to avoid mismatched lengths
X_knn = None

# 3) Recompute k-NN for the updated catalogue (first-neighbor distances only)
first_neighbor_idx, first_neighbor_dist = recompute_first_neighbors(
    positions=positions,
    N=int(knn_neighbors),
    box_size_mpc=float(box_size_mpc),
    use_periodic_boundaries=bool(use_periodic_boundaries),
    chunk_size_query=int(2e5),
)

# 4) Efficient construction of neighbor-of-neighbor distance cube into a memmap on disk
#    (set the flag to False to skip it; the path then stays None)
compute_neighbor_cube_after_removal = True
X_knn_memmap_path = None
if compute_neighbor_cube_after_removal:
    info_dir = param_dir / 'galaxy_info_files'
    # NOTE(review): the memmap target carries an .npz suffix — confirm which on-disk format
    # build_neighbor_tensor_to_memmap actually writes.
    X_knn_memmap_path = build_neighbor_tensor_to_memmap(
        positions=positions,
        first_neighbor_idx=first_neighbor_idx,
        N=int(knn_neighbors),
        box_size_mpc=float(box_size_mpc),
        use_periodic_boundaries=bool(use_periodic_boundaries),
        out_path=Path(info_dir / f'tensor_distances_{param_str}.npz'),
        rows_per_chunk_tensor=int(1e5),
    )

# 5) Plot neighbor-distance curves (Top-X by mean over first K neighbors, updated)
# NOTE(review): the title hard-codes "first 20 neighbors" while k_cols is passed as a variable.
plot_knn_curves_by_class_and_topk(
    first_neighbor_dist=first_neighbor_dist,
    labels=labels,
    k_cols=k_cols,
    n_remove=n_remove,
    title="k-NN distance curves after background removal (mean over first 20 neighbors)",
    save_dir=plot_dir,
)

# Save positions, membership labels, first-neighbor distances, and knn distance tensor

In [None]:
# Output archive: galaxy positions, membership, first-neighbour distances, void centers
# and, when available, the path of the on-disk k-NN tensor
knn_file = galaxy_info_dir / f'galaxy_knn_{param_str}.npz'

# Rebuild membership from the current labels so it matches the current positions
y_labels = (labels == "void").astype(np.int8)

# Sanity check: the three per-galaxy arrays must have the same number of rows
n_pos, n_mem, n_fnd = positions.shape[0], y_labels.shape[0], first_neighbor_dist.shape[0]
print(f"Raw counts before save -> positions: {n_pos} | membership: {n_mem} | first_neighbor_distances: {n_fnd}")
assert n_pos == n_mem == n_fnd, (
    f"Mismatch before save: positions={n_pos}, membership={n_mem}, first_neighbor_distances={n_fnd}")

# Positions and void centers are cast to float32; membership is int8.
# first_neighbor_dist is stored with whatever dtype it already has.
payload = dict(
    positions=positions.astype(np.float32),
    membership=y_labels,
    first_neighbor_distances=first_neighbor_dist,
    void_centers=void_centers.astype(np.float32),
)

# The tensor itself is not embedded; only the path of its on-disk file is recorded
if X_knn_memmap_path is not None:
    print("Including k-NN tensor memmap path in output NPZ.")
    memmap_path_text = str(X_knn_memmap_path)
    payload['tensor_distance_memmap_path'] = np.array(memmap_path_text)

np.savez_compressed(knn_file, **payload)
print(f"Saved k-NN outputs to {knn_file}")