Plan
from voidx import GalaxyDataset, split_indices, normalize_features, setup_paths, setup_device_and_seed
- Build KDTree over galaxy positions and estimate mean number density.
- Grow spheres from seeds until density <= 0.7×mean (δ ≈ −0.3).
- Approach A: select seeds as high-probability galaxies whose KNN also have high probability; apply non-max suppression.
- Approach B: use DBSCAN to cluster low- (or high-) probability galaxies; seed spheres from the resulting clusters and apply non-max suppression (NMS).
- Provide minimal plotting and example usage with your X and probabilities.



In [None]:
# Import VoidX configuration
import voidx
from voidx import *

# Typeset all figure text through LaTeX, using a serif face at 14 pt
plt.rcParams.update({
    'text.usetex': True,
    'font.family': 'serif',
    'font.size': 14,
})

# Configuration


In [None]:
from config import get_config

# Single configuration object for the notebook
# (per the original note, it is populated from config/global.json).
config = get_config()

# Legacy module-level aliases, kept so cells written before the config
# object existed continue to find the names they expect.
box, local, hdf, VIDE = config.box, config.local, config.hdf, config.VIDE
name, fraction_in_voids, model_name = (
    config.name,
    config.fraction_in_voids,
    config.model_name,
)
device, seed = config.device, config.seed

# Directory handles exposed by the configuration object.
data_dir = config.data_dir
checkpoint_dir_spec, checkpoint_dir_global = (
    config.checkpoint_dir_spec,
    config.checkpoint_dir_global,
)
plot_dir, result_dir = config.plot_dir, config.result_dir

# Echo the active settings into the cell output.
config.print_info()

# Load and process data

In [33]:
# Read the saved galaxy catalogue for the configured void fraction.
output_file = data_dir / f'galaxy_positions_{fraction_in_voids}.npy'
data = np.load(output_file)
print(f"Loaded data shape: {data.shape}")

# Columns 0-2 are the (x, y, z) positions; column 3 holds the flags,
# cast to 32-bit integers for use as labels.
X_pos, X_flags = data[:, :3], data[:, 3].astype(np.int32)

# The positions are the only features used from here on.
X = X_pos

# Per-column z-score; the small epsilon keeps the division finite
# when a column has zero spread.
mean = X.mean(axis=0, keepdims=True)
std = X.std(axis=0, keepdims=True) + 1e-6
X_normalized = (X - mean) / std

# float32 tensor of the standardized features, ready for inference.
X_tensor = torch.as_tensor(X_normalized, dtype=torch.float32)

Loaded data shape: (44488, 4)


# ML approach

In [None]:
# GalaxyDataset (like split_indices) comes from the voidx package rather than
# being defined in this notebook.
# NOTE(review): X_train/y_train, X_val/y_val and X_test/y_test are expected to
# exist already; the cell that creates them is not visible here -- confirm it
# runs before this one.

batch_size = 128

# One dataset per split.
ds_train, ds_val, ds_test = (
    GalaxyDataset(features, targets)
    for features, targets in (
        (X_train, y_train),
        (X_val, y_val),
        (X_test, y_test),
    )
)

# Loader settings shared by all three splits; only the training loader shuffles.
_loader_opts = dict(batch_size=batch_size, num_workers=0)
dl_train = DataLoader(ds_train, shuffle=True, **_loader_opts)
dl_val = DataLoader(ds_val, shuffle=False, **_loader_opts)
dl_test = DataLoader(ds_test, shuffle=False, **_loader_opts)

X shape: (44488, 10) y shape: (44488,)
