# Import Packages

In [1]:
import os
import time
import math
import copy
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import integrate, interpolate, stats
from scipy.interpolate import InterpolatedUnivariateSpline
from scipy.integrate import quad

import h5py
from scipy.spatial.distance import cdist
from scipy.interpolate import interp1d

from astropy.io import fits
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.cosmology import Planck18 as cosmo
from astropy.table import Table

from nbodykit.lab import *
from nbodykit import setup_logging, style

from pypower import CatalogMesh, MeshFFTPower, CatalogFFTPower, PowerSpectrumStatistics, utils, PowerSpectrumWedges
import sympy as sp
from sympy import Symbol

In [2]:
# Set the global matplotlib defaults in one call: figure size, font size and
# family, LaTeX text rendering, axes line width and figure DPI.
plt.rcParams.update({
    "figure.figsize": (12, 7),
    "font.size": 20,
    "font.family": 'serif',
    'text.usetex': True,
    'axes.linewidth': 1.5,
    'figure.dpi': 600,
})

# Functions and Procedure

We use the following method:

First, the coordinates are stored in the /gpfs/nchugh/dmcoordssnaps/snap-{snap}/coords_*.npy files, numbered from 000 to 599. The fields are stored in similar folders. For z=0 (snapshot 99), the field is stored in /gpfs/nchugh/dmcoords/field.npy.

Second, we write functions to sample points using rejection sampling. With these in place, we can compute the probability distributions (with respect to the stellar mass, SFR, and stellar metallicity of a galaxy) used to sample sirens.

In [3]:
# Functions that deal with probability distributions and sampling

def normalize_pdf(pdf, x_arr):
    """
    Normalize a probability density function (PDF) over a given array of x values.

    The normalization constant is the trapezoidal-rule integral of `pdf` over `x_arr`.

    Parameters:
        pdf (array-like): Probability density values.
        x_arr (array-like): Corresponding x values.

    Returns:
        np.ndarray: `pdf` divided by its trapezoidal integral. If that integral is
        zero the result contains inf/nan (no exception is raised).
    """
    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever name this
    # NumPy version provides so the function works on both sides of the rename.
    trapezoid = getattr(np, "trapezoid", None) or np.trapz
    pdf = np.asarray(pdf, dtype=float)
    normalization = trapezoid(pdf, x_arr)  # Integrate PDF over x_arr
    return pdf / normalization

def rejection_sampling(N, p, x_low, x_high, y_max, p_args=()):
    """
    Perform rejection sampling to draw samples from a probability distribution.

    For every sample, candidate points (x, y) are drawn uniformly in
    [x_low, x_high) x [0, y_max) until one satisfies y <= p(x, *p_args).

    Parameters:
        N (int): Number of samples to generate.
        p (callable): Probability distribution function to sample from; called as p(x, *p_args).
        x_low (float): Lower bound of x range.
        x_high (float): Upper bound of x range.
        y_max (float): Maximum value of the PDF in the range (can be an overestimate).
        p_args (sequence): Additional arguments for the PDF function.

    Returns:
        np.ndarray: Array of N sampled points.

    Note:
        The loop never terminates if p is below every drawn y over the whole range
        (e.g. p is identically zero while y_max > 0).
    """
    # The default for p_args is an immutable tuple rather than a shared list.
    sampled_points = []
    for _ in range(N):
        while True:
            x = np.random.uniform(x_low, x_high)
            y = np.random.uniform(0, y_max)
            if y <= p(x, *p_args):
                sampled_points.append(x)
                break
    return np.array(sampled_points)

def rejection_sampling_numeric(N, m_array, p_m_array):
    """
    Perform rejection sampling using a numeric PDF array instead of a function.

    The PDF value for a candidate x is read from `p_m_array` at the index returned
    by np.searchsorted(m_array, x, side="left"), i.e. at the first grid point
    that is >= x (no interpolation).

    Parameters:
        N (int): Number of samples to generate.
        m_array (array-like): Sorted array of x values.
        p_m_array (array-like): PDF values corresponding to m_array.

    Returns:
        np.ndarray: Array of N sampled points.
    """
    m_array = np.asarray(m_array)
    p_m_array = np.asarray(p_m_array)
    y_max = p_m_array.max()
    x_low, x_high = m_array.min(), m_array.max()
    last_idx = len(m_array) - 1
    sampled_points = []
    for _ in range(N):
        while True:
            x = np.random.uniform(x_low, x_high)
            y = np.random.uniform(0, y_max)
            # np.random.uniform can return x_high through floating-point rounding,
            # in which case searchsorted yields len(m_array); clamp to stay in bounds.
            idx = min(np.searchsorted(m_array, x, side="left"), last_idx)
            if y <= p_m_array[idx]:
                sampled_points.append(x)
                break
    return np.array(sampled_points)

In [4]:
# Functions needed to build the galaxy population distribution: star formation rate, metallicity, delay-time distribution, merger rate, and expected GW event counts.

def SFR(z):
    """
    Madau-Dickinson star formation rate (SFR) as a function of redshift z.

    Evaluates 0.015 * (1+z)^2.7 / (1 + ((1+z)/2.9)^5.6) and multiplies by 1e9.

    Parameters:
        z (float or array): Redshift(s).

    Returns:
        float or np.ndarray: SFR density at z.
        NOTE(review): the bare fitting formula is usually quoted in
        M_sun yr^-1 Mpc^-3; the trailing 1e9 factor presumably converts this to
        per Gpc^3 - confirm the intended units.
    """
    one_plus_z = 1 + z
    rise = one_plus_z ** 2.7
    turnover = 1 + (one_plus_z / 2.9) ** 5.6
    return 0.015 * rise / turnover * 1e9

def metallicity(C1, C2):
    """
    Average two metallicity indicators.

    Parameters:
        C1 (array-like or float): First metallicity indicator.
        C2 (array-like or float): Second metallicity indicator.

    Returns:
        np.ndarray: Arithmetic mean of C1 and C2 wrapped in np.array
        (a 0-d array when both inputs are scalars).
    """
    mean_indicator = (C1 + C2) / 2
    return np.array(mean_indicator)

def P_t_d(t_d, kappa=1, t_min=100e6, norm=True):
    """
    Delay time distribution: P(t_d) ∝ t_d^(-kappa) for t_d >= t_min, 0 otherwise.

    When `norm` is True the power law is normalized to unit integral over
    [t_min, H_0_inv], where H_0_inv = 13.98e9 (presumably the Hubble time in
    years - confirm). For kappa == 1 the normalization is 1/ln(H_0_inv/t_min);
    for any other kappa it is (1-kappa) / (H_0_inv^(1-kappa) - t_min^(1-kappa)).

    Parameters:
        t_d (float or array): Delay time(s) in years.
        kappa (float): Power-law index.
        t_min (float): Minimum delay time (years).
        norm (bool): If True, normalize the distribution.

    Returns:
        np.ndarray: Probability density values (0-d array for scalar input).
    """
    H_0_inv = 13.98e9
    # np.piecewise returns an array with the dtype of its input, so integer
    # delay times would truncate every density to 0; work in floats.
    t_d = np.asarray(t_d, dtype=float)
    if not norm:
        amplitude = 1.0
    elif kappa == 1:
        amplitude = 1 / np.log(H_0_inv / t_min)
    else:
        # General power-law normalization; the logarithmic form only holds for kappa == 1.
        amplitude = (1 - kappa) / (H_0_inv ** (1 - kappa) - t_min ** (1 - kappa))
    return np.piecewise(
        t_d,
        [t_d < t_min, t_d >= t_min],
        [0, lambda t: amplitude * t ** (-kappa)],
    )

def R_integrand(z, t_merg, t_min=100e6, kappa=1):
    """
    Integrand of the merger-rate integral over formation redshift.

    The delay time is the lookback time at `z` (in years) minus `t_merg`. The
    integrand is the delay-time probability times cosmo.lookback_time_integrand(z)
    times the star formation rate at `z`.

    Parameters:
        z (float): Redshift.
        t_merg (float): Merger time in years (lookback time of the merger).
        t_min (float): Minimum delay time (years).
        kappa (float): Power-law index for delay time distribution.

    Returns:
        float: Value of the integrand.
    """
    formation_lookback_yr = cosmo.lookback_time(z).to_value('year')
    delay = formation_lookback_yr - t_merg
    delay_weight = P_t_d(delay, kappa=kappa, t_min=t_min)
    return delay_weight * cosmo.lookback_time_integrand(z) * SFR(z)

def merger_rate(z, t_min=500e6, kappa=1):
    """
    Merger rate at redshift z, obtained by integrating R_integrand from z to infinity.

    Parameters:
        z (float): Redshift.
        t_min (float): Minimum delay time (years). Note the default here (500e6)
            differs from R_integrand's own default; it is always passed explicitly.
        kappa (float): Power-law index.

    Returns:
        float: Merger rate (the integral value; the quad error estimate is discarded).
    """
    merger_lookback_yr = cosmo.lookback_time(z).to_value('year')
    quad_result = integrate.quad(
        R_integrand,
        z,
        np.inf,
        args=(merger_lookback_yr, t_min, kappa),
        limit=300,
    )
    return quad_result[0]

def N_GW_integrand(z, interp_rate, obs_time, t_min=500e6, kappa=1):
    """
    Integrand (in redshift) for the expected number of gravitational wave events.

    Multiplies the full-sky differential comoving volume (4*pi times the
    per-steradian value, in Gpc^3) by interp_rate(z) and the observation time,
    divided by (1 + z) (presumably the time-dilation factor - confirm).

    Parameters:
        z (float): Redshift.
        interp_rate (callable): Interpolated merger rate function, called as interp_rate(z).
        obs_time (float): Observation time (years).
        t_min (float): Minimum delay time (years). Not used in this function.
        kappa (float): Power-law index. Not used in this function.

    Returns:
        float: Value of the integrand.
    """
    volume_per_sr = cosmo.differential_comoving_volume(z).to_value('Gpc3 / sr')
    full_sky_volume = 4 * np.pi * volume_per_sr
    return full_sky_volume * interp_rate(z) / (1 + z) * obs_time

def N_GW_integrand_test(z, interp_rate, obs_time, t_min=500e6, kappa=1):
    """
    Alternative integrand for the expected number of GW events, using a different rate normalization.

    The rate at `z` is rescaled so that the rate at z = 0 equals 23.9, i.e. the
    integrand uses rate(z) * 23.9 / rate(0).

    Parameters:
        z (float): Redshift.
        interp_rate (callable): Rate function called as
            interp_rate(z, t_min=..., kappa=...). Despite the name it must accept
            these keywords (e.g. a function with merger_rate's signature), not a
            plain spline.
        obs_time (float): Observation time (years).
        t_min (float): Minimum delay time (years).
        kappa (float): Power-law index, forwarded to interp_rate.

    Returns:
        float: Value of the integrand.
    """
    dVdz = 4 * np.pi * cosmo.differential_comoving_volume(z).to_value('Gpc3 / sr')
    # Forward the caller's kappa instead of a hard-coded kappa=1.
    rate = interp_rate(z, t_min=t_min, kappa=kappa)
    local_rate = interp_rate(0, t_min=t_min, kappa=kappa)
    return dVdz * rate * 23.9 / local_rate / (1 + z) * obs_time

# NEW FUNCTION: N_GW_single to compute the expected number of GW events at a single redshift (very small range of z).

def N_GW_single(z, Range=0.01, obs_time=1, t_min=500e6, kappa=1, grid_points=1999):
    """
    Expected number of GW events in the thin redshift slice [z, z + Range].

    merger_rate is tabulated on `grid_points` redshifts across the slice, linearly
    interpolated (spline with k=1), and N_GW_integrand is integrated over the slice.

    Parameters:
        z (float): Redshift (lower edge of the slice).
        Range (float): Width of the redshift slice.
        obs_time (float): Observation time (years).
        t_min (float): Minimum delay time (years).
        kappa (float): Power-law index.
        grid_points (int): Number of grid points for interpolation; merger_rate is
            evaluated once per grid point.
    Returns:
        float: Expected number of GW events in the slice.
    """
    z_upper = z + Range
    z_grid = np.linspace(z, z_upper, grid_points)
    rate_grid = np.array([merger_rate(z_i, t_min=t_min, kappa=kappa) for z_i in z_grid])
    rate_spline = interpolate.InterpolatedUnivariateSpline(z_grid, rate_grid, k=1)
    quad_result = integrate.quad(
        N_GW_integrand,
        z,
        z_upper,
        args=(rate_spline, obs_time, t_min, kappa),
        limit=100,
    )
    return quad_result[0]

# I have not yet used the N_GW_bin function because we need to compute this for just one redshift. There is no need to bin the redshift yet.

In [5]:
# Functions used to compute the probability distributions of siren population in galaxies. 

def P_g_full(g, power_p, power_n, Break, low_g=None, high_g=None):
    """
    Compute a broken power-law probability distribution for a galaxy property (e.g., mass, SFR, metallicity).

    The weight is g**power_p below `Break` and g**power_n / Break**(power_n - power_p)
    at or above it, which makes the two branches meet at the break. Weights outside
    the optional cut-offs are set to zero, and the result is divided by its sum.

    Parameters:
        g (array-like): Property values (e.g., mass, SFR, metallicity).
        power_p (float): Power-law exponent below the break.
        power_n (float): Power-law exponent above the break.
        Break (float): Break value separating the two regimes.
        low_g (float, optional): Lower cut-off for g; None disables it.
        high_g (float, optional): Upper cut-off for g; None disables it.

    Returns:
        np.ndarray: Probability distribution for g, normalized to unit sum. If
        every weight is zero the result is nan (division by a zero sum).
    """
    # Work in floats so integer inputs with negative integer exponents do not raise.
    g = np.atleast_1d(np.asarray(g, dtype=float))
    prob = np.zeros(g.shape)

    below = g < Break
    above = g >= Break  # NaN entries satisfy neither mask and keep weight 0
    prob[below] = g[below] ** power_p
    prob[above] = g[above] ** power_n / Break ** (power_n - power_p)

    # Compare against None explicitly so that a cut-off of 0 is honoured.
    if low_g is not None:
        prob[g < low_g] = 0
    if high_g is not None:
        prob[g > high_g] = 0

    return prob / np.sum(prob)


def P_g_unconditional(
    M, sfr, metal=None,
    m_power_p=None, m_power_n=None, m_break=None, min_m=None, max_m=None,
    SFR_power_p=None, SFR_power_n=None, SFR_break=None, min_SFR=None, max_SFR=None,
    metal_power_p=None, metal_power_n=None, metal_break=None, min_metal=None, max_metal=None
):
    """
    Joint selection probability built from independent broken power laws.

    The mass and SFR distributions from P_g_full are multiplied element-wise;
    when `metal` is given, the metallicity distribution is multiplied in as well.
    The product is then renormalized to unit sum.

    Parameters:
        M (array-like): Stellar mass.
        sfr (array-like): Star formation rate.
        metal (array-like, optional): Metallicity; omitted from the product when None.
        m_power_p, m_power_n, m_break, min_m, max_m: Parameters for mass distribution.
        SFR_power_p, SFR_power_n, SFR_break, min_SFR, max_SFR: Parameters for SFR distribution.
        metal_power_p, metal_power_n, metal_break, min_metal, max_metal: Parameters for metallicity distribution.

    Returns:
        np.ndarray: Normalized joint probability distribution.
    """
    joint = P_g_full(M, m_power_p, m_power_n, m_break, min_m, max_m) * \
        P_g_full(sfr, SFR_power_p, SFR_power_n, SFR_break, min_SFR, max_SFR)
    if metal is not None:
        joint = joint * P_g_full(metal, metal_power_p, metal_power_n, metal_break, min_metal, max_metal)
    return joint / np.sum(joint)


def P_MZR(M, delta_l, delta_h, M_c, low_M=None, high_M=None):
    """
    Probability distribution for the Mass-Metallicity Relation (MZR).

    A broken power law in stellar mass evaluated through P_g_full, with exponent
    1/delta_l below the break M_c and -1/delta_h above it.

    Parameters:
        M (array-like): Stellar mass.
        delta_l (float): Slope parameter below the break (exponent is 1/delta_l).
        delta_h (float): Slope parameter above the break (exponent is -1/delta_h).
        M_c (float): Break mass.
        low_M (float, optional): Lower cut-off for mass.
        high_M (float, optional): Upper cut-off for mass.

    Returns:
        np.ndarray: Probability distribution for MZR.
    """
    exponent_below = 1 / delta_l
    exponent_above = -1 / delta_h
    return P_g_full(M, exponent_below, exponent_above, M_c, low_M, high_M)


def P_FMR(M, sfr, delta_h, M_c, low_M, high_M, epsilon_h, sfr_c, low_sfr, high_sfr):
    """
    Probability distribution for the Fundamental Metallicity Relation (FMR).

    Product of two P_g_full distributions, one in mass and one in SFR. Each is
    flat (exponent 0) below its break and falls with exponent -1/slope above it.
    The product itself is not renormalized here.

    Parameters:
        M (array-like): Stellar mass.
        sfr (array-like): Star formation rate.
        delta_h (float): Slope parameter above the break for mass.
        M_c (float): Break mass.
        low_M, high_M (float): Mass cut-offs.
        epsilon_h (float): Slope parameter above the break for SFR.
        sfr_c (float): Break SFR.
        low_sfr, high_sfr (float): SFR cut-offs.

    Returns:
        np.ndarray: Probability distribution for FMR.
    """
    mass_term = P_g_full(M, 0, -1/delta_h, M_c, low_M, high_M)
    sfr_term = P_g_full(sfr, 0, -1/epsilon_h, sfr_c, low_sfr, high_sfr)
    return mass_term * sfr_term


def P_MZSFR(
    M, sfr, Z,
    delta_l, delta_h, M_c, low_M, high_M,
    epsilon_l, epsilon_h, sfr_c, low_sfr, high_sfr,
    zeta_l, zeta_h, Z_c, low_Z, high_Z
):
    """
    Joint probability distribution for the Mass-Z-SFR relation (MZSFR).

    Product of three P_g_full broken power laws (mass, SFR, metallicity), each
    with exponent 1/slope_l below its break and -1/slope_h above it. The product
    itself is not renormalized here.

    Parameters:
        M (array-like): Stellar mass.
        sfr (array-like): Star formation rate.
        Z (array-like): Metallicity.
        delta_l, delta_h (float): Slope parameters for mass below/above break.
        M_c (float): Break mass.
        low_M, high_M (float): Mass cut-offs.
        epsilon_l, epsilon_h (float): Slope parameters for SFR below/above break.
        sfr_c (float): Break SFR.
        low_sfr, high_sfr (float): SFR cut-offs.
        zeta_l, zeta_h (float): Slope parameters for Z below/above break.
        Z_c (float): Break metallicity.
        low_Z, high_Z (float): Metallicity cut-offs.

    Returns:
        np.ndarray: Probability distribution for MZSFR.
    """
    mass_term = P_g_full(M, 1/delta_l, -1/delta_h, M_c, low_M, high_M)
    sfr_term = P_g_full(sfr, 1/epsilon_l, -1/epsilon_h, sfr_c, low_sfr, high_sfr)
    metal_term = P_g_full(Z, 1/zeta_l, -1/zeta_h, Z_c, low_Z, high_Z)
    return mass_term * sfr_term * metal_term

def g_selection_full(df, N, p, p_args, SFR_Z_type):
    """
    Select a subsample of galaxies based on a probability distribution function (PDF) and given parameters.

    The PDF is evaluated on the columns required by `SFR_Z_type`, normalized to
    unit sum, and N distinct rows are drawn without replacement with those weights.

    Parameters:
        df (DataFrame): Input data containing galaxy properties.
        N (int): Number of galaxies to select; must not exceed the number of rows
            with non-zero probability (np.random.choice raises otherwise).
        p (callable): PDF function for selection.
        p_args (list): Additional arguments for the PDF function.
        SFR_Z_type (str): Model that decides how `p` is called:
            'old_powerlaw_model'    -> p(M, SFR(df['z']), None, *p_args)
            'old_powerlaw_observed' -> p(M, sfr, metallicity, *p_args)
            'MZR'                   -> p(M, *p_args)
            'FMR'                   -> p(M, sfr, *p_args)
            'MZSFR'                 -> p(M, sfr, metallicity, *p_args)

    Returns:
        DataFrame: Subsampled data (rows of df in the drawn order).

    Raises:
        ValueError: If SFR_Z_type is not one of the models listed above.
    """
    if SFR_Z_type == 'old_powerlaw_model':
        sfr = SFR(np.array(df['z']))
        M = np.array(df['stellar_mass'])
        weights = p(M, sfr, None, *p_args)
    elif SFR_Z_type in ('old_powerlaw_observed', 'MZSFR'):
        sfr = np.array(df['sfr'])
        metal = np.array(df['metallicity'])
        M = np.array(df['stellar_mass'])
        weights = p(M, sfr, metal, *p_args)
    elif SFR_Z_type == 'MZR':
        M = np.array(df['stellar_mass'])
        weights = p(M, *p_args)
    elif SFR_Z_type == 'FMR':
        sfr = np.array(df['sfr'])
        M = np.array(df['stellar_mass'])
        weights = p(M, sfr, *p_args)
    else:
        # Previously an unknown model fell through to an undefined `prob`.
        raise ValueError(f"Unknown SFR_Z_type: {SFR_Z_type!r}")

    prob = weights / np.sum(weights)

    # Draw row positions rather than index labels so that a DataFrame with
    # duplicated index labels still yields exactly N rows.
    positions = np.random.choice(len(df), size=N, replace=False, p=prob)
    return df.iloc[positions]

In [6]:
# Functions that deal with arrays.

def find_max(f, x_low, x_high, args):
    """
    Estimate the maximum of f on [x_low, x_high] from a 100-point uniform grid.

    The function is evaluated once on the whole grid, so it must accept an array
    as its first argument. The result is the largest sampled value, not a
    refined optimum.

    Parameters:
        f (callable): Function to maximize, called as f(x_grid, *args).
        x_low (float): Lower bound.
        x_high (float): Upper bound.
        args (list): Arguments for the function f.

    Returns:
        float: Largest value of f on the grid.
    """
    grid = np.linspace(x_low, x_high, num=100)
    sampled_values = f(grid, *args)
    return max(sampled_values)

def find_nearest(array, value):
    """
    Return the element of a sorted array that is closest to `value`.

    Uses a binary search (np.searchsorted), so `array` must be sorted in
    ascending order. On an exact tie the larger neighbour is returned.

    Parameters:
        array (array-like): Sorted array to search.
        value (float): Value to find.

    Returns:
        float: Nearest value in the array.
    """
    right = np.searchsorted(array, value, side="left")
    if right == 0:
        # value is at or below the first element
        return array[right]
    if right == len(array):
        # value is above the last element
        return array[right - 1]
    gap_left = math.fabs(value - array[right - 1])
    gap_right = math.fabs(value - array[right])
    if gap_left < gap_right:
        return array[right - 1]
    return array[right]

def find_nearest_idx(array, value):
    """
    Return the index of the element of a sorted array that is closest to `value`.

    Uses a binary search (np.searchsorted), so `array` must be sorted in
    ascending order. On an exact tie the index of the larger neighbour is returned.

    Parameters:
        array (array-like): Sorted array to search.
        value (float): Value to find.

    Returns:
        int: Index of the nearest value in the array.
    """
    right = np.searchsorted(array, value, side="left")
    if right == 0:
        # value is at or below the first element
        return right
    if right == len(array):
        # value is above the last element
        return right - 1
    gap_left = math.fabs(value - array[right - 1])
    gap_right = math.fabs(value - array[right])
    if gap_left < gap_right:
        return right - 1
    return right


In [7]:
# Functions that deal with binary mass modules

def m_kroupa(m, m_min, m_max, m_power):
    """
    Compute the normalized Kroupa initial mass function (IMF) probability for given masses.

    The density is m**(-m_power) divided by its integral over [m_min, m_max],
    and zero outside that interval.

    Parameters:
        m (float or array-like): Mass or array of masses.
        m_min (float): Minimum mass.
        m_max (float): Maximum mass.
        m_power (float): Power-law index.

    Returns:
        np.ndarray: Probability values for each mass (length-1 array for scalar input).
    """
    m = np.atleast_1d(np.asarray(m, dtype=float))
    # Integral of m**(-m_power) over [m_min, m_max]; the closed form divides by
    # (1 - m_power), so the m_power == 1 case needs the logarithmic limit.
    if m_power == 1:
        norm = np.log(m_max / m_min)
    else:
        norm = (m_max ** (-m_power + 1) - m_min ** (-m_power + 1)) / (-m_power + 1)
    p_m = np.zeros(m.shape)
    inside = (m >= m_min) & (m <= m_max)  # NaN masses fall outside and get 0
    p_m[inside] = m[inside] ** -m_power / norm
    return p_m

def m_kroupa_unnormal(m, m_min, m_max, m_power):
    """
    Compute the unnormalized Kroupa IMF probability for given masses.

    The value is m**(-m_power) inside [m_min, m_max] and zero outside.

    Parameters:
        m (float or array-like): Mass or array of masses.
        m_min (float): Minimum mass.
        m_max (float): Maximum mass.
        m_power (float): Power-law index.

    Returns:
        np.ndarray: Unnormalized probability values for each mass
        (length-1 array for scalar input).
    """
    # Masked assignment replaces the per-element np.append loop, which copied
    # the whole output array on every iteration.
    m = np.atleast_1d(np.asarray(m, dtype=float))
    p_m = np.zeros(m.shape)
    inside = (m >= m_min) & (m <= m_max)  # NaN masses fall outside and get 0
    p_m[inside] = m[inside] ** -m_power
    return p_m

def m_peak_model(m, m_min, m_max, m_power, peak_pos, sigma, height):
    """
    Mass function made of a truncated power law plus a Gaussian peak.

    The power-law part comes from m_kroupa_unnormal; the peak is a unit-area
    Gaussian centred on `peak_pos` with width `sigma`, scaled by `height`.
    The sum is divided by (integral of the power law over [m_min, m_max]) + height,
    i.e. the Gaussian is counted with its full area `height` regardless of how
    much of it lies inside [m_min, m_max].

    Parameters:
        m (float or array-like): Mass or array of masses (must support `m - peak_pos`).
        m_min (float): Minimum mass.
        m_max (float): Maximum mass.
        m_power (float): Power-law index.
        peak_pos (float): Center of the Gaussian peak.
        sigma (float): Width of the Gaussian peak.
        height (float): Area (weight) of the Gaussian peak.

    Returns:
        np.ndarray: Probability values for each mass.
    """
    power_law_part = m_kroupa_unnormal(m, m_min, m_max, m_power)
    gaussian_part = height * 1 / (sigma * (2 * np.pi) ** 0.5) * np.exp(-0.5 * ((m - peak_pos) / sigma) ** 2)
    power_law_area = quad(m_kroupa_unnormal, m_min, m_max, args=(m_min, m_max, m_power))[0]
    norm = power_law_area + height
    return (power_law_part + gaussian_part) / norm

def sec_mass_sample(mass_sample, m_min):
    """
    Draw secondary masses with the mass ratio q uniform in [m_min / m1, 1).

    For each primary mass m1, q is drawn uniformly between m_min / m1 and 1, and
    the secondary mass is q * m1 (so it lies between m_min and m1).

    Parameters:
        mass_sample (float or array-like): Primary mass or array of primary masses.
        m_min (float): Minimum mass for secondary.

    Returns:
        np.ndarray: Array of secondary masses (length 1 for scalar input).
    """
    primaries = [mass_sample] if np.isscalar(mass_sample) else mass_sample
    count = len(primaries)
    secondaries = np.zeros(count)
    for k in range(count):
        primary = primaries[k]
        ratio = np.random.uniform(m_min / primary, 1)
        secondaries[k] = ratio * primary
    return secondaries

def sec_mass_sample_beta(mass_sample, m_min, beta):
    """
    Draw secondary masses from a power law in secondary mass.

    For each primary mass m1, the secondary is rejection-sampled on [m_min, m1]
    from m_kroupa with power-law index -beta, i.e. a density proportional to
    m2**beta.

    Parameters:
        mass_sample (float or array-like): Primary mass or array of primary masses.
        m_min (float): Minimum mass for secondary.
        beta (float): Power-law index of the secondary-mass distribution.

    Returns:
        np.ndarray: Array of secondary masses (length 1 for scalar input).
    """
    primaries = [mass_sample] if np.isscalar(mass_sample) else mass_sample
    count = len(primaries)
    sec_mass = np.zeros(count)
    for k in range(count):
        primary = primaries[k]
        # Envelope height for the rejection sampler, taken from a grid search.
        p_max = find_max(m_kroupa, m_min, primary, [m_min, primary, -beta])
        drawn = rejection_sampling(1, m_kroupa, m_min, primary, p_max, [m_min, primary, -beta])
        sec_mass[k] = drawn[0]
    return sec_mass

def chirp_mass(m1, m2):
    """
    Chirp mass of a binary: (m1 * m2)^(3/5) / (m1 + m2)^(1/5).

    Parameters:
        m1 (float): Mass of the primary.
        m2 (float): Mass of the secondary.

    Returns:
        float: Chirp mass, in the same units as the inputs.
    """
    mass_product = m1 * m2
    total_mass = m1 + m2
    return mass_product ** (3/5) / total_mass ** (1/5)

def PISN_mass(z, Z_star, M_Z_star, alpha, gamma, zeta):
    """
    Pair-instability supernova (PISN) mass scale as a function of redshift.

    The metallicity is modelled as Z = 10^(gamma * z + zeta), and the PISN mass is
    M_Z_star - alpha * log10(Z / Z_star).

    Parameters:
        z (float): Redshift.
        Z_star (float): Reference metallicity.
        M_Z_star (float): PISN mass at the reference metallicity.
        alpha (float): Coefficient of the log-metallicity term.
        gamma (float): Slope of log10(Z) with redshift.
        zeta (float): log10(Z) at z = 0.

    Returns:
        float: PISN mass.
    """
    log_Z = gamma * z + zeta
    Z = 10 ** log_Z
    return M_Z_star - alpha * np.log10(Z / Z_star)

In [8]:
# Functions that deal with the source frame window function.

def source_win(z, m, Z_star, M_Z_star, alpha, gamma, zeta):
    """
    Source frame window function for binary formation.

    The window is 1 for masses strictly below the PISN mass at redshift `z`
    (from PISN_mass) and 0 at or above it.

    Parameters:
        z (float): Redshift.
        m (float or array-like): Mass.
        Z_star, M_Z_star, alpha, gamma, zeta: Model parameters forwarded to PISN_mass.

    Returns:
        np.ndarray: Window function values (1 or 0); a 0-d array for scalar `m`.
    """
    # The metallicity is computed inside PISN_mass; the unused local copy of it
    # that used to live here has been removed.
    m = np.asarray(m)  # lets plain lists be compared element-wise
    m_PISN = PISN_mass(z, Z_star, M_Z_star, alpha, gamma, zeta)
    return np.piecewise(m, [m < m_PISN, m >= m_PISN], [1, 0])

def win_integrand(z, m, Z_star, M_Z_star, alpha, gamma, zeta, z_merg, t_min=100e6, kappa=1):
    """
    Integrand for the window function in the observer frame.

    The delay time is the difference in lookback time (years) between formation
    redshift `z` and merger redshift `z_merg`. The integrand is the delay-time
    probability times cosmo.lookback_time_integrand(z) times the source-frame
    window at (z, m).

    Parameters:
        z (float): Formation redshift (integration variable).
        m (float): Mass.
        Z_star, M_Z_star, alpha, gamma, zeta: Model parameters.
        z_merg (float): Merger redshift.
        t_min (float): Minimum delay time (years).
        kappa (float): Power-law index.

    Returns:
        float: Value of the integrand.
    """
    formation_lookback_yr = cosmo.lookback_time(z).to_value('year')
    merger_lookback_yr = cosmo.lookback_time(z_merg).to_value('year')
    delay = formation_lookback_yr - merger_lookback_yr
    delay_weight = P_t_d(delay, kappa=kappa, t_min=t_min)
    return delay_weight * cosmo.lookback_time_integrand(z) * source_win(z, m, Z_star, M_Z_star, alpha, gamma, zeta)

def obs_win(m, z_merg, Z_star, M_Z_star, alpha, gamma, zeta, t_min=100e6, kappa=1):
    """
    Observer frame window function (not normalized).

    Integrates win_integrand over formation redshift from `z_merg` to infinity.

    Parameters:
        m (float): Mass.
        z_merg (float): Merger redshift.
        Z_star, M_Z_star, alpha, gamma, zeta: Model parameters.
        t_min (float): Minimum delay time (years).
        kappa (float): Power-law index.

    Returns:
        float: Value of the window function (the quad error estimate is discarded).
    """
    integrand_args = (m, Z_star, M_Z_star, alpha, gamma, zeta, z_merg, t_min, kappa)
    quad_result = integrate.quad(
        win_integrand,
        z_merg,
        np.inf,
        args=integrand_args,
        epsabs=1.49e-13,
        limit=300,
    )
    return quad_result[0]

def obs_win_normal(m, z_merg, Z_star, M_Z_star, alpha, gamma, zeta, t_min=100e6, kappa=1):
    """
    Normalized observer frame window function.

    obs_win is evaluated at every mass in `m` and divided by the integral of
    obs_win over [min(m), max(m)]. Each obs_win evaluation is itself a numerical
    integral, so this is a nested quadrature.

    Parameters:
        m (array-like): Array of masses.
        z_merg (float): Merger redshift.
        Z_star, M_Z_star, alpha, gamma, zeta: Model parameters.
        t_min (float): Minimum delay time (years).
        kappa (float): Power-law index.

    Returns:
        np.ndarray: Normalized window function values, one per entry of `m`.
    """
    shared_args = (z_merg, Z_star, M_Z_star, alpha, gamma, zeta, t_min, kappa)
    norm = integrate.quad(obs_win, min(m), max(m), args=shared_args, epsabs=1.49e-13, limit=300)[0]
    n_masses = len(m)
    window = np.zeros(n_masses)
    for k in range(n_masses):
        window[k] = obs_win(m[k], *shared_args) / norm
    return window

In [9]:
# Function to sample binary mass systems.

def _first_value(value):
    """Return the first element of a scalar or array-like as a scalar."""
    return np.asarray(value).reshape(-1)[0]


def _metallicity_from_mass(M):
    """Metallicity of one galaxy from its stellar mass (quartic polynomial in log10 M)."""
    # NOTE(review): the coefficients resemble the Mannucci et al. (2010) mass-metallicity
    # fit, which is written in terms of log10(M) - 10; confirm the units of 'stellar_mass'.
    solar_OH = 10 ** (8.83 - 12)
    solar_metal = 0.017
    OH = 10 ** (8.96 + 0.31 * np.log10(M) - 0.23 * (np.log10(M)) ** 2
                - 0.017 * (np.log10(M)) ** 3 + 0.04 * (np.log10(M)) ** 4 - 12)
    return 10 ** (np.log10(solar_metal) + np.log10(OH) - np.log10(solar_OH))


def _metallicity_from_mass_sfr(M, sfr):
    """Metallicity of one galaxy from its stellar mass and SFR (quadratic in log10 M, log10 SFR)."""
    # NOTE(review): the coefficients resemble the Mannucci et al. (2010) fundamental
    # metallicity relation, whose zero point is usually quoted as 8.90 and whose mass
    # variable is log10(M) - 10; confirm the constant and the units.
    solar_OH = 10 ** (8.83 - 12)
    solar_metal = 0.017
    OH = 10 ** (8.96 + 0.37 * np.log10(M) - 0.14 * np.log10(sfr)
                - 0.19 * (np.log10(M)) ** 2 + 0.12 * (np.log10(M) * np.log10(sfr))
                - 0.054 * (np.log10(sfr)) ** 2 - 12)
    return 10 ** (np.log10(solar_metal) + np.log10(OH) - np.log10(solar_OH))


def binary_masses_sample(df, p_m1_type, p_m1, p_m1_args, p_m2, p_m2_args, z_min, z_max, n_z, t_min=100e6, kappa=1):
    """
    Assign binary component masses (m1, m2) to each galaxy in the input DataFrame.

    The primary mass m1 is rejection-sampled per galaxy according to `p_m1_type`;
    the secondary mass is then obtained as p_m2(m1, *p_m2_args).

    Models for `p_m1_type`:
        'M_given'    : metallicity derived from 'stellar_mass'; the upper limit passed
                       to p_m1 is the PISN mass M_Z_star - alpha * log10(Z / Z_star).
        'MSFR_given' : same, with metallicity derived from 'stellar_mass' and 'sfr'.
        'Z_given'    : same, with metallicity read from the 'metallicity' column.
        'phys'       : p_m1 on a 100-point mass grid multiplied by the normalized
                       observer-frame window (obs_win_normal) at the grid redshift
                       nearest to the galaxy's 'z'.
        'phen'       : p_m1(m, *p_m1_args) sampled directly on [m_min, m_max].

    Parameters:
        df (DataFrame): Galaxy sample; the columns read depend on the model (see above).
        p_m1_type (str): Model for primary mass ('M_given', 'MSFR_given', 'Z_given', 'phys', 'phen').
        p_m1 (callable): PDF/function for primary mass sampling.
        p_m1_args (list): Arguments for p_m1.
            'M_given'/'MSFR_given'/'Z_given': [m_min, m_max, m_power, Z_star, M_Z_star, alpha]
            'phys': [m_min, m_max, m_power, Z_star, M_Z_star, alpha, gamma, zeta]
            'phen': [m_min, m_max, ...] passed to p_m1 unchanged.
        p_m2 (callable): Function for secondary mass given m1.
        p_m2_args (list): Arguments for p_m2.
        z_min (float): Minimum redshift for window function grid (used in 'phys' model).
        z_max (float): Maximum redshift for window function grid (used in 'phys' model).
        n_z (int): Number of redshift grid points for window function (used in 'phys' model).
        t_min (float): Minimum delay time (default 100 Myr); used by the 'phys' window.
        kappa (float): Power-law index for delay time distribution; used by the 'phys' window.

    Returns:
        None. Modifies df in-place by adding 'm1' and 'm2' columns.

    Raises:
        ValueError: If p_m1_type is not one of the supported models.
    """
    if p_m1_type not in ('M_given', 'MSFR_given', 'Z_given', 'phys', 'phen'):
        # An unknown model used to return silently without adding any columns.
        raise ValueError(f"Unknown p_m1_type: {p_m1_type!r}")

    num = df.shape[0]
    m1 = np.zeros(num)
    m2 = np.zeros(num)

    if p_m1_type in ('M_given', 'MSFR_given', 'Z_given'):
        m_min, m_max, m_power, Z_star, M_Z_star, alpha = p_m1_args

        # Per-galaxy metallicity, from the column or from an empirical relation.
        if p_m1_type == 'Z_given':
            metal = np.array(df['metallicity'])
        elif p_m1_type == 'M_given':
            M = np.array(df['stellar_mass'])
            metal = [_metallicity_from_mass(M_i) for M_i in M]
        else:
            M = np.array(df['stellar_mass'])
            sfr = np.array(df['sfr'])
            metal = [_metallicity_from_mass_sfr(M_i, sfr_i) for M_i, sfr_i in zip(M, sfr)]

        for i in range(num):
            # PISN cutoff mass for this galaxy's metallicity.
            m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
            p_m1_args_new = [m_min, m_PISN, m_power]
            p_max = find_max(p_m1, m_min, m_PISN, p_m1_args_new)
            # Candidates are drawn on [m_min, m_max] while the PDF is given m_PISN as
            # its upper limit. NOTE(review): if m_PISN <= m_min and p_m1 is zero
            # there, the sampler cannot accept any candidate.
            m1[i] = _first_value(rejection_sampling(1, p_m1, m_min, m_max, p_max, p_m1_args_new))
            m2[i] = _first_value(p_m2(m1[i], *p_m2_args))

    elif p_m1_type == 'phys':
        m_min, m_max, m_power, Z_star, M_Z_star, alpha, gamma, zeta = p_m1_args
        m = np.linspace(m_min, m_max, 100)  # Mass grid for window function
        z_arr = np.linspace(z_min, z_max, n_z)  # Redshift grid for window function
        # Precompute one window per grid redshift, honouring the caller's t_min and
        # kappa (they used to be hard-coded to 100e6 and 1 here).
        wins = [
            obs_win_normal(m, z_grid, Z_star, M_Z_star, alpha, gamma, zeta, t_min=t_min, kappa=kappa)
            for z_grid in z_arr
        ]
        z = np.array(df['z'])  # Actual galaxy redshifts
        # The source-frame mass distribution is the same for every galaxy.
        p_s = p_m1(m, m_min, m_max, m_power)
        for i in range(num):
            idx = find_nearest_idx(z_arr, z[i])  # Closest redshift in grid
            p_o = p_s * wins[idx]  # Observer frame mass distribution
            m1[i] = _first_value(rejection_sampling_numeric(1, m, p_o))
            m2[i] = _first_value(p_m2(m1[i], *p_m2_args))

    else:  # 'phen'
        m_min = p_m1_args[0]
        m_max = p_m1_args[1]
        p_max = find_max(p_m1, m_min, m_max, p_m1_args)
        for i in range(num):
            m1[i] = _first_value(rejection_sampling(1, p_m1, m_min, m_max, p_max, p_m1_args))
            m2[i] = _first_value(p_m2(m1[i], *p_m2_args))

    # The samplers return length-1 arrays; _first_value unwraps them because storing
    # a length-1 array into a scalar slot is deprecated in recent NumPy releases.
    df.insert(df.shape[1], 'm1', m1)
    df.insert(df.shape[1], 'm2', m2)

In [26]:
# --- Siren Catalog Construction Functions ---
# These functions are designed for single-redshift (single snapshot) usage.
# They sample gravitational wave sirens from a galaxy catalog and assign binary masses.

def sample_sirens_single(gal_cat, N_GW, p, p_args, SFR_Z_type, ampl=1):
    '''
    Draw GW host galaxies from a single-redshift galaxy catalog.

    The number of draws is ``int(N_GW * ampl)``, i.e. the boosted expected
    event count truncated toward zero. The actual selection is delegated to
    ``g_selection_full``.

    Parameters:
        gal_cat (DataFrame): Galaxy catalog for a single redshift.
        N_GW (float): Expected number of GW events.
        p (function): Host-selection probability function, forwarded as is.
        p_args (tuple): Arguments for ``p``, forwarded as is.
        SFR_Z_type (str): Star formation rate / metallicity model label,
            forwarded as is.
        ampl (float): Multiplicative boost applied to ``N_GW`` (default 1).

    Returns:
        Whatever ``g_selection_full`` returns for these inputs (the sampled
        galaxy catalog).
    '''
    return g_selection_full(gal_cat, int(N_GW * ampl), p, p_args, SFR_Z_type)

def siren_catalog_single(df_filtered, z, sky_frac, p, p_args, SFR_Z_type,
                        p_m1_type, p_m1, p_m1_args, p_m2, p_m2_args,
                        ampl=1, obs_time=1, t_min=500e6, kappa=1, grid_points=1999, Range=0.01,
                        box_size_Mpc=205.0, h=0.6774):
    '''
    Generate a catalog of gravitational wave sirens for a single redshift.

    The expected number of events is computed for the comoving volume of the
    simulation box at redshift ``z``, scaled by ``sky_frac``; that many hosts
    (times ``ampl``) are sampled from ``df_filtered`` and binary masses are
    then attached to the sample.

    Parameters:
        df_filtered (DataFrame): Filtered galaxy catalog at one redshift.
        z (float): Redshift of the snapshot.
        sky_frac (float): Fraction of the volume/sky used; multiplies the
            expected number of events.
        p, p_args, SFR_Z_type: Host sampling parameters, forwarded to
            ``sample_sirens_single``.
        p_m1_type, p_m1, p_m1_args, p_m2, p_m2_args: Mass sampling
            parameters, forwarded to ``binary_masses_sample``.
        ampl (float): Boost factor on the number of sampled sirens.
        obs_time (float): Observation time entering the event count.
        t_min (float): Forwarded to ``merger_rate`` and
            ``binary_masses_sample``.
        kappa (float): Forwarded to ``merger_rate`` and
            ``binary_masses_sample``.
        grid_points, Range: Unused by the snapshot-volume estimate; kept so
            existing callers that pass them keep working.
        box_size_Mpc (float): Box side length in Mpc/h (default 205.0).
        h (float): Dimensionless Hubble parameter used to remove the 1/h
            from the box size (default 0.6774).

    Returns:
        tuple: (original catalog, sampled catalog)
    '''

    def N_GW_snapshot(z, box_size_Mpc, h, obs_time=1, t_min=500e6, kappa=1):
        # Box side without the 1/h factor, in Mpc
        box_size = box_size_Mpc / h
        # Box volume in Gpc^3
        V_box = (box_size / 1000.0) ** 3

        # Merger rate density at the snapshot redshift (Gpc^-3 yr^-1)
        R_GW = merger_rate(z, t_min=t_min, kappa=kappa)

        # Events in the box; 1/(1+z) converts source-frame rate to observer frame
        return obs_time * V_box * R_GW / (1 + z)

    # Forward the caller's obs_time / t_min / kappa instead of silently
    # falling back to the helper's defaults, and honour sky_frac.
    N_GW = N_GW_snapshot(z, box_size_Mpc, h,
                         obs_time=obs_time, t_min=t_min, kappa=kappa) * sky_frac
    # Sample sirens from the galaxy catalog
    df_sample = sample_sirens_single(df_filtered, N_GW, p, p_args, SFR_Z_type, ampl=ampl)
    # Assign binary masses to the sampled sirens (single redshift: z_min == z_max, one grid point)
    binary_masses_sample(df_sample, p_m1_type, p_m1, p_m1_args, p_m2, p_m2_args, z, z, 1, t_min, kappa)
    return df_filtered, df_sample

In [11]:
# Function to compute the redshift-space distortion (RSD) power spectrum.

def rsd_p_gs(M, b_g, b_s, matter_pk, growthrate_s, kedges, sigmap_s):
    """
    Compute the redshift-space cross power spectrum of two biased tracers.

    The model is the linear Kaiser cross term
    ``(b_g + f mu^2) (b_s + f mu^2) P_m(k)``, written out in powers of mu,
    multiplied by a Lorentzian damping factor
    ``1 / (1 + k^2 mu^2 sigma_p^2 / 2)`` that suppresses small-scale power
    (Fingers-of-God).

    Parameters:
        M (float or array-like): Cosine of the angle to the line of sight (mu).
        b_g (float or array-like): Bias of the first tracer (galaxies).
        b_s (float or array-like): Bias of the second tracer
            (presumably the GW sirens, given the ``_gs`` suffix - confirm).
        matter_pk (float or array-like): Matter power spectrum values.
        growthrate_s (float or array-like): Growth rate of structure (f).
        kedges (float or array-like): Wavenumbers k at which to evaluate.
        sigmap_s (float or array-like): Velocity dispersion scale in the
            damping term.

    Returns:
        float or np.ndarray: Damped redshift-space power spectrum, with the
        shape obtained by broadcasting the inputs.
    """
    mu2 = M ** 2
    # Argument of the Lorentzian Fingers-of-God damping
    k_term = kedges ** 2 * mu2 * sigmap_s ** 2 / 2
    rsd_pk = (
        (b_g * b_s * matter_pk) +
        (mu2 * (b_g + b_s) * growthrate_s * matter_pk) +
        (M ** 4 * growthrate_s ** 2 * matter_pk)
    ) * (1 / (1 + k_term))
    # The original ended with a bare `return`, so callers always got None.
    return rsd_pk

# Preprocessing the data

In [19]:
import numpy as np
import pandas as pd

def preprocess_tng_data_fixed(galaxy_data, density_field, snapshot_redshift, box_size=205.0,
                              stellar_mass_cut=None, position_scale=1.0):
    """
    Preprocess IllustrisTNG galaxy data for clustering analysis.
    Works with the DataFrame structure from subhalos_snap_99.pkl.

    Parameters:
    -----------
    galaxy_data : DataFrame
        Galaxy catalog with columns like SubhaloPos_0, SubhaloPos_1, SubhaloPos_2, etc.
        The input frame is not modified.
    density_field : ndarray
        3D density field from the snapshot
    snapshot_redshift : float
        Redshift of the snapshot. Overridden by the first entry of the
        'Redshift' column when that column exists.
    box_size : float
        Size of the simulation box in cMpc/h
    stellar_mass_cut : float, optional
        Keep only galaxies whose stellar mass is strictly greater than this
        value (same units as the mass column). None applies no cut.
    position_scale : float, optional
        Factor that converts the catalog's SubhaloPos_* values into the
        length unit of ``box_size``. It is applied only to the density
        lookup; the returned galaxy positions stay in catalog units and the
        random positions are expressed in those same catalog units.
        Default 1.0 (catalog already in ``box_size`` units).
        NOTE(review): the captured notebook output shows positions of order
        1e4-1e5 with box_size=205, which looks like ckpc/h vs cMpc/h; callers
        probably want position_scale=1e-3 - confirm against the catalog.

    Returns:
    --------
    processed_data : DataFrame
        Processed galaxy catalog ready for clustering analysis
    random_catalog : DataFrame
        Random catalog for clustering measurements

    Raises:
    -------
    ValueError
        If ``position_scale`` is not positive.
    """
    if position_scale <= 0:
        raise ValueError("position_scale must be positive")

    # Work with a copy of the original data
    df = galaxy_data.copy()

    # Use redshift from the data if available, otherwise use provided value
    if 'Redshift' in df.columns:
        snapshot_redshift = df['Redshift'].iloc[0]
        print(f"Using redshift from data: {snapshot_redshift:.3f}")

    # 1. BASIC SELECTION CUTS
    # Pick the stellar-mass column once; both the cut and no-cut paths need it.
    if 'SubhaloMassStars' in df.columns:
        stellar_masses = df['SubhaloMassStars'].values
        print("Using SubhaloMassStars for stellar mass cut")
    else:
        print("Warning: SubhaloMassStars not found, using SubhaloMass instead")
        stellar_masses = df['SubhaloMass'].values

    print(f"Stellar mass range: {stellar_masses.min():.2e} to {stellar_masses.max():.2e}")

    if stellar_mass_cut is not None:
        print(f"Stellar mass cut: {stellar_mass_cut:.2e}")

        # Apply mass cut
        mass_mask = stellar_masses > stellar_mass_cut
        print(f"Galaxies passing mass cut: {mass_mask.sum()}/{len(mass_mask)}")

        # Explicit copy so the column assignments below act on an
        # independent frame rather than on a filtered selection.
        df = df[mass_mask].copy()
        stellar_masses = stellar_masses[mass_mask]

    # Extract positions and express them in box_size units for the grid lookup
    pos = df[['SubhaloPos_0', 'SubhaloPos_1', 'SubhaloPos_2']].values * position_scale

    # 2. COMPUTE ENVIRONMENTAL DENSITY
    galaxy_density = get_density_at_positions(
        pos, density_field, box_size
    )
    df['local_density'] = galaxy_density

    # 3. COMPUTE FKP-LIKE WEIGHTS
    P_FKP = 16000  # Same as SDSS for consistency
    mean_density = np.mean(galaxy_density)

    print(f"Mean galaxy density: {mean_density:.3f}")
    print(f"Density range: {galaxy_density.min():.3f} to {galaxy_density.max():.3f}")

    # Normalize density to get n(x) equivalent
    normalized_density = galaxy_density / mean_density

    # Compute FKP weights
    w_fkp = 1.0 / (1 + P_FKP * normalized_density)
    df['W_FKP'] = w_fkp
    df['NZ'] = normalized_density

    # 4. COMPUTE GALAXY PROPERTIES
    df['stellar_mass'] = stellar_masses

    # Extract SFR (already in correct column name)
    if 'SubhaloSFR' in df.columns:
        df['sfr'] = df['SubhaloSFR']

    # Extract metallicity (already in correct column name)
    if 'SubhaloStarMetallicity' in df.columns:
        df['metallicity'] = df['SubhaloStarMetallicity']

    # 5. ASSIGN UNIFORM WEIGHTS
    df['WEIGHT'] = np.ones(len(df))

    # 6. ADD REDSHIFT INFORMATION
    df['z'] = snapshot_redshift

    # 7. CREATE RANDOM CATALOG
    n_random = len(df) * 10  # 10x more randoms than galaxies
    random_pos = generate_random_positions(n_random, box_size)  # box_size units

    # Get density at random positions
    random_density = get_density_at_positions(
        random_pos, density_field, box_size
    )

    # Compute FKP weights for randoms (normalized by the galaxy mean density)
    random_normalized_density = random_density / mean_density
    random_w_fkp = 1.0 / (1 + P_FKP * random_normalized_density)

    # Store random positions in the same units as the galaxy positions
    random_pos_catalog = random_pos / position_scale

    # Create random catalog
    random_catalog = pd.DataFrame({
        'x': random_pos_catalog[:, 0],
        'y': random_pos_catalog[:, 1],
        'z_pos': random_pos_catalog[:, 2],
        'z': snapshot_redshift,
        'local_density': random_density,
        'NZ': random_normalized_density,
        'W_FKP': random_w_fkp,
        'WEIGHT': np.ones(n_random)
    })

    # 8. CLEAN UP THE GALAXY CATALOG
    # Select final columns for clustering analysis
    final_columns = ['SubhaloPos_0', 'SubhaloPos_1', 'SubhaloPos_2',
                     'stellar_mass', 'z', 'local_density', 'NZ', 'W_FKP', 'WEIGHT']

    # Add optional columns if available
    if 'sfr' in df.columns:
        final_columns.append('sfr')
    if 'metallicity' in df.columns:
        final_columns.append('metallicity')

    processed_data = df[final_columns].copy()

    # Rename position columns for consistency
    processed_data = processed_data.rename(columns={
        'SubhaloPos_0': 'x',
        'SubhaloPos_1': 'y',
        'SubhaloPos_2': 'z_pos'
    })

    return processed_data, random_catalog

def get_density_at_positions(positions, density_field, box_size):
    """
    Look up the density field at the given positions (nearest grid cell).

    Each position is mapped to the grid cell that contains it, with periodic
    wrapping, and the value of that cell is returned. The lookup is done with
    a single vectorised index operation rather than a per-row Python loop.

    Parameters:
    -----------
    positions : ndarray
        Positions, shape (N, 3), in the same length unit as ``box_size``
    density_field : ndarray
        3D density field. The size of its first axis is used as the grid
        size along all three axes, i.e. a cubic grid is assumed.
    box_size : float
        Size of simulation box

    Returns:
    --------
    densities : ndarray
        float64 array of length N with the density at each position
    """
    grid_size = density_field.shape[0]
    cell_size = box_size / grid_size

    # Cell index containing each position. Flooring first and wrapping on
    # integers keeps every index strictly inside [0, grid_size), even when a
    # floating-point modulo would round up to grid_size.
    cells = np.floor(np.asarray(positions) / cell_size).astype(np.int64) % grid_size

    densities = density_field[cells[:, 0], cells[:, 1], cells[:, 2]]
    return densities.astype(np.float64)

def generate_random_positions(n_random, box_size):
    """
    Draw uniformly distributed points inside the simulation box.

    Uses NumPy's global random state, so results are reproducible only if
    the caller seeds it beforehand.

    Parameters:
    -----------
    n_random : int
        Number of points to draw
    box_size : float
        Side length of the box; each coordinate is drawn from [0, box_size)

    Returns:
    --------
    random_pos : ndarray
        Array of shape (n_random, 3) with the drawn coordinates
    """
    shape = (n_random, 3)
    return np.random.uniform(low=0, high=box_size, size=shape)

def apply_additional_cuts_fixed(df, random_cat, mass_range=None, sfr_range=None, 
                               density_range=None):
    """
    Filter the galaxy and random catalogs with optional inclusive range cuts.

    Parameters:
    -----------
    df : DataFrame
        Galaxy catalog. Must contain 'stellar_mass' when ``mass_range`` is
        given and 'NZ' when ``density_range`` is given.
    random_cat : DataFrame
        Random catalog. Must contain 'NZ' when ``density_range`` is given.
    mass_range : tuple, optional
        (min_mass, max_mass) applied to df['stellar_mass']
    sfr_range : tuple, optional
        (min_sfr, max_sfr) applied to df['sfr']; silently ignored when the
        catalog has no 'sfr' column
    density_range : tuple, optional
        (min_density, max_density) applied to the 'NZ' column of both
        catalogs

    Returns:
    --------
    df_cut : DataFrame
        Copy of the galaxy rows passing every requested cut
    random_cut : DataFrame
        Copy of the random rows passing the density cut (all rows when no
        density cut is requested)
    """
    # Collect the (column, bounds) pairs that apply to each catalog, in the
    # order mass -> SFR -> density.
    galaxy_cuts = []
    if mass_range is not None:
        galaxy_cuts.append(('stellar_mass', mass_range))
    if sfr_range is not None and 'sfr' in df.columns:
        galaxy_cuts.append(('sfr', sfr_range))
    if density_range is not None:
        galaxy_cuts.append(('NZ', density_range))

    keep_galaxy = np.ones(len(df), dtype=bool)
    for column, bounds in galaxy_cuts:
        keep_galaxy &= (df[column] >= bounds[0]) & (df[column] <= bounds[1])

    # Randoms carry no galaxy properties, so only the density cut applies.
    keep_random = np.ones(len(random_cat), dtype=bool)
    if density_range is not None:
        keep_random &= (random_cat['NZ'] >= density_range[0]) & (random_cat['NZ'] <= density_range[1])

    return df[keep_galaxy].copy(), random_cat[keep_random].copy()

# Example usage with your actual data
def example_with_real_data():
    """
    Load the snapshot-99 subhalo catalog and density field and preprocess them.

    Reads the pickled catalog and the ``.npy`` density field from the
    hard-coded paths below (adjust them to your data), then calls
    ``preprocess_tng_data_fixed`` with no stellar mass cut.

    Returns:
    --------
    tuple
        (processed galaxy catalog, random catalog) as returned by
        ``preprocess_tng_data_fixed``
    """
    # Input locations -- adjust to your data
    file_path = '/gpfs/nchugh/groupcat_data/subhalos_snap_99.pkl'
    field_path = '/gpfs/nchugh/dmcoords/field.npy'
    box_size = 205.0  # TNG300 box size in cMpc/h

    subhalos = pd.read_pickle(file_path)
    print(f"Loaded {len(subhalos)} galaxies from {file_path}")
    print(f"Columns available: {list(subhalos.columns)}")

    density_field = np.load(field_path)

    # Take the redshift from the catalog when it carries one, else assume z = 0
    if 'Redshift' in subhalos.columns:
        snapshot_redshift = subhalos['Redshift'].iloc[0]
    else:
        snapshot_redshift = 0.0

    result = preprocess_tng_data_fixed(
        subhalos, density_field, snapshot_redshift, box_size,
        stellar_mass_cut=None,  # No mass cut
    )
    processed_galaxies, random_catalog = result

    print(f"Final galaxy sample: {len(processed_galaxies)} galaxies")
    print(f"Final random sample: {len(random_catalog)} points")

    return processed_galaxies, random_catalog

# Main execution
if __name__ == "__main__":
    # Run the preprocessing on the real data and bind the results to the
    # module-level names `galaxies` and `randoms`, which later cells read.
    galaxies, randoms = example_with_real_data()

Loaded 14485709 galaxies from /gpfs/nchugh/groupcat_data/subhalos_snap_99.pkl
Columns available: ['SubhaloFlag', 'SubhaloMass', 'SubhaloSFR', 'SubhaloParent', 'SubhaloPos_0', 'SubhaloPos_1', 'SubhaloPos_2', 'SubhaloStarMetallicity', 'SubhaloGasMetallicity', 'SubhaloMassStars', 'SubhaloMassBH', 'SubhaloVel_0', 'SubhaloVel_1', 'SubhaloVel_2', 'SubhaloVelDisp', 'SubhaloGrNr', 'SubhaloVmax', 'SubhaloSpin_0', 'SubhaloSpin_1', 'SubhaloSpin_2', 'Redshift']
Using redshift from data: 0.000
Using SubhaloMassStars for stellar mass cut
Stellar mass range: 0.00e+00 to 5.31e+02
Using redshift from data: 0.000
Using SubhaloMassStars for stellar mass cut
Stellar mass range: 0.00e+00 to 5.31e+02
Mean galaxy density: 924.883
Density range: 0.000 to 1439353.750
Mean galaxy density: 924.883
Density range: 0.000 to 1439353.750
Final galaxy sample: 14485709 galaxies
Final random sample: 144857090 points
Final galaxy sample: 14485709 galaxies
Final random sample: 144857090 points


In [20]:
# Quick look at the preprocessed galaxy catalog: first rows and total row count.
print(galaxies.head(), len(galaxies))

           x          y      z_pos  stellar_mass    z  local_density  \
0  43718.812  48813.640  147594.95    530.948400  0.0     849.059082   
1  45442.273  51850.200  146416.50    253.662840  0.0      66.946030   
2  44490.760  49091.715  147870.58     57.365696  0.0     277.961945   
3  43820.785  50939.400  147711.05     23.909803  0.0    1967.043335   
4  44302.580  49630.973  147869.48     22.059591  0.0     550.890564   

         NZ     W_FKP  WEIGHT  sfr  metallicity  
0  0.918018  0.000068     1.0  0.0     0.020017  
1  0.072383  0.000863     1.0  0.0     0.017918  
2  0.300537  0.000208     1.0  0.0     0.025681  
3  2.126802  0.000029     1.0  0.0     0.019654  
4  0.595633  0.000105     1.0  0.0     0.020799   14485709


# Main Analysis

In [28]:
# Example: Run GW host bias analysis for a single redshift snapshot (no binning)

H_0 = 70  # Hubble constant in km/s/Mpc
H_0_inv = 13.98e9  # Inverse Hubble constant in years

# Load the raw catalog and density field for this snapshot
file_path = '/gpfs/nchugh/groupcat_data/subhalos_snap_99.pkl'
df = pd.read_pickle(file_path)
box_size = 205.0
field_path = '/gpfs/nchugh/dmcoords/field.npy'
density_field = np.load(field_path)
snapshot_redshift = df['Redshift'].iloc[0] if 'Redshift' in df.columns else 0.0

# `galaxies` and `randoms` are reused from the preprocessing cell above;
# nothing is re-preprocessed or cut here.

# Print the minimum and maximum stellar mass
print(f"Stellar mass range: {galaxies['stellar_mass'].min():.2e} to {galaxies['stellar_mass'].max():.2e}")

# Set up GW host selection parameters
M_c_values = np.linspace(10, 500, 15)  # Pivot masses to scan
SFR_Z_type = 'MZR'
p = P_MZR
p_args_base = [4, .5, 0, 0, 150]  # [delta_l, delta_h, M_c, low_M, high_M]
p_m1_type = 'Z_given'
p_m1 = m_kroupa
p_m1_args = [1, 531, 2.3, 1e-4, 45, 1.5]  # [m_min, m_max, m_power, Z_star, M_Z_star, alpha]
p_m2 = sec_mass_sample_beta
p_m2_args = [5, 1]
ampl = 1e9  # Boost on the expected event count so the sample is not empty
obs_time = 1
t_min = 500e6
kappa = 1
grid_points = 256
Range = 0.01
sky_frac = 1.0  # Use full box

# Debugging: Check input catalog and siren catalog after each run
print('Input galaxy catalog:')
print('stellar_mass min/max:', galaxies['stellar_mass'].min(), galaxies['stellar_mass'].max())
if 'z' in galaxies.columns:
    print('z min/max:', galaxies['z'].min(), galaxies['z'].max())
else:
    print('No z column in galaxies')
print('---')
for i, M_c in enumerate(M_c_values):
    # Same selection parameters for every run, only the pivot mass changes
    p_args = p_args_base.copy()
    p_args[2] = M_c
    _, siren_cat = siren_catalog_single(
        galaxies, snapshot_redshift, sky_frac,
        p, p_args, SFR_Z_type,
        p_m1_type, p_m1, p_m1_args, p_m2, p_m2_args,
        ampl=ampl, obs_time=obs_time, t_min=t_min, kappa=kappa, grid_points=grid_points, Range=Range
    )
    print(f'Pivot mass {M_c:.3e}: siren_cat shape =', siren_cat.shape)
    if not siren_cat.empty:
        print('siren_cat stellar_mass min/max:', siren_cat['stellar_mass'].min(), siren_cat['stellar_mass'].max())
        if 'z' in siren_cat.columns:
            print('siren_cat z min/max:', siren_cat['z'].min(), siren_cat['z'].max())
        else:
            print('No z column in siren_cat')
    else:
        print('siren_cat is empty after cuts')
    print('---')

    # Compute chirp mass for each binary
    siren_cat['chirp_mass'] = chirp_mass(np.array(siren_cat['m1']), np.array(siren_cat['m2']))

    # Save siren catalog for later analysis (one file per pivot mass)
    siren_cat.to_pickle(f'/gpfs/nchugh/gw/siren_cat_M_c_{M_c:.3f}.pkl')

Stellar mass range: 0.00e+00 to 5.31e+02
Input galaxy catalog:
stellar_mass min/max: 0.0 530.9484
z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 1.000e+01: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.00011712464 75.400116
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 4.500e+01: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 9.026628e-05 122.52858
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 8.000e+01: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.00013119243 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 1.150e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 8.338495e-05 138.76486
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 1.500e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 8.323093e-05 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 1.850e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.0001479995 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 2.200e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.00011043436 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 2.550e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 8.169194e-05 149.51189
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 2.900e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.00013925701 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 3.250e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.00012752524 149.51189
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 3.600e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.00011766082 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 3.950e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.00012561397 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 4.300e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 0.000105629704 136.47209
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 4.650e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 9.659416e-05 149.9503
siren_cat z min/max: 0.0 0.0
---


  m_PISN = M_Z_star - alpha * np.log10(metal[i] / Z_star)
  y *= step


Pivot mass 5.000e+02: siren_cat shape = (74518, 13)
siren_cat stellar_mass min/max: 8.3152336e-05 136.79767
siren_cat z min/max: 0.0 0.0
---


In [30]:
# `siren_cat` here is the catalog left over from the last iteration of the
# pivot-mass loop; show its row count and first rows.
print(len(siren_cat))
print(siren_cat.head())

74518
                  x          y       z_pos  stellar_mass    z  local_density  \
3188122  132724.170  137785.53   66687.970      0.002403  0.0    1548.197876   
967779   129355.420   81774.03    4623.174      0.009228  0.0      52.951645   
179108    24575.398  102311.16  141183.140      0.284393  0.0    1333.556519   
2638687  179618.770   34085.99   41393.543      0.015085  0.0     175.733398   
3893309   66296.850  152234.48   41335.473      0.000705  0.0    1122.042358   

               NZ     W_FKP  WEIGHT       sfr  metallicity          m1  \
3188122  1.673939  0.000037     1.0  0.000000     0.000302    3.736312   
967779   0.057252  0.001090     1.0  0.000000     0.001598    1.015870   
179108   1.441865  0.000043     1.0  0.000000     0.016095    2.431009   
2638687  0.190006  0.000329     1.0  0.010664     0.002460    1.508648   
3893309  1.213172  0.000052     1.0  0.000000     0.000000  525.091448   

                 m2  chirp_mass  
3188122    4.459347    3.550682  


In [16]:
# Event count from N_GW_single at z = 0 over a redshift range of 0.01 with
# obs_time = 1e8. The trailing `* 1.0` leaves the value unchanged
# (presumably a stand-in for sky_frac -- confirm).
N = N_GW_single(0, Range=0.01, obs_time=1e8, t_min=5e8, kappa=1, grid_points=256) * 1.0

  the requested tolerance from being achieved.  The error may be 
  underestimated.
  N_GW = integrate.quad(N_GW_integrand, z, z+dz, args=(interp_rate, obs_time, t_min, kappa), limit=100)[0]


In [17]:
# Show the event count computed by N_GW_single in the previous cell.
print(N)

97.4973559671598


In [18]:
# Merger rate at z = 0 for the positional arguments (5e8, 1), which the other
# calls in this file pass as t_min and kappa.
print(merger_rate(0, 5e8, 1))

0.002688659369656108


In [24]:
def N_GW_snapshot(z, box_size_Mpc, h, obs_time=1, t_min=500e6, kappa=1):
    """
    Expected number of GW events inside a simulation box at redshift z.

    Parameters:
        z (float): Snapshot redshift.
        box_size_Mpc (float): Box side length in Mpc/h; it is divided by
            ``h`` to obtain Mpc.
        h (float): Dimensionless Hubble parameter.
        obs_time (float): Observation time multiplying the rate (default 1).
        t_min (float): Forwarded to ``merger_rate``.
        kappa (float): Forwarded to ``merger_rate``.

    Returns:
        float: obs_time * V_box * R_GW / (1 + z), with V_box in Gpc^3 and
        R_GW the value returned by ``merger_rate`` (Gpc^-3 yr^-1 according
        to the original comment).
    """
    # Side length: Mpc/h -> Mpc -> Gpc
    side_mpc = box_size_Mpc / h
    volume_gpc3 = (side_mpc / 1000.0) ** 3

    # Merger rate density at the snapshot redshift
    rate_density = merger_rate(z, t_min=t_min, kappa=kappa)

    # The 1/(1+z) factor converts the source-frame rate to the observer frame
    return obs_time * volume_gpc3 * rate_density / (1 + z)

In [22]:
# Expected events at z = 0 for the 205 Mpc/h box with h = 0.6774 and the
# default obs_time = 1 (the same box arguments siren_catalog_single uses).
print(N_GW_snapshot(0, 205.0, 0.6774))

7.451810299114428e-05


In [33]:
# Ratio of siren-catalog rows to galaxy-catalog rows.
print(len(siren_cat)/len(galaxies))

0.00514424250825417
