# Integrate p(z)

A galaxy can be selected as a background source by requiring its probability of lying behind the cluster (at redshift $z_l$), i.e.
$$
P(z > z_l) = \int_{z_l}^{+\infty} dz\ p(z) 
$$
to be higher than a given threshold.

In this notebook, we show the `clmm` functionalities developed to compute:
- the galaxy weights under a variety of configurations (true redshifts or photo-z, shape noise or not, ideal galaxy shapes or not).
- the background probability $P(z > z_l)$ for each galaxy

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os

import numpy as np
import matplotlib.pyplot as plt
from astropy.table import Table
import scipy

import clmm
from clmm import Cosmology
from clmm import GalaxyCluster
from clmm.dataops import compute_galaxy_weights
from clmm.support import mock_data as mock

clmm.__version__

In [None]:
# Cosmology shared by every mock catalog in this notebook.
# NOTE(review): 0.265 is presumably the total matter density, from which the
# baryon density (0.0448) is subtracted to get the dark-matter part -- confirm.
cosmo = Cosmology(
    H0=71.0,
    Omega_dm0=0.265 - 0.0448,
    Omega_b0=0.0448,
    Omega_k0=0.0,
)

In [None]:
cluster_z = 0.4  # cluster redshift; also the lower limit z_l of the p(z) integrals below

In [None]:
# Positional arguments of the mock generator; the second one is the cluster redshift.
# NOTE(review): 1e14 and 4 are presumably the cluster mass and concentration --
# confirm against the `generate_galaxy_catalog` signature.
args = (1e14, cluster_z, 4, cosmo)

# Keyword configuration common to the three catalogs.
kwargs = {
    "zsrc": "chang13",
    "delta_so": 200,
    "massdef": "critical",
    "halo_profile_model": "nfw",
    "zsrc_min": 0.0,
    "zsrc_max": 3.0,
    "field_size": 10.0,
    "shapenoise": 0.5,
    "photoz_sigma_unscaled": 0.05,
    "mean_e_err": 0.1,
    "ngals": 10000,
}

# The seed is reset before every call so that the three calls start from the same
# random state and differ only in `pzpdf_type`.
catalogs = []
for pzpdf_type in ("shared_bins", "individual_bins", "quantiles"):
    np.random.seed(41363)
    catalogs.append(mock.generate_galaxy_catalog(*args, **kwargs, pzpdf_type=pzpdf_type))
noisy_data_z, noisy_data_z2, noisy_data_z3 = catalogs

In [None]:
# Overlay the first five photo-z PDFs of each catalog. Row i of every catalog is
# drawn with colour C{i}; the line style identifies the PDF storage format.
# Only the first row of each catalog carries a legend label, so the legend has
# one entry per storage format.
fig, ax = plt.subplots()

# PDFs tabulated on a redshift grid shared by all galaxies
for i, data in enumerate(noisy_data_z[:5]):
    ax.plot(
        noisy_data_z.pzpdf_info["zbins"],
        data["pzpdf"],
        lw=0.5,
        color=f"C{i}",
        label="shared bins" if i == 0 else None,
    )

# PDFs tabulated on a grid stored per galaxy
for i, data in enumerate(noisy_data_z2[:5]):
    ax.plot(
        data["pzbins"],
        data["pzpdf"],
        lw=0.9,
        color=f"C{i}",
        ls="--",
        label="individual bins" if i == 0 else None,
    )

# PDFs stored as quantiles, converted back to (bins, pdfs) by the catalog
pzbins, pzpdfs = noisy_data_z3.get_pzpdfs()
for i, data in enumerate(pzpdfs[:5]):
    ax.plot(
        pzbins,
        data,
        lw=3,
        color=f"C{i}",
        ls=":",
        label="quantiles" if i == 0 else None,
    )

ax.set_xlim(0.4, 2.1)
ax.set_xlabel("z")
ax.set_ylabel("p(z)")
ax.legend()
plt.show()

## Test p(z) integrals

### From CLMM

In [None]:
from clmm.dataops import _integ_pzfuncs

In [None]:
# P(z > cluster_z) computed by CLMM for each PDF storage format.
# `get_pzpdfs()` returns (bins, pdfs) while `_integ_pzfuncs` is called with the
# PDFs first, hence the swapped argument order.
catalog_by_case = {
    "shared": noisy_data_z,
    "individual": noisy_data_z2,
    "quantiles": noisy_data_z3,
}
integrals = {}
for pdf_case, catalog in catalog_by_case.items():
    case_bins, case_pdfs = catalog.get_pzpdfs()
    integrals[f"clmm_{pdf_case}"] = _integ_pzfuncs(case_pdfs, case_bins, cluster_z)

### From `qp`

In [None]:
import qp

In [None]:
# `qp` ensemble built from the PDFs tabulated on the shared redshift grid
shared_grid_data = {
    "xvals": noisy_data_z.pzpdf_info["zbins"],
    "yvals": noisy_data_z["pzpdf"],
}
qp_dat = qp.Ensemble(qp.interp, data=shared_grid_data)

# P(z > z_l) = 1 - CDF(z_l); column 0 of the CDF array is used
cdf_shared = qp_dat.cdf(cluster_z)[:, 0]
integrals["qp_shared"] = 1 - cdf_shared

In [None]:
# `qp` ensemble built from the PDFs tabulated on a per-galaxy redshift grid
individual_grid_data = {
    "xvals": noisy_data_z2["pzbins"],
    "yvals": noisy_data_z2["pzpdf"],
}
qp_dat2 = qp.Ensemble(qp.interp_irregular, data=individual_grid_data)

# P(z > z_l) = 1 - CDF(z_l); column 0 of the CDF array is used
cdf_individual = qp_dat2.cdf(cluster_z)[:, 0]
integrals["qp_individual"] = 1 - cdf_individual

In [None]:
# `qp` ensemble built from the quantile representation of the PDFs
quantile_data = {
    "locs": noisy_data_z3["pzquantiles"],
    "quants": noisy_data_z3.pzpdf_info["quantiles"],
}
qp_dat3 = qp.Ensemble(qp.quant, data=quantile_data)

# P(z > z_l) = 1 - CDF(z_l); column 0 of the CDF array is used
cdf_quantiles = qp_dat3.cdf(cluster_z)[:, 0]
integrals["qp_quantiles"] = 1 - cdf_quantiles

### True Values

For a Gaussian distribution, the integral can be computed with the complementary error function:

$$
P(z > z_l) = 
\frac{1}{\sqrt{2\pi\sigma_z^2}}\int_{z_{l}}^{+\infty} dz\ e^{-\frac{(z-z_{gal})^2}{2\sigma_z^2}} =
\frac{1}{2} {\rm erfc}\left(\frac{z_{l}-z_{gal}}{\sqrt{2}\sigma_z}\right)
$$

In [None]:
from scipy.special import erfc

In [None]:
# Analytic P(z > z_l) for a Gaussian p(z):
#     0.5 * erfc((z_l - z_gal) / (sqrt(2) * sigma_z))
# The unscaled photo-z scatter is read from the catalog configuration rather than
# repeated as a literal, so this reference stays in sync with the mock catalogs.
# NOTE(review): sigma_z is scaled here with the catalog "z" column; if the mock
# generator scales the scatter with a different redshift column, use that one
# instead -- confirm against clmm.support.mock_data.
photoz_sigma = kwargs["photoz_sigma_unscaled"] * (1 + noisy_data_z2["z"])
true_integ = 0.5 * erfc((cluster_z - noisy_data_z2["z"]) / (photoz_sigma * np.sqrt(2)))

### Difference

In [None]:
from scipy.stats import binned_statistic

In [None]:
# Relative difference (in %) between each numerical integral and the analytic value,
# averaged in bins of the numerical integral; error bars show the per-bin standard
# deviation. One panel per integration code (CLMM on top, qp below).
# NOTE(review): the x-axis is labelled "True integral" while the binning is done on
# the computed integral -- confirm which one is intended.
fig, axes = plt.subplots(2, sharex=True, sharey=True, figsize=(7, 5))
bins = np.linspace(0, 1, 21)
bin_centers = 0.5 * (bins[1:] + bins[:-1])
pdf_cases = ("shared", "individual", "quantiles")

for ax, comp_case in zip(axes, ("clmm", "qp")):
    for i, pdf_case in enumerate(pdf_cases):
        integ = integrals[f"{comp_case}_{pdf_case}"]
        rel_diff = (integ / true_integ - 1) * 100
        mean_diff = binned_statistic(integ, rel_diff, bins=bins, statistic="mean")[0]
        std_diff = binned_statistic(integ, rel_diff, bins=bins, statistic="std")[0]
        # small horizontal offset so the three cases do not sit on top of each other
        ax.errorbar(
            bin_centers + (i - 1) * 0.01,
            mean_diff,
            std_diff,
            label=pdf_case,
            lw=0.7,
        )
    # reference line at zero difference
    ax.axhline(0, c="0", ls="--", lw=0.5)
    ax.minorticks_on()
    ax.grid()
    ax.grid(which="minor", lw=0.3)
    ax.set_ylim(-20, 30)
    ax.set_title(f"{comp_case} integral")

axes[0].legend()
axes[1].set_xlabel("True integral")
for ax in axes:
    ax.set_ylabel("rel. diff [%]")