# Statistics

```{autolink-concat}
```

In [None]:
from __future__ import annotations

import jax.numpy as jnp
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Latex
from matplotlib import cm
from tensorwaves.function.sympy import create_parametrized_function
from tqdm.notebook import tqdm

from polarization import formulate_polarization
from polarization.amplitude import DalitzPlotDecompositionBuilder
from polarization.data import create_data_transformer, generate_meshgrid_sample
from polarization.io import mute_jax_warnings, perform_cached_doit
from polarization.lhcb import load_model_parameters, load_three_body_decays

# Helper from `polarization.io`; presumably suppresses JAX warnings in the
# notebook output -- confirm against its implementation.
mute_jax_warnings()

# Build the amplitude model for the decay described in ../data/isobars.json.
# The statement order matters: the dynamics have to be attached to the builder
# before `formulate()` is called.
reference_subsystem = 1  # subsystem ID handed to `formulate()` below
dynamics_configurator = load_three_body_decays("../data/isobars.json")
decay = dynamics_configurator.decay
amplitude_builder = DalitzPlotDecompositionBuilder(decay)
amplitude_builder.dynamics_choices = dynamics_configurator
model = amplitude_builder.formulate(reference_subsystem)

## Function creation

In [None]:
%%time
# Formulate the polarization expressions and unfold each of them: evaluate
# with `doit()`, substitute the amplitude definitions of the model, and run the
# result through `perform_cached_doit`.
polarization_exprs = formulate_polarization(amplitude_builder, reference_subsystem)
unfolded_polarization_exprs = []
for expr in tqdm(polarization_exprs, desc="Unfolding polarization expressions"):
    expr_with_amplitudes = expr.doit().xreplace(model.amplitudes)
    unfolded_polarization_exprs.append(perform_cached_doit(expr_with_amplitudes))
# The intensity is unfolded directly from the full model expression.
unfolded_intensity_expr = perform_cached_doit(model.full_expression)

This time, we do not {ref}`substitute certain parameters with their parameter defaults<polarization:Definition of free parameters>`, but lambdify the full expression, so that parameter values can be set for different models. Note that this makes lambdification slower.

In [None]:
%%time
def _lambdify_with_all_parameters(expression):
    """Lambdify `expression` to JAX, keeping every model parameter adjustable."""
    return create_parametrized_function(
        expression,
        parameters=model.parameter_defaults,
        backend="jax",
    )


# One function per polarization component, followed by the intensity function.
polarization_funcs = []
for xyz in tqdm(range(3)):
    polarization_funcs.append(
        _lambdify_with_all_parameters(unfolded_polarization_exprs[xyz])
    )
intensity_func = _lambdify_with_all_parameters(unfolded_intensity_expr)

## Parameter bootstrapping

In [None]:
# Number of smeared parameter sets drawn per model; passed as `size` to
# `smear_gaussian` and used as the loop length of the bootstrap evaluation.
n_bootstraps = 200

In [None]:
rng = np.random.default_rng(seed=0)


def create_gaussian_distribution(
    mean: complex | float, std: complex | float, size: int
):
    """Draw `size` normally distributed samples from the module-level `rng`.

    If both `mean` and `std` are `complex`, the real and imaginary parts are
    sampled independently (real part first) and combined into a complex array.
    If both are `float` or `int`, a single real-valued sample is drawn.

    Raises:
        NotImplementedError: for any other combination of argument types,
            including a complex `mean` paired with a real `std`.
    """
    both_complex = isinstance(mean, complex) and isinstance(std, complex)
    if both_complex:
        # Keep this order: the real part consumes random numbers first.
        real_part = rng.normal(mean.real, std.real, size)
        imag_part = rng.normal(mean.imag, std.imag, size)
        return real_part + imag_part * 1j
    real_types = (float, int)
    if not (isinstance(mean, real_types) and isinstance(std, real_types)):
        raise NotImplementedError
    return rng.normal(mean, std, size)


def smear_gaussian(
    parameter_values: dict[str, complex | float],
    parameter_uncertainties: dict[str, complex | float],
    size: int,
) -> dict[str, np.ndarray]:
    """Create a Gaussian sample of length `size` for every parameter.

    Each entry of `parameter_values` serves as the mean and the entry with the
    same key in `parameter_uncertainties` as the standard deviation. Parameters
    are processed in the iteration order of `parameter_values`.

    Raises:
        KeyError: if a parameter has no entry in `parameter_uncertainties`.
    """
    return {
        name: create_gaussian_distribution(
            central_value, parameter_uncertainties[name], size
        )
        for name, central_value in parameter_values.items()
    }


# Titles of the amplitude models whose parameters are loaded below. Each string
# is passed verbatim as `title` to `load_model_parameters`, so the exact
# spelling (including the double spaces and LaTeX remnants) must not be
# "cleaned up" -- presumably they have to match the entries in
# ../data/modelparameters.json character by character.
# fmt:off
allowed_model_titles = [
    "Default amplitude model.",
    "Alternative amplitude model with K^*(892) with free mass and width.",
    "Alternative amplitude model with Lz(1670) with free mass and width.",
    "Alternative amplitude model with Lz(1690) with free mass and width.",
    "Alternative amplitude model with Deltares^{++}(1232) with free mass and width.",
    "Alternative amplitude model with Lz(1600), Deltares(1600)^{++}, Deltares(1700)^{++} with free mass and width.",
    "Alternative amplitude model with free Lz(1405) Flatt'e widths, indicated as G1 (pK channel) and G2 (Sigmapi).",
    "Alternative amplitude model with Lz(1800) contribution added with mass and width from Ref.~cite{PDG2020}.",
    "Alternative amplitude model with  Lz(1810) contribution added with mass and width from Ref.~cite{PDG2020}.",
    "Alternative amplitude model with Deltares(1620)^{++} contribution added with free mass and width.",
    "Alternative amplitude model in which a Relativistic Breit-Wigner is used for the K^*(700) contribution.",
    "Alternative amplitude model with K^*(700) with free mass and width.",
    "Alternative amplitude model with  K^*(1410) contribution added with mass and width from Ref.~cite{PDG2020}.",
    "Alternative amplitude model in which a Relativistic Breit-Wigner is used for the K^*(1430) contribution.",
    "Alternative amplitude model with K^*(1430) with free width.",
    "Alternative amplitude model with an additional overall exponential form factor exp(-alpha q^2) multiplying Bugg lineshapes. The exponential parameter is indicated as ``alpha''.",
    "Alternative amplitude model with free radial parameter d for the Lc resonance, indicated as dLc.",
    "Alternative amplitude model obtained using LS couplings.",
]
# fmt:on
# For every allowed model: load the parameter values and their uncertainties,
# key them by parameter name, and draw `n_bootstraps` Gaussian-smeared values
# per parameter. Models are processed in list order, so the shared random
# number generator is consumed in a reproducible sequence.
parameter_file = "../data/modelparameters.json"
model_parameters: dict[str, dict[str, np.ndarray]] = {}
for title in allowed_model_titles:
    symbol_parameters = load_model_parameters(
        parameter_file, decay, title, typ="value"
    )
    symbol_uncertainties = load_model_parameters(
        parameter_file, decay, title, typ="uncertainty"
    )
    values = {str(symbol): value for symbol, value in symbol_parameters.items()}
    uncertainties = {
        str(symbol): sigma for symbol, sigma in symbol_uncertainties.items()
    }
    model_parameters[title] = smear_gaussian(values, uncertainties, size=n_bootstraps)

In [None]:
# Number of grid points per axis requested from `generate_meshgrid_sample`.
resolution = 200
transformer = create_data_transformer(model)
phsp = generate_meshgrid_sample(decay, resolution)
# Convert the grid sample into the variables that the lambdified functions take.
data = transformer(phsp)
# Axes of all two-dimensional plots below.
X, Y = data["sigma1"], data["sigma2"]

In [None]:
# Evaluate intensity and polarization on the grid for every smeared parameter
# set of the selected model (index into `model_parameters`, 0 = first title).
model_number = 0
model_pars = list(model_parameters.values())[model_number]
# Snapshot of the initial parameter values, used to reset the functions before
# each bootstrap so that no value of a previous iteration can linger.
original_parameters = dict(intensity_func.parameters)

bootstrap_intensities = []
bootstrap_polarizations = []
for i in tqdm(
    range(n_bootstraps), desc="Computing polarizations for parameter combinations"
):
    new_parameters = {k: v[i] for k, v in model_pars.items()}
    # Reset and update ALL functions (polarization and intensity) exactly once.
    # The intensity function is part of this list, so it needs no separate
    # reset/update afterwards.
    for func in polarization_funcs + [intensity_func]:
        func.update_parameters(original_parameters)
        func.update_parameters(new_parameters)
    bootstrap_polarizations.append([func(data).real for func in polarization_funcs])
    bootstrap_intensities.append(intensity_func(data))

bootstrap_intensities = jnp.array(bootstrap_intensities)
# Stacking yields (bootstrap, component, ...); move the component axis to the
# front so that axis 0 runs over x, y, z and axis 1 over the bootstraps.
bootstrap_polarizations = jnp.array(bootstrap_polarizations)
bootstrap_polarizations = jnp.swapaxes(bootstrap_polarizations, 0, 1)
# Euclidean norm over the three polarization components.
bootstrap_polarization_norms = jnp.sqrt(jnp.sum(bootstrap_polarizations**2, axis=0))

In [None]:
# Sanity checks on the array layout that the plots below rely on:
# bootstraps along one axis, followed by the two grid axes.
grid_shape = (resolution, resolution)
assert bootstrap_intensities.shape == (n_bootstraps, *grid_shape)
assert bootstrap_polarizations.shape == (3, n_bootstraps, *grid_shape)
assert bootstrap_polarization_norms.shape == (n_bootstraps, *grid_shape)

## Statistical uncertainties

In [None]:
# Set the inline backend's figure format to PNG for the figures rendered below.
%config InlineBackend.figure_formats = ['png']

In [None]:
# Top row: bootstrap mean of |alpha|, alpha_x, alpha_y, alpha_z on the Dalitz
# plane. Bottom row: the corresponding standard deviation over the bootstraps.
fig, axes = plt.subplots(
    ncols=4,
    nrows=2,
    figsize=(15, 8),
    gridspec_kw={"width_ratios": [1, 1, 1, 1.15]},
    sharex=True,
    sharey=True,
    tight_layout=True,
)
fig.suptitle(R"Polarization sensitivity $\vec\alpha$")
s1_label = R"$\sigma_1=m^2\left(K\pi\right)$"
s2_label = R"$\sigma_2=m^2\left(pK\right)$"
axes[0, 0].set_ylabel(s2_label)
axes[1, 0].set_ylabel(s2_label)

# The bootstrap axis is axis 0 for the norms and axis 1 for the components.
alpha_norm_mean_over_bootstrap = jnp.mean(bootstrap_polarization_norms, axis=0)
alpha_norm_std_over_bootstrap = jnp.std(bootstrap_polarization_norms, axis=0)
alpha_xyz_mean_over_bootstrap = jnp.mean(bootstrap_polarizations, axis=1)
alpha_xyz_std_over_bootstrap = jnp.std(bootstrap_polarizations, axis=1)
alpha_mean_over_bootstrap = [
    alpha_norm_mean_over_bootstrap,
    *alpha_xyz_mean_over_bootstrap,
]
alpha_std_over_bootstrap = [
    alpha_norm_std_over_bootstrap,
    *alpha_xyz_std_over_bootstrap,
]

# Only the last panel of each row carries a colorbar, so every panel in a row
# has to use the same colour range. The mean row is fixed to [-1, +1]; for the
# standard-deviation row the common upper limit is the largest value over all
# four panels (NaN entries are ignored).
global_max_std = max(float(jnp.nanmax(std)) for std in alpha_std_over_bootstrap)

for i in range(4):
    if i != 0:
        title = Rf"$\alpha_{'xyz'[i-1]}$"
    else:
        title = R"$\left|\vec\alpha\right|$"
    is_last_column = i == 3
    mesh = axes[0, i].pcolormesh(X, Y, alpha_mean_over_bootstrap[i], cmap=cm.RdYlGn_r)
    mesh.set_clim(vmin=-1, vmax=+1)
    axes[0, i].set_title(title)
    if is_last_column:
        c_bar = fig.colorbar(mesh, ax=axes[0, i], pad=0.01)
        c_bar.ax.set_ylabel(Rf"$\alpha$ value averaged with {n_bootstraps} bootstraps")
        c_bar.ax.set_yticks([-1, 0, +1])
        c_bar.ax.set_yticklabels(["-1", "0", "+1"])
    mesh = axes[1, i].pcolormesh(X, Y, alpha_std_over_bootstrap[i])
    mesh.set_clim(vmin=0, vmax=global_max_std)
    axes[1, i].set_xlabel(s1_label)
    if is_last_column:
        c_bar = fig.colorbar(mesh, ax=axes[1, i], pad=0.01)
        c_bar.ax.set_ylabel("standard deviation")
plt.show()

In [None]:
# Intensity-weighted average of |alpha| and alpha_x/y/z over the bootstraps,
# shown per grid point on the Dalitz plane.
fig, axes = plt.subplots(
    ncols=4,
    figsize=(15, 4.5),
    gridspec_kw={"width_ratios": [1, 1, 1, 1.15]},
    sharey=True,
    tight_layout=True,
)
fig.suptitle(R"$I \,\cdot \vec\alpha$ distributions")
axes[0].set_ylabel(s2_label)

# Normalisation shared by all four panels: the intensity summed over the
# bootstrap axis. It broadcasts against the component axis of the x/y/z array.
intensity_sum_over_bootstrap = jnp.sum(bootstrap_intensities, axis=0)
alpha_xyz_times_I = (
    jnp.sum(bootstrap_polarizations * bootstrap_intensities, axis=1)
    / intensity_sum_over_bootstrap
)
alpha_norm_times_I = (
    jnp.sum(bootstrap_polarization_norms * bootstrap_intensities, axis=0)
    / intensity_sum_over_bootstrap
)
alpha_times_I = [alpha_norm_times_I, *alpha_xyz_times_I]

for i in range(4):
    if i != 0:
        title = Rf"$\alpha_{'xyz'[i-1]}$"
    else:
        title = R"$\left|\vec\alpha\right|$"
    axes[i].set_title(title)
    # Label the x axis as in the other figures; the y label is shared.
    axes[i].set_xlabel(s1_label)
    mesh = axes[i].pcolormesh(X, Y, alpha_times_I[i], cmap=cm.RdYlGn_r)
    mesh.set_clim(vmin=-1, vmax=+1)
    if i == 3:
        # All panels use the same [-1, +1] range, so one colorbar suffices.
        c_bar = fig.colorbar(mesh, ax=axes[i], pad=0.02)
        c_bar.ax.set_ylabel(Rf"$\alpha$ value averaged with {n_bootstraps} bootstraps")
        c_bar.ax.set_yticks([-1, 0, +1])
        c_bar.ax.set_yticklabels(["-1", "0", "+1"])
plt.show()

In [None]:
# Mean intensity over the bootstraps (left) and its relative spread (right).
intensity_mean_over_bootstrap = jnp.mean(bootstrap_intensities, axis=0)
intensity_std_over_bootstrap = jnp.std(bootstrap_intensities, axis=0)
Z = intensity_std_over_bootstrap / intensity_mean_over_bootstrap

fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(12, 6), sharey=True)
fig.suptitle("Intensity distribution")

# Left panel: bootstrap average.
ax1.set_title(Rf"average of {n_bootstraps} bootstraps")
ax1.set_xlabel(s1_label)
ax1.set_ylabel(s2_label)
mesh = ax1.pcolormesh(X, Y, intensity_mean_over_bootstrap)
fig.colorbar(mesh, ax=ax1, pad=0.01)

# Right panel: standard deviation relative to the mean (y axis is shared).
ax2.set_title("standard deviation / intensity")
ax2.set_xlabel(s1_label)
mesh = ax2.pcolormesh(X, Y, Z)
fig.colorbar(mesh, ax=ax2, pad=0.01)

fig.tight_layout()
plt.show()

### Averaged polarizations

```{autolink-skip} section
```

In [None]:
# Intensity-weighted average over the two grid axes, one value per bootstrap.
# `nansum` skips NaN grid points in both numerator and denominator.
grid_axes = (-1, -2)
intensity_sum_per_bootstrap = jnp.nansum(bootstrap_intensities, axis=grid_axes)
weighted_ɑ_norm_per_bootstrap = (
    jnp.nansum(bootstrap_polarization_norms * bootstrap_intensities, axis=grid_axes)
    / intensity_sum_per_bootstrap
)
# Same for the three components; the leading component axis is kept.
weighted_ɑ_per_bootstrap = (
    jnp.nansum(bootstrap_polarizations * bootstrap_intensities, axis=grid_axes)
    / intensity_sum_per_bootstrap
)

In [None]:
# Central value and uncertainty of the averaged polarization.
# The uncertainty propagated from the smeared parameters is the standard
# deviation of the bootstrap distribution itself. It must NOT be divided by
# sqrt(n_bootstraps): that would give the precision of the estimated mean,
# which shrinks to zero as more bootstraps are drawn.
ɑ_norm_weighted_mean = weighted_ɑ_norm_per_bootstrap.mean()
ɑ_norm_weighted_std = weighted_ɑ_norm_per_bootstrap.std()
# Axis 1 is the bootstrap axis; axis 0 runs over the x, y, z components.
ɑ_weighted_mean = weighted_ɑ_per_bootstrap.mean(axis=1)
ɑ_weighted_std = weighted_ɑ_per_bootstrap.std(axis=1)
src = Rf"""
\begin{{array}}{{ccr}}
  \overline{{\alpha_x}} & = & {ɑ_weighted_mean[0]:.4f} \pm {ɑ_weighted_std[0]:.4f} \\
  \overline{{\alpha_y}} & = & {ɑ_weighted_mean[1]:.4f} \pm {ɑ_weighted_std[1]:.4f} \\
  \overline{{\alpha_z}} & = & {ɑ_weighted_mean[2]:.4f} \pm {ɑ_weighted_std[2]:.4f} \\
  \overline{{\left|\vec{{\alpha}}\right|}} & = & {ɑ_norm_weighted_mean:.4f} \pm {ɑ_norm_weighted_std:.4f} \\
\end{{array}}
"""
Latex(src)

## Systematic uncertainties