diff --git a/.gitignore b/.gitignore
index 24fbc1ff..af9f647c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,3 +159,7 @@
 cosmo_inference/cosmosis_config/values_*
 notebooks/tests/star_response.ipynb
 sp_validation.code-workspace
+notebooks/calibrate_comprehensive_cat.ipynb
+notebooks/test_apply_hsp_masks.ipynb
+notebooks/demo_apply_hsp_masks.py
+notebooks/demo_apply_hsp_masks.ipynb
diff --git a/notebooks/calibrate_comprehensive_cat.py b/notebooks/calibrate_comprehensive_cat.py
new file mode 100644
index 00000000..1fd6dfad
--- /dev/null
+++ b/notebooks/calibrate_comprehensive_cat.py
@@ -0,0 +1,472 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.15.1
+#   kernelspec:
+#     display_name: Python 3
+#     language: python
+#     name: python3
+# ---
+
+# # Calibrate comprehensive catalogue
+
+# %reload_ext autoreload
+# %autoreload 2
+
+import sys
+import os
+import numpy as np
+from astropy.io import fits
+import matplotlib.pylab as plt
+
+from sp_validation import run_joint_cat as sp_joint
+from sp_validation import util
+from sp_validation.basic import metacal
+from sp_validation import calibration
+import sp_validation.cat as cat
+
+obj = sp_joint.CalibrateCat()
+
+obj._params["input_path"] = "unions_shapepipe_comprehensive_struc_2024_v1.4.2.hdf5"
+obj._params["cmatrices"] = True
+obj._params["verbose"] = True
+
+# !pwd
+
+dat, dat_ext = obj.read_cat(load_into_memory=False)
+
+print(f"Found {len(dat)} (~{util.millify(len(dat))}) objects in catalogue")
+
+
+# ## Masking
+
+class mask():
+    def __init__(self, col_name, label, kind="equal", value=0, dat=None):
+        self._col_name = col_name
+        self._label = label
+        self._value = value
+        self._kind = kind
+        self._num_ok = None
+
+        if dat is not None:
+            self.apply(dat)
+
+    @classmethod
+    def from_list(cls, masks, label="combined"):
+        my_mask = cls(label, label, kind="combined", value=None)
+
+        my_mask._mask = np.logical_and.reduce([m._mask for m in masks])
+
+        return my_mask
+
+    def apply(self, dat):
+        if self._kind == "equal":
+            self._mask = dat[self._col_name] == self._value
+        elif self._kind == "not_equal":
+            self._mask = dat[self._col_name] != self._value
+        elif self._kind == "greater_equal":
+            self._mask = dat[self._col_name] >= self._value
+        elif self._kind == "range":
+            self._mask = (dat[self._col_name] >= self._value[0]) & (dat[self._col_name] <= self._value[1])
+        else:
+            raise ValueError(f"Invalid kind {self._kind}")
+
+    @classmethod
+    def print_strings(cls, coln, lab, num, fnum):
+        print(f"{coln:30s} {lab:30s} {num:10s} {fnum:10s}")
+
+    def print_stats(self, num_obj):
+        if self._num_ok is None:
+            self._num_ok = sum(self._mask)
+
+        si = f"{self._num_ok:10d}"
+        sf = f"{self._num_ok/num_obj:10.2%}"
+        self.print_strings(self._col_name, self._label, si, sf)
+
+    def print_summary(self, f_out):
+        print(f"[{self._label}]\t\t\t", file=f_out, end="")
+        sign = None
+        if self._kind == "equal":
+            sign = "="
+        elif self._kind == "not_equal":
+            sign = "!="
+        elif self._kind == "greater_equal":
+            sign = ">="
+        if sign is not None:
+            print(f"{self._col_name} {sign} {self._value}", file=f_out)
+
+        if self._kind == "range":
+            print(f"{self._value[0]} <= {self._col_name} <= {self._value[1]}", file=f_out)
+
+
+# ### Pre-processing ShapePipe flags
+
+masks = []
+
+# +
+# SExtractor flags (see galaxy.py:classification_galaxy_base)
+
+my_mask = mask(col_name="FLAGS",
label="SE FLAGS", kind="equal", value=0, dat=dat) +masks.append(my_mask) + +# + +# Duplicate objects + +my_mask = mask(col_name="overlap", label="tile overlap", kind="equal", value=True, dat=dat) +masks.append(my_mask) +# - + +my_mask = mask(col_name="IMAFLAGS_ISO", label="SP mask", kind="equal", value=0, dat=dat) +masks.append(my_mask) + +# + +# Number of epochs + +my_mask = mask(col_name="N_EPOCH", label=r"$n_{\rm epoch}$", kind="greater_equal", value=2, dat=dat) +masks.append(my_mask) + +# + +# Magnitude range + +my_mask = mask(col_name="mag", label="mag range", kind="range", value=[15, 30], dat=dat) +masks.append(my_mask) + +# + +# ngmix flags + +col_names = ["NGMIX_MCAL_FLAGS", "NGMIX_MOM_FAIL"] +tmp_labels = ["ngmix flag", "ngmix moments fail"] +good_mask_value = 0 + +for col_name, label in zip(col_names, tmp_labels): + my_mask = mask(col_name=col_name, label=label, kind="equal", value=good_mask_value, dat=dat) + masks.append(my_mask) + +# MKDEBUG TODO: check should be two components, see galaxy.py. +col_name = "NGMIX_ELL_PSFo_NOSHEAR_0" +label = "bad PSF ell" +kind = "not_equal" +value = -10 +my_mask = mask(col_name=col_name, label=label, kind=kind, value=value, dat=dat) +masks.append(my_mask) + +# - + +print(f"Combining {len(masks)} pre-processing masks") +mask_combined_pre = mask.from_list(masks, label="combined_pre") + +# + +# Output some mask statistics + +num_obj = dat.shape[0] + +mask.print_strings("flag", "label", f"{'num_ok':>10}", f"{'num_ok[%]':>10}") +for my_mask in masks: + my_mask.print_stats(num_obj) + +mask_combined_pre.print_stats(num_obj) +# - + +# ### Structural masks + +# Get all columns of type ``bool'' from the extended catalogue +column_ext_info = { + name: "bool" if dat_ext.dtype.fields[name][0] == "int8" else dat_ext.dtype.fields[name][0] + for name in dat_ext.dtype.names +} + +# + +# Get all columns of type ``bool'' from the extended catalogue. +# 0 = good mask value + +# Store the index of the first structural mask = current length of masks list +idx_first_struct = len(masks) + +value = False +for name in column_ext_info: + if column_ext_info[name] == "bool": + my_mask = mask(col_name=name, label=name, kind="equal", value=value, dat=dat_ext) + masks.append(my_mask) + else: + print(f"Type for column {name} not bool, skipping") +# - + +# Number of pointings +name = "npoint3" +label = r"$n_{\rm pointing}$" +value = 3 +kind = "greater_equal" +my_mask = mask(col_name=name, label=label, kind=kind, value=value, dat=dat_ext) +masks.append(my_mask) + +n_struct = len(masks) - idx_first_struct +print(f"Combining {n_struct} structural masks") +mask_combined_struct = mask.from_list(masks[idx_first_struct:], label="combined_struct") + +for my_mask in masks[idx_first_struct:]: + my_mask.print_stats(num_obj) +mask_combined_struct.print_stats(num_obj) + +print(f"Combining pre-processing and structural masks, {len(masks)} in total") +mask_combined = mask.from_list([mask_combined_pre, mask_combined_struct], label="combined") + +# + +# ### Calibration + +# + +# Define cuts and metacal input parameters + +# Ellipticity dispersion +sigma_eps_prior = 0.34 + +# Signal-to-noise range +gal_snr_min = 10 +gal_snr_max = 500 + +# Relative-size (hlr / hlr_psf) range +gal_rel_size_min = 0.5 +gal_rel_size_max = 3 + +# Correct relative size for ellipticity? 
+gal_size_corr_ell = False
+
+# For output
+mask_metacal = []
+
+col_name = "T_gal/T_PSF"
+label = "metacal relative galaxy size"
+my_mask = mask(col_name=col_name, label=label, kind="range", value=[gal_rel_size_min, gal_rel_size_max])
+masks.append(my_mask)
+
+col_name = "snr"
+label = "metacal signal-to-noise ratio"
+my_mask = mask(col_name=col_name, label=label, kind="range", value=[gal_snr_min, gal_snr_max])
+masks.append(my_mask)
+
+# +
+# Call metacal
+
+gal_metacal = metacal(
+    dat,
+    mask_combined._mask,
+    snr_min=gal_snr_min,
+    snr_max=gal_snr_max,
+    rel_size_min=gal_rel_size_min,
+    rel_size_max=gal_rel_size_max,
+    size_corr_ell=gal_size_corr_ell,
+    sigma_eps=sigma_eps_prior,
+    col_2d=False,
+    verbose=True,
+)
+
+# +
+# Get calibrated quantities
+
+g_corr, g_uncorr, w, mask_metacal = calibration.get_calibrated_quantities(gal_metacal)
+
+# Additive bias
+c = np.zeros(2)
+c_err = np.zeros(2)
+
+
+for comp in (0, 1):
+    c[comp] = np.mean(g_uncorr[comp])
+
+    # MKDEBUG TODO: Use std of mean instead, which is consistent with jackknife
+    c_err[comp] = np.std(g_uncorr[comp])
+
+# Shear estimate corrected for additive bias
+g_corr_mc = np.zeros_like(g_corr)
+c_corr = np.linalg.inv(gal_metacal.R).dot(c)
+for comp in (0, 1):
+    g_corr_mc[comp] = g_corr[comp] - c_corr[comp]
+# -
+
+name = "after cuts"
+# Number of objects passing the combined mask and the metacal cuts
+num_ok = len(w)
+print(f"{name:30s} {num_ok:10d} {num_ok/num_obj:10.2%}")
+
+# +
+# Additional quantities
+R_shear = np.mean(gal_metacal.R_shear, 2)
+
+ra = cat.get_col(dat, "RA", mask_combined._mask, mask_metacal)
+dec = cat.get_col(dat, "Dec", mask_combined._mask, mask_metacal)
+mag = cat.get_col(dat, "mag", mask_combined._mask, mask_metacal)
+snr = cat.get_snr("ngmix", dat, mask_combined._mask, mask_metacal)
+
+add_cols = ["FLUX_RADIUS", "FWHM_IMAGE", "FWHM_WORLD", "MAGERR_AUTO", "MAG_WIN", "MAGERR_WIN", "FLUX_AUTO", "FLUXERR_AUTO", "FLUX_APER", "FLUXERR_APER"]
+add_cols_data = {}
+for key in add_cols:
+    add_cols_data[key] = dat[key][mask_combined._mask][mask_metacal]
+
+# +
+# Compute DES weights
+
+cat_gal = {}
+cat_gal["e1_uncal"] = g_uncorr[0]
+cat_gal["e2_uncal"] = g_uncorr[1]
+cat_gal["R_g11"] = gal_metacal.R11
+cat_gal["R_g12"] = gal_metacal.R12
+cat_gal["R_g21"] = gal_metacal.R21
+cat_gal["R_g22"] = gal_metacal.R22
+cat_gal["NGMIX_T_NOSHEAR"] = dat["NGMIX_T_NOSHEAR"][mask_combined._mask][mask_metacal]
+cat_gal["NGMIX_Tpsf_NOSHEAR"] = dat["NGMIX_Tpsf_NOSHEAR"][mask_combined._mask][mask_metacal]
+cat_gal["snr"] = snr
+
+name = 'w_des'
+num_bins = 20
+w = calibration.get_w_des(cat_gal, num_bins)
+
+# +
+# Correct for PSF leakage
+
+cat_gal["e1"] = g_corr_mc[0]
+cat_gal["e2"] = g_corr_mc[1]
+#cat_gal["e1_PSF"] = e_psf
+
+num_bins = 20
+weight_type = 'des'
+#alpha_1, alpha_2 = calibration.get_alpha_leakage_per_object(cat_gal, num_bins, weight_type)
+
+# +
+output_shape_cat_path = obj._params["input_path"].replace("comprehensive", "cut")
+output_shape_cat_path = output_shape_cat_path.replace("hdf5", "fits")
+
+cat.write_shape_catalog(
+    output_shape_cat_path,
+    ra,
+    dec,
+    w,
+    mag=mag,
+    g=g_corr_mc,
+    g1_uncal=g_uncorr[0],
+    g2_uncal=g_uncorr[1],
+    R=gal_metacal.R,
+    R_shear=R_shear,
+    R_select=gal_metacal.R_selection,
+    c=c,
+    c_err=c_err,
+    add_cols=add_cols_data
+)
+# -
+
+with open("masks.txt", "w") as f_out:
+    for my_mask in masks:
+        my_mask.print_summary(f_out)
+
+from scipy import stats
+def correlation_matrix(masks, confidence_level=0.9):
+
+    n_key = len(masks)
+    print(n_key)
+
+    cm = np.empty((n_key, n_key))
+    r_val = 
np.zeros_like(cm) + r_cl = np.empty((n_key, n_key, 2)) + + for idx, mask_idx in enumerate(masks): + for jdx, mask_jdx in enumerate(masks): + res = stats.pearsonr(mask_idx._mask, mask_jdx._mask) + r_val[idx][jdx] = res.statistic + r_cl[idx][jdx] = res.confidence_interval(confidence_level=confidence_level) + + return r_val, r_cl + + +# + +all_masks = masks[:-3] + +# + +if not obj._params["cmatrices"]: + print("Skipping cmatric calculations") + sys.exit(0) + +r_val, r_cl = correlation_matrix(all_masks) + +# + + +n = len(all_masks) +keys = [my_mask._label for my_mask in all_masks] + +plt.imshow(r_val, cmap='coolwarm', vmin=-1, vmax=1) +plt.xticks(np.arange(n), keys) +plt.yticks(np.arange(n), keys) +plt.xticks(rotation=90) +plt.colorbar() +plt.savefig("correlation_matrix.png") + +# - + +def confusion_matrix(prediction, observation): + + result = {} + + pred_pos = sum(prediction) + result["true_pos"] = sum(prediction & observation) + result["true_neg"] = sum(np.logical_not(prediction) & np.logical_not(observation)) + result["false_neg"] = sum(prediction & np.logical_not(observation)) + result["false_pos"] = sum(np.logical_not(prediction) & observation) + result["false_pos_rate"] = result["false_pos"] / (result["false_pos"] + result["true_neg"]) + result["false_neg_rate"] = result["false_neg"] / (result["false_neg"] + result["true_pos"]) + result["sensitivity"] = result["true_pos"] / (result["true_pos"] + result["false_neg"]) + result["specificity"] = result["true_neg"] / (result["true_neg"] + result["false_pos"]) + + cm = np.zeros((2, 2)) + cm[0][0] = result["true_pos"] + cm[1][1] = result["true_neg"] + cm[0][1] = result["false_neg"] + cm[1][0] = result["false_pos"] + cmn = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + result["cmn"] = cmn + + return result + + +n_key = len(all_masks) +cms = np.zeros((n_key, n_key, 2, 2)) +for idx in range(n_key): + for jdx in range(n_key): + + if idx == jdx: continue + + print(idx, jdx) + res = confusion_matrix(masks[idx]._mask, masks[jdx]._mask) + cms[idx][jdx] = res["cmn"] + +# + +import seaborn as sns + +fig = plt.figure(figsize=(30, 30)) +axes = np.empty((n_key, n_key), dtype=object) +for idx in range(n_key): + for jdx in range(n_key): + if idx == jdx: continue + axes[idx][jdx] = plt.subplot2grid((n_key, n_key), (idx, jdx), fig=fig) + +matrix_elements = ["True", "False"] + +for idx in range(n_key): + for jdx in range(n_key): + + if idx == jdx: continue + + ax = axes[idx, jdx] + sns.heatmap(cms[idx][jdx], annot=True, fmt='.2f', xticklabels=matrix_elements, yticklabels=matrix_elements, ax=ax) + ax.set_ylabel(masks[idx]._label) + ax.set_xlabel(masks[jdx]._label) + +plt.show(block=False) +plt.savefig("confusion_matrix.png") +# - + +obj.close_hd5() + + diff --git a/notebooks/cosmology.ipynb b/notebooks/cosmology.ipynb index 0dc7ce47..4878d549 100644 --- a/notebooks/cosmology.ipynb +++ b/notebooks/cosmology.ipynb @@ -116,16 +116,24 @@ " r_planck = res_g[sh].meanr\n", " break\n", "\n", - "xi_p_planck, xi_m_planck = get_theo_xi(\n", - " r_planck,\n", - " z,\n", - " nz,\n", - " Omega_m=Om,\n", - " h=h,\n", - " Omega_b=Ob,\n", - " sig8=sig8,\n", - " ns=ns\n", - ")" + "try:\n", + " import pyccl as ccl\n", + "\n", + " xi_p_planck, xi_m_planck = get_theo_xi(\n", + " r_planck,\n", + " z,\n", + " nz,\n", + " Omega_m=Om,\n", + " h=h,\n", + " Omega_b=Ob,\n", + " sig8=sig8,\n", + " ns=ns\n", + " )\n", + "\n", + "except:\n", + " print(\"Warning: Cannot impory pyccl\")\n", + " xi_p_planck = None\n", + " xi_m_planck = None\n" ] }, { @@ -151,18 +159,21 @@ " x = [\n", " 
res_g[sh].meanr[pos_ind_gg],\n", " res_g[sh].meanr[neg_ind_gg],\n", - " r_planck\n", " ]\n", " y = [\n", " res_g[sh].xip[pos_ind_gg],\n", " -res_g[sh].xip[neg_ind_gg],\n", - " xi_p_planck\n", " ]\n", " yerr = [\n", " np.sqrt(res_g[sh].varxip[pos_ind_gg]),\n", " np.sqrt(res_g[sh].varxip[neg_ind_gg]),\n", - " xi_p_planck * np.nan\n", " ]\n", + "\n", + " if xi_p_planck is not None:\n", + " x.append(r_planck)\n", + " y.append(xi_p_planck)\n", + " yerr.append(xi_p_planck * np.nan)\n", + "\n", " title = fr'{sh} Shear-shear correlation function $\\xi_+$'\n", " out_path = f'{plot_dir}/xi_+_shear_shear_{sh}.pdf'\n", "\n", @@ -200,21 +211,23 @@ " x = [\n", " res_g[sh].meanr[pos_ind_gg],\n", " res_g[sh].meanr[neg_ind_gg],\n", - " r_planck\n", " ]\n", " y = [\n", " res_g[sh].xim[pos_ind_gg],\n", " -res_g[sh].xim[neg_ind_gg],\n", - " xi_m_planck\n", " ]\n", " yerr = [\n", " np.sqrt(res_g[sh].varxim[pos_ind_gg]),\n", " np.sqrt(res_g[sh].varxim[neg_ind_gg]),\n", - " xi_m_planck * np.nan\n", " ]\n", " title = f'{sh} Shear-shear correlation function $\\\\xi_-$'\n", " out_path = f'{plot_dir}/xi_-_shear_shear_{sh}.pdf'\n", "\n", + " if xi_m_planck is not None:\n", + " x.append(r_planck)\n", + " y.append(xi_m_planck)\n", + " yerr.append(xi_m_planck * np.nan)\n", + " \n", " plot_data_1d(\n", " x,\n", " y,\n", @@ -412,7 +425,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/demo_apply_hsp_masks.py b/notebooks/demo_apply_hsp_masks.py new file mode 100644 index 00000000..0e4e0726 --- /dev/null +++ b/notebooks/demo_apply_hsp_masks.py @@ -0,0 +1,110 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.15.1 +# kernelspec: +# display_name: sp_validation +# language: python +# name: python3 +# --- + +# # Test to apply hsp masks + +# %reload_ext autoreload +# %autoreload 2 + +# + +import os +import numpy as np +import tracemalloc +import healsparse as hsp + +from sp_validation import run_joint_cat as sp_joint + +# + +# Trace and print used memory if True +trace_mem = True + +if trace_mem: + tracemalloc.start() +# - + +# Create instance of ApplyHspMasks object +obj = sp_joint.ApplyHspMasks() + + +# Bits set for non-tomographic catalogues +bit_list = [1, 2, 4, 8, 64, 1024] + +# + +# combine bit list with & operator +# equivalent to bits = sum(bit_list) +# if elements in bit_list are of type 2^n + +bits = 0 +for b in bit_list: + bits = bits | b + +# + +# Set parameters +obj._params["input_path"] = "unions_shapepipe_comprehensive_2024_v1.4.2.hdf5" +obj._params["output_path"] = "unions_shapepipe_comprehensive_struc_2024_v1.4.2.hdf5" +obj._params["mask_dir"] = f"{os.environ['HOME']}/v1.4.x/masks" +obj._params["nside"] = 131072 +obj._params["file_base"] = "mask_r_" +obj._params["bits"] = bits + +obj._params["aux_mask_files"] = f"{obj._params['mask_dir']}/coverage.hsp" +obj._params["aux_mask_labels"] = "npoint3" +obj._params["verbose"] = True +# - + +# ## Run + +# + +# Check parameter validity +obj.check_params() + +# Update parameters (here: strings to list) +obj.update_params() +# - + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Read catalogue +dat = obj.read_cat(load_into_memory=False, mode="r") + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: 
{current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Get bit-coded masks +masks = obj.get_masks(dat=dat) + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Add mask bits as new columns +dat_new = obj.append_masks(dat, masks) + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Write extended data to new HDF5 file +obj.write_hdf5_file(dat, dat_new) + +# Close input HDF5 catalogue file +obj.close_hd5() + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + tracemalloc.stop() diff --git a/notebooks/main_set_up.ipynb b/notebooks/main_set_up.ipynb index fd10349d..7ecdc347 100644 --- a/notebooks/main_set_up.ipynb +++ b/notebooks/main_set_up.ipynb @@ -22,13 +22,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.execute_input": "2025-02-11T13:27:17.061070Z", + "iopub.status.busy": "2025-02-11T13:27:17.060921Z", + "iopub.status.idle": "2025-02-11T13:27:17.101791Z", + "shell.execute_reply": "2025-02-11T13:27:17.101155Z", + "shell.execute_reply.started": "2025-02-11T13:27:17.061054Z" } }, "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# General library imports\n", "import sys\n", @@ -42,7 +56,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.198398Z", + "iopub.status.idle": "2025-02-11T13:27:18.198606Z", + "shell.execute_reply": "2025-02-11T13:27:18.198506Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.198497Z" } }, "outputs": [], @@ -51,7 +68,8 @@ "from sp_validation.io import *\n", "from sp_validation.cat import *\n", "from sp_validation.survey import *\n", - "from sp_validation.galaxy import *" + "from sp_validation.galaxy import *\n", + "from sp_validation.calibration import *" ] }, { @@ -66,7 +84,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.199368Z", + "iopub.status.idle": "2025-02-11T13:27:18.199556Z", + "shell.execute_reply": "2025-02-11T13:27:18.199472Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.199463Z" } }, "outputs": [], @@ -87,7 +108,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.200467Z", + "iopub.status.idle": "2025-02-11T13:27:18.200992Z", + "shell.execute_reply": "2025-02-11T13:27:18.200889Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.200878Z" } }, "outputs": [], @@ -110,7 +134,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.201455Z", + "iopub.status.idle": "2025-02-11T13:27:18.201655Z", + "shell.execute_reply": "2025-02-11T13:27:18.201558Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.201549Z" } }, "outputs": [], @@ -121,7 +148,10 @@ " dd = np.load(galaxy_cat_path, mmap_mode=mmap_mode)\n", "else:\n", " print(\"Loading galaxy .hdf5 file...\")\n", 
- " dd = read_hdf5_file(galaxy_cat_path, name, stats_file, param_path=param_list_path)" + " dd = read_hdf5_file(galaxy_cat_path, name, stats_file, param_path=param_list_path)\n", + "\n", + "n_obj = len(dd)\n", + "print_stats(f'Read {n_obj} objects from file {galaxy_cat_path}', stats_file, verbose=verbose)" ] }, { @@ -136,7 +166,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.202131Z", + "iopub.status.idle": "2025-02-11T13:27:18.202308Z", + "shell.execute_reply": "2025-02-11T13:27:18.202227Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.202219Z" } }, "outputs": [], @@ -166,7 +199,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.203120Z", + "iopub.status.idle": "2025-02-11T13:27:18.203317Z", + "shell.execute_reply": "2025-02-11T13:27:18.203233Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.203224Z" } }, "outputs": [], @@ -198,7 +234,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.203827Z", + "iopub.status.idle": "2025-02-11T13:27:18.204043Z", + "shell.execute_reply": "2025-02-11T13:27:18.203949Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.203941Z" } }, "outputs": [], @@ -218,7 +257,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.204434Z", + "iopub.status.idle": "2025-02-11T13:27:18.204618Z", + "shell.execute_reply": "2025-02-11T13:27:18.204529Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.204521Z" } }, "outputs": [], @@ -238,7 +280,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.205399Z", + "iopub.status.idle": "2025-02-11T13:27:18.205581Z", + "shell.execute_reply": "2025-02-11T13:27:18.205500Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.205492Z" } }, "outputs": [], @@ -252,7 +297,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.206293Z", + "iopub.status.idle": "2025-02-11T13:27:18.206474Z", + "shell.execute_reply": "2025-02-11T13:27:18.206392Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.206384Z" } }, "outputs": [], @@ -300,7 +348,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.207015Z", + "iopub.status.idle": "2025-02-11T13:27:18.207209Z", + "shell.execute_reply": "2025-02-11T13:27:18.207122Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.207113Z" } }, "outputs": [], @@ -330,7 +381,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.207671Z", + "iopub.status.idle": "2025-02-11T13:27:18.207846Z", + "shell.execute_reply": "2025-02-11T13:27:18.207767Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.207758Z" } }, "outputs": [], @@ -388,7 +442,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.208418Z", + "iopub.status.idle": "2025-02-11T13:27:18.208617Z", + "shell.execute_reply": 
"2025-02-11T13:27:18.208518Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.208509Z" } }, "outputs": [], @@ -414,7 +471,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.209210Z", + "iopub.status.idle": "2025-02-11T13:27:18.209386Z", + "shell.execute_reply": "2025-02-11T13:27:18.209305Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.209297Z" } }, "outputs": [], @@ -470,7 +530,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.209826Z", + "iopub.status.idle": "2025-02-11T13:27:18.210021Z", + "shell.execute_reply": "2025-02-11T13:27:18.209941Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.209932Z" } }, "outputs": [], @@ -481,6 +544,9 @@ " dec_key=col_name_dec\n", ")\n", "\n", + "n_ok = sum(cut_overlap)\n", + "print_stats(f\"Non-overlapping objects: {n_ok:10d}, {n_ok/n_obj:10.2%}\", stats_file, verbose=verbose)\n", + "\n", "m_gal = {}\n", "\n", "for sh in shapes:\n", @@ -505,7 +571,16 @@ " cut_common,\n", " stats_file,\n", " verbose=verbose,\n", - " )" + " )\n", + "\n", + " n_ok = sum(cut_common)\n", + " print_stats(f\"{sh}: objects after common cut: {n_ok:10d}, {n_ok/n_obj:10.2%}\", stats_file, verbose=verbose)\n", + "\n", + " # MKDEBUG for debugging calibrate_comprehensive\n", + " n_ok = sum(m_gal[sh])\n", + " print_stats(f\"common & ngmix = galaxy selection: {n_ok:10d}, {n_ok/n_obj:10.2%}\", stats_file, verbose=verbose)\n", + "\n", + "\n" ] }, { @@ -532,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/maps.ipynb b/notebooks/maps.ipynb index 0afb8e0f..becd90ba 100644 --- a/notebooks/maps.ipynb +++ b/notebooks/maps.ipynb @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "from scipy.ndimage.filters import gaussian_filter\n", + "from scipy.ndimage import gaussian_filter\n", "\n", "from lenspack.utils import bin2d\n", "from lenspack.image.inversion import ks93" diff --git a/notebooks/mask_fits2hsparse.py b/notebooks/mask_fits2hsparse.py new file mode 100644 index 00000000..1af04a27 --- /dev/null +++ b/notebooks/mask_fits2hsparse.py @@ -0,0 +1,237 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.15.1 +# kernelspec: +# display_name: sp_validation +# language: python +# name: sp_validation +# --- + +# + +import os +import numpy as np +import healpy as hp +import healsparse as hs +import glob +import matplotlib.pylab as plt +from astropy.io import fits +from astropy import wcs +from astropy import units +import tqdm +from timeit import default_timer as timer + +from IPython.display import display, clear_output + +from sp_validation import plot_style +# - + +import warnings +warnings.filterwarnings('ignore') + +# List of FITS mask files +directory = f"{os.environ['HOME']}/arc/projects/unions/catalogues/unions/GAaP_photometry/extfinalmask_UNIONS5000" +#directory = "." 
+fits_mask_files = glob.glob(f"{directory}/UNIONS.*_extfinalmask.fits")
+
+# Define HealSparse parameters
+nside_coverage = 32  # Define the nside of the coverage map (adjust as needed)
+nside_sparse = 4096  # Define the sparse map resolution
+
+# +
+from scipy.interpolate import griddata
+from scipy.interpolate import RectBivariateSpline
+
+def interpol(ra_step, dec_step, x, y, x_step, y_step):
+
+    # Flatten the coarse grid and create pairs of coordinates
+    points = np.array([x_step.flatten(), y_step.flatten()]).T
+    ra_values = ra_step.flatten()
+    dec_values = dec_step.flatten()
+
+    # Interpolate RA and Dec values to the fine grid
+    ra = griddata(points, ra_values, (x, y), method='cubic')
+    dec = griddata(points, dec_values, (x, y), method='cubic')
+
+    return ra, dec
+
+def process(fits_mask_file, nside_sparse, nside_coverage, step=1):
+
+    # Get data
+    hdu_list = fits.open(fits_mask_file)
+    WCS = wcs.WCS(hdu_list[0].header)
+
+    # Mask values
+    mask = hdu_list[0].data
+
+    # Coordinates
+
+    nx, ny = mask.shape
+    x = np.arange(nx)
+    y = np.arange(ny)
+    x_mesh, y_mesh = np.meshgrid(x, y)
+
+    if step > 1:
+
+        # Note: Use nx+1, ny+1 to add additional point at x_ar=nx, y_ar=ny.
+        # Required to reduce extrapolation errors towards large x, y.
+        x_ar = np.arange(0, nx + 1, step)
+        y_ar = np.arange(0, ny + 1, step)
+        x_step, y_step = np.meshgrid(x_ar, y_ar, indexing="ij")
+
+        #start = timer()
+        ra_step, dec_step = WCS.wcs_pix2world(x_step, y_step, 0)
+        #end = timer()
+        #print(f"WCS pix2world coarse {end - start:.1f}s")
+
+        #start = timer()
+        ra_interp = RectBivariateSpline(y_ar, x_ar, ra_step, ky=2, kx=2)
+        dec_interp = RectBivariateSpline(y_ar, x_ar, dec_step, ky=2, kx=2)
+        #end = timer()
+        #print(f"create spline {end - start:.1f}s")
+
+        #start = timer()
+        ra = ra_interp(y, x).T
+        dec = dec_interp(y, x).T
+        #end = timer()
+        #print(f"interpolate {end - start:.1f}s")
+
+    else:
+        #start = timer()
+        ra, dec = WCS.wcs_pix2world(x_mesh, y_mesh, 0)
+        #end = timer()
+        #print(f"WCS pix2world {end - start:.1f}s")
+
+    # coordinates in deg
+    return ra, dec, mask
+
+def update_map(hs_map, ra, dec, mask):
+
+    # Add pixels to healsparse map
+    #start = timer()
+    hs_map.update_values_pos(ra, dec, mask, lonlat=True, operation="or")
+    #end = timer()
+    #print(f"update map {end - start:.1f}s")
+
+
+# -
+
+def flatten(ra, dec, mask):
+    ra_flatten = np.ravel(np.radians(ra))
+    dec_flatten = np.ravel(np.radians(dec))
+    mask_flatten = np.ravel(mask)
+
+    return ra_flatten, dec_flatten, mask_flatten
+
+
+# Initialise map (first time)
+hs_map = hs.HealSparseMap.make_empty(nside_coverage, nside_sparse, dtype="int16", sentinel=0)
+
+do_plot = False
+do_gif = True
+
+# +
+step = 10
+
+batch_size = 10
+
+batch_ra, batch_dec, batch_mask = [], [], []
+
+# Loop over mask files
+idx = 0
+for fits_mask_file in tqdm.tqdm(fits_mask_files, total=len(fits_mask_files), disable=False):
+
+    # Check for zero file size; skip such files
+    if os.stat(fits_mask_file).st_size == 0:
+        print(f"File {fits_mask_file} has zero size, continuing")
+        continue
+
+    # Get coordinates of pixel grid
+    ra, dec, mask = process(fits_mask_file, nside_sparse, nside_coverage, step=step)
+
+    ra_flatten, dec_flatten, mask_flatten = flatten(ra, dec, mask)
+
+
+    # Append to batch
+    batch_ra.append(ra_flatten)
+    batch_dec.append(dec_flatten)
+    batch_mask.append(mask_flatten)
+
+    # Process batch update every `batch_size` files or at the end
+    if (idx + 1) % batch_size == 0 or idx == len(fits_mask_files) - 1:
+        start = timer()
+
+        # Concatenate arrays for batch processing
+        batch_ra = 
np.concatenate(batch_ra) + batch_dec = np.concatenate(batch_dec) + batch_mask = np.concatenate(batch_mask) + + update_map(hs_map, batch_ra, batch_dec, batch_mask) + + # Clear batch storage + batch_ra, batch_dec, batch_mask = [], [], [] + + if do_gif: + + map_lowres = hs_map.generate_healpix_map(nside=256) + hp.mollview(map_lowres) + fname = f"map_lowres_{idx:04d}.jpg" + print(f"Saving figure {fname}") + + plt.savefig(fname) + + idx += 1 + + if do_plot: + clear_output(wait=True) + + # Create the figure + plt.figure(figsize=(8, 5)) + + hp.mollview(hs_map.coverage_mask) + plt.show() +# - + +hs_map.write('mask_all.fits', clobber=False) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/notebooks/metacal_global.ipynb b/notebooks/metacal_global.ipynb index f1f2442d..a467dfc9 100644 --- a/notebooks/metacal_global.ipynb +++ b/notebooks/metacal_global.ipynb @@ -56,7 +56,8 @@ "from sp_validation.basic import *\n", "from sp_validation.plots import *\n", "from sp_validation.plot_style import *\n", - "from sp_validation.calibration import *" + "from sp_validation.calibration import *\n", + "from sp_validation import cat as spv_cat" ] }, { @@ -90,7 +91,18 @@ " size_corr_ell=gal_size_corr_ell,\n", " sigma_eps=sigma_eps_prior,\n", " verbose=verbose\n", - " )" + " )\n", + "\n", + " print_stats(\n", + " f\"Number of objects on metal input = {gal_metacal[sh]._n_input}\",\n", + " stats_file,\n", + " verbose=verbose,\n", + " )\n", + " print_stats(\n", + " f\"Number of objects after galaxy selection masking = {gal_metacal[sh]._n_after_gal_mask}\",\n", + " stats_file,\n", + " verbose=verbose,\n", + " )\n" ] }, { @@ -125,19 +137,35 @@ "tile_ID = {}\n", "\n", "for sh in shapes:\n", + " \n", " g_corr[sh], g_uncorr[sh], w[sh], mask[sh] = (\n", " get_calibrated_quantities(gal_metacal[sh])\n", " )\n", "\n", + " n_before_cuts = len(gal_metacal[sh].ns['g1'])\n", + " n_after_cuts = len(w[sh])\n", + " print_stats(\n", + " f\"Number of objects before cuts = {n_before_cuts}\",\n", + " stats_file,\n", + " verbose=verbose\n", + " )\n", + " print_stats(\n", + " f\"Number of objects after cuts = {n_after_cuts}\",\n", + " stats_file,\n", + " verbose=verbose\n", + " )\n", + " \n", " # coordinates\n", - " ra[sh] = dd['XWIN_WORLD'][m_gal[sh]][mask[sh]]\n", + " #ra[sh] = dd['XWIN_WORLD'][m_gal[sh]][mask[sh]]\n", + " ra[sh] = spv_cat.get_col(dd, col_name_ra, m_gal[sh], mask[sh]) \n", " \n", " # Modify R.A. for plots if R.A. 
= 0 in area\n", " if wrap_ra != 0:\n", " ra_wrap[sh] = (ra[sh] + wrap_ra) % 360 - wrap_ra + 360\n", " else:\n", " ra_wrap[sh] = ra[sh]\n", - " dec[sh] = dd['YWIN_WORLD'][m_gal[sh]][mask[sh]]\n", + " dec[sh] = spv_cat.get_col(dd, col_name_dec, m_gal[sh], mask[sh])\n", + "\n", " ra_mean[sh] = np.mean(ra_wrap[sh])\n", " dec_mean[sh] = np.mean(dec[sh])\n", " \n", @@ -150,18 +178,13 @@ " )\n", "\n", " # magnitude, from SExtractor\n", - " mag[sh] = dd['MAG_AUTO'][m_gal[sh]][mask[sh]]\n", + " mag[sh] = spv_cat.get_col(dd, \"MAG_AUTO\", m_gal[sh], mask[sh])\n", " \n", " # Keep tile ID if external mask\n", " if mask_external_path:\n", - " tile_ID[sh] = dd['TILE_ID'][m_gal[sh]][mask[sh]]\n", + " tile_ID[sh] = spv_cat.get_col(dd, \"TILE_ID\", m_gal[sh], mask[sh])\n", "\n", - "if 'ngmix' in shapes:\n", - " # signal-to-noise ratio, only for ngmix, from fitted flux and error\n", - " snr['ngmix'] = (\n", - " dd['NGMIX_FLUX_NOSHEAR'][m_gal['ngmix']][mask['ngmix']] \n", - " / dd['NGMIX_FLUX_ERR_NOSHEAR'][m_gal['ngmix']][mask['ngmix']]\n", - " )" + " snr[sh] = spv_cat.get_snr(sh, dd, m_gal[sh], mask[sh])" ] }, { @@ -259,12 +282,12 @@ "outputs": [], "source": [ "# Get coordinates for all objects\n", - "ra['all'] = dd[col_name_ra]\n", + "ra[\"all\"] = spv_cat.get_col(dd, col_name_ra)\n", "if wrap_ra:\n", " ra_wrap['all'] = (ra['all'] + wrap_ra) % 360 - wrap_ra + 360\n", "else:\n", " ra_wrap['all'] = ra['all']\n", - "dec['all'] = dd[col_name_dec]\n", + "dec[\"all\"] = spv_cat.get_col(dd, col_name_dec)\n", "\n", "shapes_all = []\n", "for sh in shapes:\n", @@ -664,7 +687,7 @@ " print_stats(f'dc_{comp+1} = {c_err[sh][comp]:.3g}', stats_file, verbose=verbose) \n", " print_stats(f'dcw_{comp+1} = {cw_err[sh][comp]:.3g}', stats_file, verbose=verbose)\n", "\n", - " # Error of mean: divde by sqrt(N) (TBC whether this is correct)\n", + " # Error of mean: divide by sqrt(N) (TBC whether this is correct)\n", " for comp in (0, 1):\n", " print_stats(f'dmc_{comp+1} = {c_err[sh][comp]/np.sqrt(n_gal[sh]):.3e}', stats_file, verbose=verbose)\n", " print_stats(\n", @@ -1139,21 +1162,13 @@ " print_stats('Dispersion of (average) single-component ellipticity = {:.3f} = {:.3f} / sqrt(2)' \\\n", " ''.format(sig_eps / np.sqrt(2), sig_eps), stats_file, verbose=verbose)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bed6267c-152e-4b16-9e91-18a9220c1e5a", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "sp_validation", "language": "python", - "name": "python3" + "name": "sp_validation" }, "language_info": { "codemirror_mode": { @@ -1165,7 +1180,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/params.py b/notebooks/params.py index 6c315998..c192ebfb 100644 --- a/notebooks/params.py +++ b/notebooks/params.py @@ -12,7 +12,6 @@ """ -import os import numpy as np @@ -41,7 +40,6 @@ ## 'ngix': multi-epoch model fitting ## 'galsim': stacked-image moments (experimental) shapes = ['ngmix'] -print('Shape measurement method(s):', shapes) # Paths @@ -117,6 +115,27 @@ ### Additional output columns add_cols = ["FLUX_RADIUS", "FWHM_IMAGE", "FWHM_WORLD", "MAGERR_AUTO", "MAG_WIN", "MAGERR_WIN", "FLUX_AUTO", "FLUXERR_AUTO", "FLUX_APER", "FLUXERR_APER", "NGMIX_T_NOSHEAR", "NGMIX_Tpsf_NOSHEAR"] +## Pre-calibration catalogue, including masked objects and mask flags +add_cols_pre_cal = ["IMAFLAGS_ISO", "FLAGS", 
"NGMIX_MCAL_FLAGS", "NGMIX_MOM_FAIL", "N_EPOCH", "NGMIX_N_EPOCH", "NGMIX_ELL_PSFo_NOSHEAR", "NGMIX_ELL_ERR_NOSHEAR"] + +### Set flag columns as integer format +add_cols_pre_cal_format = {} +for key in ("IMAFLAGS_ISO", "FLAGS", "NGMIX_MCAL_FLAGS", "NGMIX_MOM_FAIL", "N_EPOCH", "NGMIX_N_EPOCH"): + add_cols_pre_cal_format[key] = "I" + +# Crete key names for metacal information +prefix = "NGMIX" +suffixes = ["1M", "1P", "2M", "2P", "NOSHEAR"] +centers = ["FLAGS", "ELL", "FLUX", "FLUX_ERR", "T", "T_ERR", "Tpsf"] +for center in centers: + for suffix in suffixes: + add_cols_pre_cal.append(f"{prefix}_{center}_{suffix}") + +for suffix in suffixes: + add_cols_pre_cal_format[f"FLAGS_{suffix}"] = "I" + + +#add_cols_pre_cal_descr = ["ShapePipe pipeline flags", "SExtractor flags"] # Catalog parameters diff --git a/notebooks/psf_leakage.ipynb b/notebooks/psf_leakage.ipynb index e395a840..562ac56b 100644 --- a/notebooks/psf_leakage.ipynb +++ b/notebooks/psf_leakage.ipynb @@ -25,9 +25,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 120, "id": "35bc23a5", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:22.304812Z", + "iopub.status.busy": "2025-01-27T14:55:22.304500Z", + "iopub.status.idle": "2025-01-27T14:55:23.135275Z", + "shell.execute_reply": "2025-01-27T14:55:23.134437Z", + "shell.execute_reply.started": "2025-01-27T14:55:22.304770Z" + } + }, "outputs": [], "source": [ "import os\n", @@ -36,9 +44,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 121, "id": "bdeed0dc-7871-4833-b735-3d21f0e39b2e", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:27.071494Z", + "iopub.status.busy": "2025-01-27T14:55:27.071149Z", + "iopub.status.idle": "2025-01-27T14:55:27.688748Z", + "shell.execute_reply": "2025-01-27T14:55:27.687905Z", + "shell.execute_reply.started": "2025-01-27T14:55:27.071449Z" + } + }, "outputs": [], "source": [ "from cs_util.plots import plot_data_1d" @@ -46,9 +62,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "id": "13ceb1df", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:27.690842Z", + "iopub.status.busy": "2025-01-27T14:55:27.690528Z", + "iopub.status.idle": "2025-01-27T14:55:31.964607Z", + "shell.execute_reply": "2025-01-27T14:55:31.963715Z", + "shell.execute_reply.started": "2025-01-27T14:55:27.690796Z" + } + }, "outputs": [], "source": [ "from cs_util import canfar\n", @@ -77,9 +101,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 123, "id": "462a654d", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:38.612918Z", + "iopub.status.busy": "2025-01-27T14:55:38.612287Z", + "iopub.status.idle": "2025-01-27T14:55:39.409596Z", + "shell.execute_reply": "2025-01-27T14:55:39.406848Z", + "shell.execute_reply.started": "2025-01-27T14:55:38.612872Z" + } + }, "outputs": [], "source": [ "n_bin = 30\n", @@ -97,10 +129,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 124, "id": "c9ae1e58", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:39.411097Z", + "iopub.status.busy": "2025-01-27T14:55:39.410782Z", + "iopub.status.idle": "2025-01-27T14:55:43.627796Z", + "shell.execute_reply": "2025-01-27T14:55:43.626744Z", + "shell.execute_reply.started": "2025-01-27T14:55:39.411052Z" + } + }, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "ngmix:\n", + "$e_{1}^{\\rm PSF}$: m_1=0.167±0.075\n", + "$e_{1}^{\\rm PSF}$: m_2=0.043±0.076\n", + "$e_{2}^{\\rm PSF}$: m_1=-0.044±0.090\n", + "$e_{2}^{\\rm PSF}$: m_2=-0.069±0.091\n", + "$\\mathrm{FWHM}^{\\rm PSF}$ [arcsec]: m_1=-0.005±0.023\n", + "$\\mathrm{FWHM}^{\\rm PSF}$ [arcsec]: m_2=-0.004±0.024\n" + ] + } + ], "source": [ "for sh in shapes:\n", " plot_dir_leakage = f'{plot_dir}/psf_leak_{sh}'\n", @@ -153,9 +207,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 125, "id": "f76b7339", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:43.629276Z", + "iopub.status.busy": "2025-01-27T14:55:43.628932Z", + "iopub.status.idle": "2025-01-27T14:55:54.035086Z", + "shell.execute_reply": "2025-01-27T14:55:53.942080Z", + "shell.execute_reply.started": "2025-01-27T14:55:43.629221Z" + } + }, "outputs": [], "source": [ "r_corr_gp = {}\n", @@ -203,10 +265,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 126, "id": "687ae9da", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:54.036946Z", + "iopub.status.busy": "2025-01-27T14:55:54.036591Z", + "iopub.status.idle": "2025-01-27T14:55:54.887867Z", + "shell.execute_reply": "2025-01-27T14:55:54.886881Z", + "shell.execute_reply.started": "2025-01-27T14:55:54.036901Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ngmix: Weighted average alpha = 0.0609\n" + ] + } + ], "source": [ "alpha_leak_mean = {}\n", "\n", @@ -224,9 +302,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 127, "id": "f5c64dce", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:54.889559Z", + "iopub.status.busy": "2025-01-27T14:55:54.889195Z", + "iopub.status.idle": "2025-01-27T14:55:56.096225Z", + "shell.execute_reply": "2025-01-27T14:55:56.095378Z", + "shell.execute_reply.started": "2025-01-27T14:55:54.889511Z" + } + }, "outputs": [], "source": [ "for sh in shapes:\n", @@ -260,9 +346,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 128, "id": "27331f73", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:56.097537Z", + "iopub.status.busy": "2025-01-27T14:55:56.097192Z", + "iopub.status.idle": "2025-01-27T14:56:10.045244Z", + "shell.execute_reply": "2025-01-27T14:56:10.044188Z", + "shell.execute_reply.started": "2025-01-27T14:55:56.097492Z" + } + }, "outputs": [], "source": [ "# Scale-dependent leakage using the star catalogue\n", @@ -312,10 +406,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 129, "id": "0c148a08", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:10.135611Z", + "iopub.status.busy": "2025-01-27T14:56:10.046380Z", + "iopub.status.idle": "2025-01-27T14:56:11.053643Z", + "shell.execute_reply": "2025-01-27T14:56:11.052385Z", + "shell.execute_reply.started": "2025-01-27T14:56:10.135549Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ngmix: Weighted average alpha using star cat = -0.00735\n" + ] + } + ], "source": [ "if star_cat_path:\n", " alpha_leak_mean = {}\n", @@ -334,9 +444,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 130, "id": "a28b5f26", - "metadata": {}, + "metadata": { + "execution": { + 
"iopub.execute_input": "2025-01-27T14:56:11.055388Z", + "iopub.status.busy": "2025-01-27T14:56:11.055016Z", + "iopub.status.idle": "2025-01-27T14:56:12.382111Z", + "shell.execute_reply": "2025-01-27T14:56:12.380999Z", + "shell.execute_reply.started": "2025-01-27T14:56:11.055340Z" + } + }, "outputs": [], "source": [ "if star_cat_path:\n", @@ -384,9 +502,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 131, "id": "d68f5607", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:12.383666Z", + "iopub.status.busy": "2025-01-27T14:56:12.383280Z", + "iopub.status.idle": "2025-01-27T14:56:13.174662Z", + "shell.execute_reply": "2025-01-27T14:56:13.173316Z", + "shell.execute_reply.started": "2025-01-27T14:56:12.383618Z" + } + }, "outputs": [], "source": [ "C_sys_p = {}\n", @@ -427,10 +553,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 132, "id": "f2f14815", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:16.861989Z", + "iopub.status.busy": "2025-01-27T14:56:16.861653Z", + "iopub.status.idle": "2025-01-27T14:56:18.987376Z", + "shell.execute_reply": "2025-01-27T14:56:18.986396Z", + "shell.execute_reply.started": "2025-01-27T14:56:16.861944Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading file vos:cfis/cosmostat/cosmology/redshifts/nz.CFHTLenSmatched.W3_202012_v1.txt to ./nz.CFHTLenSmatched.W3_202012_v1.txt...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Current version vos 3.6.1.1. A newer version, 3.6.2, is available on PyPI\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Download finished.\n" + ] + } + ], "source": [ "# Redshift distribution\n", "\n", @@ -449,10 +605,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 133, "id": "2aa72d9f", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:21.682179Z", + "iopub.status.busy": "2025-01-27T14:56:21.681869Z", + "iopub.status.idle": "2025-01-27T14:56:22.482871Z", + "shell.execute_reply": "2025-01-27T14:56:22.481614Z", + "shell.execute_reply.started": "2025-01-27T14:56:21.682134Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computation of theoretical prediction failed\n" + ] + } + ], "source": [ "for sh in shapes:\n", " try:\n", @@ -472,10 +644,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 134, "id": "8312c4af", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:25.350707Z", + "iopub.status.busy": "2025-01-27T14:56:25.350084Z", + "iopub.status.idle": "2025-01-27T14:56:28.252753Z", + "shell.execute_reply": "2025-01-27T14:56:28.251828Z", + "shell.execute_reply.started": "2025-01-27T14:56:25.350660Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ngmix: <|xi_sys_+|> = 3.913104457799062e-06\n", + "ngmix: <|xi_sys_-|> = 0.00025281181828650277\n" + ] + } + ], "source": [ "labels = ['$\\\\xi^{\\\\rm sys}_+$', '$\\\\xi^{\\\\rm sys}_-$']\n", "\n", @@ -543,13 +732,21 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c64a0f7f-925b-4537-98af-f2d2643d4629", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - 
"display_name": "Python 3 (ipykernel)", + "display_name": "sp_validation", "language": "python", - "name": "python3" + "name": "sp_validation" }, "language_info": { "codemirror_mode": { @@ -561,7 +758,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/write_cat.ipynb b/notebooks/write_cat.ipynb index 2f102a2e..17bf5689 100644 --- a/notebooks/write_cat.ipynb +++ b/notebooks/write_cat.ipynb @@ -39,7 +39,8 @@ "outputs": [], "source": [ "from sp_validation.util import *\n", - "from sp_validation.cat import *" + "from sp_validation.cat import *\n", + "from sp_validation.basic import metacal" ] }, { @@ -73,47 +74,45 @@ "local_cal = False\n", "\n", "if local_cal:\n", - " # MKDEBUG TODO: add local selection matrix\n", " for sh in shapes:\n", " write_shape_catalog(\n", " f'{output_shape_cat_base}_{sh}.fits',\n", " ra[sh],\n", " dec[sh],\n", - " g_corr_mc[sh],\n", " w[sh],\n", - " mag[sh],\n", - " gal_metacal[sh].R,\n", - " R_shear[sh],\n", - " gal_metacal[sh].R_selection,\n", - " c[sh],\n", - " c_err[sh],\n", - " alpha_leakage=alpha_leak_mean[sh],\n", + " mag=mag[sh],\n", + " g=g_corr_mc[sh],\n", " g1_uncal=g_uncorr[sh][0],\n", " g2_uncal=g_uncorr[sh][1], \n", " R_g11=R_shear_ind[sh][0, 0],\n", " R_g12=R_shear_ind[sh][0, 1],\n", " R_g21=R_shear_ind[sh][1, 0],\n", " R_g22=R_shear_ind[sh][1, 1],\n", + " R=gal_metacal[sh].R,\n", + " R_shear=R_shear[sh],\n", + " R_select=gal_metacal[sh].R_selection,\n", + " c=c[sh],\n", + " c_err=c_err[sh],\n", + " alpha_leakage=alpha_leak_mean[sh],\n", " add_cols=add_cols_data[sh],\n", " )\n", "else:\n", - " for sh in shapes:\n", - " \n", + " for sh in shapes: \n", " write_shape_catalog(\n", " f'{output_shape_cat_base}_{sh}.fits',\n", " ra[sh],\n", " dec[sh],\n", - " g_corr_mc[sh],\n", " w[sh],\n", - " mag[sh],\n", - " gal_metacal[sh].R,\n", - " R_shear[sh],\n", - " gal_metacal[sh].R_selection,\n", - " c[sh],\n", - " c_err[sh],\n", - " alpha_leakage=alpha_leak_mean[sh],\n", + " mag=mag[sh],\n", + " g=g_corr_mc[sh],\n", " g1_uncal=g_uncorr[sh][0],\n", " g2_uncal=g_uncorr[sh][1],\n", + " R=gal_metacal[sh].R,\n", + " R_shear=R_shear[sh],\n", + " R_select=gal_metacal[sh].R_selection,\n", + " c=c[sh],\n", + " c_err=c_err[sh],\n", + " alpha_leakage=alpha_leak_mean[sh],\n", " add_cols=add_cols_data[sh],\n", " )" ] @@ -162,12 +161,6 @@ "outputs": [], "source": [ "for sh in shapes:\n", - " \n", - " # Signal-to-noise estimates\n", - " if sh == 'ngmix':\n", - " my_snr = snr['ngmix']\n", - " elif sh == 'galsim':\n", - " my_snr = dd['SNR_WIN'][m_gal['galsim']]\n", "\n", " # Additional columns:\n", " # {e1, e2, size}_PSF\n", @@ -182,30 +175,95 @@ " f'{output_shape_cat_base}_extended_{sh}.fits',\n", " ra[sh],\n", " dec[sh],\n", - " g_corr_mc[sh],\n", " w[sh],\n", - " mag[sh],\n", - " gal_metacal[sh].R,\n", - " R_shear[sh],\n", - " gal_metacal[sh].R_selection,\n", - " c[sh],\n", - " c_err[sh],\n", - " snr=my_snr,\n", - " alpha_leakage=alpha_leak_mean[sh],\n", + " mag=mag[sh],\n", + " snr=snr[sh],\n", + " g=g_corr_mc[sh],\n", " g1_uncal=g_uncorr[sh][0],\n", " g2_uncal=g_uncorr[sh][1],\n", " R_g11=R_shear_ind[sh][0, 0],\n", " R_g12=R_shear_ind[sh][0, 1],\n", " R_g21=R_shear_ind[sh][1, 0],\n", " R_g22=R_shear_ind[sh][1, 1], \n", + " R=gal_metacal[sh].R,\n", + " R_shear=R_shear[sh],\n", + " R_select=gal_metacal[sh].R_selection,\n", + " c=c[sh],\n", + " c_err=c_err[sh],\n", + " alpha_leakage=alpha_leak_mean[sh],\n", " add_cols=ext_cols[sh],\n", " )" ] }, { "cell_type": 
"markdown", - "id": "b39cd31a", + "id": "a6319ed3-0add-4fd1-a174-2b188863cd8e", "metadata": {}, + "source": [ + "### Write comprehensive shape catalogue (all objects, no cuts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea2e13a6-86b1-44fd-92b3-34fc48b318a1", + "metadata": {}, + "outputs": [], + "source": [ + "# Add additional columns without cuts nor mask applied\n", + "\n", + "ext_cols_pre_cal = {}\n", + "\n", + "for sh in shapes:\n", + " ext_cols_pre_cal[sh] = {}\n", + "\n", + " # Standard additional columns\n", + " if add_cols:\n", + " for key in add_cols:\n", + " ext_cols_pre_cal[sh][key] = dd[key]\n", + "\n", + " # Pre-calibration columns\n", + " if add_cols_pre_cal:\n", + " for key in add_cols_pre_cal:\n", + " ext_cols_pre_cal[sh][key] = dd[key]\n", + "\n", + " # Flag to cut duplicate objects in overlapping region with neighbouring tiles\n", + " ext_cols_pre_cal[sh][\"overlap\"] = cut_overlap\n", + " add_cols_pre_cal_format[\"overlap\"] = \"I\"\n", + "\n", + " # Additional columns {e1, e2, size}_PSF\n", + " ext_cols_pre_cal[sh]['e1_PSF'] = dd[key_PSF_ell[sh]][:,0]\n", + " ext_cols_pre_cal[sh]['e2_PSF'] = dd[key_PSF_ell[sh]][:,1]\n", + " ext_cols_pre_cal[sh]['fwhm_PSF'] = size_to_fwhm[sh](dd[key_PSF_size[sh]])\n", + "\n", + " _, _, iv_w = metacal.get_variance_ivweights(dd, sigma_eps_prior, mask=None, col_2d=True)\n", + "\n", + " mag = get_col(dd, \"MAG_AUTO\", None, None)\n", + " snr = get_snr(sh, dd, None, None)\n", + " g1_uncal = dd[\"NGMIX_ELL_NOSHEAR\"][:, 0]\n", + " g2_uncal = dd[\"NGMIX_ELL_NOSHEAR\"][:, 1]\n", + " \n", + " # Comprehensive catalogue without cuts nor mask applied\n", + " write_shape_catalog(\n", + " f'{output_shape_cat_base}_comprehensive_{sh}.fits',\n", + " ra[\"all\"],\n", + " dec[\"all\"],\n", + " iv_w,\n", + " mag=mag,\n", + " snr=snr,\n", + " g1_uncal=g1_uncal,\n", + " g2_uncal=g2_uncal,\n", + " add_cols=ext_cols_pre_cal[sh],\n", + " add_cols_format=add_cols_pre_cal_format,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "b39cd31a", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "### Write galaxy (or random) position catalogue" ] @@ -264,9 +322,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "sp_validation", "language": "python", - "name": "python3" + "name": "sp_validation" }, "language_info": { "codemirror_mode": { @@ -278,7 +336,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index 8e724c24..2f1dce64 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ emcee cs_util==0.1.2 astropy>=5.0 healpy +healsparse getdist h5py jupyterlab diff --git a/scripts/apply_hsp_masks.py b/scripts/apply_hsp_masks.py new file mode 100755 index 00000000..48fc8442 --- /dev/null +++ b/scripts/apply_hsp_masks.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +import sys + +from sp_validation import run_joint_cat as sp_joint + + +def main(argv=None): + """Main + + Main program + + """ + if argv is None: + argv = sys.argv[1:] + sp_joint.run_apply_hsp_masks(*argv) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/combine_results.py b/scripts/combine_results.py index bd3467f4..5e649364 100755 --- a/scripts/combine_results.py +++ b/scripts/combine_results.py @@ -771,10 +771,10 @@ def main(argv=None): for file_base in file_base_arr: - os.system(f'~/txt2tex.pl 
{file_base}.tex > {file_base}_out.tex')
-        print(f'Creating LaTeX file {file_base}.tex')
-        os.system(f'pdflatex {file_base}_out 2&>/dev/null')
-
+        #os.system(f'~/txt2tex.pl {file_base}.tex > {file_base}_out.tex')
+        #print(f'Creating LaTeX file {file_base}.tex')
+        #os.system(f'pdflatex {file_base}_out 2&>/dev/null')
+        print("Skipping calling LaTeX")
 
 if __name__ == "__main__":
     sys.exit(main(sys.argv))
diff --git a/scripts/create_joint_comprehensive_cat.py b/scripts/create_joint_comprehensive_cat.py
new file mode 100755
index 00000000..14289c67
--- /dev/null
+++ b/scripts/create_joint_comprehensive_cat.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+
+import sys
+
+from sp_validation.run_joint_cat import run_joint_comprehensive_cat
+
+def main(argv=None):
+
+    if argv is None:
+        argv = sys.argv[1:]
+    run_joint_comprehensive_cat(*argv)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
\ No newline at end of file
diff --git a/scripts/create_joint_shape_cat.py b/scripts/create_joint_shape_cat.py
index 79ef2b81..e63276e5 100755
--- a/scripts/create_joint_shape_cat.py
+++ b/scripts/create_joint_shape_cat.py
@@ -360,6 +360,8 @@ def merge_catalogues(
 
         column_all.append(fits.Column(name='alpha_2', array=alpha_2, format='D'))
 
+        if verbose:
+            print(f"Writing file {output_path}")
         cat.write_fits_BinTable_file(column_all, output_path, R, R_shear, R_select, c)
     else:
         if verbose:
@@ -490,7 +492,7 @@ def main(argv=None):
             print(' ', patch)
 
             input_path = f'{patch}/sp_output/shape_catalog_{sh}.fits'
-            ra, dec, g1, g2, w, mag, _ = read_shape_catalog(input_path)
+            ra, dec, g1, g2, w, mag, _ = read_shape_catalog(input_path, w_name="w_iv")
 
             ra_all = np.append(ra_all, ra)
             dec_all = np.append(dec_all, dec)
@@ -536,14 +538,14 @@ def main(argv=None):
         output_path,
         ra_all,
         dec_all,
-        g_corr_mc_all,
         w_all,
-        mag_all,
-        R,
-        R_shear,
-        R_select,
-        c,
-        c_err,
+        mag=mag_all,
+        g=g_corr_mc_all,
+        R=R,
+        R_shear=R_shear,
+        R_select=R_select,
+        c=c,
+        c_err=c_err,
         add_cols=add_col_data,
         add_cols_format=add_col_format,
     )
diff --git a/scripts/homogenize_cat_extended.py b/scripts/homogenize_cat_extended.py
index 95e56fd6..a3e117ae 100644
--- a/scripts/homogenize_cat_extended.py
+++ b/scripts/homogenize_cat_extended.py
@@ -5,7 +5,7 @@ e1 and e2 from the non-extended one.
 Currently, the calibration of the columns e1 and e2 of the extended catalog
 are calibrated per patch, while the non-extended catalog is calibrated on the
 whole footprint.
-:Authors: Martin Kilbinger, Sacha Guerrini
+:Authors: Sacha Guerrini, Martin Kilbinger
 """
 
 import sys
diff --git a/scripts/prepare_patch_for_spval.sh b/scripts/prepare_patch_for_spval.sh
index c82a5e77..e5445009 100755
--- a/scripts/prepare_patch_for_spval.sh
+++ b/scripts/prepare_patch_for_spval.sh
@@ -5,7 +5,8 @@ patch=$1
 spdir=$HOME/astro/repositories/github/sp_validation
 
 # Galaxy catalogue
-cp ~/psfex/final_cat_${patch}.hdf5 .
+#cp ~/psfex/final_cat_${patch}.hdf5 .
+ln -s ~/psfex/final_cat_${patch}.hdf5
 
 # Parameter file, to avoid read errors for hdf5 file
 ln -sf ~/shapepipe/example/cfis/final_cat.param
@@ -17,12 +18,14 @@ ln -sf ~/shapepipe/example/cfis/final_cat.param
 ## Projected back to world coordinates
 ln -sf $HOME/psfex/star_cat/${patch}/output/run_sp_Ms/merge_starcat_runner/output/full_starcat-0000000.fits
 
-# Parameter file
-cp $spdir/notebooks/params.py .
-
-# nb/python script
-ln -sf $spdir/notebooks/validation.py
-
 # Tile number list
 ln -sf ~/shapepipe/auxdir/CFIS/tiles_202106/tiles_${patch}.txt
 
+# Parameter file
+#cp $spdir/notebooks/params.py .
+echo "Diff:" +diff $spdir/notebooks/params.py params.py +echo "Run?" +echo "cp $spdir/notebooks/params.py params.py" +echo "Run?" +echo "ipython ~/sp_validation/notebooks/validation.py" diff --git a/sp_validation/__init__.py b/sp_validation/__init__.py index 33c10e61..143bc854 100644 --- a/sp_validation/__init__.py +++ b/sp_validation/__init__.py @@ -39,13 +39,18 @@ __version__ = _version __all__ = [ - 'survey', + 'util', 'io', - 'cat', 'basic', - 'util', + 'galaxy', + 'cosmology', + 'calibration', + 'cat', 'plot_style', - 'plots' + 'plots', + 'run_joint_cat', + 'survey', + 'util', ] from . import * diff --git a/sp_validation/basic.py b/sp_validation/basic.py index 3387ae2a..28203be4 100644 --- a/sp_validation/basic.py +++ b/sp_validation/basic.py @@ -61,6 +61,9 @@ class metacal: sigma_eps : float, optional ellipticity dispersion (one component) for computation of weights; default is 0.34 + col_2d : bool, optional + if `True` (default, ellipticity in one 2D column; + if `False`, ellipticity in two columns ELL_0, ELL_1 verbose : bool, optional, default=False verbose output if True @@ -80,6 +83,7 @@ def __init__( rel_size_max=3.0, size_corr_ell=True, sigma_eps=0.34, + col_2d=True, verbose=False, ): @@ -102,6 +106,7 @@ def __init__( ) self._sigma_eps = sigma_eps + self._col_2d = col_2d self._verbose = verbose @@ -154,7 +159,7 @@ def _read_data_ngmix(self, data, mask, m1, p1, m2, p2, ns): Read data from ngmix catalogue. """ for name_shear, dict_tmp in zip( - ['1m', '1p', '2m', '2p', 'noshear'], + ['1M', '1P', '2M', '2P', 'NOSHEAR'], [m1, p1, m2, p2, ns] ): @@ -162,40 +167,97 @@ def _read_data_ngmix(self, data, mask, m1, p1, m2, p2, ns): print('Extracting {}'.format(name_shear)) dict_tmp['flag'] = ( - data[f'{self._prefix}_FLAGS_{name_shear.upper()}'][mask] - ) - dict_tmp['g1'] = ( - data[f'{self._prefix}_ELL_{name_shear.upper()}'][:, 0][mask] - ) - dict_tmp['g2'] = ( - data[f'{self._prefix}_ELL_{name_shear.upper()}'][:, 1][mask] + data[f'{self._prefix}_FLAGS_{name_shear}'][mask] ) - dict_tmp['flux'] = ( - data[f'{self._prefix}_FLUX_{name_shear.upper()}'][mask] - ) - dict_tmp['flux_err'] = ( - data[f'{self._prefix}_FLUX_ERR_{name_shear.upper()}'][mask] - ) + if self._col_2d: + # Ellipticity in one 2D column + for comp in (0, 1): + dict_tmp[f"g{comp+1}"] = ( + data[f"{self._prefix}_ELL_{name_shear}"][:, comp][mask] + ) + else: + # Ellipcitiy in two different columns + for comp in (0, 1): + dict_tmp[f"g{comp+1}"] = ( + data[f"{self._prefix}_ELL_{name_shear}_{comp}"][mask] + ) + + for key in ("flux", "flux_err", "T", "T_err"): + dict_tmp[key] = ( + data[f'{self._prefix}_{key.upper()}_{name_shear}'][mask] + ) - dict_tmp['T'] = ( - data[f'{self._prefix}_T_{name_shear.upper()}'][mask] - ) - dict_tmp['T_err'] = ( - data[f'{self._prefix}_T_ERR_{name_shear.upper()}'][mask] - ) dict_tmp['Tpsf'] = ( - data[f'{self._prefix}_Tpsf_{name_shear.upper()}'][mask] + data[f'{self._prefix}_Tpsf_{name_shear}'][mask] ) - ns['C11'] = data[f'{self._prefix}_ELL_ERR_NOSHEAR'][:, 0][mask] - ns['C22'] = data[f'{self._prefix}_ELL_ERR_NOSHEAR'][:, 1][mask] - ns['w'] = ( - 1. 
/ (2 * self._sigma_eps ** 2 + dict_tmp['C11'] + dict_tmp['C22']) + ns["C11"], ns["C22"], ns["w"] = self.get_variance_ivweights( + data, + self._sigma_eps, + self._prefix, + mask=mask, + col_2d=self._col_2d, ) + self._n_input = len(data) + self._n_after_gal_mask = len(dict_tmp['flag']) + if self._verbose: + print(f"Number of objects on metacal input = {self._n_input}") + print(f"Number of objects after galaxy selection masking = {self._n_after_gal_mask}") + return m1, p1, m2, p2, ns + @staticmethod + def get_variance_ivweights(data, sigma_eps, prefix="NGMIX", mask=None, col_2d=True): + """Get Variance IVWEIGHTS. + + Compute variance and inverse-variance weights. + + Parameters + ---------- + data : numpy.ndarray + input data + sigma_eps : float + ellipticity dispersion + prefix : str, optional + shape measurement identifier; default is "NGMIX" + mask : list, optional + indicates valid objects with ``True`` values; default is ``None`` = use all objects + type has to be bool + col_2d : bool, optional + if ``True`` (default), ellipticity is given in single 2D column; + if ``False``, ellipticity is expected in two 1D columns. + + Returns + ------- + float + variance first component + float + variance second component + float + weight + + """ + if mask is not None: + if col_2d: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0][mask] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1][mask] + else: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"][mask] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"][mask] + else: + if col_2d: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1] + else: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"] + + iv_w = 1 / (2 * sigma_eps ** 2 + C11 + C22) + + return C11, C22, iv_w + def _read_data_galsim(self, data, mask, m1, p1, m2, p2, ns): """Read Data Galsim. 
diff --git a/sp_validation/calibration.py b/sp_validation/calibration.py index ad2f8443..1d5ddf79 100644 --- a/sp_validation/calibration.py +++ b/sp_validation/calibration.py @@ -219,7 +219,10 @@ def get_alpha_leakage_per_object(cat_gal, num_bins, weight_type='des'): #Fit e1 mod_wls = sm.WLS(e1_out, sm.add_constant(e1_PSF), weights=weight_out) - res_wls = mod_wls.fit() + try: + res_wls = mod_wls.fit() + except: + raise RunTimeError("Linear regression fit for PSF leakage failed") alpha_df.loc[i_group, 'alpha_1'] = res_wls.params[1] alpha_df.loc[i_group, 'alpha_1_err'] = np.sqrt(res_wls.cov_params()[1, 1]) del res_wls, mod_wls @@ -399,4 +402,4 @@ def get_calibrate_no_leakage_e_from_cat(path_cat_gal, weight_type='des', verbose e1_noleak = e1 - cat_gal['alpha_1']*cat_gal['e1_PSF'] e2_noleak = e2 - cat_gal['alpha_2']*cat_gal['e2_PSF'] - return e1_noleak, e2_noleak \ No newline at end of file + return e1_noleak, e2_noleak diff --git a/sp_validation/cat.py b/sp_validation/cat.py index 302a467b..1d6af8d3 100644 --- a/sp_validation/cat.py +++ b/sp_validation/cat.py @@ -64,11 +64,11 @@ def print_mean_ellipticity( """ # Get ellipticity columns all_ell = [] - ell_str = '' + ell_str = "" if ell_n_comp == 1: for i in (0, 1): all_ell.append(dd[ell_col_name[i]]) - ell_str = f'{ell_str}{ell_col_name[i]} ' + ell_str = f"{ell_str}{ell_col_name[i]} " elif ell_n_comp == 2: for i in (0, 1): all_ell.append(dd[ell_col_name][:, i]) @@ -87,12 +87,16 @@ def print_mean_ellipticity( n_tot_val = len(np.where(ind_v)[0]) n_tot_mil = util.millify(n_tot_val) - msg = f'Total number of valid objects = {n_tot_val} = {n_tot_mil}' + msg = ( + f"Total number of valid objects ({ell_col_name}0,1 != {invalid})" + + f" = {n_tot_val} = {n_tot_mil}" + ) io.print_stats(msg, stats_file, verbose=verbose) - msg = 'Fraction of invalid objects = {}/{} = {:.3g}%\n' \ - ''.format(n_tot - n_tot_val, n_tot, - (n_tot - n_tot_val) / n_tot * 100) + msg = ( + f"Fraction of invalid objects = {n_tot-n_tot_val}/{n_tot}" + + f" = {(n_tot-n_tot_val)/n_tot:.3%}" + ) io.print_stats(msg, stats_file, verbose=verbose) # Select valid objects @@ -105,12 +109,12 @@ def print_mean_ellipticity( ell[i] = all_ell[i].mean() io.print_stats( - f'Mean ellipticity of valid objects ({ell_str}):', + f"Mean ellipticity of valid objects ({ell_str}):", stats_file, verbose=verbose, ) for i in (0, 1): - msg = ' = {:.3g}'.format(i + 1, ell[i]) + msg = " = {:.3g}".format(i + 1, ell[i]) io.print_stats(msg, stats_file, verbose=verbose) @@ -135,13 +139,13 @@ def print_some_quantities(dd, stats_file, verbose=False): """ # Print all column names if verbose: - print('Column names:') + print("Column names:") print(dd.dtype.names) - print('') + print("") n_tot = len(dd) n_mil = util.millify(n_tot) - msg = f'Total number of objects = {n_tot} = {n_mil}' + msg = f"Total number of objects = {n_tot} = {n_mil}" io.print_stats(msg, stats_file, verbose=verbose) return n_tot @@ -186,7 +190,7 @@ def check_matching( # Filter stars outside footprint for efficiency mask_area_tiles = get_footprint(name, d1[keys_1[0]], d1[keys_1[1]]) if len(np.where(mask_area_tiles)[0]) == 0: - raise ValueError(f'Error: no object found in field \'{name}\'') + raise ValueError(f"Error: no object found in field '{name}'") else: mask_area_tiles = np.arange(len(d1)) @@ -196,13 +200,13 @@ def check_matching( d2[keys_2[1]], d1[keys_1[0]][mask_area_tiles], d1[keys_1[1]][mask_area_tiles], - thresh=thresh + thresh=thresh, ) n_tot = len(d1[keys_1[0]][mask_area_tiles]) msg = ( - 'Number of matched stars from exposures to total 
catalogue = ' - + f'{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}' + "Number of matched stars from exposures to total catalogue = " + + f"{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -210,8 +214,8 @@ def check_matching( ind = np.array(list(set(ind))) msg = ( - 'Number of matched stars after removing multiple matches = ' - + f'{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}' + "Number of matched stars after removing multiple matches = " + + f"{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -249,8 +253,9 @@ def check_invalid(dd, key, comp, val, stats_file, name=None, verbose=False): w = dd[key[i]][:, comp[i]] == val[i] n_inv_psf = len(np.where(w)[0]) - msg = 'Invalid {} found for {}/{} = {:.1g}% objects' \ - ''.format(name[i], n_inv_psf, n_all, n_inv_psf / n_all) + msg = "Invalid {} found for {}/{} = {:.1g}% objects" "".format( + name[i], n_inv_psf, n_all, n_inv_psf / n_all + ) io.print_stats(msg, stats_file, verbose=verbose) @@ -295,9 +300,9 @@ def match_subsample( ellipticities """ msg = ( - 'Number of stars matched to valid sample = ' - + f'{len(dd[pos_key[0]][ind][mask])}/{n_ref} = ' - + f'{len(dd[pos_key[0]][ind][mask]) / n_ref * 100:.1f}%' + "Number of stars matched to valid sample = " + + f"{len(dd[pos_key[0]][ind][mask])}/{n_ref} = " + + f"{len(dd[pos_key[0]][ind][mask]) / n_ref * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -316,25 +321,25 @@ def match_spread_class(dd, ind, mask, stats_file, n_ref, verbose=False): Match """ tot_star = n_ref - tot_as_star = len(np.where(dd['SPREAD_CLASS'][ind][mask] == 0)[0]) - tot_as_gal = len(np.where(dd['SPREAD_CLASS'][ind][mask] == 1)[0]) - tot_as_other = len(np.where(dd['SPREAD_CLASS'][ind][mask] == 2)[0]) + tot_as_star = len(np.where(dd["SPREAD_CLASS"][ind][mask] == 0)[0]) + tot_as_gal = len(np.where(dd["SPREAD_CLASS"][ind][mask] == 1)[0]) + tot_as_other = len(np.where(dd["SPREAD_CLASS"][ind][mask] == 2)[0]) msg = ( - 'Number of stars selected as star (SPREAD_CLASS=0) = ' - + f'{tot_as_star}/{tot_star} = {tot_as_star / tot_star * 100:.1f}%' + "Number of stars selected as star (SPREAD_CLASS=0) = " + + f"{tot_as_star}/{tot_star} = {tot_as_star / tot_star * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) msg = ( - 'Number of stars selected as galaxy (SPREAD_CLASS=1) = ' - + f'{tot_as_gal}/{tot_star} = {tot_as_gal / tot_star * 100:.1f}%' + "Number of stars selected as galaxy (SPREAD_CLASS=1) = " + + f"{tot_as_gal}/{tot_star} = {tot_as_gal / tot_star * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) msg = ( - 'Number of stars selected as other (SPREAD_CLASS=2) = ' - + f'{tot_as_other}/{tot_star} = {tot_as_other / tot_star * 100:.1f}%' + "Number of stars selected as other (SPREAD_CLASS=2) = " + + f"{tot_as_other}/{tot_star} = {tot_as_other / tot_star * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -359,7 +364,8 @@ def match_stars2(ra_gal, dec_gal, ra_star, dec_star, thresh=0.0002): def read_shape_catalog( - input_path + input_path, + w_name="w", ): """Read Shape Catalog. 
@@ -369,6 +375,8 @@ def read_shape_catalog( ---------- input_path : str input file path + w_name : str, optional + name of weight column, default is "w" Returns ------- @@ -391,17 +399,17 @@ def read_shape_catalog( hdu_no = 1 - ra = dat[hdu_no].data['RA'] - dec = dat[hdu_no].data['Dec'] + ra = dat[hdu_no].data["RA"] + dec = dat[hdu_no].data["Dec"] g = [np.empty_like(ra), np.empty_like(ra)] - g1 = dat[hdu_no].data['e1_uncal'] - g2 = dat[hdu_no].data['e2_uncal'] - w = dat[hdu_no].data['w'] - mag = dat[hdu_no].data['mag'] + g1 = dat[hdu_no].data["e1_uncal"] + g2 = dat[hdu_no].data["e2_uncal"] + w = dat[hdu_no].data[w_name] + mag = dat[hdu_no].data["mag"] - if 'snr' in dat[hdu_no].data.dtype.names: - snr = dat[hdu_no].data['snr'] + if "snr" in dat[hdu_no].data.dtype.names: + snr = dat[hdu_no].data["snr"] else: snr = None @@ -412,22 +420,22 @@ def write_shape_catalog( output_path, ra, dec, - g, w, - mag, - R, - R_shear, - R_select, - c, - c_err, - alpha_leakage=None, + mag=None, snr=None, + g=None, g1_uncal=None, g2_uncal=None, R_g11=None, R_g22=None, R_g12=None, R_g21=None, + R=None, + R_shear=None, + R_select=None, + c=None, + c_err=None, + alpha_leakage=None, sigma_epsilon=None, add_cols=None, add_cols_format=None, @@ -442,110 +450,142 @@ def write_shape_catalog( output file path ra, dec : arrays(ngal) of float coordinates in deg - g : arrays(2, ngal) of float - calibrated reduced shear estimate components, corrected for - multiplicative and additive bias, g = R^-1 g_uncal - c - w : array(ngal) of float - weights - mag : array(ngal) of float + w : np.ndarray + inverse-variance weights + mag : array(ngal) of float, optional magnitude, signal-to-noise ratio - R : 2x2 matrix of float - Mean full response matrix - R_shear : 2x2 matrix of float - Mean shear response matrix - R_select : 2x2 matrix of float - Global selection response matrix - c : array(2) of float + snr : array(ngal) of float, optional + signal-to-noise ratio, default is `None` + g : np.ndarray, optional + calibrated reduced shear estimate components, corrected for + multiplicative and additive bias, g = R^-1 g_uncal - c; + expected type is arrays(2, ngal) of float; + default is ``None`` (no calibrated shears written) + g1_uncal, g2_uncal : np.ndarray, optional + uncalibrated shear estimates; + expected types are arrays(ngal) of float + default is ``None`` (no uncalibrated shears written) + R_g11, R_g22, R_g12, R_g21 : np.ndarray, optional + shear response matrix elements per galaxy; + expected format is arrays(ngal) of float; + default is ``None`` + R : 2x2 matrix of float, optional + Mean full response matrix, default is ``None`` + R_shear : 2x2 matrix of float, optional + Mean shear response matrix, default is ``None`` + R_select : 2x2 matrix of float, optional + Global selection response matrix, default is ``None`` + c : array(2) of float, optional, default is ``None`` additive shear bias - c_err : array(2) of float + c_err : array(2) of float, optional, default is ``None`` error of c alpha_leakage : float, optional - Mean scale-dependent PSF leakage, default is None - snr : array(ngal) of float, optional - signal-to-noise ratio, default is `None` - g1_uncal, g2_uncal : arrays(ngal) of float, optional, default=None - uncalibrated shear estimates - R_g11, R_g22, R_g12, R_g21 : arrays(ngal) of float, optional, default=None - shear response matrix elements per galaxy + Mean scale-dependent PSF leakage, default is ``None`` sigma_epsilon: float, optional - shape noise, default is `None` - add_cols : dict, optional, default is 
`None` + shape noise, default is ``None`` + add_cols : dict, optional, default is ``None`` data for n additional columns to add add_cols_format : dict, optional - format for n additional columns to add, default is `None`, for which - 'float' format is used + format for n additional columns to add, default is ``None``, for which + ``float`` format is used """ - # Data HDU - c_ra = fits.Column(name='RA', array=ra, format='D', unit='deg') - c_dec = fits.Column(name='Dec', array=dec, format='D', unit='deg') - c_g1 = fits.Column(name='e1', array=g[0], format='D') - c_g2 = fits.Column(name='e2', array=g[1], format='D') - c_w = fits.Column(name='w', array=w, format='D') - c_mag = fits.Column(name='mag', array=mag, format='D') - cols = [c_ra, c_dec, c_g1, c_g2, c_w, c_mag] - - ntype = 6 - if snr is not None: - c_snr = fits.Column(name='snr', array=snr, format='D') - cols.append(c_snr) - ntype += 1 + col_info_arr = [] - for x, name in zip( + # Principal columns: coordinates and weights + col_info_arr.append( + ( + fits.Column(name="RA", array=ra, format="D", unit="deg"), + "Right Ascension", + ) + ) + col_info_arr.append( + ( + fits.Column(name="Dec", array=dec, format="D", unit="deg"), + "Declination", + ) + ) + col_info_arr.append( + ( + fits.Column(name="w_iv", array=w, format="D"), + "Inverse-variance weight", + ) + ) + + # Additional columns + ## Magnitude + if mag is not None: + col_info_arr.append( + ( + fits.Column(name="mag", array=mag, format="D"), + "MAG_AUTO magnitude", + ) + ) + ## Calibrated shear estimates + if g is not None: + for idx in (0, 1): + col_info_arr.append( + ( + fits.Column(name=f"e{idx+1}", array=g[idx], format="D"), + f"Calibrated reduced shear estimate comp {idx+1}", + ) + ) + # Signal-to-noise ratio + if snr is not None: + col_info_arr.append( + ( + fits.Column(name="snr", array=snr, format="D"), + "Signal-to-noise ratio", + ) + ) + for x, name, descr in zip( [g1_uncal, g2_uncal, R_g11, R_g22, R_g12, R_g21], - ['e1_uncal', 'e2_uncal', 'R_g11', 'R_g22', 'R_g12', 'R_g21'], + ["e1_uncal", "e2_uncal", "R_g11", "R_g22", "R_g12", "R_g21"], + [ + "Uncalibrated shear comp 1", + "Uncalibrated shear comp 2", + "Shear response matrix comp 1 1", + "Shear response matrix comp 2 2", + "Shear response matrix comp 1 2", + "Shear response matrix comp 2 1", + ], ): if x is not None: - cols.append(fits.Column(name=name, array=x, format='D')) - ntype += 1 + col_info_arr.append( + (fits.Column(name=name, array=x, format="D"), descr) + ) - if add_cols: - for name in add_cols: - if add_cols_format: + if add_cols is not None: + for idx, name in enumerate(add_cols): + if add_cols_format is not None and name in add_cols_format: my_format = add_cols_format[name] else: - my_format = 'D' - cols.append( - fits.Column(name=name, array=add_cols[name], format=my_format) + shape = add_cols[name].shape + if len(shape) == 1: + my_format = "D" + else: + my_format = f"{shape[1]}D" + col_info_arr.append( + ( + fits.Column( + name=name, array=add_cols[name], format=my_format + ), + name, + ) ) - ntype += len(add_cols) + # Write columns to FITS file + cols = [] + for col, _ in col_info_arr: + cols.append(col) table_hdu = fits.BinTableHDU.from_columns(cols) - table_hdu.header['TTYPE3'] = ( - 'e1', - 'Calibrated reduced shear estimate, 1st comp' - ) - table_hdu.header['TTYPE4'] = ( - 'e2', - 'Calibrated reduced shear estimate, 2nd comp' - ) - table_hdu.header['TTYPE5'] = ('w', 'DES Weight') - table_hdu.header['TTYPE6'] = ('mag', 'Magnitude = MAG_AUTO (SExtractor)') - if snr is not None: - 
table_hdu.header['TTYPE7'] = ( - 'snr', - 'Signal-to-noise ratio = flux/flux_std' + # Add human-readable descriptions + for idx, col_info in enumerate(col_info_arr): + table_hdu.header[f"TTYPE{idx+1}"] = ( + col_info[0].name, + col_info[1], ) - ntype += 1 - - for x, name in zip([g1_uncal, g2_uncal], ['e1_uncal', 'e2_uncal']): - if x is not None: - table_hdu.header[f'TTYPE{ntype}'] = ( - name, - 'uncalibrated reduced shear' - ) - ntype += 1 - for x, name in zip( - [R_g11, R_g22, R_g12, R_g21], - ['R_g11', 'R_g22', 'R_g12', 'R_g21'] - ): - if x is not None: - table_hdu.header[f'TTYPE{ntype}'] = ( - name, - f'shear response matrix {name}' - ) - ntype += 1 # Primary HDU with information in header primary_header = fits.Header() @@ -556,20 +596,22 @@ def write_shape_catalog( software_version=__version__, author=getpass.getuser(), ) - cat.add_shear_bias_to_header(primary_header, R, R_shear, R_select, c) - primary_header['c1_err'] = (c_err[0], 'Standard deviation of c_1') - primary_header['c2_err'] = (c_err[1], 'Standard deviation of c_2') - primary_header['w'] = ( - 'DES Weight' - ) - if sigma_epsilon: - primary_header['sig_eps'] = (sigma_epsilon, 'Shape noise RMS') + if all(v is not None for v in (R, R_shear, R_select, c)): + cat.add_shear_bias_to_header(primary_header, R, R_shear, R_select, c) + if c_err is not None: + primary_header["c1_err"] = (c_err[0], "Standard deviation of c_1") + primary_header["c2_err"] = (c_err[1], "Standard deviation of c_2") + + primary_header["w"] = "DES weight" + + if sigma_epsilon is not None: + primary_header["sig_eps"] = (sigma_epsilon, "Shape noise RMS") if alpha_leakage: - primary_header['alpha'] = ( + primary_header["alpha"] = ( alpha_leakage, - 'Mean scale-dependent PSF leakage' + "Mean scale-dependent PSF leakage", ) primary_hdu = fits.PrimaryHDU(header=primary_header) @@ -595,9 +637,9 @@ def write_galaxy_cat(output_path, ra, dec, tile_id): tile_id : array(ngal) of float tile ID of objects """ - c_ra = fits.Column(name='ra', array=ra, format='E', unit='deg') - c_dec = fits.Column(name='dec', array=dec, format='E', unit='deg') - c_id = fits.Column(name='tile_id', array=tile_id, format='E') + c_ra = fits.Column(name="ra", array=ra, format="E", unit="deg") + c_dec = fits.Column(name="dec", array=dec, format="E", unit="deg") + c_id = fits.Column(name="tile_id", array=tile_id, format="E") cols = [c_ra, c_dec, c_id] cat.write_fits_BinTable_file(cols, output_path) @@ -619,10 +661,10 @@ def write_PSF_cat(output_path, ra, dec, e1, e2): e2 : list of float second ellipticity component """ - c_ra = fits.Column(name='RA', array=ra, format='D', unit='deg') - c_dec = fits.Column(name='Dec', array=dec, format='D', unit='deg') - c_e1 = fits.Column(name='e1', array=e1, format='D') - c_e2 = fits.Column(name='e2', array=e2, format='D') + c_ra = fits.Column(name="RA", array=ra, format="D", unit="deg") + c_dec = fits.Column(name="Dec", array=dec, format="D", unit="deg") + c_e1 = fits.Column(name="e1", array=e1, format="D") + c_e2 = fits.Column(name="e2", array=e2, format="D") cols = [c_ra, c_dec, c_e1, c_e2] cat.write_fits_BinTable_file(cols, output_path) @@ -651,7 +693,7 @@ def read_param_file(path, verbose=False): if path: if verbose: - print(f"Reading parameter file {path}") + print(f"Reading parameter file: {path}") with open(path) as f: for line in f: @@ -670,16 +712,18 @@ def read_param_file(path, verbose=False): print(" into merged catalogue") param_list_unique = list(set(param_list)) - + if verbose: n = len(param_list) - len(param_list_unique) if n > 1: print("Removed 
{n} duplicate entries") - + return param_list_unique -def read_hdf5_file(file_path, name, stats_file, check_only=False, param_path=None): +def read_hdf5_file( + file_path, name, stats_file, check_only=False, param_path=None +): """Read HDF5 File. Read hdf5 file and return contained data. @@ -694,21 +738,37 @@ def read_hdf5_file(file_path, name, stats_file, check_only=False, param_path=Non summary statistics output file handler check_only : bool, optional If True only check, not return data - + Returns ------- dict data """ - param_list = read_param_file(param_path, verbose=True) if param_path else None + param_list = ( + read_param_file(param_path, verbose=True) if param_path else None + ) with h5py.File(file_path, "r") as hdf5_file: # Find patch group in hierarchical structure if not f"patches/{name}" in hdf5_file: - raise KeyError(f"Entry patches/{name} not found in file {file_path}") + raise KeyError( + f"Entry patches/{name} not found in file {file_path}" + ) patch_group = hdf5_file[f"patches/{name}"] + # Get size of data array + num_rows = sum(patch_group[ID].shape[0] for ID in patch_group) + # num_cols = patch_group[next(iter(patch_group))].shape[1] + num_cols = len(param_list) + + print( + f"Estimating {num_cols * num_rows * 8 / 1024**3:.1f}" + + f" Gb memory for the ({num_cols} x {num_rows}) data array ..." + ) + # data_comb = np.memmap(output_file, dtype=patch_group[next(iter(patch_group))].dtype, + # mode="w+", shape=(num_rows, num_cols)) + data_list = [] ID_pbl = set() for ID in tqdm.tqdm(patch_group): @@ -719,32 +779,89 @@ def read_hdf5_file(file_path, name, stats_file, check_only=False, param_path=Non # Restrict to parameter list if given data = data[param_list] if param_list is not None else data - #if data_list is None: - # data_list = data - # ID_first = ID - #else: - - # # Check columns - # for key in data.dtype.names: - # if key not in data_list.dtype.names: - # pass - # #print(f"Not found in ID {ID_first}", key) - # #ID_pbl.update([ID]) - # for key in data_list.dtype.names: - # if key not in data.dtype.names: - # pass - # #print(f"Not found in ID {ID}", key) - # #ID_pbl.update([ID]) - if not check_only: # Add new to existing data data_list.append(data) + print("Combine tile catalogues") data_comb = np.concatenate(data_list, axis=0) - + print("Done") + # Print problematic tile IDs for ID in ID_pbl: print("Tile IDs with missing keys:", file=stats_file) print(ID, file=stats_file) return data_comb + + +def get_col(dat, col, m_sel=None, m_flg=None): + """Get Col. + + Retrieve a specific column from the data with optional selection and flag masks. + + Parameters + ---------- + dat: dict + Input data + col: str + Key of the column to be returned + m_sel: array-like, optional + Boolean mask used for selection. If specified, m_flg must also be specified; + default is ``None`` + m_flg: array-like, optional + Boolean mask used as a flag. If specified, m_sel must also be specified. + default is ``None`` + + Returns + ------- + array-like + Requested column from the data, optionally filtered by the selection and flag masks. + + Raises + ------ + ValueError + If only one of m_sel or m_flg is specified without the other. + """ + + if bool(m_sel is None) != bool(m_flg is None): + raise ValueError("Specify both or none of selection and flag masks") + + if m_sel is None and m_flg is None: + return dat[col] + else: + return dat[col][m_sel][m_flg] + + +def get_snr(sh, dat, m_sel, m_flg): + """Get SNR. + + Return signal-to-noise ratio. 
+ + Parameters + ---------- + sh: str + shape method identified, e.g. "ngmix" + dat: dict + Input data + m_sel: array-like, optional + Boolean mask used for selection. If specified, m_flg must also be specified; + default is ``None`` + m_flg: array-like, optional + Boolean mask used as a flag. If specified, m_sel must also be specified. + default is ``None`` + + Returns + ------- + array-like + signal-to-noise ratios + + """ + if sh == "ngmix": + my_snr = get_col(dat, "NGMIX_FLUX_NOSHEAR", m_sel, m_flg) / get_col( + dat, "NGMIX_FLUX_ERR_NOSHEAR", m_sel, m_flg + ) + elif sh == "galsim": + my_snr = get_col(dat, "SNR_WIN", m_sel, m_flg) + + return my_snr diff --git a/sp_validation/cosmology.py b/sp_validation/cosmology.py index 4bf4f46b..1260c96e 100644 --- a/sp_validation/cosmology.py +++ b/sp_validation/cosmology.py @@ -46,7 +46,7 @@ try: import pyccl as ccl except Exception: - print('Could not import pyccl') + print('Could not import pyccl, continuing...') # Convergence maps diff --git a/sp_validation/run_joint_cat.py b/sp_validation/run_joint_cat.py new file mode 100644 index 00000000..4458dde2 --- /dev/null +++ b/sp_validation/run_joint_cat.py @@ -0,0 +1,1131 @@ +"""RUN JOINT CAT. + +This module implements classes to create, mask, and calibrate joint +comprehensive catalogues. + +:Authors: Martin Kilbinger +""" + +import sys +import os + +import numpy as np +from scipy import stats + +import datetime +from tqdm import tqdm + +from optparse import OptionParser +from importlib.metadata import version + + +import h5py +import healsparse as hsp +import numpy as np + +from astropy.io import fits +from astropy.table import Column + +from cs_util import logging +from cs_util import cat +from cs_util import args as cs_args + +from . import util + + +class BaseCat(object): + """Base_Cat. + + Basic catalogue class. + + """ + + def __init__(self): + self.params_default() + + def set_params_from_command_line(self, args): + """Set Params From Command Line. + + Only use when calling using python from command line. + Does not work from ipython or jupyter. + + """ + # Read command line options + options = cs_args.parse_options( + self._params, + self._short_options, + self._types, + self._help_strings, + ) + + # Save calling command + logging.log_command(args) + + def read_cat(self, load_into_memory=False, mode="r"): + """Read Cat. + + Read input catalogue, either FITS or HDF5. + + Parameters + ---------- + load_into_memory: bool, optional + load data into memory (potentially slow) of ``True``; + default is ``False`` + mode: bool, optional + HDF5 read mode, default is "r" + + Returns + ------- + list + Catalogue data + + Raises + ------ + IOError + If file extension is not .fits or .hd5 + + """ + fpath = self._params["input_path"] + verbose = self._params["verbose"] + + extension = os.path.splitext(fpath)[1] + if extension == ".fits": + if verbose: + print(f"Reading FITS file {fpath}, HDU {hdu}...") + + hdu = 1 + dat = fits.getdata(fpath, hdu) + + elif extension in (".hdf5", ".hd5"): + if verbose: + print(f"Reading HDF5 file {fpath}...") + + self._hd5file = h5py.File(fpath, mode) + try: + dat = self._hd5file["data"] + except: + print(f"Error while reading file {fpath}") + raise + if load_into_memory: + return dat[()] + else: + return dat + else: + raise IOError(f"Unknown file extension {extension}") + + def write_hdf5_header(self, hd5file): + """Write HDF5 Header. + + Write basic header information to HDF5 file. 
+ + Parameters + ---------- + hd5file : h5py.File + input HDF5 file + + """ + author = os.getenv("USER") + software_name = "sp_validation" + software_version = version(software_name) + date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + hd5file.attrs["author"] = author + hd5file.attrs["softname"] = software_name + hd5file.attrs["softver"] = software_version + hd5file.attrs["date"] = date + + def write_hdf5_file(self, dat, output_path=None): + """Write HDF5 File. + + Write HDF5 data to file. + + Parameters + ---------- + dat : numpy.ndarray + input data + output_path : str, optional + output file path; when ``None`` (default) use + self._params['output_path'] + patches : list, optional + input patches, list of str, default is ``None`` + + """ + if output_path is None: + output_path = self._params["output_path"] + + if self._params["verbose"]: + print("Creating hdf5 file") + + with h5py.File(output_path, "w") as f: + + self.write_hdf5_header(f) + + dset = f.create_dataset("data", data=dat) + dset[:] = dat + + if self._params["verbose"]: + print(f"Done.") + + def close_hd5(self): + """Close HD5. + + Close HDF5 file. + + """ + self._hd5file.close() + + +class JointCat(BaseCat): + """Joint Cat. + + Class to create joint weak-lensing catalogues. + + """ + + def __init__(self): + # Set default parameters + self.params_default() + + def set_params_from_command_line(self, args): + """Set Params From Command Line. + + Only use when calling using python from command line. + Does not work from ipython or jupyter. + + """ + # Read command line options + options = cs_args.parse_options( + self._params, + self._short_options, + self._types, + self._help_strings, + ) + + # Update parameter values from options + self._params.update(options) + + # Save calling command + logging.log_command(args) + + def params_default(self): + """Params Default. + + Set default parameter values. + + """ + self._params = { + "patches": "v1", + "sh": "ngmix", + "survey": "unions", + "year": "2024", + "version": "1.4.2", + "pipeline": "shapepipe", + "hdu": 1, + "reduce_mem": False, + } + self._short_options = { + "patches": "-p", + "sh": "-g", + "survey": "-s", + "year": "-y", + "version": "-V", + "reduce_mem": "-r", + } + self._types = { + "hdu": "int", + "reduce_mem": "bool", + } + self._help_strings = { + "patches": "list of patches separated by '+', or shortcut (allowed are 'v1'), default={}", + "sh": "shape measurement method, default={}", + "survey": "survey name, default={}", + "year": "year of processing, default={}", + "version": "catalogue version, default={}", + "reduce_mem": "output some columns in lower precision to reduce memory", + } + + def get_patches(self): + """Get Patches. + + Return list of patches according to option parameter value. + + Returns + ------- + list + patches, list of str + + """ + if self._params["patches"] == "v1": + n_patch = 7 + patches = [f"P{x}" for x in np.arange(n_patch) + 1] + else: + patches = self._params["patches"].split("+") + + return patches + + def get_n_obj(self, patches, base_path, input_sub_path): + """Get N Obj. + + Get number of objects from FITS file headers. 
+ + Parameters + ---------- + patches : list + input patches, type is str + base_path : str + input base directory, root dir of patches + input_sub_path : str + input file name; input path is base_path/patch/input_sub_path + + Raises: + ValueError: if input file canont be read + + Returns: + list + HDUs + list + number of objects per file + int + total number of objects + + """ + if self._params["verbose"]: + print("Getting number of objects") + n_obj_list = [] + n_obj = 0 + hdu_lists = [] + for patch in patches: + input_path = f"{base_path}/{patch}/{input_sub_path}" + try: + hdu_list = fits.open(input_path) + except: + raise ValueError( + f"Could not open file {input_path} at HDU" + + f" #{self._params['hdu']}" + ) + hdu_lists.append(hdu_list) + + this_n = int(hdu_list[self._params["hdu"]].header["NAXIS2"]) + n_obj_list.append(this_n) + n_obj += this_n + + if self._params["verbose"]: + print(f"Found a total of {n_obj} (~{util.millify(n_obj)}) objects.") + + return hdu_lists, n_obj_list, n_obj + + def get_col_info(self, dat): + """Get Col Info. + + Return information of input columns. + + Parameters + ---------- + dat : numpy.ndarray + input data + + Returns + ------- + list + column names + list + column formats + int + number of columns + + """ + col_names = dat.dtype.names + + n_col = 0 + formats = {} + ndim = {} + for name in col_names: + formats[name] = dat.dtype.fields[name][0] + ndim[name] = dat[name].ndim + n_col += ndim[name] + # Add one for patch + n_col += 1 + + if self._params["verbose"]: + print( + f"Number of input (output) columns = {len(col_names)} ({n_col})" + ) + + return col_names, formats, ndim, n_col + + def dtype_out(self, name, dtype_in): + """Set output dtype. + + Parameters + ---------- + name : str + column name + dtype_in : np.dtype + input dtype + + Returns + ------- + np.dtype + output dtype + + """ + cols_keep_dtype = [ + "RA", + "Dec", + "FLAGS", + "IMAFLAGS_ISO", + ] + if self._params["reduce_mem"] == False: + return dtype_in + elif name not in cols_keep_dtype: + if dtype_in == np.float64: + return np.float32 + if dtype_in == np.int32: + return np.int8 + + return dtype_in + + def init_data(self, n_col, n_obj, ndim, dat): + """Init Data. + + Initialize empty structured data. + + Parameters + ---------- + n_col : int + number of columns + n_obj : int + number of objects (rows) + ndim : dict + dimension of input columns + dat : numpy.ndarray + example data + + Returns + ------- + numpy.ndarray + combined structure data, (n_col x n_obj) array + + """ + if self._params["verbose"]: + print( + f"Allocating {n_col * n_obj * 8 / 1024**3:.1f}" + + f" Gb memory for the ({n_col} x {n_obj}) data array ...", + end="", + ) + + # Create dtypes from input column names and types. + # Reduce memory if flag set. + # Transform multi-D columns into 1D columns + dtype_tmp_list = [] + for name in ndim: + if ndim[name] == 1: + dtype_tmp_list.append( + (name, self.dtype_out(name, dat[name].dtype)) + ) + else: + for jdx in range(ndim[name]): + dtype_tmp_list.append( + (f"{name}_{jdx}", self.dtype_out(name, dat[name].dtype)) + ) + dtype_tmp_list.append(("patch", np.int8)) + dtype_tmp_struct = np.dtype(dtype_tmp_list) + dat_all = np.empty((n_obj,), dtype=dtype_tmp_struct) + + if self._params["verbose"]: + print("done") + + return dat_all + + def write_hdf5_file(self, dat_all, patches): + """Write HDF5 File. + + Write data to HDF5 file. 
+
+ Parameters
+ ----------
+ dat_all : numpy.ndarray
+ input data
+ patches : list
+ input patches, list of str
+
+ """
+ output_path = (
+ f"{self._params['survey']}_{self._params['pipeline']}"
+ + f"_comprehensive_{self._params['year']}_"
+ + f"v{self._params['version']}.hdf5"
+ )
+
+ with h5py.File(output_path, "w") as f:
+
+ self.write_hdf5_header(f, patches=patches)
+
+ dset = f.create_dataset("data", data=dat_all)
+ dset[:] = dat_all
+
+ # super().write_hdf5_file(dat_all, output_path=output_path)
+
+ def write_hdf5_header(self, hd5file, patches=None):
+ """Write HDF5 Header.
+
+ Write header information to HDF5 file.
+
+ Parameters
+ ----------
+ hd5file : h5py.File
+ input HDF5 file
+ patches : list, optional
+ input patches, list of str, default is ``None``
+
+ """
+ super().write_hdf5_header(hd5file)
+
+ if patches is not None:
+ patches_str = " ".join(patches)
+ hd5file.attrs["patches"] = patches_str
+
+ def merge_catalogues(self, patches, base_path="."):
+ """Merge Catalogues.
+
+ Merge individual patch-based catalogues.
+
+ Parameters
+ ----------
+ patches : list
+ input patches; list of `str`
+ base_path : str, optional
+ input base directory path; default is "."
+
+ """
+ input_sub_path = (
+ f"sp_output/shape_catalog_comprehensive_{self._params['sh']}.fits"
+ )
+
+ # Get input FITS files
+ hdu_lists, n_obj_list, n_obj = self.get_n_obj(
+ patches,
+ base_path,
+ input_sub_path,
+ )
+
+ # Read data
+ start = end = 0
+ for idx, patch in enumerate(patches):
+
+ input_path = f"{base_path}/{patch}/{input_sub_path}"
+ try:
+ dat = fits.getdata(input_path, self._params["hdu"])
+ # dat = hdu_lists[idx][self._params["hdu"]].data
+
+ hdu_lists[idx].close()
+ except:
+ raise ValueError(
+ f"Could not read data of file {input_path} at HDU"
+ + f" #{self._params['hdu']}"
+ )
+
+ # Create empty structured data array for the first patch
+ if idx == 0:
+
+ col_names, formats, ndim, n_col = self.get_col_info(dat)
+ dat_all = self.init_data(n_col, n_obj, ndim, dat)
+
+ # Append new data for that patch (between start and end)
+ end += n_obj_list[idx]
+
+ # Copy data
+ i_col = 0
+ names_out = dat_all.dtype.names
+ for name in col_names:
+ if ndim[name] == 1:
+ # Copy 1D column
+ dat_all[names_out[i_col]][start:end] = dat[name]
+ else:
+ # Copy all components of multi-D column
+ for jdx in range(ndim[name]):
+ dat_all[names_out[i_col + jdx]][start:end] = dat[name][
+ :, jdx
+ ]
+ i_col += ndim[name]
+ # Add patch number
+ dat_all["patch"][start:end] = patch[1:]
+
+ if i_col + 1 != n_col:
+ raise ValueError(
+ f"Inconsistent number of columns, {i_col + 1}"
+ + f" != {n_col}"
+ )
+ if self._params["verbose"]:
+ print(
+ f"{patch}: Added {len(dat)} (~{util.millify(len(dat))})"
+ + f" objects (from {start} to {end-1})."
+ )
+ start = end
+
+ del dat
+
+ self.write_hdf5_file(dat_all, patches)
+
+ def run(self):
+ """Run.
+
+ Main processing function.
+
+ """
+ patches = self.get_patches()
+ if self._params["verbose"]:
+ print("Merging patches", patches)
+
+ self.merge_catalogues(patches)
+
+
+class ApplyHspMasks(BaseCat):
+ """Apply Hsp Masks."""
+
+ def __init__(self):
+ # Set default parameters
+ self.params_default()
+
+ @classmethod
+ def get_label_struct(cls, bit):
+ """Get Label Struct.
+
+ Return label of bit-coded mask.
+ + Parameters + ---------- + bit: int + input bit + + Returns + ------- + str + label + + """ + # Labels of bit-coded structural masks + labels_struct = { + 1: "Faint_star_halos", + 2: "Bright_star_halos", + 4: "Stars", + 8: "Manual", + 16: "u", + 32: "g", + 64: "r", + 128: "i", + 256: "z", + 512: "Tile_RA_DEC_cut", + 1024: "Maximask", + } + + return labels_struct[bit] + + @classmethod + def get_mask_col_name(cls, bit): + """Get Mask Col Name. + + Return column name of mask corresponding to input bit. + + Parameters + ---------- + bit : int + input bit + + Returns + ------- + str + column name + + """ + return f"{bit}_{cls.get_label_struct(bit)}" + + def params_default(self): + """Params Default. + + Set default parameter values. + + """ + self._params = { + "input_path": None, + "output_path": "output.hdf5", + "mask_dir": ".", + "nside": 131072, + "file_base": "mask_r_", + "bits": 1, + "aux_mask_files": None, + "aux_mask_labels": None, + } + self._short_options = { + "input_path": "-i", + "output_path": "-o", + "mask_dir": "-d", + "nside": "-n", + "file_base": "-f", + "bits": "-b", + } + self._types = { + "nside": "int", + "bits": "int", + } + self._help_strings = { + "input_path": "path of input hdf5 catalogue, default={}", + "output_path": "path of output hdf5 catalogue, default={}", + "mask_dir": "directory with mask files, default={}", + "nside": "healsparse resolution parameter, default={}", + "file_base": "base name of mask files, default={}", + "bits": "bits to apply, default={}", + "aux_mask_files": "auxiliary mask files separated with '\\', defualt={}", + "aux_mask_labels": "auxiliary mask column names separated with '\\'", + } + + def check_params(self): + """Check Params. + + Check whether parameter values are valid. + + Raises + ------ + ValueError + if a parameter value is not valid + + """ + if (self._params["aux_mask_files"] is None) != ( + self._params["aux_mask_labels"] is None + ): + raise ValueError("Both or none of the 'aux_mask_*' can be None") + + def update_params(self): + """Update Params. + + Update and transform parameter values. + + """ + if self._params["aux_mask_files"] is not None: + self._params["aux_mask_file_list"] = cs_args.my_string_split( + self._params["aux_mask_files"], + verbose=self._params["verbose"], + stop=True, + sep="\\", + ) + self._params["aux_mask_num"] = len( + self._params["aux_mask_file_list"] + ) + self._params["aux_mask_label_list"] = cs_args.my_string_split( + self._params["aux_mask_labels"], + verbose=self._params["verbose"], + stop=True, + num=self._params["aux_mask_num"], + sep="\\", + ) + else: + self._params["aux_mask_file_list"] = [] + + def read_hsp_mask(self, path): + """Read Hsp Mask. + + Parameters + ---------- + path : str + Path to the mask file. + + Returns + ------- + np.ndarray + Mask + """ + + if self._params["verbose"]: + print(f"Reading healsparse mask file {path}...") + + return hsp.HealSparseMap.read(path) + + def reverse_bit_list(self): + """Reverse Bit List. + + Split bit-coded integer into bits. + + Parameters + ---------- + bit : int + Bit-coded integer + + Returns + ------- + list + List of bits + + """ + bit_list = [] + bits = self._params["bits"] + while bits: + lowest_bit = bits & -bits # Extract lowest set bit + bit_list.append(lowest_bit) + bits -= lowest_bit # Remove this bit from bit + + return bit_list + + def get_paths_bit_masks(self): + """Get Paths Bit Masks. + + Return paths of bit-coded mask files. + + Returns + ------- + dict + Dictionary with bit as key and path as value. 
+
+ """
+ paths = {}
+ bit_list = self.reverse_bit_list()
+ for bit in bit_list:
+ paths[bit] = (
+ f"{self._params['mask_dir']}/{self._params['file_base']}"
+ + f"nside{self._params['nside']}_n{bit}.hsp"
+ )
+ return paths
+
+ def get_masks(self, dat=None):
+ """Get Masks.
+
+ Return per-object masks for all bits.
+
+ Parameters
+ ----------
+ dat: numpy.ndarray, optional
+ input data; if not given (default), data will be read from
+ input file
+
+ Returns
+ -------
+ dict
+ masks
+
+ """
+ masks = {}
+
+ # Get bit-coded mask file paths
+ paths = self.get_paths_bit_masks()
+
+ # Get coordinates from data
+ if dat is None:
+ dat = self.read_cat()
+ if self._params["verbose"]:
+ print("Reading coordinates from data...")
+ ra = dat["RA"]
+ dec = dat["Dec"]
+
+ # Read healsparse files and apply masks to coordinates
+ for bit in paths:
+ if self._params["verbose"]:
+ print(f"Reading mask for bit {bit}...")
+ hsp_mask = hsp.HealSparseMap.read(paths[bit])
+
+ label = self.get_mask_col_name(bit)
+
+ if self._params["verbose"]:
+ print(f"Computing mask bit={bit}...")
+ masks[label] = hsp_mask.get_values_pos(ra, dec, lonlat=True)
+
+ # Read auxiliary mask files
+ for idx, path in enumerate(self._params["aux_mask_file_list"]):
+ label = self._params["aux_mask_label_list"][idx]
+ if self._params["verbose"]:
+ print(f"Reading aux mask for label {label}...")
+ hsp_mask = hsp.HealSparseMap.read(path)
+ masks[label] = hsp_mask.get_values_pos(ra, dec, lonlat=True)
+
+ return masks
+
+ def run(self):
+ """Run.
+
+ Main processing function.
+
+ """
+ obj = self
+
+ # Check parameters
+ obj.check_params()
+
+ # Update parameters
+ obj.update_params()
+
+ # Read input data
+ dat = obj.read_cat(load_into_memory=True, mode="r")
+
+ # Get masks
+ masks = obj.get_masks(dat=dat)
+
+ # Append masks to data
+ dat_ext = obj.append_masks(dat, masks)
+
+ # Write original and extended data to new HDF5 file
+ obj.write_hdf5_file(dat, dat_ext)
+
+ # Close input HDF5 catalogue file
+ obj.close_hd5()
+
+ def append_masks(self, dat, masks):
+ """Append Masks.
+
+ Add mask information as columns to data.
+
+ Parameters
+ ----------
+ dat: numpy.ndarray
+ input data
+ masks: dict
+ mask information
+
+ Returns
+ -------
+ numpy.ndarray
+ structured array with one column per mask
+
+ """
+ labels = [label for label in masks]
+ dtypes = [masks[label].dtype for label in masks]
+
+ # Create a structured dtype
+ structured_dtype = np.dtype(
+ [(label, dtype) for label, dtype in zip(labels, dtypes)]
+ )
+
+ new_data = np.zeros(dat.shape, dtype=structured_dtype)
+
+ # Copy masks as new columns
+ for label in masks:
+ new_data[label] = masks[label]
+
+ return new_data
+
+ def write_hdf5_file(self, dat, dat_new):
+ """Write HDF5 File.
+
+ Write original data and extended mask data to the output HDF5 file.
+
+ Parameters
+ ----------
+ dat : numpy.ndarray
+ input data, written to dataset "data"
+ dat_new : numpy.ndarray
+ mask columns, written to dataset "data_ext"
+
+ """
+ with h5py.File(self._params["output_path"], "w") as f:
+
+ self.write_hdf5_header(f)
+
+ dset = f.create_dataset(
+ "data", shape=dat.shape, dtype=dat.dtype, chunks=True
+ )
+ dset_new = f.create_dataset(
+ "data_ext",
+ shape=dat_new.shape,
+ dtype=dat_new.dtype,
+ chunks=True,
+ )
+
+ chunk_size = 10000
+ nrow = dat.shape[0]
+
+ for i in range(0, nrow, chunk_size):
+ end = min(i + chunk_size, nrow)
+ dset[i:end] = dat[i:end] # Write chunk to dataset
+ dset_new[i:end] = dat_new[i:end] # Write chunk to dataset
+
+ def write_hdf5_header(self, hd5file):
+ """Write HDF5 Header.
+
+ Write header information to HDF5 file.
+
+ Parameters
+ ----------
+ hd5file : h5py.File
+ input HDF5 file
+
+ """
+ super().write_hdf5_header(hd5file)
+
+ hd5file.attrs["hsp_nside"] = self._params["nside"]
+
+ # Bit-mask file paths
+ paths = self.get_paths_bit_masks()
+ for bit in paths:
+ hd5file.attrs[f"hsp_path_{bit}"] = paths[bit]
+
+ # Auxiliary mask file paths
+ for idx, path in enumerate(self._params["aux_mask_file_list"]):
+ label = self._params["aux_mask_label_list"][idx]
+ hd5file.attrs[f"hsp_path_{label}"] = path
+
+
+class CalibrateCat(BaseCat):
+ """Calibrate Cat.
+
+ Class to calibrate joint catalogue.
+
+ """
+
+ def __init__(self):
+ # Set default parameters
+ self.params_default()
+
+ def params_default(self):
+ """Params Default.
+
+ Set default parameter values.
+
+ """
+ self._params = {
+ "input_path": None,
+ "cmatrices": False,
+ }
+ self._short_options = {
+ "input_path": "-i",
+ "cmatrices": "-C",
+ }
+ self._types = {
+ "cmatrices": "bool",
+ }
+ self._help_strings = {
+ "input_path": "path of input HDF5 catalogue",
+ "cmatrices": "compute correlation and confusion matrices",
+ }
+
+ def read_cat(self, load_into_memory=False):
+ """Read Cat.
+
+ Read input HDF5 catalogue.
+
+ Parameters
+ ----------
+ load_into_memory: bool, optional
+ load data into memory (potentially slow) if ``True``;
+ default is ``False``
+
+ Returns
+ -------
+ numpy.ndarray or h5py.Dataset
+ Catalogue data
+ numpy.ndarray or h5py.Dataset
+ Extended catalogue data
+
+ """
+ fpath = self._params["input_path"]
+ verbose = self._params["verbose"]
+
+ if verbose:
+ print(f"Reading HDF5 file {fpath}...")
+
+ self._hd5file = h5py.File(fpath, "r")
+ try:
+ dat = self._hd5file["data"]
+ dat_ext = self._hd5file["data_ext"]
+ except:
+ print(f"Error while reading file {fpath}")
+ raise
+ if load_into_memory:
+ return dat[()], dat_ext[()]
+ else:
+ return dat, dat_ext
+
+ def run(self):
+ """Run.
+
+ Main processing function.
+
+ """
+
+
+def confusion_matrix(mask, confidence_level=0.9):
+ """Return pairwise Pearson correlations between masks and their confidence intervals."""
+
+ n_key = len(mask)
+
+ cm = np.empty((n_key, n_key))
+ r_val = np.zeros_like(cm)
+ r_cl = np.empty((n_key, n_key, 2))
+
+ for idx, key1 in enumerate(mask):
+ for jdx, key2 in enumerate(mask):
+ res = stats.pearsonr(mask[key1], mask[key2])
+ r_val[idx][jdx] = res.statistic
+ r_cl[idx][jdx] = res.confidence_interval(
+ confidence_level=confidence_level
+ )
+
+ return r_val, r_cl
+
+
+def correlation_matrix(mask):
+ """Return the matrix of pairwise Pearson correlation coefficients between masks."""
+
+ n_key = len(mask)
+
+ cm = np.empty((n_key, n_key))
+ r = np.zeros_like(cm)
+
+ for idx, key1 in enumerate(mask):
+ for jdx, key2 in enumerate(mask):
+ r[idx][jdx] = stats.pearsonr(mask[key1], mask[key2]).statistic
+
+ return r
+
+
+class ReadCat:
+
+ def __init__(self):
+ self.params_default()
+
+ def params_default(self):
+ """Params Default.
+
+ Set default parameter values.
+
+ """
+ self._params = {
+ "input": "input_cat.hdf5",
+ "n_row": None,
+ }
+ self._short_options = {
+ "input": "-i",
+ "n_row": "-n",
+ }
+ self._help_strings = {
+ "input": "input file, default={}",
+ "n_row": "print first N_ROW rows only",
+ }
+
+ def run(self):
+
+ pass
+
+
+def run_joint_comprehensive_cat(*args):
+ """Run Joint Comprehensive Cat.
+
+ Run class to create joint comprehensive catalogue from command line.
+
+ """
+ obj = JointCat()
+
+ obj.set_params_from_command_line(args)
+
+ obj.run()
+
+
+def run_calibrate_comprehensive_cat(*args):
+ """Run Calibrate Comprehensive Cat.
+
+ Run class to calibrate joint comprehensive catalogue from command line.
+ + """ + obj = CalibrateCat() + + obj.set_params_from_command_line(args) + + obj.run() + + +def run_apply_hsp_masks(*args): + """Run Apply Healsparse Masks. + + Run class to apply healsparse masks. + + """ + obj = ApplyHspMasks() + + obj.set_params_from_command_line(args) + + obj.run()
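The run_joint_cat module can be driven through the thin command-line wrappers added above (scripts/create_joint_comprehensive_cat.py, scripts/apply_hsp_masks.py) or by instantiating the classes directly and filling their _params dictionary, as the calibration notebook does. The following is an untested end-to-end sketch of the intended chain (merge patches, attach healsparse masks, open the result for calibration); the output paths, mask directory, and bit selection are placeholder values and are not prescribed by this patch, and "verbose" is set by hand because it is not part of the default parameter dictionaries.

# Untested workflow sketch (placeholder paths and mask settings)
from sp_validation import run_joint_cat as sp_joint

# 1) Merge the per-patch comprehensive catalogues into one HDF5 file;
#    with the default parameters this writes
#    unions_shapepipe_comprehensive_2024_v1.4.2.hdf5
joint = sp_joint.JointCat()
joint._params["verbose"] = True
joint.run()

# 2) Attach healsparse bit masks as a "data_ext" dataset
masker = sp_joint.ApplyHspMasks()
masker._params["input_path"] = "unions_shapepipe_comprehensive_2024_v1.4.2.hdf5"
masker._params["output_path"] = "comprehensive_with_masks.hdf5"  # placeholder
masker._params["mask_dir"] = "hsp_masks"                         # placeholder
masker._params["bits"] = 1 + 2 + 4
masker._params["verbose"] = True
masker.run()

# 3) Open both datasets for calibration
calib = sp_joint.CalibrateCat()
calib._params["input_path"] = masker._params["output_path"]
calib._params["verbose"] = True
dat, dat_ext = calib.read_cat(load_into_memory=False)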
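The "bits" option of ApplyHspMasks is a single bit-coded integer; reverse_bit_list splits it into its set bits, and get_paths_bit_masks maps each bit to a healsparse file name of the form {file_base}nside{nside}_n{bit}.hsp. The short standalone sketch below reproduces that decomposition outside the class; the chosen bits, nside, and file base are illustrative only.

# Standalone sketch of the bit decomposition used by ApplyHspMasks
def split_bits(bits):
    """Return the list of set bits, lowest first."""
    bit_list = []
    while bits:
        lowest_bit = bits & -bits  # isolate the lowest set bit
        bit_list.append(lowest_bit)
        bits -= lowest_bit
    return bit_list

bits = 1 + 2 + 512   # Faint_star_halos + Bright_star_halos + Tile_RA_DEC_cut
nside = 131072
file_base = "mask_r_"

for bit in split_bits(bits):  # -> [1, 2, 512]
    print(f"{file_base}nside{nside}_n{bit}.hsp")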
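The inverse-variance weights written to the new "w_iv" column follow metacal.get_variance_ivweights: w_iv = 1 / (2 * sigma_eps**2 + C11 + C22), where C11 and C22 are the per-object ellipticity variances and sigma_eps is the intrinsic shape dispersion. A minimal numpy sketch of that formula, with made-up variances for illustration, is:

import numpy as np

# Toy numbers; only the formula is taken from the patch
sigma_eps = 0.34                        # per-component shape dispersion
C11 = np.array([0.01, 0.05, 0.20])      # variance of e1 (illustrative)
C22 = np.array([0.01, 0.04, 0.30])      # variance of e2 (illustrative)

w_iv = 1.0 / (2 * sigma_eps**2 + C11 + C22)
print(w_iv)                             # well-measured objects receive larger weight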