diff --git a/.gitignore b/.gitignore
index 24fbc1ff..af9f647c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -159,3 +159,7 @@
 cosmo_inference/cosmosis_config/values_*
 notebooks/tests/star_response.ipynb
 sp_validation.code-workspace
+notebooks/calibrate_comprehensive_cat.ipynb
+notebooks/test_apply_hsp_masks.ipynb
+notebooks/demo_apply_hsp_masks.py
+notebooks/demo_apply_hsp_masks.ipynb
diff --git a/notebooks/calibrate_comprehensive_cat.py b/notebooks/calibrate_comprehensive_cat.py
new file mode 100644
index 00000000..1fd6dfad
--- /dev/null
+++ b/notebooks/calibrate_comprehensive_cat.py
@@ -0,0 +1,472 @@
+# ---
+# jupyter:
+#   jupytext:
+#     text_representation:
+#       extension: .py
+#       format_name: light
+#       format_version: '1.5'
+#     jupytext_version: 1.15.1
+#   kernelspec:
+#     display_name: Python 3
+#     language: python
+#     name: python3
+# ---
+
+# # Calibrate comprehensive catalogue
+
+# %reload_ext autoreload
+# %autoreload 2
+
+import sys
+import os
+import numpy as np
+from astropy.io import fits
+import matplotlib.pylab as plt
+
+from sp_validation import run_joint_cat as sp_joint
+from sp_validation import util
+from sp_validation.basic import metacal
+from sp_validation import calibration
+import sp_validation.cat as cat
+
+obj = sp_joint.CalibrateCat()
+
+obj._params["input_path"] = "unions_shapepipe_comprehensive_struc_2024_v1.4.2.hdf5"
+obj._params["cmatrices"] = True
+obj._params["verbose"] = True
+
+# !pwd
+
+dat, dat_ext = obj.read_cat(load_into_memory=False)
+
+print(f"Found {len(dat)} (~{util.millify(len(dat))}) objects in catalogue")
+
+
+# ## Masking
+
+class mask():
+    def __init__(self, col_name, label, kind="equal", value=0, dat=None):
+        self._col_name = col_name
+        self._label = label
+        self._value = value
+        self._kind = kind
+        self._num_ok = None
+
+        if dat is not None:
+            self.apply(dat)
+
+    @classmethod
+    def from_list(cls, masks, label="combined"):
+        my_mask = cls(label, label, kind="combined", value=None)
+
+        my_mask._mask = np.logical_and.reduce([m._mask for m in masks])
+
+        return my_mask
+
+    def apply(self, dat):
+        if self._kind == "equal":
+            self._mask = dat[self._col_name] == self._value
+        elif self._kind == "not_equal":
+            self._mask = dat[self._col_name] != self._value
+        elif self._kind == "greater_equal":
+            self._mask = dat[self._col_name] >= self._value
+        elif self._kind == "range":
+            self._mask = (dat[self._col_name] >= self._value[0]) & (dat[self._col_name] <= self._value[1])
+        else:
+            raise ValueError(f"Invalid kind {self._kind}")
+
+    @classmethod
+    def print_strings(cls, coln, lab, num, fnum):
+        print(f"{coln:30s} {lab:30s} {num:10s} {fnum:10s}")
+
+    def print_stats(self, num_obj):
+        if self._num_ok is None:
+            self._num_ok = sum(self._mask)
+
+        si = f"{self._num_ok:10d}"
+        sf = f"{self._num_ok/num_obj:10.2%}"
+        self.print_strings(self._col_name, self._label, si, sf)
+
+    def print_summary(self, f_out):
+        print(f"[{self._label}]\t\t\t", file=f_out, end="")
+        sign = None
+        if self._kind == "equal":
+            sign = "="
+        elif self._kind == "not_equal":
+            sign = "!="
+        elif self._kind == "greater_equal":
+            sign = ">="
+        if sign is not None:
+            print(f"{self._col_name} {sign} {self._value}", file=f_out)
+
+        if self._kind == "range":
+            print(f"{self._value[0]} <= {self._col_name} <= {self._value[1]}", file=f_out)
+
+
+# ### Pre-processing ShapePipe flags
+
+masks = []
+
+# +
+# SExtractor flags (see galaxy.py:classification_galaxy_base)
+
+my_mask = mask(col_name="FLAGS",
label="SE FLAGS", kind="equal", value=0, dat=dat) +masks.append(my_mask) + +# + +# Duplicate objects + +my_mask = mask(col_name="overlap", label="tile overlap", kind="equal", value=True, dat=dat) +masks.append(my_mask) +# - + +my_mask = mask(col_name="IMAFLAGS_ISO", label="SP mask", kind="equal", value=0, dat=dat) +masks.append(my_mask) + +# + +# Number of epochs + +my_mask = mask(col_name="N_EPOCH", label=r"$n_{\rm epoch}$", kind="greater_equal", value=2, dat=dat) +masks.append(my_mask) + +# + +# Magnitude range + +my_mask = mask(col_name="mag", label="mag range", kind="range", value=[15, 30], dat=dat) +masks.append(my_mask) + +# + +# ngmix flags + +col_names = ["NGMIX_MCAL_FLAGS", "NGMIX_MOM_FAIL"] +tmp_labels = ["ngmix flag", "ngmix moments fail"] +good_mask_value = 0 + +for col_name, label in zip(col_names, tmp_labels): + my_mask = mask(col_name=col_name, label=label, kind="equal", value=good_mask_value, dat=dat) + masks.append(my_mask) + +# MKDEBUG TODO: check should be two components, see galaxy.py. +col_name = "NGMIX_ELL_PSFo_NOSHEAR_0" +label = "bad PSF ell" +kind = "not_equal" +value = -10 +my_mask = mask(col_name=col_name, label=label, kind=kind, value=value, dat=dat) +masks.append(my_mask) + +# - + +print(f"Combining {len(masks)} pre-processing masks") +mask_combined_pre = mask.from_list(masks, label="combined_pre") + +# + +# Output some mask statistics + +num_obj = dat.shape[0] + +mask.print_strings("flag", "label", f"{'num_ok':>10}", f"{'num_ok[%]':>10}") +for my_mask in masks: + my_mask.print_stats(num_obj) + +mask_combined_pre.print_stats(num_obj) +# - + +# ### Structural masks + +# Get all columns of type ``bool'' from the extended catalogue +column_ext_info = { + name: "bool" if dat_ext.dtype.fields[name][0] == "int8" else dat_ext.dtype.fields[name][0] + for name in dat_ext.dtype.names +} + +# + +# Get all columns of type ``bool'' from the extended catalogue. +# 0 = good mask value + +# Store the index of the first structural mask = current length of masks list +idx_first_struct = len(masks) + +value = False +for name in column_ext_info: + if column_ext_info[name] == "bool": + my_mask = mask(col_name=name, label=name, kind="equal", value=value, dat=dat_ext) + masks.append(my_mask) + else: + print(f"Type for column {name} not bool, skipping") +# - + +# Number of pointings +name = "npoint3" +label = r"$n_{\rm pointing}$" +value = 3 +kind = "greater_equal" +my_mask = mask(col_name=name, label=label, kind=kind, value=value, dat=dat_ext) +masks.append(my_mask) + +n_struct = len(masks) - idx_first_struct +print(f"Combining {n_struct} structural masks") +mask_combined_struct = mask.from_list(masks[idx_first_struct:], label="combined_struct") + +for my_mask in masks[idx_first_struct:]: + my_mask.print_stats(num_obj) +mask_combined_struct.print_stats(num_obj) + +print(f"Combining pre-processing and structural masks, {len(masks)} in total") +mask_combined = mask.from_list([mask_combined_pre, mask_combined_struct], label="combined") + +# + +# ### Calibration + +# + +# Define cuts and metacal input parameters + +# Ellipticity dispersion +sigma_eps_prior = 0.34 + +# Signal-to-noise range +gal_snr_min = 10 +gal_snr_max = 500 + +# Relative-size (hlr / hlr_psf) range +gal_rel_size_min = 0.5 +gal_rel_size_max = 3 + +# Correct relative size for ellipticity? 
+gal_size_corr_ell = False
+
+# For output
+mask_metacal = []
+
+col_name = "T_gal/T_PSF"
+label = "metacal relative galaxy size"
+my_mask = mask(col_name=col_name, label=label, kind="range", value=[gal_rel_size_min, gal_rel_size_max])
+masks.append(my_mask)
+
+col_name = "snr"
+label = "metacal signal-to-noise ratio"
+my_mask = mask(col_name=col_name, label=label, kind="range", value=[gal_snr_min, gal_snr_max])
+masks.append(my_mask)
+
+# +
+# Call metacal
+
+gal_metacal = metacal(
+    dat,
+    mask_combined._mask,
+    snr_min=gal_snr_min,
+    snr_max=gal_snr_max,
+    rel_size_min=gal_rel_size_min,
+    rel_size_max=gal_rel_size_max,
+    size_corr_ell=gal_size_corr_ell,
+    sigma_eps=sigma_eps_prior,
+    col_2d=False,
+    verbose=True,
+)
+
+# +
+# Get calibrated quantities
+
+g_corr, g_uncorr, w, mask_metacal = calibration.get_calibrated_quantities(gal_metacal)
+
+# Additive bias
+c = np.zeros(2)
+c_err = np.zeros(2)
+
+
+for comp in (0, 1):
+    c[comp] = np.mean(g_uncorr[comp])
+
+    # MKDEBUG TODO: Use std of mean instead, which is consistent with jackknife
+    c_err[comp] = np.std(g_uncorr[comp])
+
+# Shear estimate corrected for additive bias
+g_corr_mc = np.zeros_like(g_corr)
+c_corr = np.linalg.inv(gal_metacal.R).dot(c)
+for comp in (0, 1):
+    g_corr_mc[comp] = g_corr[comp] - c_corr[comp]
+# -
+
+name = "after cuts"
+# Number of objects passing the combined mask and the metacal cuts
+num_ok = len(w)
+print(f"{name:30s} {num_ok:10d} {num_ok/num_obj:10.2%}")
+
+# +
+# Additional quantities
+R_shear = np.mean(gal_metacal.R_shear, 2)
+
+ra = cat.get_col(dat, "RA", mask_combined._mask, mask_metacal)
+dec = cat.get_col(dat, "Dec", mask_combined._mask, mask_metacal)
+mag = cat.get_col(dat, "mag", mask_combined._mask, mask_metacal)
+snr = cat.get_snr("ngmix", dat, mask_combined._mask, mask_metacal)
+
+add_cols = ["FLUX_RADIUS", "FWHM_IMAGE", "FWHM_WORLD", "MAGERR_AUTO", "MAG_WIN", "MAGERR_WIN", "FLUX_AUTO", "FLUXERR_AUTO", "FLUX_APER", "FLUXERR_APER"]
+add_cols_data = {}
+for key in add_cols:
+    add_cols_data[key] = dat[key][mask_combined._mask][mask_metacal]
+
+# +
+# Compute DES weights
+
+cat_gal = {}
+cat_gal["e1_uncal"] = g_uncorr[0]
+cat_gal["e2_uncal"] = g_uncorr[1]
+cat_gal["R_g11"] = gal_metacal.R11
+cat_gal["R_g12"] = gal_metacal.R12
+cat_gal["R_g21"] = gal_metacal.R21
+cat_gal["R_g22"] = gal_metacal.R22
+cat_gal["NGMIX_T_NOSHEAR"] = dat["NGMIX_T_NOSHEAR"][mask_combined._mask][mask_metacal]
+cat_gal["NGMIX_Tpsf_NOSHEAR"] = dat["NGMIX_Tpsf_NOSHEAR"][mask_combined._mask][mask_metacal]
+cat_gal["snr"] = snr
+
+name = 'w_des'
+num_bins = 20
+w = calibration.get_w_des(cat_gal, num_bins)
+
+# +
+# Correct for PSF leakage
+
+cat_gal["e1"] = g_corr_mc[0]
+cat_gal["e2"] = g_corr_mc[1]
+#cat_gal["e1_PSF"] = e_psf
+
+num_bins = 20
+weight_type = 'des'
+#alpha_1, alpha_2 = calibration.get_alpha_leakage_per_object(cat_gal, num_bins, weight_type)
+
+# +
+output_shape_cat_path = obj._params["input_path"].replace("comprehensive", "cut")
+output_shape_cat_path = output_shape_cat_path.replace("hdf5", "fits")
+
+cat.write_shape_catalog(
+    output_shape_cat_path,
+    ra,
+    dec,
+    w,
+    mag=mag,
+    g=g_corr_mc,
+    g1_uncal=g_uncorr[0],
+    g2_uncal=g_uncorr[1],
+    R=gal_metacal.R,
+    R_shear=R_shear,
+    R_select=gal_metacal.R_selection,
+    c=c,
+    c_err=c_err,
+    add_cols=add_cols_data
+)
+# -
+
+with open("masks.txt", "w") as f_out:
+    for my_mask in masks:
+        my_mask.print_summary(f_out)
+
+from scipy import stats
+def correlation_matrix(masks, confidence_level=0.9):
+
+    n_key = len(masks)
+    print(n_key)
+
+    cm = np.empty((n_key, n_key))
+    r_val = 
np.zeros_like(cm) + r_cl = np.empty((n_key, n_key, 2)) + + for idx, mask_idx in enumerate(masks): + for jdx, mask_jdx in enumerate(masks): + res = stats.pearsonr(mask_idx._mask, mask_jdx._mask) + r_val[idx][jdx] = res.statistic + r_cl[idx][jdx] = res.confidence_interval(confidence_level=confidence_level) + + return r_val, r_cl + + +# + +all_masks = masks[:-3] + +# + +if not obj._params["cmatrices"]: + print("Skipping cmatric calculations") + sys.exit(0) + +r_val, r_cl = correlation_matrix(all_masks) + +# + + +n = len(all_masks) +keys = [my_mask._label for my_mask in all_masks] + +plt.imshow(r_val, cmap='coolwarm', vmin=-1, vmax=1) +plt.xticks(np.arange(n), keys) +plt.yticks(np.arange(n), keys) +plt.xticks(rotation=90) +plt.colorbar() +plt.savefig("correlation_matrix.png") + +# - + +def confusion_matrix(prediction, observation): + + result = {} + + pred_pos = sum(prediction) + result["true_pos"] = sum(prediction & observation) + result["true_neg"] = sum(np.logical_not(prediction) & np.logical_not(observation)) + result["false_neg"] = sum(prediction & np.logical_not(observation)) + result["false_pos"] = sum(np.logical_not(prediction) & observation) + result["false_pos_rate"] = result["false_pos"] / (result["false_pos"] + result["true_neg"]) + result["false_neg_rate"] = result["false_neg"] / (result["false_neg"] + result["true_pos"]) + result["sensitivity"] = result["true_pos"] / (result["true_pos"] + result["false_neg"]) + result["specificity"] = result["true_neg"] / (result["true_neg"] + result["false_pos"]) + + cm = np.zeros((2, 2)) + cm[0][0] = result["true_pos"] + cm[1][1] = result["true_neg"] + cm[0][1] = result["false_neg"] + cm[1][0] = result["false_pos"] + cmn = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis] + result["cmn"] = cmn + + return result + + +n_key = len(all_masks) +cms = np.zeros((n_key, n_key, 2, 2)) +for idx in range(n_key): + for jdx in range(n_key): + + if idx == jdx: continue + + print(idx, jdx) + res = confusion_matrix(masks[idx]._mask, masks[jdx]._mask) + cms[idx][jdx] = res["cmn"] + +# + +import seaborn as sns + +fig = plt.figure(figsize=(30, 30)) +axes = np.empty((n_key, n_key), dtype=object) +for idx in range(n_key): + for jdx in range(n_key): + if idx == jdx: continue + axes[idx][jdx] = plt.subplot2grid((n_key, n_key), (idx, jdx), fig=fig) + +matrix_elements = ["True", "False"] + +for idx in range(n_key): + for jdx in range(n_key): + + if idx == jdx: continue + + ax = axes[idx, jdx] + sns.heatmap(cms[idx][jdx], annot=True, fmt='.2f', xticklabels=matrix_elements, yticklabels=matrix_elements, ax=ax) + ax.set_ylabel(masks[idx]._label) + ax.set_xlabel(masks[jdx]._label) + +plt.show(block=False) +plt.savefig("confusion_matrix.png") +# - + +obj.close_hd5() + + diff --git a/notebooks/cosmology.ipynb b/notebooks/cosmology.ipynb index 0dc7ce47..4878d549 100644 --- a/notebooks/cosmology.ipynb +++ b/notebooks/cosmology.ipynb @@ -116,16 +116,24 @@ " r_planck = res_g[sh].meanr\n", " break\n", "\n", - "xi_p_planck, xi_m_planck = get_theo_xi(\n", - " r_planck,\n", - " z,\n", - " nz,\n", - " Omega_m=Om,\n", - " h=h,\n", - " Omega_b=Ob,\n", - " sig8=sig8,\n", - " ns=ns\n", - ")" + "try:\n", + " import pyccl as ccl\n", + "\n", + " xi_p_planck, xi_m_planck = get_theo_xi(\n", + " r_planck,\n", + " z,\n", + " nz,\n", + " Omega_m=Om,\n", + " h=h,\n", + " Omega_b=Ob,\n", + " sig8=sig8,\n", + " ns=ns\n", + " )\n", + "\n", + "except:\n", + " print(\"Warning: Cannot impory pyccl\")\n", + " xi_p_planck = None\n", + " xi_m_planck = None\n" ] }, { @@ -151,18 +159,21 @@ " x = [\n", " 
res_g[sh].meanr[pos_ind_gg],\n", " res_g[sh].meanr[neg_ind_gg],\n", - " r_planck\n", " ]\n", " y = [\n", " res_g[sh].xip[pos_ind_gg],\n", " -res_g[sh].xip[neg_ind_gg],\n", - " xi_p_planck\n", " ]\n", " yerr = [\n", " np.sqrt(res_g[sh].varxip[pos_ind_gg]),\n", " np.sqrt(res_g[sh].varxip[neg_ind_gg]),\n", - " xi_p_planck * np.nan\n", " ]\n", + "\n", + " if xi_p_planck is not None:\n", + " x.append(r_planck)\n", + " y.append(xi_p_planck)\n", + " yerr.append(xi_p_planck * np.nan)\n", + "\n", " title = fr'{sh} Shear-shear correlation function $\\xi_+$'\n", " out_path = f'{plot_dir}/xi_+_shear_shear_{sh}.pdf'\n", "\n", @@ -200,21 +211,23 @@ " x = [\n", " res_g[sh].meanr[pos_ind_gg],\n", " res_g[sh].meanr[neg_ind_gg],\n", - " r_planck\n", " ]\n", " y = [\n", " res_g[sh].xim[pos_ind_gg],\n", " -res_g[sh].xim[neg_ind_gg],\n", - " xi_m_planck\n", " ]\n", " yerr = [\n", " np.sqrt(res_g[sh].varxim[pos_ind_gg]),\n", " np.sqrt(res_g[sh].varxim[neg_ind_gg]),\n", - " xi_m_planck * np.nan\n", " ]\n", " title = f'{sh} Shear-shear correlation function $\\\\xi_-$'\n", " out_path = f'{plot_dir}/xi_-_shear_shear_{sh}.pdf'\n", "\n", + " if xi_m_planck is not None:\n", + " x.append(r_planck)\n", + " y.append(xi_m_planck)\n", + " yerr.append(xi_m_planck * np.nan)\n", + " \n", " plot_data_1d(\n", " x,\n", " y,\n", @@ -412,7 +425,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/notebooks/demo_apply_hsp_masks.py b/notebooks/demo_apply_hsp_masks.py new file mode 100644 index 00000000..0e4e0726 --- /dev/null +++ b/notebooks/demo_apply_hsp_masks.py @@ -0,0 +1,110 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.15.1 +# kernelspec: +# display_name: sp_validation +# language: python +# name: python3 +# --- + +# # Test to apply hsp masks + +# %reload_ext autoreload +# %autoreload 2 + +# + +import os +import numpy as np +import tracemalloc +import healsparse as hsp + +from sp_validation import run_joint_cat as sp_joint + +# + +# Trace and print used memory if True +trace_mem = True + +if trace_mem: + tracemalloc.start() +# - + +# Create instance of ApplyHspMasks object +obj = sp_joint.ApplyHspMasks() + + +# Bits set for non-tomographic catalogues +bit_list = [1, 2, 4, 8, 64, 1024] + +# + +# combine bit list with & operator +# equivalent to bits = sum(bit_list) +# if elements in bit_list are of type 2^n + +bits = 0 +for b in bit_list: + bits = bits | b + +# + +# Set parameters +obj._params["input_path"] = "unions_shapepipe_comprehensive_2024_v1.4.2.hdf5" +obj._params["output_path"] = "unions_shapepipe_comprehensive_struc_2024_v1.4.2.hdf5" +obj._params["mask_dir"] = f"{os.environ['HOME']}/v1.4.x/masks" +obj._params["nside"] = 131072 +obj._params["file_base"] = "mask_r_" +obj._params["bits"] = bits + +obj._params["aux_mask_files"] = f"{obj._params['mask_dir']}/coverage.hsp" +obj._params["aux_mask_labels"] = "npoint3" +obj._params["verbose"] = True +# - + +# ## Run + +# + +# Check parameter validity +obj.check_params() + +# Update parameters (here: strings to list) +obj.update_params() +# - + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Read catalogue +dat = obj.read_cat(load_into_memory=False, mode="r") + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: 
{current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Get bit-coded masks +masks = obj.get_masks(dat=dat) + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Add mask bits as new columns +dat_new = obj.append_masks(dat, masks) + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + +# Write extended data to new HDF5 file +obj.write_hdf5_file(dat, dat_new) + +# Close input HDF5 catalogue file +obj.close_hd5() + +if trace_mem: + current, peak = tracemalloc.get_traced_memory() + print(f"Current (peak) memory usage: {current / 1024**2:.2f} ({peak / 1024**2:.2f}) MB") + tracemalloc.stop() diff --git a/notebooks/main_set_up.ipynb b/notebooks/main_set_up.ipynb index fd10349d..7ecdc347 100644 --- a/notebooks/main_set_up.ipynb +++ b/notebooks/main_set_up.ipynb @@ -22,13 +22,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.execute_input": "2025-02-11T13:27:17.061070Z", + "iopub.status.busy": "2025-02-11T13:27:17.060921Z", + "iopub.status.idle": "2025-02-11T13:27:17.101791Z", + "shell.execute_reply": "2025-02-11T13:27:17.101155Z", + "shell.execute_reply.started": "2025-02-11T13:27:17.061054Z" } }, "outputs": [], + "source": [ + "%reload_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# General library imports\n", "import sys\n", @@ -42,7 +56,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.198398Z", + "iopub.status.idle": "2025-02-11T13:27:18.198606Z", + "shell.execute_reply": "2025-02-11T13:27:18.198506Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.198497Z" } }, "outputs": [], @@ -51,7 +68,8 @@ "from sp_validation.io import *\n", "from sp_validation.cat import *\n", "from sp_validation.survey import *\n", - "from sp_validation.galaxy import *" + "from sp_validation.galaxy import *\n", + "from sp_validation.calibration import *" ] }, { @@ -66,7 +84,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.199368Z", + "iopub.status.idle": "2025-02-11T13:27:18.199556Z", + "shell.execute_reply": "2025-02-11T13:27:18.199472Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.199463Z" } }, "outputs": [], @@ -87,7 +108,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.200467Z", + "iopub.status.idle": "2025-02-11T13:27:18.200992Z", + "shell.execute_reply": "2025-02-11T13:27:18.200889Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.200878Z" } }, "outputs": [], @@ -110,7 +134,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.299Z" + "iopub.status.busy": "2025-02-11T13:27:18.201455Z", + "iopub.status.idle": "2025-02-11T13:27:18.201655Z", + "shell.execute_reply": "2025-02-11T13:27:18.201558Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.201549Z" } }, "outputs": [], @@ -121,7 +148,10 @@ " dd = np.load(galaxy_cat_path, mmap_mode=mmap_mode)\n", "else:\n", " print(\"Loading galaxy .hdf5 file...\")\n", 
- " dd = read_hdf5_file(galaxy_cat_path, name, stats_file, param_path=param_list_path)" + " dd = read_hdf5_file(galaxy_cat_path, name, stats_file, param_path=param_list_path)\n", + "\n", + "n_obj = len(dd)\n", + "print_stats(f'Read {n_obj} objects from file {galaxy_cat_path}', stats_file, verbose=verbose)" ] }, { @@ -136,7 +166,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.202131Z", + "iopub.status.idle": "2025-02-11T13:27:18.202308Z", + "shell.execute_reply": "2025-02-11T13:27:18.202227Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.202219Z" } }, "outputs": [], @@ -166,7 +199,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.203120Z", + "iopub.status.idle": "2025-02-11T13:27:18.203317Z", + "shell.execute_reply": "2025-02-11T13:27:18.203233Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.203224Z" } }, "outputs": [], @@ -198,7 +234,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.203827Z", + "iopub.status.idle": "2025-02-11T13:27:18.204043Z", + "shell.execute_reply": "2025-02-11T13:27:18.203949Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.203941Z" } }, "outputs": [], @@ -218,7 +257,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.204434Z", + "iopub.status.idle": "2025-02-11T13:27:18.204618Z", + "shell.execute_reply": "2025-02-11T13:27:18.204529Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.204521Z" } }, "outputs": [], @@ -238,7 +280,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.205399Z", + "iopub.status.idle": "2025-02-11T13:27:18.205581Z", + "shell.execute_reply": "2025-02-11T13:27:18.205500Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.205492Z" } }, "outputs": [], @@ -252,7 +297,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.206293Z", + "iopub.status.idle": "2025-02-11T13:27:18.206474Z", + "shell.execute_reply": "2025-02-11T13:27:18.206392Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.206384Z" } }, "outputs": [], @@ -300,7 +348,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.300Z" + "iopub.status.busy": "2025-02-11T13:27:18.207015Z", + "iopub.status.idle": "2025-02-11T13:27:18.207209Z", + "shell.execute_reply": "2025-02-11T13:27:18.207122Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.207113Z" } }, "outputs": [], @@ -330,7 +381,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.207671Z", + "iopub.status.idle": "2025-02-11T13:27:18.207846Z", + "shell.execute_reply": "2025-02-11T13:27:18.207767Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.207758Z" } }, "outputs": [], @@ -388,7 +442,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.208418Z", + "iopub.status.idle": "2025-02-11T13:27:18.208617Z", + "shell.execute_reply": 
"2025-02-11T13:27:18.208518Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.208509Z" } }, "outputs": [], @@ -414,7 +471,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.209210Z", + "iopub.status.idle": "2025-02-11T13:27:18.209386Z", + "shell.execute_reply": "2025-02-11T13:27:18.209305Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.209297Z" } }, "outputs": [], @@ -470,7 +530,10 @@ "execution_count": null, "metadata": { "execution": { - "execution_failed": "2025-01-06T09:34:01.301Z" + "iopub.status.busy": "2025-02-11T13:27:18.209826Z", + "iopub.status.idle": "2025-02-11T13:27:18.210021Z", + "shell.execute_reply": "2025-02-11T13:27:18.209941Z", + "shell.execute_reply.started": "2025-02-11T13:27:18.209932Z" } }, "outputs": [], @@ -481,6 +544,9 @@ " dec_key=col_name_dec\n", ")\n", "\n", + "n_ok = sum(cut_overlap)\n", + "print_stats(f\"Non-overlapping objects: {n_ok:10d}, {n_ok/n_obj:10.2%}\", stats_file, verbose=verbose)\n", + "\n", "m_gal = {}\n", "\n", "for sh in shapes:\n", @@ -505,7 +571,16 @@ " cut_common,\n", " stats_file,\n", " verbose=verbose,\n", - " )" + " )\n", + "\n", + " n_ok = sum(cut_common)\n", + " print_stats(f\"{sh}: objects after common cut: {n_ok:10d}, {n_ok/n_obj:10.2%}\", stats_file, verbose=verbose)\n", + "\n", + " # MKDEBUG for debugging calibrate_comprehensive\n", + " n_ok = sum(m_gal[sh])\n", + " print_stats(f\"common & ngmix = galaxy selection: {n_ok:10d}, {n_ok/n_obj:10.2%}\", stats_file, verbose=verbose)\n", + "\n", + "\n" ] }, { @@ -532,7 +607,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/maps.ipynb b/notebooks/maps.ipynb index 0afb8e0f..becd90ba 100644 --- a/notebooks/maps.ipynb +++ b/notebooks/maps.ipynb @@ -28,7 +28,7 @@ "metadata": {}, "outputs": [], "source": [ - "from scipy.ndimage.filters import gaussian_filter\n", + "from scipy.ndimage import gaussian_filter\n", "\n", "from lenspack.utils import bin2d\n", "from lenspack.image.inversion import ks93" diff --git a/notebooks/mask_fits2hsparse.py b/notebooks/mask_fits2hsparse.py new file mode 100644 index 00000000..1af04a27 --- /dev/null +++ b/notebooks/mask_fits2hsparse.py @@ -0,0 +1,237 @@ +# --- +# jupyter: +# jupytext: +# text_representation: +# extension: .py +# format_name: light +# format_version: '1.5' +# jupytext_version: 1.15.1 +# kernelspec: +# display_name: sp_validation +# language: python +# name: sp_validation +# --- + +# + +import os +import numpy as np +import healpy as hp +import healsparse as hs +import glob +import matplotlib.pylab as plt +from astropy.io import fits +from astropy import wcs +from astropy import units +import tqdm +from timeit import default_timer as timer + +from IPython.display import display, clear_output + +from sp_validation import plot_style +# - + +import warnings +warnings.filterwarnings('ignore') + +# List of FITS mask files +directory = f"{os.environ['HOME']}/arc/projects/unions/catalogues/unions/GAaP_photometry/extfinalmask_UNIONS5000" +#directory = "." 
+fits_mask_files = glob.glob(f"{directory}/UNIONS.*_extfinalmask.fits")
+
+# Define HealSparse parameters
+nside_coverage = 32  # Define the nside of the coverage map (adjust as needed)
+nside_sparse = 4096  # Define the sparse map resolution
+
+# +
+from scipy.interpolate import griddata
+from scipy.interpolate import RectBivariateSpline
+
+def interpol(ra_step, dec_step, x, y, x_step, y_step):
+
+    # Flatten the coarse grid and create pairs of coordinates
+    points = np.array([x_step.flatten(), y_step.flatten()]).T
+    ra_values = ra_step.flatten()
+    dec_values = dec_step.flatten()
+
+    # Interpolate RA and Dec values to the fine grid
+    ra = griddata(points, ra_values, (x, y), method='cubic')
+    dec = griddata(points, dec_values, (x, y), method='cubic')
+
+    return ra, dec
+
+def process(fits_mask_file, nside_sparse, nside_coverage, step=1):
+
+    # Get data
+    hdu_list = fits.open(fits_mask_file)
+    WCS = wcs.WCS(hdu_list[0].header)
+
+    # Mask values
+    mask = hdu_list[0].data
+
+    # Coordinates
+
+    nx, ny = mask.shape
+    x = np.arange(nx)
+    y = np.arange(ny)
+    x_mesh, y_mesh = np.meshgrid(x, y)
+
+    if step > 1:
+
+        # Note: Use nx+1, ny+1 to add additional point at x_ar=nx, y_ar=ny.
+        # Required to reduce extrapolation errors towards large x, y.
+        x_ar = np.arange(0, nx + 1, step)
+        y_ar = np.arange(0, ny + 1, step)
+        x_step, y_step = np.meshgrid(x_ar, y_ar, indexing="ij")
+
+        #start = timer()
+        ra_step, dec_step = WCS.wcs_pix2world(x_step, y_step, 0)
+        #end = timer()
+        #print(f"WCS pix2world coarse {end - start:.1f}s")
+
+        #start = timer()
+        ra_interp = RectBivariateSpline(y_ar, x_ar, ra_step, ky=2, kx=2)
+        dec_interp = RectBivariateSpline(y_ar, x_ar, dec_step, ky=2, kx=2)
+        #end = timer()
+        #print(f"create spline {end - start:.1f}s")
+
+        #start = timer()
+        ra = ra_interp(y, x).T
+        dec = dec_interp(y, x).T
+        #end = timer()
+        #print(f"interpolate {end - start:.1f}s")
+
+    else:
+        #start = timer()
+        ra, dec = WCS.wcs_pix2world(x_mesh, y_mesh, 0)
+        #end = timer()
+        #print(f"WCS pix2world {end - start:.1f}s")
+
+    # coordinates in deg
+    return ra, dec, mask
+
+def update_map(hs_map, ra, dec, mask):
+
+    # Add pixels to healsparse map
+    #start = timer()
+    hs_map.update_values_pos(ra, dec, mask, lonlat=True, operation="or")
+    #end = timer()
+    #print(f"update map {end - start:.1f}s")
+
+
+# -
+
+def flatten(ra, dec, mask):
+    ra_flatten = np.ravel(np.radians(ra))
+    dec_flatten = np.ravel(np.radians(dec))
+    mask_flatten = np.ravel(mask)
+
+    return ra_flatten, dec_flatten, mask_flatten
+
+
+# Initialise map (first time)
+hs_map = hs.HealSparseMap.make_empty(nside_coverage, nside_sparse, dtype="int16", sentinel=0)
+
+do_plot = False
+do_gif = True
+
+# +
+step = 10
+
+batch_size = 10
+
+batch_ra, batch_dec, batch_mask = [], [], []
+
+# Loop over mask files
+idx = 0
+for fits_mask_file in tqdm.tqdm(fits_mask_files, total=len(fits_mask_files), disable=False):
+
+    # Check for zero file size; skip such files
+    if os.stat(fits_mask_file).st_size == 0:
+        print(f"File {fits_mask_file} has zero size, continuing")
+        continue
+
+    # Get coordinates of pixel grid
+    ra, dec, mask = process(fits_mask_file, nside_sparse, nside_coverage, step=step)
+
+    ra_flatten, dec_flatten, mask_flatten = flatten(ra, dec, mask)
+
+
+    # Append to batch
+    batch_ra.append(ra_flatten)
+    batch_dec.append(dec_flatten)
+    batch_mask.append(mask_flatten)
+
+    # Process batch update every `batch_size` files or at the end
+    if (idx + 1) % batch_size == 0 or idx == len(fits_mask_files) - 1:
+        start = timer()
+
+        # Concatenate arrays for batch processing
+        batch_ra = 
np.concatenate(batch_ra) + batch_dec = np.concatenate(batch_dec) + batch_mask = np.concatenate(batch_mask) + + update_map(hs_map, batch_ra, batch_dec, batch_mask) + + # Clear batch storage + batch_ra, batch_dec, batch_mask = [], [], [] + + if do_gif: + + map_lowres = hs_map.generate_healpix_map(nside=256) + hp.mollview(map_lowres) + fname = f"map_lowres_{idx:04d}.jpg" + print(f"Saving figure {fname}") + + plt.savefig(fname) + + idx += 1 + + if do_plot: + clear_output(wait=True) + + # Create the figure + plt.figure(figsize=(8, 5)) + + hp.mollview(hs_map.coverage_mask) + plt.show() +# - + +hs_map.write('mask_all.fits', clobber=False) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/notebooks/metacal_global.ipynb b/notebooks/metacal_global.ipynb index f1f2442d..a467dfc9 100644 --- a/notebooks/metacal_global.ipynb +++ b/notebooks/metacal_global.ipynb @@ -56,7 +56,8 @@ "from sp_validation.basic import *\n", "from sp_validation.plots import *\n", "from sp_validation.plot_style import *\n", - "from sp_validation.calibration import *" + "from sp_validation.calibration import *\n", + "from sp_validation import cat as spv_cat" ] }, { @@ -90,7 +91,18 @@ " size_corr_ell=gal_size_corr_ell,\n", " sigma_eps=sigma_eps_prior,\n", " verbose=verbose\n", - " )" + " )\n", + "\n", + " print_stats(\n", + " f\"Number of objects on metal input = {gal_metacal[sh]._n_input}\",\n", + " stats_file,\n", + " verbose=verbose,\n", + " )\n", + " print_stats(\n", + " f\"Number of objects after galaxy selection masking = {gal_metacal[sh]._n_after_gal_mask}\",\n", + " stats_file,\n", + " verbose=verbose,\n", + " )\n" ] }, { @@ -125,19 +137,35 @@ "tile_ID = {}\n", "\n", "for sh in shapes:\n", + " \n", " g_corr[sh], g_uncorr[sh], w[sh], mask[sh] = (\n", " get_calibrated_quantities(gal_metacal[sh])\n", " )\n", "\n", + " n_before_cuts = len(gal_metacal[sh].ns['g1'])\n", + " n_after_cuts = len(w[sh])\n", + " print_stats(\n", + " f\"Number of objects before cuts = {n_before_cuts}\",\n", + " stats_file,\n", + " verbose=verbose\n", + " )\n", + " print_stats(\n", + " f\"Number of objects after cuts = {n_after_cuts}\",\n", + " stats_file,\n", + " verbose=verbose\n", + " )\n", + " \n", " # coordinates\n", - " ra[sh] = dd['XWIN_WORLD'][m_gal[sh]][mask[sh]]\n", + " #ra[sh] = dd['XWIN_WORLD'][m_gal[sh]][mask[sh]]\n", + " ra[sh] = spv_cat.get_col(dd, col_name_ra, m_gal[sh], mask[sh]) \n", " \n", " # Modify R.A. for plots if R.A. 
= 0 in area\n", " if wrap_ra != 0:\n", " ra_wrap[sh] = (ra[sh] + wrap_ra) % 360 - wrap_ra + 360\n", " else:\n", " ra_wrap[sh] = ra[sh]\n", - " dec[sh] = dd['YWIN_WORLD'][m_gal[sh]][mask[sh]]\n", + " dec[sh] = spv_cat.get_col(dd, col_name_dec, m_gal[sh], mask[sh])\n", + "\n", " ra_mean[sh] = np.mean(ra_wrap[sh])\n", " dec_mean[sh] = np.mean(dec[sh])\n", " \n", @@ -150,18 +178,13 @@ " )\n", "\n", " # magnitude, from SExtractor\n", - " mag[sh] = dd['MAG_AUTO'][m_gal[sh]][mask[sh]]\n", + " mag[sh] = spv_cat.get_col(dd, \"MAG_AUTO\", m_gal[sh], mask[sh])\n", " \n", " # Keep tile ID if external mask\n", " if mask_external_path:\n", - " tile_ID[sh] = dd['TILE_ID'][m_gal[sh]][mask[sh]]\n", + " tile_ID[sh] = spv_cat.get_col(dd, \"TILE_ID\", m_gal[sh], mask[sh])\n", "\n", - "if 'ngmix' in shapes:\n", - " # signal-to-noise ratio, only for ngmix, from fitted flux and error\n", - " snr['ngmix'] = (\n", - " dd['NGMIX_FLUX_NOSHEAR'][m_gal['ngmix']][mask['ngmix']] \n", - " / dd['NGMIX_FLUX_ERR_NOSHEAR'][m_gal['ngmix']][mask['ngmix']]\n", - " )" + " snr[sh] = spv_cat.get_snr(sh, dd, m_gal[sh], mask[sh])" ] }, { @@ -259,12 +282,12 @@ "outputs": [], "source": [ "# Get coordinates for all objects\n", - "ra['all'] = dd[col_name_ra]\n", + "ra[\"all\"] = spv_cat.get_col(dd, col_name_ra)\n", "if wrap_ra:\n", " ra_wrap['all'] = (ra['all'] + wrap_ra) % 360 - wrap_ra + 360\n", "else:\n", " ra_wrap['all'] = ra['all']\n", - "dec['all'] = dd[col_name_dec]\n", + "dec[\"all\"] = spv_cat.get_col(dd, col_name_dec)\n", "\n", "shapes_all = []\n", "for sh in shapes:\n", @@ -664,7 +687,7 @@ " print_stats(f'dc_{comp+1} = {c_err[sh][comp]:.3g}', stats_file, verbose=verbose) \n", " print_stats(f'dcw_{comp+1} = {cw_err[sh][comp]:.3g}', stats_file, verbose=verbose)\n", "\n", - " # Error of mean: divde by sqrt(N) (TBC whether this is correct)\n", + " # Error of mean: divide by sqrt(N) (TBC whether this is correct)\n", " for comp in (0, 1):\n", " print_stats(f'dmc_{comp+1} = {c_err[sh][comp]/np.sqrt(n_gal[sh]):.3e}', stats_file, verbose=verbose)\n", " print_stats(\n", @@ -1139,21 +1162,13 @@ " print_stats('Dispersion of (average) single-component ellipticity = {:.3f} = {:.3f} / sqrt(2)' \\\n", " ''.format(sig_eps / np.sqrt(2), sig_eps), stats_file, verbose=verbose)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bed6267c-152e-4b16-9e91-18a9220c1e5a", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "sp_validation", "language": "python", - "name": "python3" + "name": "sp_validation" }, "language_info": { "codemirror_mode": { @@ -1165,7 +1180,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/params.py b/notebooks/params.py index 6c315998..c192ebfb 100644 --- a/notebooks/params.py +++ b/notebooks/params.py @@ -12,7 +12,6 @@ """ -import os import numpy as np @@ -41,7 +40,6 @@ ## 'ngix': multi-epoch model fitting ## 'galsim': stacked-image moments (experimental) shapes = ['ngmix'] -print('Shape measurement method(s):', shapes) # Paths @@ -117,6 +115,27 @@ ### Additional output columns add_cols = ["FLUX_RADIUS", "FWHM_IMAGE", "FWHM_WORLD", "MAGERR_AUTO", "MAG_WIN", "MAGERR_WIN", "FLUX_AUTO", "FLUXERR_AUTO", "FLUX_APER", "FLUXERR_APER", "NGMIX_T_NOSHEAR", "NGMIX_Tpsf_NOSHEAR"] +## Pre-calibration catalogue, including masked objects and mask flags +add_cols_pre_cal = ["IMAFLAGS_ISO", "FLAGS", 
"NGMIX_MCAL_FLAGS", "NGMIX_MOM_FAIL", "N_EPOCH", "NGMIX_N_EPOCH", "NGMIX_ELL_PSFo_NOSHEAR", "NGMIX_ELL_ERR_NOSHEAR"] + +### Set flag columns as integer format +add_cols_pre_cal_format = {} +for key in ("IMAFLAGS_ISO", "FLAGS", "NGMIX_MCAL_FLAGS", "NGMIX_MOM_FAIL", "N_EPOCH", "NGMIX_N_EPOCH"): + add_cols_pre_cal_format[key] = "I" + +# Crete key names for metacal information +prefix = "NGMIX" +suffixes = ["1M", "1P", "2M", "2P", "NOSHEAR"] +centers = ["FLAGS", "ELL", "FLUX", "FLUX_ERR", "T", "T_ERR", "Tpsf"] +for center in centers: + for suffix in suffixes: + add_cols_pre_cal.append(f"{prefix}_{center}_{suffix}") + +for suffix in suffixes: + add_cols_pre_cal_format[f"FLAGS_{suffix}"] = "I" + + +#add_cols_pre_cal_descr = ["ShapePipe pipeline flags", "SExtractor flags"] # Catalog parameters diff --git a/notebooks/psf_leakage.ipynb b/notebooks/psf_leakage.ipynb index e395a840..562ac56b 100644 --- a/notebooks/psf_leakage.ipynb +++ b/notebooks/psf_leakage.ipynb @@ -25,9 +25,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 120, "id": "35bc23a5", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:22.304812Z", + "iopub.status.busy": "2025-01-27T14:55:22.304500Z", + "iopub.status.idle": "2025-01-27T14:55:23.135275Z", + "shell.execute_reply": "2025-01-27T14:55:23.134437Z", + "shell.execute_reply.started": "2025-01-27T14:55:22.304770Z" + } + }, "outputs": [], "source": [ "import os\n", @@ -36,9 +44,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 121, "id": "bdeed0dc-7871-4833-b735-3d21f0e39b2e", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:27.071494Z", + "iopub.status.busy": "2025-01-27T14:55:27.071149Z", + "iopub.status.idle": "2025-01-27T14:55:27.688748Z", + "shell.execute_reply": "2025-01-27T14:55:27.687905Z", + "shell.execute_reply.started": "2025-01-27T14:55:27.071449Z" + } + }, "outputs": [], "source": [ "from cs_util.plots import plot_data_1d" @@ -46,9 +62,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 122, "id": "13ceb1df", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:27.690842Z", + "iopub.status.busy": "2025-01-27T14:55:27.690528Z", + "iopub.status.idle": "2025-01-27T14:55:31.964607Z", + "shell.execute_reply": "2025-01-27T14:55:31.963715Z", + "shell.execute_reply.started": "2025-01-27T14:55:27.690796Z" + } + }, "outputs": [], "source": [ "from cs_util import canfar\n", @@ -77,9 +101,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 123, "id": "462a654d", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:38.612918Z", + "iopub.status.busy": "2025-01-27T14:55:38.612287Z", + "iopub.status.idle": "2025-01-27T14:55:39.409596Z", + "shell.execute_reply": "2025-01-27T14:55:39.406848Z", + "shell.execute_reply.started": "2025-01-27T14:55:38.612872Z" + } + }, "outputs": [], "source": [ "n_bin = 30\n", @@ -97,10 +129,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 124, "id": "c9ae1e58", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:39.411097Z", + "iopub.status.busy": "2025-01-27T14:55:39.410782Z", + "iopub.status.idle": "2025-01-27T14:55:43.627796Z", + "shell.execute_reply": "2025-01-27T14:55:43.626744Z", + "shell.execute_reply.started": "2025-01-27T14:55:39.411052Z" + } + }, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "ngmix:\n", + "$e_{1}^{\\rm PSF}$: m_1=0.167±0.075\n", + "$e_{1}^{\\rm PSF}$: m_2=0.043±0.076\n", + "$e_{2}^{\\rm PSF}$: m_1=-0.044±0.090\n", + "$e_{2}^{\\rm PSF}$: m_2=-0.069±0.091\n", + "$\\mathrm{FWHM}^{\\rm PSF}$ [arcsec]: m_1=-0.005±0.023\n", + "$\\mathrm{FWHM}^{\\rm PSF}$ [arcsec]: m_2=-0.004±0.024\n" + ] + } + ], "source": [ "for sh in shapes:\n", " plot_dir_leakage = f'{plot_dir}/psf_leak_{sh}'\n", @@ -153,9 +207,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 125, "id": "f76b7339", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:43.629276Z", + "iopub.status.busy": "2025-01-27T14:55:43.628932Z", + "iopub.status.idle": "2025-01-27T14:55:54.035086Z", + "shell.execute_reply": "2025-01-27T14:55:53.942080Z", + "shell.execute_reply.started": "2025-01-27T14:55:43.629221Z" + } + }, "outputs": [], "source": [ "r_corr_gp = {}\n", @@ -203,10 +265,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 126, "id": "687ae9da", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:54.036946Z", + "iopub.status.busy": "2025-01-27T14:55:54.036591Z", + "iopub.status.idle": "2025-01-27T14:55:54.887867Z", + "shell.execute_reply": "2025-01-27T14:55:54.886881Z", + "shell.execute_reply.started": "2025-01-27T14:55:54.036901Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ngmix: Weighted average alpha = 0.0609\n" + ] + } + ], "source": [ "alpha_leak_mean = {}\n", "\n", @@ -224,9 +302,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 127, "id": "f5c64dce", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:54.889559Z", + "iopub.status.busy": "2025-01-27T14:55:54.889195Z", + "iopub.status.idle": "2025-01-27T14:55:56.096225Z", + "shell.execute_reply": "2025-01-27T14:55:56.095378Z", + "shell.execute_reply.started": "2025-01-27T14:55:54.889511Z" + } + }, "outputs": [], "source": [ "for sh in shapes:\n", @@ -260,9 +346,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 128, "id": "27331f73", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:55:56.097537Z", + "iopub.status.busy": "2025-01-27T14:55:56.097192Z", + "iopub.status.idle": "2025-01-27T14:56:10.045244Z", + "shell.execute_reply": "2025-01-27T14:56:10.044188Z", + "shell.execute_reply.started": "2025-01-27T14:55:56.097492Z" + } + }, "outputs": [], "source": [ "# Scale-dependent leakage using the star catalogue\n", @@ -312,10 +406,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 129, "id": "0c148a08", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:10.135611Z", + "iopub.status.busy": "2025-01-27T14:56:10.046380Z", + "iopub.status.idle": "2025-01-27T14:56:11.053643Z", + "shell.execute_reply": "2025-01-27T14:56:11.052385Z", + "shell.execute_reply.started": "2025-01-27T14:56:10.135549Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ngmix: Weighted average alpha using star cat = -0.00735\n" + ] + } + ], "source": [ "if star_cat_path:\n", " alpha_leak_mean = {}\n", @@ -334,9 +444,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 130, "id": "a28b5f26", - "metadata": {}, + "metadata": { + "execution": { + 
"iopub.execute_input": "2025-01-27T14:56:11.055388Z", + "iopub.status.busy": "2025-01-27T14:56:11.055016Z", + "iopub.status.idle": "2025-01-27T14:56:12.382111Z", + "shell.execute_reply": "2025-01-27T14:56:12.380999Z", + "shell.execute_reply.started": "2025-01-27T14:56:11.055340Z" + } + }, "outputs": [], "source": [ "if star_cat_path:\n", @@ -384,9 +502,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 131, "id": "d68f5607", - "metadata": {}, + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:12.383666Z", + "iopub.status.busy": "2025-01-27T14:56:12.383280Z", + "iopub.status.idle": "2025-01-27T14:56:13.174662Z", + "shell.execute_reply": "2025-01-27T14:56:13.173316Z", + "shell.execute_reply.started": "2025-01-27T14:56:12.383618Z" + } + }, "outputs": [], "source": [ "C_sys_p = {}\n", @@ -427,10 +553,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 132, "id": "f2f14815", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:16.861989Z", + "iopub.status.busy": "2025-01-27T14:56:16.861653Z", + "iopub.status.idle": "2025-01-27T14:56:18.987376Z", + "shell.execute_reply": "2025-01-27T14:56:18.986396Z", + "shell.execute_reply.started": "2025-01-27T14:56:16.861944Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Downloading file vos:cfis/cosmostat/cosmology/redshifts/nz.CFHTLenSmatched.W3_202012_v1.txt to ./nz.CFHTLenSmatched.W3_202012_v1.txt...\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Current version vos 3.6.1.1. A newer version, 3.6.2, is available on PyPI\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Download finished.\n" + ] + } + ], "source": [ "# Redshift distribution\n", "\n", @@ -449,10 +605,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 133, "id": "2aa72d9f", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:21.682179Z", + "iopub.status.busy": "2025-01-27T14:56:21.681869Z", + "iopub.status.idle": "2025-01-27T14:56:22.482871Z", + "shell.execute_reply": "2025-01-27T14:56:22.481614Z", + "shell.execute_reply.started": "2025-01-27T14:56:21.682134Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Computation of theoretical prediction failed\n" + ] + } + ], "source": [ "for sh in shapes:\n", " try:\n", @@ -472,10 +644,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 134, "id": "8312c4af", - "metadata": {}, - "outputs": [], + "metadata": { + "execution": { + "iopub.execute_input": "2025-01-27T14:56:25.350707Z", + "iopub.status.busy": "2025-01-27T14:56:25.350084Z", + "iopub.status.idle": "2025-01-27T14:56:28.252753Z", + "shell.execute_reply": "2025-01-27T14:56:28.251828Z", + "shell.execute_reply.started": "2025-01-27T14:56:25.350660Z" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ngmix: <|xi_sys_+|> = 3.913104457799062e-06\n", + "ngmix: <|xi_sys_-|> = 0.00025281181828650277\n" + ] + } + ], "source": [ "labels = ['$\\\\xi^{\\\\rm sys}_+$', '$\\\\xi^{\\\\rm sys}_-$']\n", "\n", @@ -543,13 +732,21 @@ "metadata": {}, "outputs": [], "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c64a0f7f-925b-4537-98af-f2d2643d4629", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - 
"display_name": "Python 3 (ipykernel)", + "display_name": "sp_validation", "language": "python", - "name": "python3" + "name": "sp_validation" }, "language_info": { "codemirror_mode": { @@ -561,7 +758,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.0" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/notebooks/write_cat.ipynb b/notebooks/write_cat.ipynb index 2f102a2e..17bf5689 100644 --- a/notebooks/write_cat.ipynb +++ b/notebooks/write_cat.ipynb @@ -39,7 +39,8 @@ "outputs": [], "source": [ "from sp_validation.util import *\n", - "from sp_validation.cat import *" + "from sp_validation.cat import *\n", + "from sp_validation.basic import metacal" ] }, { @@ -73,47 +74,45 @@ "local_cal = False\n", "\n", "if local_cal:\n", - " # MKDEBUG TODO: add local selection matrix\n", " for sh in shapes:\n", " write_shape_catalog(\n", " f'{output_shape_cat_base}_{sh}.fits',\n", " ra[sh],\n", " dec[sh],\n", - " g_corr_mc[sh],\n", " w[sh],\n", - " mag[sh],\n", - " gal_metacal[sh].R,\n", - " R_shear[sh],\n", - " gal_metacal[sh].R_selection,\n", - " c[sh],\n", - " c_err[sh],\n", - " alpha_leakage=alpha_leak_mean[sh],\n", + " mag=mag[sh],\n", + " g=g_corr_mc[sh],\n", " g1_uncal=g_uncorr[sh][0],\n", " g2_uncal=g_uncorr[sh][1], \n", " R_g11=R_shear_ind[sh][0, 0],\n", " R_g12=R_shear_ind[sh][0, 1],\n", " R_g21=R_shear_ind[sh][1, 0],\n", " R_g22=R_shear_ind[sh][1, 1],\n", + " R=gal_metacal[sh].R,\n", + " R_shear=R_shear[sh],\n", + " R_select=gal_metacal[sh].R_selection,\n", + " c=c[sh],\n", + " c_err=c_err[sh],\n", + " alpha_leakage=alpha_leak_mean[sh],\n", " add_cols=add_cols_data[sh],\n", " )\n", "else:\n", - " for sh in shapes:\n", - " \n", + " for sh in shapes: \n", " write_shape_catalog(\n", " f'{output_shape_cat_base}_{sh}.fits',\n", " ra[sh],\n", " dec[sh],\n", - " g_corr_mc[sh],\n", " w[sh],\n", - " mag[sh],\n", - " gal_metacal[sh].R,\n", - " R_shear[sh],\n", - " gal_metacal[sh].R_selection,\n", - " c[sh],\n", - " c_err[sh],\n", - " alpha_leakage=alpha_leak_mean[sh],\n", + " mag=mag[sh],\n", + " g=g_corr_mc[sh],\n", " g1_uncal=g_uncorr[sh][0],\n", " g2_uncal=g_uncorr[sh][1],\n", + " R=gal_metacal[sh].R,\n", + " R_shear=R_shear[sh],\n", + " R_select=gal_metacal[sh].R_selection,\n", + " c=c[sh],\n", + " c_err=c_err[sh],\n", + " alpha_leakage=alpha_leak_mean[sh],\n", " add_cols=add_cols_data[sh],\n", " )" ] @@ -162,12 +161,6 @@ "outputs": [], "source": [ "for sh in shapes:\n", - " \n", - " # Signal-to-noise estimates\n", - " if sh == 'ngmix':\n", - " my_snr = snr['ngmix']\n", - " elif sh == 'galsim':\n", - " my_snr = dd['SNR_WIN'][m_gal['galsim']]\n", "\n", " # Additional columns:\n", " # {e1, e2, size}_PSF\n", @@ -182,30 +175,95 @@ " f'{output_shape_cat_base}_extended_{sh}.fits',\n", " ra[sh],\n", " dec[sh],\n", - " g_corr_mc[sh],\n", " w[sh],\n", - " mag[sh],\n", - " gal_metacal[sh].R,\n", - " R_shear[sh],\n", - " gal_metacal[sh].R_selection,\n", - " c[sh],\n", - " c_err[sh],\n", - " snr=my_snr,\n", - " alpha_leakage=alpha_leak_mean[sh],\n", + " mag=mag[sh],\n", + " snr=snr[sh],\n", + " g=g_corr_mc[sh],\n", " g1_uncal=g_uncorr[sh][0],\n", " g2_uncal=g_uncorr[sh][1],\n", " R_g11=R_shear_ind[sh][0, 0],\n", " R_g12=R_shear_ind[sh][0, 1],\n", " R_g21=R_shear_ind[sh][1, 0],\n", " R_g22=R_shear_ind[sh][1, 1], \n", + " R=gal_metacal[sh].R,\n", + " R_shear=R_shear[sh],\n", + " R_select=gal_metacal[sh].R_selection,\n", + " c=c[sh],\n", + " c_err=c_err[sh],\n", + " alpha_leakage=alpha_leak_mean[sh],\n", " add_cols=ext_cols[sh],\n", " )" ] }, { "cell_type": 
"markdown", - "id": "b39cd31a", + "id": "a6319ed3-0add-4fd1-a174-2b188863cd8e", "metadata": {}, + "source": [ + "### Write comprehensive shape catalogue (all objects, no cuts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea2e13a6-86b1-44fd-92b3-34fc48b318a1", + "metadata": {}, + "outputs": [], + "source": [ + "# Add additional columns without cuts nor mask applied\n", + "\n", + "ext_cols_pre_cal = {}\n", + "\n", + "for sh in shapes:\n", + " ext_cols_pre_cal[sh] = {}\n", + "\n", + " # Standard additional columns\n", + " if add_cols:\n", + " for key in add_cols:\n", + " ext_cols_pre_cal[sh][key] = dd[key]\n", + "\n", + " # Pre-calibration columns\n", + " if add_cols_pre_cal:\n", + " for key in add_cols_pre_cal:\n", + " ext_cols_pre_cal[sh][key] = dd[key]\n", + "\n", + " # Flag to cut duplicate objects in overlapping region with neighbouring tiles\n", + " ext_cols_pre_cal[sh][\"overlap\"] = cut_overlap\n", + " add_cols_pre_cal_format[\"overlap\"] = \"I\"\n", + "\n", + " # Additional columns {e1, e2, size}_PSF\n", + " ext_cols_pre_cal[sh]['e1_PSF'] = dd[key_PSF_ell[sh]][:,0]\n", + " ext_cols_pre_cal[sh]['e2_PSF'] = dd[key_PSF_ell[sh]][:,1]\n", + " ext_cols_pre_cal[sh]['fwhm_PSF'] = size_to_fwhm[sh](dd[key_PSF_size[sh]])\n", + "\n", + " _, _, iv_w = metacal.get_variance_ivweights(dd, sigma_eps_prior, mask=None, col_2d=True)\n", + "\n", + " mag = get_col(dd, \"MAG_AUTO\", None, None)\n", + " snr = get_snr(sh, dd, None, None)\n", + " g1_uncal = dd[\"NGMIX_ELL_NOSHEAR\"][:, 0]\n", + " g2_uncal = dd[\"NGMIX_ELL_NOSHEAR\"][:, 1]\n", + " \n", + " # Comprehensive catalogue without cuts nor mask applied\n", + " write_shape_catalog(\n", + " f'{output_shape_cat_base}_comprehensive_{sh}.fits',\n", + " ra[\"all\"],\n", + " dec[\"all\"],\n", + " iv_w,\n", + " mag=mag,\n", + " snr=snr,\n", + " g1_uncal=g1_uncal,\n", + " g2_uncal=g2_uncal,\n", + " add_cols=ext_cols_pre_cal[sh],\n", + " add_cols_format=add_cols_pre_cal_format,\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "b39cd31a", + "metadata": { + "jp-MarkdownHeadingCollapsed": true + }, "source": [ "### Write galaxy (or random) position catalogue" ] @@ -264,9 +322,9 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3 (ipykernel)", + "display_name": "sp_validation", "language": "python", - "name": "python3" + "name": "sp_validation" }, "language_info": { "codemirror_mode": { @@ -278,7 +336,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.9.19" } }, "nbformat": 4, diff --git a/requirements.txt b/requirements.txt index 8e724c24..2f1dce64 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,6 +5,7 @@ emcee cs_util==0.1.2 astropy>=5.0 healpy +healsparse getdist h5py jupyterlab diff --git a/scripts/apply_hsp_masks.py b/scripts/apply_hsp_masks.py new file mode 100755 index 00000000..48fc8442 --- /dev/null +++ b/scripts/apply_hsp_masks.py @@ -0,0 +1,22 @@ +#!/usr/bin/env python3 + +import sys + +from sp_validation import run_joint_cat as sp_joint + + +def main(argv=None): + """Main + + Main program + + """ + if argv is None: + argv = sys.argv[1:] + sp_joint.run_apply_hsp_masks(*argv) + + return 0 + + +if __name__ == "__main__": + sys.exit(main(sys.argv)) diff --git a/scripts/combine_results.py b/scripts/combine_results.py index bd3467f4..5e649364 100755 --- a/scripts/combine_results.py +++ b/scripts/combine_results.py @@ -771,10 +771,10 @@ def main(argv=None): for file_base in file_base_arr: - os.system(f'~/txt2tex.pl 
{file_base}.tex > {file_base}_out.tex')
-        print(f'Creating LaTeX file {file_base}.tex')
-        os.system(f'pdflatex {file_base}_out 2&>/dev/null')
-
+        #os.system(f'~/txt2tex.pl {file_base}.tex > {file_base}_out.tex')
+        #print(f'Creating LaTeX file {file_base}.tex')
+        #os.system(f'pdflatex {file_base}_out 2&>/dev/null')
+        print("Skipping calling LaTeX")
 
 if __name__ == "__main__":
     sys.exit(main(sys.argv))
diff --git a/scripts/create_joint_comprehensive_cat.py b/scripts/create_joint_comprehensive_cat.py
new file mode 100755
index 00000000..14289c67
--- /dev/null
+++ b/scripts/create_joint_comprehensive_cat.py
@@ -0,0 +1,17 @@
+#!/usr/bin/env python3
+
+import sys
+
+from sp_validation.run_joint_cat import run_joint_comprehensive_cat
+
+def main(argv=None):
+
+    if argv is None:
+        argv = sys.argv[1:]
+    run_joint_comprehensive_cat(*argv)
+
+    return 0
+
+
+if __name__ == "__main__":
+    sys.exit(main(sys.argv))
\ No newline at end of file
diff --git a/scripts/create_joint_shape_cat.py b/scripts/create_joint_shape_cat.py
index 79ef2b81..e63276e5 100755
--- a/scripts/create_joint_shape_cat.py
+++ b/scripts/create_joint_shape_cat.py
@@ -360,6 +360,8 @@ def merge_catalogues(
 
         column_all.append(fits.Column(name='alpha_2', array=alpha_2, format='D'))
 
+        if verbose:
+            print(f"Writing file {output_path}")
         cat.write_fits_BinTable_file(column_all, output_path, R, R_shear, R_select, c)
     else:
         if verbose:
@@ -490,7 +492,7 @@ def main(argv=None):
             print(' ', patch)
 
             input_path = f'{patch}/sp_output/shape_catalog_{sh}.fits'
-            ra, dec, g1, g2, w, mag, _ = read_shape_catalog(input_path)
+            ra, dec, g1, g2, w, mag, _ = read_shape_catalog(input_path, w_name="w_iv")
 
             ra_all = np.append(ra_all, ra)
             dec_all = np.append(dec_all, dec)
@@ -536,14 +538,14 @@ def main(argv=None):
         output_path,
         ra_all,
         dec_all,
-        g_corr_mc_all,
         w_all,
-        mag_all,
-        R,
-        R_shear,
-        R_select,
-        c,
-        c_err,
+        mag=mag_all,
+        g=g_corr_mc_all,
+        R=R,
+        R_shear=R_shear,
+        R_select=R_select,
+        c=c,
+        c_err=c_err,
         add_cols=add_col_data,
         add_cols_format=add_col_format,
     )
diff --git a/scripts/homogenize_cat_extended.py b/scripts/homogenize_cat_extended.py
index 95e56fd6..a3e117ae 100644
--- a/scripts/homogenize_cat_extended.py
+++ b/scripts/homogenize_cat_extended.py
@@ -5,7 +5,7 @@ e1 and e2 from the non-extended one.
 Currently, the calibration of the columns e1 and e2 of the extended catalog
 are calibrated per patch, while the non-extended catalog is calibrated on the
 whole footprint.
-:Authors: Martin Kilbinger, Sacha Guerrini
+:Authors: Sacha Guerrini, Martin Kilbinger
 """
 
 import sys
diff --git a/scripts/prepare_patch_for_spval.sh b/scripts/prepare_patch_for_spval.sh
index c82a5e77..e5445009 100755
--- a/scripts/prepare_patch_for_spval.sh
+++ b/scripts/prepare_patch_for_spval.sh
@@ -5,7 +5,8 @@ patch=$1
 spdir=$HOME/astro/repositories/github/sp_validation
 
 # Galaxy catalogue
-cp ~/psfex/final_cat_${patch}.hdf5 .
+#cp ~/psfex/final_cat_${patch}.hdf5 .
+ln -s ~/psfex/final_cat_${patch}.hdf5
 
 # Parameter file, to avoid read errors for hdf5 file
 ln -sf ~/shapepipe/example/cfis/final_cat.param
@@ -17,12 +18,14 @@ ln -sf ~/shapepipe/example/cfis/final_cat.param
 ## Projected back to world coordinates
 ln -sf $HOME/psfex/star_cat/${patch}/output/run_sp_Ms/merge_starcat_runner/output/full_starcat-0000000.fits
 
-# Parameter file
-cp $spdir/notebooks/params.py .
-
-# nb/python script
-ln -sf $spdir/notebooks/validation.py
-
 # Tile number list
 ln -sf ~/shapepipe/auxdir/CFIS/tiles_202106/tiles_${patch}.txt
 
+# Parameter file
+#cp $spdir/notebooks/params.py .
+echo "Diff:" +diff $spdir/notebooks/params.py params.py +echo "Run?" +echo "cp $spdir/notebooks/params.py params.py" +echo "Run?" +echo "ipython ~/sp_validation/notebooks/validation.py" diff --git a/sp_validation/__init__.py b/sp_validation/__init__.py index 33c10e61..143bc854 100644 --- a/sp_validation/__init__.py +++ b/sp_validation/__init__.py @@ -39,13 +39,18 @@ __version__ = _version __all__ = [ - 'survey', + 'util', 'io', - 'cat', 'basic', - 'util', + 'galaxy', + 'cosmology', + 'calibration', + 'cat', 'plot_style', - 'plots' + 'plots', + 'run_joint_cat', + 'survey', + 'util', ] from . import * diff --git a/sp_validation/basic.py b/sp_validation/basic.py index 3387ae2a..28203be4 100644 --- a/sp_validation/basic.py +++ b/sp_validation/basic.py @@ -61,6 +61,9 @@ class metacal: sigma_eps : float, optional ellipticity dispersion (one component) for computation of weights; default is 0.34 + col_2d : bool, optional + if `True` (default, ellipticity in one 2D column; + if `False`, ellipticity in two columns ELL_0, ELL_1 verbose : bool, optional, default=False verbose output if True @@ -80,6 +83,7 @@ def __init__( rel_size_max=3.0, size_corr_ell=True, sigma_eps=0.34, + col_2d=True, verbose=False, ): @@ -102,6 +106,7 @@ def __init__( ) self._sigma_eps = sigma_eps + self._col_2d = col_2d self._verbose = verbose @@ -154,7 +159,7 @@ def _read_data_ngmix(self, data, mask, m1, p1, m2, p2, ns): Read data from ngmix catalogue. """ for name_shear, dict_tmp in zip( - ['1m', '1p', '2m', '2p', 'noshear'], + ['1M', '1P', '2M', '2P', 'NOSHEAR'], [m1, p1, m2, p2, ns] ): @@ -162,40 +167,97 @@ def _read_data_ngmix(self, data, mask, m1, p1, m2, p2, ns): print('Extracting {}'.format(name_shear)) dict_tmp['flag'] = ( - data[f'{self._prefix}_FLAGS_{name_shear.upper()}'][mask] - ) - dict_tmp['g1'] = ( - data[f'{self._prefix}_ELL_{name_shear.upper()}'][:, 0][mask] - ) - dict_tmp['g2'] = ( - data[f'{self._prefix}_ELL_{name_shear.upper()}'][:, 1][mask] + data[f'{self._prefix}_FLAGS_{name_shear}'][mask] ) - dict_tmp['flux'] = ( - data[f'{self._prefix}_FLUX_{name_shear.upper()}'][mask] - ) - dict_tmp['flux_err'] = ( - data[f'{self._prefix}_FLUX_ERR_{name_shear.upper()}'][mask] - ) + if self._col_2d: + # Ellipticity in one 2D column + for comp in (0, 1): + dict_tmp[f"g{comp+1}"] = ( + data[f"{self._prefix}_ELL_{name_shear}"][:, comp][mask] + ) + else: + # Ellipcitiy in two different columns + for comp in (0, 1): + dict_tmp[f"g{comp+1}"] = ( + data[f"{self._prefix}_ELL_{name_shear}_{comp}"][mask] + ) + + for key in ("flux", "flux_err", "T", "T_err"): + dict_tmp[key] = ( + data[f'{self._prefix}_{key.upper()}_{name_shear}'][mask] + ) - dict_tmp['T'] = ( - data[f'{self._prefix}_T_{name_shear.upper()}'][mask] - ) - dict_tmp['T_err'] = ( - data[f'{self._prefix}_T_ERR_{name_shear.upper()}'][mask] - ) dict_tmp['Tpsf'] = ( - data[f'{self._prefix}_Tpsf_{name_shear.upper()}'][mask] + data[f'{self._prefix}_Tpsf_{name_shear}'][mask] ) - ns['C11'] = data[f'{self._prefix}_ELL_ERR_NOSHEAR'][:, 0][mask] - ns['C22'] = data[f'{self._prefix}_ELL_ERR_NOSHEAR'][:, 1][mask] - ns['w'] = ( - 1. 
/ (2 * self._sigma_eps ** 2 + dict_tmp['C11'] + dict_tmp['C22']) + ns["C11"], ns["C22"], ns["w"] = self.get_variance_ivweights( + data, + self._sigma_eps, + self._prefix, + mask=mask, + col_2d=self._col_2d, ) + self._n_input = len(data) + self._n_after_gal_mask = len(dict_tmp['flag']) + if self._verbose: + print(f"Number of objects on metacal input = {self._n_input}") + print(f"Number of objects after galaxy selection masking = {self._n_after_gal_mask}") + return m1, p1, m2, p2, ns + @staticmethod + def get_variance_ivweights(data, sigma_eps, prefix="NGMIX", mask=None, col_2d=True): + """Get Variance IVWEIGHTS. + + Compute variance and inverse-variance weights. + + Parameters + ---------- + data : numpy.ndarray + input data + sigma_eps : float + ellipticity dispersion + prefix : str, optional + shape measurement identifier; default is "NGMIX" + mask : list, optional + indicates valid objects with ``True`` values; default is ``None`` = use all objects + type has to be bool + col_2d : bool, optional + if ``True`` (default), ellipticity is given in single 2D column; + if ``False``, ellipticity is expected in two 1D columns. + + Returns + ------- + float + variance first component + float + variance second component + float + weight + + """ + if mask is not None: + if col_2d: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0][mask] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1][mask] + else: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"][mask] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"][mask] + else: + if col_2d: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 0] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR"][:, 1] + else: + C11 = data[f"{prefix}_ELL_ERR_NOSHEAR_0"] + C22 = data[f"{prefix}_ELL_ERR_NOSHEAR_1"] + + iv_w = 1 / (2 * sigma_eps ** 2 + C11 + C22) + + return C11, C22, iv_w + def _read_data_galsim(self, data, mask, m1, p1, m2, p2, ns): """Read Data Galsim. 
diff --git a/sp_validation/calibration.py b/sp_validation/calibration.py index ad2f8443..1d5ddf79 100644 --- a/sp_validation/calibration.py +++ b/sp_validation/calibration.py @@ -219,7 +219,10 @@ def get_alpha_leakage_per_object(cat_gal, num_bins, weight_type='des'): #Fit e1 mod_wls = sm.WLS(e1_out, sm.add_constant(e1_PSF), weights=weight_out) - res_wls = mod_wls.fit() + try: + res_wls = mod_wls.fit() + except: + raise RunTimeError("Linear regression fit for PSF leakage failed") alpha_df.loc[i_group, 'alpha_1'] = res_wls.params[1] alpha_df.loc[i_group, 'alpha_1_err'] = np.sqrt(res_wls.cov_params()[1, 1]) del res_wls, mod_wls @@ -399,4 +402,4 @@ def get_calibrate_no_leakage_e_from_cat(path_cat_gal, weight_type='des', verbose e1_noleak = e1 - cat_gal['alpha_1']*cat_gal['e1_PSF'] e2_noleak = e2 - cat_gal['alpha_2']*cat_gal['e2_PSF'] - return e1_noleak, e2_noleak \ No newline at end of file + return e1_noleak, e2_noleak diff --git a/sp_validation/cat.py b/sp_validation/cat.py index 302a467b..1d6af8d3 100644 --- a/sp_validation/cat.py +++ b/sp_validation/cat.py @@ -64,11 +64,11 @@ def print_mean_ellipticity( """ # Get ellipticity columns all_ell = [] - ell_str = '' + ell_str = "" if ell_n_comp == 1: for i in (0, 1): all_ell.append(dd[ell_col_name[i]]) - ell_str = f'{ell_str}{ell_col_name[i]} ' + ell_str = f"{ell_str}{ell_col_name[i]} " elif ell_n_comp == 2: for i in (0, 1): all_ell.append(dd[ell_col_name][:, i]) @@ -87,12 +87,16 @@ def print_mean_ellipticity( n_tot_val = len(np.where(ind_v)[0]) n_tot_mil = util.millify(n_tot_val) - msg = f'Total number of valid objects = {n_tot_val} = {n_tot_mil}' + msg = ( + f"Total number of valid objects ({ell_col_name}0,1 != {invalid})" + + f" = {n_tot_val} = {n_tot_mil}" + ) io.print_stats(msg, stats_file, verbose=verbose) - msg = 'Fraction of invalid objects = {}/{} = {:.3g}%\n' \ - ''.format(n_tot - n_tot_val, n_tot, - (n_tot - n_tot_val) / n_tot * 100) + msg = ( + f"Fraction of invalid objects = {n_tot-n_tot_val}/{n_tot}" + + f" = {(n_tot-n_tot_val)/n_tot:.3%}" + ) io.print_stats(msg, stats_file, verbose=verbose) # Select valid objects @@ -105,12 +109,12 @@ def print_mean_ellipticity( ell[i] = all_ell[i].mean() io.print_stats( - f'Mean ellipticity of valid objects ({ell_str}):', + f"Mean ellipticity of valid objects ({ell_str}):", stats_file, verbose=verbose, ) for i in (0, 1): - msg = ' = {:.3g}'.format(i + 1, ell[i]) + msg = " = {:.3g}".format(i + 1, ell[i]) io.print_stats(msg, stats_file, verbose=verbose) @@ -135,13 +139,13 @@ def print_some_quantities(dd, stats_file, verbose=False): """ # Print all column names if verbose: - print('Column names:') + print("Column names:") print(dd.dtype.names) - print('') + print("") n_tot = len(dd) n_mil = util.millify(n_tot) - msg = f'Total number of objects = {n_tot} = {n_mil}' + msg = f"Total number of objects = {n_tot} = {n_mil}" io.print_stats(msg, stats_file, verbose=verbose) return n_tot @@ -186,7 +190,7 @@ def check_matching( # Filter stars outside footprint for efficiency mask_area_tiles = get_footprint(name, d1[keys_1[0]], d1[keys_1[1]]) if len(np.where(mask_area_tiles)[0]) == 0: - raise ValueError(f'Error: no object found in field \'{name}\'') + raise ValueError(f"Error: no object found in field '{name}'") else: mask_area_tiles = np.arange(len(d1)) @@ -196,13 +200,13 @@ def check_matching( d2[keys_2[1]], d1[keys_1[0]][mask_area_tiles], d1[keys_1[1]][mask_area_tiles], - thresh=thresh + thresh=thresh, ) n_tot = len(d1[keys_1[0]][mask_area_tiles]) msg = ( - 'Number of matched stars from exposures to total 
catalogue = ' - + f'{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}' + "Number of matched stars from exposures to total catalogue = " + + f"{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -210,8 +214,8 @@ def check_matching( ind = np.array(list(set(ind))) msg = ( - 'Number of matched stars after removing multiple matches = ' - + f'{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}' + "Number of matched stars after removing multiple matches = " + + f"{len(ind)}/{n_tot} = {len(ind) / n_tot:.1%}" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -249,8 +253,9 @@ def check_invalid(dd, key, comp, val, stats_file, name=None, verbose=False): w = dd[key[i]][:, comp[i]] == val[i] n_inv_psf = len(np.where(w)[0]) - msg = 'Invalid {} found for {}/{} = {:.1g}% objects' \ - ''.format(name[i], n_inv_psf, n_all, n_inv_psf / n_all) + msg = "Invalid {} found for {}/{} = {:.1g}% objects" "".format( + name[i], n_inv_psf, n_all, n_inv_psf / n_all + ) io.print_stats(msg, stats_file, verbose=verbose) @@ -295,9 +300,9 @@ def match_subsample( ellipticities """ msg = ( - 'Number of stars matched to valid sample = ' - + f'{len(dd[pos_key[0]][ind][mask])}/{n_ref} = ' - + f'{len(dd[pos_key[0]][ind][mask]) / n_ref * 100:.1f}%' + "Number of stars matched to valid sample = " + + f"{len(dd[pos_key[0]][ind][mask])}/{n_ref} = " + + f"{len(dd[pos_key[0]][ind][mask]) / n_ref * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -316,25 +321,25 @@ def match_spread_class(dd, ind, mask, stats_file, n_ref, verbose=False): Match """ tot_star = n_ref - tot_as_star = len(np.where(dd['SPREAD_CLASS'][ind][mask] == 0)[0]) - tot_as_gal = len(np.where(dd['SPREAD_CLASS'][ind][mask] == 1)[0]) - tot_as_other = len(np.where(dd['SPREAD_CLASS'][ind][mask] == 2)[0]) + tot_as_star = len(np.where(dd["SPREAD_CLASS"][ind][mask] == 0)[0]) + tot_as_gal = len(np.where(dd["SPREAD_CLASS"][ind][mask] == 1)[0]) + tot_as_other = len(np.where(dd["SPREAD_CLASS"][ind][mask] == 2)[0]) msg = ( - 'Number of stars selected as star (SPREAD_CLASS=0) = ' - + f'{tot_as_star}/{tot_star} = {tot_as_star / tot_star * 100:.1f}%' + "Number of stars selected as star (SPREAD_CLASS=0) = " + + f"{tot_as_star}/{tot_star} = {tot_as_star / tot_star * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) msg = ( - 'Number of stars selected as galaxy (SPREAD_CLASS=1) = ' - + f'{tot_as_gal}/{tot_star} = {tot_as_gal / tot_star * 100:.1f}%' + "Number of stars selected as galaxy (SPREAD_CLASS=1) = " + + f"{tot_as_gal}/{tot_star} = {tot_as_gal / tot_star * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) msg = ( - 'Number of stars selected as other (SPREAD_CLASS=2) = ' - + f'{tot_as_other}/{tot_star} = {tot_as_other / tot_star * 100:.1f}%' + "Number of stars selected as other (SPREAD_CLASS=2) = " + + f"{tot_as_other}/{tot_star} = {tot_as_other / tot_star * 100:.1f}%" ) io.print_stats(msg, stats_file, verbose=verbose) @@ -359,7 +364,8 @@ def match_stars2(ra_gal, dec_gal, ra_star, dec_star, thresh=0.0002): def read_shape_catalog( - input_path + input_path, + w_name="w", ): """Read Shape Catalog. 
@@ -369,6 +375,8 @@ def read_shape_catalog( ---------- input_path : str input file path + w_name : str, optional + name of weight column, default is "w" Returns ------- @@ -391,17 +399,17 @@ def read_shape_catalog( hdu_no = 1 - ra = dat[hdu_no].data['RA'] - dec = dat[hdu_no].data['Dec'] + ra = dat[hdu_no].data["RA"] + dec = dat[hdu_no].data["Dec"] g = [np.empty_like(ra), np.empty_like(ra)] - g1 = dat[hdu_no].data['e1_uncal'] - g2 = dat[hdu_no].data['e2_uncal'] - w = dat[hdu_no].data['w'] - mag = dat[hdu_no].data['mag'] + g1 = dat[hdu_no].data["e1_uncal"] + g2 = dat[hdu_no].data["e2_uncal"] + w = dat[hdu_no].data[w_name] + mag = dat[hdu_no].data["mag"] - if 'snr' in dat[hdu_no].data.dtype.names: - snr = dat[hdu_no].data['snr'] + if "snr" in dat[hdu_no].data.dtype.names: + snr = dat[hdu_no].data["snr"] else: snr = None @@ -412,22 +420,22 @@ def write_shape_catalog( output_path, ra, dec, - g, w, - mag, - R, - R_shear, - R_select, - c, - c_err, - alpha_leakage=None, + mag=None, snr=None, + g=None, g1_uncal=None, g2_uncal=None, R_g11=None, R_g22=None, R_g12=None, R_g21=None, + R=None, + R_shear=None, + R_select=None, + c=None, + c_err=None, + alpha_leakage=None, sigma_epsilon=None, add_cols=None, add_cols_format=None, @@ -442,110 +450,142 @@ def write_shape_catalog( output file path ra, dec : arrays(ngal) of float coordinates in deg - g : arrays(2, ngal) of float - calibrated reduced shear estimate components, corrected for - multiplicative and additive bias, g = R^-1 g_uncal - c - w : array(ngal) of float - weights - mag : array(ngal) of float + w : np.ndarray + inverse-variance weights + mag : array(ngal) of float, optional magnitude, signal-to-noise ratio - R : 2x2 matrix of float - Mean full response matrix - R_shear : 2x2 matrix of float - Mean shear response matrix - R_select : 2x2 matrix of float - Global selection response matrix - c : array(2) of float + snr : array(ngal) of float, optional + signal-to-noise ratio, default is `None` + g : np.ndarray, optional + calibrated reduced shear estimate components, corrected for + multiplicative and additive bias, g = R^-1 g_uncal - c; + expected type is arrays(2, ngal) of float; + default is ``None`` (no calibrated shears written) + g1_uncal, g2_uncal : np.ndarray, optional + uncalibrated shear estimates; + expected types are arrays(ngal) of float + default is ``None`` (no uncalibrated shears written) + R_g11, R_g22, R_g12, R_g21 : np.ndarray, optional + shear response matrix elements per galaxy; + expected format is arrays(ngal) of float; + default is ``None`` + R : 2x2 matrix of float, optional + Mean full response matrix, default is ``None`` + R_shear : 2x2 matrix of float, optional + Mean shear response matrix, default is ``None`` + R_select : 2x2 matrix of float, optional + Global selection response matrix, default is ``None`` + c : array(2) of float, optional, default is ``None`` additive shear bias - c_err : array(2) of float + c_err : array(2) of float, optional, default is ``None`` error of c alpha_leakage : float, optional - Mean scale-dependent PSF leakage, default is None - snr : array(ngal) of float, optional - signal-to-noise ratio, default is `None` - g1_uncal, g2_uncal : arrays(ngal) of float, optional, default=None - uncalibrated shear estimates - R_g11, R_g22, R_g12, R_g21 : arrays(ngal) of float, optional, default=None - shear response matrix elements per galaxy + Mean scale-dependent PSF leakage, default is ``None`` sigma_epsilon: float, optional - shape noise, default is `None` - add_cols : dict, optional, default is 
`None` + shape noise, default is ``None`` + add_cols : dict, optional, default is ``None`` data for n additional columns to add add_cols_format : dict, optional - format for n additional columns to add, default is `None`, for which - 'float' format is used + format for n additional columns to add, default is ``None``, for which + ``float`` format is used """ - # Data HDU - c_ra = fits.Column(name='RA', array=ra, format='D', unit='deg') - c_dec = fits.Column(name='Dec', array=dec, format='D', unit='deg') - c_g1 = fits.Column(name='e1', array=g[0], format='D') - c_g2 = fits.Column(name='e2', array=g[1], format='D') - c_w = fits.Column(name='w', array=w, format='D') - c_mag = fits.Column(name='mag', array=mag, format='D') - cols = [c_ra, c_dec, c_g1, c_g2, c_w, c_mag] - - ntype = 6 - if snr is not None: - c_snr = fits.Column(name='snr', array=snr, format='D') - cols.append(c_snr) - ntype += 1 + col_info_arr = [] - for x, name in zip( + # Principal columns: coordinates and weights + col_info_arr.append( + ( + fits.Column(name="RA", array=ra, format="D", unit="deg"), + "Right Ascension", + ) + ) + col_info_arr.append( + ( + fits.Column(name="Dec", array=dec, format="D", unit="deg"), + "Declination", + ) + ) + col_info_arr.append( + ( + fits.Column(name="w_iv", array=w, format="D"), + "Inverse-variance weight", + ) + ) + + # Additional columns + ## Magnitude + if mag is not None: + col_info_arr.append( + ( + fits.Column(name="mag", array=mag, format="D"), + "MAG_AUTO magnitude", + ) + ) + ## Calibrated shear estimates + if g is not None: + for idx in (0, 1): + col_info_arr.append( + ( + fits.Column(name=f"e{idx+1}", array=g[idx], format="D"), + f"Calibrated reduced shear estimate comp {idx+1}", + ) + ) + # Signal-to-noise ratio + if snr is not None: + col_info_arr.append( + ( + fits.Column(name="snr", array=snr, format="D"), + "Signal-to-noise ratio", + ) + ) + for x, name, descr in zip( [g1_uncal, g2_uncal, R_g11, R_g22, R_g12, R_g21], - ['e1_uncal', 'e2_uncal', 'R_g11', 'R_g22', 'R_g12', 'R_g21'], + ["e1_uncal", "e2_uncal", "R_g11", "R_g22", "R_g12", "R_g21"], + [ + "Uncalibrated shear comp 1", + "Uncalibrated shear comp 2", + "Shear response matrix comp 1 1", + "Shear response matrix comp 2 2", + "Shear response matrix comp 1 2", + "Shear response matrix comp 2 1", + ], ): if x is not None: - cols.append(fits.Column(name=name, array=x, format='D')) - ntype += 1 + col_info_arr.append( + (fits.Column(name=name, array=x, format="D"), descr) + ) - if add_cols: - for name in add_cols: - if add_cols_format: + if add_cols is not None: + for idx, name in enumerate(add_cols): + if add_cols_format is not None and name in add_cols_format: my_format = add_cols_format[name] else: - my_format = 'D' - cols.append( - fits.Column(name=name, array=add_cols[name], format=my_format) + shape = add_cols[name].shape + if len(shape) == 1: + my_format = "D" + else: + my_format = f"{shape[1]}D" + col_info_arr.append( + ( + fits.Column( + name=name, array=add_cols[name], format=my_format + ), + name, + ) ) - ntype += len(add_cols) + # Write columns to FITS file + cols = [] + for col, _ in col_info_arr: + cols.append(col) table_hdu = fits.BinTableHDU.from_columns(cols) - table_hdu.header['TTYPE3'] = ( - 'e1', - 'Calibrated reduced shear estimate, 1st comp' - ) - table_hdu.header['TTYPE4'] = ( - 'e2', - 'Calibrated reduced shear estimate, 2nd comp' - ) - table_hdu.header['TTYPE5'] = ('w', 'DES Weight') - table_hdu.header['TTYPE6'] = ('mag', 'Magnitude = MAG_AUTO (SExtractor)') - if snr is not None: - 
table_hdu.header['TTYPE7'] = ( - 'snr', - 'Signal-to-noise ratio = flux/flux_std' + # Add human-readable descriptions + for idx, col_info in enumerate(col_info_arr): + table_hdu.header[f"TTYPE{idx+1}"] = ( + col_info[0].name, + col_info[1], ) - ntype += 1 - - for x, name in zip([g1_uncal, g2_uncal], ['e1_uncal', 'e2_uncal']): - if x is not None: - table_hdu.header[f'TTYPE{ntype}'] = ( - name, - 'uncalibrated reduced shear' - ) - ntype += 1 - for x, name in zip( - [R_g11, R_g22, R_g12, R_g21], - ['R_g11', 'R_g22', 'R_g12', 'R_g21'] - ): - if x is not None: - table_hdu.header[f'TTYPE{ntype}'] = ( - name, - f'shear response matrix {name}' - ) - ntype += 1 # Primary HDU with information in header primary_header = fits.Header() @@ -556,20 +596,22 @@ def write_shape_catalog( software_version=__version__, author=getpass.getuser(), ) - cat.add_shear_bias_to_header(primary_header, R, R_shear, R_select, c) - primary_header['c1_err'] = (c_err[0], 'Standard deviation of c_1') - primary_header['c2_err'] = (c_err[1], 'Standard deviation of c_2') - primary_header['w'] = ( - 'DES Weight' - ) - if sigma_epsilon: - primary_header['sig_eps'] = (sigma_epsilon, 'Shape noise RMS') + if all(v is not None for v in (R, R_shear, R_select, c)): + cat.add_shear_bias_to_header(primary_header, R, R_shear, R_select, c) + if c_err is not None: + primary_header["c1_err"] = (c_err[0], "Standard deviation of c_1") + primary_header["c2_err"] = (c_err[1], "Standard deviation of c_2") + + primary_header["w"] = "DES weight" + + if sigma_epsilon is not None: + primary_header["sig_eps"] = (sigma_epsilon, "Shape noise RMS") if alpha_leakage: - primary_header['alpha'] = ( + primary_header["alpha"] = ( alpha_leakage, - 'Mean scale-dependent PSF leakage' + "Mean scale-dependent PSF leakage", ) primary_hdu = fits.PrimaryHDU(header=primary_header) @@ -595,9 +637,9 @@ def write_galaxy_cat(output_path, ra, dec, tile_id): tile_id : array(ngal) of float tile ID of objects """ - c_ra = fits.Column(name='ra', array=ra, format='E', unit='deg') - c_dec = fits.Column(name='dec', array=dec, format='E', unit='deg') - c_id = fits.Column(name='tile_id', array=tile_id, format='E') + c_ra = fits.Column(name="ra", array=ra, format="E", unit="deg") + c_dec = fits.Column(name="dec", array=dec, format="E", unit="deg") + c_id = fits.Column(name="tile_id", array=tile_id, format="E") cols = [c_ra, c_dec, c_id] cat.write_fits_BinTable_file(cols, output_path) @@ -619,10 +661,10 @@ def write_PSF_cat(output_path, ra, dec, e1, e2): e2 : list of float second ellipticity component """ - c_ra = fits.Column(name='RA', array=ra, format='D', unit='deg') - c_dec = fits.Column(name='Dec', array=dec, format='D', unit='deg') - c_e1 = fits.Column(name='e1', array=e1, format='D') - c_e2 = fits.Column(name='e2', array=e2, format='D') + c_ra = fits.Column(name="RA", array=ra, format="D", unit="deg") + c_dec = fits.Column(name="Dec", array=dec, format="D", unit="deg") + c_e1 = fits.Column(name="e1", array=e1, format="D") + c_e2 = fits.Column(name="e2", array=e2, format="D") cols = [c_ra, c_dec, c_e1, c_e2] cat.write_fits_BinTable_file(cols, output_path) @@ -651,7 +693,7 @@ def read_param_file(path, verbose=False): if path: if verbose: - print(f"Reading parameter file {path}") + print(f"Reading parameter file: {path}") with open(path) as f: for line in f: @@ -670,16 +712,18 @@ def read_param_file(path, verbose=False): print(" into merged catalogue") param_list_unique = list(set(param_list)) - + if verbose: n = len(param_list) - len(param_list_unique) if n > 1: print("Removed 
{n} duplicate entries") - + return param_list_unique -def read_hdf5_file(file_path, name, stats_file, check_only=False, param_path=None): +def read_hdf5_file( + file_path, name, stats_file, check_only=False, param_path=None +): """Read HDF5 File. Read hdf5 file and return contained data. @@ -694,21 +738,37 @@ def read_hdf5_file(file_path, name, stats_file, check_only=False, param_path=Non summary statistics output file handler check_only : bool, optional If True only check, not return data - + Returns ------- dict data """ - param_list = read_param_file(param_path, verbose=True) if param_path else None + param_list = ( + read_param_file(param_path, verbose=True) if param_path else None + ) with h5py.File(file_path, "r") as hdf5_file: # Find patch group in hierarchical structure if not f"patches/{name}" in hdf5_file: - raise KeyError(f"Entry patches/{name} not found in file {file_path}") + raise KeyError( + f"Entry patches/{name} not found in file {file_path}" + ) patch_group = hdf5_file[f"patches/{name}"] + # Get size of data array + num_rows = sum(patch_group[ID].shape[0] for ID in patch_group) + # num_cols = patch_group[next(iter(patch_group))].shape[1] + num_cols = len(param_list) + + print( + f"Estimating {num_cols * num_rows * 8 / 1024**3:.1f}" + + f" Gb memory for the ({num_cols} x {num_rows}) data array ..." + ) + # data_comb = np.memmap(output_file, dtype=patch_group[next(iter(patch_group))].dtype, + # mode="w+", shape=(num_rows, num_cols)) + data_list = [] ID_pbl = set() for ID in tqdm.tqdm(patch_group): @@ -719,32 +779,89 @@ def read_hdf5_file(file_path, name, stats_file, check_only=False, param_path=Non # Restrict to parameter list if given data = data[param_list] if param_list is not None else data - #if data_list is None: - # data_list = data - # ID_first = ID - #else: - - # # Check columns - # for key in data.dtype.names: - # if key not in data_list.dtype.names: - # pass - # #print(f"Not found in ID {ID_first}", key) - # #ID_pbl.update([ID]) - # for key in data_list.dtype.names: - # if key not in data.dtype.names: - # pass - # #print(f"Not found in ID {ID}", key) - # #ID_pbl.update([ID]) - if not check_only: # Add new to existing data data_list.append(data) + print("Combine tile catalogues") data_comb = np.concatenate(data_list, axis=0) - + print("Done") + # Print problematic tile IDs for ID in ID_pbl: print("Tile IDs with missing keys:", file=stats_file) print(ID, file=stats_file) return data_comb + + +def get_col(dat, col, m_sel=None, m_flg=None): + """Get Col. + + Retrieve a specific column from the data with optional selection and flag masks. + + Parameters + ---------- + dat: dict + Input data + col: str + Key of the column to be returned + m_sel: array-like, optional + Boolean mask used for selection. If specified, m_flg must also be specified; + default is ``None`` + m_flg: array-like, optional + Boolean mask used as a flag. If specified, m_sel must also be specified. + default is ``None`` + + Returns + ------- + array-like + Requested column from the data, optionally filtered by the selection and flag masks. + + Raises + ------ + ValueError + If only one of m_sel or m_flg is specified without the other. + """ + + if bool(m_sel is None) != bool(m_flg is None): + raise ValueError("Specify both or none of selection and flag masks") + + if m_sel is None and m_flg is None: + return dat[col] + else: + return dat[col][m_sel][m_flg] + + +def get_snr(sh, dat, m_sel, m_flg): + """Get SNR. + + Return signal-to-noise ratio. 
+ + Parameters + ---------- + sh: str + shape method identified, e.g. "ngmix" + dat: dict + Input data + m_sel: array-like, optional + Boolean mask used for selection. If specified, m_flg must also be specified; + default is ``None`` + m_flg: array-like, optional + Boolean mask used as a flag. If specified, m_sel must also be specified. + default is ``None`` + + Returns + ------- + array-like + signal-to-noise ratios + + """ + if sh == "ngmix": + my_snr = get_col(dat, "NGMIX_FLUX_NOSHEAR", m_sel, m_flg) / get_col( + dat, "NGMIX_FLUX_ERR_NOSHEAR", m_sel, m_flg + ) + elif sh == "galsim": + my_snr = get_col(dat, "SNR_WIN", m_sel, m_flg) + + return my_snr diff --git a/sp_validation/cosmology.py b/sp_validation/cosmology.py index 4bf4f46b..1260c96e 100644 --- a/sp_validation/cosmology.py +++ b/sp_validation/cosmology.py @@ -46,7 +46,7 @@ try: import pyccl as ccl except Exception: - print('Could not import pyccl') + print('Could not import pyccl, continuing...') # Convergence maps diff --git a/sp_validation/run_joint_cat.py b/sp_validation/run_joint_cat.py new file mode 100644 index 00000000..4458dde2 --- /dev/null +++ b/sp_validation/run_joint_cat.py @@ -0,0 +1,1131 @@ +"""RUN JOINT CAT. + +This module implements classes to create, mask, and calibrate joint +comprehensive catalogues. + +:Authors: Martin Kilbinger +""" + +import sys +import os + +import numpy as np +from scipy import stats + +import datetime +from tqdm import tqdm + +from optparse import OptionParser +from importlib.metadata import version + + +import h5py +import healsparse as hsp +import numpy as np + +from astropy.io import fits +from astropy.table import Column + +from cs_util import logging +from cs_util import cat +from cs_util import args as cs_args + +from . import util + + +class BaseCat(object): + """Base_Cat. + + Basic catalogue class. + + """ + + def __init__(self): + self.params_default() + + def set_params_from_command_line(self, args): + """Set Params From Command Line. + + Only use when calling using python from command line. + Does not work from ipython or jupyter. + + """ + # Read command line options + options = cs_args.parse_options( + self._params, + self._short_options, + self._types, + self._help_strings, + ) + + # Save calling command + logging.log_command(args) + + def read_cat(self, load_into_memory=False, mode="r"): + """Read Cat. + + Read input catalogue, either FITS or HDF5. + + Parameters + ---------- + load_into_memory: bool, optional + load data into memory (potentially slow) of ``True``; + default is ``False`` + mode: bool, optional + HDF5 read mode, default is "r" + + Returns + ------- + list + Catalogue data + + Raises + ------ + IOError + If file extension is not .fits or .hd5 + + """ + fpath = self._params["input_path"] + verbose = self._params["verbose"] + + extension = os.path.splitext(fpath)[1] + if extension == ".fits": + if verbose: + print(f"Reading FITS file {fpath}, HDU {hdu}...") + + hdu = 1 + dat = fits.getdata(fpath, hdu) + + elif extension in (".hdf5", ".hd5"): + if verbose: + print(f"Reading HDF5 file {fpath}...") + + self._hd5file = h5py.File(fpath, mode) + try: + dat = self._hd5file["data"] + except: + print(f"Error while reading file {fpath}") + raise + if load_into_memory: + return dat[()] + else: + return dat + else: + raise IOError(f"Unknown file extension {extension}") + + def write_hdf5_header(self, hd5file): + """Write HDF5 Header. + + Write basic header information to HDF5 file. 
+ + Parameters + ---------- + hd5file : h5py.File + input HDF5 file + + """ + author = os.getenv("USER") + software_name = "sp_validation" + software_version = version(software_name) + date = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + + hd5file.attrs["author"] = author + hd5file.attrs["softname"] = software_name + hd5file.attrs["softver"] = software_version + hd5file.attrs["date"] = date + + def write_hdf5_file(self, dat, output_path=None): + """Write HDF5 File. + + Write HDF5 data to file. + + Parameters + ---------- + dat : numpy.ndarray + input data + output_path : str, optional + output file path; when ``None`` (default) use + self._params['output_path'] + patches : list, optional + input patches, list of str, default is ``None`` + + """ + if output_path is None: + output_path = self._params["output_path"] + + if self._params["verbose"]: + print("Creating hdf5 file") + + with h5py.File(output_path, "w") as f: + + self.write_hdf5_header(f) + + dset = f.create_dataset("data", data=dat) + dset[:] = dat + + if self._params["verbose"]: + print(f"Done.") + + def close_hd5(self): + """Close HD5. + + Close HDF5 file. + + """ + self._hd5file.close() + + +class JointCat(BaseCat): + """Joint Cat. + + Class to create joint weak-lensing catalogues. + + """ + + def __init__(self): + # Set default parameters + self.params_default() + + def set_params_from_command_line(self, args): + """Set Params From Command Line. + + Only use when calling using python from command line. + Does not work from ipython or jupyter. + + """ + # Read command line options + options = cs_args.parse_options( + self._params, + self._short_options, + self._types, + self._help_strings, + ) + + # Update parameter values from options + self._params.update(options) + + # Save calling command + logging.log_command(args) + + def params_default(self): + """Params Default. + + Set default parameter values. + + """ + self._params = { + "patches": "v1", + "sh": "ngmix", + "survey": "unions", + "year": "2024", + "version": "1.4.2", + "pipeline": "shapepipe", + "hdu": 1, + "reduce_mem": False, + } + self._short_options = { + "patches": "-p", + "sh": "-g", + "survey": "-s", + "year": "-y", + "version": "-V", + "reduce_mem": "-r", + } + self._types = { + "hdu": "int", + "reduce_mem": "bool", + } + self._help_strings = { + "patches": "list of patches separated by '+', or shortcut (allowed are 'v1'), default={}", + "sh": "shape measurement method, default={}", + "survey": "survey name, default={}", + "year": "year of processing, default={}", + "version": "catalogue version, default={}", + "reduce_mem": "output some columns in lower precision to reduce memory", + } + + def get_patches(self): + """Get Patches. + + Return list of patches according to option parameter value. + + Returns + ------- + list + patches, list of str + + """ + if self._params["patches"] == "v1": + n_patch = 7 + patches = [f"P{x}" for x in np.arange(n_patch) + 1] + else: + patches = self._params["patches"].split("+") + + return patches + + def get_n_obj(self, patches, base_path, input_sub_path): + """Get N Obj. + + Get number of objects from FITS file headers. 
+ + Parameters + ---------- + patches : list + input patches, type is str + base_path : str + input base directory, root dir of patches + input_sub_path : str + input file name; input path is base_path/patch/input_sub_path + + Raises: + ValueError: if input file canont be read + + Returns: + list + HDUs + list + number of objects per file + int + total number of objects + + """ + if self._params["verbose"]: + print("Getting number of objects") + n_obj_list = [] + n_obj = 0 + hdu_lists = [] + for patch in patches: + input_path = f"{base_path}/{patch}/{input_sub_path}" + try: + hdu_list = fits.open(input_path) + except: + raise ValueError( + f"Could not open file {input_path} at HDU" + + f" #{self._params['hdu']}" + ) + hdu_lists.append(hdu_list) + + this_n = int(hdu_list[self._params["hdu"]].header["NAXIS2"]) + n_obj_list.append(this_n) + n_obj += this_n + + if self._params["verbose"]: + print(f"Found a total of {n_obj} (~{util.millify(n_obj)}) objects.") + + return hdu_lists, n_obj_list, n_obj + + def get_col_info(self, dat): + """Get Col Info. + + Return information of input columns. + + Parameters + ---------- + dat : numpy.ndarray + input data + + Returns + ------- + list + column names + list + column formats + int + number of columns + + """ + col_names = dat.dtype.names + + n_col = 0 + formats = {} + ndim = {} + for name in col_names: + formats[name] = dat.dtype.fields[name][0] + ndim[name] = dat[name].ndim + n_col += ndim[name] + # Add one for patch + n_col += 1 + + if self._params["verbose"]: + print( + f"Number of input (output) columns = {len(col_names)} ({n_col})" + ) + + return col_names, formats, ndim, n_col + + def dtype_out(self, name, dtype_in): + """Set output dtype. + + Parameters + ---------- + name : str + column name + dtype_in : np.dtype + input dtype + + Returns + ------- + np.dtype + output dtype + + """ + cols_keep_dtype = [ + "RA", + "Dec", + "FLAGS", + "IMAFLAGS_ISO", + ] + if self._params["reduce_mem"] == False: + return dtype_in + elif name not in cols_keep_dtype: + if dtype_in == np.float64: + return np.float32 + if dtype_in == np.int32: + return np.int8 + + return dtype_in + + def init_data(self, n_col, n_obj, ndim, dat): + """Init Data. + + Initialize empty structured data. + + Parameters + ---------- + n_col : int + number of columns + n_obj : int + number of objects (rows) + ndim : dict + dimension of input columns + dat : numpy.ndarray + example data + + Returns + ------- + numpy.ndarray + combined structure data, (n_col x n_obj) array + + """ + if self._params["verbose"]: + print( + f"Allocating {n_col * n_obj * 8 / 1024**3:.1f}" + + f" Gb memory for the ({n_col} x {n_obj}) data array ...", + end="", + ) + + # Create dtypes from input column names and types. + # Reduce memory if flag set. + # Transform multi-D columns into 1D columns + dtype_tmp_list = [] + for name in ndim: + if ndim[name] == 1: + dtype_tmp_list.append( + (name, self.dtype_out(name, dat[name].dtype)) + ) + else: + for jdx in range(ndim[name]): + dtype_tmp_list.append( + (f"{name}_{jdx}", self.dtype_out(name, dat[name].dtype)) + ) + dtype_tmp_list.append(("patch", np.int8)) + dtype_tmp_struct = np.dtype(dtype_tmp_list) + dat_all = np.empty((n_obj,), dtype=dtype_tmp_struct) + + if self._params["verbose"]: + print("done") + + return dat_all + + def write_hdf5_file(self, dat_all, patches): + """Write HDF5 File. + + Write data to HDF5 file. 
+
+ Parameters
+ ----------
+ dat_all : numpy.ndarray
+ input data
+ patches : list
+ input patches, list of str
+
+ """
+ output_path = (
+ f"{self._params['survey']}_{self._params['pipeline']}"
+ + f"_comprehensive_{self._params['year']}_"
+ + f"v{self._params['version']}.hdf5"
+ )
+
+ with h5py.File(output_path, "w") as f:
+
+ self.write_hdf5_header(f, patches=patches)
+
+ dset = f.create_dataset("data", data=dat_all)
+ dset[:] = dat_all
+
+ # super().write_hdf5_file(dat_all, output_path=output_path)
+
+ def write_hdf5_header(self, hd5file, patches=None):
+ """Write HDF5 Header.
+
+ Write header information to HDF5 file.
+
+ Parameters
+ ----------
+ hd5file : h5py.File
+ input HDF5 file
+ patches : list, optional
+ input patches, list of str, default is ``None``
+
+ """
+ super().write_hdf5_header(hd5file)
+
+ if patches is not None:
+ patches_str = " ".join(patches)
+ hd5file.attrs["patches"] = patches_str
+
+ def merge_catalogues(self, patches, base_path="."):
+ """Merge Catalogues.
+
+ Merge individual patch-based catalogues.
+
+ Parameters
+ ----------
+ patches : list
+ input patches; list of `str`
+ base_path : str, optional
+ input base directory path; default is "."
+
+ """
+ input_sub_path = (
+ f"sp_output/shape_catalog_comprehensive_{self._params['sh']}.fits"
+ )
+
+ # Get input FITS files
+ hdu_lists, n_obj_list, n_obj = self.get_n_obj(
+ patches,
+ base_path,
+ input_sub_path,
+ )
+
+ # Read data
+ start = end = 0
+ for idx, patch in enumerate(patches):
+
+ input_path = f"{base_path}/{patch}/{input_sub_path}"
+ try:
+ dat = fits.getdata(input_path, self._params["hdu"])
+ # dat = hdu_lists[idx][self._params["hdu"]].data
+
+ hdu_lists[idx].close()
+ except:
+ raise ValueError(
+ f"Could not read data of file {input_path} at HDU"
+ + f" #{self._params['hdu']}"
+ )
+
+ # Create empty structured data array for the first patch
+ if idx == 0:
+
+ col_names, formats, ndim, n_col = self.get_col_info(dat)
+ dat_all = self.init_data(n_col, n_obj, ndim, dat)
+
+ # Append new data for that patch (between start and end)
+ end += n_obj_list[idx]
+
+ # Copy data
+ i_col = 0
+ names_out = dat_all.dtype.names
+ for name in col_names:
+ if ndim[name] == 1:
+ # Copy 1D column
+ dat_all[names_out[i_col]][start:end] = dat[name]
+ else:
+ # Copy all components of multi-D column
+ for jdx in range(ndim[name]):
+ dat_all[names_out[i_col + jdx]][start:end] = dat[name][
+ :, jdx
+ ]
+ i_col += ndim[name]
+ # Add patch number
+ dat_all["patch"][start:end] = patch[1:]
+
+ if i_col + 1 != n_col:
+ raise ValueError(
+ f"Inconsistent number of columns, {i_col + 1}"
+ + f" != {n_col}"
+ )
+ if self._params["verbose"]:
+ print(
+ f"{patch}: Added {len(dat)} (~{util.millify(len(dat))})"
+ + f" objects (from {start} to {end-1})."
+ )
+ start = end
+
+ del dat
+
+ self.write_hdf5_file(dat_all, patches)
+
+ def run(self):
+ """Run.
+
+ Main processing function.
+
+ """
+ patches = self.get_patches()
+ if self._params["verbose"]:
+ print("Merging patches", patches)
+
+ self.merge_catalogues(patches)
+
+
+class ApplyHspMasks(BaseCat):
+ """Apply Hsp Masks."""
+
+ def __init__(self):
+ # Set default parameters
+ self.params_default()
+
+ @classmethod
+ def get_label_struct(cls, bit):
+ """Get Label Struct.
+
+ Return label of bit-coded mask.
+ + Parameters + ---------- + bit: int + input bit + + Returns + ------- + str + label + + """ + # Labels of bit-coded structural masks + labels_struct = { + 1: "Faint_star_halos", + 2: "Bright_star_halos", + 4: "Stars", + 8: "Manual", + 16: "u", + 32: "g", + 64: "r", + 128: "i", + 256: "z", + 512: "Tile_RA_DEC_cut", + 1024: "Maximask", + } + + return labels_struct[bit] + + @classmethod + def get_mask_col_name(cls, bit): + """Get Mask Col Name. + + Return column name of mask corresponding to input bit. + + Parameters + ---------- + bit : int + input bit + + Returns + ------- + str + column name + + """ + return f"{bit}_{cls.get_label_struct(bit)}" + + def params_default(self): + """Params Default. + + Set default parameter values. + + """ + self._params = { + "input_path": None, + "output_path": "output.hdf5", + "mask_dir": ".", + "nside": 131072, + "file_base": "mask_r_", + "bits": 1, + "aux_mask_files": None, + "aux_mask_labels": None, + } + self._short_options = { + "input_path": "-i", + "output_path": "-o", + "mask_dir": "-d", + "nside": "-n", + "file_base": "-f", + "bits": "-b", + } + self._types = { + "nside": "int", + "bits": "int", + } + self._help_strings = { + "input_path": "path of input hdf5 catalogue, default={}", + "output_path": "path of output hdf5 catalogue, default={}", + "mask_dir": "directory with mask files, default={}", + "nside": "healsparse resolution parameter, default={}", + "file_base": "base name of mask files, default={}", + "bits": "bits to apply, default={}", + "aux_mask_files": "auxiliary mask files separated with '\\', defualt={}", + "aux_mask_labels": "auxiliary mask column names separated with '\\'", + } + + def check_params(self): + """Check Params. + + Check whether parameter values are valid. + + Raises + ------ + ValueError + if a parameter value is not valid + + """ + if (self._params["aux_mask_files"] is None) != ( + self._params["aux_mask_labels"] is None + ): + raise ValueError("Both or none of the 'aux_mask_*' can be None") + + def update_params(self): + """Update Params. + + Update and transform parameter values. + + """ + if self._params["aux_mask_files"] is not None: + self._params["aux_mask_file_list"] = cs_args.my_string_split( + self._params["aux_mask_files"], + verbose=self._params["verbose"], + stop=True, + sep="\\", + ) + self._params["aux_mask_num"] = len( + self._params["aux_mask_file_list"] + ) + self._params["aux_mask_label_list"] = cs_args.my_string_split( + self._params["aux_mask_labels"], + verbose=self._params["verbose"], + stop=True, + num=self._params["aux_mask_num"], + sep="\\", + ) + else: + self._params["aux_mask_file_list"] = [] + + def read_hsp_mask(self, path): + """Read Hsp Mask. + + Parameters + ---------- + path : str + Path to the mask file. + + Returns + ------- + np.ndarray + Mask + """ + + if self._params["verbose"]: + print(f"Reading healsparse mask file {path}...") + + return hsp.HealSparseMap.read(path) + + def reverse_bit_list(self): + """Reverse Bit List. + + Split bit-coded integer into bits. + + Parameters + ---------- + bit : int + Bit-coded integer + + Returns + ------- + list + List of bits + + """ + bit_list = [] + bits = self._params["bits"] + while bits: + lowest_bit = bits & -bits # Extract lowest set bit + bit_list.append(lowest_bit) + bits -= lowest_bit # Remove this bit from bit + + return bit_list + + def get_paths_bit_masks(self): + """Get Paths Bit Masks. + + Return paths of bit-coded mask files. + + Returns + ------- + dict + Dictionary with bit as key and path as value. 
+
+ """
+ paths = {}
+ bit_list = self.reverse_bit_list()
+ for bit in bit_list:
+ paths[bit] = (
+ f"{self._params['mask_dir']}/{self._params['file_base']}"
+ + f"nside{self._params['nside']}_n{bit}.hsp"
+ )
+ return paths
+
+ def get_masks(self, dat=None):
+ """Get Masks.
+
+ Return per-object masks for all bits.
+
+ Parameters
+ ----------
+ dat: numpy.ndarray, optional
+ input data; if not given (default), data will be read from
+ input file
+
+ Returns
+ -------
+ dict
+ masks
+
+ """
+ masks = {}
+
+ # Get bit-coded mask file paths
+ paths = self.get_paths_bit_masks()
+
+ # Get coordinates from data
+ if dat is None:
+ dat = self.read_cat()
+ if self._params["verbose"]:
+ print("Reading coordinates from data...")
+ ra = dat["RA"]
+ dec = dat["Dec"]
+
+ # Read healsparse files and apply masks to coordinates
+ for bit in paths:
+ if self._params["verbose"]:
+ print(f"Reading mask for bit {bit}...")
+ hsp_mask = hsp.HealSparseMap.read(paths[bit])
+
+ label = self.get_mask_col_name(bit)
+
+ if self._params["verbose"]:
+ print(f"Computing mask bit={bit}...")
+ masks[label] = hsp_mask.get_values_pos(ra, dec, lonlat=True)
+
+ # Read auxiliary mask files
+ for idx, path in enumerate(self._params["aux_mask_file_list"]):
+ label = self._params["aux_mask_label_list"][idx]
+ if self._params["verbose"]:
+ print(f"Reading aux mask for label {label}...")
+ hsp_mask = hsp.HealSparseMap.read(path)
+ masks[label] = hsp_mask.get_values_pos(ra, dec, lonlat=True)
+
+ return masks
+
+ def run(self):
+ """Run.
+
+ Main processing function.
+
+ """
+ obj = self
+
+ # Check parameters
+ obj.check_params()
+
+ # Update parameters
+ obj.update_params()
+
+ # Read input data
+ dat = obj.read_cat(load_into_memory=True, mode="r")
+
+ # Get masks
+ masks = obj.get_masks(dat=dat)
+
+ # Append masks to data
+ dat_ext = obj.append_masks(dat, masks)
+
+ # Write original and extended data to new HDF5 file
+ obj.write_hdf5_file(dat, dat_ext)
+
+ # Close input HDF5 catalogue file
+ obj.close_hd5()
+
+ def append_masks(self, dat, masks):
+ """Append Masks.
+
+ Add mask information as columns to data.
+
+ Parameters
+ ----------
+ dat: numpy.ndarray
+ input data
+ masks: dict
+ mask information
+
+ Returns
+ -------
+ numpy.ndarray
+ structured array with one column per mask
+
+ """
+ labels = [label for label in masks]
+ dtypes = [masks[label].dtype for label in masks]
+
+ # Create a structured dtype
+ structured_dtype = np.dtype(
+ [(label, dtype) for label, dtype in zip(labels, dtypes)]
+ )
+
+ new_data = np.zeros(dat.shape, dtype=structured_dtype)
+
+ # Copy masks as new columns
+ for label in masks:
+ new_data[label] = masks[label]
+
+ return new_data
+
+ def write_hdf5_file(self, dat, dat_new):
+ """Write HDF5 File.
+
+ Write original data and extended mask data to the output HDF5 file.
+
+ Parameters
+ ----------
+ dat : numpy.ndarray
+ input data, written to dataset "data"
+ dat_new : numpy.ndarray
+ mask columns, written to dataset "data_ext"
+
+ """
+ with h5py.File(self._params["output_path"], "w") as f:
+
+ self.write_hdf5_header(f)
+
+ dset = f.create_dataset(
+ "data", shape=dat.shape, dtype=dat.dtype, chunks=True
+ )
+ dset_new = f.create_dataset(
+ "data_ext",
+ shape=dat_new.shape,
+ dtype=dat_new.dtype,
+ chunks=True,
+ )
+
+ chunk_size = 10000
+ nrow = dat.shape[0]
+
+ for i in range(0, nrow, chunk_size):
+ end = min(i + chunk_size, nrow)
+ dset[i:end] = dat[i:end] # Write chunk to dataset
+ dset_new[i:end] = dat_new[i:end] # Write chunk to dataset
+
+ def write_hdf5_header(self, hd5file):
+ """Write HDF5 Header.
+
+ Write header information to HDF5 file.
+
+ Parameters
+ ----------
+ hd5file : h5py.File
+ input HDF5 file
+
+ """
+ super().write_hdf5_header(hd5file)
+
+ hd5file.attrs["hsp_nside"] = self._params["nside"]
+
+ # Bit-mask file paths
+ paths = self.get_paths_bit_masks()
+ for bit in paths:
+ hd5file.attrs[f"hsp_path_{bit}"] = paths[bit]
+
+ # Auxiliary mask file paths
+ for idx, path in enumerate(self._params["aux_mask_file_list"]):
+ label = self._params["aux_mask_label_list"][idx]
+ hd5file.attrs[f"hsp_path_{label}"] = path
+
+
+class CalibrateCat(BaseCat):
+ """Calibrate Cat.
+
+ Class to calibrate joint catalogue.
+
+ """
+
+ def __init__(self):
+ # Set default parameters
+ self.params_default()
+
+ def params_default(self):
+ """Params Default.
+
+ Set default parameter values.
+
+ """
+ self._params = {
+ "input_path": None,
+ "cmatrices": False,
+ }
+ self._short_options = {
+ "input_path": "-i",
+ "cmatrices": "-C",
+ }
+ self._types = {
+ "cmatrices": "bool",
+ }
+ self._help_strings = {
+ "input_path": "path of input HDF5 catalogue",
+ "cmatrices": "compute correlation and confusion matrices",
+ }
+
+ def read_cat(self, load_into_memory=False):
+ """Read Cat.
+
+ Read input HDF5 catalogue.
+
+ Parameters
+ ----------
+ load_into_memory: bool, optional
+ load data into memory (potentially slow) if ``True``;
+ default is ``False``
+
+ Returns
+ -------
+ numpy.ndarray or h5py.Dataset
+ Catalogue data
+ numpy.ndarray or h5py.Dataset
+ Extended catalogue data
+
+ """
+ fpath = self._params["input_path"]
+ verbose = self._params["verbose"]
+
+ if verbose:
+ print(f"Reading HDF5 file {fpath}...")
+
+ self._hd5file = h5py.File(fpath, "r")
+ try:
+ dat = self._hd5file["data"]
+ dat_ext = self._hd5file["data_ext"]
+ except:
+ print(f"Error while reading file {fpath}")
+ raise
+ if load_into_memory:
+ return dat[()], dat_ext[()]
+ else:
+ return dat, dat_ext
+
+ def run(self):
+ """Run.
+
+ Main processing function.
+
+ """
+
+
+def confusion_matrix(mask, confidence_level=0.9):
+ """Return pairwise Pearson correlations between masks and their confidence intervals."""
+
+ n_key = len(mask)
+
+ cm = np.empty((n_key, n_key))
+ r_val = np.zeros_like(cm)
+ r_cl = np.empty((n_key, n_key, 2))
+
+ for idx, key1 in enumerate(mask):
+ for jdx, key2 in enumerate(mask):
+ res = stats.pearsonr(mask[key1], mask[key2])
+ r_val[idx][jdx] = res.statistic
+ r_cl[idx][jdx] = res.confidence_interval(
+ confidence_level=confidence_level
+ )
+
+ return r_val, r_cl
+
+
+def correlation_matrix(mask):
+ """Return the matrix of pairwise Pearson correlation coefficients between masks."""
+
+ n_key = len(mask)
+
+ cm = np.empty((n_key, n_key))
+ r = np.zeros_like(cm)
+
+ for idx, key1 in enumerate(mask):
+ for jdx, key2 in enumerate(mask):
+ r[idx][jdx] = stats.pearsonr(mask[key1], mask[key2]).statistic
+
+ return r
+
+
+class ReadCat:
+
+ def __init__(self):
+ self.params_default()
+
+ def params_default(self):
+ """Params Default.
+
+ Set default parameter values.
+
+ """
+ self._params = {
+ "input": "input_cat.hdf5",
+ "n_row": None,
+ }
+ self._short_options = {
+ "input": "-i",
+ "n_row": "-n",
+ }
+ self._help_strings = {
+ "input": "input file, default={}",
+ "n_row": "print first N_ROW rows only",
+ }
+
+ def run(self):
+
+ pass
+
+
+def run_joint_comprehensive_cat(*args):
+ """Run Joint Comprehensive Cat.
+
+ Run class to create joint comprehensive catalogue from command line.
+
+ """
+ obj = JointCat()
+
+ obj.set_params_from_command_line(args)
+
+ obj.run()
+
+
+def run_calibrate_comprehensive_cat(*args):
+ """Run Calibrate Comprehensive Cat.
+
+ Run class to calibrate joint comprehensive catalogue from command line.
+ + """ + obj = CalibrateCat() + + obj.set_params_from_command_line(args) + + obj.run() + + +def run_apply_hsp_masks(*args): + """Run Apply Healsparse Masks. + + Run class to apply healsparse masks. + + """ + obj = ApplyHspMasks() + + obj.set_params_from_command_line(args) + + obj.run()
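The run_joint_cat module can be driven through the thin command-line wrappers added above (scripts/create_joint_comprehensive_cat.py, scripts/apply_hsp_masks.py) or by instantiating the classes directly and filling their _params dictionary, as the calibration notebook does. The following is an untested end-to-end sketch of the intended chain (merge patches, attach healsparse masks, open the result for calibration); the output paths, mask directory, and bit selection are placeholder values and are not prescribed by this patch, and "verbose" is set by hand because it is not part of the default parameter dictionaries.

# Untested workflow sketch (placeholder paths and mask settings)
from sp_validation import run_joint_cat as sp_joint

# 1) Merge the per-patch comprehensive catalogues into one HDF5 file;
#    with the default parameters this writes
#    unions_shapepipe_comprehensive_2024_v1.4.2.hdf5
joint = sp_joint.JointCat()
joint._params["verbose"] = True
joint.run()

# 2) Attach healsparse bit masks as a "data_ext" dataset
masker = sp_joint.ApplyHspMasks()
masker._params["input_path"] = "unions_shapepipe_comprehensive_2024_v1.4.2.hdf5"
masker._params["output_path"] = "comprehensive_with_masks.hdf5"  # placeholder
masker._params["mask_dir"] = "hsp_masks"                         # placeholder
masker._params["bits"] = 1 + 2 + 4
masker._params["verbose"] = True
masker.run()

# 3) Open both datasets for calibration
calib = sp_joint.CalibrateCat()
calib._params["input_path"] = masker._params["output_path"]
calib._params["verbose"] = True
dat, dat_ext = calib.read_cat(load_into_memory=False)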
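The "bits" option of ApplyHspMasks is a single bit-coded integer; reverse_bit_list splits it into its set bits, and get_paths_bit_masks maps each bit to a healsparse file name of the form {file_base}nside{nside}_n{bit}.hsp. The short standalone sketch below reproduces that decomposition outside the class; the chosen bits, nside, and file base are illustrative only.

# Standalone sketch of the bit decomposition used by ApplyHspMasks
def split_bits(bits):
    """Return the list of set bits, lowest first."""
    bit_list = []
    while bits:
        lowest_bit = bits & -bits  # isolate the lowest set bit
        bit_list.append(lowest_bit)
        bits -= lowest_bit
    return bit_list

bits = 1 + 2 + 512   # Faint_star_halos + Bright_star_halos + Tile_RA_DEC_cut
nside = 131072
file_base = "mask_r_"

for bit in split_bits(bits):  # -> [1, 2, 512]
    print(f"{file_base}nside{nside}_n{bit}.hsp")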
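The inverse-variance weights written to the new "w_iv" column follow metacal.get_variance_ivweights: w_iv = 1 / (2 * sigma_eps**2 + C11 + C22), where C11 and C22 are the per-object ellipticity variances and sigma_eps is the intrinsic shape dispersion. A minimal numpy sketch of that formula, with made-up variances for illustration, is:

import numpy as np

# Toy numbers; only the formula is taken from the patch
sigma_eps = 0.34                        # per-component shape dispersion
C11 = np.array([0.01, 0.05, 0.20])      # variance of e1 (illustrative)
C22 = np.array([0.01, 0.04, 0.30])      # variance of e2 (illustrative)

w_iv = 1.0 / (2 * sigma_eps**2 + C11 + C22)
print(w_iv)                             # well-measured objects receive larger weight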