From 5434bdd31094e10c73b32b1ceb3c6d1180be499d Mon Sep 17 00:00:00 2001 From: m-aguena Date: Mon, 19 Jun 2023 19:03:12 +0200 Subject: [PATCH 01/27] add box match --- clevar/match/box.py | 236 ++++++++++++++++++++++++++++++++++++++++ clevar/match/spatial.py | 201 ++++++++++++++++++++++++++++++++++ 2 files changed, 437 insertions(+) create mode 100644 clevar/match/box.py create mode 100644 clevar/match/spatial.py diff --git a/clevar/match/box.py b/clevar/match/box.py new file mode 100644 index 0000000..e875c79 --- /dev/null +++ b/clevar/match/box.py @@ -0,0 +1,236 @@ +"""@file box.py +The BoxMatch class +""" +import numpy as np + +from .spatial import SpatialMatch +from ..catalog import ClData + + +class BoxMatch(SpatialMatch): + """ + BoxMatch Class + + Attributes + ---------- + type : str + Type of matching object. Set to "Box" + history : list + Steps in the matching + """ + + def __init__(self): + SpatialMatch.__init__(self) + self.type = "Box" + + def multiple( + self, cat1, cat2, metric="GIoU", metric_cut=0.5, share_area_frac=0.5, verbose=True + ): + """ + Make the one way multiple matching + + Parameters + ---------- + cat1: clevar.ClCatalog + Base catalog + cat2: clevar.ClCatalog + Target catalog + verbose: bool + Print result for individual matches. + metric: str (optional) + Metric to be used for matching. Can be: GIoU (generalized Intersection over Union). + metric_cut: float + Minimum value of metric for match. + share_area_frac: float + Minimum relative size of area for match. + """ + # pylint: disable=arguments-renamed + # pylint: disable=too-many-locals + if cat1.mt_input is None: + raise AttributeError("cat1.mt_input is None, run prep_cat_for_match first.") + if cat2.mt_input is None: + raise AttributeError("cat2.mt_input is None, run prep_cat_for_match first.") + if metric == "GIoU": + + def get_metric_value(*args): + return self._compute_giou(*args) + + else: + raise ValueError("metric must be GIoU.") + self._cat1_mmt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching + ra2min, ra2max, dec2min, dec2max = ( + cat2[c] for c in ("ra_min", "ra_max", "dec_min", "dec_max") + ) + z2min, z2max = (cat2.mt_input[c] for c in ("zmin", "zmax")) + print(f"Finding candidates ({cat1.name})") + for ind1, (ra1min, ra1max, dec1min, dec1max, z1min, z1max) in enumerate( + zip( + *(cat1[c] for c in ("ra_min", "ra_max", "dec_min", "dec_max")), + *(cat1.mt_input[c] for c in ("zmin", "zmax")), + ) + ): + # crop in redshift range + mask = (z2max >= z1min) * (z2min <= z1max) + if mask.any(): + # makes square crop with intersection + mask[mask] = ( + (ra2max[mask] >= ra1min) + * (ra2min[mask] <= ra1max) + * (dec2max[mask] >= dec1min) + * (dec2min[mask] <= dec1max) + ) + if mask.any(): + area1, area2, intersection, outter = self._compute_areas( + *( + [ra1min, ra1max, dec1min, dec1max] * np.ones(mask.sum())[:, None] + ).T, # for vec computation + ra2min[mask], + ra2max[mask], + dec2min[mask], + dec2max[mask], + ) + # makes metric crop + for id2 in cat2["id"][mask][ + (get_metric_value(area1, area2, intersection, outter) >= metric_cut) + * (area1 / area2 >= share_area_frac) + * (area2 / area1 >= share_area_frac) + ]: + cat1["mt_multi_self"][ind1].append(id2) + ind2 = int(cat2.id_dict[id2]) + cat2["mt_multi_other"][ind2].append(cat1["id"][ind1]) + self._cat1_mmt[ind1] = True + if verbose: + self._prt_cand_mt(cat1, ind1) + hist = { + "func": "multiple", + "cats": f"{cat1.name}, {cat2.name}", + } + self._rm_dup_add_hist(cat1, cat2, hist) + + def _compute_areas(self, ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max): + area1 = (ra1max - ra1min) * (dec1max - dec1min) + area2 = (ra2max - ra2min) * (dec2max - dec2min) + # areas + intersection = (np.min([ra1max, ra2max], axis=0) - np.max([ra1min, ra2min], axis=0)) * ( + np.min([dec1max, dec2max], axis=0) - np.max([dec1min, dec2min], axis=0) + ) + outter = (np.max([ra1max, ra2max], axis=0) - np.min([ra1min, ra2min], axis=0)) * ( + np.max([dec1max, dec2max], axis=0) - np.min([dec1min, dec2min], axis=0) + ) + return area1, area2, intersection, outter + + def _compute_giou(self, area1, area2, intersection, outter): + union = area1 + area2 - intersection + return intersection / union + union / outter - 1.0 + + def prep_cat_for_match( + self, + cat, + delta_z, + n_delta_z=1, + ra_min="ra_min", + ra_max="ra_max", + dec_min="dec_min", + dec_max="dec_max", + ): + """ + Adds zmin and zmax to cat.mt_input and tags ra/dec min/max cols + + Parameters + ---------- + cat: clevar.ClCatalog + Input ClCatalog + delta_z: float, string + Defines the zmin, zmax for matching. Options are: + + * `'cat'` - uses redshift properties of the catalog + * `'spline.filename'` - interpolates data in 'filename' (z, zmin, zmax) fmt + * `float` - uses delta_z*(1+z) + * `None` - does not use z + + n_delta_z: float + Number of delta_z to be used in the matching + """ + # pylint: disable=arguments-differ + # pylint: disable=unused-argument + print("## Prep mt_cols") + if cat.mt_input is None: + cat.mt_input = ClData() + self._prep_z_for_match(cat, delta_z, n_delta_z) + for name in ("ra_min", "ra_max", "dec_min", "dec_max"): + if name not in cat.tags: + cat.tag_column(locals()[name], name) + self.history.append( + { + "func": "prep_cat_for_match", + "cat": cat.name, + "delta_z": delta_z, + "n_delta_z": n_delta_z, + } + ) + + def match_from_config(self, cat1, cat2, match_config, cosmo=None): + """ + Make matching of catalogs based on a configuration dictionary + + Parameters + ---------- + cat1: clevar.ClCatalog + ClCatalog 1 + cat2: clevar.ClCatalog + ClCatalog 2 + match_config: dict + Dictionary with the matching configuration. Keys must be: + + * `type`: type of matching, can be: `cat1`, `cat2`, `cross`. + * `catalog1`: `kwargs` used in `prep_cat_for_match(cat1, **kwargs)` (minus `cosmo`). + * `catalog2`: `kwargs` used in `prep_cat_for_match(cat2, **kwargs)` (minus `cosmo`). + * `preference`: Preference to set best match, can be: `more_massive`, + `angular_proximity`, `redshift_proximity`, `shared_member_fraction`. + * `verbose`: Print result for individual matches (default=`True`). + + cosmo: clevar.Cosmology object + Cosmology object for when radius has physical units + """ + if match_config["type"] not in ("cat1", "cat2", "cross"): + raise ValueError("config type must be cat1, cat2 or cross") + if match_config["type"] in ("cat1", "cross"): + print("\n## ClCatalog 1") + self.prep_cat_for_match(cat1, cosmo=cosmo, **match_config["catalog1"]) + if match_config["type"] in ("cat2", "cross"): + print("\n## ClCatalog 2") + self.prep_cat_for_match(cat2, cosmo=cosmo, **match_config["catalog2"]) + + verbose = match_config.get("verbose", True) + if match_config["type"] in ("cat1", "cross"): + print("\n## Multiple match (catalog 1)") + radius_selection = { + "cat1": "self", + "cat2": "other", + }.get( + match_config["which_radius"], + match_config["which_radius"], + ) + self.multiple(cat1, cat2, radius_selection, verbose=verbose) + if match_config["type"] in ("cat2", "cross"): + print("\n## Multiple match (catalog 2)") + radius_selection = { + "cat1": "other", + "cat2": "self", + }.get( + match_config["which_radius"], + match_config["which_radius"], + ) + # pylint: disable=arguments-out-of-order + self.multiple(cat2, cat1, radius_selection, verbose=verbose) + + if match_config["type"] in ("cat1", "cross"): + print("\n## Finding unique matches of catalog 1") + self.unique(cat1, cat2, match_config["preference"]) + if match_config["type"] in ("cat2", "cross"): + print("\n## Finding unique matches of catalog 2") + self.unique(cat2, cat1, match_config["preference"]) + + if match_config["type"] == "cross": + self.cross_match(cat1) + self.cross_match(cat2) diff --git a/clevar/match/spatial.py b/clevar/match/spatial.py new file mode 100644 index 0000000..2eeb491 --- /dev/null +++ b/clevar/match/spatial.py @@ -0,0 +1,201 @@ +"""@file spatial.py +The SpatialMatch class +""" +import numpy as np +from scipy.interpolate import InterpolatedUnivariateSpline as spline + +from .parent import Match +from ..geometry import units_bank, convert_units +from ..utils import str2dataunit + + +class SpatialMatch(Match): + """ + SpatialMatch Class + + Attributes + ---------- + type : str + Type of matching object. Set to "Spatial" + history : list + Steps in the matching + """ + + def __init__(self): + Match.__init__(self) + self.type = "Spatial" + + def _prep_z_for_match( + self, + cat, + delta_z, + n_delta_z=1, + ): + """ + Adds zmin and zmax to cat.mt_input + + Parameters + ---------- + cat: clevar.ClCatalog + Input ClCatalog + delta_z: float, string + Defines the zmin, zmax for matching. Options are: + + * `'cat'` - uses redshift properties of the catalog + * `'spline.filename'` - interpolates data in 'filename' (z, zmin, zmax) fmt + * `float` - uses delta_z*(1+z) + * `None` - does not use z + + n_delta_z: float + Number of delta_z to be used in the matching + """ + print("### Prep z_cols") + if delta_z is None: + # No z use + print("* zmin|zmax set to -1|10") + cat.mt_input["zmin"] = -1 * np.ones(cat.size) + cat.mt_input["zmax"] = 10 * np.ones(cat.size) + elif delta_z == "cat": + # Values from catalog + if "zmin" in cat.tags and "zmax" in cat.tags: + print("* zmin|zmax from cat cols (zmin, zmax)") + cat.mt_input["zmin"] = self._rescale_z(cat["z"], cat["zmin"], n_delta_z) + cat.mt_input["zmax"] = self._rescale_z(cat["z"], cat["zmax"], n_delta_z) + # create zmin/zmax if not there + elif "z_err" in cat.tags: + print("* zmin|zmax from cat cols (err)") + cat.mt_input["zmin"] = cat["z"] - n_delta_z * cat["z_err"] + cat.mt_input["zmax"] = cat["z"] + n_delta_z * cat["z_err"] + else: + raise ValueError("ClCatalog must contain zmin, zmax or z_err for this matching.") + elif isinstance(delta_z, str): + # zmin/zmax in auxiliar file + print("* zmin|zmax from aux file") + order = 3 + zval, zvmin, zvmax = np.loadtxt(delta_z) + zvmin = self._rescale_z(zval, zvmin, n_delta_z) + zvmax = self._rescale_z(zval, zvmax, n_delta_z) + cat.mt_input["zmin"] = spline(zval, zvmin, k=order)(cat["z"]) + cat.mt_input["zmax"] = spline(zval, zvmax, k=order)(cat["z"]) + elif isinstance(delta_z, (int, float)): + # zmin/zmax from sigma_z*(1+z) + print("* zmin|zmax from config value") + cat.mt_input["zmin"] = cat["z"] - delta_z * n_delta_z * (1.0 + cat["z"]) + cat.mt_input["zmax"] = cat["z"] + delta_z * n_delta_z * (1.0 + cat["z"]) + + def _rescale_z(self, z, zlim, n_rescale): + """Rescale zmin/zmax by a factor n_rescale + + Parameters + ---------- + z: float, array + Redshift value + zlim: float, array + Redshift limit + n_rescale: float + Value to rescale zlim + + Returns + ------- + float, array + Rescaled z limit + """ + return z + n_rescale * (zlim - z) + + def _prep_radius_for_match(self, cat, match_radius, n_match_radius=1, cosmo=None): + """ + Adds radius to cat.mt_input + + Parameters + ---------- + cat: clevar.ClCatalog + Input ClCatalog + + match_radius: string + Defines the radius for matching. Options are: + + * `'cat'` - uses the radius in the catalog + * `'value unit'` - used fixed value (ex: `1 arcsec`, `1 Mpc`) + + n_match_radius: float + Multiplies the radius of the matchingi + cosmo: clevar.Cosmology object + Cosmology object for when radius has physical units + """ + print("### Prep ang_cols") + if match_radius == "cat": + print("* ang radius from cat") + in_rad, in_rad_unit = cat["radius"], cat.radius_unit + # when mass is passed to radius: m#b or m#c - background/critical + if in_rad_unit.lower() != "mpc" and in_rad_unit[0].lower() == "m": + print(f" * Converting mass ({in_rad_unit}) ->radius") + delta, mtyp = str2dataunit( + in_rad_unit[1:], + ["b", "c"], + err_msg=( + f"Mass unit ({in_rad_unit}) must be in format " + "'m#b' (background) or 'm#c' (critical)" + ), + ) + in_rad = cosmo.eval_mass2radius( + in_rad, cat["z"], delta, mass_type={"b": "background", "c": "critical"}[mtyp] + ) + in_rad_unit = "mpc" + else: + print("* ang radius from set scale") + in_rad, in_rad_unit = str2dataunit(match_radius, units_bank.keys()) + in_rad *= np.ones(cat.size) + # convert to degrees + cat.mt_input["ang"] = convert_units( + in_rad * n_match_radius, + in_rad_unit, + "degrees", + redshift=cat["z"] if "z" in cat.tags else None, + cosmo=cosmo, + ) + + def prep_cat_for_match(self, cat, *args): + """ + Prepare the catalog for matching, will fill the cat.mt_input object. + Each method must implement its specifics preparations. + """ + raise NotImplementedError + + def multiple(self, cat1, cat2, verbose=True): + """Makes multiple matchig + + Parameters + ---------- + cat1: clevar.ClCatalog + Base catalog + cat2: clevar.ClCatalog + Target catalog + verbose: bool + Print result for individual matches. + + Note + ---- + Not implemented in parent class + """ + raise NotImplementedError + + def match_from_config(self, cat1, cat2, match_config, cosmo=None): + """ + Make matching of catalogs based on a configuration dictionary + + Parameters + ---------- + cat1: clevar.ClCatalog + ClCatalog 1 + cat2: clevar.ClCatalog + ClCatalog 2 + match_config: dict + Dictionary with the matching configuration. + cosmo: clevar.Cosmology object + Cosmology object for when radius has physical units + + Note + ---- + Not implemented in parent class + """ + raise NotImplementedError From 70536bc1e66c14b6866b8ab01d311fb42318582e Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 21 Jun 2023 15:07:38 +0200 Subject: [PATCH 02/27] add file --- clevar/match/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clevar/match/__init__.py b/clevar/match/__init__.py index 7006f38..11066f0 100644 --- a/clevar/match/__init__.py +++ b/clevar/match/__init__.py @@ -7,6 +7,7 @@ from .parent import Match from .proximity import ProximityMatch from .membership import MembershipMatch +from .box import BoxMatch def get_matched_pairs(cat1, cat2, matching_type, mask1=None, mask2=None): From 7dfb974262a8bac1f41ddc12edc402a71cfbdd7b Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 21 Jun 2023 15:08:24 +0200 Subject: [PATCH 03/27] add examples/box_matching_detailed.ipynb --- examples/box_matching_detailed.ipynb | 480 +++++++++++++++++++++++++++ 1 file changed, 480 insertions(+) create mode 100644 examples/box_matching_detailed.ipynb diff --git a/examples/box_matching_detailed.ipynb b/examples/box_matching_detailed.ipynb new file mode 100644 index 0000000..c56303d --- /dev/null +++ b/examples/box_matching_detailed.ipynb @@ -0,0 +1,480 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matching catalogs based on proximity (detailed)\n", + "Here we show the specific steps of matching two catalogs based on proximity" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "toc": true + }, + "source": [ + "

Table of Contents

\n", + "" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ClCatalogs\n", + "Given some input data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from astropy.table import Table\n", + "\n", + "input1 = Table(\n", + " {\n", + " \"ID\": [f\"CL{i}\" for i in range(5)],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25, 20],\n", + " \"DEC\": [0.0, 0.0, 0.0, 0.0, 0.0],\n", + " \"Z\": [0.2, 0.3, 0.25, 0.4, 0.35],\n", + " \"MASS\": [10**13.5, 10**13.4, 10**13.3, 10**13.8, 10**14],\n", + " \"RADIUS_ARCMIN\": [1.0, 1.0, 1.0, 1.0, 1.0],\n", + " }\n", + ")\n", + "input2 = Table(\n", + " {\n", + " \"ID\": [\"CL0\", \"CL1\", \"CL2\", \"CL3\"],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25],\n", + " \"DEC\": [0.0, 0, 0, 0],\n", + " \"Z\": [0.3, 0.2, 0.25, 0.4],\n", + " \"MASS\": [10**13.3, 10**13.4, 10**13.5, 10**13.8],\n", + " \"RADIUS_ARCMIN\": [1.0, 1.0, 1.0, 1.0],\n", + " }\n", + ")\n", + "for col in ('RA', 'DEC'):\n", + " input1[f\"{col}_MAX\"] = input1[col]+0.01\n", + " input1[f\"{col}_MIN\"] = input1[col]-0.01\n", + " input2[f\"{col}_MAX\"] = input2[col]+0.01\n", + " input2[f\"{col}_MIN\"] = input2[col]-0.01\n", + "\n", + "display(input1)\n", + "display(input2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create two `ClCatalog` objects, they have the same properties of `astropy` tables with additional functionality. You can tag the main properties of the catalog, or have columns with those names (see `catalogs.ipynb` for detailts). For the proximity matching, the main tags/columns to be included are:\n", + "- `id` - if not included, one will be assigned\n", + "- `ra` (in degrees) - necessary\n", + "- `dec` (in degrees) - necessary\n", + "- `z` - necessary if used as matching criteria or for angular to physical convertion\n", + "- `mass` (or mass proxy) - necessary if used as preference criteria for unique matches\n", + "- `radius` - necessary if used as a criteria of matching (also requires `radius_unit` to be passed)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.catalog import ClCatalog\n", + "\n", + "tags = {\"id\": \"ID\", \"ra\": \"RA\", \"dec\": \"DEC\", \"z\": \"Z\", \"mass\": \"MASS\"}\n", + "c1 = ClCatalog(\"Cat1\", data=input1, tags=tags)\n", + "c2 = ClCatalog(\"Cat2\", data=input2, tags=tags)\n", + "# Format for nice display\n", + "for c in (\"ra\", \"dec\", \"z\"):\n", + " c1[c].info.format = \".2f\"\n", + " c2[c].info.format = \".2f\"\n", + "for c in (\"mass\",):\n", + " c1[c].info.format = \".2e\"\n", + " c2[c].info.format = \".2e\"\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `ClCatalog` object can also be read directly from a file,\n", + "for details, see catalogs.ipynb." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Matching\n", + "Import the `ProximityMatch` and create a object for matching" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import BoxMatch\n", + "\n", + "mt = BoxMatch()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the catalogs\n", + "The first step is to prepare each catalog with the matching configuration:\n", + "\n", + "- `delta_z`: Defines redshift window for matching. The possible values are:\n", + " - `'cat'`: uses redshift properties of the catalog\n", + " - `'spline.filename'`: interpolates data in `'filename'` assuming (z, zmin, zmax) format\n", + " - `float`: uses `delta_z*(1+z)`\n", + " - `None`: does not use z\n", + "- `match_radius`: Radius of the catalog to be used in the matching. If `'cat'` uses the radius in the catalog, else must be in format `'value unit'`. (ex: `'1 arcsec'`, `'1 Mpc'`)\n", + "\n", + "In this case, because one of the configuraion radius has physical units, we also need a cosmology (`cosmo`) object to convert it to angular size (this is done internally)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.cosmology import AstroPyCosmology\n", + "\n", + "mt_config1 = {\"delta_z\": 0.2,}\n", + "mt_config2 = {\"delta_z\": 0.2,}\n", + "mt.prep_cat_for_match(c1, **mt_config1)\n", + "mt.prep_cat_for_match(c2, **mt_config2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will add values to the `mt_input` attribute of the catalogs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1.mt_input)\n", + "display(c2.mt_input)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multiple matching\n", + "The next step is to match the catalogs and store all candidates that pass the matching criteria. You can also pass the argument:\n", + "- `radius_selection`: Given a pair of clusters, which radius will be used for the matching." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mt.multiple(c1, c2)\n", + "mt.multiple(c2, c1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the `mt_multi_self` and `mt_multi_other` columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unique matching\n", + "Once all candidates are stored in each catalog, we can find the best candidates. You can also pass the argument:\n", + "- `preference`: In cases where there are multiple matched, how the best candidate will be chosen." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mt.unique(c1, c2, preference=\"angular_proximity\")\n", + "mt.unique(c2, c1, preference=\"angular_proximity\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the `mt_self` and `mt_other` columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cross matching\n", + "If you want to make sure the same pair was found in both directions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.cross_match()\n", + "c2.cross_match()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the `mt_cross` column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save and Load\n", + "The results of the matching can easily be saved and load using `ClEvaR` tools:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.save_matches(c1, c2, out_dir=\"temp\", overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.load_matches(c1, c2, out_dir=\"temp\")\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Matched Pairs\n", + "\n", + "There is functionality inbuilt in `clevar` to plot some results of the matching, such as:\n", + "- Recovery rates\n", + "- Distances (anguar and redshift) of cluster centers\n", + "- Scaling relations (mass, redshift, ...)\n", + "for those cases, check the match_metrics.ipynb and match_metrics_advanced.ipynb notebooks.\n", + "\n", + "If those do not provide your needs, you can get directly the matched pairs of clusters: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import get_matched_pairs\n", + "\n", + "mt1, mt2 = get_matched_pairs(c1, c2, \"cross\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt._compute_giou(\n", + " *mt._compute_areas(\n", + " mt1['ra_min'], mt1['ra_max'], mt1['dec_min'], mt1['dec_max'],\n", + " mt2['ra_min'], mt2['ra_max'], mt2['dec_min'], mt2['dec_max'],\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These will be catalogs with the corresponding matched pairs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pylab as plt\n", + "\n", + "plt.scatter(mt1[\"mass\"], mt2[\"mass\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outputing matched catalogs\n", + "\n", + "To save the current catalogs, you can use the `write` inbuilt function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.write(\"c1_temp.fits\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will allow you to save the catalog with its current labels and matching information.\n", + "\n", + "### Outputing matching information to original catalogs\n", + "\n", + "Assuming your input data came from initial files,\n", + "`clevar` also provides functions create output files \n", + "that combine all the information on them with the matching results.\n", + "\n", + "To add the matching information to an input catalog, use:\n", + "\n", + "```\n", + "from clevar.match import output_catalog_with_matching\n", + "output_catalog_with_matching('input_catalog.fits', 'output_catalog.fits', c1)\n", + "```\n", + "\n", + "- note: `input_catalog.fits` must have the same number of rows that `c1`.\n", + "\n", + "\n", + "To create a matched catalog containig all columns of both input catalogs, use:\n", + "\n", + "```\n", + "from clevar.match import output_matched_catalog\n", + "output_matched_catalog('input_catalog1.fits', 'input_catalog2.fits',\n", + " 'output_catalog.fits', c1, c2, matching_type='cross')\n", + "```\n", + "\n", + "where `matching_type` must be `cross`, `cat1` or `cat2`.\n", + "\n", + "- note: `input_catalog1.fits` must have the same number of rows that `c1` (and the same for `c2`)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 82b44947d6b13b21f913cc950bc42f722633b995 Mon Sep 17 00:00:00 2001 From: Michel Date: Mon, 21 Aug 2023 16:12:23 +0200 Subject: [PATCH 04/27] update mask in box match --- clevar/match/box.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index e875c79..720f09d 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -73,12 +73,7 @@ def get_metric_value(*args): mask = (z2max >= z1min) * (z2min <= z1max) if mask.any(): # makes square crop with intersection - mask[mask] = ( - (ra2max[mask] >= ra1min) - * (ra2min[mask] <= ra1max) - * (dec2max[mask] >= dec1min) - * (dec2min[mask] <= dec1max) - ) + mask *= self.mask_intersection(ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max) if mask.any(): area1, area2, intersection, outter = self._compute_areas( *( From 5a6a77b590e2c13d81bce66b827f9ae598b340ec Mon Sep 17 00:00:00 2001 From: Michel Date: Mon, 21 Aug 2023 16:39:51 +0200 Subject: [PATCH 05/27] improve match --- clevar/match/box.py | 54 ++++++++++++++++++++++++++++++++++++------ clevar/match/parent.py | 48 +++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 7 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index 720f09d..c18e7d8 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -73,7 +73,16 @@ def get_metric_value(*args): mask = (z2max >= z1min) * (z2min <= z1max) if mask.any(): # makes square crop with intersection - mask *= self.mask_intersection(ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max) + mask[mask] *= self.mask_intersection( + ra1min, + ra1max, + dec1min, + dec1max, + ra2min[mask], + ra2max[mask], + dec2min[mask], + dec2max[mask], + ) if mask.any(): area1, area2, intersection, outter = self._compute_areas( *( @@ -102,15 +111,46 @@ def get_metric_value(*args): } self._rm_dup_add_hist(cat1, cat2, hist) + def mask_intersection(self, ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max): + """Mask clusters without intersection + + Return + ------ + mask + If clusters have intersection + """ + mask = ra1min <= ra2max + if not mask.any(): + return mask + mask *= ra2min <= ra1max + if not mask.any(): + return mask + mask *= dec1min <= dec2max + if not mask.any(): + return mask + mask *= dec2min <= dec1max + if not mask.any(): + return mask + return mask + + def _compute_area(self, ra_min, ra_max, dec_min, dec_max): + return (ra_max - ra_min) * (dec_max - dec_min) + def _compute_areas(self, ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max): - area1 = (ra1max - ra1min) * (dec1max - dec1min) - area2 = (ra2max - ra2min) * (dec2max - dec2min) + area1 = self._compute_area(ra1max, ra1min, dec1max, dec1min) + area2 = self._compute_area(ra2max, ra2min, dec2max, dec2min) # areas - intersection = (np.min([ra1max, ra2max], axis=0) - np.max([ra1min, ra2min], axis=0)) * ( - np.min([dec1max, dec2max], axis=0) - np.max([dec1min, dec2min], axis=0) + intersection = self._compute_area( + np.max([ra1min, ra2min], axis=0), + np.min([ra1max, ra2max], axis=0), + np.max([dec1min, dec2min], axis=0), + np.min([dec1max, dec2max], axis=0), ) - outter = (np.max([ra1max, ra2max], axis=0) - np.min([ra1min, ra2min], axis=0)) * ( - np.max([dec1max, dec2max], axis=0) - np.min([dec1min, dec2min], axis=0) + outter = self._compute_area( + np.min([ra1min, ra2min], axis=0), + np.max([ra1max, ra2max], axis=0), + np.min([dec1min, dec2min], axis=0), + np.max([dec1max, dec2max], axis=0), ) return area1, area2, intersection, outter diff --git a/clevar/match/parent.py b/clevar/match/parent.py index 9eb1123..3e3df73 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -103,6 +103,11 @@ def set_unique(*args): def set_unique(*args): return self._match_sharepref(*args, minimum_share_fraction) + elif preference == "giou": + + def set_unique(*args): + return self._match_gioupref(*args) + else: raise ValueError( "preference must be 'more_massive', 'angular_proximity' or 'redshift_proximity'" @@ -169,6 +174,49 @@ def _match_mpref(self, cat1, ind1, cat2): return True return False + def _match_gioupref(self, cat1, ind1, cat2): + """ + Make the unique match by gIoU preference + + Parameters + ---------- + cat1: clevar.ClCatalog + Base catalog + ind1: int + Index of the cluster from cat1 to be matched + cat2: clevar.ClCatalog + Target catalog + + Returns + ------- + bool + Tells if the cluster was matched + """ + inds2 = cat2.ids2inds(cat1["mt_multi_self"][ind1]) + if len(inds2) > 0: + area1, area2, intersection, outter = self._compute_areas( + *( + [ + cat1["ra_min"][ind1], + cat1["ra_max"][ind1], + cat1["dec_min"][ind1], + cat1["dec_max"][ind1], + ] + * np.ones(inds2.size)[:, None] + ).T, # for vec computation + cat2["ra_min"][inds2], + cat2["ra_max"][inds2], + cat2["dec_min"][inds2], + cat2["dec_max"][inds2], + ) + giou = self._compute_giou(area1, area2, intersection, outter) + for ind2 in inds2[np.argsort(giou)][::-1]: + if cat2["mt_other"][ind2] is None: + cat1["mt_self"][ind1] = cat2["id"][ind2] + cat2["mt_other"][ind2] = cat1["id"][ind1] + return True + return False + def _match_apref(self, cat1, ind1, cat2, match_pref): """ Make the unique match by angular (or redshift) distance preference From 2745be5d10cc1f15d0aaca9836aeb1b2f71c5aaa Mon Sep 17 00:00:00 2001 From: Michel Date: Mon, 21 Aug 2023 18:05:02 +0200 Subject: [PATCH 06/27] add box._detailed_print --- clevar/match/box.py | 106 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 89 insertions(+), 17 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index c18e7d8..08f942d 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -24,7 +24,14 @@ def __init__(self): self.type = "Box" def multiple( - self, cat1, cat2, metric="GIoU", metric_cut=0.5, share_area_frac=0.5, verbose=True + self, + cat1, + cat2, + metric="GIoU", + metric_cut=0.5, + share_area_frac=0.5, + verbose=True, + detailed_print_only=False, ): """ Make the one way multiple matching @@ -35,14 +42,16 @@ def multiple( Base catalog cat2: clevar.ClCatalog Target catalog - verbose: bool - Print result for individual matches. metric: str (optional) Metric to be used for matching. Can be: GIoU (generalized Intersection over Union). metric_cut: float Minimum value of metric for match. share_area_frac: float Minimum relative size of area for match. + verbose: bool + Print result for individual matches. + detailed_print_only: bool + Only prints detailed comparisons for matching, does not register matches in catalogs. """ # pylint: disable=arguments-renamed # pylint: disable=too-many-locals @@ -69,9 +78,11 @@ def get_metric_value(*args): *(cat1.mt_input[c] for c in ("zmin", "zmax")), ) ): + self._detailed_print(0, locals()) # crop in redshift range mask = (z2max >= z1min) * (z2min <= z1max) if mask.any(): + self._detailed_print(1, locals()) # makes square crop with intersection mask[mask] *= self.mask_intersection( ra1min, @@ -84,6 +95,7 @@ def get_metric_value(*args): dec2max[mask], ) if mask.any(): + self._detailed_print(2, locals()) area1, area2, intersection, outter = self._compute_areas( *( [ra1min, ra1max, dec1min, dec1max] * np.ones(mask.sum())[:, None] @@ -94,22 +106,82 @@ def get_metric_value(*args): dec2max[mask], ) # makes metric crop - for id2 in cat2["id"][mask][ - (get_metric_value(area1, area2, intersection, outter) >= metric_cut) - * (area1 / area2 >= share_area_frac) - * (area2 / area1 >= share_area_frac) - ]: - cat1["mt_multi_self"][ind1].append(id2) - ind2 = int(cat2.id_dict[id2]) - cat2["mt_multi_other"][ind2].append(cat1["id"][ind1]) - self._cat1_mmt[ind1] = True + metric_value = get_metric_value(area1, area2, intersection, outter) + self._detailed_print(3, locals()) + if not detailed_print_only: + for id2 in cat2["id"][mask][ + (metric_value >= metric_cut) + * (area1 / area2 >= share_area_frac) + * (area2 / area1 >= share_area_frac) + ]: + cat1["mt_multi_self"][ind1].append(id2) + ind2 = int(cat2.id_dict[id2]) + cat2["mt_multi_other"][ind2].append(cat1["id"][ind1]) + self._cat1_mmt[ind1] = True + else: + self._detailed_print(4, locals()) + else: + self._detailed_print(5, locals()) if verbose: self._prt_cand_mt(cat1, ind1) - hist = { - "func": "multiple", - "cats": f"{cat1.name}, {cat2.name}", - } - self._rm_dup_add_hist(cat1, cat2, hist) + if not detailed_print_only: + hist = { + "func": "multiple", + "cats": f"{cat1.name}, {cat2.name}", + } + self._rm_dup_add_hist(cat1, cat2, hist) + + def _detailed_print(self, i, locs): + if not locs["detailed_print_only"]: + return + if i == 0: + print( + "\n\nCluster:", + "(", + ", ".join([f"{locs[v]:.4f}" for v in ("ra1min", "ra1max", "dec1min", "dec1max")]), + ") (", + ", ".join([f"{locs[v]:.4f}" for v in ("z1min", "z1max")]), + ")", + f"( area: {self._compute_area(locs['ra1min'], locs['ra1max'], locs['dec1min'], locs['dec1max'])*3600:.2f} arcmin2 )", + ) + elif i == 1: + print(f" * z pass: {locs['mask'].sum():,}") + elif i == 2: + print(f" * intersection pass: {locs['mask'].sum():,}") + elif i == 3: + for val in zip( + locs["cat2"]["id"][locs["mask"]], + locs["ra2min"][locs["mask"]], + locs["ra2max"][locs["mask"]], + locs["dec2min"][locs["mask"]], + locs["dec2max"][locs["mask"]], + locs["area1"], + locs["area2"], + locs["intersection"], + locs["outter"], + locs["metric_value"], + locs["metric_value"] >= locs["metric_cut"], + locs["area1"] / locs["area2"] >= locs["share_area_frac"], + locs["area2"] / locs["area1"] >= locs["share_area_frac"], + ): + print(" Candidate:", val[0]) + print(f" Pass : {bool(np.prod(val[10:]))} (", *val[10:], ")") + print( + " Areas :", + ", ".join([f"{v*3600:.2f}" for v in val[6:9]]), + f"arcmin2 ( {locs['metric']}: {val[9]:.2g} )", + ) + print( + " Coords : (", + ", ".join([f"{v:.4f}" for v in val[1:5]]), + ")", + ) + elif i == 4: + print(" * intersection fail") + elif i == 5: + print(" * z fail!") + else: + raise ValueError(f"i={i} invalid!") def mask_intersection(self, ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max): """Mask clusters without intersection From a7cc4482804379a3489f77861a6bb5657681dfb7 Mon Sep 17 00:00:00 2001 From: Michel Date: Mon, 21 Aug 2023 18:50:37 +0200 Subject: [PATCH 07/27] fix ccl import fail in tests --- tests/conftest.py | 6 +++++- tests/test_yaml.py | 17 +++++++++++------ 2 files changed, 16 insertions(+), 7 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 447abba..2c1e8d5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,11 +3,15 @@ import importlib import os from clevar.cosmology import AstroPyCosmology, CCLCosmology +from clevar import optional_libs @pytest.fixture(scope="module", params=[AstroPyCosmology, CCLCosmology]) def CosmoClass(request): - return request.param + param = request.param + if optional_libs.ccl is None and param==CCLCosmology: + pytest.skip(f"Missing backend '{param}'.") + return param @pytest.fixture( diff --git a/tests/test_yaml.py b/tests/test_yaml.py index 7940431..25784e8 100644 --- a/tests/test_yaml.py +++ b/tests/test_yaml.py @@ -2,12 +2,14 @@ """ Tests for match.py """ import os import numpy as np +import pytest import yaml from numpy.testing import assert_raises, assert_allclose, assert_equal from unittest import mock from clevar import clv_yaml as clevar_yaml from clevar.clv_yaml import match_metrics_parent as metric_parent +from clevar import optional_libs def test_yaml_parent(): @@ -43,10 +45,6 @@ def test_yaml_helper_functions(): hf.loadconf("cfg.yml", ["test"], fail_action="orverwrite") os.system("rm cfg.yml") os.system("rm -r temp") - # cosmology - hf.make_cosmology({"backend": "astropy"}) - hf.make_cosmology({"backend": "ccl"}) - assert_raises(ValueError, hf.make_cosmology, {"backend": "unknown"}) # make_bins hf.make_bins(3, log=False) hf.make_bins("0 1 3", log=False) @@ -58,6 +56,13 @@ def test_yaml_helper_functions(): hf.get_input_loop(options_msg="test", actions={"pass": (lambda x: "ok", [0], {})}), "ok" ) mock.builtins.input = original_input + # cosmology + hf.make_cosmology({"backend": "astropy"}) + if optional_libs.ccl is None: + pytest.skip(f"Missing backend CCL.") + else: + hf.make_cosmology({"backend": "ccl"}) + assert_raises(ValueError, hf.make_cosmology, {"backend": "unknown"}) def create_base_matched_files(config_file, matching_mode): @@ -123,13 +128,13 @@ def test_yaml_funcs_prox(): # Match, used diff cosmology and overwrite # 1 print("############ Fail cfg check #########") - config["proximity_match"]["step1"]["cosmology"] = {"backend": "CCL"} + config["proximity_match"]["step1"]["cosmology"] = {"backend": "astropy"} yaml.write(config, "cfg.yml") original_input = mock.builtins.input mock.builtins.input = lambda _: "q" clevar_yaml.match("cfg.yml", overwrite_config=False, overwrite_files=False) # 2 - config["cosmology"] = {"backend": "CCL"} + config["cosmology"] = {"backend": "astropy"} yaml.write(config, "cfg.yml") mock.builtins.input = lambda _: "o" clevar_yaml.match("cfg.yml", overwrite_config=False, overwrite_files=False) From ffe4362828d7382fd5d4251da8d656d819c5d732 Mon Sep 17 00:00:00 2001 From: Michel Date: Thu, 24 Aug 2023 12:24:53 +0200 Subject: [PATCH 08/27] add IoA* cases in BoxMatch --- clevar/match/box.py | 17 +++++++++ clevar/match/parent.py | 80 ++++++++++++++++++++++++++++++------------ 2 files changed, 74 insertions(+), 23 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index 08f942d..3a04144 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -6,6 +6,13 @@ from .spatial import SpatialMatch from ..catalog import ClData +_area_type_funcs = { + "min": lambda area1, area2: np.min([area1, area2], axis=0), + "max": lambda area1, area2: np.max([area1, area2], axis=0), + "self": lambda area1, area2: area1, + "other": lambda area1, area2: area2, +} + class BoxMatch(SpatialMatch): """ @@ -59,13 +66,20 @@ def multiple( raise AttributeError("cat1.mt_input is None, run prep_cat_for_match first.") if cat2.mt_input is None: raise AttributeError("cat2.mt_input is None, run prep_cat_for_match first.") + if metric == "GIoU": def get_metric_value(*args): return self._compute_giou(*args) + elif metric[:3] == "IoA" and metric[3:] in ("min", "max", "self", "other"): + + def get_metric_value(*args): + return self._compute_intersection_over_area(*args, area_type=metric[3:]) + else: raise ValueError("metric must be GIoU.") + self._cat1_mmt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching ra2min, ra2max, dec2min, dec2max = ( cat2[c] for c in ("ra_min", "ra_max", "dec_min", "dec_max") @@ -230,6 +244,9 @@ def _compute_giou(self, area1, area2, intersection, outter): union = area1 + area2 - intersection return intersection / union + union / outter - 1.0 + def _compute_intersection_over_area(self, area1, area2, intersection, outter, area_type): + return intersection / _area_type_funcs[area_type](area1, area2) + def prep_cat_for_match( self, cat, diff --git a/clevar/match/parent.py b/clevar/match/parent.py index 3e3df73..220d8e4 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -75,10 +75,22 @@ def unique(self, cat1, cat2, preference, minimum_share_fraction=0): Minimum share fraction to consider in matches (default=0). """ self._cat1_mt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching + + # set order to match catalogs if "mass" in cat1.tags: i_vals = np.argsort(cat1["mass"])[::-1] else: i_vals = range(cat1.size) + + # set funtion to be used for matching + valid_pref = True + valid_pref_vals = [ + "more_massive", + "angular_proximity", + "redshift_proximity", + "shared_member_fraction", + ] + if preference == "more_massive": def set_unique(*args): @@ -103,21 +115,42 @@ def set_unique(*args): def set_unique(*args): return self._match_sharepref(*args, minimum_share_fraction) - elif preference == "giou": + elif self.type == "box": + valid_pref_vals += ["gIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] - def set_unique(*args): - return self._match_gioupref(*args) + if preference == "gIoU": + + def set_unique(*args): + return self._match_box_metrics_pref(*args, self._compute_giou) + + elif preference[:3] == "IoA" and preference[3:] in ("min", "max", "self", "other"): + + def set_unique(*args): + return self._match_box_metrics_pref( + *args, + lambda *args_: self._compute_intersection_over_area( + *args_, area_type=preference[3:] + ), + ) + + else: + valid_pref = False else: - raise ValueError( - "preference must be 'more_massive', 'angular_proximity' or 'redshift_proximity'" - ) + valid_pref = False + + if not valid_pref: + raise ValueError("preference must be in: " ", ".join(valid_pref_vals)) + + # Run matching print(f"Unique Matches ({cat1.name})") for ind1 in i_vals: if cat1["mt_self"][ind1] is None: self._cat1_mt[ind1] = set_unique(cat1, ind1, cat2) self._cat1_mt *= cat1.get_matching_mask("self") # In case ang pref removes a match print(f'* {cat1.get_matching_mask("self").sum():,}/{cat1.size:,} objects matched.') + + # Add match conf to history cfg = {"func": "unique", "cats": f"{cat1.name}, {cat2.name}", "preference": preference} if preference == "shared_member_fraction": cfg["minimum_share_fraction"] = minimum_share_fraction @@ -174,7 +207,7 @@ def _match_mpref(self, cat1, ind1, cat2): return True return False - def _match_gioupref(self, cat1, ind1, cat2): + def _match_box_metrics_pref(self, cat1, ind1, cat2, metric_func): """ Make the unique match by gIoU preference @@ -194,23 +227,24 @@ def _match_gioupref(self, cat1, ind1, cat2): """ inds2 = cat2.ids2inds(cat1["mt_multi_self"][ind1]) if len(inds2) > 0: - area1, area2, intersection, outter = self._compute_areas( - *( - [ - cat1["ra_min"][ind1], - cat1["ra_max"][ind1], - cat1["dec_min"][ind1], - cat1["dec_max"][ind1], - ] - * np.ones(inds2.size)[:, None] - ).T, # for vec computation - cat2["ra_min"][inds2], - cat2["ra_max"][inds2], - cat2["dec_min"][inds2], - cat2["dec_max"][inds2], + metric = metric_func( + *self._compute_areas( + *( + [ + cat1["ra_min"][ind1], + cat1["ra_max"][ind1], + cat1["dec_min"][ind1], + cat1["dec_max"][ind1], + ] + * np.ones(inds2.size)[:, None] + ).T, # for vec computation + cat2["ra_min"][inds2], + cat2["ra_max"][inds2], + cat2["dec_min"][inds2], + cat2["dec_max"][inds2], + ) ) - giou = self._compute_giou(area1, area2, intersection, outter) - for ind2 in inds2[np.argsort(giou)][::-1]: + for ind2 in inds2[np.argsort(metric)][::-1]: if cat2["mt_other"][ind2] is None: cat1["mt_self"][ind1] = cat2["id"][ind2] cat2["mt_other"][ind2] = cat1["id"][ind1] From b4c082ac6c006c1e8efaebda78bdc23a3274c197 Mon Sep 17 00:00:00 2001 From: Michel Date: Fri, 25 Aug 2023 15:31:31 +0200 Subject: [PATCH 09/27] add mt_metric to box match --- clevar/match/parent.py | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/clevar/match/parent.py b/clevar/match/parent.py index 220d8e4..7df8fdf 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -115,32 +115,41 @@ def set_unique(*args): def set_unique(*args): return self._match_sharepref(*args, minimum_share_fraction) - elif self.type == "box": + elif self.type == "Box": valid_pref_vals += ["gIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] if preference == "gIoU": - def set_unique(*args): - return self._match_box_metrics_pref(*args, self._compute_giou) + def metric_func(*args): + return self._compute_giou(*args) elif preference[:3] == "IoA" and preference[3:] in ("min", "max", "self", "other"): + def metric_func(*args): + return self._compute_intersection_over_area(*args, area_type=preference[3:]) + + else: + valid_pref = False + + if valid_pref: + def set_unique(*args): return self._match_box_metrics_pref( - *args, - lambda *args_: self._compute_intersection_over_area( - *args_, area_type=preference[3:] - ), + *args, preference=preference, metric_func=metric_func ) - else: - valid_pref = False + for col in (f"mt_self_{preference}", f"mt_other_{preference}"): + if col not in cat1.colnames: + cat1[col] = np.ones(cat1.size) * -99.0 + col = f"mt_other_{preference}" + if col not in cat2.colnames: + cat2[col] = np.ones(cat2.size) * -99.0 else: valid_pref = False if not valid_pref: - raise ValueError("preference must be in: " ", ".join(valid_pref_vals)) + raise ValueError("preference must be in: " + ", ".join(valid_pref_vals)) # Run matching print(f"Unique Matches ({cat1.name})") @@ -207,7 +216,7 @@ def _match_mpref(self, cat1, ind1, cat2): return True return False - def _match_box_metrics_pref(self, cat1, ind1, cat2, metric_func): + def _match_box_metrics_pref(self, cat1, ind1, cat2, preference, metric_func): """ Make the unique match by gIoU preference @@ -244,10 +253,13 @@ def _match_box_metrics_pref(self, cat1, ind1, cat2, metric_func): cat2["dec_max"][inds2], ) ) - for ind2 in inds2[np.argsort(metric)][::-1]: + for i_sort in np.argsort(metric)[::-1]: + ind2 = inds2[i_sort] if cat2["mt_other"][ind2] is None: cat1["mt_self"][ind1] = cat2["id"][ind2] cat2["mt_other"][ind2] = cat1["id"][ind1] + cat1[f"mt_self_{preference}"][ind1] = metric[i_sort] + cat2[f"mt_other_{preference}"][ind2] = metric[i_sort] return True return False From 95eff0ac5026bedd942f2b6c6d0a83957f6d5ae7 Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 6 Sep 2023 15:21:10 +0200 Subject: [PATCH 10/27] make ProximityMatch a child of SpatialMatch --- clevar/catalog/catalog.py | 1 + clevar/match/proximity.py | 93 ++++----------------------------------- 2 files changed, 9 insertions(+), 85 deletions(-) diff --git a/clevar/catalog/catalog.py b/clevar/catalog/catalog.py index 59ebca4..f2ec804 100644 --- a/clevar/catalog/catalog.py +++ b/clevar/catalog/catalog.py @@ -813,6 +813,7 @@ def id_dict_list(self): def __init__(self, name, tags=None, labels=None, **kwargs): if tags is not None and not isinstance(tags, dict): raise ValueError("tags must be dict.") + self.mt_input = kwargs.pop("mt_input", None) self.__id_dict_list = {} Catalog.__init__( self, diff --git a/clevar/match/proximity.py b/clevar/match/proximity.py index 5aebcd0..4f02a12 100644 --- a/clevar/match/proximity.py +++ b/clevar/match/proximity.py @@ -4,13 +4,13 @@ import numpy as np from scipy.interpolate import InterpolatedUnivariateSpline as spline -from .parent import Match +from .spatial import SpatialMatch from ..geometry import units_bank, convert_units from ..catalog import ClData from ..utils import str2dataunit -class ProximityMatch(Match): +class ProximityMatch(SpatialMatch): """ ProximityMatch Class @@ -23,7 +23,7 @@ class ProximityMatch(Match): """ def __init__(self): - Match.__init__(self) + SpatialMatch.__init__(self) self.type = "Proximity" def multiple(self, cat1, cat2, radius_selection="max", verbose=True): @@ -125,72 +125,13 @@ def prep_cat_for_match( """ # pylint: disable=arguments-differ print("## Prep mt_cols") - cat.mt_input = ClData() + if cat.mt_input is None: + cat.mt_input = ClData() # Set zmin, zmax - if delta_z is None: - # No z use - print("* zmin|zmax set to -1|10") - cat.mt_input["zmin"] = -1 * np.ones(cat.size) - cat.mt_input["zmax"] = 10 * np.ones(cat.size) - elif delta_z == "cat": - # Values from catalog - if "zmin" in cat.tags and "zmax" in cat.tags: - print("* zmin|zmax from cat cols (zmin, zmax)") - cat.mt_input["zmin"] = self._rescale_z(cat["z"], cat["zmin"], n_delta_z) - cat.mt_input["zmax"] = self._rescale_z(cat["z"], cat["zmax"], n_delta_z) - # create zmin/zmax if not there - elif "z_err" in cat.tags: - print("* zmin|zmax from cat cols (err)") - cat.mt_input["zmin"] = cat["z"] - n_delta_z * cat["z_err"] - cat.mt_input["zmax"] = cat["z"] + n_delta_z * cat["z_err"] - else: - raise ValueError("ClCatalog must contain zmin, zmax or z_err for this matching.") - elif isinstance(delta_z, str): - # zmin/zmax in auxiliar file - print("* zmin|zmax from aux file") - order = 3 - zval, zvmin, zvmax = np.loadtxt(delta_z) - zvmin = self._rescale_z(zval, zvmin, n_delta_z) - zvmax = self._rescale_z(zval, zvmax, n_delta_z) - cat.mt_input["zmin"] = spline(zval, zvmin, k=order)(cat["z"]) - cat.mt_input["zmax"] = spline(zval, zvmax, k=order)(cat["z"]) - elif isinstance(delta_z, (int, float)): - # zmin/zmax from sigma_z*(1+z) - print("* zmin|zmax from config value") - cat.mt_input["zmin"] = cat["z"] - delta_z * n_delta_z * (1.0 + cat["z"]) - cat.mt_input["zmax"] = cat["z"] + delta_z * n_delta_z * (1.0 + cat["z"]) - + self._prep_z_for_match(cat, delta_z, n_delta_z) # Set angular radius - if match_radius == "cat": - print("* ang radius from cat") - in_rad, in_rad_unit = cat["radius"], cat.radius_unit - # when mass is passed to radius: m#b or m#c - background/critical - if in_rad_unit.lower() != "mpc" and in_rad_unit[0].lower() == "m": - print(f" * Converting mass ({in_rad_unit}) ->radius") - delta, mtyp = str2dataunit( - in_rad_unit[1:], - ["b", "c"], - err_msg=( - f"Mass unit ({in_rad_unit}) must be in format " - "'m#b' (background) or 'm#c' (critical)" - ), - ) - in_rad = cosmo.eval_mass2radius( - in_rad, cat["z"], delta, mass_type={"b": "background", "c": "critical"}[mtyp] - ) - in_rad_unit = "mpc" - else: - print("* ang radius from set scale") - in_rad, in_rad_unit = str2dataunit(match_radius, units_bank.keys()) - in_rad *= np.ones(cat.size) - # convert to degrees - cat.mt_input["ang"] = convert_units( - in_rad * n_match_radius, - in_rad_unit, - "degrees", - redshift=cat["z"] if "z" in cat.tags else None, - cosmo=cosmo, - ) + self._prep_radius_for_match(cat, match_radius, n_match_radius=n_match_radius, cosmo=cosmo) + # add conf to history of matching self.history.append( { "func": "prep_cat_for_match", @@ -203,24 +144,6 @@ def prep_cat_for_match( } ) - def _rescale_z(self, z, zlim, n_rescale): - """Rescale zmin/zmax by a factor n_rescale - - Parameters - ---------- - z: float, array - Redshift value - zlim: float, array - Redshift limit - n_rescale: float - Value to rescale zlim - - Returns - ------- - float, array - Rescaled z limit - """ - return z + n_rescale * (zlim - z) def _max_mt_distance(self, radius1, radius2, radius_selection): """Get maximum angular distance allowed for the matching From 4f9738d572febf2f252f95fcd7e53c65345868ff Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 6 Sep 2023 17:27:15 +0200 Subject: [PATCH 11/27] fix minor bugs in box --- clevar/match/box.py | 28 +++++++--------------------- 1 file changed, 7 insertions(+), 21 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index 3a04144..4fb0aab 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -67,7 +67,7 @@ def multiple( if cat2.mt_input is None: raise AttributeError("cat2.mt_input is None, run prep_cat_for_match first.") - if metric == "GIoU": + if metric.lower() == "giou": def get_metric_value(*args): return self._compute_giou(*args) @@ -78,7 +78,8 @@ def get_metric_value(*args): return self._compute_intersection_over_area(*args, area_type=metric[3:]) else: - raise ValueError("metric must be GIoU.") + valid_pref_vals = ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] + raise ValueError("metric must be in:"+", ".join(valid_pref_vals)) self._cat1_mmt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching ra2min, ra2max, dec2min, dec2max = ( @@ -320,33 +321,18 @@ def match_from_config(self, cat1, cat2, match_config, cosmo=None): raise ValueError("config type must be cat1, cat2 or cross") if match_config["type"] in ("cat1", "cross"): print("\n## ClCatalog 1") - self.prep_cat_for_match(cat1, cosmo=cosmo, **match_config["catalog1"]) + self.prep_cat_for_match(cat1, **match_config["catalog1"]) if match_config["type"] in ("cat2", "cross"): print("\n## ClCatalog 2") - self.prep_cat_for_match(cat2, cosmo=cosmo, **match_config["catalog2"]) + self.prep_cat_for_match(cat2, **match_config["catalog2"]) verbose = match_config.get("verbose", True) if match_config["type"] in ("cat1", "cross"): print("\n## Multiple match (catalog 1)") - radius_selection = { - "cat1": "self", - "cat2": "other", - }.get( - match_config["which_radius"], - match_config["which_radius"], - ) - self.multiple(cat1, cat2, radius_selection, verbose=verbose) + self.multiple(cat1, cat2, verbose=verbose) if match_config["type"] in ("cat2", "cross"): print("\n## Multiple match (catalog 2)") - radius_selection = { - "cat1": "other", - "cat2": "self", - }.get( - match_config["which_radius"], - match_config["which_radius"], - ) - # pylint: disable=arguments-out-of-order - self.multiple(cat2, cat1, radius_selection, verbose=verbose) + self.multiple(cat2, cat1, verbose=verbose) if match_config["type"] in ("cat1", "cross"): print("\n## Finding unique matches of catalog 1") From cec92ccd8e76a3ed339f8911227feac7491f6606 Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 6 Sep 2023 17:27:41 +0200 Subject: [PATCH 12/27] make giou case independent --- clevar/match/parent.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clevar/match/parent.py b/clevar/match/parent.py index 7df8fdf..a13fce8 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -116,9 +116,9 @@ def set_unique(*args): return self._match_sharepref(*args, minimum_share_fraction) elif self.type == "Box": - valid_pref_vals += ["gIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] + valid_pref_vals += ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] - if preference == "gIoU": + if preference.lower() == "giou": def metric_func(*args): return self._compute_giou(*args) @@ -218,7 +218,7 @@ def _match_mpref(self, cat1, ind1, cat2): def _match_box_metrics_pref(self, cat1, ind1, cat2, preference, metric_func): """ - Make the unique match by gIoU preference + Make the unique match by GIoU preference Parameters ---------- From 101163bb8794e37dfb8af3401eb01165b8d31241 Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 6 Sep 2023 17:28:02 +0200 Subject: [PATCH 13/27] rm unused part of spatial --- clevar/match/spatial.py | 46 ----------------------------------------- 1 file changed, 46 deletions(-) diff --git a/clevar/match/spatial.py b/clevar/match/spatial.py index 2eeb491..f9627b7 100644 --- a/clevar/match/spatial.py +++ b/clevar/match/spatial.py @@ -153,49 +153,3 @@ def _prep_radius_for_match(self, cat, match_radius, n_match_radius=1, cosmo=None redshift=cat["z"] if "z" in cat.tags else None, cosmo=cosmo, ) - - def prep_cat_for_match(self, cat, *args): - """ - Prepare the catalog for matching, will fill the cat.mt_input object. - Each method must implement its specifics preparations. - """ - raise NotImplementedError - - def multiple(self, cat1, cat2, verbose=True): - """Makes multiple matchig - - Parameters - ---------- - cat1: clevar.ClCatalog - Base catalog - cat2: clevar.ClCatalog - Target catalog - verbose: bool - Print result for individual matches. - - Note - ---- - Not implemented in parent class - """ - raise NotImplementedError - - def match_from_config(self, cat1, cat2, match_config, cosmo=None): - """ - Make matching of catalogs based on a configuration dictionary - - Parameters - ---------- - cat1: clevar.ClCatalog - ClCatalog 1 - cat2: clevar.ClCatalog - ClCatalog 2 - match_config: dict - Dictionary with the matching configuration. - cosmo: clevar.Cosmology object - Cosmology object for when radius has physical units - - Note - ---- - Not implemented in parent class - """ - raise NotImplementedError From c7d736d29df5529cf38cbe7206961430d7c67ef2 Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 6 Sep 2023 17:28:25 +0200 Subject: [PATCH 14/27] add tests to box match --- tests/test_match.py | 181 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) diff --git a/tests/test_match.py b/tests/test_match.py index 376942b..d16b89b 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -6,9 +6,11 @@ from clevar.catalog import ClCatalog, MemCatalog from clevar.match.parent import Match +from clevar.match.spatial import SpatialMatch from clevar.match import ( ProximityMatch, MembershipMatch, + BoxMatch, output_catalog_with_matching, output_matched_catalog, get_matched_pairs, @@ -22,6 +24,13 @@ def test_parent(): assert_raises(NotImplementedError, mt.match_from_config, None, None, None, None) assert_raises(ValueError, mt._get_dist_mt, None, None, "unknown match") +def test_spatial(): + mt = SpatialMatch() + assert_raises(NotImplementedError, mt.prep_cat_for_match, None) + assert_raises(NotImplementedError, mt.multiple, None, None) + assert_raises(NotImplementedError, mt.match_from_config, None, None, None, None) + assert_raises(ValueError, mt._get_dist_mt, None, None, "unknown match") + def _test_mt_results(cat, multi_self, self, cross, multi_other=None, other=None): multi_other = multi_self if multi_other is None else multi_other @@ -493,6 +502,178 @@ def test_membership_cfg(CosmoClass): _test_mt_results(cat1, multi_self=mmt1, self=smt, cross=cmt, other=omt) _test_mt_results(cat2, multi_self=mmt2, self=omt[:-1], cross=cmt[:-1], other=smt[:-1]) +def get_test_data_box(): + input1 = { + "id": [f"CL{i}" for i in range(5)], + "ra": np.array([0.0, 0.0001, 0.00011, 25, 20]), + "dec": np.array([0.0, 0, 0, 0, 0]), + "z": [0.2, 0.3, 0.25, 0.4, 0.35], + "mass": [10**13.5, 10**13.4, 10**13.3, 10**13.8, 10**14], + } + input1["ra_min"] = input1["ra"]-1/60. + input1["ra_max"] = input1["ra"]+1/60. + input1["dec_min"] = input1["dec"]-1/60. + input1["dec_max"] = input1["dec"]+1/60. + input2 = {k: v[:4] for k, v in input1.items()} + input2["z"][:2] = [0.3, 0.2] + input2["mass"][:3] = input2["mass"][:3][::-1] + return input1, input2 + +def _validate_unique_matching( + mt, cat1, cat2, match_preference, + mmt1, smt1, omt1, + mmt2, smt2, omt2, +): + for col in ("mt_self", "mt_other"): + cat1[col] = None + cat2[col] = None + mt.unique(cat1, cat2, match_preference) + mt.unique(cat2, cat1, match_preference) + cat1.show_mt_hist() + cat2.show_mt_hist() + cat1.cross_match() + cat2.cross_match() + _test_mt_results(cat1, multi_self=mmt1, self=smt1, cross=smt1, other=omt1) + _test_mt_results(cat2, multi_self=mmt2, self=omt2, cross=smt2, other=omt2) + + +def test_box(CosmoClass): + input1, input2 = get_test_data_box() + cat1 = ClCatalog("Cat1", **input1) + cat2 = ClCatalog("Cat2", **input2) + print(cat1.data) + print(cat2.data) + cosmo = CosmoClass() + mt = BoxMatch() + # test mask intersection + res = np.zeros(2, dtype=bool) + for i, add in enumerate([1, -1, 1, -1]): + args = np.zeros((8, 2)) + args[i] += add + print(args) + assert_equal(mt.mask_intersection(*args), res) + # test missing data + assert_raises(AttributeError, mt.multiple, cat1, cat2) + cat1.mt_input = "xx" + assert_raises(AttributeError, mt.multiple, cat1, cat2) + cat1.mt_input = None + # init match + mt_config1 = {"delta_z": 0.2} + mt_config2 = {"delta_z": 0.2} + mt.prep_cat_for_match(cat1, **mt_config1) + mt.prep_cat_for_match(cat2, **mt_config2) + # Check multiple match + assert_raises(ValueError, mt.multiple, cat1, cat2, metric="unknown") + mmt = [ + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL3"], + [], + ] + for metric in ("GIoU", "IoAmin", "IoAmax", "IoAself", "IoAother"): + mt.multiple(cat1, cat2, metric=metric) + mt.multiple(cat2, cat1, metric=metric) + # Check unique with different preferences + smt = ["CL0", "CL1", "CL2", "CL3", None] + for pref in ("angular_proximity", "GIoU", "IoAmin", "IoAmax", "IoAself", "IoAother"): + print(pref) + _validate_unique_matching( + mt, cat1, cat2, pref, + mmt, smt, smt, + mmt[:-1], smt[:-1], smt[:-1], + ) + # Check unique with mass preference + smt = ["CL2", "CL1", "CL0", "CL3", None] + _validate_unique_matching( + mt, cat1, cat2, "more_massive", + mmt, smt, smt, + mmt[:-1], smt[:-1], smt[:-1], + ) + # Check unique with z preference + cat2["mt_other"][0] = "CL3" # to force a replacement + smt = ["CL1", "CL0", "CL2", "CL3", None] + omt = ["CL1", "CL0", "CL2", "CL3", None] + _validate_unique_matching( + mt, cat1, cat2, "redshift_proximity", + mmt, smt, omt, + mmt[:-1], omt[:-1], smt[:-1], + ) + # Error for unkown preference + assert_raises(ValueError, mt.unique, cat1, cat2, "unknown") + # Check save and load matching + mt.save_matches(cat1, cat2, out_dir="temp", overwrite=True) + cat1_v2 = ClCatalog("Cat1", **input1) + cat2_v2 = ClCatalog("Cat2", **input2) + mt.load_matches(cat1_v2, cat2_v2, out_dir="temp") + for col in ("mt_self", "mt_other", "mt_multi_self", "mt_multi_other"): + assert_equal(cat1[col], cat1_v2[col]) + assert_equal(cat2[col], cat2_v2[col]) + os.system("rm -rf temp") + # Other config of prep for matching + # No redshift use + mt.prep_cat_for_match(cat1, delta_z=None) + assert all(cat1.mt_input["zmin"] < cat1["z"].min()) + assert all(cat1.mt_input["zmax"] > cat1["z"].max()) + # missing all zmin/zmax info in catalog + # zmin/zmax in catalog + cat1["zmin"] = cat1["z"] - 0.2 + cat1["zmax"] = cat1["z"] + 0.2 + cat1["z_err"] = 0.1 + mt.prep_cat_for_match(cat1, delta_z="cat") + assert_allclose(cat1.mt_input["zmin"], cat1["zmin"]) + assert_allclose(cat1.mt_input["zmax"], cat1["zmax"]) + # z_err in catalog + del cat1["zmin"], cat1["zmax"] + mt.prep_cat_for_match(cat1, delta_z="cat") + assert_allclose(cat1.mt_input["zmin"], cat1["z"] - cat1["z_err"]) + assert_allclose(cat1.mt_input["zmax"], cat1["z"] + cat1["z_err"]) + # zmin/zmax from aux file + zv = np.linspace(0, 5, 10) + np.savetxt("zvals.dat", [zv, zv - 0.22, zv + 0.33]) + mt.prep_cat_for_match(cat1, delta_z="zvals.dat") + assert_allclose(cat1.mt_input["zmin"], cat1["z"] - 0.22) + assert_allclose(cat1.mt_input["zmax"], cat1["z"] + 0.33) + os.system("rm -rf zvals.dat") + + +def test_box_cfg(CosmoClass): + input1, input2 = get_test_data_box() + cat1 = ClCatalog("Cat1", **input1) + cat2 = ClCatalog("Cat2", **input2) + print(cat1.data) + print(cat2.data) + # init match + mt = BoxMatch() + # test wrong matching config + assert_raises(ValueError, mt.match_from_config, cat1, cat2, {"type": "unknown"}) + ### test 0 ### + mt_config = { + "type": "cross", + "preference": "GIoU", + "catalog1": {"delta_z": 0.2,}, + "catalog2": {"delta_z": 0.2,}, + } + # Check multiple match + mmt = [ + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL3"], + [], + ] + smt = ["CL0", "CL1", "CL2", "CL3", None] + ### test 0 ### + mt.match_from_config(cat1, cat2, mt_config) + # Check match + _test_mt_results(cat1, multi_self=mmt, self=smt, cross=smt) + _test_mt_results(cat2, multi_self=mmt[:-1], self=smt[:-1], cross=smt[:-1]) + ### test 1 ### + cat1._init_match_vals(overwrite=True) + cat2._init_match_vals(overwrite=True) + mt.match_from_config(cat1, cat2, mt_config) + _test_mt_results(cat1, multi_self=mmt, self=smt, cross=smt) + _test_mt_results(cat2, multi_self=mmt[:-1], self=smt[:-1], cross=smt[:-1]) def test_output_catalog_with_matching(): # input data From 886974a66447d53a5a618f249efd7b1b2118757f Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 6 Sep 2023 17:49:38 +0200 Subject: [PATCH 15/27] add test_box_detailed_print --- tests/test_match.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tests/test_match.py b/tests/test_match.py index d16b89b..9effacd 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -675,6 +675,41 @@ def test_box_cfg(CosmoClass): _test_mt_results(cat1, multi_self=mmt, self=smt, cross=smt) _test_mt_results(cat2, multi_self=mmt[:-1], self=smt[:-1], cross=smt[:-1]) +def test_box_detailed_print(CosmoClass): + cosmo = CosmoClass() + mt = BoxMatch() + assert_raises(ValueError, mt._detailed_print, None, {"detailed_print_only":True}) + # prep data + input1, input2 = get_test_data_box() + cat1 = ClCatalog("Cat1", **input1)[:1] + cat2 = ClCatalog("Cat2", **input2) + mt.prep_cat_for_match(cat1, delta_z=None) + mt.prep_cat_for_match(cat2, delta_z=None) + # success match + mt.multiple( + cat1, cat2, + metric_cut=0.5, + detailed_print_only=True, + ) + # fail match on redshift + cat2["z"] += 1 + mt.prep_cat_for_match(cat1, delta_z=0.2) + mt.prep_cat_for_match(cat2, delta_z=0.2) + mt.multiple( + cat1, cat2, + metric_cut=0.5, + detailed_print_only=True, + ) + # fail match on intersection + cat1["ra_min"] += 100 + mt.prep_cat_for_match(cat1, delta_z=None) + mt.prep_cat_for_match(cat2, delta_z=None) + mt.multiple( + cat1, cat2, + metric_cut=0.5, + detailed_print_only=True, + ) + def test_output_catalog_with_matching(): # input data cat1, cat2 = get_test_data_mem() From abedfd4b0cd675ec04c32ef2ea1f64bce28d649d Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 6 Sep 2023 17:51:20 +0200 Subject: [PATCH 16/27] fmt files --- clevar/match/box.py | 2 +- clevar/match/proximity.py | 1 - tests/conftest.py | 2 +- tests/test_match.py | 90 ++++++++++++++++++++++++++++----------- 4 files changed, 67 insertions(+), 28 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index 4fb0aab..8219894 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -79,7 +79,7 @@ def get_metric_value(*args): else: valid_pref_vals = ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] - raise ValueError("metric must be in:"+", ".join(valid_pref_vals)) + raise ValueError("metric must be in:" + ", ".join(valid_pref_vals)) self._cat1_mmt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching ra2min, ra2max, dec2min, dec2max = ( diff --git a/clevar/match/proximity.py b/clevar/match/proximity.py index 4f02a12..9973e4e 100644 --- a/clevar/match/proximity.py +++ b/clevar/match/proximity.py @@ -144,7 +144,6 @@ def prep_cat_for_match( } ) - def _max_mt_distance(self, radius1, radius2, radius_selection): """Get maximum angular distance allowed for the matching diff --git a/tests/conftest.py b/tests/conftest.py index 2c1e8d5..7fd218c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,7 +9,7 @@ @pytest.fixture(scope="module", params=[AstroPyCosmology, CCLCosmology]) def CosmoClass(request): param = request.param - if optional_libs.ccl is None and param==CCLCosmology: + if optional_libs.ccl is None and param == CCLCosmology: pytest.skip(f"Missing backend '{param}'.") return param diff --git a/tests/test_match.py b/tests/test_match.py index 9effacd..bd84d6d 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -24,6 +24,7 @@ def test_parent(): assert_raises(NotImplementedError, mt.match_from_config, None, None, None, None) assert_raises(ValueError, mt._get_dist_mt, None, None, "unknown match") + def test_spatial(): mt = SpatialMatch() assert_raises(NotImplementedError, mt.prep_cat_for_match, None) @@ -502,6 +503,7 @@ def test_membership_cfg(CosmoClass): _test_mt_results(cat1, multi_self=mmt1, self=smt, cross=cmt, other=omt) _test_mt_results(cat2, multi_self=mmt2, self=omt[:-1], cross=cmt[:-1], other=smt[:-1]) + def get_test_data_box(): input1 = { "id": [f"CL{i}" for i in range(5)], @@ -510,19 +512,27 @@ def get_test_data_box(): "z": [0.2, 0.3, 0.25, 0.4, 0.35], "mass": [10**13.5, 10**13.4, 10**13.3, 10**13.8, 10**14], } - input1["ra_min"] = input1["ra"]-1/60. - input1["ra_max"] = input1["ra"]+1/60. - input1["dec_min"] = input1["dec"]-1/60. - input1["dec_max"] = input1["dec"]+1/60. + input1["ra_min"] = input1["ra"] - 1 / 60.0 + input1["ra_max"] = input1["ra"] + 1 / 60.0 + input1["dec_min"] = input1["dec"] - 1 / 60.0 + input1["dec_max"] = input1["dec"] + 1 / 60.0 input2 = {k: v[:4] for k, v in input1.items()} input2["z"][:2] = [0.3, 0.2] input2["mass"][:3] = input2["mass"][:3][::-1] return input1, input2 + def _validate_unique_matching( - mt, cat1, cat2, match_preference, - mmt1, smt1, omt1, - mmt2, smt2, omt2, + mt, + cat1, + cat2, + match_preference, + mmt1, + smt1, + omt1, + mmt2, + smt2, + omt2, ): for col in ("mt_self", "mt_other"): cat1[col] = None @@ -579,25 +589,46 @@ def test_box(CosmoClass): for pref in ("angular_proximity", "GIoU", "IoAmin", "IoAmax", "IoAself", "IoAother"): print(pref) _validate_unique_matching( - mt, cat1, cat2, pref, - mmt, smt, smt, - mmt[:-1], smt[:-1], smt[:-1], + mt, + cat1, + cat2, + pref, + mmt, + smt, + smt, + mmt[:-1], + smt[:-1], + smt[:-1], ) # Check unique with mass preference smt = ["CL2", "CL1", "CL0", "CL3", None] _validate_unique_matching( - mt, cat1, cat2, "more_massive", - mmt, smt, smt, - mmt[:-1], smt[:-1], smt[:-1], + mt, + cat1, + cat2, + "more_massive", + mmt, + smt, + smt, + mmt[:-1], + smt[:-1], + smt[:-1], ) # Check unique with z preference cat2["mt_other"][0] = "CL3" # to force a replacement smt = ["CL1", "CL0", "CL2", "CL3", None] omt = ["CL1", "CL0", "CL2", "CL3", None] _validate_unique_matching( - mt, cat1, cat2, "redshift_proximity", - mmt, smt, omt, - mmt[:-1], omt[:-1], smt[:-1], + mt, + cat1, + cat2, + "redshift_proximity", + mmt, + smt, + omt, + mmt[:-1], + omt[:-1], + smt[:-1], ) # Error for unkown preference assert_raises(ValueError, mt.unique, cat1, cat2, "unknown") @@ -651,8 +682,12 @@ def test_box_cfg(CosmoClass): mt_config = { "type": "cross", "preference": "GIoU", - "catalog1": {"delta_z": 0.2,}, - "catalog2": {"delta_z": 0.2,}, + "catalog1": { + "delta_z": 0.2, + }, + "catalog2": { + "delta_z": 0.2, + }, } # Check multiple match mmt = [ @@ -675,10 +710,11 @@ def test_box_cfg(CosmoClass): _test_mt_results(cat1, multi_self=mmt, self=smt, cross=smt) _test_mt_results(cat2, multi_self=mmt[:-1], self=smt[:-1], cross=smt[:-1]) + def test_box_detailed_print(CosmoClass): cosmo = CosmoClass() mt = BoxMatch() - assert_raises(ValueError, mt._detailed_print, None, {"detailed_print_only":True}) + assert_raises(ValueError, mt._detailed_print, None, {"detailed_print_only": True}) # prep data input1, input2 = get_test_data_box() cat1 = ClCatalog("Cat1", **input1)[:1] @@ -687,28 +723,32 @@ def test_box_detailed_print(CosmoClass): mt.prep_cat_for_match(cat2, delta_z=None) # success match mt.multiple( - cat1, cat2, + cat1, + cat2, metric_cut=0.5, detailed_print_only=True, - ) + ) # fail match on redshift cat2["z"] += 1 mt.prep_cat_for_match(cat1, delta_z=0.2) mt.prep_cat_for_match(cat2, delta_z=0.2) mt.multiple( - cat1, cat2, + cat1, + cat2, metric_cut=0.5, detailed_print_only=True, - ) + ) # fail match on intersection cat1["ra_min"] += 100 mt.prep_cat_for_match(cat1, delta_z=None) mt.prep_cat_for_match(cat2, delta_z=None) mt.multiple( - cat1, cat2, + cat1, + cat2, metric_cut=0.5, detailed_print_only=True, - ) + ) + def test_output_catalog_with_matching(): # input data From 2f7ba3a97b70dc69a6da1455dcab43d570804e5f Mon Sep 17 00:00:00 2001 From: Michel Date: Wed, 27 Dec 2023 12:22:56 +0100 Subject: [PATCH 17/27] fix name of arg share_area_frac->rel_area --- clevar/match/box.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index 8219894..6d06ed3 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -36,7 +36,7 @@ def multiple( cat2, metric="GIoU", metric_cut=0.5, - share_area_frac=0.5, + rel_area=0.5, verbose=True, detailed_print_only=False, ): @@ -53,7 +53,7 @@ def multiple( Metric to be used for matching. Can be: GIoU (generalized Intersection over Union). metric_cut: float Minimum value of metric for match. - share_area_frac: float + rel_area: float Minimum relative size of area for match. verbose: bool Print result for individual matches. @@ -126,8 +126,8 @@ def get_metric_value(*args): if not detailed_print_only: for id2 in cat2["id"][mask][ (metric_value >= metric_cut) - * (area1 / area2 >= share_area_frac) - * (area2 / area1 >= share_area_frac) + * (area1 / area2 >= rel_area) + * (area2 / area1 >= rel_area) ]: cat1["mt_multi_self"][ind1].append(id2) ind2 = int(cat2.id_dict[id2]) @@ -176,8 +176,8 @@ def _detailed_print(self, i, locs): locs["outter"], locs["metric_value"], locs["metric_value"] >= locs["metric_cut"], - locs["area1"] / locs["area2"] >= locs["share_area_frac"], - locs["area2"] / locs["area1"] >= locs["share_area_frac"], + locs["area1"] / locs["area2"] >= locs["rel_area"], + locs["area2"] / locs["area1"] >= locs["rel_area"], ): print(" Candidate:", val[0]) print(f" Pass : {bool(np.prod(val[10:]))} (", *val[10:], ")") From e86e3d7e695dab86c1cdaed907bcf05e67dc2eb4 Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 22 May 2024 12:12:05 +0200 Subject: [PATCH 18/27] update nbs --- examples/box_matching.ipynb | 380 +++++++++++++++++++++++++++ examples/box_matching_detailed.ipynb | 81 +++--- 2 files changed, 410 insertions(+), 51 deletions(-) create mode 100644 examples/box_matching.ipynb diff --git a/examples/box_matching.ipynb b/examples/box_matching.ipynb new file mode 100644 index 0000000..a9654fd --- /dev/null +++ b/examples/box_matching.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matching catalogs based on square boxes (simple)\n", + "Matching two catalogs based on boxes based on a configuration dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ClCatalogs\n", + "Given some input data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from astropy.table import Table\n", + "\n", + "input1 = Table(\n", + " {\n", + " \"ID\": [f\"CL{i}\" for i in range(5)],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25, 20],\n", + " \"DEC\": [0.0, 0.0, 0.0, 0.0, 0.0],\n", + " \"Z\": [0.2, 0.3, 0.25, 0.4, 0.35],\n", + " \"MASS\": [10**13.5, 10**13.5, 10**13.3, 10**13.8, 10**14],\n", + " }\n", + ")\n", + "input2 = Table(\n", + " {\n", + " \"ID\": [\"CL0\", \"CL1\", \"CL2\", \"CL3\"],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25],\n", + " \"DEC\": [0.0, 0, 0, 0],\n", + " \"Z\": [0.3, 0.2, 0.25, 0.4],\n", + " \"MASS\": [10**13.3, 10**13.4, 10**13.5, 10**13.8],\n", + " }\n", + ")\n", + "for col in (\"RA\", \"DEC\"):\n", + " input1[f\"{col}_MAX\"] = input1[col] + 0.01\n", + " input1[f\"{col}_MIN\"] = input1[col] - 0.01\n", + " input2[f\"{col}_MAX\"] = input2[col] + 0.01\n", + " input2[f\"{col}_MIN\"] = input2[col] - 0.01\n", + "display(input1)\n", + "display(input2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create two `ClCatalog` objects, they have the same properties of `astropy` tables with additional functionality. You can tag the main properties of the catalog, or have columns with those names (see `catalogs.ipynb` for detailts). For the box matching, the main tags/columns to be included are:\n", + "- `id` - if not included, one will be assigned\n", + "- `ra_min`, `ra_max` (in degrees) - necessary\n", + "- `dec_min`, `dec_max` (in degrees) - necessary\n", + "- `z` - necessary if used as matching criteria" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.catalog import ClCatalog\n", + "\n", + "tags = {\"id\": \"ID\", \"ra\": \"RA\", \"dec\": \"DEC\", \"z\": \"Z\", \"mass\": \"MASS\"}\n", + "c1 = ClCatalog(\"Cat1\", data=input1, tags=tags)\n", + "c2 = ClCatalog(\"Cat2\", data=input2, tags=tags)\n", + "\n", + "# Format for nice display\n", + "for c in (\"ra\", \"dec\", \"z\", \"ra_min\", \"dec_min\", \"ra_max\", \"dec_max\"):\n", + " c1[c].info.format = \".2f\"\n", + " c2[c].info.format = \".2f\"\n", + "for c in (\"mass\",):\n", + " c1[c].info.format = \".2e\"\n", + " c2[c].info.format = \".2e\"\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `ClCatalog` object can also be read directly from a file,\n", + "for details, see catalogs.ipynb." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Matching\n", + "Import the `BoxMatch` and create a object for matching" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import BoxMatch\n", + "\n", + "mt = BoxMatch()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prepare the configuration. The main values are:\n", + "\n", + "- `type`: Type of matching to be considered. Can be a simple match of ClCatalog1->ClCatalog2 (`cat1`), ClCatalog2->ClCatalog1 (`cat2`) or cross matching.\n", + "- `metric`: Metric to be used for matching. Can be: `GIoU` (generalized Intersection over Union); `IoA*` (Intersection over Area, with area choice in [`min`, `max`, `self`, `other`]);\n", + "- `metric_cut`: Minimum value of metric for match.\n", + "- `rel_area`: Minimum relative size of area for match.\n", + "- `preference`: In cases where there are multiple matched, how the best candidate will be chosen. Options are: `'more_massive'`, `'angular_proximity'`, `'redshift_proximity'`, `'shared_member_fraction'`, `'GIoU'` (generalized Intersection over Union), `'IoA*'` (Intersection over Area, with area choice in `min`, `max`, `self`, `other`).\n", + "\n", + "- `verbose`: Print result for individual matches (default=`True`).\n", + "\n", + "We also need to provide some specific configuration for each catalog with:\n", + "\n", + "- `delta_z`: Defines redshift window for matching. The possible values are:\n", + " - `'cat'`: uses redshift properties of the catalog\n", + " - `'spline.filename'`: interpolates data in `'filename'` assuming (z, zmin, zmax) format\n", + " - `float`: uses `delta_z*(1+z)`\n", + " - `None`: does not use z" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "match_config = {\n", + " \"type\": \"cross\", # options are cross, cat1, cat2\n", + " \"preference\": \"GIoU\",\n", + " \"catalog1\": {\"delta_z\": None},\n", + " \"catalog2\": {\"delta_z\": None},\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the configuration is prepared, the whole process can be done with one call:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mt.match_from_config(c1, c2, match_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the matching columns in the catalogs:\n", + "- `mt_multi_self`: Multiple matches found\n", + "- `mt_multi_other`: Multiple matches found by the other catalog\n", + "- `mt_self`: Best candidate found\n", + "- `mt_other`: Best candidate found by the other catalog\n", + "- `mt_cross`: Best candidate found in both directions\n", + "\n", + "If `preference` in (`GIoU`, `IoA*`), it also add the value of `mt_self_preference` and `mt_other_preference`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The steps of matching are stored in the catalogs and can be checked:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.show_mt_hist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save and Load\n", + "The results of the matching can easily be saved and load using `ClEvaR` tools:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.save_matches(c1, c2, out_dir=\"temp\", overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.load_matches(c1, c2, out_dir=\"temp\")\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Matched Pairs\n", + "\n", + "There is functionality inbuilt in `clevar` to plot some results of the matching, such as:\n", + "- Recovery rates\n", + "- Distances (anguar and redshift) of cluster centers\n", + "- Scaling relations (mass, redshift, ...)\n", + "for those cases, check the match_metrics.ipynb and match_metrics_advanced.ipynb notebooks.\n", + "\n", + "If those do not provide your needs, you can get directly the matched pairs of clusters: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import get_matched_pairs\n", + "\n", + "mt1, mt2 = get_matched_pairs(c1, c2, \"cross\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These will be catalogs with the corresponding matched pairs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pylab as plt\n", + "\n", + "plt.scatter(mt1[\"mass\"], mt2[\"mass\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outputing matched catalogs\n", + "\n", + "To save the current catalogs, you can use the `write` inbuilt function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.write(\"c1_temp.fits\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will allow you to save the catalog with its current labels and matching information.\n", + "\n", + "### Outputing matching information to original catalogs\n", + "\n", + "If your input data came from initial files,\n", + "`clevar` also provides functions create output files \n", + "that combine all the information on them with the matching results.\n", + "\n", + "To add the matching information to an input catalog, use:\n", + "\n", + "```\n", + "from clevar.match import output_catalog_with_matching\n", + "output_catalog_with_matching('input_catalog.fits', 'output_catalog.fits', c1)\n", + "```\n", + "\n", + "- note: `input_catalog.fits` must have the same number of rows that `c1`.\n", + "\n", + "\n", + "To create a matched catalog containig all columns of both input catalogs, use:\n", + "\n", + "```\n", + "from clevar.match import output_matched_catalog\n", + "output_matched_catalog('input_catalog1.fits', 'input_catalog2.fits',\n", + " 'output_catalog.fits', c1, c2, matching_type='cross')\n", + "```\n", + "\n", + "where `matching_type` must be `cross`, `cat1` or `cat2`.\n", + "\n", + "- note: `input_catalog1.fits` must have the same number of rows that `c1` (and the same for `c2`)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/box_matching_detailed.ipynb b/examples/box_matching_detailed.ipynb index c56303d..b030993 100644 --- a/examples/box_matching_detailed.ipynb +++ b/examples/box_matching_detailed.ipynb @@ -4,18 +4,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Matching catalogs based on proximity (detailed)\n", - "Here we show the specific steps of matching two catalogs based on proximity" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "toc": true - }, - "source": [ - "

Table of Contents

\n", - "" + "# Matching catalogs based on square boxes (detailed)\n", + "Here we show the specific steps of matching two catalogs based on square boxes on clusters" ] }, { @@ -33,7 +23,7 @@ "metadata": {}, "source": [ "## ClCatalogs\n", - "Given some input data" + "Given some input data." ] }, { @@ -52,7 +42,6 @@ " \"DEC\": [0.0, 0.0, 0.0, 0.0, 0.0],\n", " \"Z\": [0.2, 0.3, 0.25, 0.4, 0.35],\n", " \"MASS\": [10**13.5, 10**13.4, 10**13.3, 10**13.8, 10**14],\n", - " \"RADIUS_ARCMIN\": [1.0, 1.0, 1.0, 1.0, 1.0],\n", " }\n", ")\n", "input2 = Table(\n", @@ -62,14 +51,13 @@ " \"DEC\": [0.0, 0, 0, 0],\n", " \"Z\": [0.3, 0.2, 0.25, 0.4],\n", " \"MASS\": [10**13.3, 10**13.4, 10**13.5, 10**13.8],\n", - " \"RADIUS_ARCMIN\": [1.0, 1.0, 1.0, 1.0],\n", " }\n", ")\n", - "for col in ('RA', 'DEC'):\n", - " input1[f\"{col}_MAX\"] = input1[col]+0.01\n", - " input1[f\"{col}_MIN\"] = input1[col]-0.01\n", - " input2[f\"{col}_MAX\"] = input2[col]+0.01\n", - " input2[f\"{col}_MIN\"] = input2[col]-0.01\n", + "for col in (\"RA\", \"DEC\"):\n", + " input1[f\"{col}_MAX\"] = input1[col] + 0.01\n", + " input1[f\"{col}_MIN\"] = input1[col] - 0.01\n", + " input2[f\"{col}_MAX\"] = input2[col] + 0.01\n", + " input2[f\"{col}_MIN\"] = input2[col] - 0.01\n", "\n", "display(input1)\n", "display(input2)" @@ -79,13 +67,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Create two `ClCatalog` objects, they have the same properties of `astropy` tables with additional functionality. You can tag the main properties of the catalog, or have columns with those names (see `catalogs.ipynb` for detailts). For the proximity matching, the main tags/columns to be included are:\n", + "Create two `ClCatalog` objects, they have the same properties of `astropy` tables with additional functionality. You can tag the main properties of the catalog, or have columns with those names (see `catalogs.ipynb` for detailts). For the box matching, the main tags/columns to be included are:\n", "- `id` - if not included, one will be assigned\n", - "- `ra` (in degrees) - necessary\n", - "- `dec` (in degrees) - necessary\n", - "- `z` - necessary if used as matching criteria or for angular to physical convertion\n", - "- `mass` (or mass proxy) - necessary if used as preference criteria for unique matches\n", - "- `radius` - necessary if used as a criteria of matching (also requires `radius_unit` to be passed)" + "- `ra_min`, `ra_max` (in degrees) - necessary\n", + "- `dec_min`, `dec_max` (in degrees) - necessary\n", + "- `z` - necessary if used as matching criteria" ] }, { @@ -123,7 +109,7 @@ "metadata": {}, "source": [ "## Matching\n", - "Import the `ProximityMatch` and create a object for matching" + "Import the `BoxMatch` and create a object for matching" ] }, { @@ -149,7 +135,6 @@ " - `'spline.filename'`: interpolates data in `'filename'` assuming (z, zmin, zmax) format\n", " - `float`: uses `delta_z*(1+z)`\n", " - `None`: does not use z\n", - "- `match_radius`: Radius of the catalog to be used in the matching. If `'cat'` uses the radius in the catalog, else must be in format `'value unit'`. (ex: `'1 arcsec'`, `'1 Mpc'`)\n", "\n", "In this case, because one of the configuraion radius has physical units, we also need a cosmology (`cosmo`) object to convert it to angular size (this is done internally)." ] @@ -162,8 +147,12 @@ "source": [ "from clevar.cosmology import AstroPyCosmology\n", "\n", - "mt_config1 = {\"delta_z\": 0.2,}\n", - "mt_config2 = {\"delta_z\": 0.2,}\n", + "mt_config1 = {\n", + " \"delta_z\": None,\n", + "}\n", + "mt_config2 = {\n", + " \"delta_z\": None,\n", + "}\n", "mt.prep_cat_for_match(c1, **mt_config1)\n", "mt.prep_cat_for_match(c2, **mt_config2)" ] @@ -178,7 +167,9 @@ { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ "display(c1.mt_input)\n", @@ -190,8 +181,10 @@ "metadata": {}, "source": [ "### Multiple matching\n", - "The next step is to match the catalogs and store all candidates that pass the matching criteria. You can also pass the argument:\n", - "- `radius_selection`: Given a pair of clusters, which radius will be used for the matching." + "The next step is to match the catalogs and store all candidates that pass the matching criteria. You can also pass the arguments\n", + "- `metric`: Metric to be used for matching. Can be: `GIoU` (generalized Intersection over Union); `IoA*` (Intersection over Area, with area choice in `min`, `max`, `self`, `other`);\n", + "- `metric_cut`: Minimum value of metric for match.\n", + "- `rel_area`: Minimum relative size of area for match." ] }, { @@ -237,7 +230,7 @@ "source": [ "### Unique matching\n", "Once all candidates are stored in each catalog, we can find the best candidates. You can also pass the argument:\n", - "- `preference`: In cases where there are multiple matched, how the best candidate will be chosen." + "- `preference`: In cases where there are multiple matched, how the best candidate will be chosen. Options are: `'more_massive'`, `'angular_proximity'`, `'redshift_proximity'`, `'shared_member_fraction'`, `'GIoU'`, `'IoA*'`. " ] }, { @@ -247,15 +240,15 @@ "outputs": [], "source": [ "%%time\n", - "mt.unique(c1, c2, preference=\"angular_proximity\")\n", - "mt.unique(c2, c1, preference=\"angular_proximity\")" + "mt.unique(c1, c2, preference=\"GIoU\")\n", + "mt.unique(c2, c1, preference=\"GIoU\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This will fill the `mt_self` and `mt_other` columns:" + "This will fill the `mt_self` and `mt_other` columns, and in case of `preference` in (`GIoU`, `IoA*`), also add the value of `mt_self_preference` and `mt_other_preference`:" ] }, { @@ -357,20 +350,6 @@ "mt1, mt2 = get_matched_pairs(c1, c2, \"cross\")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "mt._compute_giou(\n", - " *mt._compute_areas(\n", - " mt1['ra_min'], mt1['ra_max'], mt1['dec_min'], mt1['dec_max'],\n", - " mt2['ra_min'], mt2['ra_max'], mt2['dec_min'], mt2['dec_max'],\n", - " )\n", - ")" - ] - }, { "cell_type": "markdown", "metadata": {}, From e0ce66770a352ebb731f71bdb5992542a2e9c77a Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 22 May 2024 16:18:23 +0200 Subject: [PATCH 19/27] restruct and simplify code --- clevar/match/box.py | 114 +++++++++++++++++++++++++++-------- clevar/match/membership.py | 7 +++ clevar/match/parent.py | 118 ++++++------------------------------- clevar/match/proximity.py | 17 ++---- clevar/match/spatial.py | 30 ++++++++++ 5 files changed, 148 insertions(+), 138 deletions(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index 6d06ed3..63b9161 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -29,6 +29,9 @@ class BoxMatch(SpatialMatch): def __init__(self): SpatialMatch.__init__(self) self.type = "Box" + self._valid_unique_preference_vals += ["GIoU"] + [ + f"IoA{end}" for end in ("min", "max", "self", "other") + ] def multiple( self, @@ -50,7 +53,9 @@ def multiple( cat2: clevar.ClCatalog Target catalog metric: str (optional) - Metric to be used for matching. Can be: GIoU (generalized Intersection over Union). + Metric to be used for matching. Can be: + "GIoU" (generalized Intersection over Union); + "IoA*" (Intersection over Area, with area choice in ["min", "max", "self", "other"]); metric_cut: float Minimum value of metric for match. rel_area: float @@ -67,19 +72,10 @@ def multiple( if cat2.mt_input is None: raise AttributeError("cat2.mt_input is None, run prep_cat_for_match first.") - if metric.lower() == "giou": - - def get_metric_value(*args): - return self._compute_giou(*args) - - elif metric[:3] == "IoA" and metric[3:] in ("min", "max", "self", "other"): - - def get_metric_value(*args): - return self._compute_intersection_over_area(*args, area_type=metric[3:]) - - else: - valid_pref_vals = ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] - raise ValueError("metric must be in:" + ", ".join(valid_pref_vals)) + valid_metric_vals = ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] + if metric not in valid_metric_vals: + raise ValueError("metric must be in:" + ", ".join(valid_metric_vals)) + self._set_metric_function(preference) self._cat1_mmt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching ra2min, ra2max, dec2min, dec2max = ( @@ -121,7 +117,7 @@ def get_metric_value(*args): dec2max[mask], ) # makes metric crop - metric_value = get_metric_value(area1, area2, intersection, outter) + metric_value = self._get_metric(area1, area2, intersection, outter) self._detailed_print(3, locals()) if not detailed_print_only: for id2 in cat2["id"][mask][ @@ -150,6 +146,10 @@ def _detailed_print(self, i, locs): if not locs["detailed_print_only"]: return if i == 0: + area = ( + self._compute_area(locs["ra1min"], locs["ra1max"], locs["dec1min"], locs["dec1max"]) + * 3600 + ) print( "\n\nCluster:", "(", @@ -157,7 +157,7 @@ def _detailed_print(self, i, locs): ") (", ", ".join([f"{locs[v]:.4f}" for v in ("z1min", "z1max")]), ")", - f"( area: {self._compute_area(locs['ra1min'], locs['ra1max'], locs['dec1min'], locs['dec1max'])*3600:.2f} arcmin2 )", + f"( area: {area:.2f} arcmin2 )", ) elif i == 1: print(f" * z pass: {locs['mask'].sum():,}") @@ -246,6 +246,7 @@ def _compute_giou(self, area1, area2, intersection, outter): return intersection / union + union / outter - 1.0 def _compute_intersection_over_area(self, area1, area2, intersection, outter, area_type): + # pylint: disable=unused-argument return intersection / _area_type_funcs[area_type](area1, area2) def prep_cat_for_match( @@ -294,6 +295,76 @@ def prep_cat_for_match( } ) + def _get_metric(self, preference): + # pylint: disable=method-hidden + raise NotImplementedError + + def _set_metric_function(self, preference): + if preference.lower() == "giou": + self._get_metric = self._compute_giou + + elif preference[:3] == "IoA" and preference[3:] in ("min", "max", "self", "other"): + self._get_metric = lambda *args: self._compute_intersection_over_area( + *args, area_type=preference[3:] + ) + + def _set_unique_matching_function(self, preference, **kwargs): + self._set_metric_function(preference) + + def set_unique(*args): + return self._match_box_metrics_pref( + *args, preference=preference, metric_func=self._get_metric + ) + + return set_unique + + def _match_box_metrics_pref(self, cat1, ind1, cat2, preference, metric_func): + """ + Make the unique match by GIoU preference + + Parameters + ---------- + cat1: clevar.ClCatalog + Base catalog + ind1: int + Index of the cluster from cat1 to be matched + cat2: clevar.ClCatalog + Target catalog + + Returns + ------- + bool + Tells if the cluster was matched + """ + inds2 = cat2.ids2inds(cat1["mt_multi_self"][ind1]) + if len(inds2) > 0: + metric = metric_func( + *self._compute_areas( + *( + [ + cat1["ra_min"][ind1], + cat1["ra_max"][ind1], + cat1["dec_min"][ind1], + cat1["dec_max"][ind1], + ] + * np.ones(inds2.size)[:, None] + ).T, # for vec computation + cat2["ra_min"][inds2], + cat2["ra_max"][inds2], + cat2["dec_min"][inds2], + cat2["dec_max"][inds2], + ) + ) + for i_sort in np.argsort(metric)[::-1]: + ind2 = inds2[i_sort] + if cat2["mt_other"][ind2] is None: + cat1["mt_self"][ind1] = cat2["id"][ind2] + cat2["mt_other"][ind2] = cat1["id"][ind1] + cat1[f"mt_self_{preference}"][ind1] = metric[i_sort] + cat2[f"mt_other_{preference}"][ind2] = metric[i_sort] + return True + return False + def match_from_config(self, cat1, cat2, match_config, cosmo=None): """ Make matching of catalogs based on a configuration dictionary @@ -334,13 +405,4 @@ def match_from_config(self, cat1, cat2, match_config, cosmo=None): print("\n## Multiple match (catalog 2)") self.multiple(cat2, cat1, verbose=verbose) - if match_config["type"] in ("cat1", "cross"): - print("\n## Finding unique matches of catalog 1") - self.unique(cat1, cat2, match_config["preference"]) - if match_config["type"] in ("cat2", "cross"): - print("\n## Finding unique matches of catalog 2") - self.unique(cat2, cat1, match_config["preference"]) - - if match_config["type"] == "cross": - self.cross_match(cat1) - self.cross_match(cat2) + self._unique_match_from_config(cat1, cat2, match_config) diff --git a/clevar/match/membership.py b/clevar/match/membership.py index 099b59a..fe16f12 100644 --- a/clevar/match/membership.py +++ b/clevar/match/membership.py @@ -26,6 +26,7 @@ def __init__(self): Match.__init__(self) self.type = "Membership" self.matched_mems = None + self._valid_unique_preference_vals += ["shared_member_fraction"] def multiple(self, cat1, cat2, minimum_share_fraction=0, verbose=True): """ @@ -143,6 +144,12 @@ def _add_pmem(self, cat1_share_mems, ind1, cat2_id, pmem1): """ cat1_share_mems[ind1][cat2_id] = cat1_share_mems[ind1].get(cat2_id, 0) + pmem1 + def _set_unique_matching_function(self, preference, **kwargs): + def set_unique(*args): + return self._match_sharepref(*args, kwargs["minimum_share_fraction"]) + + return set_unique + def save_shared_members(self, cat1, cat2, fileprefix): """ Saves dictionaries of shared members diff --git a/clevar/match/parent.py b/clevar/match/parent.py index a13fce8..7e1fc58 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -18,6 +18,14 @@ def __init__(self): self._cat1_mt = None self._cat1_mmt = None + # internal values for unique matching + self._valid_unique_preference_vals = [ + "more_massive", + "angular_proximity", + "redshift_proximity", + "shared_member_fraction", + ] + def prep_cat_for_match(self, cat, *args): """ Prepare the catalog for matching, will fill the cat.mt_input object. @@ -58,6 +66,10 @@ def _rm_dup_add_hist(self, cat1, cat2, hist): cat1._set_mt_hist(self.history) cat2._set_mt_hist(self.history) + def _set_unique_matching_function(self, preference, **kwargs): + # pylint: disable=unused-argument + raise NotImplementedError + def unique(self, cat1, cat2, preference, minimum_share_fraction=0): """Makes unique matchig, requires multiple matching to be made first @@ -82,15 +94,6 @@ def unique(self, cat1, cat2, preference, minimum_share_fraction=0): else: i_vals = range(cat1.size) - # set funtion to be used for matching - valid_pref = True - valid_pref_vals = [ - "more_massive", - "angular_proximity", - "redshift_proximity", - "shared_member_fraction", - ] - if preference == "more_massive": def set_unique(*args): @@ -106,50 +109,14 @@ def set_unique(*args): def set_unique(*args): return self._match_apref(*args, "redshift_proximity") - elif preference == "shared_member_fraction": - cat1["mt_frac_self"] = np.zeros(cat1.size) - cat2["mt_frac_other"] = np.zeros(cat2.size) - if "mt_frac_other" not in cat1.colnames: - cat1["mt_frac_other"] = np.zeros(cat1.size) - - def set_unique(*args): - return self._match_sharepref(*args, minimum_share_fraction) - - elif self.type == "Box": - valid_pref_vals += ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] - - if preference.lower() == "giou": - - def metric_func(*args): - return self._compute_giou(*args) - - elif preference[:3] == "IoA" and preference[3:] in ("min", "max", "self", "other"): - - def metric_func(*args): - return self._compute_intersection_over_area(*args, area_type=preference[3:]) - - else: - valid_pref = False - - if valid_pref: - - def set_unique(*args): - return self._match_box_metrics_pref( - *args, preference=preference, metric_func=metric_func - ) - - for col in (f"mt_self_{preference}", f"mt_other_{preference}"): - if col not in cat1.colnames: - cat1[col] = np.ones(cat1.size) * -99.0 - col = f"mt_other_{preference}" - if col not in cat2.colnames: - cat2[col] = np.ones(cat2.size) * -99.0 - else: - valid_pref = False - - if not valid_pref: - raise ValueError("preference must be in: " + ", ".join(valid_pref_vals)) + if preference.lower() not in [v.lower() for v in self._valid_unique_preference_vals]: + raise ValueError( + "preference must be in: " + ", ".join(self._valid_unique_preference_vals) + ) + set_unique = self._set_unique_matching_function( + preference, minimum_share_fraction=minimum_share_fraction + ) # Run matching print(f"Unique Matches ({cat1.name})") @@ -216,53 +183,6 @@ def _match_mpref(self, cat1, ind1, cat2): return True return False - def _match_box_metrics_pref(self, cat1, ind1, cat2, preference, metric_func): - """ - Make the unique match by GIoU preference - - Parameters - ---------- - cat1: clevar.ClCatalog - Base catalog - ind1: int - Index of the cluster from cat1 to be matched - cat2: clevar.ClCatalog - Target catalog - - Returns - ------- - bool - Tells if the cluster was matched - """ - inds2 = cat2.ids2inds(cat1["mt_multi_self"][ind1]) - if len(inds2) > 0: - metric = metric_func( - *self._compute_areas( - *( - [ - cat1["ra_min"][ind1], - cat1["ra_max"][ind1], - cat1["dec_min"][ind1], - cat1["dec_max"][ind1], - ] - * np.ones(inds2.size)[:, None] - ).T, # for vec computation - cat2["ra_min"][inds2], - cat2["ra_max"][inds2], - cat2["dec_min"][inds2], - cat2["dec_max"][inds2], - ) - ) - for i_sort in np.argsort(metric)[::-1]: - ind2 = inds2[i_sort] - if cat2["mt_other"][ind2] is None: - cat1["mt_self"][ind1] = cat2["id"][ind2] - cat2["mt_other"][ind2] = cat1["id"][ind1] - cat1[f"mt_self_{preference}"][ind1] = metric[i_sort] - cat2[f"mt_other_{preference}"][ind2] = metric[i_sort] - return True - return False - def _match_apref(self, cat1, ind1, cat2, match_pref): """ Make the unique match by angular (or redshift) distance preference diff --git a/clevar/match/proximity.py b/clevar/match/proximity.py index 9973e4e..95714f4 100644 --- a/clevar/match/proximity.py +++ b/clevar/match/proximity.py @@ -2,12 +2,9 @@ The ProximityMatch class """ import numpy as np -from scipy.interpolate import InterpolatedUnivariateSpline as spline from .spatial import SpatialMatch -from ..geometry import units_bank, convert_units from ..catalog import ClData -from ..utils import str2dataunit class ProximityMatch(SpatialMatch): @@ -175,6 +172,9 @@ def _max_mt_distance(self, radius1, radius2, radius_selection): coeff2 = radius1 >= radius2 return coeff1 * radius1 + coeff2 * radius2 + def _set_unique_matching_function(self, preference, **kwargs): + pass + def match_from_config(self, cat1, cat2, match_config, cosmo=None): """ Make matching of catalogs based on a configuration dictionary @@ -231,13 +231,4 @@ def match_from_config(self, cat1, cat2, match_config, cosmo=None): # pylint: disable=arguments-out-of-order self.multiple(cat2, cat1, radius_selection, verbose=verbose) - if match_config["type"] in ("cat1", "cross"): - print("\n## Finding unique matches of catalog 1") - self.unique(cat1, cat2, match_config["preference"]) - if match_config["type"] in ("cat2", "cross"): - print("\n## Finding unique matches of catalog 2") - self.unique(cat2, cat1, match_config["preference"]) - - if match_config["type"] == "cross": - self.cross_match(cat1) - self.cross_match(cat2) + self._unique_match_from_config(cat1, cat2, match_config) diff --git a/clevar/match/spatial.py b/clevar/match/spatial.py index f9627b7..4219fdf 100644 --- a/clevar/match/spatial.py +++ b/clevar/match/spatial.py @@ -21,6 +21,8 @@ class SpatialMatch(Match): Steps in the matching """ + # pylint: disable=abstract-method + def __init__(self): Match.__init__(self) self.type = "Spatial" @@ -153,3 +155,31 @@ def _prep_radius_for_match(self, cat, match_radius, n_match_radius=1, cosmo=None redshift=cat["z"] if "z" in cat.tags else None, cosmo=cosmo, ) + + def _unique_match_from_config(self, cat1, cat2, match_config): + """ + Make matching of catalogs based on a configuration dictionary + + Parameters + ---------- + cat1: clevar.ClCatalog + ClCatalog 1 + cat2: clevar.ClCatalog + ClCatalog 2 + match_config: dict + Dictionary with the matching configuration. Keys must be: + + * `type`: type of matching, can be: `cat1`, `cat2`, `cross`. + * `preference`: Preference to set best match, can be: `more_massive`, + `angular_proximity`, `redshift_proximity`, `shared_member_fraction`. + """ + if match_config["type"] in ("cat1", "cross"): + print("\n## Finding unique matches of catalog 1") + self.unique(cat1, cat2, match_config["preference"]) + if match_config["type"] in ("cat2", "cross"): + print("\n## Finding unique matches of catalog 2") + self.unique(cat2, cat1, match_config["preference"]) + + if match_config["type"] == "cross": + self.cross_match(cat1) + self.cross_match(cat2) From b7486dfa67976e91b7d16c50a85bb12aad1ac752 Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 22 May 2024 17:50:01 +0200 Subject: [PATCH 20/27] fix bug --- clevar/match/box.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clevar/match/box.py b/clevar/match/box.py index 63b9161..5d6a06a 100644 --- a/clevar/match/box.py +++ b/clevar/match/box.py @@ -75,7 +75,7 @@ def multiple( valid_metric_vals = ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] if metric not in valid_metric_vals: raise ValueError("metric must be in:" + ", ".join(valid_metric_vals)) - self._set_metric_function(preference) + self._set_metric_function(metric) self._cat1_mmt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching ra2min, ra2max, dec2min, dec2max = ( From 98e76ed1da6189efa02af1a1be64eba99609bbcf Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 22 May 2024 17:50:32 +0200 Subject: [PATCH 21/27] add Match._init_matching_extra_cols function --- clevar/match/parent.py | 19 +++++++++++++++++++ clevar/match/proximity.py | 3 --- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/clevar/match/parent.py b/clevar/match/parent.py index 7e1fc58..f0019f5 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -70,6 +70,24 @@ def _set_unique_matching_function(self, preference, **kwargs): # pylint: disable=unused-argument raise NotImplementedError + def _init_matching_extra_cols(self, cat1, cat2, preference): + + if preference == "shared_member_fraction": + cat1["mt_frac_self"] = np.zeros(cat1.size) + cat2["mt_frac_other"] = np.zeros(cat2.size) + if "mt_frac_other" not in cat1.colnames: + cat1["mt_frac_other"] = np.zeros(cat1.size) + elif (preference.lower() == "giou") or ( + preference[:3] == "IoA" and preference[3:] in ("min", "max", "self", "other") + ): + for col in (f"mt_self_{preference}", f"mt_other_{preference}"): + if col not in cat1.colnames: + cat1[col] = np.ones(cat1.size) * -99.0 + col = f"mt_other_{preference}" + if col not in cat2.colnames: + cat2[col] = np.ones(cat2.size) * -99.0 + + def unique(self, cat1, cat2, preference, minimum_share_fraction=0): """Makes unique matchig, requires multiple matching to be made first @@ -117,6 +135,7 @@ def set_unique(*args): set_unique = self._set_unique_matching_function( preference, minimum_share_fraction=minimum_share_fraction ) + self._init_matching_extra_cols(cat1, cat2, preference) # Run matching print(f"Unique Matches ({cat1.name})") diff --git a/clevar/match/proximity.py b/clevar/match/proximity.py index 95714f4..a7abd23 100644 --- a/clevar/match/proximity.py +++ b/clevar/match/proximity.py @@ -172,9 +172,6 @@ def _max_mt_distance(self, radius1, radius2, radius_selection): coeff2 = radius1 >= radius2 return coeff1 * radius1 + coeff2 * radius2 - def _set_unique_matching_function(self, preference, **kwargs): - pass - def match_from_config(self, cat1, cat2, match_config, cosmo=None): """ Make matching of catalogs based on a configuration dictionary From 50c95de7a4f03f52da06087b7c05610407a0119a Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 22 May 2024 17:51:03 +0200 Subject: [PATCH 22/27] update tests --- tests/test_match.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_match.py b/tests/test_match.py index bd84d6d..49bd2f2 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -23,6 +23,7 @@ def test_parent(): assert_raises(NotImplementedError, mt.multiple, None, None) assert_raises(NotImplementedError, mt.match_from_config, None, None, None, None) assert_raises(ValueError, mt._get_dist_mt, None, None, "unknown match") + assert_raises(NotImplementedError, mt._set_unique_matching_function, None) def test_spatial(): @@ -555,6 +556,7 @@ def test_box(CosmoClass): print(cat2.data) cosmo = CosmoClass() mt = BoxMatch() + assert_raises(NotImplementedError, mt._get_metric, None) # test mask intersection res = np.zeros(2, dtype=bool) for i, add in enumerate([1, -1, 1, -1]): From be5b27e4387dc10e1d0330b038fff3b3d85bd6d0 Mon Sep 17 00:00:00 2001 From: m-aguena Date: Wed, 22 May 2024 19:17:20 +0200 Subject: [PATCH 23/27] update CCL vers --- .github/workflows/build_check.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/build_check.yml b/.github/workflows/build_check.yml index 7d55118..3785771 100644 --- a/.github/workflows/build_check.yml +++ b/.github/workflows/build_check.yml @@ -36,6 +36,7 @@ jobs: conda install -c conda-forge cmake swig git clone https://github.com/LSSTDESC/CCL cd CCL + git checkout v3.0.0 pip install . - name: Install healsparse from source run: | From f79cad9029bf1b974588298cad77d554ca3b47ca Mon Sep 17 00:00:00 2001 From: m-aguena Date: Thu, 23 May 2024 12:19:38 +0200 Subject: [PATCH 24/27] rm pytest.warns(None) from tests --- tests/test_catalog.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_catalog.py b/tests/test_catalog.py index a05e513..4faa9f7 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -66,10 +66,10 @@ def _base_cat_test(DataType, kwa=None, **quantities): # test warnings c["ra"] = 1.0 c.tag_column("ra", "newtag") - with pytest.warns(None) as record: + with pytest.warns() as record: c.tag_column("z", "newtag") assert f"{record._list[0].message}" == "tag newtag:ra being replaced by newtag:z" - with pytest.warns(None) as record: + with pytest.warns() as record: c.tag_column("dec", "z") assert f"{record._list[0].message}" == ( "There is a column with the same name as the tag setup." @@ -100,11 +100,11 @@ def test_catalog(): assert_raises(TypeError, c_._add_values, data=1) assert_raises(KeyError, c_._add_values, data=[1], mass=[1]) # Check creation of id col - with pytest.warns(None) as record: + with pytest.warns() as record: c_no_id = Catalog(name="no id", ra=[1, 2]) assert f"{record._list[0].message}" == "id column missing, additional one is being created." assert_equal(c_no_id["id"], ["0", "1"]) - with pytest.warns(None) as record: + with pytest.warns() as record: c_no_id = Catalog(name="no id") c_no_id["ra"] = [1, 2] assert f"{record._list[0].message}" == "id column missing, additional one is being created." @@ -126,7 +126,7 @@ def test_clcatalog(): c._repr_html_() c.show_mt_hist() # test repeated ids - with pytest.warns(None) as record: + with pytest.warns() as record: c0 = ClCatalog(name="test", id=["a", "a"], z=[0, 0]) assert f"{record._list[0].message}" == "Repeated ID's in id column, adding suffix _r# to them." assert_equal(c0["id"], ["a_r1", "a_r2"]) From 42115d054b57c9faee54ed651c4c25f86f4f7f0e Mon Sep 17 00:00:00 2001 From: m-aguena Date: Thu, 23 May 2024 12:30:56 +0200 Subject: [PATCH 25/27] exclude mpl version 3.9.0 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 660aba7..ecf32d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ astropy>=4, !=5.0 -matplotlib +matplotlib !=3.9.0 numpy>=1.17 scipy>=1.3 healpy>=1.16 From b0ae63191182bc2a7baa9363fcc0683484815d4f Mon Sep 17 00:00:00 2001 From: m-aguena Date: Thu, 23 May 2024 12:36:30 +0200 Subject: [PATCH 26/27] formatting --- clevar/match/parent.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/clevar/match/parent.py b/clevar/match/parent.py index f0019f5..8448330 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -71,7 +71,6 @@ def _set_unique_matching_function(self, preference, **kwargs): raise NotImplementedError def _init_matching_extra_cols(self, cat1, cat2, preference): - if preference == "shared_member_fraction": cat1["mt_frac_self"] = np.zeros(cat1.size) cat2["mt_frac_other"] = np.zeros(cat2.size) @@ -87,7 +86,6 @@ def _init_matching_extra_cols(self, cat1, cat2, preference): if col not in cat2.colnames: cat2[col] = np.ones(cat2.size) * -99.0 - def unique(self, cat1, cat2, preference, minimum_share_fraction=0): """Makes unique matchig, requires multiple matching to be made first From 19b33aaf90d40992f0ca8c8cf658eeb89772daf3 Mon Sep 17 00:00:00 2001 From: m-aguena Date: Thu, 23 May 2024 12:37:53 +0200 Subject: [PATCH 27/27] Update version to 0.15.0 --- clevar/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clevar/version.py b/clevar/version.py index 9a02a8d..9eeaaf1 100644 --- a/clevar/version.py +++ b/clevar/version.py @@ -1,2 +1,2 @@ """Version of ClEvaR""" -__version__ = "0.14.1" +__version__ = '0.15.0'