diff --git a/.github/workflows/build_check.yml b/.github/workflows/build_check.yml index 7d55118..3785771 100644 --- a/.github/workflows/build_check.yml +++ b/.github/workflows/build_check.yml @@ -36,6 +36,7 @@ jobs: conda install -c conda-forge cmake swig git clone https://github.com/LSSTDESC/CCL cd CCL + git checkout v3.0.0 pip install . - name: Install healsparse from source run: | diff --git a/clevar/catalog/catalog.py b/clevar/catalog/catalog.py index 59ebca4..f2ec804 100644 --- a/clevar/catalog/catalog.py +++ b/clevar/catalog/catalog.py @@ -813,6 +813,7 @@ def id_dict_list(self): def __init__(self, name, tags=None, labels=None, **kwargs): if tags is not None and not isinstance(tags, dict): raise ValueError("tags must be dict.") + self.mt_input = kwargs.pop("mt_input", None) self.__id_dict_list = {} Catalog.__init__( self, diff --git a/clevar/match/__init__.py b/clevar/match/__init__.py index 7006f38..11066f0 100644 --- a/clevar/match/__init__.py +++ b/clevar/match/__init__.py @@ -7,6 +7,7 @@ from .parent import Match from .proximity import ProximityMatch from .membership import MembershipMatch +from .box import BoxMatch def get_matched_pairs(cat1, cat2, matching_type, mask1=None, mask2=None): diff --git a/clevar/match/box.py b/clevar/match/box.py new file mode 100644 index 0000000..5d6a06a --- /dev/null +++ b/clevar/match/box.py @@ -0,0 +1,408 @@ +"""@file box.py +The BoxMatch class +""" +import numpy as np + +from .spatial import SpatialMatch +from ..catalog import ClData + +_area_type_funcs = { + "min": lambda area1, area2: np.min([area1, area2], axis=0), + "max": lambda area1, area2: np.max([area1, area2], axis=0), + "self": lambda area1, area2: area1, + "other": lambda area1, area2: area2, +} + + +class BoxMatch(SpatialMatch): + """ + BoxMatch Class + + Attributes + ---------- + type : str + Type of matching object. Set to "Box" + history : list + Steps in the matching + """ + + def __init__(self): + SpatialMatch.__init__(self) + self.type = "Box" + self._valid_unique_preference_vals += ["GIoU"] + [ + f"IoA{end}" for end in ("min", "max", "self", "other") + ] + + def multiple( + self, + cat1, + cat2, + metric="GIoU", + metric_cut=0.5, + rel_area=0.5, + verbose=True, + detailed_print_only=False, + ): + """ + Make the one way multiple matching + + Parameters + ---------- + cat1: clevar.ClCatalog + Base catalog + cat2: clevar.ClCatalog + Target catalog + metric: str (optional) + Metric to be used for matching. Can be: + "GIoU" (generalized Intersection over Union); + "IoA*" (Intersection over Area, with area choice in ["min", "max", "self", "other"]); + metric_cut: float + Minimum value of metric for match. + rel_area: float + Minimum relative size of area for match. + verbose: bool + Print result for individual matches. + detailed_print_only: bool + Only prints detailed comparisons for matching, does not register matches in catalogs. + """ + # pylint: disable=arguments-renamed + # pylint: disable=too-many-locals + if cat1.mt_input is None: + raise AttributeError("cat1.mt_input is None, run prep_cat_for_match first.") + if cat2.mt_input is None: + raise AttributeError("cat2.mt_input is None, run prep_cat_for_match first.") + + valid_metric_vals = ["GIoU"] + [f"IoA{end}" for end in ("min", "max", "self", "other")] + if metric not in valid_metric_vals: + raise ValueError("metric must be in:" + ", ".join(valid_metric_vals)) + self._set_metric_function(metric) + + self._cat1_mmt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching + ra2min, ra2max, dec2min, dec2max = ( + cat2[c] for c in ("ra_min", "ra_max", "dec_min", "dec_max") + ) + z2min, z2max = (cat2.mt_input[c] for c in ("zmin", "zmax")) + print(f"Finding candidates ({cat1.name})") + for ind1, (ra1min, ra1max, dec1min, dec1max, z1min, z1max) in enumerate( + zip( + *(cat1[c] for c in ("ra_min", "ra_max", "dec_min", "dec_max")), + *(cat1.mt_input[c] for c in ("zmin", "zmax")), + ) + ): + self._detailed_print(0, locals()) + # crop in redshift range + mask = (z2max >= z1min) * (z2min <= z1max) + if mask.any(): + self._detailed_print(1, locals()) + # makes square crop with intersection + mask[mask] *= self.mask_intersection( + ra1min, + ra1max, + dec1min, + dec1max, + ra2min[mask], + ra2max[mask], + dec2min[mask], + dec2max[mask], + ) + if mask.any(): + self._detailed_print(2, locals()) + area1, area2, intersection, outter = self._compute_areas( + *( + [ra1min, ra1max, dec1min, dec1max] * np.ones(mask.sum())[:, None] + ).T, # for vec computation + ra2min[mask], + ra2max[mask], + dec2min[mask], + dec2max[mask], + ) + # makes metric crop + metric_value = self._get_metric(area1, area2, intersection, outter) + self._detailed_print(3, locals()) + if not detailed_print_only: + for id2 in cat2["id"][mask][ + (metric_value >= metric_cut) + * (area1 / area2 >= rel_area) + * (area2 / area1 >= rel_area) + ]: + cat1["mt_multi_self"][ind1].append(id2) + ind2 = int(cat2.id_dict[id2]) + cat2["mt_multi_other"][ind2].append(cat1["id"][ind1]) + self._cat1_mmt[ind1] = True + else: + self._detailed_print(4, locals()) + else: + self._detailed_print(5, locals()) + if verbose: + self._prt_cand_mt(cat1, ind1) + if not detailed_print_only: + hist = { + "func": "multiple", + "cats": f"{cat1.name}, {cat2.name}", + } + self._rm_dup_add_hist(cat1, cat2, hist) + + def _detailed_print(self, i, locs): + if not locs["detailed_print_only"]: + return + if i == 0: + area = ( + self._compute_area(locs["ra1min"], locs["ra1max"], locs["dec1min"], locs["dec1max"]) + * 3600 + ) + print( + "\n\nCluster:", + "(", + ", ".join([f"{locs[v]:.4f}" for v in ("ra1min", "ra1max", "dec1min", "dec1max")]), + ") (", + ", ".join([f"{locs[v]:.4f}" for v in ("z1min", "z1max")]), + ")", + f"( area: {area:.2f} arcmin2 )", + ) + elif i == 1: + print(f" * z pass: {locs['mask'].sum():,}") + elif i == 2: + print(f" * intersection pass: {locs['mask'].sum():,}") + elif i == 3: + for val in zip( + locs["cat2"]["id"][locs["mask"]], + locs["ra2min"][locs["mask"]], + locs["ra2max"][locs["mask"]], + locs["dec2min"][locs["mask"]], + locs["dec2max"][locs["mask"]], + locs["area1"], + locs["area2"], + locs["intersection"], + locs["outter"], + locs["metric_value"], + locs["metric_value"] >= locs["metric_cut"], + locs["area1"] / locs["area2"] >= locs["rel_area"], + locs["area2"] / locs["area1"] >= locs["rel_area"], + ): + print(" Candidate:", val[0]) + print(f" Pass : {bool(np.prod(val[10:]))} (", *val[10:], ")") + print( + " Areas :", + ", ".join([f"{v*3600:.2f}" for v in val[6:9]]), + f"arcmin2 ( {locs['metric']}: {val[9]:.2g} )", + ) + print( + " Coords : (", + ", ".join([f"{v:.4f}" for v in val[1:5]]), + ")", + ) + elif i == 4: + print(" * intersection fail") + elif i == 5: + print(" * z fail!") + else: + raise ValueError(f"i={i} invalid!") + + def mask_intersection(self, ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max): + """Mask clusters without intersection + + Return + ------ + mask + If clusters have intersection + """ + mask = ra1min <= ra2max + if not mask.any(): + return mask + mask *= ra2min <= ra1max + if not mask.any(): + return mask + mask *= dec1min <= dec2max + if not mask.any(): + return mask + mask *= dec2min <= dec1max + if not mask.any(): + return mask + return mask + + def _compute_area(self, ra_min, ra_max, dec_min, dec_max): + return (ra_max - ra_min) * (dec_max - dec_min) + + def _compute_areas(self, ra1min, ra1max, dec1min, dec1max, ra2min, ra2max, dec2min, dec2max): + area1 = self._compute_area(ra1max, ra1min, dec1max, dec1min) + area2 = self._compute_area(ra2max, ra2min, dec2max, dec2min) + # areas + intersection = self._compute_area( + np.max([ra1min, ra2min], axis=0), + np.min([ra1max, ra2max], axis=0), + np.max([dec1min, dec2min], axis=0), + np.min([dec1max, dec2max], axis=0), + ) + outter = self._compute_area( + np.min([ra1min, ra2min], axis=0), + np.max([ra1max, ra2max], axis=0), + np.min([dec1min, dec2min], axis=0), + np.max([dec1max, dec2max], axis=0), + ) + return area1, area2, intersection, outter + + def _compute_giou(self, area1, area2, intersection, outter): + union = area1 + area2 - intersection + return intersection / union + union / outter - 1.0 + + def _compute_intersection_over_area(self, area1, area2, intersection, outter, area_type): + # pylint: disable=unused-argument + return intersection / _area_type_funcs[area_type](area1, area2) + + def prep_cat_for_match( + self, + cat, + delta_z, + n_delta_z=1, + ra_min="ra_min", + ra_max="ra_max", + dec_min="dec_min", + dec_max="dec_max", + ): + """ + Adds zmin and zmax to cat.mt_input and tags ra/dec min/max cols + + Parameters + ---------- + cat: clevar.ClCatalog + Input ClCatalog + delta_z: float, string + Defines the zmin, zmax for matching. Options are: + + * `'cat'` - uses redshift properties of the catalog + * `'spline.filename'` - interpolates data in 'filename' (z, zmin, zmax) fmt + * `float` - uses delta_z*(1+z) + * `None` - does not use z + + n_delta_z: float + Number of delta_z to be used in the matching + """ + # pylint: disable=arguments-differ + # pylint: disable=unused-argument + print("## Prep mt_cols") + if cat.mt_input is None: + cat.mt_input = ClData() + self._prep_z_for_match(cat, delta_z, n_delta_z) + for name in ("ra_min", "ra_max", "dec_min", "dec_max"): + if name not in cat.tags: + cat.tag_column(locals()[name], name) + self.history.append( + { + "func": "prep_cat_for_match", + "cat": cat.name, + "delta_z": delta_z, + "n_delta_z": n_delta_z, + } + ) + + def _get_metric(self, preference): + # pylint: disable=method-hidden + raise NotImplementedError + + def _set_metric_function(self, preference): + if preference.lower() == "giou": + self._get_metric = self._compute_giou + + elif preference[:3] == "IoA" and preference[3:] in ("min", "max", "self", "other"): + self._get_metric = lambda *args: self._compute_intersection_over_area( + *args, area_type=preference[3:] + ) + + def _set_unique_matching_function(self, preference, **kwargs): + self._set_metric_function(preference) + + def set_unique(*args): + return self._match_box_metrics_pref( + *args, preference=preference, metric_func=self._get_metric + ) + + return set_unique + + def _match_box_metrics_pref(self, cat1, ind1, cat2, preference, metric_func): + """ + Make the unique match by GIoU preference + + Parameters + ---------- + cat1: clevar.ClCatalog + Base catalog + ind1: int + Index of the cluster from cat1 to be matched + cat2: clevar.ClCatalog + Target catalog + + Returns + ------- + bool + Tells if the cluster was matched + """ + inds2 = cat2.ids2inds(cat1["mt_multi_self"][ind1]) + if len(inds2) > 0: + metric = metric_func( + *self._compute_areas( + *( + [ + cat1["ra_min"][ind1], + cat1["ra_max"][ind1], + cat1["dec_min"][ind1], + cat1["dec_max"][ind1], + ] + * np.ones(inds2.size)[:, None] + ).T, # for vec computation + cat2["ra_min"][inds2], + cat2["ra_max"][inds2], + cat2["dec_min"][inds2], + cat2["dec_max"][inds2], + ) + ) + for i_sort in np.argsort(metric)[::-1]: + ind2 = inds2[i_sort] + if cat2["mt_other"][ind2] is None: + cat1["mt_self"][ind1] = cat2["id"][ind2] + cat2["mt_other"][ind2] = cat1["id"][ind1] + cat1[f"mt_self_{preference}"][ind1] = metric[i_sort] + cat2[f"mt_other_{preference}"][ind2] = metric[i_sort] + return True + return False + + def match_from_config(self, cat1, cat2, match_config, cosmo=None): + """ + Make matching of catalogs based on a configuration dictionary + + Parameters + ---------- + cat1: clevar.ClCatalog + ClCatalog 1 + cat2: clevar.ClCatalog + ClCatalog 2 + match_config: dict + Dictionary with the matching configuration. Keys must be: + + * `type`: type of matching, can be: `cat1`, `cat2`, `cross`. + * `catalog1`: `kwargs` used in `prep_cat_for_match(cat1, **kwargs)` (minus `cosmo`). + * `catalog2`: `kwargs` used in `prep_cat_for_match(cat2, **kwargs)` (minus `cosmo`). + * `preference`: Preference to set best match, can be: `more_massive`, + `angular_proximity`, `redshift_proximity`, `shared_member_fraction`. + * `verbose`: Print result for individual matches (default=`True`). + + cosmo: clevar.Cosmology object + Cosmology object for when radius has physical units + """ + if match_config["type"] not in ("cat1", "cat2", "cross"): + raise ValueError("config type must be cat1, cat2 or cross") + if match_config["type"] in ("cat1", "cross"): + print("\n## ClCatalog 1") + self.prep_cat_for_match(cat1, **match_config["catalog1"]) + if match_config["type"] in ("cat2", "cross"): + print("\n## ClCatalog 2") + self.prep_cat_for_match(cat2, **match_config["catalog2"]) + + verbose = match_config.get("verbose", True) + if match_config["type"] in ("cat1", "cross"): + print("\n## Multiple match (catalog 1)") + self.multiple(cat1, cat2, verbose=verbose) + if match_config["type"] in ("cat2", "cross"): + print("\n## Multiple match (catalog 2)") + self.multiple(cat2, cat1, verbose=verbose) + + self._unique_match_from_config(cat1, cat2, match_config) diff --git a/clevar/match/membership.py b/clevar/match/membership.py index 099b59a..fe16f12 100644 --- a/clevar/match/membership.py +++ b/clevar/match/membership.py @@ -26,6 +26,7 @@ def __init__(self): Match.__init__(self) self.type = "Membership" self.matched_mems = None + self._valid_unique_preference_vals += ["shared_member_fraction"] def multiple(self, cat1, cat2, minimum_share_fraction=0, verbose=True): """ @@ -143,6 +144,12 @@ def _add_pmem(self, cat1_share_mems, ind1, cat2_id, pmem1): """ cat1_share_mems[ind1][cat2_id] = cat1_share_mems[ind1].get(cat2_id, 0) + pmem1 + def _set_unique_matching_function(self, preference, **kwargs): + def set_unique(*args): + return self._match_sharepref(*args, kwargs["minimum_share_fraction"]) + + return set_unique + def save_shared_members(self, cat1, cat2, fileprefix): """ Saves dictionaries of shared members diff --git a/clevar/match/parent.py b/clevar/match/parent.py index 9eb1123..8448330 100644 --- a/clevar/match/parent.py +++ b/clevar/match/parent.py @@ -18,6 +18,14 @@ def __init__(self): self._cat1_mt = None self._cat1_mmt = None + # internal values for unique matching + self._valid_unique_preference_vals = [ + "more_massive", + "angular_proximity", + "redshift_proximity", + "shared_member_fraction", + ] + def prep_cat_for_match(self, cat, *args): """ Prepare the catalog for matching, will fill the cat.mt_input object. @@ -58,6 +66,26 @@ def _rm_dup_add_hist(self, cat1, cat2, hist): cat1._set_mt_hist(self.history) cat2._set_mt_hist(self.history) + def _set_unique_matching_function(self, preference, **kwargs): + # pylint: disable=unused-argument + raise NotImplementedError + + def _init_matching_extra_cols(self, cat1, cat2, preference): + if preference == "shared_member_fraction": + cat1["mt_frac_self"] = np.zeros(cat1.size) + cat2["mt_frac_other"] = np.zeros(cat2.size) + if "mt_frac_other" not in cat1.colnames: + cat1["mt_frac_other"] = np.zeros(cat1.size) + elif (preference.lower() == "giou") or ( + preference[:3] == "IoA" and preference[3:] in ("min", "max", "self", "other") + ): + for col in (f"mt_self_{preference}", f"mt_other_{preference}"): + if col not in cat1.colnames: + cat1[col] = np.ones(cat1.size) * -99.0 + col = f"mt_other_{preference}" + if col not in cat2.colnames: + cat2[col] = np.ones(cat2.size) * -99.0 + def unique(self, cat1, cat2, preference, minimum_share_fraction=0): """Makes unique matchig, requires multiple matching to be made first @@ -75,10 +103,13 @@ def unique(self, cat1, cat2, preference, minimum_share_fraction=0): Minimum share fraction to consider in matches (default=0). """ self._cat1_mt = np.zeros(cat1.size, dtype=bool) # To add flag in multi step matching + + # set order to match catalogs if "mass" in cat1.tags: i_vals = np.argsort(cat1["mass"])[::-1] else: i_vals = range(cat1.size) + if preference == "more_massive": def set_unique(*args): @@ -94,25 +125,25 @@ def set_unique(*args): def set_unique(*args): return self._match_apref(*args, "redshift_proximity") - elif preference == "shared_member_fraction": - cat1["mt_frac_self"] = np.zeros(cat1.size) - cat2["mt_frac_other"] = np.zeros(cat2.size) - if "mt_frac_other" not in cat1.colnames: - cat1["mt_frac_other"] = np.zeros(cat1.size) - - def set_unique(*args): - return self._match_sharepref(*args, minimum_share_fraction) - else: - raise ValueError( - "preference must be 'more_massive', 'angular_proximity' or 'redshift_proximity'" + if preference.lower() not in [v.lower() for v in self._valid_unique_preference_vals]: + raise ValueError( + "preference must be in: " + ", ".join(self._valid_unique_preference_vals) + ) + set_unique = self._set_unique_matching_function( + preference, minimum_share_fraction=minimum_share_fraction ) + self._init_matching_extra_cols(cat1, cat2, preference) + + # Run matching print(f"Unique Matches ({cat1.name})") for ind1 in i_vals: if cat1["mt_self"][ind1] is None: self._cat1_mt[ind1] = set_unique(cat1, ind1, cat2) self._cat1_mt *= cat1.get_matching_mask("self") # In case ang pref removes a match print(f'* {cat1.get_matching_mask("self").sum():,}/{cat1.size:,} objects matched.') + + # Add match conf to history cfg = {"func": "unique", "cats": f"{cat1.name}, {cat2.name}", "preference": preference} if preference == "shared_member_fraction": cfg["minimum_share_fraction"] = minimum_share_fraction diff --git a/clevar/match/proximity.py b/clevar/match/proximity.py index 5aebcd0..a7abd23 100644 --- a/clevar/match/proximity.py +++ b/clevar/match/proximity.py @@ -2,15 +2,12 @@ The ProximityMatch class """ import numpy as np -from scipy.interpolate import InterpolatedUnivariateSpline as spline -from .parent import Match -from ..geometry import units_bank, convert_units +from .spatial import SpatialMatch from ..catalog import ClData -from ..utils import str2dataunit -class ProximityMatch(Match): +class ProximityMatch(SpatialMatch): """ ProximityMatch Class @@ -23,7 +20,7 @@ class ProximityMatch(Match): """ def __init__(self): - Match.__init__(self) + SpatialMatch.__init__(self) self.type = "Proximity" def multiple(self, cat1, cat2, radius_selection="max", verbose=True): @@ -125,72 +122,13 @@ def prep_cat_for_match( """ # pylint: disable=arguments-differ print("## Prep mt_cols") - cat.mt_input = ClData() + if cat.mt_input is None: + cat.mt_input = ClData() # Set zmin, zmax - if delta_z is None: - # No z use - print("* zmin|zmax set to -1|10") - cat.mt_input["zmin"] = -1 * np.ones(cat.size) - cat.mt_input["zmax"] = 10 * np.ones(cat.size) - elif delta_z == "cat": - # Values from catalog - if "zmin" in cat.tags and "zmax" in cat.tags: - print("* zmin|zmax from cat cols (zmin, zmax)") - cat.mt_input["zmin"] = self._rescale_z(cat["z"], cat["zmin"], n_delta_z) - cat.mt_input["zmax"] = self._rescale_z(cat["z"], cat["zmax"], n_delta_z) - # create zmin/zmax if not there - elif "z_err" in cat.tags: - print("* zmin|zmax from cat cols (err)") - cat.mt_input["zmin"] = cat["z"] - n_delta_z * cat["z_err"] - cat.mt_input["zmax"] = cat["z"] + n_delta_z * cat["z_err"] - else: - raise ValueError("ClCatalog must contain zmin, zmax or z_err for this matching.") - elif isinstance(delta_z, str): - # zmin/zmax in auxiliar file - print("* zmin|zmax from aux file") - order = 3 - zval, zvmin, zvmax = np.loadtxt(delta_z) - zvmin = self._rescale_z(zval, zvmin, n_delta_z) - zvmax = self._rescale_z(zval, zvmax, n_delta_z) - cat.mt_input["zmin"] = spline(zval, zvmin, k=order)(cat["z"]) - cat.mt_input["zmax"] = spline(zval, zvmax, k=order)(cat["z"]) - elif isinstance(delta_z, (int, float)): - # zmin/zmax from sigma_z*(1+z) - print("* zmin|zmax from config value") - cat.mt_input["zmin"] = cat["z"] - delta_z * n_delta_z * (1.0 + cat["z"]) - cat.mt_input["zmax"] = cat["z"] + delta_z * n_delta_z * (1.0 + cat["z"]) - + self._prep_z_for_match(cat, delta_z, n_delta_z) # Set angular radius - if match_radius == "cat": - print("* ang radius from cat") - in_rad, in_rad_unit = cat["radius"], cat.radius_unit - # when mass is passed to radius: m#b or m#c - background/critical - if in_rad_unit.lower() != "mpc" and in_rad_unit[0].lower() == "m": - print(f" * Converting mass ({in_rad_unit}) ->radius") - delta, mtyp = str2dataunit( - in_rad_unit[1:], - ["b", "c"], - err_msg=( - f"Mass unit ({in_rad_unit}) must be in format " - "'m#b' (background) or 'm#c' (critical)" - ), - ) - in_rad = cosmo.eval_mass2radius( - in_rad, cat["z"], delta, mass_type={"b": "background", "c": "critical"}[mtyp] - ) - in_rad_unit = "mpc" - else: - print("* ang radius from set scale") - in_rad, in_rad_unit = str2dataunit(match_radius, units_bank.keys()) - in_rad *= np.ones(cat.size) - # convert to degrees - cat.mt_input["ang"] = convert_units( - in_rad * n_match_radius, - in_rad_unit, - "degrees", - redshift=cat["z"] if "z" in cat.tags else None, - cosmo=cosmo, - ) + self._prep_radius_for_match(cat, match_radius, n_match_radius=n_match_radius, cosmo=cosmo) + # add conf to history of matching self.history.append( { "func": "prep_cat_for_match", @@ -203,25 +141,6 @@ def prep_cat_for_match( } ) - def _rescale_z(self, z, zlim, n_rescale): - """Rescale zmin/zmax by a factor n_rescale - - Parameters - ---------- - z: float, array - Redshift value - zlim: float, array - Redshift limit - n_rescale: float - Value to rescale zlim - - Returns - ------- - float, array - Rescaled z limit - """ - return z + n_rescale * (zlim - z) - def _max_mt_distance(self, radius1, radius2, radius_selection): """Get maximum angular distance allowed for the matching @@ -309,13 +228,4 @@ def match_from_config(self, cat1, cat2, match_config, cosmo=None): # pylint: disable=arguments-out-of-order self.multiple(cat2, cat1, radius_selection, verbose=verbose) - if match_config["type"] in ("cat1", "cross"): - print("\n## Finding unique matches of catalog 1") - self.unique(cat1, cat2, match_config["preference"]) - if match_config["type"] in ("cat2", "cross"): - print("\n## Finding unique matches of catalog 2") - self.unique(cat2, cat1, match_config["preference"]) - - if match_config["type"] == "cross": - self.cross_match(cat1) - self.cross_match(cat2) + self._unique_match_from_config(cat1, cat2, match_config) diff --git a/clevar/match/spatial.py b/clevar/match/spatial.py new file mode 100644 index 0000000..4219fdf --- /dev/null +++ b/clevar/match/spatial.py @@ -0,0 +1,185 @@ +"""@file spatial.py +The SpatialMatch class +""" +import numpy as np +from scipy.interpolate import InterpolatedUnivariateSpline as spline + +from .parent import Match +from ..geometry import units_bank, convert_units +from ..utils import str2dataunit + + +class SpatialMatch(Match): + """ + SpatialMatch Class + + Attributes + ---------- + type : str + Type of matching object. Set to "Spatial" + history : list + Steps in the matching + """ + + # pylint: disable=abstract-method + + def __init__(self): + Match.__init__(self) + self.type = "Spatial" + + def _prep_z_for_match( + self, + cat, + delta_z, + n_delta_z=1, + ): + """ + Adds zmin and zmax to cat.mt_input + + Parameters + ---------- + cat: clevar.ClCatalog + Input ClCatalog + delta_z: float, string + Defines the zmin, zmax for matching. Options are: + + * `'cat'` - uses redshift properties of the catalog + * `'spline.filename'` - interpolates data in 'filename' (z, zmin, zmax) fmt + * `float` - uses delta_z*(1+z) + * `None` - does not use z + + n_delta_z: float + Number of delta_z to be used in the matching + """ + print("### Prep z_cols") + if delta_z is None: + # No z use + print("* zmin|zmax set to -1|10") + cat.mt_input["zmin"] = -1 * np.ones(cat.size) + cat.mt_input["zmax"] = 10 * np.ones(cat.size) + elif delta_z == "cat": + # Values from catalog + if "zmin" in cat.tags and "zmax" in cat.tags: + print("* zmin|zmax from cat cols (zmin, zmax)") + cat.mt_input["zmin"] = self._rescale_z(cat["z"], cat["zmin"], n_delta_z) + cat.mt_input["zmax"] = self._rescale_z(cat["z"], cat["zmax"], n_delta_z) + # create zmin/zmax if not there + elif "z_err" in cat.tags: + print("* zmin|zmax from cat cols (err)") + cat.mt_input["zmin"] = cat["z"] - n_delta_z * cat["z_err"] + cat.mt_input["zmax"] = cat["z"] + n_delta_z * cat["z_err"] + else: + raise ValueError("ClCatalog must contain zmin, zmax or z_err for this matching.") + elif isinstance(delta_z, str): + # zmin/zmax in auxiliar file + print("* zmin|zmax from aux file") + order = 3 + zval, zvmin, zvmax = np.loadtxt(delta_z) + zvmin = self._rescale_z(zval, zvmin, n_delta_z) + zvmax = self._rescale_z(zval, zvmax, n_delta_z) + cat.mt_input["zmin"] = spline(zval, zvmin, k=order)(cat["z"]) + cat.mt_input["zmax"] = spline(zval, zvmax, k=order)(cat["z"]) + elif isinstance(delta_z, (int, float)): + # zmin/zmax from sigma_z*(1+z) + print("* zmin|zmax from config value") + cat.mt_input["zmin"] = cat["z"] - delta_z * n_delta_z * (1.0 + cat["z"]) + cat.mt_input["zmax"] = cat["z"] + delta_z * n_delta_z * (1.0 + cat["z"]) + + def _rescale_z(self, z, zlim, n_rescale): + """Rescale zmin/zmax by a factor n_rescale + + Parameters + ---------- + z: float, array + Redshift value + zlim: float, array + Redshift limit + n_rescale: float + Value to rescale zlim + + Returns + ------- + float, array + Rescaled z limit + """ + return z + n_rescale * (zlim - z) + + def _prep_radius_for_match(self, cat, match_radius, n_match_radius=1, cosmo=None): + """ + Adds radius to cat.mt_input + + Parameters + ---------- + cat: clevar.ClCatalog + Input ClCatalog + + match_radius: string + Defines the radius for matching. Options are: + + * `'cat'` - uses the radius in the catalog + * `'value unit'` - used fixed value (ex: `1 arcsec`, `1 Mpc`) + + n_match_radius: float + Multiplies the radius of the matchingi + cosmo: clevar.Cosmology object + Cosmology object for when radius has physical units + """ + print("### Prep ang_cols") + if match_radius == "cat": + print("* ang radius from cat") + in_rad, in_rad_unit = cat["radius"], cat.radius_unit + # when mass is passed to radius: m#b or m#c - background/critical + if in_rad_unit.lower() != "mpc" and in_rad_unit[0].lower() == "m": + print(f" * Converting mass ({in_rad_unit}) ->radius") + delta, mtyp = str2dataunit( + in_rad_unit[1:], + ["b", "c"], + err_msg=( + f"Mass unit ({in_rad_unit}) must be in format " + "'m#b' (background) or 'm#c' (critical)" + ), + ) + in_rad = cosmo.eval_mass2radius( + in_rad, cat["z"], delta, mass_type={"b": "background", "c": "critical"}[mtyp] + ) + in_rad_unit = "mpc" + else: + print("* ang radius from set scale") + in_rad, in_rad_unit = str2dataunit(match_radius, units_bank.keys()) + in_rad *= np.ones(cat.size) + # convert to degrees + cat.mt_input["ang"] = convert_units( + in_rad * n_match_radius, + in_rad_unit, + "degrees", + redshift=cat["z"] if "z" in cat.tags else None, + cosmo=cosmo, + ) + + def _unique_match_from_config(self, cat1, cat2, match_config): + """ + Make matching of catalogs based on a configuration dictionary + + Parameters + ---------- + cat1: clevar.ClCatalog + ClCatalog 1 + cat2: clevar.ClCatalog + ClCatalog 2 + match_config: dict + Dictionary with the matching configuration. Keys must be: + + * `type`: type of matching, can be: `cat1`, `cat2`, `cross`. + * `preference`: Preference to set best match, can be: `more_massive`, + `angular_proximity`, `redshift_proximity`, `shared_member_fraction`. + """ + if match_config["type"] in ("cat1", "cross"): + print("\n## Finding unique matches of catalog 1") + self.unique(cat1, cat2, match_config["preference"]) + if match_config["type"] in ("cat2", "cross"): + print("\n## Finding unique matches of catalog 2") + self.unique(cat2, cat1, match_config["preference"]) + + if match_config["type"] == "cross": + self.cross_match(cat1) + self.cross_match(cat2) diff --git a/clevar/version.py b/clevar/version.py index 9a02a8d..9eeaaf1 100644 --- a/clevar/version.py +++ b/clevar/version.py @@ -1,2 +1,2 @@ """Version of ClEvaR""" -__version__ = "0.14.1" +__version__ = '0.15.0' diff --git a/examples/box_matching.ipynb b/examples/box_matching.ipynb new file mode 100644 index 0000000..a9654fd --- /dev/null +++ b/examples/box_matching.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matching catalogs based on square boxes (simple)\n", + "Matching two catalogs based on boxes based on a configuration dictionary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ClCatalogs\n", + "Given some input data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from astropy.table import Table\n", + "\n", + "input1 = Table(\n", + " {\n", + " \"ID\": [f\"CL{i}\" for i in range(5)],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25, 20],\n", + " \"DEC\": [0.0, 0.0, 0.0, 0.0, 0.0],\n", + " \"Z\": [0.2, 0.3, 0.25, 0.4, 0.35],\n", + " \"MASS\": [10**13.5, 10**13.5, 10**13.3, 10**13.8, 10**14],\n", + " }\n", + ")\n", + "input2 = Table(\n", + " {\n", + " \"ID\": [\"CL0\", \"CL1\", \"CL2\", \"CL3\"],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25],\n", + " \"DEC\": [0.0, 0, 0, 0],\n", + " \"Z\": [0.3, 0.2, 0.25, 0.4],\n", + " \"MASS\": [10**13.3, 10**13.4, 10**13.5, 10**13.8],\n", + " }\n", + ")\n", + "for col in (\"RA\", \"DEC\"):\n", + " input1[f\"{col}_MAX\"] = input1[col] + 0.01\n", + " input1[f\"{col}_MIN\"] = input1[col] - 0.01\n", + " input2[f\"{col}_MAX\"] = input2[col] + 0.01\n", + " input2[f\"{col}_MIN\"] = input2[col] - 0.01\n", + "display(input1)\n", + "display(input2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create two `ClCatalog` objects, they have the same properties of `astropy` tables with additional functionality. You can tag the main properties of the catalog, or have columns with those names (see `catalogs.ipynb` for detailts). For the box matching, the main tags/columns to be included are:\n", + "- `id` - if not included, one will be assigned\n", + "- `ra_min`, `ra_max` (in degrees) - necessary\n", + "- `dec_min`, `dec_max` (in degrees) - necessary\n", + "- `z` - necessary if used as matching criteria" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.catalog import ClCatalog\n", + "\n", + "tags = {\"id\": \"ID\", \"ra\": \"RA\", \"dec\": \"DEC\", \"z\": \"Z\", \"mass\": \"MASS\"}\n", + "c1 = ClCatalog(\"Cat1\", data=input1, tags=tags)\n", + "c2 = ClCatalog(\"Cat2\", data=input2, tags=tags)\n", + "\n", + "# Format for nice display\n", + "for c in (\"ra\", \"dec\", \"z\", \"ra_min\", \"dec_min\", \"ra_max\", \"dec_max\"):\n", + " c1[c].info.format = \".2f\"\n", + " c2[c].info.format = \".2f\"\n", + "for c in (\"mass\",):\n", + " c1[c].info.format = \".2e\"\n", + " c2[c].info.format = \".2e\"\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `ClCatalog` object can also be read directly from a file,\n", + "for details, see catalogs.ipynb." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Matching\n", + "Import the `BoxMatch` and create a object for matching" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import BoxMatch\n", + "\n", + "mt = BoxMatch()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prepare the configuration. The main values are:\n", + "\n", + "- `type`: Type of matching to be considered. Can be a simple match of ClCatalog1->ClCatalog2 (`cat1`), ClCatalog2->ClCatalog1 (`cat2`) or cross matching.\n", + "- `metric`: Metric to be used for matching. Can be: `GIoU` (generalized Intersection over Union); `IoA*` (Intersection over Area, with area choice in [`min`, `max`, `self`, `other`]);\n", + "- `metric_cut`: Minimum value of metric for match.\n", + "- `rel_area`: Minimum relative size of area for match.\n", + "- `preference`: In cases where there are multiple matched, how the best candidate will be chosen. Options are: `'more_massive'`, `'angular_proximity'`, `'redshift_proximity'`, `'shared_member_fraction'`, `'GIoU'` (generalized Intersection over Union), `'IoA*'` (Intersection over Area, with area choice in `min`, `max`, `self`, `other`).\n", + "\n", + "- `verbose`: Print result for individual matches (default=`True`).\n", + "\n", + "We also need to provide some specific configuration for each catalog with:\n", + "\n", + "- `delta_z`: Defines redshift window for matching. The possible values are:\n", + " - `'cat'`: uses redshift properties of the catalog\n", + " - `'spline.filename'`: interpolates data in `'filename'` assuming (z, zmin, zmax) format\n", + " - `float`: uses `delta_z*(1+z)`\n", + " - `None`: does not use z" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "match_config = {\n", + " \"type\": \"cross\", # options are cross, cat1, cat2\n", + " \"preference\": \"GIoU\",\n", + " \"catalog1\": {\"delta_z\": None},\n", + " \"catalog2\": {\"delta_z\": None},\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the configuration is prepared, the whole process can be done with one call:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mt.match_from_config(c1, c2, match_config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the matching columns in the catalogs:\n", + "- `mt_multi_self`: Multiple matches found\n", + "- `mt_multi_other`: Multiple matches found by the other catalog\n", + "- `mt_self`: Best candidate found\n", + "- `mt_other`: Best candidate found by the other catalog\n", + "- `mt_cross`: Best candidate found in both directions\n", + "\n", + "If `preference` in (`GIoU`, `IoA*`), it also add the value of `mt_self_preference` and `mt_other_preference`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The steps of matching are stored in the catalogs and can be checked:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.show_mt_hist()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save and Load\n", + "The results of the matching can easily be saved and load using `ClEvaR` tools:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.save_matches(c1, c2, out_dir=\"temp\", overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.load_matches(c1, c2, out_dir=\"temp\")\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Matched Pairs\n", + "\n", + "There is functionality inbuilt in `clevar` to plot some results of the matching, such as:\n", + "- Recovery rates\n", + "- Distances (anguar and redshift) of cluster centers\n", + "- Scaling relations (mass, redshift, ...)\n", + "for those cases, check the match_metrics.ipynb and match_metrics_advanced.ipynb notebooks.\n", + "\n", + "If those do not provide your needs, you can get directly the matched pairs of clusters: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import get_matched_pairs\n", + "\n", + "mt1, mt2 = get_matched_pairs(c1, c2, \"cross\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These will be catalogs with the corresponding matched pairs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pylab as plt\n", + "\n", + "plt.scatter(mt1[\"mass\"], mt2[\"mass\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outputing matched catalogs\n", + "\n", + "To save the current catalogs, you can use the `write` inbuilt function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.write(\"c1_temp.fits\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will allow you to save the catalog with its current labels and matching information.\n", + "\n", + "### Outputing matching information to original catalogs\n", + "\n", + "If your input data came from initial files,\n", + "`clevar` also provides functions create output files \n", + "that combine all the information on them with the matching results.\n", + "\n", + "To add the matching information to an input catalog, use:\n", + "\n", + "```\n", + "from clevar.match import output_catalog_with_matching\n", + "output_catalog_with_matching('input_catalog.fits', 'output_catalog.fits', c1)\n", + "```\n", + "\n", + "- note: `input_catalog.fits` must have the same number of rows that `c1`.\n", + "\n", + "\n", + "To create a matched catalog containig all columns of both input catalogs, use:\n", + "\n", + "```\n", + "from clevar.match import output_matched_catalog\n", + "output_matched_catalog('input_catalog1.fits', 'input_catalog2.fits',\n", + " 'output_catalog.fits', c1, c2, matching_type='cross')\n", + "```\n", + "\n", + "where `matching_type` must be `cross`, `cat1` or `cat2`.\n", + "\n", + "- note: `input_catalog1.fits` must have the same number of rows that `c1` (and the same for `c2`)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/examples/box_matching_detailed.ipynb b/examples/box_matching_detailed.ipynb new file mode 100644 index 0000000..b030993 --- /dev/null +++ b/examples/box_matching_detailed.ipynb @@ -0,0 +1,459 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Matching catalogs based on square boxes (detailed)\n", + "Here we show the specific steps of matching two catalogs based on square boxes on clusters" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_ext autoreload\n", + "%autoreload 2" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ClCatalogs\n", + "Given some input data." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from astropy.table import Table\n", + "\n", + "input1 = Table(\n", + " {\n", + " \"ID\": [f\"CL{i}\" for i in range(5)],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25, 20],\n", + " \"DEC\": [0.0, 0.0, 0.0, 0.0, 0.0],\n", + " \"Z\": [0.2, 0.3, 0.25, 0.4, 0.35],\n", + " \"MASS\": [10**13.5, 10**13.4, 10**13.3, 10**13.8, 10**14],\n", + " }\n", + ")\n", + "input2 = Table(\n", + " {\n", + " \"ID\": [\"CL0\", \"CL1\", \"CL2\", \"CL3\"],\n", + " \"RA\": [0.0, 0.0001, 0.00011, 25],\n", + " \"DEC\": [0.0, 0, 0, 0],\n", + " \"Z\": [0.3, 0.2, 0.25, 0.4],\n", + " \"MASS\": [10**13.3, 10**13.4, 10**13.5, 10**13.8],\n", + " }\n", + ")\n", + "for col in (\"RA\", \"DEC\"):\n", + " input1[f\"{col}_MAX\"] = input1[col] + 0.01\n", + " input1[f\"{col}_MIN\"] = input1[col] - 0.01\n", + " input2[f\"{col}_MAX\"] = input2[col] + 0.01\n", + " input2[f\"{col}_MIN\"] = input2[col] - 0.01\n", + "\n", + "display(input1)\n", + "display(input2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Create two `ClCatalog` objects, they have the same properties of `astropy` tables with additional functionality. You can tag the main properties of the catalog, or have columns with those names (see `catalogs.ipynb` for detailts). For the box matching, the main tags/columns to be included are:\n", + "- `id` - if not included, one will be assigned\n", + "- `ra_min`, `ra_max` (in degrees) - necessary\n", + "- `dec_min`, `dec_max` (in degrees) - necessary\n", + "- `z` - necessary if used as matching criteria" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.catalog import ClCatalog\n", + "\n", + "tags = {\"id\": \"ID\", \"ra\": \"RA\", \"dec\": \"DEC\", \"z\": \"Z\", \"mass\": \"MASS\"}\n", + "c1 = ClCatalog(\"Cat1\", data=input1, tags=tags)\n", + "c2 = ClCatalog(\"Cat2\", data=input2, tags=tags)\n", + "# Format for nice display\n", + "for c in (\"ra\", \"dec\", \"z\"):\n", + " c1[c].info.format = \".2f\"\n", + " c2[c].info.format = \".2f\"\n", + "for c in (\"mass\",):\n", + " c1[c].info.format = \".2e\"\n", + " c2[c].info.format = \".2e\"\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The `ClCatalog` object can also be read directly from a file,\n", + "for details, see catalogs.ipynb." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Matching\n", + "Import the `BoxMatch` and create a object for matching" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import BoxMatch\n", + "\n", + "mt = BoxMatch()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Prepare the catalogs\n", + "The first step is to prepare each catalog with the matching configuration:\n", + "\n", + "- `delta_z`: Defines redshift window for matching. The possible values are:\n", + " - `'cat'`: uses redshift properties of the catalog\n", + " - `'spline.filename'`: interpolates data in `'filename'` assuming (z, zmin, zmax) format\n", + " - `float`: uses `delta_z*(1+z)`\n", + " - `None`: does not use z\n", + "\n", + "In this case, because one of the configuraion radius has physical units, we also need a cosmology (`cosmo`) object to convert it to angular size (this is done internally)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.cosmology import AstroPyCosmology\n", + "\n", + "mt_config1 = {\n", + " \"delta_z\": None,\n", + "}\n", + "mt_config2 = {\n", + " \"delta_z\": None,\n", + "}\n", + "mt.prep_cat_for_match(c1, **mt_config1)\n", + "mt.prep_cat_for_match(c2, **mt_config2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will add values to the `mt_input` attribute of the catalogs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "display(c1.mt_input)\n", + "display(c2.mt_input)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Multiple matching\n", + "The next step is to match the catalogs and store all candidates that pass the matching criteria. You can also pass the arguments\n", + "- `metric`: Metric to be used for matching. Can be: `GIoU` (generalized Intersection over Union); `IoA*` (Intersection over Area, with area choice in `min`, `max`, `self`, `other`);\n", + "- `metric_cut`: Minimum value of metric for match.\n", + "- `rel_area`: Minimum relative size of area for match." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mt.multiple(c1, c2)\n", + "mt.multiple(c2, c1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the `mt_multi_self` and `mt_multi_other` columns:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Unique matching\n", + "Once all candidates are stored in each catalog, we can find the best candidates. You can also pass the argument:\n", + "- `preference`: In cases where there are multiple matched, how the best candidate will be chosen. Options are: `'more_massive'`, `'angular_proximity'`, `'redshift_proximity'`, `'shared_member_fraction'`, `'GIoU'`, `'IoA*'`. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "mt.unique(c1, c2, preference=\"GIoU\")\n", + "mt.unique(c2, c1, preference=\"GIoU\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the `mt_self` and `mt_other` columns, and in case of `preference` in (`GIoU`, `IoA*`), also add the value of `mt_self_preference` and `mt_other_preference`:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Cross matching\n", + "If you want to make sure the same pair was found in both directions:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.cross_match()\n", + "c2.cross_match()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will fill the `mt_cross` column:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Save and Load\n", + "The results of the matching can easily be saved and load using `ClEvaR` tools:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.save_matches(c1, c2, out_dir=\"temp\", overwrite=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "mt.load_matches(c1, c2, out_dir=\"temp\")\n", + "display(c1)\n", + "display(c2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Getting Matched Pairs\n", + "\n", + "There is functionality inbuilt in `clevar` to plot some results of the matching, such as:\n", + "- Recovery rates\n", + "- Distances (anguar and redshift) of cluster centers\n", + "- Scaling relations (mass, redshift, ...)\n", + "for those cases, check the match_metrics.ipynb and match_metrics_advanced.ipynb notebooks.\n", + "\n", + "If those do not provide your needs, you can get directly the matched pairs of clusters: " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from clevar.match import get_matched_pairs\n", + "\n", + "mt1, mt2 = get_matched_pairs(c1, c2, \"cross\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "These will be catalogs with the corresponding matched pairs:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import pylab as plt\n", + "\n", + "plt.scatter(mt1[\"mass\"], mt2[\"mass\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Outputing matched catalogs\n", + "\n", + "To save the current catalogs, you can use the `write` inbuilt function:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "c1.write(\"c1_temp.fits\", overwrite=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will allow you to save the catalog with its current labels and matching information.\n", + "\n", + "### Outputing matching information to original catalogs\n", + "\n", + "Assuming your input data came from initial files,\n", + "`clevar` also provides functions create output files \n", + "that combine all the information on them with the matching results.\n", + "\n", + "To add the matching information to an input catalog, use:\n", + "\n", + "```\n", + "from clevar.match import output_catalog_with_matching\n", + "output_catalog_with_matching('input_catalog.fits', 'output_catalog.fits', c1)\n", + "```\n", + "\n", + "- note: `input_catalog.fits` must have the same number of rows that `c1`.\n", + "\n", + "\n", + "To create a matched catalog containig all columns of both input catalogs, use:\n", + "\n", + "```\n", + "from clevar.match import output_matched_catalog\n", + "output_matched_catalog('input_catalog1.fits', 'input_catalog2.fits',\n", + " 'output_catalog.fits', c1, c2, matching_type='cross')\n", + "```\n", + "\n", + "where `matching_type` must be `cross`, `cat1` or `cat2`.\n", + "\n", + "- note: `input_catalog1.fits` must have the same number of rows that `c1` (and the same for `c2`)." + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + }, + "toc": { + "base_numbering": 1, + "nav_menu": {}, + "number_sections": true, + "sideBar": true, + "skip_h1_title": true, + "title_cell": "Table of Contents", + "title_sidebar": "Contents", + "toc_cell": true, + "toc_position": {}, + "toc_section_display": true, + "toc_window_display": false + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/requirements.txt b/requirements.txt index 660aba7..ecf32d4 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ astropy>=4, !=5.0 -matplotlib +matplotlib !=3.9.0 numpy>=1.17 scipy>=1.3 healpy>=1.16 diff --git a/tests/conftest.py b/tests/conftest.py index 447abba..7fd218c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -3,11 +3,15 @@ import importlib import os from clevar.cosmology import AstroPyCosmology, CCLCosmology +from clevar import optional_libs @pytest.fixture(scope="module", params=[AstroPyCosmology, CCLCosmology]) def CosmoClass(request): - return request.param + param = request.param + if optional_libs.ccl is None and param == CCLCosmology: + pytest.skip(f"Missing backend '{param}'.") + return param @pytest.fixture( diff --git a/tests/test_catalog.py b/tests/test_catalog.py index a05e513..4faa9f7 100644 --- a/tests/test_catalog.py +++ b/tests/test_catalog.py @@ -66,10 +66,10 @@ def _base_cat_test(DataType, kwa=None, **quantities): # test warnings c["ra"] = 1.0 c.tag_column("ra", "newtag") - with pytest.warns(None) as record: + with pytest.warns() as record: c.tag_column("z", "newtag") assert f"{record._list[0].message}" == "tag newtag:ra being replaced by newtag:z" - with pytest.warns(None) as record: + with pytest.warns() as record: c.tag_column("dec", "z") assert f"{record._list[0].message}" == ( "There is a column with the same name as the tag setup." @@ -100,11 +100,11 @@ def test_catalog(): assert_raises(TypeError, c_._add_values, data=1) assert_raises(KeyError, c_._add_values, data=[1], mass=[1]) # Check creation of id col - with pytest.warns(None) as record: + with pytest.warns() as record: c_no_id = Catalog(name="no id", ra=[1, 2]) assert f"{record._list[0].message}" == "id column missing, additional one is being created." assert_equal(c_no_id["id"], ["0", "1"]) - with pytest.warns(None) as record: + with pytest.warns() as record: c_no_id = Catalog(name="no id") c_no_id["ra"] = [1, 2] assert f"{record._list[0].message}" == "id column missing, additional one is being created." @@ -126,7 +126,7 @@ def test_clcatalog(): c._repr_html_() c.show_mt_hist() # test repeated ids - with pytest.warns(None) as record: + with pytest.warns() as record: c0 = ClCatalog(name="test", id=["a", "a"], z=[0, 0]) assert f"{record._list[0].message}" == "Repeated ID's in id column, adding suffix _r# to them." assert_equal(c0["id"], ["a_r1", "a_r2"]) diff --git a/tests/test_match.py b/tests/test_match.py index 376942b..49bd2f2 100644 --- a/tests/test_match.py +++ b/tests/test_match.py @@ -6,9 +6,11 @@ from clevar.catalog import ClCatalog, MemCatalog from clevar.match.parent import Match +from clevar.match.spatial import SpatialMatch from clevar.match import ( ProximityMatch, MembershipMatch, + BoxMatch, output_catalog_with_matching, output_matched_catalog, get_matched_pairs, @@ -21,6 +23,15 @@ def test_parent(): assert_raises(NotImplementedError, mt.multiple, None, None) assert_raises(NotImplementedError, mt.match_from_config, None, None, None, None) assert_raises(ValueError, mt._get_dist_mt, None, None, "unknown match") + assert_raises(NotImplementedError, mt._set_unique_matching_function, None) + + +def test_spatial(): + mt = SpatialMatch() + assert_raises(NotImplementedError, mt.prep_cat_for_match, None) + assert_raises(NotImplementedError, mt.multiple, None, None) + assert_raises(NotImplementedError, mt.match_from_config, None, None, None, None) + assert_raises(ValueError, mt._get_dist_mt, None, None, "unknown match") def _test_mt_results(cat, multi_self, self, cross, multi_other=None, other=None): @@ -494,6 +505,253 @@ def test_membership_cfg(CosmoClass): _test_mt_results(cat2, multi_self=mmt2, self=omt[:-1], cross=cmt[:-1], other=smt[:-1]) +def get_test_data_box(): + input1 = { + "id": [f"CL{i}" for i in range(5)], + "ra": np.array([0.0, 0.0001, 0.00011, 25, 20]), + "dec": np.array([0.0, 0, 0, 0, 0]), + "z": [0.2, 0.3, 0.25, 0.4, 0.35], + "mass": [10**13.5, 10**13.4, 10**13.3, 10**13.8, 10**14], + } + input1["ra_min"] = input1["ra"] - 1 / 60.0 + input1["ra_max"] = input1["ra"] + 1 / 60.0 + input1["dec_min"] = input1["dec"] - 1 / 60.0 + input1["dec_max"] = input1["dec"] + 1 / 60.0 + input2 = {k: v[:4] for k, v in input1.items()} + input2["z"][:2] = [0.3, 0.2] + input2["mass"][:3] = input2["mass"][:3][::-1] + return input1, input2 + + +def _validate_unique_matching( + mt, + cat1, + cat2, + match_preference, + mmt1, + smt1, + omt1, + mmt2, + smt2, + omt2, +): + for col in ("mt_self", "mt_other"): + cat1[col] = None + cat2[col] = None + mt.unique(cat1, cat2, match_preference) + mt.unique(cat2, cat1, match_preference) + cat1.show_mt_hist() + cat2.show_mt_hist() + cat1.cross_match() + cat2.cross_match() + _test_mt_results(cat1, multi_self=mmt1, self=smt1, cross=smt1, other=omt1) + _test_mt_results(cat2, multi_self=mmt2, self=omt2, cross=smt2, other=omt2) + + +def test_box(CosmoClass): + input1, input2 = get_test_data_box() + cat1 = ClCatalog("Cat1", **input1) + cat2 = ClCatalog("Cat2", **input2) + print(cat1.data) + print(cat2.data) + cosmo = CosmoClass() + mt = BoxMatch() + assert_raises(NotImplementedError, mt._get_metric, None) + # test mask intersection + res = np.zeros(2, dtype=bool) + for i, add in enumerate([1, -1, 1, -1]): + args = np.zeros((8, 2)) + args[i] += add + print(args) + assert_equal(mt.mask_intersection(*args), res) + # test missing data + assert_raises(AttributeError, mt.multiple, cat1, cat2) + cat1.mt_input = "xx" + assert_raises(AttributeError, mt.multiple, cat1, cat2) + cat1.mt_input = None + # init match + mt_config1 = {"delta_z": 0.2} + mt_config2 = {"delta_z": 0.2} + mt.prep_cat_for_match(cat1, **mt_config1) + mt.prep_cat_for_match(cat2, **mt_config2) + # Check multiple match + assert_raises(ValueError, mt.multiple, cat1, cat2, metric="unknown") + mmt = [ + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL3"], + [], + ] + for metric in ("GIoU", "IoAmin", "IoAmax", "IoAself", "IoAother"): + mt.multiple(cat1, cat2, metric=metric) + mt.multiple(cat2, cat1, metric=metric) + # Check unique with different preferences + smt = ["CL0", "CL1", "CL2", "CL3", None] + for pref in ("angular_proximity", "GIoU", "IoAmin", "IoAmax", "IoAself", "IoAother"): + print(pref) + _validate_unique_matching( + mt, + cat1, + cat2, + pref, + mmt, + smt, + smt, + mmt[:-1], + smt[:-1], + smt[:-1], + ) + # Check unique with mass preference + smt = ["CL2", "CL1", "CL0", "CL3", None] + _validate_unique_matching( + mt, + cat1, + cat2, + "more_massive", + mmt, + smt, + smt, + mmt[:-1], + smt[:-1], + smt[:-1], + ) + # Check unique with z preference + cat2["mt_other"][0] = "CL3" # to force a replacement + smt = ["CL1", "CL0", "CL2", "CL3", None] + omt = ["CL1", "CL0", "CL2", "CL3", None] + _validate_unique_matching( + mt, + cat1, + cat2, + "redshift_proximity", + mmt, + smt, + omt, + mmt[:-1], + omt[:-1], + smt[:-1], + ) + # Error for unkown preference + assert_raises(ValueError, mt.unique, cat1, cat2, "unknown") + # Check save and load matching + mt.save_matches(cat1, cat2, out_dir="temp", overwrite=True) + cat1_v2 = ClCatalog("Cat1", **input1) + cat2_v2 = ClCatalog("Cat2", **input2) + mt.load_matches(cat1_v2, cat2_v2, out_dir="temp") + for col in ("mt_self", "mt_other", "mt_multi_self", "mt_multi_other"): + assert_equal(cat1[col], cat1_v2[col]) + assert_equal(cat2[col], cat2_v2[col]) + os.system("rm -rf temp") + # Other config of prep for matching + # No redshift use + mt.prep_cat_for_match(cat1, delta_z=None) + assert all(cat1.mt_input["zmin"] < cat1["z"].min()) + assert all(cat1.mt_input["zmax"] > cat1["z"].max()) + # missing all zmin/zmax info in catalog + # zmin/zmax in catalog + cat1["zmin"] = cat1["z"] - 0.2 + cat1["zmax"] = cat1["z"] + 0.2 + cat1["z_err"] = 0.1 + mt.prep_cat_for_match(cat1, delta_z="cat") + assert_allclose(cat1.mt_input["zmin"], cat1["zmin"]) + assert_allclose(cat1.mt_input["zmax"], cat1["zmax"]) + # z_err in catalog + del cat1["zmin"], cat1["zmax"] + mt.prep_cat_for_match(cat1, delta_z="cat") + assert_allclose(cat1.mt_input["zmin"], cat1["z"] - cat1["z_err"]) + assert_allclose(cat1.mt_input["zmax"], cat1["z"] + cat1["z_err"]) + # zmin/zmax from aux file + zv = np.linspace(0, 5, 10) + np.savetxt("zvals.dat", [zv, zv - 0.22, zv + 0.33]) + mt.prep_cat_for_match(cat1, delta_z="zvals.dat") + assert_allclose(cat1.mt_input["zmin"], cat1["z"] - 0.22) + assert_allclose(cat1.mt_input["zmax"], cat1["z"] + 0.33) + os.system("rm -rf zvals.dat") + + +def test_box_cfg(CosmoClass): + input1, input2 = get_test_data_box() + cat1 = ClCatalog("Cat1", **input1) + cat2 = ClCatalog("Cat2", **input2) + print(cat1.data) + print(cat2.data) + # init match + mt = BoxMatch() + # test wrong matching config + assert_raises(ValueError, mt.match_from_config, cat1, cat2, {"type": "unknown"}) + ### test 0 ### + mt_config = { + "type": "cross", + "preference": "GIoU", + "catalog1": { + "delta_z": 0.2, + }, + "catalog2": { + "delta_z": 0.2, + }, + } + # Check multiple match + mmt = [ + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL0", "CL1", "CL2"], + ["CL3"], + [], + ] + smt = ["CL0", "CL1", "CL2", "CL3", None] + ### test 0 ### + mt.match_from_config(cat1, cat2, mt_config) + # Check match + _test_mt_results(cat1, multi_self=mmt, self=smt, cross=smt) + _test_mt_results(cat2, multi_self=mmt[:-1], self=smt[:-1], cross=smt[:-1]) + ### test 1 ### + cat1._init_match_vals(overwrite=True) + cat2._init_match_vals(overwrite=True) + mt.match_from_config(cat1, cat2, mt_config) + _test_mt_results(cat1, multi_self=mmt, self=smt, cross=smt) + _test_mt_results(cat2, multi_self=mmt[:-1], self=smt[:-1], cross=smt[:-1]) + + +def test_box_detailed_print(CosmoClass): + cosmo = CosmoClass() + mt = BoxMatch() + assert_raises(ValueError, mt._detailed_print, None, {"detailed_print_only": True}) + # prep data + input1, input2 = get_test_data_box() + cat1 = ClCatalog("Cat1", **input1)[:1] + cat2 = ClCatalog("Cat2", **input2) + mt.prep_cat_for_match(cat1, delta_z=None) + mt.prep_cat_for_match(cat2, delta_z=None) + # success match + mt.multiple( + cat1, + cat2, + metric_cut=0.5, + detailed_print_only=True, + ) + # fail match on redshift + cat2["z"] += 1 + mt.prep_cat_for_match(cat1, delta_z=0.2) + mt.prep_cat_for_match(cat2, delta_z=0.2) + mt.multiple( + cat1, + cat2, + metric_cut=0.5, + detailed_print_only=True, + ) + # fail match on intersection + cat1["ra_min"] += 100 + mt.prep_cat_for_match(cat1, delta_z=None) + mt.prep_cat_for_match(cat2, delta_z=None) + mt.multiple( + cat1, + cat2, + metric_cut=0.5, + detailed_print_only=True, + ) + + def test_output_catalog_with_matching(): # input data cat1, cat2 = get_test_data_mem() diff --git a/tests/test_yaml.py b/tests/test_yaml.py index 7940431..25784e8 100644 --- a/tests/test_yaml.py +++ b/tests/test_yaml.py @@ -2,12 +2,14 @@ """ Tests for match.py """ import os import numpy as np +import pytest import yaml from numpy.testing import assert_raises, assert_allclose, assert_equal from unittest import mock from clevar import clv_yaml as clevar_yaml from clevar.clv_yaml import match_metrics_parent as metric_parent +from clevar import optional_libs def test_yaml_parent(): @@ -43,10 +45,6 @@ def test_yaml_helper_functions(): hf.loadconf("cfg.yml", ["test"], fail_action="orverwrite") os.system("rm cfg.yml") os.system("rm -r temp") - # cosmology - hf.make_cosmology({"backend": "astropy"}) - hf.make_cosmology({"backend": "ccl"}) - assert_raises(ValueError, hf.make_cosmology, {"backend": "unknown"}) # make_bins hf.make_bins(3, log=False) hf.make_bins("0 1 3", log=False) @@ -58,6 +56,13 @@ def test_yaml_helper_functions(): hf.get_input_loop(options_msg="test", actions={"pass": (lambda x: "ok", [0], {})}), "ok" ) mock.builtins.input = original_input + # cosmology + hf.make_cosmology({"backend": "astropy"}) + if optional_libs.ccl is None: + pytest.skip(f"Missing backend CCL.") + else: + hf.make_cosmology({"backend": "ccl"}) + assert_raises(ValueError, hf.make_cosmology, {"backend": "unknown"}) def create_base_matched_files(config_file, matching_mode): @@ -123,13 +128,13 @@ def test_yaml_funcs_prox(): # Match, used diff cosmology and overwrite # 1 print("############ Fail cfg check #########") - config["proximity_match"]["step1"]["cosmology"] = {"backend": "CCL"} + config["proximity_match"]["step1"]["cosmology"] = {"backend": "astropy"} yaml.write(config, "cfg.yml") original_input = mock.builtins.input mock.builtins.input = lambda _: "q" clevar_yaml.match("cfg.yml", overwrite_config=False, overwrite_files=False) # 2 - config["cosmology"] = {"backend": "CCL"} + config["cosmology"] = {"backend": "astropy"} yaml.write(config, "cfg.yml") mock.builtins.input = lambda _: "o" clevar_yaml.match("cfg.yml", overwrite_config=False, overwrite_files=False)