In [2]:
from pathlib import Path

import numpy as np
from astropy.io import ascii
import matplotlib.pyplot as plt

from dask.distributed import Client
import dask.array
from dask.dataframe.utils import make_meta

from hats import read_hats
from hats.inspection import plot_pixels
from hats_import.catalog.file_readers import CsvReader
from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments
from hats_import.pipeline import ImportArguments, pipeline_with_client

import lsdb

from catalog_filtering import bandFilterLenient, contains_PM

print("Imported Libraries")

Imported Libraries


In [4]:
# --- Input catalogs (shared, read-only HATS collections) -------------------
SHARED_HATS_CATALOGS = Path("../../../shared/hats/catalogs")
DES_HATS_DIR = SHARED_HATS_CATALOGS / "des" / "des_dr2"
GAIA_HATS_DIR = SHARED_HATS_CATALOGS / "gaia_dr3" / "gaia"

# --- Output root: every catalog written by this notebook lives under here --
CATALOG_DIR = Path("../../catalogs")

# Catalog names (used both as HATS catalog names and as directory names).
GAIA_PM_NAME = "gaia_dr3_pm_greater_100"
GAIA_MARGIN_CACHE_NAME = "gaia_margin_cache_18_arcsec"
DES_LIGHT_NAME = "des_light"
DES_MARGIN_CACHE_NAME = "des_margin_cache_18_arcsec"
DES_FILTERED_NAME = "des_light_filtered"
XMATCH_NAME = "des_dr2_x_gaia_dr3"

# Directories derived from the names above.
GAIA_PM_DIR = CATALOG_DIR / GAIA_PM_NAME
GAIA_MARGIN_CACHE_DIR = CATALOG_DIR / GAIA_MARGIN_CACHE_NAME
DES_LIGHT_DIR = CATALOG_DIR / DES_LIGHT_NAME
DES_MARGIN_CACHE_DIR = CATALOG_DIR / DES_MARGIN_CACHE_NAME
DES_FILTERED_DIR = CATALOG_DIR / DES_FILTERED_NAME
XMATCH_DIR = CATALOG_DIR / XMATCH_NAME

print("Defined directories")

Defined directories


In [5]:
# --- DES quality-cut configuration ------------------------------------------
# Photometric bands; every per-band DES column below is expanded over this list.
bandList = ['G', 'R', 'I', 'Z', 'Y']

# Thresholds and switches forwarded to bandFilterLenient, which builds the
# query string applied to the DES catalog further down.
# NOTE(review): class_star=None presumably disables the CLASS_STAR cut -
# confirm against catalog_filtering.bandFilterLenient.
class_star = None
spread_model = 0.05
magnitude_error = 0.05
check_flags = True
check_invalid_mags = True
query_string = bandFilterLenient(
    bandList,
    classStar=class_star,
    spreadModel=spread_model,
    magError=magnitude_error,
    flag=check_flags,
    invalidMags=check_invalid_mags,
)

# Columns read from the full DES catalog when writing the slimmed-down copy.
des_cols = (
    [f'CLASS_STAR_{band}' for band in bandList]
    + [f'FLAGS_{band}' for band in bandList]
    + ['RA', 'DEC', 'COADD_OBJECT_ID']
    + [f'SPREAD_MODEL_{band}' for band in bandList]
    + [f'WAVG_MAG_PSF_{band}' for band in bandList]
    + [f'WAVG_MAGERR_PSF_{band}' for band in bandList]
)

# --- Proper-motion and matching configuration -------------------------------
# Bounds on the total proper motion sqrt(pmra**2 + pmdec**2) of the Gaia match.
# Units are milliarcseconds per year (mas/yr), the unit of Gaia's pmra/pmdec.
pm_speed_min = 2000
pm_speed_max = 10**5

# Maximum DES-Gaia separation considered, in arcseconds; also used as the
# margin threshold when the margin caches are generated.
max_neighbor_dist = 18
print("Defined filter variables")

Defined filter variables


# Obtaining old Gaia Catalog

In [None]:
%%time
with Client(n_workers=4):
    lsdb.read_hats(
        path=GAIA_HATS_DIR,
        columns=['ra', 'pmra', 'dec', 'pmdec', 'source_id', 'parallax_over_error'],
    ).query('pmra*pmra + pmdec*pmdec > 100*100').to_hats(catalog_name = GAIA_PM_NAME, 
                                                        base_catalog_path = GAIA_PM_DIR)

print("gaia_pm successfully saved to /catalog")

## Obtaining Gaia Margin Cache

This cache ensures that we do not miss potential crossmatches between DES and Gaia near the borders of pixels in the Gaia catalog. The margin threshold is set to the maximum separation (in arcseconds) at which we seek object alignment for our algorithm: `max_neighbor_dist`.

In [None]:
# Start a Dask distributed client with default settings; it is handed to
# pipeline_with_client when the Gaia margin cache is generated.
client = Client()

In [None]:
# Generate the margin cache for the proper-motion-filtered Gaia catalog.
# The artifact is written under CATALOG_DIR using GAIA_MARGIN_CACHE_NAME.
gaia_margin_args = MarginCacheArguments(
    output_artifact_name=GAIA_MARGIN_CACHE_NAME,
    output_path=CATALOG_DIR,
    input_catalog_path=GAIA_PM_DIR,
    margin_threshold=max_neighbor_dist,  # arcsec
)
pipeline_with_client(gaia_margin_args, client)

In [None]:
# Shut down the Dask client that was used for the Gaia margin-cache pipeline.
client.close()

In [11]:
# Open the proper-motion-filtered Gaia catalog together with its margin cache;
# this object is the right-hand side of the crossmatch later in the notebook.
gaia_dr3_pm_greater_100 = lsdb.read_hats(GAIA_PM_DIR, margin_cache=GAIA_MARGIN_CACHE_DIR)
gaia_dr3_pm_greater_100

Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec
npartitions=3917,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
"Order: 2, Pixel: 0",int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow]
"Order: 3, Pixel: 4",...,...,...,...,...
...,...,...,...,...,...
"Order: 4, Pixel: 3067",...,...,...,...,...
"Order: 3, Pixel: 767",...,...,...,...,...


In [12]:
# Number of rows in the proper-motion-filtered Gaia catalog.
print(len(gaia_dr3_pm_greater_100))

506589


# Obtaining Old DES Catalog

In [12]:
%%time

with Client(n_workers=4) as client:
    lsdb.read_hats(
        DES_HATS_DIR,
        columns=des_cols,
    ).to_hats(catalog_name = DES_LIGHT_NAME, 
                base_catalog_path = DES_LIGHT_DIR)

CPU times: user 42.1 s, sys: 5.17 s, total: 47.2 s
Wall time: 5min 27s


## Obtaining DES Margin Cache

In [5]:
# Start a Dask distributed client with default settings; it is handed to
# pipeline_with_client when the DES margin cache is generated.
client = Client()

In [None]:
# Generate the margin cache for the slimmed-down DES catalog.
# The artifact is written under CATALOG_DIR using DES_MARGIN_CACHE_NAME.
margin_cache_args = MarginCacheArguments(
    input_catalog_path=DES_LIGHT_DIR,
    output_path=CATALOG_DIR,
    margin_threshold=max_neighbor_dist,  # arcsec
    output_artifact_name=DES_MARGIN_CACHE_NAME,
)

try:
    pipeline_with_client(margin_cache_args, client)
finally:
    # Release the client even if the pipeline fails, mirroring the Gaia
    # section. Otherwise its workers stay alive and compete with the
    # Client(n_workers=...) clusters that later cells start.
    client.close()

In [6]:
# Open the slimmed-down DES catalog together with its margin cache and report
# its row count (the unfiltered baseline for the size comparison below).
des_dr2_light = lsdb.read_hats(DES_LIGHT_DIR, margin_cache=DES_MARGIN_CACHE_DIR)
print(len(des_dr2_light))

691483608


## Saving filtered DES to compare size with unfiltered DES

In [7]:
%%time
with Client(n_workers=4):
    lsdb.read_hats(
        DES_LIGHT_DIR,
    ).query(query_string
    ).to_hats(catalog_name = DES_FILTERED_NAME, 
                base_catalog_path = DES_FILTERED_DIR)

CPU times: user 41.1 s, sys: 3.86 s, total: 44.9 s
Wall time: 4min 32s


In [10]:
# Re-open the filtered DES catalog and report its row count, to compare
# against the unfiltered count printed for des_dr2_light.
des_filtered = lsdb.read_hats(DES_FILTERED_DIR)
print(len(des_filtered))

169891863


In [13]:
# Crossmatch DES (left) against the high-proper-motion Gaia subset (right),
# keeping up to 100 Gaia neighbours per DES object within the search radius.
# The radius is taken from max_neighbor_dist, the same value used as the
# margin threshold for the margin caches, so the two cannot drift apart.
xmatch = des_dr2_light.crossmatch(
    gaia_dr3_pm_greater_100,
    suffixes=("_des", "_gaia"),
    n_neighbors=100,
    radius_arcsec=max_neighbor_dist,
)
xmatch

Unnamed: 0_level_0,CLASS_STAR_G_des,CLASS_STAR_R_des,CLASS_STAR_I_des,CLASS_STAR_Z_des,CLASS_STAR_Y_des,FLAGS_G_des,FLAGS_R_des,FLAGS_I_des,FLAGS_Z_des,FLAGS_Y_des,RA_des,DEC_des,COADD_OBJECT_ID_des,SPREAD_MODEL_G_des,SPREAD_MODEL_R_des,SPREAD_MODEL_I_des,SPREAD_MODEL_Z_des,SPREAD_MODEL_Y_des,WAVG_MAG_PSF_G_des,WAVG_MAG_PSF_R_des,WAVG_MAG_PSF_I_des,WAVG_MAG_PSF_Z_des,WAVG_MAG_PSF_Y_des,WAVG_MAGERR_PSF_G_des,WAVG_MAGERR_PSF_R_des,WAVG_MAGERR_PSF_I_des,WAVG_MAGERR_PSF_Z_des,WAVG_MAGERR_PSF_Y_des,source_id_gaia,ra_gaia,dec_gaia,pmra_gaia,pmdec_gaia,_dist_arcsec
npartitions=1587,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
"Order: 4, Pixel: 0",double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],int16[pyarrow],int16[pyarrow],int16[pyarrow],int16[pyarrow],int16[pyarrow],double[pyarrow],double[pyarrow],int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow]
"Order: 5, Pixel: 8",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 3, Pixel: 752",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 3, Pixel: 767",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [6]:
# Persist the DES x Gaia crossmatch as a HATS catalog under XMATCH_DIR.
with Client(n_workers=3):
    xmatch.to_hats(base_catalog_path=XMATCH_DIR, catalog_name=XMATCH_NAME)

print("Saved crossmatch to catalogs folder.")



Saved crossmatch to catalogs folder.


In [16]:
# Re-open the saved crossmatch from disk.
# NOTE(review): columns='all' presumably requests every stored column rather
# than a default subset - confirm against the lsdb.read_hats documentation.
des_gaia_xmatch = lsdb.read_hats(XMATCH_DIR, columns='all')
des_gaia_xmatch

Unnamed: 0_level_0,CLASS_STAR_G_des,CLASS_STAR_R_des,CLASS_STAR_I_des,CLASS_STAR_Z_des,CLASS_STAR_Y_des,FLAGS_G_des,FLAGS_R_des,FLAGS_I_des,FLAGS_Z_des,FLAGS_Y_des,RA_des,DEC_des,COADD_OBJECT_ID_des,SPREAD_MODEL_G_des,SPREAD_MODEL_R_des,SPREAD_MODEL_I_des,SPREAD_MODEL_Z_des,SPREAD_MODEL_Y_des,WAVG_MAG_PSF_G_des,WAVG_MAG_PSF_R_des,WAVG_MAG_PSF_I_des,WAVG_MAG_PSF_Z_des,WAVG_MAG_PSF_Y_des,WAVG_MAGERR_PSF_G_des,WAVG_MAGERR_PSF_R_des,WAVG_MAGERR_PSF_I_des,WAVG_MAGERR_PSF_Z_des,WAVG_MAGERR_PSF_Y_des,source_id_gaia,ra_gaia,dec_gaia,pmra_gaia,pmdec_gaia,_dist_arcsec
npartitions=1578,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1
"Order: 4, Pixel: 0",double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],int16[pyarrow],int16[pyarrow],int16[pyarrow],int16[pyarrow],int16[pyarrow],double[pyarrow],double[pyarrow],int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow]
"Order: 5, Pixel: 8",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 3, Pixel: 743",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
"Order: 3, Pixel: 752",...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...


In [17]:
# Number of matched DES-Gaia pairs in the saved crossmatch.
print(len(des_gaia_xmatch))

566779


In [None]:
# Keep only pairs whose Gaia squared total proper motion lies strictly
# between pm_speed_min**2 and pm_speed_max**2, then materialise the result
# in memory.
pm_sq_upper = pm_speed_max**2
pm_sq_lower = pm_speed_min**2
pm_filter_xmatch = des_gaia_xmatch.query(
    f'{pm_sq_upper} >(pmra_gaia**2 + pmdec_gaia**2) > {pm_sq_lower}'
)

with Client(n_workers=4):
    df = pm_filter_xmatch.compute()

df

In [8]:
# Keep only the first row for each Gaia source_id.
is_repeated_gaia = df['source_id_gaia'].duplicated(keep='first')
df_no_dupes = df.loc[~is_repeated_gaia]

# Keep the Gaia ids aside before the column is removed from the frame.
gaia_ids = df_no_dupes['source_id_gaia']

# dropping because otherwise produces error when performing .apply below
df_no_dupes = df_no_dupes.drop(columns='source_id_gaia')

df_no_dupes

Unnamed: 0_level_0,CLASS_STAR_G_des,CLASS_STAR_R_des,CLASS_STAR_I_des,CLASS_STAR_Z_des,CLASS_STAR_Y_des,FLAGS_G_des,FLAGS_R_des,FLAGS_I_des,FLAGS_Z_des,FLAGS_Y_des,RA_des,DEC_des,COADD_OBJECT_ID_des,SPREAD_MODEL_G_des,SPREAD_MODEL_R_des,SPREAD_MODEL_I_des,SPREAD_MODEL_Z_des,SPREAD_MODEL_Y_des,WAVG_MAG_PSF_G_des,WAVG_MAG_PSF_R_des,WAVG_MAG_PSF_I_des,WAVG_MAG_PSF_Z_des,WAVG_MAG_PSF_Y_des,WAVG_MAGERR_PSF_G_des,WAVG_MAGERR_PSF_R_des,WAVG_MAGERR_PSF_I_des,WAVG_MAGERR_PSF_Z_des,WAVG_MAGERR_PSF_Y_des,ra_gaia,dec_gaia,pmra_gaia,pmdec_gaia,_dist_arcsec
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1153482605725265461,0.844888,0.845371,0.844807,0.845333,0.844827,3,3,3,3,3,1.386363,-37.369781,1043295027,-0.015229,0.040008,0.037639,0.034238,0.050002,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,1.383284,-37.367744,5633.438088,-2334.721273,11.461677
1257518643390044839,0.844564,0.845075,0.845251,0.843732,0.801461,2,2,2,2,2,33.079341,3.567016,1250008459,0.028251,0.024656,0.022653,0.022333,0.032334,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,33.079599,3.567385,-1762.405718,-1852.8711,1.62095
2368327529620177120,0.3251,0.177878,0.293946,0.140421,0.001102,0,0,0,0,0,53.568008,-49.893871,1398391104,-0.006986,0.000464,0.004162,0.024926,0.02906,24.553566,23.715429,23.094166,22.574404,-99.0,0.236298,0.062562,0.070822,0.11552,-99.0,53.567196,-49.890084,2360.592206,483.127504,13.763072
2390050329526096144,0.125803,0.120065,0.028601,0.028627,0.028627,3,3,3,3,3,62.610625,-53.614224,1464092812,0.010486,0.038736,0.037149,0.040352,0.041889,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,62.611,-53.612997,-825.17937,-2415.577565,4.490123
2405297220004419732,0.194207,0.84529,0.84489,0.845284,0.029913,3,3,3,3,3,77.964238,-45.04643,1540617639,0.008839,0.006246,0.002662,0.001698,0.005954,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,77.959937,-45.043813,6491.223339,-5708.61415,14.437522
2423978658324676571,0.845425,0.845425,0.845406,0.845371,0.84513,22,22,18,18,22,50.000054,-43.066641,1445898840,-0.000132,-9e-06,1.1e-05,9e-06,2e-06,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,50.000344,-43.066553,3035.017316,726.964482,0.825611
2450054482295873745,0.844839,0.845014,0.845117,0.845267,0.844982,2,2,2,2,6,5.037242,-64.869108,1054999950,0.000474,0.000276,-0.000397,0.000404,1.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,5.03561,-64.869617,1706.746855,1164.959443,3.096527
2468500457058623057,0.84536,0.029197,0.845341,0.842975,0.047746,2,3,2,2,2,32.623122,-50.820946,1243465193,0.00557,0.00185,0.001703,0.001036,0.004741,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,32.622946,-50.820906,2125.416147,637.975043,0.425698
2468500457058623057,0.84536,0.029197,0.845341,0.842975,0.047746,2,3,2,2,2,32.623122,-50.820946,1243465193,0.00557,0.00185,0.001703,0.001036,0.004741,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,32.624069,-50.820823,2168.886011,710.847727,2.199757
2503116016090087218,0.141634,0.108526,0.007691,0.009382,0.014642,3,3,3,3,3,11.340723,-33.50167,1094555899,0.014438,0.016889,0.014475,0.018477,0.016475,-99.0,23.862965,23.351395,22.496925,-99.0,-99.0,0.096928,0.085113,0.068819,-99.0,11.341389,-33.497993,1826.373986,-1485.010343,13.386494
