## Collecting DES Dim HMPS

In this notebook, we get all of the known hpms in DES-DR2 from a "crossmatch" with Gaia DR3. We do this by performing small conesearches surrounding the coordinates of known, dim HPMS, and concatenating them into one df to collect statistics. We also then run the pipeline on each of these conesearches individually, concatenate the results, and plot statistics from that catalog. 

In [1]:
# Dask puts out more advisory logging that we care for.
# It takes some doing to quiet all of it, but this recipe works.
import dask
import logging
import dask_jobqueue
from dask.dataframe.utils import make_meta
from dask.distributed import Client

dask.config.set({"logging.distributed": "critical"})

# This also has to be done, for the above to be effective
logger = logging.getLogger("distributed")
logger.setLevel(logging.CRITICAL)

import warnings

# Finally, suppress the specific warning about Dask dashboard port usage
warnings.filterwarnings("ignore", message="Port 8787 is already in use.")

from pathlib import Path

import numpy as np
import pandas as pd
from astropy.io import ascii
import matplotlib.pyplot as plt
import time

from hats import read_hats

import lsdb

from catalog_filtering import bandFilterLenient, contains_PM
import hpms_pipeline as hpms

print("Imported libraries.")

Imported libraries.


In [2]:
bandList = ['G','R','I','Z','Y']
class_star = None
spread_model = 0.05
magnitude_error = 0.05
check_flags = True
mag = 19
query_string = bandFilterLenient(bandList,classStar=class_star,spreadModel=spread_model,magError=magnitude_error,flag=check_flags,mag=mag)
des_cols = (
    [f'CLASS_STAR_{band}' for band in bandList] + 
    [f'FLAGS_{band}' for band in bandList] + 
    ['RA','DEC','COADD_OBJECT_ID'] + 
    [f'SPREAD_MODEL_{band}' for band in bandList] + 
    [f'WAVG_MAG_PSF_{band}' for band in bandList] + 
    [f'WAVG_MAGERR_PSF_{band}' for band in bandList]
    [f'NEPOCHS_{band}' for band in bandList
)
k = 2
max_obj_deviation = 0.2
pm_speed_min = 1000 #units are milliarcseconds per year
pm_speed_max = 10**5
des_id_col = 'COADD_OBJECT_ID_1'
mag_cols = [f'WAVG_MAG_PSF_{band}' for band in ['I']]
cone_search_rad = 25
min_neighbors = 3
max_neighbor_dist = 18
milliarc_degree_conversion = 1/(1000*3600)
xmatch_max_neighbors = 100
print("Defined local vars.")

Defined local vars.


In [3]:
# Change to the directories where the data will be stored
CATALOG_DIR = Path("../../../catalogs")
MARGIN_CACHE_DIR = CATALOG_DIR / 'margin_caches'

GAIA_NAME = "gaia_dr3_pm_greater_100"
GAIA_DIR = CATALOG_DIR / GAIA_NAME

DES_NAME = "des_light"
DES_DIR = CATALOG_DIR / DES_NAME 

DES_MARGIN_CACHE_NAME = "des_margin_cache_18_arcsec"
DES_MARGIN_CACHE_DIR = MARGIN_CACHE_DIR / DES_MARGIN_CACHE_NAME

des_dr2 = lsdb.read_hats(DES_DIR, margin_cache=DES_MARGIN_CACHE_DIR)
gaia_dr3_pm_greater_100 = lsdb.read_hats(GAIA_DIR)

print("Defined directories.")

Defined directories.


In [4]:
with Client():
    gaia_dr3_dim_pm_greater_1000 = gaia_dr3_pm_greater_100.query('pmra**2 + pmdec**2 > 1000**2').query('phot_bp_mean_mag > 19 or phot_bp_mean_mag.isna()').compute()

gaia_dr3_dim_pm_greater_1000

2025-07-01 09:27:59,549 - tornado.application - ERROR - Uncaught exception GET /status/ws (10.8.11.31)
HTTPServerRequest(protocol='http', host='localhost:8787', method='GET', uri='/status/ws', version='HTTP/1.1', remote_ip='10.8.11.31')
Traceback (most recent call last):
  File "/ocean/projects/phy210048p/jpassos/conda-venvs/lsdb-main/lib/python3.12/site-packages/tornado/web.py", line 1848, in _execute
    result = await result
             ^^^^^^^^^^^^
  File "/ocean/projects/phy210048p/jpassos/conda-venvs/lsdb-main/lib/python3.12/site-packages/tornado/websocket.py", line 277, in get
    await self.ws_connection.accept_connection(self)
  File "/ocean/projects/phy210048p/jpassos/conda-venvs/lsdb-main/lib/python3.12/site-packages/tornado/websocket.py", line 890, in accept_connection
    await self._accept_connection(handler)
  File "/ocean/projects/phy210048p/jpassos/conda-venvs/lsdb-main/lib/python3.12/site-packages/tornado/websocket.py", line 973, in _accept_connection
    await self.

Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
33418788618313182,66837563803594880,56.696369,24.92893,520.177372,-1157.434442,18.573349,19.320286,17.775904
72355622838789655,144711230753602048,68.900732,21.250996,860.716142,-959.20791,19.120424,21.595911,17.604784
...,...,...,...,...,...,...,...,...
3206298010515918110,6412596012146801152,331.076453,-56.793812,3981.976663,-2466.831815,18.030664,21.142073,16.060108
3410775449123011603,6821550875241089152,335.301587,-19.973358,905.928232,-588.583929,17.651583,20.288462,16.224318


In [49]:
from astropy.coordinates import SkyCoord
from astropy import units as u
from astropy.time import Time
import math

def construct_des_hpms_df(df, row, cone_search_rad):
    # Get coords of dim HPMS
    ra, dec = row['ra'], row['dec']
    radius_arcsec = compute_rad(row['pmra'], row['pmdec']) 

    # Find all the DES stars within some radius of the dim HPMS
    des_hpms = des_dr2.cone_search(ra=ra, dec=dec, radius_arcsec=radius_arcsec).compute()
    if des_hpms.empty: return df

    # Concatenate all Gaia columns to this des_hpms for future reference
    m = des_hpms.shape[0]

    row_df = pd.DataFrame(row).T
    repeated = pd.DataFrame([row_df.iloc[0]] * m, columns=row_df.columns)

    # combined = des_hpms.join(repeated)
    repeated.index = des_hpms.index
    combined = pd.concat([des_hpms, repeated], axis=1)
    
    
    # Concatenate des_hpms to df to assemble final product
    df = pd.concat([df, combined], axis=0)
    return df

def compute_rad(pmra, pmdec):
    # ra and dec are arbitrary and only used to compute total distance traveled
    ra=20
    dec=24
    pm_ra_cosdec= pmra * np.cos(np.radians(dec))
    old_pos = SkyCoord(ra=ra * u.deg,dec=dec*u.deg,
                      pm_ra_cosdec=pm_ra_cosdec*u.mas/u.yr, pm_dec=pmdec*u.mas/u.yr,
                      frame='icrs', obstime=Time('J2000'))
    new_pos = old_pos.apply_space_motion(new_obstime=Time('J2005'))

    total_dist = old_pos.separation(new_pos).arcsecond

    
    return math.ceil(total_dist)
    

In [50]:
warnings.filterwarnings("ignore", category=UserWarning, module="erfa")
# Initialize an empty DataFrame to collect all results
final_df = pd.DataFrame()

# Loop through each row (e.g., of your HPMS dataframe)
for _, row in gaia_dr3_dim_pm_greater_1000.iterrows():
    final_df = construct_des_hpms_df(final_df, row, cone_search_rad)

final_df

  df = pd.concat([df, combined], axis=0)


Unnamed: 0_level_0,CLASS_STAR_G,CLASS_STAR_R,CLASS_STAR_I,CLASS_STAR_Z,CLASS_STAR_Y,FLAGS_G,FLAGS_R,FLAGS_I,FLAGS_Z,FLAGS_Y,RA,DEC,COADD_OBJECT_ID,SPREAD_MODEL_G,SPREAD_MODEL_R,SPREAD_MODEL_I,SPREAD_MODEL_Z,SPREAD_MODEL_Y,WAVG_MAG_PSF_G,WAVG_MAG_PSF_R,WAVG_MAG_PSF_I,WAVG_MAG_PSF_Z,WAVG_MAG_PSF_Y,WAVG_MAGERR_PSF_G,WAVG_MAGERR_PSF_R,WAVG_MAGERR_PSF_I,WAVG_MAGERR_PSF_Z,WAVG_MAGERR_PSF_Y,NEPOCHS_G,NEPOCHS_R,NEPOCHS_I,NEPOCHS_Z,NEPOCHS_Y,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1
2365588896711673656,0.679783,0.673899,0.68697,0.41555,0.570589,3,3,3,3,3,57.789181,-56.457732,1423270931,0.006449,0.009814,0.008855,0.005876,-0.028283,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,0,0,0,0,0,4731177819694416000,57.791234,-56.456615,272.491124,-1043.112326,19.997510,20.687986,19.061255
2365588907847830054,0.028272,0.034199,0.029082,0.028588,0.02739,3,3,3,3,3,57.791379,-56.45693,1423270647,0.00839,0.006059,0.010938,0.017797,0.01371,21.255957,19.892443,19.415752,19.215796,19.23847,0.012747,0.005008,0.004984,0.00828,0.017643,2,2,2,1,2,4731177819694416000,57.791234,-56.456615,272.491124,-1043.112326,19.997510,20.687986,19.061255
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3206298016903817602,0.768001,0.13328,0.076799,0.102573,0.260082,1,1,1,1,1,331.073253,-56.790655,949223665,0.010093,0.018254,0.007055,-0.004826,-0.005615,-99.0,23.490891,23.837128,-99.0,-99.0,-99.0,0.13595,0.236819,-99.0,-99.0,0,2,1,0,0,6412596012146801152,331.076453,-56.793812,3981.976663,-2466.831815,18.030664,21.142073,16.060108
3206298056050087471,0.568923,0.515469,0.201747,0.059395,0.380853,0,0,0,0,0,331.084618,-56.789782,949223570,-0.004386,0.02612,0.013398,0.011248,-0.022761,-99.0,24.009134,-99.0,-99.0,-99.0,-99.0,0.220395,-99.0,-99.0,-99.0,0,1,0,0,0,6412596012146801152,331.076453,-56.793812,3981.976663,-2466.831815,18.030664,21.142073,16.060108


In [51]:
final_df.to_csv("des_dim_hpms.csv")