# Searching for High PM Stars in Other Catalogs

In this notebook, we seek to explore the appearance of high PM stars in various catalogs, obtained from https://data.lsdb.io/. This will be useful in gauging wether our current method will be capable of identifying these high PM stars, and wether other techniques are viable (i.e. KBMOD). We will search for these stars by first utilizing PM data from Gaia DR3, and crossmatching Gaia stars with high proper motions with those from the other catalogs 

In [1]:
from pathlib import Path
import s3fs

import numpy as np
import pandas as pd
from astropy.io import ascii
from astropy.coordinates import SkyCoord
import matplotlib.pyplot as plt

from dask.distributed import Client
import dask.array
from dask.dataframe.utils import make_meta

from hats import read_hats
from hats.inspection import plot_pixels
from hats_import.catalog.file_readers import CsvReader
from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments
from hats_import.pipeline import ImportArguments, pipeline_with_client

import lsdb

from catalog_filtering import bandFilterLenient, contains_PM

print("Imported libraries.")

Imported libraries.


In [2]:
pm_speed_min = 1000 #units are milliarcseconds per year
pm_speed_max = 10**5
CATALOG_DIR = Path("../../catalogs")
GAIA_HATS_DIR = Path("../../../shared/hats/catalogs/gaia_dr3/gaia")
GAIA_100_NAME = "gaia_dr3_pm_greater_100"
GAIA_100_DIR = CATALOG_DIR / GAIA_100_NAME
GAIA_1000_NAME = 'gaia_dr3_pm_greater_1000'
GAIA_1000_DIR = CATALOG_DIR / GAIA_1000_NAME

In [6]:
%%time
with Client(n_workers=4):
    lsdb.read_hats(
        path=GAIA_HATS_DIR,
        columns=['ra', 'pmra', 'dec', 
                 'pmdec', 'source_id', 'parallax_over_error', 
                 'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag'],
    ).query('pmra*pmra + pmdec*pmdec > 100*100').to_hats(catalog_name = GAIA_100_NAME, 
                                                        base_catalog_path = GAIA_100_DIR)

print("gaia_pm successfully saved to /catalog")



gaia_pm successfully saved to /catalog
CPU times: user 1min 20s, sys: 4.34 s, total: 1min 24s
Wall time: 2min 57s


In [13]:
gaia_pm_greater_100 = lsdb.read_hats(GAIA_100_DIR)
gaia_pm_greater_100

Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag
npartitions=3917,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Order: 2, Pixel: 0",int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow]
"Order: 3, Pixel: 4",...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...
"Order: 4, Pixel: 3067",...,...,...,...,...,...,...,...
"Order: 3, Pixel: 767",...,...,...,...,...,...,...,...


In [35]:
%%time
with Client():
    pm_expr = f'{pm_speed_max**2} > (pmra**2 + pmdec**2) > {pm_speed_min**2}'
    gaia_pm_greater_100.query(pm_expr).to_hats(
        catalog_name=GAIA_1000_NAME, 
        base_catalog_path=GAIA_1000_DIR
    )



CPU times: user 4min 46s, sys: 5.81 s, total: 4min 52s
Wall time: 7min 50s


In [3]:
gaia_pm_greater_1000 = lsdb.read_hats(GAIA_1000_DIR).compute()
print(len(gaia_pm_greater_1000))
gaia_pm_greater_1000

633


Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
315264182406594,630505494719360,46.613135,1.961064,389.25305,-925.02626,8.457596,9.291474,7.563784
613857838730173,1227712107314688,43.195868,1.928423,1400.291765,-515.645438,13.919317,14.903977,12.939081
...,...,...,...,...,...,...,...,...
3445176671119583786,6890353330746858368,314.193986,-10.453538,-37.079544,-1124.455038,10.418879,11.718892,9.285866
3445567357812853734,6891134705556330880,322.516126,-12.511222,1019.79614,-259.597456,8.540231,9.297098,7.690686


## Obtaining relevant Catalogs

In [5]:
ztf_14_obj = lsdb.read_hats('../../../shared/hats/catalogs/ztf_dr14/ztf_object')
ztf_16 = lsdb.read_hats('../../../shared/hats/catalogs/ztf_dr16/zubercal')
ztf_22 = lsdb.read_hats('https://data.lsdb.io/hats/ztf_dr22/ztf_lc', margin_cache='https://data.lsdb.io/hats/ztf_dr22/ztf_lc_10arcs')
panstarrs_1_obj = lsdb.read_hats('s3://stpubdata/panstarrs/ps1/public/hats/otmo', margin_cache='s3://stpubdata/panstarrs/ps1/public/hats/otmo_10arcs')
des_y6 = lsdb.read_hats('../../../shared/hats/catalogs/des/des_y6_gold')
delve_2 = lsdb.read_hats('https://data.lsdb.io/hats/delve_dr2')
tic_v82 = lsdb.read_hats('https://data.lsdb.io/hats/tic/tic', margin_cache='https://data.lsdb.io/hats/tic/tic_10arcs')
allwise = lsdb.read_hats('https://data.lsdb.io/hats/wise/allwise')
neowise = lsdb.read_hats('https://data.lsdb.io/hats/wise/neowise')
twomass = lsdb.read_hats('https://data.lsdb.io/hats/two_mass')
erass1 = lsdb.read_hats('https://data.lsdb.io/hats/erosita')
splus_dual = lsdb.read_hats('https://splus.cloud/HIPS/catalogs/dr4/dual', margin_cache='https://splus.cloud/HIPS/catalogs/dr4/dual_2arcsec')
splus_psf = lsdb.read_hats('https://splus.cloud/HIPS/catalogs/dr4/psf', margin_cache='https://splus.cloud/HIPS/catalogs/dr4/psf_2arcsec')
hsc_p3_r = lsdb.read_hats('../../../shared/hats/catalogs/hsc_pdr3/hsc_pdr3-forced-R', margin_cache='../../../shared/hats/catalogs/hsc_pdr3/hsc_pdr3-forced-R_10arcs')
skymapper_4_photometry = lsdb.read_hats('https://data.lsdb.io/hats/skymapper_dr4/photometry')
print('Read catalogs')

FileNotFoundError: Failed to read HATS at location https://splus.cloud/HIPS/catalogs/dr4/dual

In [None]:
catalogs = [ztf_14_obj,
            ztf_16,
            ztf_22,
            panstarrs_1_obj,
            des_y6,
            delve_2,
            tic_v82,
            allwise,
            neowise,
            twomass,
            erass1,
            # splus_dual,
            # splus_psf,
            hsc_p3_r,
            skymapper_4_photometry]

catalog_names = ['ztf_14_obj',
            'ztf_16',
            'ztf_22',
            'panstarrs_1_obj',
            'des_y6',
            'delve_2',
            'tic_v82',
            'allwise',
            'neowise',
            'twomass',
            'erass1',
            # 'splus_dual',
            # 'splus_psf',
            'hsc_p3_r',
            'skymapper_4_photometry']

In [80]:
'''df.apply() compatible fuction which takes the ra and dec coordinates from a gaia df and checks wether the catalogs in catalogs[] 
contain those sky coordinates.

Args:
    - row: Passed by df.apply(), row from df which is where we obtain our coordinates.
    - catalogs: List of catalogs in HATS format.
    - catalog_names: List of catalog names, should align with those in catalogs.

Returns: row with boolean columns stating wether or not the catalog in catalogs contains those coordinates
'''

def coords_in_catalogs(row, catalogs, catalog_names):
    ra = row['ra']
    dec = row['dec']

    res = dict()

    for catalog, catalog_name in zip(catalogs, catalog_names):      
        res[catalog_name] = catalog.hc_structure.moc.contains_skycoords(SkyCoord(ra=ra, dec=dec, unit='deg'))[0]

    return pd.Series(res)

print("defined func")

defined func


In [82]:
print(gaia_pm_greater_1000['source_id'].dtype)
source_ids = gaia_pm_greater_1000['source_id']
gaia_pm_greater_1000 = gaia_pm_greater_1000.drop('source_id', axis=1)
data = gaia_pm_greater_1000.apply(coords_in_catalogs, axis=1, catalogs=catalogs, catalog_names=catalog_names)
gaia_pm_greater_1000['source_id'] = source_ids
col = 'source_id'
gaia_pm_greater_1000 = gaia_pm_greater_1000[[col] + [c for c in gaia_pm_greater_1000.columns if c != col]]
gaia_pm_greater_1000 = gaia_pm_greater_1000.join(data)
gaia_pm_greater_1000

int64[pyarrow]


Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,ztf_14_obj,ztf_16,ztf_22,panstarrs_1_obj,des_y6,delve_2,tic_v82,allwise,neowise,twomass,erass1,hsc_p3_r,skymapper_4_photometry
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
315264182406594,630505494719360,46.613135,1.961064,389.25305,-925.02626,8.457596,9.291474,7.563784,True,True,True,True,False,True,True,True,True,True,False,False,True
613857838730173,1227712107314688,43.195868,1.928423,1400.291765,-515.645438,13.919317,14.903977,12.939081,True,True,True,True,True,True,True,True,True,True,False,False,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3445176671119583786,6890353330746858368,314.193986,-10.453538,-37.079544,-1124.455038,10.418879,11.718892,9.285866,True,True,True,True,False,True,True,True,True,True,False,False,True
3445567357812853734,6891134705556330880,322.516126,-12.511222,1019.79614,-259.597456,8.540231,9.297098,7.690686,True,True,True,True,False,True,True,True,True,True,False,False,True


In [83]:
gaia_pm_greater_1000.to_csv('catalog_coverage.csv')