# Searching for High PM Stars in Other Catalogs

In this notebook, we explore how high proper-motion (PM) stars appear in various catalogs, obtained from https://data.lsdb.io/. This will be useful in gauging whether our current method is capable of identifying these high-PM stars, and whether other techniques are viable (i.e. KBMOD). We search for these stars by first using PM data from Gaia DR3 to select stars with high proper motions, and then crossmatching those Gaia stars with the other catalogs.

In [2]:
from pathlib import Path
import s3fs

import numpy as np
import pandas as pd
from astropy.io import ascii
from astropy.coordinates import SkyCoord
import matplotlib.pyplot as plt

from dask.distributed import Client
import dask.array
from dask.dataframe.utils import make_meta

from hats import read_hats
from hats.inspection import plot_pixels
from hats_import.catalog.file_readers import CsvReader
from hats_import.margin_cache.margin_cache_arguments import MarginCacheArguments
from hats_import.pipeline import ImportArguments, pipeline_with_client

import lsdb

from catalog_filtering import bandFilterLenient, contains_PM

print("Imported libraries.")

Imported libraries.


In [5]:
# Proper-motion speed window used for the final selection.
# Units are milliarcseconds per year.
pm_speed_min = 1000
pm_speed_max = 100_000

# Where derived catalogs are written, and where the shared Gaia DR3 catalog is read from.
CATALOG_DIR = Path("../../catalogs")
GAIA_HATS_DIR = Path("../../../shared/hats/catalogs/gaia_dr3/gaia")

# Intermediate Gaia subset (name + output directory).
GAIA_100_NAME = "gaia_dr3_pm_greater_100"
GAIA_100_DIR = CATALOG_DIR.joinpath(GAIA_100_NAME)

# Final high-PM Gaia subset (name + output directory).
GAIA_1000_NAME = "gaia_dr3_pm_greater_1000"
GAIA_1000_DIR = CATALOG_DIR.joinpath(GAIA_1000_NAME)

In [6]:
%%time
# Build the intermediate Gaia subset: rows whose pmra/pmdec satisfy
# pmra^2 + pmdec^2 > 100^2, keeping only the columns needed downstream.
#
# The export takes minutes and writes into GAIA_100_DIR, so it is skipped when
# that directory already has contents. This keeps the notebook re-runnable from
# a fresh kernel. Delete the directory to force a rebuild.
if GAIA_100_DIR.is_dir() and any(GAIA_100_DIR.iterdir()):
    print(f"{GAIA_100_NAME} already present at {GAIA_100_DIR}; skipping rebuild")
else:
    # The Dask client only needs to live for the duration of the export.
    with Client(n_workers=4):
        lsdb.read_hats(
            path=GAIA_HATS_DIR,
            columns=['ra', 'pmra', 'dec',
                     'pmdec', 'source_id', 'parallax_over_error',
                     'phot_g_mean_mag', 'phot_bp_mean_mag', 'phot_rp_mean_mag'],
        ).query('pmra*pmra + pmdec*pmdec > 100*100').to_hats(catalog_name = GAIA_100_NAME,
                                                            base_catalog_path = GAIA_100_DIR)

    print("gaia_pm successfully saved to /catalog")



gaia_pm successfully saved to /catalog
CPU times: user 1min 20s, sys: 4.34 s, total: 1min 24s
Wall time: 2min 57s


In [6]:
# Open the intermediate Gaia subset from GAIA_100_DIR.
# The bare name on the last line renders the catalog's summary table.
gaia_pm_greater_100 = lsdb.read_hats(path=GAIA_100_DIR)
gaia_pm_greater_100

Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag
npartitions=3917,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
"Order: 2, Pixel: 0",int64[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow],double[pyarrow]
"Order: 3, Pixel: 4",...,...,...,...,...,...,...,...
...,...,...,...,...,...,...,...,...
"Order: 4, Pixel: 3067",...,...,...,...,...,...,...,...
"Order: 3, Pixel: 767",...,...,...,...,...,...,...,...


In [35]:
%%time
# Select stars whose squared proper motion lies strictly between
# pm_speed_min^2 and pm_speed_max^2, and store them as their own catalog.
pm_expr = f'{pm_speed_max**2} > (pmra**2 + pmdec**2) > {pm_speed_min**2}'

# The export takes minutes and writes into GAIA_1000_DIR, so it is skipped when
# that directory already has contents. This keeps the notebook re-runnable from
# a fresh kernel. Delete the directory to force a rebuild.
if GAIA_1000_DIR.is_dir() and any(GAIA_1000_DIR.iterdir()):
    print(f"{GAIA_1000_NAME} already present at {GAIA_1000_DIR}; skipping rebuild")
else:
    with Client():
        gaia_pm_greater_100.query(pm_expr).to_hats(
            catalog_name=GAIA_1000_NAME,
            base_catalog_path=GAIA_1000_DIR
        )



CPU times: user 4min 46s, sys: 5.81 s, total: 4min 52s
Wall time: 7min 50s


In [36]:
# Load the high-PM catalog into memory, keeping only stars that are either
# missing a BP magnitude or have phot_bp_mean_mag > 19.
gaia_pm_greater_1000 = (
    lsdb.read_hats(GAIA_1000_DIR)
    .query('phot_bp_mean_mag > 19 or phot_bp_mean_mag.isna()')
    .compute()
)

# Row count first, then the frame itself as the cell's rich output.
print(gaia_pm_greater_1000.shape[0])
gaia_pm_greater_1000

63


Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
33418788618313182,66837563803594880,56.696369,24.92893,520.177372,-1157.434442,18.573349,19.320286,17.775904
72355622838789655,144711230753602048,68.900732,21.250996,860.716142,-959.20791,19.120424,21.595911,17.604784
...,...,...,...,...,...,...,...,...
3206298010515918110,6412596012146801152,331.076453,-56.793812,3981.976663,-2466.831815,18.030664,21.142073,16.060108
3410775449123011603,6821550875241089152,335.301587,-19.973358,905.928232,-588.583929,17.651583,20.288462,16.224318


## Obtaining relevant Catalogs

In [11]:
# Open every catalog we want to test for coverage. Catalogs come from three
# places: the shared local HATS directory, https://data.lsdb.io, and a public
# S3 bucket. A margin cache is passed only where the original cell supplied one.
ztf_14_obj = lsdb.read_hats(path='../../../shared/hats/catalogs/ztf_dr14/ztf_object')
ztf_16 = lsdb.read_hats(path='../../../shared/hats/catalogs/ztf_dr16/zubercal')
ztf_22 = lsdb.read_hats(
    path='https://data.lsdb.io/hats/ztf_dr22/ztf_lc',
    margin_cache='https://data.lsdb.io/hats/ztf_dr22/ztf_lc_10arcs',
)
panstarrs_1_obj = lsdb.read_hats(
    path='s3://stpubdata/panstarrs/ps1/public/hats/otmo',
    margin_cache='s3://stpubdata/panstarrs/ps1/public/hats/otmo_10arcs',
)
des_y6 = lsdb.read_hats(path='../../../shared/hats/catalogs/des/des_y6_gold')
delve_2 = lsdb.read_hats(path='https://data.lsdb.io/hats/delve_dr2')
tic_v82 = lsdb.read_hats(
    path='https://data.lsdb.io/hats/tic/tic',
    margin_cache='https://data.lsdb.io/hats/tic/tic_10arcs',
)
allwise = lsdb.read_hats(path='https://data.lsdb.io/hats/wise/allwise')
neowise = lsdb.read_hats(path='https://data.lsdb.io/hats/wise/neowise')
twomass = lsdb.read_hats(path='https://data.lsdb.io/hats/two_mass')
erass1 = lsdb.read_hats(path='https://data.lsdb.io/hats/erosita')
hsc_p3_r = lsdb.read_hats(
    path='../../../shared/hats/catalogs/hsc_pdr3/hsc_pdr3-forced-R',
    margin_cache='../../../shared/hats/catalogs/hsc_pdr3/hsc_pdr3-forced-R_10arcs',
)
skymapper_4_photometry = lsdb.read_hats(path='https://data.lsdb.io/hats/skymapper_dr4/photometry')
print('Read catalogs')

Read catalogs


In [20]:
# One table pairing each report label with its catalog object, so the two
# lists below cannot drift out of step with each other.
_labelled_catalogs = (
    ('ztf_14_obj', ztf_14_obj),
    ('ztf_16', ztf_16),
    ('ztf_22', ztf_22),
    ('panstarrs_1_obj', panstarrs_1_obj),
    ('des_y6', des_y6),
    ('delve_2', delve_2),
    ('tic_v82', tic_v82),
    ('allwise', allwise),
    ('neowise', neowise),
    ('twomass', twomass),
    ('erass1', erass1),
    ('hsc_p3_r', hsc_p3_r),
    ('skymapper_4_photometry', skymapper_4_photometry),
)

# Parallel lists, in the same order: catalogs[i] is labelled catalog_names[i].
catalogs = [catalog_obj for _label, catalog_obj in _labelled_catalogs]
catalog_names = [label for label, _catalog_obj in _labelled_catalogs]

print("Defined arrays")

Defined arrays


In [21]:
def coords_in_catalogs(row, catalogs, catalog_names):
    '''Report which catalogs' coverage maps contain the sky position of one row.

    Intended for use with ``df.apply(..., axis=1)``. The test is made against
    each catalog's ``hc_structure.moc``, so a True value means the position
    falls inside that catalog's coverage, not that a matching source exists.

    Args:
        row: Row passed by ``df.apply()``; must provide 'ra' and 'dec',
            which are interpreted in degrees.
        catalogs: List of catalogs in HATS format.
        catalog_names: List of catalog names, aligned one-to-one with
            ``catalogs``; these become the labels of the returned Series.

    Returns:
        pd.Series indexed by catalog name, holding for each catalog whether
        its coverage contains the row's coordinates.
    '''
    # The position is the same for every catalog, so build it once.
    position = SkyCoord(ra=row['ra'], dec=row['dec'], unit='deg')

    return pd.Series({
        catalog_name: catalog.hc_structure.moc.contains_skycoords(position)[0]
        for catalog, catalog_name in zip(catalogs, catalog_names)
    })

print("defined func")

defined func


In [37]:
# Sanity check of the ID column's dtype before the frame is modified.
print(gaia_pm_greater_1000['source_id'].dtype)

# Only 'ra' and 'dec' are handed to apply(), so source_id never has to be
# dropped and re-inserted around the row-wise call.
data = gaia_pm_greater_1000[['ra', 'dec']].apply(
    coords_in_catalogs, axis=1, catalogs=catalogs, catalog_names=catalog_names
)

# Re-running this cell must not fail on overlapping columns: any coverage
# columns left from a previous run are dropped before the fresh ones are joined.
# source_id is kept as the first column, as in the original layout.
gaia_pm_greater_1000 = (
    gaia_pm_greater_1000
    .drop(columns=catalog_names, errors='ignore')
    .pipe(lambda df: df[['source_id'] + [c for c in df.columns if c != 'source_id']])
    .join(data)
)
gaia_pm_greater_1000

int64[pyarrow]


Unnamed: 0_level_0,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,ztf_14_obj,ztf_16,ztf_22,panstarrs_1_obj,des_y6,delve_2,tic_v82,allwise,neowise,twomass,erass1,hsc_p3_r,skymapper_4_photometry
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
33418788618313182,66837563803594880,56.696369,24.92893,520.177372,-1157.434442,18.573349,19.320286,17.775904,True,True,True,True,False,False,True,True,True,True,False,False,False
72355622838789655,144711230753602048,68.900732,21.250996,860.716142,-959.20791,19.120424,21.595911,17.604784,True,True,True,True,False,False,True,True,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3206298010515918110,6412596012146801152,331.076453,-56.793812,3981.976663,-2466.831815,18.030664,21.142073,16.060108,False,False,False,False,True,True,True,True,True,True,True,False,True
3410775449123011603,6821550875241089152,335.301587,-19.973358,905.928232,-588.583929,17.651583,20.288462,16.224318,True,True,True,True,False,True,True,True,True,True,False,False,True


In [38]:
# Persist the coverage table (index included) next to the notebook.
gaia_pm_greater_1000.to_csv(path_or_buf='catalog_coverage_dim.csv')

In [39]:
# Reload the saved coverage table from disk and show it.
catalog_coverage = pd.read_csv(filepath_or_buffer='catalog_coverage_dim.csv')
display(catalog_coverage)

Unnamed: 0,_healpix_29,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,ztf_14_obj,...,panstarrs_1_obj,des_y6,delve_2,tic_v82,allwise,neowise,twomass,erass1,hsc_p3_r,skymapper_4_photometry
0,33418788618313182,66837563803594880,56.696369,24.928930,520.177372,-1157.434442,18.573349,19.320286,17.775904,True,...,True,False,False,True,True,True,True,False,False,False
1,72355622838789655,144711230753602048,68.900732,21.250996,860.716142,-959.207910,19.120424,21.595911,17.604784,True,...,True,False,False,True,True,True,True,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61,3206298010515918110,6412596012146801152,331.076453,-56.793812,3981.976663,-2466.831815,18.030664,21.142073,16.060108,False,...,False,True,True,True,True,True,True,True,False,True
62,3410775449123011603,6821550875241089152,335.301587,-19.973358,905.928232,-588.583929,17.651583,20.288462,16.224318,True,...,True,False,True,True,True,True,True,False,False,True


In [40]:
# For every catalog, sum its coverage column in catalog_coverage.
# Despite the variable name, these are counts of True values, not fractions.
ratios = pd.Series({
    name: catalog_coverage[name].sum()
    for name in catalog_names
})
ratios

ztf_14_obj                42
ztf_16                    43
ztf_22                    41
panstarrs_1_obj           43
des_y6                     9
delve_2                   37
tic_v82                   63
allwise                   63
neowise                   63
twomass                   63
erass1                    34
hsc_p3_r                   3
skymapper_4_photometry    43
dtype: int64

In [41]:
# Stars whose position falls inside the HSC coverage, with their total
# proper motion added as 'pmtot' = sqrt(pmra^2 + pmdec^2).
# .assign() builds the new column on a fresh frame instead of writing into the
# result of query(), which avoids pandas' SettingWithCopyWarning.
hsc_true = (
    catalog_coverage
    .query('hsc_p3_r == True ')
    .assign(pmtot=lambda df: (df['pmra']**2 + df['pmdec']**2)**0.5)
)
hsc_true

Unnamed: 0,_healpix_29,source_id,ra,dec,pmra,pmdec,phot_g_mean_mag,phot_bp_mean_mag,phot_rp_mean_mag,ztf_14_obj,...,des_y6,delve_2,tic_v82,allwise,neowise,twomass,erass1,hsc_p3_r,skymapper_4_photometry,pmtot
23,1819280488241997640,3638560954457699200,202.951993,-1.285494,-414.209178,-1041.278255,20.534954,21.39854,18.914696,True,...,False,True,True,True,True,True,True,True,True,1120.638053
24,1849489717977963240,3698979462002285824,180.986807,0.262663,-1219.590296,-282.117684,19.133556,20.761082,17.564669,True,...,False,True,True,True,True,True,True,True,True,1251.795062
26,1948178546855350528,3896357089270247168,179.588931,4.579897,570.395737,-923.194122,20.678568,21.712635,19.082453,True,...,False,True,True,True,True,True,True,True,True,1085.190621
