In [1]:
import lsdb
import splusdata
import pandas as pd
from getpass import getpass

In [2]:
# Print everything splusdata.get_hipscats() reports when called without a
# pattern: a nested dict of releases with their 'hipscats' and 'margins' names.

print(splusdata.get_hipscats())

{'dr4_vacs': {'qso_z': {'hipscats': ['qso_z'], 'margins': ['qso_z_2arcsec']}, 'photozs': {'hipscats': ['photozs'], 'margins': ['photozs_2arcsec']}, 'calib_flag': {'hipscats': ['calib_flag'], 'margins': ['calib_flag_2arcsec']}, 'sqg': {'hipscats': ['sqg'], 'margins': ['sqg_2arcsec']}}, 'sdr1': {'hipscats': ['shorts'], 'margins': ['shorts_2arcsec']}, 'dr4': {'hipscats': ['single', 'dual', 'psf'], 'margins': ['psf_2arcsec', 'single_2arcsec', 'dual_2arcsec']}}


In [3]:
# Search for a specific pattern. For "dr4/dual" the result is a list holding
# one [catalog URL, margin URL] pair.
print(splusdata.get_hipscats("dr4/dual"))

[['https://splus.cloud/HIPS/catalogs/dr4/dual/', 'https://splus.cloud/HIPS/catalogs/dr4/dual_2arcsec/']]


In [4]:
from dask.distributed import Client

# Start a local Dask client with 10 workers, each limited to 8GB of memory,
# and print its summary (address, process/thread count, total memory).
client = Client(
    n_workers=10,
    memory_limit="8GB",
)
print(client)

<Client: 'tcp://127.0.0.1:38533' processes=10 threads=20, memory=74.51 GiB>


In [5]:
# Look up the DR4 dual catalog. get_hipscats returns a list of matches; keep
# the first one, which is a [catalog URL, margin URL] pair.
# The pattern is "dr4/dual" (not "idr4/dual") so that it agrees with the
# release key reported by get_hipscats() and with the "dr4/psf" lookup.
dr4_links = splusdata.get_hipscats("dr4/dual")[0]
dr4_links

['https://splus.cloud/HIPS/catalogs/dr4/dual/',
 'https://splus.cloud/HIPS/catalogs/dr4/dual_2arcsec/']

In [6]:
# Open the margin catalog (second URL of the pair) and list, one per line,
# every column it offers so the selection for the main catalog can be chosen.
dr4_margin = lsdb.read_hipscat(dr4_links[1])
for column_name in dr4_margin.columns:
    print(column_name)

ID
RA
DEC
A
B
BACKGROUND
BACKGROUND_J0378
BACKGROUND_J0395
BACKGROUND_J0410
BACKGROUND_J0430
BACKGROUND_J0515
BACKGROUND_J0660
BACKGROUND_J0861
BACKGROUND_g
BACKGROUND_i
BACKGROUND_r
BACKGROUND_u
BACKGROUND_z
CLASS_STAR
DET_ID_dual
EBV_SCH
ELLIPTICITY
ELONGATION
FLUX_RADIUS_20
FLUX_RADIUS_50
FLUX_RADIUS_70
FLUX_RADIUS_90
FWHM
FWHM_J0378
FWHM_J0395
FWHM_J0410
FWHM_J0430
FWHM_J0515
FWHM_J0660
FWHM_J0861
FWHM_g
FWHM_i
FWHM_n
FWHM_n_J0378
FWHM_n_J0395
FWHM_n_J0410
FWHM_n_J0430
FWHM_n_J0515
FWHM_n_J0660
FWHM_n_J0861
FWHM_n_g
FWHM_n_i
FWHM_n_r
FWHM_n_u
FWHM_n_z
FWHM_r
FWHM_u
FWHM_z
Field
ID_DEC
ID_RA
ISOarea
J0378_ID_dual
J0378_PStotal
J0378_aper_3
J0378_aper_6
J0378_auto
J0378_iso
J0378_petro
J0395_ID_dual
J0395_PStotal
J0395_aper_3
J0395_aper_6
J0395_auto
J0395_iso
J0395_petro
J0410_ID_dual
J0410_PStotal
J0410_aper_3
J0410_aper_6
J0410_auto
J0410_iso
J0410_petro
J0430_ID_dual
J0430_PStotal
J0430_aper_3
J0430_aper_6
J0430_auto
J0430_iso
J0430_petro
J0515_ID_dual
J0515_PStotal
J0515_aper_3
J

In [7]:
# Columns to load from the dual catalog, grouped for readability.
# The order is the order in which they are requested from read_hipscat.
dual_columns = [
    # identifiers and sky position
    "Field", "ID", "RA", "DEC",
    # pixel position and shape columns
    "X", "Y", "A", "B", "ELLIPTICITY", "ELONGATION",
    "FWHM", "KRON_RADIUS", "ISOarea",
    # MU_MAX_* columns
    "MU_MAX_r", "MU_MAX_J0660", "MU_MAX_i",
    # s2n_*_PStotal columns
    "s2n_DET_PStotal",
    "s2n_g_PStotal", "s2n_J0515_PStotal", "s2n_r_PStotal",
    "s2n_J0660_PStotal", "s2n_i_PStotal",
    # SEX_FLAGS_* columns
    "SEX_FLAGS_DET", "SEX_FLAGS_r", "SEX_FLAGS_J0660", "SEX_FLAGS_i",
    # *_PStotal columns, each followed by its e_* companion, for all 12 filters
    "r_PStotal", "e_r_PStotal",
    "g_PStotal", "e_g_PStotal",
    "i_PStotal", "e_i_PStotal",
    "u_PStotal", "e_u_PStotal",
    "z_PStotal", "e_z_PStotal",
    "J0378_PStotal", "e_J0378_PStotal",
    "J0395_PStotal", "e_J0395_PStotal",
    "J0410_PStotal", "e_J0410_PStotal",
    "J0430_PStotal", "e_J0430_PStotal",
    "J0515_PStotal", "e_J0515_PStotal",
    "J0660_PStotal", "e_J0660_PStotal",
    "J0861_PStotal", "e_J0861_PStotal",
]

# Open the dual catalog (first URL of the pair) restricted to those columns,
# attaching the margin catalog opened earlier as its margin cache.
dual = lsdb.read_hipscat(
    dr4_links[0],
    margin_cache=dr4_margin,
    columns=dual_columns,
)

### PSF

In [8]:
# First match for "dr4/psf"; index 0 is used below as the catalog URL and
# index 1 as its margin catalog.
dr4_psf = splusdata.get_hipscats("dr4/psf")[0]

# Open the PSF margin catalog and list its columns, one per line.
psf_margin = lsdb.read_hipscat(dr4_psf[1])
for column_name in psf_margin.columns:
    print(column_name)

ID
RA
DEC
CLASS_STAR_J0378
CLASS_STAR_J0395
CLASS_STAR_J0410
CLASS_STAR_J0430
CLASS_STAR_J0515
CLASS_STAR_J0660
CLASS_STAR_J0861
CLASS_STAR_g
CLASS_STAR_i
CLASS_STAR_r
CLASS_STAR_u
CLASS_STAR_z
DoPHOT_Star_number_J0378
DoPHOT_Star_number_J0395
DoPHOT_Star_number_J0410
DoPHOT_Star_number_J0430
DoPHOT_Star_number_J0515
DoPHOT_Star_number_J0660
DoPHOT_Star_number_J0861
DoPHOT_Star_number_g
DoPHOT_Star_number_i
DoPHOT_Star_number_r
DoPHOT_Star_number_u
DoPHOT_Star_number_z
J0378_psf
J0395_psf
J0410_psf
J0430_psf
J0515_psf
J0660_psf
J0861_psf
X_J0378
X_J0395
X_J0410
X_J0430
X_J0515
X_J0660
X_J0861
X_g
X_i
X_r
X_u
X_z
Y_J0378
Y_J0395
Y_J0410
Y_J0430
Y_J0515
Y_J0660
Y_J0861
Y_g
Y_i
Y_r
Y_u
Y_z
e_J0378_psf
e_J0395_psf
e_J0410_psf
e_J0430_psf
e_J0515_psf
e_J0660_psf
e_J0861_psf
e_g_psf
e_i_psf
e_r_psf
e_u_psf
e_z_psf
g_psf
i_psf
r_psf
s2n_J0378_psf
s2n_J0395_psf
s2n_J0410_psf
s2n_J0430_psf
s2n_J0515_psf
s2n_J0660_psf
s2n_J0861_psf
s2n_g_psf
s2n_i_psf
s2n_r_psf
s2n_u_psf
s2n_z_psf
u_psf
z_psf
No

In [9]:
# Columns to load from the PSF catalog: sky position plus, for each of the
# 12 filters, the *_psf column followed by its e_*_psf companion.
psf_columns = [
    "RA", "DEC",
    "r_psf", "e_r_psf",
    "g_psf", "e_g_psf",
    "i_psf", "e_i_psf",
    "u_psf", "e_u_psf",
    "z_psf", "e_z_psf",
    "J0378_psf", "e_J0378_psf",
    "J0395_psf", "e_J0395_psf",
    "J0410_psf", "e_J0410_psf",
    "J0430_psf", "e_J0430_psf",
    "J0515_psf", "e_J0515_psf",
    "J0660_psf", "e_J0660_psf",
    "J0861_psf", "e_J0861_psf",
]

# Open the PSF catalog restricted to those columns, with its margin catalog
# attached as the margin cache.
psf = lsdb.read_hipscat(
    dr4_psf[0],
    margin_cache=psf_margin,
    columns=psf_columns,
)

In [10]:
# Confirm that only the requested columns were loaded.
print(psf.columns)

Index(['RA', 'DEC', 'r_psf', 'e_r_psf', 'g_psf', 'e_g_psf', 'i_psf', 'e_i_psf',
       'u_psf', 'e_u_psf', 'z_psf', 'e_z_psf', 'J0378_psf', 'e_J0378_psf',
       'J0395_psf', 'e_J0395_psf', 'J0410_psf', 'e_J0410_psf', 'J0430_psf',
       'e_J0430_psf', 'J0515_psf', 'e_J0515_psf', 'J0660_psf', 'e_J0660_psf',
       'J0861_psf', 'e_J0861_psf'],
      dtype='object')


#### Matching two hipscat tables

In [11]:
# Cross-match the PSF catalog (left) with the dual catalog (right) using a
# 2 arcsec search radius. .compute() is not called on the result in this cell.
dual_psf = psf.crossmatch(dual, radius_arcsec = 2)

#### Matching an external table with hipscat

In [12]:
# GALEX table
# Read the external catalog into a pandas DataFrame; point the path at your
# own CSV to match a different table.
df = pd.read_csv("../GUVcat_AISxSDSS_HSmaster.csv") # load your table

In [13]:
# Scatter the DataFrame to distribute it across workers
# NOTE(review): `df_scattered` is not referenced by any later cell visible in
# this notebook; lsdb.from_dataframe is given `df` itself. Confirm whether this
# step is still needed.
df_scattered = client.scatter(df)

In [19]:
# Build an lsdb catalog from the pandas table, taking the sky position of
# each row from its GALEX_RA / GALEX_DEC columns.
# margin_threshold is presumably in arcseconds (3600 = 1 degree) - TODO confirm
# against the lsdb documentation.
df_hips = lsdb.from_dataframe(
    df,
    ra_column="GALEX_RA",
    dec_column="GALEX_DEC",
    margin_threshold=3600,
)

This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.
This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.


In [22]:
# Cross-match the external table (left) with the dual catalog opened earlier
# (right) using a 2 arcsec radius, and run the computation with .compute().
matched_table_dual = df_hips.crossmatch(dual, radius_arcsec = 2).compute()

This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.


In [33]:
# Save the dual cross-match to CSV; index=False leaves the index out of the file.
matched_table_dual.to_csv("../GUVcat_AISxSDSS_HSmaster-splusDr4-dual-2arcsec-lsdb.csv", index=False)

In [34]:
# Cross-match the external table (left) with the PSF catalog opened earlier
# (right) using a 2 arcsec radius, and run the computation with .compute().
matched_table_psf = df_hips.crossmatch(psf, radius_arcsec = 2).compute()

This may cause some slowdown.
Consider loading the data with Dask directly
 or using futures or delayed objects to embed the data into the graph without repetition.
See also https://docs.dask.org/en/stable/best-practices.html#load-data-with-dask for more information.


In [35]:
# Save the PSF cross-match to CSV; index=False leaves the index out of the file.
matched_table_psf.to_csv("../GUVcat_AISxSDSS_HSmaster-splusDr4-psf-2arcsec-lsdb.csv", index=False)