In [3]:
import sys
import os

# Put the directory one level above the working directory at the front of the
# import search path (only if it is not listed yet), so modules stored there
# can be imported by the cells below.
parent_dir = os.path.abspath(os.pardir)
if sys.path.count(parent_dir) == 0:
    sys.path.insert(0, parent_dir)

# Dask puts out more advisory logging than we care for.
# It takes some doing to quiet all of it, but this recipe works.
import dask
import logging
import dask_jobqueue
from dask.dataframe.utils import make_meta
from dask.distributed import Client

# Step 1: ask Dask, through its own configuration, to log the "distributed"
# component at critical level only.
dask.config.set({"logging.distributed": "critical"})

# Step 2: raise the level on the standard-library "distributed" logger as well.
# This also has to be done for the config setting above to be effective.
logger = logging.getLogger("distributed")
logger.setLevel(logging.CRITICAL)

import warnings

# Step 3: suppress the specific warning about Dask dashboard port usage.
# `message` is treated as a regular expression matched against the start of the
# warning text, so only warnings beginning with this sentence are ignored.
warnings.filterwarnings("ignore", message="Port 8787 is already in use.")

from pathlib import Path

import numpy as np
import pandas as pd
from astropy.io import ascii
import matplotlib.pyplot as plt
import time

from hats import read_hats

import lsdb

from catalog_filtering import bandFilterLenient, contains_PM
import hpms_pipeline as hpms

# Progress marker: reaching this line means every import above succeeded.
print("Imported libraries.")

Imported libraries.


In [4]:
# --- Selection cuts handed to bandFilterLenient -----------------------------
bandList = ['G', 'R', 'I', 'Z', 'Y']
class_star = None          # passed as classStar
spread_model = 0.05        # passed as spreadModel
magnitude_error = 0.05     # passed as magError
check_flags = True         # passed as flag
mag = 19                   # passed as mag
query_string = bandFilterLenient(
    bandList,
    classStar=class_star,
    spreadModel=spread_model,
    magError=magnitude_error,
    flag=check_flags,
    mag=mag,
)

# --- Column names: one entry per band for each per-band prefix, with the ----
# --- position/identifier columns in between (order is preserved). ----------
des_cols = (
    [f'{prefix}_{band}' for prefix in ('CLASS_STAR', 'FLAGS') for band in bandList]
    + ['RA', 'DEC', 'COADD_OBJECT_ID']
    + [
        f'{prefix}_{band}'
        for prefix in ('SPREAD_MODEL', 'WAVG_MAG_PSF', 'WAVG_MAGERR_PSF')
        for band in bandList
    ]
)

# --- Arguments forwarded to hpms.execute_pipeline further down --------------
# NOTE(review): the meaning and units of these values are defined inside
# hpms_pipeline, not here - confirm against that module before tuning them.
k = 2
max_obj_deviation = 0.2
des_id_col = 'COADD_OBJECT_ID_1'
mag_cols = ['WAVG_MAG_PSF_I']
min_neighbors = 4
max_neighbor_dist = 24
xmatch_max_neighbors = 100
print("Defined globals.")

Defined globals.


In [6]:
# Folder that holds the benchmark catalogs, relative to the working directory.
BENCHMARK_CATALOG_DIR = Path("../../../../catalogs/benchmark_catalogs")

# Directory names of the catalog and of its margin-cache companion.
CATALOG_NAME = "2.306965202564744e+18"
CATALOG_MC_NAME = "2306965202564744e18_25_arcsec_mc"

# Full locations, built by appending each name to the benchmark folder.
CATALOG_DIR = BENCHMARK_CATALOG_DIR.joinpath(CATALOG_NAME)
CATALOG_MC_DIR = BENCHMARK_CATALOG_DIR.joinpath(CATALOG_MC_NAME)

# Open the benchmark catalog with LSDB, attaching the companion margin-cache
# catalog through the `margin_cache` argument.
two_deg_catalog = lsdb.read_hats(CATALOG_DIR, margin_cache=CATALOG_MC_DIR)

In [18]:
# Cone selection around a fixed sky position. A radius of 1200 arcsec is
# 20 arcmin, i.e. one third of a degree.
subset = two_deg_catalog.cone_search(
    ra=1.383284152,
    dec=-37.36774403,
    radius_arcsec=1200,
)

# Materialise the selection so it can be inspected and counted later on;
# `subset` itself is what gets passed to the pipeline.
computed_subset = subset.compute()
computed_subset

Unnamed: 0_level_0,CLASS_STAR_G,CLASS_STAR_R,CLASS_STAR_I,CLASS_STAR_Z,CLASS_STAR_Y,FLAGS_G,FLAGS_R,FLAGS_I,FLAGS_Z,FLAGS_Y,RA,DEC,COADD_OBJECT_ID,SPREAD_MODEL_G,SPREAD_MODEL_R,SPREAD_MODEL_I,SPREAD_MODEL_Z,SPREAD_MODEL_Y,WAVG_MAG_PSF_G,WAVG_MAG_PSF_R,WAVG_MAG_PSF_I,WAVG_MAG_PSF_Z,WAVG_MAG_PSF_Y,WAVG_MAGERR_PSF_G,WAVG_MAGERR_PSF_R,WAVG_MAGERR_PSF_I,WAVG_MAGERR_PSF_Z,WAVG_MAGERR_PSF_Y,NEPOCHS_G,NEPOCHS_R,NEPOCHS_I,NEPOCHS_Z,NEPOCHS_Y
_healpix_29,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
1153464661165131512,0.029689,0.033724,0.043144,0.04444,0.040881,0,0,0,0,0,1.145519,-37.642058,1036793083,0.011249,0.011377,0.011358,0.009856,0.011135,21.890545,20.526283,20.163149,19.773655,19.762308,0.010904,0.004212,0.004752,0.006789,0.023832,8,8,8,8,5
1153466129614435957,0.729479,0.523546,0.368646,0.434529,0.486851,0,0,0,0,0,1.139233,-37.638988,1036792993,0.001268,0.002313,0.014087,-0.003566,-0.058329,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,-99.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1154612699815914664,0.4848,0.564932,0.434464,0.448048,0.370617,0,0,0,0,0,1.292314,-37.043882,1037687316,-0.008209,0.006537,-0.009026,-0.021621,-0.032005,-99.0,24.135427,-99.0,-99.0,-99.0,-99.0,0.238153,-99.0,-99.0,-99.0,0,1,0,0,0
1154612700106214671,0.005041,0.022872,0.026697,0.025792,0.012385,0,0,0,0,0,1.289189,-37.043636,1037687155,0.029609,0.025752,0.025085,0.022232,0.022856,23.778969,22.791555,21.794109,21.187263,21.192188,0.059455,0.028718,0.020466,0.022907,0.085325,5,6,7,8,5


In [30]:
# Re-import hpms_pipeline so that edits made to the module on disk are picked
# up without restarting the kernel. The cell's output is the reloaded module.
import importlib
importlib.reload(hpms)

<module 'hpms_pipeline' from '/ocean/projects/phy210048p/jpassos/astrophysics/Jupyter Notebooks/kth_star_pipeline/hpms_pipeline.py'>

In [20]:
# Report how many partitions the loaded catalog is split into.
print(two_deg_catalog.npartitions)

10


In [21]:
from dask.distributed import performance_report

# One constant feeds both the catalog name and its output folder, so the two
# can no longer drift apart when the run is renamed.
RESULTS_NAME = 'one_third_deg_cs_results_10'
RESULTS_DIR = BENCHMARK_CATALOG_DIR / RESULTS_NAME
print(f"Length of subset: {len(computed_subset)}")

# `start_wall` is the epoch timestamp shown to the reader. The elapsed time is
# measured separately with time.perf_counter(), which is monotonic and is not
# affected by system clock adjustments during a multi-minute run.
start_wall = time.time()
start_tick = time.perf_counter()
print('Start time:',start_wall)

# Run the pipeline on a local 4-worker, single-threaded-per-worker cluster
# (4GB memory limit per worker) and record a Dask performance report to an
# HTML file. Both context managers are closed when the block exits.
with Client(threads_per_worker=1, memory_limit='4GB', n_workers=4), performance_report(filename='is_this_real.html'):
    pipeline_results = hpms.execute_pipeline(subset, query_string, xmatch_max_neighbors,
                                             max_neighbor_dist, min_neighbors, k,
                                             max_obj_deviation, des_id_col, mag_cols)
    # Writing the catalog to disk happens inside the cluster context.
    pipeline_results.to_hats(catalog_name=RESULTS_NAME,
                             base_catalog_path=RESULTS_DIR)

# The timer brackets the whole `with` statement, so the figure includes
# cluster start-up and shutdown as well as the pipeline itself.
wall_time = time.perf_counter() - start_tick
print("Wall time: ", wall_time)

Length of subset: 44504
Start time: 1751469597.3248553
Wall time:  249.18840146064758


In [None]:
# Histogram of values returned by hpms.sample_mag_diffs for the i-band PSF
# magnitude column of the full catalog.
# NOTE(review): 1000 is presumably the number of samples drawn - confirm
# against hpms_pipeline.sample_mag_diffs.
sampled_diffs = hpms.sample_mag_diffs(two_deg_catalog, 'WAVG_MAG_PSF_I', 1000)
plt.hist(sampled_diffs, bins=20, color='green', edgecolor='black')
plt.title('i Band Magnitude Deltas Randomly Sampled in DES')
plt.xlabel('Magnitude Delta')
# Corrected axis label (was misspelled "Freqeuncy").
plt.ylabel('Frequency')
# Render the figure explicitly so the cell does not echo a Text object repr.
plt.show()