## All-sky NOIRLab Source Catalog (DR2)

To install astro-datalab:  
```pip install --ignore-installed --no-cache-dir astro-datalab```

Login via the command line to datalab:  
```datalab login``` 


Information on the columns that each of the tables contains can be found here:  
[Exposure](https://datalab.noirlab.edu/query.php?name=nsc_dr2.exposure)  
[Measurements](https://datalab.noirlab.edu/query.php?name=nsc_dr2.meas)  
[Coverage](https://datalab.noirlab.edu/query.php?name=nsc_dr2.coverage)  
[Chip](https://datalab.noirlab.edu/query.php?name=nsc_dr2.chip)  
[Object](https://datalab.noirlab.edu/query.php?name=nsc_dr2.object)  



**References**:  
Nidever, D. L., Dey, A., Fasbender, K., Juneau, S., Meisner, A. M., Wishart, J., Scott, A., Matt, K., Nikutta, R., & Pucha, R. (2021). Second Data Release of the All-sky NOIRLab Source Catalog. The Astronomical Journal, 161(4), 192. https://doi.org/10.3847/1538-3881/ABD6E1

In [1]:
import os
import glob
import pandas as pd
import numpy as np
import healpy as hp 
import matplotlib.pyplot as plt

%matplotlib inline 

from astropy.time import Time

In [2]:
# Directory that holds all cached NSC files. Set the NSC_DATA_DIR environment
# variable to point the notebook at a different location; otherwise the
# original default is used.
DATA_DIR = os.environ.get("NSC_DATA_DIR", "/epyc/projects/thor/thor_data/nsc/")

In [None]:
from dl import authClient as ac
from dl import queryClient as qc
from getpass import getpass

# Log in to Data Lab; the password is prompted for and never stored in the notebook.
username = 'moeyensj'
token = ac.login(username, getpass('Account Password: '))
if not ac.isValidToken(token):
    # NOTE(review): on failure the returned value appears to carry the error
    # text rather than a usable token, so it is shown to aid debugging.
    print('Error: invalid login for user %s (%s)' % (username, token))
else:
    # Do not echo the token itself: it is a credential and would be saved
    # with the notebook output.
    print("Login succeeded for user %s" % username)

Get the contents of the `exposure` table.

In [11]:
# The exposure table is cached as a CSV file; Data Lab is only queried when
# the cached file is missing.
file_name = os.path.join(DATA_DIR, "nsc_dr2_exposure.csv")
if not os.path.exists(file_name):
    query = "SELECT * FROM nsc_dr2.exposure ORDER BY mjd ASC"
    result = qc.query(
        token,
        sql=query,
        fmt='csv',
        out=file_name,
    )
exposures = pd.read_csv(file_name, index_col=False)

In [15]:
# Number of rows in the exposure table.
exposures.shape[0]

412116

In [9]:
# ISO timestamps of the earliest and latest exposure in the table.
Time(exposures["mjd"].agg(["min", "max"]).to_numpy(), format="mjd", scale="utc").isot

array(['2012-09-23T03:40:41.661', '2019-11-16T01:25:22.744'], dtype='<U23')

56193.1532599614

In [None]:
# c4d exposures ordered by descending depth10sig.
# NOTE(review): the note left here reads "cut on 20 sigma, bright exposures (< 20)";
# no such cut is applied in this cell.
exposures.loc[exposures["instrument"].eq("c4d")].sort_values("depth10sig", ascending=False)

In [None]:
# Exposures whose depth10sig exceeds 22.
exposures.loc[exposures["depth10sig"].gt(22)]

In [None]:
# Only a 100-row sample of the coverage table is requested, so the cached CSV
# holds that sample rather than the full table.
file_name = os.path.join(DATA_DIR, "nsc_dr2_coverage.csv")
if not os.path.exists(file_name):
    query = "SELECT * FROM nsc_dr2.coverage LIMIT 100"
    result = qc.query(
        token,
        sql=query,
        fmt='csv',
        out=file_name,
    )
coverage = pd.read_csv(file_name, index_col=False)

In [None]:
# List the columns available in the exposure table.
exposures.columns

In [None]:
# Summary statistics of the exposure times (mjd) for every nest4096 value.
visits = exposures.groupby(["nest4096"])["mjd"].describe()

In [None]:
# Number of distinct nest4096 values that have at least one exposure.
visits.shape[0]

In [None]:
# Span between the first and last exposure time of each nest4096 value.
visits["range"] = visits["max"].sub(visits["min"])
visits.head()

In [None]:
# Rows with at least 10 exposures spread over a range of at least 10.
visits[visits["range"].ge(10) & visits["count"].ge(10)]

In [None]:
# Same selection as before, additionally requiring a count-to-range ratio of at least 1.
visits[
    visits["count"].div(visits["range"]).ge(1)
    & visits["count"].ge(10)
    & visits["range"].ge(10)
]

In [None]:
# Observation dates of the exposures with nest4096 == 34153899.
exposures.loc[exposures["nest4096"] == 34153899, "dateobs"]

In [None]:
# Exposure time against elat for exposures with -20 <= elat <= 20.
fig, ax = plt.subplots(1, 1, dpi=200)
ax.scatter(
    exposures.loc[exposures["elat"].between(-20, 20), "mjd"].values,
    exposures.loc[exposures["elat"].between(-20, 20), "elat"].values,
    s=1,
)

In [None]:
# Distribution of exposure times for nest4096 == 73819750.
fig, ax = plt.subplots(1, 1)
ax.hist(exposures.loc[exposures["nest4096"] == 73819750, "mjd"].values, bins=100)

In [None]:
# ISO timestamp corresponding to MJD 57000.
Time([57000], format="mjd", scale="utc").isot

The NSC DR2 release contains data from three instruments:

In [8]:
# Number of exposures per instrument.
exposures["instrument"].value_counts()

c4d    340952
k4m     41561
ksb     29603
Name: instrument, dtype: int64

CTIO-4m+DECam : c4d  
KPNO-4m+Mosaic3 : k4m  
Bok-2.3m+90Prime : ksb  

Number of measurements per instrument:

In [None]:
# Total of the nmeas column for each instrument.
exposures.groupby("instrument")["nmeas"].sum()

In [None]:
# Span between the first and last exposure time of each instrument.
exposures.groupby("instrument")["mjd"].max() - exposures.groupby("instrument")["mjd"].min()

In [None]:
# Summary statistics of the exposure times (mjd) for each instrument.
exposures.groupby(by="instrument")["mjd"].describe()

In [None]:
# One scatter series per instrument: nmeas against exposure time, log-scaled y axis.
fig, ax = plt.subplots(1, 1, dpi=200)
for instrument in exposures["instrument"].unique():
    selected = exposures[exposures["instrument"].isin([instrument])]
    ax.scatter(selected["mjd"].values, selected["nmeas"].values, label=instrument, s=1)

ax.set_yscale("log")
ax.legend(frameon=False, bbox_to_anchor=(1.03, 0.6))
ax.set_ylabel("Measurements")
ax.set_xlabel("Observation Time [MJD]")

In [None]:
# One scatter series per instrument: depth95 against exposure time.
fig, ax = plt.subplots(1, 1, dpi=200)
for instrument in exposures["instrument"].unique():
    selected = exposures[exposures["instrument"].isin([instrument])]
    ax.scatter(selected["mjd"].values, selected["depth95"].values, label=instrument, s=1)

ax.legend(frameon=False, bbox_to_anchor=(1.03, 0.6))
ax.set_ylabel(r"Depth $P_{95}$")
ax.set_xlabel("Observation Time [MJD]")

In [None]:
# c4d exposures taken within window_size days of start_mjd (both ends inclusive).
start_mjd = 56515
window_size = 20
window = exposures[
    exposures["instrument"].eq("c4d")
    & exposures["mjd"].between(start_mjd, start_mjd + window_size)
]

In [None]:
# Latest exposure time in the window, and the earliest one lowered by 0.01,
# both rounded to two decimals.
(
    np.round(window["mjd"].max(), decimals=2),
    np.round(window["mjd"].min() - 0.01, decimals=2),
)

In [None]:
# Count the objects that have exactly one detection. The query is submitted
# asynchronously and polled every 10 seconds until it finishes.
query = """
SELECT COUNT(*)
FROM nsc_dr2.object 
WHERE (ndet = 1)
"""
results = qc.query(
    token,
    adql=query,
    fmt='pandas',
    timeout=3600,
    async_=True,
    wait=True,
    poll=10,
    verbose=1,
)
results

In [None]:
# Count the objects that have at most four detections. The query is submitted
# asynchronously and polled every 10 seconds until it finishes.
query = """
SELECT COUNT(*)
FROM nsc_dr2.object 
WHERE (ndet <= 4)
"""
results = qc.query(
    token,
    adql=query,
    fmt='pandas',
    timeout=3600,
    async_=True,
    wait=True,
    poll=10,
    verbose=1,
)
results

Number of objects with ndet<=4: 1763381575  
Number of objects with ndet==1: 886983556 

In [None]:
# Edges of the RA slices: 20 slices per unit of RA between 0 and 360.
ras = np.linspace(0, 360, num=360 * 20 + 1)

In [None]:
# Check how a slice edge is rendered in the file names (zero-padded, two decimals).
format(ras[103], "06.2f")

In [None]:
def queryRASlice(ra_start, ra_end):
    """
    Download and cache the measurements of the objects in one RA slice.

    Joins nsc_dr2.object with nsc_dr2.meas for objects with ndet <= 4 and
    ra_start <= ra < ra_end, sorts the rows by (mjd, measid) and stores them
    as a CSV file in DATA_DIR. Nothing is queried when the CSV file of the
    slice already exists.

    Parameters
    ----------
    ra_start : float
        Inclusive lower RA bound of the slice.
    ra_end : float
        Exclusive upper RA bound of the slice.

    Returns
    -------
    None
    """
    file_name = os.path.join(DATA_DIR, f"nsc_dr2_observations_{ra_start:06.2f}_{ra_end:06.2f}.csv")

    if not os.path.exists(file_name):
        query = f"""
        SELECT o.id, o.ra AS mean_ra, o.dec AS mean_dec, o.ndet, o.nphot, o.mjd AS mean_mjd, o.deltamjd, m.measid, m.mjd, m.ra, m.dec, m.raerr, m.decerr, m.mag_auto, m.magerr_auto, m.filter, m.exposure, m.class_star 
        FROM nsc_dr2.object AS o 
        JOIN nsc_dr2.meas as m 
        ON o.id = m.objectId 
        WHERE (o.ndet <= 4) AND (o.ra >= {ra_start}) AND (o.ra < {ra_end})
        """
        results = qc.query(token, adql=query, fmt='pandas', timeout=3600, async_=True, wait=True, poll=5, verbose=0)
        results = results.sort_values(by=["mjd", "measid"], ascending=[True, True])

        # Write under a temporary name and rename afterwards: an interrupted
        # run must not leave a truncated CSV that the existence check above
        # would treat as a finished slice.
        partial_file_name = file_name + ".part"
        results.to_csv(partial_file_name, index=False)
        os.replace(partial_file_name, file_name)

    return

In [None]:
import multiprocessing as mp

# NOTE: os.nice adds to the current niceness, so every re-run of this cell
# lowers the priority of the kernel further.
os.nice(10)

# Query consecutive RA slices in parallel. The context manager shuts the
# workers down even when one of the queries raises; starmap itself blocks
# until every slice has been processed.
with mp.Pool(10) as pool:
    pool.starmap(
        queryRASlice,
        zip(ras[:-1], ras[1:])
    )

In [None]:
# Earliest and latest exposure time (mjd) in the exposure table.
exposures["mjd"].min(), exposures["mjd"].max()
 

In [None]:
# Start times of consecutive windows of window_size days that together span
# the exposure table, from the floor of the first to the ceiling of the last mjd.
window_size = 31
window_starts = np.arange(
    start=np.floor(exposures["mjd"].min()),
    stop=np.ceil(exposures["mjd"].max()),
    step=window_size,
)

# Per-slice CSV files in DATA_DIR, in sorted order.
observation_files = sorted(
    glob.glob(os.path.join(DATA_DIR, "nsc_dr2_observations*.csv"))
)

In [None]:
def processWindow(window_file_name, observations):
    """
    Append a set of observations to the HDF5 file of a window.

    Parameters
    ----------
    window_file_name : str
        Path of the HDF5 file to append to (the rows go under the key "data").
    observations : pandas.DataFrame
        Rows to append. Nothing is written when it is empty.

    Returns
    -------
    None
    """
    # Nothing to store for this window.
    if len(observations) == 0:
        return

    # Fixed minimum string widths for the listed columns.
    string_widths = {'id': 40, 'measid': 40, 'exposure': 40, 'filter' : 2}
    observations.to_hdf(
        window_file_name,
        key="data",
        mode="a",
        append=True,
        min_itemsize=string_widths,
    )
    return

In [None]:
# Close the worker pool created in an earlier cell. This relies on `pool`
# still being defined in the kernel.
pool.close()

In [None]:
import multiprocessing as mp

# NOTE: os.nice adds to the current niceness, so every re-run of this cell
# lowers the priority of the kernel further.
os.nice(10)

# Make sure the output directory exists before any worker tries to write to it.
hdf5_dir = os.path.join(DATA_DIR, "hdf5")
os.makedirs(hdf5_dir, exist_ok=True)

# The window boundaries and the names of the window files do not depend on
# the observation file being processed, so they are built once up front.
window_bounds = []
window_file_names = []
for window_start in window_starts:
    window_end = window_start + window_size
    start_isot = Time(window_start, scale="utc", format="mjd").isot.split("T")[0]
    end_isot = Time(window_end, scale="utc", format="mjd").isot.split("T")[0]

    window_bounds.append((window_start, window_end))
    window_file_names.append(
        os.path.join(hdf5_dir, f"nsc_dr2_observations_{start_isot}_{end_isot}.h5")
    )

objids = []
obsids = []
observation_files_completed = []

# The context manager shuts the workers down even when a file fails to process.
with mp.Pool(40) as pool:
    for i, observation_file in enumerate(observation_files):
        observations = pd.read_csv(observation_file, index_col=False)
        objids.append(observations["id"].unique())
        obsids.append(observations["measid"].unique())

        # Split the observations of this file into the time windows
        # (start inclusive, end exclusive).
        windows = [
            observations[(observations["mjd"] >= window_start) & (observations["mjd"] < window_end)]
            for window_start, window_end in window_bounds
        ]

        # Each window is appended to its own file, so within one starmap call
        # no two workers write to the same file.
        pool.starmap(
            processWindow,
            zip(window_file_names, windows)
        )

        # Record progress after every file so an interrupted run shows how far it got.
        observation_files_completed.append(observation_file)
        np.savetxt("files_processed.txt", np.array(observation_files_completed), delimiter="\n", fmt="%s")

        if (i + 1) % 20 == 0:
            print(f"Processed {i + 1} observations files.")

objids = np.concatenate(objids)
obsids = np.concatenate(obsids)
observation_files_completed = np.array(observation_files_completed)

In [None]:
# Per-window HDF5 files in the "hdf5" subdirectory of DATA_DIR, in sorted order.
observations_h5 = sorted(glob.glob(os.path.join(DATA_DIR, "hdf5", "*.h5")))

In [None]:
# Instrument prefix (first three characters of the measurement ID) -> observatory code.
observatory_codes = {"c4d": "W84", "ksb": "V00", "k4m": "695"}

# Make sure the output directory exists before the first file is written.
preprocessed_dir = os.path.join(DATA_DIR, "preprocessed")
os.makedirs(preprocessed_dir, exist_ok=True)

for i, file_in in enumerate(observations_h5):
    file_out = os.path.join(preprocessed_dir, os.path.basename(file_in))

    df = pd.read_hdf(file_in, key="data")

    # Selecting the columns and renaming them yields a new frame, so the
    # assignments below never write into a slice of the frame read from disk.
    df = df[
        ["measid", "exposure", "mjd", "ra", "dec", "raerr", "decerr", "filter", "mag_auto", "magerr_auto"]
    ].rename(
        columns={
            "measid" : "obs_id",
            "exposure" : "exposure_id",
            "mjd" : "mjd_utc",
            "raerr" : "ra_sigma",
            "decerr" : "dec_sigma",
            "mag_auto" : "mag",
            "magerr_auto" : "mag_sigma",
        }
    )

    # Scale the positional uncertainties by 1/3600
    # (presumably arcseconds to degrees -- TODO confirm).
    df["ra_sigma"] = df["ra_sigma"] / 3600.0
    df["dec_sigma"] = df["dec_sigma"] / 3600.0

    # Rows whose prefix is not listed above are left without an observatory code.
    df["observatory_code"] = df["obs_id"].str[:3].map(observatory_codes)

    df = df.sort_values(
        by=["mjd_utc", "observatory_code"],
        ignore_index=True
    )
    df.to_hdf(
        file_out,
        key='data',
        index=False,
        mode='w',
        format='table',
    )

    if (i + 1) % 20 == 0:
        print(f"Processed {i + 1} observations files.")