In [41]:
%matplotlib inline

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import sys

from scipy.stats import norm

from astropy.coordinates import SkyCoord
from astropy.coordinates import ICRS, Galactic, FK4, FK5
from astropy.coordinates import Angle, Latitude, Longitude
import astropy.units as u
import build_catalog as bc

# ONCdb Recipe

Here are step-by-step instructions on how to generate the ONCdb from VizieR catalogs.

## Step 1: Ingest ACS Catalog

Download the initial data from the Robberto et al. (2013) HST Treasury Program on the ONC from VizieR (http://vizier.u-strasbg.fr/viz-bin/asu-tsv).

Save a tab-separated file as `ONCdb/raw_data/viz_acs.tsv`. Then comment out (by prefixing with `#`) the line with the units and the line with the dashes just above the data, so that they are skipped when the file is read.

In [33]:
# Directory that holds the raw VizieR downloads
root = 'raw_data/'

# Catalog description as a tuple -- ('catname', 'filename', 'catID column name')
cat_info = ('ACS', 'viz_acs.tsv', 'ONCacs')

# Load the TSV, average the coordinates of rows that share a catalog ID, then
# add the bookkeeping columns in front of the coordinates and drop the
# catalog-ID index in favour of a plain 0..N-1 index.
acs = (
    pd.read_csv(root + cat_info[1], sep='\t', comment='#', engine='python')
    .loc[:, [cat_info[2], '_RAJ2000', '_DEJ2000']]
    .groupby(cat_info[2])
    .mean()
    .assign(
        oncID=np.nan,
        oncflag='',
        catname=cat_info[0],
        catID=lambda grouped: grouped.index,
        # the corrected coordinates start out as copies of the catalog values
        ra_corr=lambda grouped: grouped['_RAJ2000'],
        dec_corr=lambda grouped: grouped['_DEJ2000'],
    )
    .loc[:, ['oncID', 'oncflag', 'catname', 'catID',
             'ra_corr', 'dec_corr', '_RAJ2000', '_DEJ2000']]
    .reset_index(drop=True)
)

print('ACS has', acs.shape[0], 'objects')

ACS has 3399 objects


## Step 2: Generate initial matrix of pairwise distances

In [34]:
# Get sky coordinates from RA and Dec
c_acs = SkyCoord(acs['_RAJ2000'], acs['_DEJ2000'], unit='degree')

# NOTE(review): progress_meter, group_sources and dist_crit are not defined or
# imported under these names in the visible cells (only `import build_catalog
# as bc` is) -- confirm where they come from so this cell survives
# "Restart Kernel -> Run All".

# Fill a preallocated N x N matrix and build the data frame once at the end.
# Adding one column per iteration to an initially empty DataFrame fragments it
# (pandas emits a PerformanceWarning for frames grown this way).
n_sources = len(c_acs)
dist_matrix = np.empty((n_sources, n_sources))
print("Measuring pairwise distances...")
for k in range(n_sources):
    # Column k holds the separation (arcsec) of every source from source k
    sep = c_acs.separation(c_acs[k]).arcsecond
    dist_matrix[:, k] = sep
    progress_meter(k*100./n_sources)

print('\n')

# Integer column labels 0..N-1, same as the column-by-column construction
build_dist = pd.DataFrame(dist_matrix)

# Add the distances to each row
onc_build = pd.concat([acs, build_dist], axis=1)
# Use string labels throughout so they match what is read back from the TSV
onc_build.columns = onc_build.columns.astype(str)
onc_build.index = onc_build.index.astype(str)

# Group the sources by the specified critical distance
onc_build = group_sources(onc_build, dist_crit)

print(len(onc_build['oncID'].value_counts()), 'unique sources')

# Save it to file
onc_build_name = 'ONCdb_pairwise_distances.tsv'
onc_build.to_csv(root + onc_build_name, sep='\t', index=False)

print('Saved to', onc_build_name)

Measuring pairwise distances...
loading... 100.0%

Grouping sources...
loading... 100.0%

3396 unique sources
Saved to ONCdb_pairwise_distances.tsv


## Step 3: Add additional catalogs

Now we can add further catalogs, each downloaded from VizieR as a tab-separated ASCII file.

In [37]:
# Input files are exported straight from VizieR as 'tab-separated values' (tsv).
# The two lines between the header and the data have to be commented out
# (comment character = #).
#
# astroquery can access VizieR tables directly, but it is a bit slow and
# requires an internet connection.
#
# Source tables from Robberto 2013, HST Treasury Program on the ONC:
#   WFPC2  is J/ApJS/207/10/table6
#   NICMOS is J/ApJS/207/10/table7

# File name (tsv) of the existing pairwise distance matrix; both names start
# out pointing at the file written in the previous step
onc_up_name = onc_build_name
onc_ex_name = onc_up_name

# Catalog description as a tuple -- ('catname', 'filename', 'catID column name')
cat_info = ('WFPC2', 'viz_wfpc2.tsv', 'ONCpc2')

# Cross-match radii (arcsec) before and after the shift; the pre-shift radius
# is kept relatively generous in case the catalog is offset
dist_crit_pre = 1.0
dist_crit_post = 0.25

# Read a normal VizieR catalog, collapse multiple observations of one object
# into a single row of mean coordinates, and add the bookkeeping columns that
# later steps fill in (corrected coordinates start out empty here).
new_cat = (
    pd.read_csv(root + cat_info[1], sep='\t', comment='#', engine='python')
    .loc[:, [cat_info[2], '_RAJ2000', '_DEJ2000']]
    .groupby(cat_info[2])
    .mean()
    .assign(
        oncID=np.nan,
        oncflag='',
        catname=cat_info[0],
        catID=lambda grouped: grouped.index,
        ra_corr=np.nan,
        dec_corr=np.nan,
    )
    .loc[:, ['oncID', 'oncflag', 'catname', 'catID',
             'ra_corr', 'dec_corr', '_RAJ2000', '_DEJ2000']]
    .reset_index(drop=True)
)

print('new catalog', cat_info[0], 'has', new_cat.shape[0], 'objects')

# Load the matrix saved by the previous step
onc_ex = pd.read_csv(root + onc_ex_name, sep='\t', engine='python')

print('existing oncdb has', onc_ex.shape[0], 'objects')

new catalog WFPC2 has 1488 objects
existing oncdb has 3399 objects


In [38]:
# Sky coordinates of the new catalog and of the existing ONC database
c_new = SkyCoord(new_cat['_RAJ2000'], new_cat['_DEJ2000'], unit='degree')
c_onc = SkyCoord(onc_ex['_RAJ2000'], onc_ex['_DEJ2000'], unit='degree')

# Clear the flag column.
# Plain column assignment replaces the column outright. `onc_ex` was read back
# from a TSV, where empty flags come back as NaN, so its 'oncflag' column may
# be float64; writing '' into such a column in place via `.loc[:, ...]` is an
# incompatible-dtype set (deprecated, later an error, in recent pandas).
onc_ex['oncflag'] = ''
new_cat['oncflag'] = ''

# NOTE(review): get_pw_dists and group_sources are not defined or imported
# under these names in the visible cells -- confirm their origin (presumably
# the build_catalog module) so the notebook runs on a fresh kernel.
onc_iv = get_pw_dists(c_new, c_onc, new_cat, onc_ex)

# Group with the generous pre-shift radius
onc_iv = group_sources(onc_iv, dist_crit_pre)

loading... 99.9%

Grouping sources...
loading... 100.0%



In [39]:
# Number of distinct (non-missing) oncID values after grouping
print(onc_iv['oncID'].nunique(), 'unique sources')

3342 unique sources


In [40]:
# Calculate offsets for catalog correction, using the grouped matrix, the name
# of the new catalog and the names of its RA/Dec columns.
# NOTE(review): find_offsets is not defined or imported under this name in the
# visible cells -- confirm its origin and the meaning of the six return values
# (presumably per-match deltas plus their mean and standard deviation).
delta_ra, delta_dec, mu_ra, mu_dec, std_ra, std_dec = find_offsets(onc_iv, cat_info[0], '_RAJ2000', '_DEJ2000')

1298 one-to-one matches found
loading... 99.9%

Delta RA (arcsec): -0.00892258995075
Delta DEC (arcsec): -0.0687896791834
