# Source Catalog Loading, Plotting and Exploring Examples

In [None]:
import numpy as np
import os.path as op

from astropy.table import Table, unique

from hetdex_tools.source_catalog import plot_source_group
from hetdex_api.elixer_widget_cls import ElixerWidget
from hetdex_api.config import HDRconfig

This catalog is a combination of sources from the continuum source catalog, the curated detection line catalog of the same version, and the confirmed AGN detection IDs from Chenxu Liu's AGN search. Each row corresponds to a single detection, identified by its unique catalog `detectid`. Detectids of AGN sources *may* appear twice if they are also found in the continuum or curated line catalogs. This is intentional, as information about the detection type is used to guess the redshift of the sources. The column `'det_type'` can be one of `['line','cont','agn']`. Using the friends-of-friends clustering algorithm from `hetdex_api/hetdex_tools/fof_kdtree.py`, we group detections spatially into unique sources using a linking length of 5 arcsec. The unique source identifier is given in `'source_id'`. If a source is found within 5 arcsec of a GAIA-matched source from Hawkins, Zeimann et al. 2020, the GAIA ID is given in `'gaia_match_id'`. I use various logic (found in `guess_source_wavelength` in `hetdex_tools/source_catalog.py`) to try to make a guess at the redshift. This is preliminary!!! There are no errors or confidence levels, and it is not science ready. But feedback is highly welcomed! Any analysis done with this catalog should be presented in the appropriate science working group.

In [None]:
# Catalog release to load; edit this string to open a different version.

version = '2.1.2'

# HDRconfig provides the data-release paths. The source catalog is read from
# the 'catalogs' subdirectory of its detect_dir.
config = HDRconfig()
catalog_name = 'source_catalog_' + version + '.fits'
catfile = op.join(config.detect_dir, 'catalogs', catalog_name)

# One row per detection; used by every cell below.
source_table = Table.read(catfile)

print('Source catalog was found at {}'.format(catfile))

The columns are listed below; most come directly from the detection and Elixer catalogs. Check the wiki for more information:
    
    https://luna.mpe.mpg.de/wikihetdex/index.php/Column_Info

In [None]:
# Show the catalog's columns as the cell output
source_table.columns

In [None]:
# Collapse the detection-level table to rows with distinct 'source_id' values,
# i.e. one row per source group (astropy's `unique` keeps the first row found
# for each key by default).
uniq_table = unique(source_table, keys='source_id')

In [None]:
# Classify the unique sources by their guessed redshift ('z_guess') and report
# how many fall into each class.
#
# NOTE(review): the LAE selection previously had no comparison bounds
# (`(z) * (z)`), so it matched every source with a non-zero z_guess, including
# the OII emitters counted above. The bounds below assume the HETDEX
# Lyman-alpha window (1.88 < z < 3.52) -- confirm against the intended cut.
Z_OII_MAX = 0.7
Z_LAE_MIN = 1.88
Z_LAE_MAX = 3.52

z_guess = uniq_table['z_guess']

# Each selection is an array of row indices into uniq_table.
sel_star = np.where(z_guess == 0.0)[0]
sel_oii = np.where((z_guess > 0) & (z_guess < Z_OII_MAX))[0]
sel_lae = np.where((z_guess > Z_LAE_MIN) & (z_guess < Z_LAE_MAX))[0]

print('There are {} stars, {} OII emitters and {} LAEs'.format(np.size(sel_star), np.size(sel_oii), np.size(sel_lae)))

## Here is an example of plotting a group

In [None]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline

In [None]:
# Pick a source group to inspect.
#
# source_ids are sorted by member number ('n_member' in the catalog), which is
# the number of detections found in the source_id group.

sid = 2120000000000  # this source has the most members. It's in GOODS-N.

# All large galaxies are already removed from the Spring and Fall DEX fields,
# so they will not be in this source catalog.

# Alternative choices -- uncomment one to override `sid`:
#
#   a star
#sid = uniq_table['source_id'][sel_star[40008]]
#
#   an OII galaxy
#sid = uniq_table['source_id'][sel_oii[125008]]
#
#   an LAE
#sid = uniq_table['source_id'][sel_lae[114008]]
#
#   an entry from the AGN list
#sel_agn = source_table['det_type'] == 'agn'
#agn_ids = np.unique(source_table['source_id'][sel_agn])
#sid = agn_ids[98]

# Pull out every detection that belongs to the chosen source group so each
# member can be looked at in Elixer.
sel_group = source_table['source_id'] == sid
group = source_table[sel_group]

#print(sid)
#print(group)

# Overplot the source group on an image:
#   label=True  shows the detectids on the image
#   save=True   saves the image to your working directory
# The size of the line emission is scaled to the S/N of the line.
plot_source_group(source_id=sid, source_table=source_table, label=False, save=False)

# Open the Elixer widget on the detectids of this group
ew = ElixerWidget(detectlist=group['detectid'])

## Some Examples - Plot redshift distribution

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Histogram of guessed redshifts for the unique sources (linear y-axis).
# The title is built from `version` so it stays in step with the catalog that
# was actually loaded instead of a hard-coded release string.
plt.hist(uniq_table['z_guess'], range=(0,3.5), bins=50)
plt.title('Redshift distribution of unique sources in {}'.format(version))
plt.xlabel('z')
plt.ylabel('count')
#plt.savefig('z_dist_uniq_{}.png'.format(version))

In [None]:
# Same redshift histogram as above, with a logarithmic y-axis.
# The title is built from `version` so it stays in step with the catalog that
# was actually loaded instead of a hard-coded release string.
plt.hist(uniq_table['z_guess'], range=(0,3.5), bins=50)
plt.title('Redshift distribution of unique sources in {}'.format(version))
plt.xlabel('z')
plt.ylabel('count')
# `nonposy` was deprecated in matplotlib 3.3 and later removed; `nonpositive`
# is its replacement (requires matplotlib >= 3.3). 'clip' keeps empty bins
# from breaking the log axis.
plt.yscale('log', nonpositive='clip')
#plt.savefig('z_dist_uniq_{}_log.png'.format(version))