In [None]:
import os.path as op
import numpy as np
from astropy.io import ascii, fits
from astropy.io.fits import getdata
from astropy.table import Table, Column,join

In [None]:
%%javascript
// Replace the output area's _should_scroll check with a function that always
// returns false, regardless of the number of output lines passed in.
// NOTE(review): presumably this stops long cell outputs from being collapsed
// into a scroll box — confirm against the notebook front-end in use.
IPython.OutputArea.prototype._should_scroll = function(lines) {
    return false;
}

In [None]:
# Directory that contains the HETDEX AGN Catalog 1 file ('hetdex_agn.fits' is
# joined onto this path in a later cell). Edit this to match the location of
# the catalog on your own system.
path_to_agn1 = "/home/jovyan/Hobby-Eberly-Public/HETDEX/catalogs/agn_catalog_v1.0/"

# check basic info of the fits file
### There are six extensions: 1 & 4 are table extensions; 2, 3 and 5, 6 are image extensions holding the spectra and their errors.

There are three major differences between ext2&3 and ext5&6.

(1) We provide two types of spectra with their errors in ext 2&3 and ext5&6: 
As an IFU survey, HETDEX can spatially resolve some extended AGNs into multiple detections in the HETDEX catalog. We apply a flux-weighted friends-of-friends (FoF) grouping to all AGN detectids, with linking lengths of delta_r = 5 arcsec and delta_redshift = 0.1, to build the unique AGN catalog (extension 1). For each AGN, we pick the detectid closest to the FoF center as detectid_best.

ext 2&3 are the spectra for the detectid_best in the hetdex catalog. \
ext5&6 are extracted spectra at the flux weighted friend-of-friend center.

(2) Another difference between ext2&3 and ext5&6 is the number of rows. \
ext2&3 have 5322 rows, one row per AGN, arranged in the same order as extension 1.\
ext5&6 have 6004 rows. This is because some AGNs have repeat observations (identified by "shotid" in the table of extension 4). We provide the spectra of all repeat observations in ext 5&6. In the table of extension 4, each observation has one row, and the rows of ext5&6 follow the same order as extension 4.

(3) A minor difference is:\
    ext2&3 spectra and their errors have a universal extinction correction of E(B-V)=0.02 applied.\
    ext5&6 are raw data with no extinction correction applied.\
    The extinction correction only makes a <10% difference.

In [None]:
# Full path to the catalog FITS file inside the directory configured above.
fname = op.join( path_to_agn1, 'hetdex_agn.fits')
# Open the file and keep the HDU list around: later cells index into `hdul`
# (e.g. hdul[2], hdul[5]), so it is intentionally not closed here.
hdul = fits.open(fname)
# Print a summary of the HDUs in the file.
hdul.info()

## Read extension 1 as a table
### extension 1 has the basic information for each unique AGN, one row per AGN.

In [None]:
# Load HDU 1 of the catalog file as an astropy Table; later cells index the
# image rows of extension 2 with masks built from this table.
agn = Table.read( fname, format = 'fits', hdu = 1 )
# Display the table as the cell output.
agn

### check the header of the table

In [None]:
# Display the Table's `info` summary as the cell output (per-column metadata).
agn.info

## read extension 2 in as a 2-d array

In [None]:
from astrowidgets import ImageWidget

# Show the extension-2 header. str(header) concatenates all cards into one
# unbroken string, so print the repr instead to get one card per line.
print(repr(hdul[2].header))
im    = hdul[2].data # read spectra in from extension 2
im_er = hdul[3].data # read errors  in from extension 3 if needed

# Display the 2-d spectra array (one spectrum per row) in an interactive
# image widget. The display cuts are set before the array is loaded.
imw = ImageWidget()
imw.cuts = (-1, 25)
imw.load_array(im)
display(imw)

## The wavelength array corresponding to the flux array in each row is defined in the header: 
### wave_start = 3470.0, wave_step = 2.0

In [None]:
# Show the header values that define the wavelength grid and the flux unit of
# the extension-2 spectra (displayed as a tuple).
hdul[2].header['wave_start'], hdul[2].header['wave_step'], hdul[2].header['flux_unit']

In [None]:
# Build the wavelength grid shared by every row of the extension-2 spectra:
# one value per pixel along the spectral axis.
wave_start = 3470.0   # wavelength of the first pixel
wave_step  = 2.0      # wavelength increment from one pixel to the next
n_elements = hdul[2].header['NAXIS1'] #1036
pixel_index = np.arange( n_elements )
wave_arr = pixel_index * wave_step + wave_start
wave_arr

In [None]:
from IPython.display import Image
import matplotlib.pyplot as plt

# Rest-frame wavelengths of emission lines (same units as wave_arr).
w_CIIIa = 977.030
w_NIII  = 991.514
w_OVIa  = 1031.912
w_OVIb  = 1037.613
w_LyA   = 1215.24 
w_NV    = 1240.81
w_OI    = 1305.53
w_CIIa  = 1335.31
w_SiO   = 1399.8
w_CIV   = 1549.48
w_HeII  = 1640.40
w_OIII  = 1665.85
w_CIIIb = 1908.734
w_CIIb  = 2326.0
w_NeIV  = 2439.5
w_MgII  = 2799.117
w_OII   = 3729.875
w_NeVI  = 3426.85
w_NeIII = 3868.760
w_Hg    = 4341.68

# Plot settings; fontsize, xl and xh are reused by a later plotting cell.
fontsize = 15
xl, xh = 3450, 5550 

# plot one example spectrum near each of the following redshifts
z_sel = np.array( [ 0.9, 1.3, 1.8, 2.2, 2.5, 3.9 ] )

# Lines to mark on each spectrum and their labels (parallel arrays). They do
# not depend on the object being plotted, so they are built once.
lines   = np.array( [  w_OVIa, w_OVIb, w_LyA,   w_NV,   w_SiO,  w_CIV, w_HeII,   w_CIIb, w_CIIIb,  w_MgII, w_OII, w_NeVI, w_NeIII  ] )
labels  = np.array( [  ' ',     'OVI',  'LyA','    NV', 'Si+O',   'CIV', 'HeII',    'C2', 'C III]', 'MgII', 'OII', 'NeVI', 'NeIII' ] )    

z_all = np.asarray( agn['z'], dtype = float )

for z_target in z_sel:
    # Pick the AGN whose redshift is closest to the requested value. A simple
    # "z < z_target" cut followed by [0] returns the first catalog row below
    # the threshold, which need not be near z_target and tends to repeat the
    # same object for several thresholds.
    idx   = int( np.nanargmin( np.abs( z_all - z_target ) ) )
    z_obj = z_all[idx]
    # Rows of the extension-2 image follow the same order as the agn table.
    flux_arr = im[idx]

    fig, ax = plt.subplots( figsize = ( 10., 5.)  )
    ax.plot( wave_arr, flux_arr, 'k' )

    # NaN-safe vertical extent used for the line markers and label height.
    yl, yh = np.nanmin(flux_arr), np.nanmax(flux_arr)
    for w_rest, label in zip( lines, labels ):
        # Shift the rest-frame line to the observed frame of this object and
        # mark it only if it falls inside the plotted wavelength window.
        w_obs = w_rest * ( 1 + z_obj )
        if xl <= w_obs <= xh:
            ax.text( w_obs + 10, 0.9 * yh, label, color = 'blue', fontsize=14 )
            ax.plot( [w_obs, w_obs], [yl, yh], color = 'blue', linestyle = '--', linewidth = 1.0 )

    ax.set_xlim( xl, xh )
    ax.tick_params( which = 'both', bottom=True, top=True, left=True, right=True )
    ax.tick_params( axis = 'both', labelsize = fontsize )

    # Raw strings: the LaTeX backslashes are not valid Python escape sequences.
    ax.set_xlabel( r'$\mathrm{\lambda_{obs}\ (\AA)}$', fontsize=fontsize )
    ax.set_ylabel( r'$\mathrm{f_{\lambda,obs}\ (10^{-17} ergs/s/cm^2/\AA)}$', fontsize=fontsize )

    stitle = 'agnid=%d, detectid_best=%d, z=%.2f' % (agn['agnid'][idx], agn['detectid_best'][idx], z_obj)
    ax.set_title( stitle, fontsize=fontsize )
    plt.show()
    # To save a figure instead, call fig.savefig('tmp.png', format='png')
    # before plt.show() and display it with Image('tmp.png').

# read extension 4 in as a table of repeat observation info
### Some AGNs have repeat observations. In this table, each observation has one entry with a unique "shotid"; "nshots" gives the number of repeat observations for each unique AGN.

In [None]:
# Load HDU 4 of the catalog file as an astropy Table; later cells use its
# 'agnid', 'shotid' and 'nshots' columns and index extensions 5 & 6 with it.
tab = Table.read( fname, format = 'fits', hdu = 4 )
# Display the table as the cell output.
tab

In [None]:
# Count AGNs by number of observations. For nshots == 1 the matching rows are
# counted directly; for repeated AGNs the distinct agnid values are counted.
observed_once = tab['nshots'] == 1
n_once = np.sum(observed_once)
print("There are %d AGNs that are only observed once" % n_once)

observed_twice = tab['nshots'] == 2
agnids_twice = np.unique( tab['agnid'][observed_twice] )
print("There are %d AGNs that are observed twice" % len(agnids_twice) )

observed_more = tab['nshots'] > 2
agnids_more = np.unique( tab['agnid'][observed_more] )
print("There are %d AGNs that are observed more than twice" % len(agnids_more) )

# find all spectra and their error of a certain AGN in extension 5 and 6

### find some strong AGNs (large nmem) with secure redshifts (zflag != 0) and at least five repeat observations

In [None]:
# Keep AGNs with at least five observations and a non-zero zflag.
has_repeats  = agn['nshots'] >= 5
flag_nonzero = agn['zflag' ] != 0
candidates = agn[has_repeats & flag_nonzero]

# Display the candidates ordered by decreasing nmem.
by_nmem_desc = np.argsort(-candidates['nmem'])
candidates[by_nmem_desc]

### take agnid = 3194 as an example

In [None]:
agnid = 3194

# Extensions 5 & 6: spectra and errors with one row per observation, indexed
# with row numbers taken from the extension-4 table `tab`.
rep_rows = np.where( tab['agnid'] == agnid )[0]
if rep_rows.size == 0:
    raise ValueError( 'agnid %d has no entry in the extension 4 table' % agnid )

# arrange the repeat observations by time (sorted by shotid)
shotid_arr = np.asarray( tab['shotid'][rep_rows] )
order      = np.argsort( shotid_arr )
shotid_arr = shotid_arr[order]
rep_rows   = rep_rows[order]
rep_flux_arr    = hdul[5].data[rep_rows]
rep_flux_er_arr = hdul[6].data[rep_rows]

# prepare the wavelength array for the spectra
wave_start = 3470.0
wave_step  = 2.0
n_elements = hdul[5].header['NAXIS1'] #1036
wave_arr = wave_start + wave_step * np.arange( 0, n_elements )

# plot all repeat observations, one curve per shotid
fig, ax = plt.subplots( figsize = ( 10., 5.)  )

for shotid, rep_flux in zip( shotid_arr, rep_flux_arr ):
    # rep_flux_er_arr holds the matching errors; swap ax.plot for ax.errorbar
    # with the corresponding error row to draw them.
    ax.plot( wave_arr, rep_flux, label = '%d' % shotid )

# overplot with the catalog spectrum for detectid_best in ext 2&3 as black and compare with ext 5&6
best_rows = np.where( agn['agnid'] == agnid )[0]
if best_rows.size == 0:
    raise ValueError( 'agnid %d has no entry in the extension 1 table' % agnid )
flux_arr    = hdul[2].data[best_rows[0]]
flux_er_arr = hdul[3].data[best_rows[0]]
ax.errorbar( wave_arr, flux_arr, flux_er_arr, color = 'k', capsize = 1.,
             label = 'ext2&3 spectrum' )

ax.set_xlim( xl, xh )
ax.tick_params( which = 'both', bottom=True, top=True, left=True, right=True )
ax.tick_params( axis = 'both', labelsize = fontsize )

# Raw strings: the LaTeX backslashes are not valid Python escape sequences.
ax.set_xlabel( r'$\mathrm{\lambda_{obs}\ (\AA)}$', fontsize=fontsize )
ax.set_ylabel( r'$\mathrm{f_{\lambda,obs}\ (10^{-17} ergs/s/cm^2/\AA)}$', fontsize=fontsize )
ax.legend( fontsize=fontsize )
stitle = 'agnid=%d' % (agnid)
ax.set_title( stitle, fontsize=fontsize )
plt.show()