# object selection form catalog

currently it's just a simple selection S/N of Ha Hb fluxes > 5

In [45]:
from   astropy.table import Table, vstack, join, setdiff
import numpy         as np
from   astropy.coordinates import SkyCoord, match_coordinates_sky
import astropy.units as u
import warnings
from   astropy.io.fits.card import VerifyWarning
warnings.simplefilter('ignore', VerifyWarning)

### GoodsN sample prearation, agn selection

In [46]:
# initial catalog
gn   = Table.read('hlsp_clear_hst_wfc3_gdn_multi_v4.1_clear.fits')
gn_m = Table.read('hlsp_clear_hst_wfc3-acs_gdn-3dhst_multi_v4.6_zout.fits')
gn_cat = join(gn,gn_m,keys_left='ID',keys_right='id',join_type='left',metadata_conflicts='silent')
#agn catalog crossmatch
filename = "goodsn_agn.txt"
with open(filename, "r") as file:
    source_ids = file.readlines()
source_ids = [line.strip() for line in source_ids]
coord_agn = SkyCoord(source_ids,unit=(u.hourangle, u.deg))
coord_obj = SkyCoord(gn_cat['ra'],gn_cat['dec'],unit=(u.deg,u.deg))
idx,d2d,d3d = match_coordinates_sky(coord_agn,coord_obj)
gn_cat['tag'] = ['agn' if index in idx[d2d.deg < 5/3600] else 'gxy' for index in range(len(gn_cat))]

### GoodsS sample prearation, agn selection

In [47]:
gs   = Table.read('hlsp_clear_hst_wfc3_gds_multi_v4.1_clear.fits')
gs_m = Table.read('hlsp_clear_hst_wfc3-acs_gds-3dhst_multi_v4.6_zout.fits')
gs_cat = join(gs,gs_m,keys_left='ID',keys_right='id',join_type='left',metadata_conflicts='silent')

gs_agn = Table.read('goodss_agn.txt', 
        format='ascii',  
        names=[
            "3DHST", "VLA", "CDF-S", "z", "RAdeg", "DEdeg", "f_z", "mtype-s", "mtype-c",
            "xtype-l", "xtype-x", "rtype-r", "rtype-f", "otype-sp", "otype-se", "var",
            "logL-i", "f_logL-i", "logL-s", "logL-x", "Mbh", "M*", "rTag", "xTag", "mTag",
            "oTag", "logNH", "logLx", "logLint", "gamma", "er_gamma", "CType", "L3GHz",
            "L6GHz", "q_L3GHz", "q_L6GHz", "q24Obs", "e_q24Obs", "q24crt", "alpha",
            "er_alpha", "tau", "logL5100", "logL3um", "logL6um", "logL8um", "logL12um",
            "logL20um"])
gs_cat['tag'] = ['agn' if id in gs_agn['3DHST'] else '/' for id in gs['ID']]



### stacking 2 catalog

In [48]:
cat_lis = vstack([gn_cat,gs_cat],metadata_conflicts='silent')
cat_lis.write('full_object_catalog.fits',overwrite=True)

cat_hasline = cat_lis[np.logical_and(cat_lis['Ha_FLUX'] >0, cat_lis['Hb_FLUX']>0)]
sn_ha = cat_hasline['Ha_FLUX']/cat_hasline['Ha_FLUX_ERR']
cat_hasline['sn_ha'] = sn_ha
sn_hb = cat_hasline['Hb_FLUX']/cat_hasline['Hb_FLUX_ERR']
cat_hasline['sn_hb'] = sn_hb
selection = np.logical_and(sn_ha>5,sn_hb>5)
obj_lis = cat_hasline[selection]

### manual selection

In [49]:
manual_selection = np.array((
    ['ERSPRIME',43823],
    ['ERSPRIME',45258],
    ['ERSPRIME',45646],
    ['GN1',37031],
    ['GN1',37623],
    ['GN2', 14895],
    ['GN2', 16173],
    ['GN2', 14895],
    ['GN2', 16752],
    ['GN2', 17579],
    ['GN2', 17829],
    ['GN2', 21552],
    ['GN3', 28121],
    ['GN3', 32166],
    ['GN3', 32660],
    ['GN3', 34570],
    ['GN3', 34708],
    ['GN3', 34838],
    ['GN3', 35042],
    ['GN3', 35568],
    ['GN3', 35822],
    ['GN3', 19075],
    ['GN4', 24377],
    ['GN4', 26015],
    ['GN4', 27282],
    ['GN5', 31789],
    ['GN7', 13777],
    ['GN7', 14716],
    ['GN7', 15127],
    ['GN7', 15204],
    ['GN7', 15300],
    ['GN7', 17352],
    ['GN7', 23580],
    ['GS1', 47214],
    ['GS1', 47399],
    ['GS1', 48850],
    ['GS1', 49063],
    ['GS2', 45633],
    ['GS2', 45795],
    ['GS3', 34363],
    ['GS3', 37903],
    ['GS3', 40611],
    ['GS3', 41370],
    ['GS4', 20698],
    ['GS4', 29686],
    ['GS4', 29717],
    ['GS4', 29846],
    ['GS5', 38513],
    ['GS5', 42758],
))

ismatched = np.isin(obj_lis['ID'],manual_selection[:,1]) & np.isin(obj_lis['subfield'],manual_selection[:,0])
obj_lis['manual_select'] = np.where(ismatched,'selected','keep') 
obj_lis.write('obj_lis_selected.fits',overwrite=True)

print('total num of objs in the fields',len(cat_lis))
print('num of objs after s/n selection:',len(obj_lis))
print('num of obj after agn selection',len(obj_lis[obj_lis['tag']!='agn']))
print('num of obj after agn selection',len(obj_lis[np.logical_and(obj_lis['tag']!='agn',obj_lis['manual_select']!='selected')]))

# download spectrum from server.

It is recommended that to run the script downloadSpectra in prompt because of potential stability issue

# Extract line maps from data products

In [None]:
from    astropy.table       import Table
import  numpy               as     np
from    astropy.io          import fits

from    tqdm                import tqdm
import  os
import  gc                                         

#this gives the file name + prefix of an obj from the cat file
def file_name(obj,prefix,filetype='fits'):
    field = obj['subfield'].lower()
    id    = str(obj['ID']).zfill(5)
    return f"hlsp_clear_hst_wfc3_{field}-{id}_g102-g141_v4_{prefix}.{filetype}"


def extract_HaHb(hdu):
    """

    pass objs from obj_lis to extract ha hb lines

    return: HDUlist with the following entry:

    0 primary extension, same as original file

    1 line-fit results

    2 segmentation map

    3 clear filter maps

    4,5 Ha line map & line weight

    6,7 Hb line map & line weight

    """
    #set up a crop of 50x50 pix in the center
    center_size = 50; shape = hdu[5].shape[0]
    #start index: si and end index: ei
    si = (shape - center_size) // 2; 
    ei = si + center_size

    new_file = fits.HDUList()
    #save primary extension
    new_file.append(hdu[0])
    #save line-fit info
    new_file.append(hdu[1])
    """
    select segmentation map [4]
    also save 1 DSCI image for comparison [5]
    """
    for i in [4,5]: 
        hdu[i].data = hdu[i].data[si:ei,si:ei]
        new_file.append(hdu[i])

    #loop to select ha hb line maps
    for image in hdu:
        if image.header.get('EXTTYPE') in ['Ha','Hb'] and (image.name == 'LINE' or image.name == 'LINEWHT'):
            image.data = image.data[si:ei,si:ei]
            image.name = f"{image.name}_{image.header['EXTTYPE']}"
            new_file.append(image)
    return new_file


def data_process(obj):
    try:
        path_data_product   = f"data_products/{file_name(obj,'full')}"
        path_data_extracted = f"data_extracted/{file_name(obj,'extracted')}"

        need_update_from_data_products = True
        need_save_file = True

        with fits.open(path_data_product) as hdu:
            extracted = extract_HaHb(hdu)
            ha_med = fits.ImageHDU(data = extracted[4].data - np.median(extracted[4].data),
                                 header = extracted[4].header,name=f'{extracted[4].name}_bg')
            hb_med = fits.ImageHDU(data = extracted[6].data - np.median(extracted[6].data),
                                 header = extracted[6].header,name=f'{extracted[6].name}_bg')
            extracted.append(ha_med)
            extracted.append(hb_med)

        #this part still needs psf matching---------
        
        #save file
            extracted.writeto(path_data_extracted,overwrite=True)
        return f"{obj['subfield']}-{obj['ID']} saved"

    except Exception as e:
            return f"! {obj['subfield']}-{obj['ID']} failed, error:{e}"


from concurrent.futures import ThreadPoolExecutor, as_completed
def main():
    os.makedirs('data_extracted',exist_ok=True)
    obj_lis = Table.read('obj_lis_selected.fits')
    results = []
    max_threads= 7
    if max_threads > 1:
        with ThreadPoolExecutor(max_threads) as executor:
            futures = {executor.submit(data_process,obj):obj for obj in obj_lis}
            for future in tqdm(as_completed(futures), total=len(obj_lis), desc="Processing"):
                results.append(future.result())
    else:
        for obj in tqdm(obj_lis):
            results.append(data_process(obj))
    number=0
    for result in results:
        if 'error' in result:
            number +=1
            print(result)
    print('total number of obj processed:',len(results))
    print('number of failed obj',number)

if __name__ == '__main__':
    main()


Processing: 100%|██████████| 158/158 [00:37<00:00,  4.18it/s]

total number of obj processed: 158





UnboundLocalError: local variable 'number' referenced before assignment