# Merge the FORS2, GALEX and KiDS data and write the result to an h5 file


- author Sylvie Dagoret-Campagne
- affiliation : IJCLab/IN2P3/CNRS
- creation date : 2023-06-12
- update : 2023-06-12


Starting from this merged file, work on spectra and photometry can begin.

In [None]:
import h5py
import pandas as pd
import numpy as np
import os
from astropy.io import fits
from astropy.table import Table
import matplotlib.pyplot as plt

In [None]:
def GetColumnHfData(hff,list_of_keys,nameval):
    """
    Collect one attribute from several groups of an h5 file.

    parameters
      hff          : descriptor of the h5 file (must provide ``get(key)``)
      list_of_keys : names of the groups to read, in the wanted order
      nameval      : name of the attribute to read on each group

    return
      list holding the value of attribute ``nameval`` for each group,
      in the same order as ``list_of_keys``
    """
    return [hff.get(group_name).attrs[nameval] for group_name in list_of_keys]


In [None]:
def ReadFors2h5FileAttributes(hf):
    """
    Build a table of the attributes attached to each group of an h5 file.

    parameters
      hf : descriptor of an already opened h5 file; every top-level group
           is expected to carry the same attribute names as the first one
           (a group missing one of them makes the attribute lookup fail)

    return
      pandas DataFrame with one row per group, sorted by increasing "num",
      with the columns
      'num', 'name', 'ra', 'dec', 'Rmag', 'redshift', 'lines', 'RT', 'RV',
      'eRV', 'Nsp', where "name" is the string "SPEC<num>".
      The index is NOT reset after sorting; callers that need a 0..n-1
      index must call reset_index themselves.
    """
    # Work on the handle supplied by the caller: re-opening the file here
    # would tie the function to a module-level path and leave an extra
    # file handle open.
    list_of_keys = list(hf.keys())

    # The attribute names are taken from the first group only.
    first_group = hf.get(list_of_keys[0])
    attribute_names = list(first_group.attrs.keys())

    # One column per attribute, one row per group (in list_of_keys order).
    df_info = pd.DataFrame()
    for key in attribute_names:
        df_info[key] = GetColumnHfData(hf, list_of_keys, key)

    df_info.sort_values(by="num", ascending=True, inplace=True)

    # Tag of each spectrum, built from its number.
    df_info["name"] = [f"SPEC{num}" for num in df_info["num"].values]

    # Fix the column order of the returned table.
    df_info = df_info[['num' ,'name', 'ra', 'dec', 'Rmag','redshift','lines','RT','RV','eRV','Nsp']]
    return df_info

# Read Fors2

In [None]:
# h5 file read by this notebook (FORS2 spectra and their attributes)
input_file_h5  = '../data/FORS2spectra.hdf5'
# h5 file written at the end of this notebook (spectra + merged catalog columns)
output_file_h5  = '../data/FORS2spectraGalexKidsPhotom.hdf5'

In [None]:
# Open the input file read-only; the handle stays open for the cells below
hf =  h5py.File(input_file_h5, 'r') 
# Names of all top-level groups of the input file
list_of_keys = list(hf.keys())

In [None]:
# Table of the group attributes (one row per group, sorted by "num")
df_info = ReadFors2h5FileAttributes(hf)

In [None]:
# The table comes back sorted by "num" with its pre-sort index;
# renumber the rows 0..n-1 and discard the old index.
df_info = df_info.reset_index(drop=True)

In [None]:
# Display the attribute table
df_info

### Get Spectra

In [None]:
# Spectrum numbers in table order, and the matching group names "SPEC<num>"
df_info_num = df_info["num"].values
key_tags = [ f"SPEC{num}" for num in df_info_num ]

In [None]:
# Read the "wl" and "fl" datasets of every spectrum group
# (presumably wavelength and flux -- confirm against the file producer).
# all_df is filled in key_tags order, so all_df[i] belongs to key_tags[i].
all_df = []
for key in key_tags:
    group = hf.get(key)
    spectrum = pd.DataFrame(
        {
            "wl": np.array(group.get("wl")),
            "fl": np.array(group.get("fl")),
        }
    )
    all_df.append(spectrum)

# Get merged catalogs

- Be sure to use MAST results for Galex

In [None]:
# CSV cross-match tables; judging by the file names, FORS2 x GALEX (from MAST)
# and FORS2 x KiDS. The directory spelling is kept as found on disk.
catalog_galex_filename = "../data/results_crossmathcatalogs/info_fors2GALEX_frommast_crossmatch.csv"
catalog_kids_filename = "../data/results_crossmathcatalogs/info_fors2Kidscrossmatch.csv"

In [None]:
# Load the GALEX cross-match; the first CSV column is used as the index
df_info_galex = pd.read_csv(catalog_galex_filename,index_col=0)

In [None]:
# Give the separation column a catalog-specific name so that it does not
# collide with the "angularsep" column of the other catalog when merging.
df_info_galex = df_info_galex.rename(columns={"angularsep":"asep_galex"})

In [None]:
# Display the GALEX cross-match table
df_info_galex

In [None]:
# Load the KiDS cross-match; the first CSV column is used as the index
df_info_kids = pd.read_csv(catalog_kids_filename,index_col=0)

In [None]:
# Give the separation column a catalog-specific name so that it does not
# collide with the "angularsep" column of the other catalog when merging.
df_info_kids = df_info_kids.rename(columns={"angularsep":"asep_kids"})

In [None]:
# Display the KiDS cross-match table
df_info_kids

## Merge dataframes

In [None]:
# Inner join of the two cross-match tables on ALL the column names they
# share (no explicit key is given); rows without an exact match on every
# shared column are dropped and the result gets a fresh 0..n-1 index.
df = df_info_galex.merge(df_info_kids, how="inner")

In [None]:
# Display the merged GALEX + KiDS table
df

In [None]:
# Inner join of the FORS2 attribute table with the merged catalogs, again
# on all shared column names; the FORS2 row order is kept, unmatched rows
# are dropped and the result gets a fresh 0..n-1 index.
df = df_info.merge(df, how="inner")

In [None]:
# Display the fully merged table (FORS2 attributes + GALEX + KiDS)
df

In [None]:
#df["lines"] = df_info["lines"]

In [None]:
#df

# Select good match with galex

In [None]:
# Distribution of the FORS2-GALEX separation.
df.hist(column="asep_galex", bins=100, color="b")
# Vertical marker at 5 -- presumably the intended match-quality cut, in the
# units of asep_galex (TODO confirm); no selection is applied here.
plt.axvline(x=5, color="k")

In [None]:
# Distribution of the FORS2-KiDS separation.
df.hist(column="asep_kids", bins=100, color="b")

# Save into output file h5

In [None]:
# Open the output file for writing; mode 'w' creates the file and
# truncates any existing file with the same name
hf_outfile =  h5py.File(output_file_h5, 'w') 

In [None]:
# Write one group per row of the merged table: the row values become the
# group attributes and the spectrum is stored as datasets "wl" and "fl".
#
# The spectrum and the group name are looked up through the row's own
# "name" (SPEC<num>) instead of the row label of the merged table: the
# merges are inner joins, so as soon as one row is dropped the row labels
# no longer line up with the positions in key_tags / all_df.
# key_tags and all_df are aligned with each other (all_df was filled by
# iterating over key_tags).
spectra_by_tag = dict(zip(key_tags, all_df))

for idx,row in df.iterrows():
    tag = row["name"]
    spectrum = spectra_by_tag[tag]
    h5group_out = hf_outfile.create_group(tag)

    wl = spectrum["wl"].values
    fl = spectrum["fl"].values

    parameter_names = list(row.index)
    parameter_values = row.values

    # Show the list of stored attributes once, for the first row only
    if idx==0:
        print(parameter_names)

    for name,val in zip(parameter_names,parameter_values):
        h5group_out.attrs[name] = val

    h5group_out.create_dataset("wl",data=wl,compression="gzip", compression_opts=9)
    h5group_out.create_dataset("fl",data=fl,compression="gzip", compression_opts=9)

In [None]:
# List the groups written to the output file
hf_outfile.keys()

In [None]:
# Close the output file so that its content is flushed to disk.
# NOTE(review): the input handle `hf` is not closed in the cells shown here.
hf_outfile.close() 