### Data
The MPA data is too large to fit in this repo; download it from https://wwwmpa.mpa-garching.mpg.de/SDSS/DR7/raw_data.html and place the `.fit` files in `./data/`.

In [4]:
from astropy.io import fits
from astropy.table import Table
import pandas as pd
import numpy as np

# Open the three MPA FITS files memory-mapped. The HDU lists are kept in
# module-level names and are not closed in this cell.
indx_fits, info_fits, line_fits = (
    fits.open(fits_path, memmap=True)
    for fits_path in (
        "./data/gal_indx_dr7_v5_2.fit",
        "./data/gal_info_dr7_v5_2.fit",
        "./data/gal_line_dr7_v5_2.fit",
    )
)

# The table of each file lives in HDU 1. The code further down indexes all
# three tables with the same row number.
idx_table, info_table, line_table = (
    hdu_list[1].data for hdu_list in (indx_fits, info_fits, line_fits)
)

# Surveyed galaxies, split into maser detections and non-detections.
masers_df = pd.read_csv(
    './data/masers_in_SDSS_DR8_potential_dup_bestobjid_with_lum_rudmingb.csv'
)
nonmasers_df = pd.read_csv(
    "./data/all_surveyed_no_maser_in_SDSS_DR8_potential_dup_bestobjid_rudmingb.csv"
)

# Coronal-line class tables; only their SPECOBJID column is used below.
masers_in_class = pd.read_csv(
    "./data/Masers_In_Class_Coronal_Lines_rudmingb.csv"
)
nonmasers_in_class = pd.read_csv(
    "./data/NonMasers_In_Class_Coronal_Lines_rudmingb.csv"
)

# Emission lines whose flux / equivalent-width measurements are copied into
# the output tables.
line_names = [
    'H_alpha',
    'H_beta',
    'OIII_5007',
    'OI_6300',
    'NII_6584',
    'SII_6717',
    'SII_6731',
]

# Fixed leading columns of both output tables.
new_columns = [
    "MPA_index", "ra", "dec", "fiberID", "plate", "mjd",
    "mcpid", "maser_morphology", "maser_lum",
    "z", "z_err", "v_disp", "v_disp_err",
    "kcor_mag_rband", "kcor_model_mag_rband",
    "Lick_Hd_A", "Lick_Hd_A_err",
    "D4000", "D4000_err", "D4000_n", "D4000_n_err",
    "has_coronal_lines",
]

# Four measurement columns per emission line, appended in line order.
new_columns.extend(
    f"{emission_line}_{suffix}"
    for emission_line in line_names
    for suffix in ("flux", "flux_err", "eqw", "eqw_err")
)

# Identifier-like columns are integers; every remaining column is a float.
dtype_dict = dict.fromkeys(
    ("MPA_index", "fiberID", "plate", "mcpid", "maser_morphology"), int
)
for column_name in new_columns:
    dtype_dict.setdefault(column_name, float)

# Two independent, empty, typed tables that the matching loop fills row by row.
nonmasers_in_gal_df = pd.DataFrame(columns=new_columns).astype(dtype_dict)
masers_in_gal_df = pd.DataFrame(columns=list(new_columns)).astype(dtype_dict)


# Cross-match every surveyed galaxy against the MPA info table on
# (fiberID, mjd, plate). For each galaxy that has a match, copy the matched
# row's measurements from the info / index / line tables into the
# corresponding output table, then write both tables to CSV.

# Loop-invariant lookups, hoisted so they are not re-evaluated per galaxy.
mpa_fiberid = info_table["fiberid"]
mpa_mjd = info_table["mjd"]
mpa_plateid = info_table["plateid"]
maser_class_ids = masers_in_class["SPECOBJID"].values
nonmaser_class_ids = nonmasers_in_class["SPECOBJID"].values

# Columns copied verbatim (same name in source and output) from each table.
info_columns = ("z", "z_err", "v_disp", "v_disp_err")
index_columns = ("Lick_Hd_A", "Lick_Hd_A_err", "D4000", "D4000_err", "D4000_n", "D4000_n_err")
line_suffixes = ("flux", "flux_err", "eqw", "eqw_err")
integer_columns = ("MPA_index", "fiberID", "plate", "mcpid", "maser_morphology", "has_coronal_lines")

for df, output_df in [(masers_df, masers_in_gal_df), (nonmasers_df, nonmasers_in_gal_df)]:
    # Flag, for the whole table in one pass, the spectra that appear in either
    # coronal-line class table (instead of rescanning both arrays per row).
    spec_ids = df["specObjID"].values
    in_coronal_class = np.isin(spec_ids, maser_class_ids) | np.isin(spec_ids, nonmaser_class_ids)

    for position, (i, row) in enumerate(df.iterrows()):
        fiberID = row["fiberID"]
        mjd = row["mjd"]
        plate = row["plate"]

        matching_indices = np.where((mpa_fiberid == fiberID) & (mpa_mjd == mjd) & (mpa_plateid == plate))[0]
        if matching_indices.size == 0:
            # No MPA spectrum for this galaxy: it is left out of the output.
            continue
        # When several MPA rows match, the first one is used.
        matching_index = matching_indices[0]

        # Create row `i` in the output table; every output column is then
        # overwritten explicitly below.
        output_df.loc[i] = row

        output_df.at[i, "MPA_index"] = matching_index
        output_df.at[i, "ra"] = row["ra"]
        output_df.at[i, "dec"] = row["dec"]
        output_df.at[i, "fiberID"] = fiberID
        output_df.at[i, "mjd"] = mjd
        output_df.at[i, "plate"] = plate
        # These source columns are optional; a missing column is stored as 0.
        output_df.at[i, "mcpid"] = row.get("UID", 0)
        output_df.at[i, "maser_morphology"] = row.get("objClass_int", 0)
        output_df.at[i, "maser_lum"] = row.get("bestLum", 0)
        output_df.at[i, "has_coronal_lines"] = int(in_coronal_class[position])

        for column in info_columns:
            output_df.at[i, column] = info_table[column][matching_index]
        output_df.at[i, "kcor_mag_rband"] = info_table["kcor_mag"][matching_index][1] # float[3], Synthesized gri magnitudes of the spectrum after foreground dereddening and de-redshifting
        output_df.at[i, "kcor_model_mag_rband"] = info_table["kcor_model_mag"][matching_index][2] # float[5], Synthesized ugriz magnitudes of the best fitting Bruzual-Charlot model  in the galaxy restframe

        for column in index_columns:
            output_df.at[i, column] = idx_table[column][matching_index]

        for line_name in line_names:
            for suffix in line_suffixes:
                column = f"{line_name}_{suffix}"
                output_df.at[i, column] = line_table[column][matching_index]

    # Cast the identifier and flag columns to int before the table is written.
    for column in integer_columns:
        output_df[column] = output_df[column].astype(int)

masers_in_gal_df.to_csv('./data/masers_in_MPA.csv', index=False)
nonmasers_in_gal_df.to_csv('./data/nonmasers_in_MPA.csv', index=False)

In [7]:

# Columns taken from each matched table before the line ratios are added.
_SELECTED_COLUMNS = ["MPA_index", 'z', 'v_disp', 'kcor_mag_rband', 'D4000', 'H_alpha_flux', 'OIII_5007_flux', 'H_beta_flux', 'NII_6584_flux', 'SII_6717_flux', 'SII_6731_flux', 'OI_6300_flux', 'has_coronal_lines', 'maser_lum', 'maser_morphology']

# Column order of the combined table written to disk.
_OUTPUT_COLUMNS = ["MPA_index", 'is_maser', 'z', 'v_disp', 'kcor_mag_rband', 'D4000', 'H_alpha_flux', 'OIII_5007_flux', 'HaHb', 'O3Hb', 'N2Ha', 'S2Ha', 'O1Ha', 'has_coronal_lines', 'maser_lum', 'maser_morphology']


def _select_with_line_ratios(matched_df, is_maser):
    """Return the selected columns of *matched_df* plus log10 line ratios.

    Args:
        matched_df: DataFrame containing every column in ``_SELECTED_COLUMNS``.
        is_maser: value stored in the new ``is_maser`` column.

    Returns:
        A new DataFrame with the selected columns followed by ``is_maser``,
        ``HaHb``, ``O3Hb``, ``N2Ha``, ``S2Ha`` and ``O1Ha``. ``matched_df``
        itself is not modified.
    """
    # Explicit copy so the column assignments below act on an independent
    # frame and cannot trigger pandas' SettingWithCopyWarning.
    selected = matched_df.loc[:, _SELECTED_COLUMNS].copy()
    selected['is_maser'] = is_maser

    h_alpha = selected['H_alpha_flux']
    h_beta = selected['H_beta_flux']
    # NOTE: zero or negative flux ratios are not filtered here; whatever
    # np.log10 returns for them is stored as-is.
    selected['HaHb'] = np.log10(h_alpha / h_beta)
    selected['O3Hb'] = np.log10(selected['OIII_5007_flux'] / h_beta)
    selected['N2Ha'] = np.log10(selected['NII_6584_flux'] / h_alpha)
    selected['S2Ha'] = np.log10((selected['SII_6717_flux'] + selected['SII_6731_flux']) / h_alpha)
    selected['O1Ha'] = np.log10(selected['OI_6300_flux'] / h_alpha)
    return selected


# Same selection and ratios for both samples; only the is_maser label differs.
masers_selected = _select_with_line_ratios(masers_in_gal_df, 1)
nonmasers_selected = _select_with_line_ratios(nonmasers_in_gal_df, 0)

# Stack masers on top of non-masers with a fresh 0..n-1 index.
masers_and_nonmasers_in_MPA = pd.concat([masers_selected, nonmasers_selected], ignore_index=True)

# Reorder columns; the individual H_beta / NII / SII / OI fluxes are dropped
# here because only their ratios are kept.
masers_and_nonmasers_in_MPA = masers_and_nonmasers_in_MPA[_OUTPUT_COLUMNS]

# Save the resulting DataFrame to a CSV file
masers_and_nonmasers_in_MPA.to_csv('./data/masers_and_nonmasers_in_MPA.csv', index=False)
