# Import and filter data

### Import and get BGS data

In [None]:
import numpy as np
import fitsio
import numpy.lib.recfunctions as rfn
import os

# Input catalogue and destination of the filtered table.
spec_path = '/Users/ziqimu/Desktop/fastspec-iron.fits'
output_path = '/Users/ziqimu/Desktop/BGS_data.fits'

# Columns read from extension 2 of the input file.
ext2_columns = (
    'TARGETID', 'Z', 'ZWARN', 'DELTACHI2', 'SPECTYPE', 'RA', 'DEC',
    'BGS_TARGET', 'SURVEY', 'PROGRAM',
    'FLUX_R', 'FLUX_IVAR_R', 'FLUX_G', 'FLUX_IVAR_G', 'FLUX_Z', 'FLUX_IVAR_Z',
    'FLUX_W1', 'FLUX_IVAR_W1', 'FLUX_W2', 'FLUX_IVAR_W2',
)

# Columns read from extension 1 of the input file.
ext1_columns = (
    'ABSMAG01_SDSS_G', 'ABSMAG01_SDSS_R', 'ABSMAG01_SDSS_Z', "ABSMAG01_SDSS_U",
    'LOGMSTAR', 'HALPHA_FLUX', 'HBETA_FLUX', 'SFR', "HALPHA_EW",
)

with fitsio.FITS(spec_path) as spec_file:
    ext2_rows = spec_file[2][ext2_columns][:]
    ext1_rows = spec_file[1][ext1_columns][:]

    # Join the two extensions side by side into one flat structured array.
    # NOTE(review): this pairs rows purely by position, so it assumes both
    # extensions list the same objects in the same order -- confirm.
    galaxy_table = rfn.merge_arrays(
        [ext2_rows, ext1_rows], flatten=True, usemask=False
    )
    del ext2_rows, ext1_rows

    # Keep only rows classified as GALAXY in the main survey, bright program.
    is_galaxy = galaxy_table['SPECTYPE'] == 'GALAXY'
    in_main_survey = galaxy_table['SURVEY'] == 'main'
    in_bright_program = galaxy_table['PROGRAM'] == 'bright'
    select = np.where(is_galaxy & in_main_survey & in_bright_program)
    galaxy_table = galaxy_table[select]

    # Drop repeated TARGETIDs: np.unique hands back the index of the first
    # occurrence of each id, so the table also ends up sorted by TARGETID.
    unique_idx = np.unique(galaxy_table['TARGETID'], return_index=True)[1]
    galaxy_table = galaxy_table[unique_idx]


# clobber=True replaces any existing file at output_path.
fitsio.write(output_path, galaxy_table, clobber=True)

print(f'Filtered BGS data saved to: {output_path}')


In [None]:
# Path of the FITS table opened by the following cells; this is the same
# location as `output_path`, the file written by the first cell.
file_path = '/Users/ziqimu/Desktop/BGS_data.fits'

In [None]:
from astropy.io import fits
# Open the table file once and report, in order: the row count of HDU 1,
# the astropy summary of every HDU, and the column names of each table HDU.
with fits.open(file_path) as hdul:
    hdu1 = hdul[1]
    num_rows = len(hdu1.data)
    print(f"Number of rows in HDU1: {num_rows}")

    # One-line-per-HDU overview printed by astropy.
    hdul.info()

    for i, hdu in enumerate(hdul):
        # Only binary and ASCII table HDUs carry a column list.
        if isinstance(hdu, (fits.BinTableHDU, fits.TableHDU)):
            print(f"\nHDU {i} contains a table with the following columns:")
            print(hdu.columns.names)
            

In [None]:
# Read the columns used by the later cells out of HDU 1 as separate arrays.
with fits.open(file_path) as hdul:
    hdu1 = hdul[1]
    table = hdu1.data

    # Redshift and sky position.
    z_data, Dec, Ra = (table[name] for name in ("Z", "DEC", "RA"))

    # Absolute magnitudes in the r, u and g bands.
    abs_magr_data, abs_magu_data, abs_magg_data = (
        table[name]
        for name in ("ABSMAG01_SDSS_R", "ABSMAG01_SDSS_U", "ABSMAG01_SDSS_G")
    )

    # Stellar mass, emission-line fluxes and star-formation rate.
    logmstar_data, halpha_data, hbeta_data, sfr_data = (
        table[name]
        for name in ("LOGMSTAR", "HALPHA_FLUX", "HBETA_FLUX", "SFR")
    )

    # Photometric fluxes in g, r and z.
    flux_g, flux_r, flux_z = (
        table[name] for name in ("FLUX_G", "FLUX_R", "FLUX_Z")
    )

    # H-alpha equivalent width.
    ew = table["HALPHA_EW"]

### Redshift cut: keep objects with Z <= 0.24

In [None]:
# Boolean mask selecting rows whose redshift is at most 0.24.
mask_z = z_data <= 0.24

# Apply the same mask to every column so all filtered arrays stay row-aligned.
(
    filtered_z,
    filtered_dec,
    filtered_ra,
    filtered_abs_magr,
    filtered_abs_magu,
    filtered_abs_magg,
    filtered_logmstar,
    filtered_halpha,
    filtered_hbeta,
    filtered_sfr,
    filtered_flux_g,
    filtered_flux_r,
    filtered_flux_z,
    filtered_ew,
) = (
    column[mask_z]
    for column in (
        z_data,
        Dec,
        Ra,
        abs_magr_data,
        abs_magu_data,
        abs_magg_data,
        logmstar_data,
        halpha_data,
        hbeta_data,
        sfr_data,
        flux_g,
        flux_r,
        flux_z,
        ew,
    )
)



In [None]:
# Sanity check: report how many rows survived the cut and confirm that the
# flux and coordinate arrays have matching lengths.
n_kept = len(filtered_abs_magr)
print(f"Number of rows after filtering: {n_kept}")
print(*(len(column) for column in (filtered_flux_r, filtered_ra, filtered_dec)))


In [None]:
# Save the redshift-filtered columns as a single binary table on the
# current user's Desktop.
desktop_path = os.path.join(os.path.expanduser("~"), "Desktop")
output_filename = os.path.join(desktop_path, "BGS_filtered_data.fits")


# Columns whose source arrays are produced by the filtering cell.
filtered_data = {
    "Z": filtered_z,
    "DEC": filtered_dec,
    "RA": filtered_ra,
    "ABS_MAG_R": filtered_abs_magr,
    "ABS_MAG_U": filtered_abs_magu,
    "ABS_MAG_G": filtered_abs_magg,
    "LOG_MSTAR": filtered_logmstar,
    "HALPHA": filtered_halpha,
    "HBETA": filtered_hbeta,
    "SFR": filtered_sfr,
}

# The variables below are not assigned by any cell visible in this notebook,
# so referencing them directly raises NameError on a clean top-to-bottom run.
# Each column is written only if its variable exists in the session; it keeps
# its original position in the table when it does.
optional_columns = (
    ("APP_MAG", "apparent_magnitude"),
    ("FIBER_FLUX_G", "filtered_fiber_flux_g"),
    ("FIBER_FLUX_R", "filtered_fiber_flux_r"),
    ("FIBER_FLUX_Z", "filtered_fiber_flux_z"),
)
skipped_columns = []
for column_name, variable_name in optional_columns:
    if variable_name in globals():
        filtered_data[column_name] = globals()[variable_name]
    else:
        skipped_columns.append(column_name)

filtered_data.update({
    "FLUX_G": filtered_flux_g,
    "FLUX_R": filtered_flux_r,
    "FLUX_Z": filtered_flux_z,
    # Colours are differences of the absolute magnitudes.
    "Color_ur": filtered_abs_magu - filtered_abs_magr,
    "Color_gr": filtered_abs_magg - filtered_abs_magr,
    "Halpha_EW": filtered_ew,
})


columns = []
for key, value in filtered_data.items():
    array = np.asarray(value)
    # "E" is a 4-byte float and "D" an 8-byte float. Forcing "E" on every
    # column truncates 8-byte inputs (e.g. coordinates, if the catalogue
    # stores them in double precision), so pick the width from the data.
    column_format = "D" if array.dtype.itemsize > 4 else "E"
    columns.append(fits.Column(name=key, format=column_format, array=array))
hdu = fits.BinTableHDU.from_columns(columns)


# overwrite=True replaces any existing file of the same name.
hdu.writeto(output_filename, overwrite=True)

if skipped_columns:
    print(f"Columns skipped because their data is not defined: {skipped_columns}")
print(f"Filtered data successfully saved to: {output_filename}")