In [127]:
from astropy.io import fits
from astropy.table import Table
import matplotlib.pyplot as plt
import numpy as np
from astroquery.gaia import Gaia
import logging
from tqdm import tqdm
from astropy.table import vstack
logging.basicConfig(level=logging.INFO)

# Importing / Filtering GALAH Data

In [128]:
# ------------------ Input catalogue locations ------------------
# Paths are relative to the notebook's working directory.
gallah_data_file_allstar = '../data/GALAH_DR3_main_allstar_v2.fits'
gallah_data_file_dynamics = '../data/GALAH_DR3_VAC_dynamics_v2.fits'
gallah_data_file_gaia = '../data/GALAH_DR3_VAC_GaiaEDR3_v2.fits'

# ------------------ Load the allstar catalogue ------------------
# The catalogue rows are read from HDU 1 of the allstar file.
with fits.open(gallah_data_file_allstar) as allstar_hdul:
    allstar_records = allstar_hdul[1].data

# Convert to an astropy Table so that derived columns can be appended later on.
gallah_data = Table(allstar_records)

# ------------------ Filtering the GALAH Data ------------------
# Each `*_filter` below is a boolean mask with one entry per row of `gallah_data`.
# The masks are only *defined* in this section; they are combined and applied
# in the "Applying the Filters" step that follows.

# Largest accepted uncertainty for every abundance / abundance ratio below.
MAX_ABUNDANCE_ERR = 0.2

# 1. Recommended stellar-parameter filter: keep rows with flag_sp == 0
sp_filter = gallah_data['flag_sp'] == 0

# 2. Recommended signal-to-noise filter on the snr_c3_iraf column
snr_filter = gallah_data['snr_c3_iraf'] > 30

# 3. Surface-gravity cut: keep rows with logg < 3.0
#    (presumably a giant-star selection, going by the `rg_` prefix - confirm)
rg_filter = gallah_data['logg'] < 3.0

# 4. Element abundance filters
# [Fe/H], [α/Fe], [Na/Fe], [Al/Fe], [Mn/Fe], [Y/Fe], [Ba/Fe], [Eu/Fe], [Mg/Cu], [Mg/Mn], [Ba/Eu]
# For each quantity: quality flag must be 0 AND the uncertainty must be
# below MAX_ABUNDANCE_ERR.

# [Fe/H] filter
fe_h_flag_filter = gallah_data['flag_fe_h'] == 0
fe_h_err_filter = gallah_data['e_fe_h'] < MAX_ABUNDANCE_ERR
fe_h_filter = fe_h_flag_filter & fe_h_err_filter

# [α/Fe] filter
alpha_fe_flag_filter = gallah_data['flag_alpha_fe'] == 0
alpha_fe_err_filter = gallah_data['e_alpha_fe'] < MAX_ABUNDANCE_ERR
alpha_fe_filter = alpha_fe_flag_filter & alpha_fe_err_filter

# [Na/Fe] filter
# The error mask used to be computed but left out of the combined filter,
# unlike every other element; it is now applied as well.
na_fe_flag_filter = gallah_data['flag_Na_fe'] == 0
na_fe_err_filter = gallah_data['e_Na_fe'] < MAX_ABUNDANCE_ERR
na_fe_filter = na_fe_flag_filter & na_fe_err_filter

# [Al/Fe] filter
al_fe_flag_filter = gallah_data['flag_Al_fe'] == 0
al_fe_err_filter = gallah_data['e_Al_fe'] < MAX_ABUNDANCE_ERR
al_fe_filter = al_fe_flag_filter & al_fe_err_filter

# [Mn/Fe] filter
mn_fe_flag_filter = gallah_data['flag_Mn_fe'] == 0
mn_fe_err_filter = gallah_data['e_Mn_fe'] < MAX_ABUNDANCE_ERR
mn_fe_filter = mn_fe_flag_filter & mn_fe_err_filter

# [Y/Fe] filter
y_fe_flag_filter = gallah_data['flag_Y_fe'] == 0
y_fe_err_filter = gallah_data['e_Y_fe'] < MAX_ABUNDANCE_ERR
y_fe_filter = y_fe_flag_filter & y_fe_err_filter

# [Ba/Fe] filter
ba_fe_flag_filter = gallah_data['flag_Ba_fe'] == 0
ba_fe_err_filter = gallah_data['e_Ba_fe'] < MAX_ABUNDANCE_ERR
ba_fe_filter = ba_fe_flag_filter & ba_fe_err_filter

# [Eu/Fe] filter
eu_fe_flag_filter = gallah_data['flag_Eu_fe'] == 0
eu_fe_err_filter = gallah_data['e_Eu_fe'] < MAX_ABUNDANCE_ERR
eu_fe_filter = eu_fe_flag_filter & eu_fe_err_filter

# --- Derived ratios -----------------------------------------------------
# [X/Y] is formed as [X/Fe] - [Y/Fe]; its uncertainty is the quadrature sum
# of the two individual uncertainties. Both new columns are stored on
# `gallah_data` so they travel with the table.

# [Mg/Cu] filter
mg_fe_flag_filter = gallah_data['flag_Mg_fe'] == 0
cu_fe_flag_filter = gallah_data['flag_Cu_fe'] == 0
mg_cu_flag_filter = mg_fe_flag_filter & cu_fe_flag_filter

gallah_data['e_Mg_Cu'] = np.sqrt(gallah_data['e_Mg_fe']**2 + gallah_data['e_Cu_fe']**2)
gallah_data['Mg_Cu'] = gallah_data['Mg_fe'] - gallah_data['Cu_fe']
mg_cu_err_filter = gallah_data['e_Mg_Cu'] < MAX_ABUNDANCE_ERR
mg_cu_filter = mg_cu_flag_filter & mg_cu_err_filter

# [Mg/Mn] filter (reuses the Mg and Mn flag masks defined above)
mg_mn_flag_filter = mg_fe_flag_filter & mn_fe_flag_filter

gallah_data['e_Mg_Mn'] = np.sqrt(gallah_data['e_Mg_fe']**2 + gallah_data['e_Mn_fe']**2)
gallah_data['Mg_Mn'] = gallah_data['Mg_fe'] - gallah_data['Mn_fe']
mg_mn_err_filter = gallah_data['e_Mg_Mn'] < MAX_ABUNDANCE_ERR
mg_mn_filter = mg_mn_flag_filter & mg_mn_err_filter

# [Ba/Eu] filter (reuses the Ba and Eu flag masks defined above)
ba_eu_flag_filter = ba_fe_flag_filter & eu_fe_flag_filter

gallah_data['e_Ba_Eu'] = np.sqrt(gallah_data['e_Ba_fe']**2 + gallah_data['e_Eu_fe']**2)
gallah_data['Ba_Eu'] = gallah_data['Ba_fe'] - gallah_data['Eu_fe']
ba_eu_err_filter = gallah_data['e_Ba_Eu'] < MAX_ABUNDANCE_ERR
ba_eu_filter = ba_eu_flag_filter & ba_eu_err_filter

# ------------------ Combine and apply the masks ------------------
# A row is kept only if it passes every individual mask.
combined_quality_filter = (
    sp_filter & snr_filter & rg_filter
    & fe_h_filter & alpha_fe_filter & na_fe_filter & al_fe_filter
    & mn_fe_filter & y_fe_filter & ba_fe_filter & eu_fe_filter
    & mg_cu_filter & mg_mn_filter & ba_eu_filter
)
gallah_filtered_data = gallah_data[combined_quality_filter]

# Sort rows by ascending sobject_id.
sobject_id_order = np.argsort(gallah_filtered_data['sobject_id'])
gallah_filtered_data = gallah_filtered_data[sobject_id_order]

In [129]:
# ------------------ Importing data from the dynamics FITS file ------------------
# The dynamics rows are read from HDU 1 of the dynamics file.
with fits.open(gallah_data_file_dynamics) as file_dynamics:
    gallah_data_dynamics = file_dynamics[1].data


# ------------------ Match dynamics data with original ------------------

# Keep only the dynamics rows whose sobject_id is present in the filtered table
dynamics_filter = np.isin(gallah_data_dynamics['sobject_id'], gallah_filtered_data['sobject_id'])
gallah_data_dynamics = gallah_data_dynamics[dynamics_filter]

# Order by ID so rows can be compared position-by-position with the
# (already id-sorted) filtered table
gallah_data_dynamics = gallah_data_dynamics[np.argsort(gallah_data_dynamics['sobject_id'])]

# Sanity checks: same number of stars, no duplicate ids, and identical ids in
# identical order. The last check must compare the arrays element-wise:
# `a.all() == b.all()` only compares two scalars and passes for any pair of
# id arrays that contain no zeros, so it could not detect a misalignment.
assert len(gallah_filtered_data) == len(gallah_data_dynamics), \
    'dynamics catalogue does not contain exactly one row per filtered star'
assert len(np.unique(gallah_data_dynamics['sobject_id'])) == len(gallah_data_dynamics['sobject_id']), \
    'duplicate sobject_id values in the dynamics catalogue'
assert np.array_equal(gallah_data_dynamics['sobject_id'], gallah_filtered_data['sobject_id']), \
    'dynamics rows are not aligned with the filtered table by sobject_id'


In [130]:
# ------------------ Importing data from the Gaia FITS file ------------------
# The Gaia cross-match rows are read from HDU 1 of the Gaia file.
with fits.open(gallah_data_file_gaia) as file_gaia:
    gallah_data_gaia = file_gaia[1].data

# ------------------ Match Gaia data with original ------------------

# Keep only the Gaia rows whose sobject_id is present in the filtered table
gaia_filter = np.isin(gallah_data_gaia['sobject_id'], gallah_filtered_data['sobject_id'])
gallah_data_gaia = gallah_data_gaia[gaia_filter]

# Order by ID so rows can be compared position-by-position with the
# (already id-sorted) filtered table
gallah_data_gaia = gallah_data_gaia[np.argsort(gallah_data_gaia['sobject_id'])]

# Sanity checks: same number of stars, no duplicate ids, and identical ids in
# identical order. The last check must compare the arrays element-wise:
# `a.all() == b.all()` only compares two scalars and passes for any pair of
# id arrays that contain no zeros, so it could not detect a misalignment.
assert len(gallah_filtered_data) == len(gallah_data_gaia), \
    'Gaia catalogue does not contain exactly one row per filtered star'
assert len(np.unique(gallah_data_gaia['sobject_id'])) == len(gallah_data_gaia['sobject_id']), \
    'duplicate sobject_id values in the Gaia catalogue'
assert np.array_equal(gallah_data_gaia['sobject_id'], gallah_filtered_data['sobject_id']), \
    'Gaia rows are not aligned with the filtered table by sobject_id'

In [131]:
# ------------------ Copy dynamics columns into the main table ------------------
# Maps the column name used in the main table -> column name in the dynamics
# catalogue. Rows are copied positionally, so both tables must already be in
# the same row order.
dynamics_column_map = {
    'Energy': 'Energy',
    'Eccen': 'ecc',
    'J_R': 'J_R',
    'L_Z': 'L_Z',
    'J_Z': 'J_Z',
    'R_ap': 'R_ap',
}
for target_name, source_name in dynamics_column_map.items():
    gallah_filtered_data[target_name] = gallah_data_dynamics[source_name]

# ------------------ Copy Gaia distance columns into the main table ------------------
# These keep their catalogue names; again copied positionally.
for distance_name in ('r_med_photogeo', 'r_lo_photogeo', 'r_hi_photogeo'):
    gallah_filtered_data[distance_name] = gallah_data_gaia[distance_name]

In [132]:
# ------------------ Orbital cuts ------------------
# Thresholds are compared against the column values exactly as stored.
ecc_filter = gallah_filtered_data['Eccen'] > 0.85    # eccentricity above 0.85
energy_filter = gallah_filtered_data['Energy'] < 0   # negative orbital energy
apocenter_filter = gallah_filtered_data['R_ap'] > 5  # apocentre above 5 (presumably kpc - confirm)

# ------------------ Distance-uncertainty cuts ------------------
# Both one-sided uncertainties (hi - med and med - lo) must be below 1500,
# i.e. 1.5 kpc assuming the photogeometric distances are stored in pc.
median_distance = gallah_filtered_data['r_med_photogeo']
upper_distance_err = gallah_filtered_data['r_hi_photogeo'] - median_distance
lower_distance_err = median_distance - gallah_filtered_data['r_lo_photogeo']
dist_err_filter_hi = upper_distance_err < 1500
dist_err_filter_lo = lower_distance_err < 1500

# ------------------ Keep rows passing every cut ------------------
orbit_and_distance_filter = (
    ecc_filter
    & energy_filter
    & apocenter_filter
    & dist_err_filter_hi
    & dist_err_filter_lo
)
gallah_filtered_data = gallah_filtered_data[orbit_and_distance_filter]

In [135]:
# Report how many stars survived all cuts, then list the available columns.
star_count_line = f'Number of stars: {len(gallah_filtered_data)}'
print(star_count_line)

final_column_names = gallah_filtered_data.colnames
print(final_column_names)

Number of stars: 1061
['star_id', 'sobject_id', 'dr2_source_id', 'dr3_source_id', 'survey_name', 'field_id', 'flag_repeat', 'wg4_field', 'wg4_pipeline', 'flag_sp', 'teff', 'e_teff', 'irfm_teff', 'irfm_ebv', 'irfm_ebv_ref', 'logg', 'e_logg', 'fe_h', 'e_fe_h', 'flag_fe_h', 'fe_h_atmo', 'vmic', 'vbroad', 'e_vbroad', 'chi2_sp', 'alpha_fe', 'e_alpha_fe', 'nr_alpha_fe', 'flag_alpha_fe', 'flux_A_Fe', 'chi_A_Fe', 'Li_fe', 'e_Li_fe', 'nr_Li_fe', 'flag_Li_fe', 'C_fe', 'e_C_fe', 'nr_C_fe', 'flag_C_fe', 'O_fe', 'e_O_fe', 'nr_O_fe', 'flag_O_fe', 'Na_fe', 'e_Na_fe', 'nr_Na_fe', 'flag_Na_fe', 'Mg_fe', 'e_Mg_fe', 'nr_Mg_fe', 'flag_Mg_fe', 'Al_fe', 'e_Al_fe', 'nr_Al_fe', 'flag_Al_fe', 'Si_fe', 'e_Si_fe', 'nr_Si_fe', 'flag_Si_fe', 'K_fe', 'e_K_fe', 'nr_K_fe', 'flag_K_fe', 'Ca_fe', 'e_Ca_fe', 'nr_Ca_fe', 'flag_Ca_fe', 'Sc_fe', 'e_Sc_fe', 'nr_Sc_fe', 'flag_Sc_fe', 'Sc2_fe', 'e_Sc2_fe', 'nr_Sc2_fe', 'flag_Sc2_fe', 'Ti_fe', 'e_Ti_fe', 'nr_Ti_fe', 'flag_Ti_fe', 'Ti2_fe', 'e_Ti2_fe', 'nr_Ti2_fe', 'flag_Ti2_fe

In [136]:
# ------------------ Write the final selection to disk ------------------
# Stored as a FITS table; an existing file at this path is replaced.
filtered_output_path = '../filtered_data/gallah_filtered_data.fits'
gallah_filtered_data.write(filtered_output_path, format='fits', overwrite=True)