In [55]:
from astropy.io import fits
import matplotlib.pyplot as plt
import numpy as np
from astroquery.gaia import Gaia
import logging
logging.basicConfig(level=logging.INFO)

In [58]:
# Import Apogee Data sets
apogee_data_file_NN = '../data/Apogee_DR17_vac_NN/apogee_astroNN-DR17.fits'
apogee_data_file_starhorse = '../data/Apogee_DR17_vac_starhorse/APOGEE_DR17_EDR3_STARHORSE_v2.fits'
apogee_data_file_allstar = '../data/Apogee_DR17_Allstar/allStar-dr17-synspec_rev1.fits'

with fits.open(apogee_data_file_allstar) as file:
    apogee_data = file[1].data
    # Cleaning the data set using mask statments for initial inputs
    # Filter for Main Red stars
    mrs_filter = apogee_data['EXTRATARG']==0

    # Filter bad star data
    bs_filter = apogee_data['ASPCAPFLAG'] != 'STAR_BAD'
    prog_filter = apogee_data['PROGRAMNAME'] != 'magclouds'
    rg_filter = apogee_data['LOGG'] < 3.0

    # Filter for valid element abundances
    fe_h_flag_filter = apogee_data['FE_H_FLAG'] == 0
    fe_h_err_filter = apogee_data['FE_H_ERR'] < 0.1

    # Cant find this may be part of the VAC data set
    # alpha_fe_flag_filter = apogee_data['alpha_FE_FLAG'] == 0
    # alpha_fe_err_filter = apogee_data['alpga_FE_ERR'] < 0.1

    al_fe_flag_filter = apogee_data['AL_FE_FLAG'] == 0
    al_fe_err_filter = apogee_data['AL_FE_ERR'] < 0.1

    # Cant find this may be part of the VAC data set
    # mg_mn_flag_filter = apogee_data['CE_FE_FLAG'] == 0
    # mg_mn_err_filter = apogee_data['CE_FE_ERR'] < 0.1

    ce_fe_flag_filter = apogee_data['CE_FE_FLAG'] == 0
    ce_fe_err_filter = apogee_data['CE_FE_ERR'] < 0.15

    # All Main Red Stars
    apogee_data_red = apogee_data[mrs_filter]
    # All stars remaining based on APOGEE filters 
    # Note this is currently missign alpha/Fe and Mg/Mn filters
    apogee_data_filtered= apogee_data[mrs_filter & bs_filter & prog_filter & rg_filter & fe_h_flag_filter & fe_h_err_filter & al_fe_flag_filter & al_fe_err_filter & ce_fe_flag_filter & ce_fe_err_filter]


In [59]:
print(f'Number of stars in the red giant sample: {len(apogee_data_red)}')
print(f'Number of stars in the (APOGEE) filtered sample: {len(apogee_data_filtered)}')
print(f'Columns: {apogee_data_filtered.columns}')

Number of stars in the red giant sample: 372458
Number of stars in the (APOGEE) filtered sample: 164627
Columns: ColDefs(
    name = 'FILE'; format = '64A'
    name = 'APOGEE_ID'; format = '30A'
    name = 'TARGET_ID'; format = '58A'
    name = 'APSTAR_ID'; format = '71A'
    name = 'ASPCAP_ID'; format = '77A'
    name = 'TELESCOPE'; format = '6A'
    name = 'LOCATION_ID'; format = 'J'
    name = 'FIELD'; format = '20A'
    name = 'ALT_ID'; format = '30A'
    name = 'RA'; format = 'D'
    name = 'DEC'; format = 'D'
    name = 'GLON'; format = 'D'
    name = 'GLAT'; format = 'D'
    name = 'J'; format = 'E'
    name = 'J_ERR'; format = 'E'
    name = 'H'; format = 'E'
    name = 'H_ERR'; format = 'E'
    name = 'K'; format = 'E'
    name = 'K_ERR'; format = 'E'
    name = 'SRC_H'; format = '16A'
    name = 'WASH_M'; format = 'E'
    name = 'WASH_M_ERR'; format = 'E'
    name = 'WASH_T2'; format = 'E'
    name = 'WASH_T2_ERR'; format = 'E'
    name = 'DDO51'; format = 'E'
    name = 'DDO

In [66]:
# Extract GAIA ID fron renmaining stars
gaia_ids = apogee_data_filtered['GAIAEDR3_SOURCE_ID']
print(f'GAIA IDs: {gaia_ids}')

# Convert np-array to a comma-separated string for sql sytnax
gaia_id_list = ", ".join(gaia_ids[:4].astype(str))
print(f'GAIA ID list: {gaia_id_list}')

# List all available tables
tables = Gaia.load_tables(only_names=True)
for table in tables:
    print(table)

# Create the query
distance_query = f"""
SELECT source_id, r_med_geo, r_lo_geo, r_hi_geo, r_med_photogeo, r_lo_photogeo, r_hi_photogeo
FROM gedr3dist.main
WHERE source_id IN ({gaia_id_list});
"""


# Run the query with SQL
job = Gaia.launch_job(distance_query)
results = job.get_results()

# # Print the results
# print(results)


# # Filter for eccentricity
# ecc_filter = apogee_data['ECCENTRICITY'] > 0.85
# # Filter for orbital apocenter
# apo_filter = apogee_data['APOCENTER'] > 5
# # Filter for distance error
# dist_err_filter = apogee_data['DIST_ERR'] < 1.5
# # Filter for orbital energy
# energy_filter = apogee_data['ENERGY'] < 0



INFO:astroquery:Retrieving tables...


GAIA IDs: [ 421086363305436800  431594980053422720  422775384964691328 ...
 1995024236127760000 1998097371124974720 1994741318040223232]
GAIA ID list: 421086363305436800, 431594980053422720, 422775384964691328, 421050766615776256
INFO: Retrieving tables... [astroquery.utils.tap.core]


INFO:astroquery:Parsing tables...


INFO: Parsing tables... [astroquery.utils.tap.core]


INFO:astroquery:Done.


INFO: Done. [astroquery.utils.tap.core]
TAP Table name: external.apassdr9
Description: The AAVSO Photometric All-Sky Survey - Data Release 9
    This publication makes use of data products from the AAVSO
    Photometric All Sky Survey (APASS). Funded by the Robert Martin Ayers
    Sciences Fund and the National Science Foundation. Original catalogue released by Henden et al. 2015 AAS Meeting #225, id.336.16. Data retrieved using the VizieR catalogue access tool, CDS, Strasbourg, France. The original description of the VizieR service was published in A&AS 143, 23. VizieR catalogue II/336.
Size (bytes): 22474547200
Num. columns: 0
TAP Table name: external.catwise2020
Description: The CatWISE2020 Catalogue consists of 1,890,715,640 sources over the entire sky selected from Wide-field Infrared Survey Explorer (WISE) and NEOWISE survey data at 3.4 and 4.6 micrometer (W1 and W2) collected from 7 January 2010 to 13 December 2018. This data set adds two years to that used for the CatWISE Preli

HTTPError: Error 400: 
Cannot parse query '
SELECT  TOP 2000 source_id, r_med_geo, r_lo_geo, r_hi_geo, r_med_photogeo, r_lo_photogeo, r_hi_photogeo
FROM gedr3dist.main
WHERE source_id IN (421086363305436800, 431594980053422720, 422775384964691328, 421050766615776256);
' for job '1737974604746O': 9 unresolved identifiers: main [l.3 c.6 - l.3 c.20], source_id [l.2 c.18 - l.2 c.27], r_med_geo [l.2 c.29 - l.2 c.38], r_lo_geo [l.2 c.40 - l.2 c.48], r_hi_geo [l.2 c.50 - l.2 c.58], r_med_photogeo [l.2 c.60 - l.2 c.74], r_lo_photogeo [l.2 c.76 - l.2 c.89], r_hi_photogeo [l.2 c.91 - l.2 c.104], source_id [l.4 c.7 - l.4 c.16] !
 - Unknown table "gedr3dist.main" !
 - Unknown column "source_id" !
 - Unknown column "r_med_geo" !
 - Unknown column "r_lo_geo" !
 - Unknown column "r_hi_geo" !
 - Unknown column "r_med_photogeo" !
 - Unknown column "r_lo_photogeo" !
 - Unknown column "r_hi_photogeo" !
 - Unknown column "source_id" !


In [None]:
# # Print the results
# print(results)


# # Filter for eccentricity
# ecc_filter = apogee_data['ECCENTRICITY'] > 0.85
# # Filter for orbital apocenter
# apo_filter = apogee_data['APOCENTER'] > 5
# # Filter for distance error
# dist_err_filter = apogee_data['DIST_ERR'] < 1.5
# # Filter for orbital energy
# energy_filter = apogee_data['ENERGY'] < 0




# # Plot the HR diagram
# plt.figure(figsize=(10, 8))
# scatter = plt.scatter(filtered_teff, filtered_logg, c=filtered_fe_h, cmap='viridis', s=10, alpha=0.7)
# plt.colorbar(scatter, label='[Fe/H] (Metallicity)')

# # Reverse x-axis (hotter stars on the left)
# plt.gca().invert_xaxis()

# # Label axes
# plt.xlabel('Effective Temperature (K)', fontsize=14)
# plt.ylabel('Surface Gravity (log g)', fontsize=14)
# plt.title('Hertzsprung-Russell Diagram (APOGEE Data)', fontsize=16)
# plt.grid(True)

# plt.show()





# # Extract relevant columns
# teff = apogee_data['TEFF']
# logg = apogee_data['LOGG']
# bp_rp = apogee_data['bp_rp']

# # Apply conditions for red stars (example thresholds)
# red_star_mask = (teff < 5000) & (logg < 3) & (bp_rp > 1.0)

# # Filter the data
# red_stars = apogee_data[red_star_mask]
# print(f'Number of red stars: {len(red_stars)}')
