In [1]:
import os
import pathlib

import astropy.table as at
import astropy.units as u
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

In [2]:
# Minimum number of visits (after the quality cuts below) that a source must
# have in order to be kept. The value is also embedded in the name of the
# output file written at the end of the notebook.
min_nvisits = 3

In [3]:
# Directory holding the APOGEE DR17 catalog files. It defaults to the original
# hard-coded location, so existing runs are unaffected; set the
# APOGEE_DR17_DIR environment variable to point the notebook at a copy of the
# data stored somewhere else.
apogee_data_dir = pathlib.Path(os.environ.get(
    'APOGEE_DR17_DIR', '/mnt/home/apricewhelan/data/APOGEE_DR17'))

# allStar catalog (read into `allstar` below).
allstar_file = apogee_data_dir / 'allStar-dr17-l33alpha.fits'

# allVisitLite catalog (read into `allvisit` below).
allvisit_file = apogee_data_dir / 'allVisitLite-dr17-l33alpha.fits'

# Table read into `verr` below; presumably the calibrated visit velocity
# errors, judging by the file name. The path is relative to the notebook's
# working directory.
calib_verr_file = pathlib.Path(
    '../cache/allVisitLite-dr17-l33alpha-calib-verr.fits')

In [4]:
# Read the three input tables from the paths configured above, in order:
# the allStar catalog, the allVisit catalog, and the `verr` table that is
# joined onto the visits further down.
allstar, allvisit, verr = (
    at.Table.read(catalog_path)
    for catalog_path in (allstar_file, allvisit_file, calib_verr_file)
)



In [5]:
# Drop visits with unusable velocities: non-finite VHELIO or VRELERR, the
# -9999 sentinel in VHELIO, |VHELIO| >= 500, or VRELERR >= 100.
# NOTE: despite its name, `bad_visit_mask` is True for the visits that are
# KEPT; the rows where it is False are the ones removed.
vhelio = allvisit['VHELIO']
vrelerr = allvisit['VRELERR']

bad_visit_mask = np.isfinite(vhelio) & np.isfinite(vrelerr)
bad_visit_mask = bad_visit_mask & (vrelerr < 100.)
bad_visit_mask = bad_visit_mask & (vhelio != -9999)
bad_visit_mask = bad_visit_mask & (np.abs(vhelio) < 500.)

n_rejected_visits = len(bad_visit_mask) - bad_visit_mask.sum()
print(f"Filtered {n_rejected_visits} "
      "bad/NaN/-9999 visits")

allvisit = allvisit[bad_visit_mask]

# Release the references to the unfiltered columns.
del vhelio, vrelerr

Filtered 81234 bad/NaN/-9999 visits


In [6]:
# STARFLAG bits that disqualify a source or a visit (in list order):
# VERY_BRIGHT_NEIGHBOR, SUSPECT_RV_COMBINATION
star_starflag_bits = [3, 16]
# Same bits for visits, but as an independent copy: aliasing the same list
# object would let a later edit to one list silently change the other.
visit_starflag_bits = list(star_starflag_bits)

# Collapse each bit list into one integer bitmask (sum of 2**bit).
star_starflag_val = np.sum(2 ** np.array(star_starflag_bits))
visit_starflag_val = np.sum(2 ** np.array(visit_starflag_bits))

# True where none of the rejected bits is set, i.e. for rows to keep.
# `star_starflag_mask` and `visit_starflag_mask` are only computed here; they
# are applied to the tables in later cells.
star_starflag_mask = (allstar['STARFLAG'] & star_starflag_val) == 0
visit_starflag_mask = (allvisit['STARFLAG'] & visit_starflag_val) == 0

# The messages previously had a stray ")" after the bitmask value.
print(f"Using allstar STARFLAG bitmask {star_starflag_val}, "
      f"filtered {len(allstar) - star_starflag_mask.sum()} sources")
print(f"Using allvisit STARFLAG bitmask {visit_starflag_val}, "
      f"filtered {len(allvisit) - visit_starflag_mask.sum()} visits")

Using allstar STARFLAG bitmask 65544), filtered 16 sources
Using allvisit STARFLAG bitmask 65544), filtered 0 visits


In [7]:
# Apply the new (DR17) RV_FLAG masking: keep only visits with no flag set.
# TODO: audit this when Holtz tells me what the flags are!! Once the bit
# meanings are known, this blanket `== 0` cut can be replaced by a bitmask of
# specific bits, in the same way as the STARFLAG cut.
rvflag_mask = allvisit['RV_FLAG'] == 0

print(f"Applying allvisit RVFLAG mask, filtered "
      f"{len(allvisit) - rvflag_mask.sum()} visits")

# After quality and bitmask cuts, figure out which APOGEE_IDs remain.
# `visit_starflag_mask` was computed against these same `allvisit` rows, so
# both masks are applied together in a single indexing step.
allvisit = allvisit[visit_starflag_mask & rvflag_mask]
v_apogee_ids, counts = np.unique(allvisit['APOGEE_ID'],
                                 return_counts=True)

# True for allstar rows whose APOGEE_ID still has at least `min_nvisits`
# visits. The mask is only computed here; it is applied to `allstar` later.
allstar_visit_mask = np.isin(allstar['APOGEE_ID'],
                             v_apogee_ids[counts >= min_nvisits])
# The message says ">=" to match the cut actually applied above.
print(f"Keeping only sources with >= {min_nvisits} visits: filtered "
      f"{len(allstar_visit_mask) - allstar_visit_mask.sum()} sources")

Applying allvisit RVFLAG mask, filtered 327587 visits
Keeping only sources with > 3 visits: filtered 285298 sources


In [8]:
# ASPCAPFLAG bits that disqualify a source (in list order):
# TEFF_BAD, LOGG_BAD, VMICRO_BAD, ROTATION_BAD, VSINI_BAD
aspcapflag_bits = [16, 17, 18, 26, 30]

# Fold the bit list into a single integer bitmask (sum of 2**bit); the mask
# is True for allstar rows with none of those bits set.
aspcapflag_val = np.power(2, np.array(aspcapflag_bits)).sum()
aspcapflag_mask = (allstar['ASPCAPFLAG'] & aspcapflag_val) == 0
n_aspcap_rejected = len(allstar) - aspcapflag_mask.sum()
print(f"Using allstar ASPCAPFLAG bitmask {aspcapflag_val}, "
      f"filtered {n_aspcap_rejected}")

# Apply all three per-star cuts (visit count, STARFLAG, ASPCAPFLAG) at once.
keep_star = allstar_visit_mask & star_starflag_mask & aspcapflag_mask
allstar = allstar[keep_star]

# Only keep visits for stars that we're keeping ...
visit_has_star = np.isin(allvisit['APOGEE_ID'], allstar['APOGEE_ID'])
allvisit = allvisit[visit_has_star]

# ... and only keep stars that still have at least one visit.
v_apogee_ids2 = np.unique(allvisit['APOGEE_ID'])
star_mask2 = np.isin(allstar['APOGEE_ID'], v_apogee_ids2)
allstar = allstar[star_mask2]

# Reduce allstar to one row per APOGEE_ID: `return_index` gives the index of
# the first occurrence of each unique ID, ordered by sorted ID.
_, idx = np.unique(allstar['APOGEE_ID'], return_index=True)
allstar = allstar[idx]

# Re-sync the visits with the final list of stars.
visit_has_star = np.isin(allvisit['APOGEE_ID'], allstar['APOGEE_ID'])
allvisit = allvisit[visit_has_star]

Using allstar ASPCAPFLAG bitmask 1141309440, filtered 37013


In [11]:
# TODO: change to VISITID with final DR17 version
# Until then, a visit is matched to its row in `verr` on these four columns.
visit_join_keys = ('APOGEE_ID', 'PLATE', 'MJD', 'FIBERID')

# Inner join (the astropy default, spelled out here): visits with no matching
# row in `verr` are dropped from `allvisit`.
allvisit = at.join(allvisit, verr,
                   keys=visit_join_keys,
                   join_type='inner')

Final check for min nvisits:

In [12]:
# Recount the visits per APOGEE_ID in the current `allvisit` table.
v_apogee_ids, counts = np.unique(allvisit['APOGEE_ID'], return_counts=True)

# Keep only the stars that still have at least `min_nvisits` visits ...
has_enough_visits = counts >= min_nvisits
allstar_visit_mask = np.isin(allstar['APOGEE_ID'],
                             v_apogee_ids[has_enough_visits])
allstar = allstar[allstar_visit_mask]

# ... and only the visits that belong to one of those stars.
visit_has_star = np.isin(allvisit['APOGEE_ID'], allstar['APOGEE_ID'])
allvisit = allvisit[visit_has_star]

In [13]:
# Report the final row counts of the two tables.
n_stars_left, n_visits_left = len(allstar), len(allvisit)
print(f"{n_stars_left} unique stars left")
print(f"{n_visits_left} unique visits left")

342276 unique stars left
1665533 unique visits left


In [16]:
# Number of distinct APOGEE_IDs among the remaining visits, displayed as the
# cell output so it can be compared with the star count printed above.
len(np.unique(allvisit['APOGEE_ID']))

342276

In [18]:
# Save the filtered visit table; `min_nvisits` is part of the file name, and
# any existing file at that path is replaced.
visits_out_path = f'../cache/visits-dr17alpha-min{min_nvisits}-calibverr.fits'
allvisit.write(visits_out_path, overwrite=True)