In [1]:
import warnings

import numpy as np
import pandas as pd
from astropy.table import Table
from astropy.units import UnitsWarning
from tqdm import tqdm

from plato.stars import classify_stars, quality_cuts

In [2]:
# Load the target table, apply quality cuts, classify the stars, and
# normalise the column names.
targets = Table.read("../data/raw/plato_targets.fits").to_pandas()

# discard stars that fail the quality cuts
targets = quality_cuts(targets, max_error=0.2)

# attach the stellar classification (galactic quantities included)
targets = classify_stars(targets, include_galactic_quantities=True)

# give the Gaia identifiers explicit data-release names
targets = targets.rename(
    columns={"SOURCE_ID": "gaiaID_DR3", "dr2_source_id": "gaiaID_DR2"}
)

# turn every "<name>_error" column into "e_<name>" with a single rename
error_suffix = "_error"
targets = targets.rename(
    columns={
        name: f"e_{name[: -len(error_suffix)]}"
        for name in targets.columns
        if name.endswith(error_suffix)
    }
)

Removed 392000/2675538 stars based on quality cuts (17.2%).


Retrieving Kinematic Parameter: 100%|██████████| 2283538/2283538 [14:15<00:00, 2668.65it/s]


In [3]:
# add xgboost metallicities from Andrae2023
total_rows = 174922161
chunksize = int(1e7)
xgboost_data = []

# IDs of the stars we want to keep; looked up once rather than once per chunk
target_ids = targets["gaiaID_DR3"]

# load in chunks: the catalogue is read piecewise and each piece is filtered
# down to the target stars before being kept in memory
for chunk in tqdm(
    pd.read_csv(
        "../data/external/xgboost.csv",
        chunksize=chunksize,
        compression="gzip",
        usecols=["source_id", "mh_xgboost"],
    ),
    # ceiling division, so the progress bar does not expect an extra chunk
    # when total_rows is an exact multiple of chunksize
    total=-(-total_rows // chunksize),
    desc="Processing chunks: ",
):
    # Select only the targets
    xgboost_data.append(chunk[chunk["source_id"].isin(target_ids)])

xgboost_data = (
    pd.concat(xgboost_data)
    .rename(columns={"source_id": "gaiaID_DR3"})
    # a repeated source ID would duplicate target rows in the left merge below,
    # so keep one row per ID (same policy as the APOGEE/GALAH merges)
    .drop_duplicates(subset="gaiaID_DR3", keep="first")
    # only source_id and mh_xgboost are read from the file, so the
    # lower/upper columns are created as NaN placeholders
    .assign(mh_xgboost_lower=np.nan, mh_xgboost_upper=np.nan)
)
targets = targets.merge(xgboost_data, on="gaiaID_DR3", how="left")

Processing chunks: 100%|██████████| 18/18 [02:21<00:00,  7.84s/it]


In [4]:
# add metallicity and alpha from medium-res spectroscopy, photometry, or xgboost

# metallicity columns in order of priority: a later entry is only used for
# stars where every earlier entry is NaN
sources = ["mh_gspspec", "mh_gspphot", "mh_xgboost"]

# start with empty result columns
for new_column in ("[Fe/H]", "e_[Fe/H]_lower", "e_[Fe/H]_upper"):
    targets[new_column] = np.nan
targets["[Fe/H]_source"] = ""

for name in sources:
    # rows that have a value in this source and are still unfilled
    fill = targets[name].notna() & targets["[Fe/H]"].isna()
    # the right-hand sides are full columns; .loc aligns them on the index and
    # writes only the selected rows
    targets.loc[fill, "[Fe/H]"] = targets[name]
    # NOTE(review): the "<source>_lower/_upper" values are copied verbatim into
    # the "e_" columns - confirm whether they are bounds or uncertainties
    targets.loc[fill, "e_[Fe/H]_lower"] = targets[name + "_lower"]
    targets.loc[fill, "e_[Fe/H]_upper"] = targets[name + "_upper"]
    targets.loc[fill, "[Fe/H]_source"] = name

# rename alphafe_gspspec column (and errors) to [alpha/Fe]
alpha_names = {
    "alphafe_gspspec": "[alpha/Fe]",
    "alphafe_gspspec_lower": "e_[alpha/Fe]_lower",
    "alphafe_gspspec_upper": "e_[alpha/Fe]_upper",
}
targets = targets.rename(columns=alpha_names)

# the per-source metallicity columns (and their lower/upper companions) are
# now folded into [Fe/H], so drop them
obsolete = [name + suffix for name in sources for suffix in ("", "_lower", "_upper")]
targets = targets.drop(columns=obsolete)

In [5]:
# add metallicities and alpha from high-res spectroscopic surveys

## Apogee
apogee_columns = [
    "GAIAEDR3_SOURCE_ID",
    "FE_H",
    "FE_H_ERR",
    "ALPHA_M",
    "ALPHA_M_ERR",
    "ASPCAPFLAGS",
]
# read the first HDU and keep only the relevant columns
apogee = Table.read("../data/external/apogee.fits", hdu=1)[apogee_columns]
apogee_df = apogee.to_pandas()

# quality cuts: drop entries whose ASPCAPFLAGS text mentions ALPHA_M, M_H or
# SN_WARN
is_flagged = apogee_df["ASPCAPFLAGS"].astype(str).str.contains("ALPHA_M|M_H|SN_WARN")
apogee_names = {
    "GAIAEDR3_SOURCE_ID": "gaiaID_DR3",
    "FE_H": "[Fe/H]_apogee",
    "FE_H_ERR": "e_[Fe/H]_apogee",
    "ALPHA_M": "[alpha/M]_apogee",
    "ALPHA_M_ERR": "e_[alpha/M]_apogee",
}
apogee_df = apogee_df[~is_flagged].rename(columns=apogee_names)

# merge into targets; only the first row per Gaia ID is used so that the left
# merge cannot duplicate target rows
apogee_unique = apogee_df.drop_duplicates(subset="gaiaID_DR3", keep="first")
targets = targets.merge(apogee_unique, on="gaiaID_DR3", how="left")

## GALAH
with warnings.catch_warnings():
    # silence warning for log(cm.s**-2) units
    warnings.simplefilter("ignore", UnitsWarning)
    galah = Table.read("../data/external/galah.fits")

# quality cuts: keep only rows where each of these flags is 0 (applied one
# flag at a time)
for flag in ("flag_sp", "flag_fe_h", "flag_alpha_fe"):
    galah = galah[galah[flag] == 0]

# get relevant columns and give them survey-specific names
galah_names = {
    "dr3_source_id": "gaiaID_DR3",
    "fe_h": "[Fe/H]_galah",
    "e_fe_h": "e_[Fe/H]_galah",
    "alpha_fe": "[alpha/Fe]_galah",
    "e_alpha_fe": "e_[alpha/Fe]_galah",
}
galah = galah[["dr3_source_id", "fe_h", "e_fe_h", "alpha_fe", "e_alpha_fe"]]
galah_df = galah.to_pandas().rename(columns=galah_names)

# merge into targets; only the first row per Gaia ID is used so that the left
# merge cannot duplicate target rows
galah_unique = galah_df.drop_duplicates(subset="gaiaID_DR3", keep="first")
targets = targets.merge(galah_unique, on="gaiaID_DR3", how="left")

In [6]:
# process asPIC

aspic_columns = [
    "sourceId",
    "GLON",
    "GLAT",
    "gaiaV",
    "egaiaV",
    "Gmag",
    "eGmag",
    "Radius",
    "eRadius",
    "Mass",
    "eMass",
    "Teff",
    "eTeff",
    "sourceFlag",
]
asPIC = Table.read("../data/external/asPIC_1.1.fits")[aspic_columns]

# keep only the first entry along the second axis of every selected column
# (assumes each of these columns is 2-D - TODO confirm against the file)
for column_name in asPIC.colnames:
    asPIC[column_name] = asPIC[column_name][:, 0]
asPIC = asPIC.to_pandas()

# rename the ID / flag columns and use the "e_" prefix for the error columns
aspic_names = {
    "sourceId": "gaiaID_DR2",
    "sourceFlag": "Stellar Type",
    "egaiaV": "e_gaiaV",
    "eGmag": "e_Gmag",
    "eRadius": "e_Radius",
    "eMass": "e_Mass",
    "eTeff": "e_Teff",
}
asPIC = asPIC.rename(columns=aspic_names)

# translate the numeric source flag into a label; flag values that are not
# listed here become NaN
flag_labels = {
    1: "FGK",  # FGK
    5: "FGK",  # FGK and known planet host
    2: "M",  # M
    6: "M",  # M and known planet host
}
asPIC["Stellar Type"] = asPIC["Stellar Type"].map(flag_labels)

# match asPIC and targets on the DR2 source ID; the inner join keeps only
# stars present in both tables
data = targets.merge(asPIC, on="gaiaID_DR2", how="inner")

In [7]:
# save the merged catalogue as CSV (the DataFrame index is not written)
output_path = "../data/processed/all_sky_targets.csv"
data.to_csv(output_path, index=False)