In [9]:
# Enable IPython's autoreload extension; mode 2 re-imports all modules before
# each cell executes, so edits to the imported project code are picked up.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [10]:
import numpy as np
import pandas as pd
from astropy.table import Table

from plato.classification import quality_cuts, classify_stars

In [11]:
# Build the working target table in one pass:
#   1. read the raw FITS target list and convert it to a pandas DataFrame
#   2. apply the project's quality cuts (max_error=0.2)
#   3. attach the stellar classification
#   4. expose the source identifier under the name "gaiaID"
targets = (
    Table.read("../data/raw/plato_targets.fits")
    .to_pandas()
    .pipe(quality_cuts, max_error=0.2)
    .pipe(classify_stars)
    .rename(columns={"SOURCE_ID": "gaiaID"})
)

In [12]:
# Consolidate the per-source metallicity columns into one [Fe/H] value with
# lower/upper bounds and a provenance label, and expose the gspspec alpha
# abundance as [alpha/Fe].
#
# Priority order: mh_gspspec first, then mh_gspphot. A later source is only
# used for rows whose [Fe/H] is still missing after the earlier ones.
# NOTE(review): an xgboost-based metallicity is not part of `sources`; add it
# here if it should act as a final fallback.
sources = ["mh_gspspec", "mh_gspphot"]
bound_suffixes = ["", "_lower", "_upper"]

# start from empty value/bound columns and an empty provenance label
for suffix in bound_suffixes:
    targets[f"[Fe/H]{suffix}"] = np.nan
targets["[Fe/H]_source"] = ""

for source in sources:
    # rows where this source has a value and no higher-priority source did
    to_fill = targets[source].notnull() & targets["[Fe/H]"].isnull()
    for suffix in bound_suffixes:
        # the right-hand Series is aligned on the index, so only the selected
        # rows receive values
        targets.loc[to_fill, f"[Fe/H]{suffix}"] = targets[f"{source}{suffix}"]
    targets.loc[to_fill, "[Fe/H]_source"] = source

# alphafe_gspspec (and its bounds) become [alpha/Fe]
alpha_renames = {
    f"alphafe_gspspec{suffix}": f"[alpha/Fe]{suffix}" for suffix in bound_suffixes
}
targets = targets.rename(columns=alpha_renames)

# the per-source metallicity columns are now redundant
consumed_columns = [
    f"{source}{suffix}" for source in sources for suffix in bound_suffixes
]
targets = targets.drop(columns=consumed_columns)

In [None]:
# Attach metallicities and alpha abundances from two external survey
# catalogues via left merges on "gaiaID", so every existing target row is kept
# and unmatched targets get NaN in the new columns.
# NOTE(review): a left merge repeats a target row once per matching catalogue
# row — confirm the survey tables hold at most one row per Gaia ID.

## APOGEE
# original column name -> name used in `targets`
apogee_columns = {
    "GAIAEDR3_SOURCE_ID": "gaiaID",
    "FE_H": "[Fe/H]_apogee",
    "ALPHA_M": "[alpha/M]_apogee",
}
apogee = Table.read("../data/external/apogee.fits")[list(apogee_columns)]
apogee_df = apogee.to_pandas().rename(columns=apogee_columns)
targets = targets.merge(apogee_df, how="left", on="gaiaID")

## GALAH
# original column name -> name used in `targets`
galah_columns = {
    "dr3_source_id": "gaiaID",
    "fe_h": "[Fe/H]_galah",
    "alpha_fe": "[alpha/Fe]_galah",
}
galah = Table.read("../data/external/galah.fits")
# quality cuts: keep only rows where both flags equal 0
for flag in ("flag_fe_h", "flag_alpha_fe"):
    galah = galah[galah[flag] == 0]
galah = galah[list(galah_columns)]
galah_df = galah.to_pandas().rename(columns=galah_columns)
targets = targets.merge(galah_df, how="left", on="gaiaID")





In [None]:
# Load the asPIC catalogue, keep the columns needed downstream, and convert it
# to a pandas DataFrame keyed by "gaiaID" with a readable "Stellar Type".
asPIC_columns = [
    "sourceId",
    "GLON",
    "GLAT",
    "gaiaV",
    "egaiaV",
    "Gmag",
    "eGmag",
    "Radius",
    "eRadius",
    "Mass",
    "eMass",
    "Teff",
    "eTeff",
    "sourceFlag",
]
asPIC = Table.read("../data/external/asPIC_1.1.fits")[asPIC_columns]

# keep only the first entry along the second axis of every column
# (presumably the FITS columns are stored as 2-D arrays — TODO confirm)
for name in asPIC.colnames:
    asPIC[name] = asPIC[name][:, 0]

# sourceId becomes gaiaID and sourceFlag becomes "Stellar Type"
asPIC = asPIC.to_pandas().rename(
    columns={"sourceId": "gaiaID", "sourceFlag": "Stellar Type"}
)

# translate the numeric source flag into a spectral class label;
# flag values not listed here map to NaN
stellar_type_by_flag = {
    1: "FGK",  # FGK
    5: "FGK",  # FGK and known planet host
    2: "M",  # M
    6: "M",  # M and known planet host
}
asPIC["Stellar Type"] = asPIC["Stellar Type"].map(stellar_type_by_flag)

In [None]:
# match asPIC and targets on the Gaia source identifier; the inner join keeps
# only stars present in both tables.
# Both frames carry the identifier as "gaiaID" at this point (targets renamed
# it from "SOURCE_ID", asPIC from "sourceId"), so "gaiaID" is the join key.
data = pd.merge(targets, asPIC, on="gaiaID", how="inner")

In [None]:
# Write the merged catalogue to CSV without the row index.
# The rename only has an effect if a "sourceId" column is present; pandas
# ignores missing keys by default.
# NOTE(review): `field` is not assigned in any cell visible here — confirm it
# is defined before this cell runs, otherwise building the path raises
# NameError.
data = data.rename(columns={"sourceId": "gaiaID"})
output_path = f"../data/processed/plato_targets_{field}_processed.csv"
data.to_csv(output_path, index=False)

### Stars missing from Akshara's catalogue that are in asPIC (due to parallax quality cut)

In [None]:
import polars as pl

# Read only the "source_id" column of the raw target CSV with polars, then
# convert the result to a pandas DataFrame for the comparison with asPIC.
all_target_ids = pl.read_csv(
    "../data/raw/plato_targets.csv",
    columns=["source_id"],
).to_pandas()

In [None]:
# get the asPIC subset whose Gaia IDs do not appear in all_target_ids
# (asPIC's "sourceId" column was renamed to "gaiaID" when asPIC was processed,
# so the identifier must be read from "gaiaID" here)
missing_stars = asPIC[~asPIC["gaiaID"].isin(all_target_ids["source_id"])]
print(f"Percentage of missing stars: {len(missing_stars)/len(asPIC)*100:.2f}%")

Percentage of missing stars: 6.81%
