In [1]:
import pandas as pd
from astropy.table import Table

from plato.classification import classify_stars

In [2]:
# Field identifier; it is interpolated into the raw and processed target file names
# and into the name of the per-field "num_CCDs_<field>" column.
field = "LOPS2"

In [5]:
# process targets
#
# Builds `targets` from the raw target list of `field`: population labels, camera
# count and one combined metallicity column. The whole transformation is a single
# non-mutating chain, so no column-subset slice is modified in place (the pattern that
# triggers pandas' SettingWithCopyWarning) and the cell can be re-run safely.
num_ccds_column = f"num_CCDs_{field}"

# integer codes of the "flag" column -> population names; any code that is not listed
# here ends up as NaN in "Population_Akshara"
population_names = {
    0: "Halo",
    1: "Thick Disk",
    2: "Thin Disk",
}

targets = (
    # add classification; classify_stars is expected to supply the "Population" column
    # selected below (its implementation is not visible here -- TODO confirm)
    classify_stars(pd.read_csv(f"../data/raw/plato_targets_{field}.csv"))
    # select relevant columns
    .loc[
        :,
        [
            "source_id",
            "Population",
            "flag",
            num_ccds_column,
            "mh_xgboost",
            "mh_gspspec",
            "mh_gspphot",
            "alphafe_gspspec",
        ],
    ]
    # rename columns
    .rename(columns={"source_id": "sourceId", "flag": "Population_Akshara"})
    .assign(
        **{
            # rename population flags
            "Population_Akshara": lambda df: df["Population_Akshara"].map(
                population_names
            ),
            # n_cameras is six times the per-field CCD count
            # (presumably six cameras per counted unit -- TODO confirm)
            "n_cameras": lambda df: 6 * df[num_ccds_column],
            # [Fe/H] metallicity in the following priority: gspspec, gspphot, xgboost
            # (the next source is used only where the previous one is NaN)
            "[Fe/H]": lambda df: df["mh_gspspec"]
            .fillna(df["mh_gspphot"])
            .fillna(df["mh_xgboost"]),
        }
    )
    # the helper columns are no longer needed once the derived ones exist
    .drop(columns=[num_ccds_column, "mh_xgboost", "mh_gspspec", "mh_gspphot"])
    # rename alphafe_gspspec column to [alpha/Fe]
    .rename(columns={"alphafe_gspspec": "[alpha/Fe]"})
)

In [6]:
# add high-res spectroscopic metallicities


def _abundance_frame(table, column_names):
    """Convert a survey table to a DataFrame with at most one row per "sourceId".

    table: astropy Table that already contains only the columns to export.
    column_names: mapping of catalogue column name -> output column name; one of the
        output names must be "sourceId".

    Returns the renamed DataFrame, keeping the first row of every "sourceId".
    """
    frame = table.to_pandas().rename(columns=column_names)
    # A left merge repeats a target once per matching catalogue row, so a source that
    # is listed several times would duplicate rows of `targets`. Whether these files
    # contain such repeats is not visible here; if they do not, this is a no-op.
    return frame.drop_duplicates(subset="sourceId", keep="first")


def _attach_abundances(stars, abundances):
    """Left-join `abundances` onto `stars` by "sourceId" without changing the row count.

    Columns of `abundances` that already exist in `stars` (from an earlier run of this
    cell) are replaced instead of producing "_x"/"_y" suffixed copies.
    """
    value_columns = [name for name in abundances.columns if name != "sourceId"]
    stars = stars.drop(columns=value_columns, errors="ignore")
    # validate= makes pandas raise if the right-hand keys are not unique
    return stars.merge(abundances, on="sourceId", how="left", validate="many_to_one")


## Apogee
apogee_columns = {
    "GAIAEDR3_SOURCE_ID": "sourceId",
    "FE_H": "[Fe/H]_apogee",
    "ALPHA_M": "[alpha/Fe]_apogee",
}
apogee = Table.read("../data/external/apogee.fits")
# get relevant columns
apogee = apogee[list(apogee_columns)]
apogee_df = _abundance_frame(apogee, apogee_columns)
# merge into targets
targets = _attach_abundances(targets, apogee_df)

## GALAH
galah_columns = {
    "dr3_source_id": "sourceId",
    "fe_h": "[Fe/H]_galah",
    "alpha_fe": "[alpha/Fe]_galah",
}
galah = Table.read("../data/external/galah.fits")
# quality cuts: keep rows whose iron and alpha abundance flags are both 0
galah = galah[galah["flag_fe_h"] == 0]
galah = galah[galah["flag_alpha_fe"] == 0]
# get relevant columns
galah = galah[list(galah_columns)]
galah_df = _abundance_frame(galah, galah_columns)
# merge into targets
targets = _attach_abundances(targets, galah_df)





In [15]:
# process asPIC

# columns kept from the asPIC catalogue
aspic_columns = [
    "sourceId",
    "GLON",
    "GLAT",
    "gaiaV",
    "egaiaV",
    "Gmag",
    "eGmag",
    "Radius",
    "eRadius",
    "Mass",
    "eMass",
    "Teff",
    "eTeff",
    "sourceFlag",
]
asPIC = Table.read("../data/external/asPIC_1.1.fits")[aspic_columns]

# Every selected column is indexed as a 2-D array and reduced to its first entry along
# the second axis (presumably because multidimensional columns cannot be converted by
# to_pandas -- TODO confirm against the file layout).
for name in asPIC.colnames:
    asPIC[name] = asPIC[name][:, 0]

# sourceFlag code -> stellar type; codes that are not listed become NaN
stellar_type_by_flag = {
    1: "FGK",  # FGK
    2: "M",  # M
    5: "FGK",  # FGK and known planet host
    6: "M",  # M and known planet host
}

# rename source flag
asPIC = asPIC.to_pandas().rename(columns={"sourceFlag": "Stellar Type"})
asPIC["Stellar Type"] = asPIC["Stellar Type"].map(stellar_type_by_flag)

In [16]:
# match asPIC and targets on sourceId
# inner join: only stars present in both tables are kept
data = targets.merge(asPIC, how="inner", on="sourceId")

In [17]:
# save
# the identifier column is written out under the name "gaiaID"
processed_path = f"../data/processed/plato_targets_{field}_processed.csv"
data = data.rename(columns={"sourceId": "gaiaID"})
data.to_csv(processed_path, index=False)

### Stars in asPIC that are missing from Akshara's catalogue (due to parallax quality cut)

In [18]:
import polars as pl

# Only the "source_id" column of the full (not field-specific) target list is read;
# the result is converted to pandas for the isin() lookup that follows.
all_targets_path = "../data/raw/plato_targets.csv"
all_target_ids = pl.read_csv(all_targets_path, columns=["source_id"]).to_pandas()

In [19]:
# get asPIC subset that contains values not in all_target_ids
in_target_list = asPIC["sourceId"].isin(all_target_ids["source_id"])
missing_stars = asPIC[~in_target_list]

# share of asPIC rows without a counterpart in the target list, in percent
missing_percentage = len(missing_stars) / len(asPIC) * 100
print(f"Percentage of missing stars: {missing_percentage:.2f}%")

Percentage of missing stars: 6.81%
