In [2]:
import pandas as pd

# Read the full dataset from disk into a single DataFrame.
df = pd.read_csv("TESS.csv")

# --- Tier 1 (Core predictive features) ---
# The list is assembled from small groups so each group stays easy to scan.
# Selection below is strict: a column absent from the CSV raises KeyError.
tier1_features = (
    ["toi", "tfopwg_disp"]  # always keep
    + ["pl_orbper", "pl_trandurh", "pl_trandep"]
    + ["pl_rade", "pl_insol", "pl_eqt"]
    + ["st_teff", "st_logg", "st_rad"]
    + ["st_tmag", "st_dist"]
)
df_tier1 = df.loc[:, tier1_features]
# index=False keeps the DataFrame row index out of the exported file.
df_tier1.to_csv("tess_toi_tier1.csv", index=False)

# --- Tier 2 (Tier 1 + additional useful features & uncertainties) ---
# Begin with a copy of the Tier 1 list, then append each err1/err2 pair in order.
tier2_features = list(tier1_features)
for err_pair in (
    # Orbital/transit uncertainties
    ("pl_orbpererr1", "pl_orbpererr2"),
    ("pl_trandurherr1", "pl_trandurherr2"),
    ("pl_trandeperr1", "pl_trandeperr2"),
    ("pl_radeerr1", "pl_radeerr2"),
    ("pl_insolerr1", "pl_insolerr2"),
    ("pl_eqterr1", "pl_eqterr2"),
    # Stellar uncertainties
    ("st_tefferr1", "st_tefferr2"),
    ("st_loggerr1", "st_loggerr2"),
    ("st_raderr1", "st_raderr2"),
    ("st_tmagerr1", "st_tmagerr2"),
    ("st_disterr1", "st_disterr2"),
):
    tier2_features.extend(err_pair)

# Best-effort selection: requested columns that the loaded frame does not
# contain are skipped silently instead of raising KeyError.
tier2_present = [name for name in tier2_features if name in df.columns]
df_tier2 = df[tier2_present]
df_tier2.to_csv("tess_toi_tier2.csv", index=False)

# --- Tier 3 (All remaining astrophysical + positional + flags) ---
# Core label/id columns. These are protected in the filter below, so they
# survive even if one of them is later added to drop_cols by mistake.
always_keep = ["toi", "tfopwg_disp"]
# Known non-informative ID/metadata columns to exclude from the export.
drop_cols = [
    "toipfx", "tid", "ctoi_alias", "pl_pnum",
    "rastr", "decstr",
    "toi_created", "rowupdate",
]
# Walk every column of the loaded frame in its original order and keep it
# unless it is listed in drop_cols; always_keep takes precedence over drop_cols.
tier3_features = [
    c for c in df.columns
    if c in always_keep or c not in drop_cols
]
df_tier3 = df[tier3_features]
df_tier3.to_csv("tess_toi_tier3.csv", index=False)

print("✅ Exported: tess_toi_tier1.csv, tess_toi_tier2.csv, tess_toi_tier3.csv")


✅ Exported: tess_toi_tier1.csv, tess_toi_tier2.csv, tess_toi_tier3.csv
