
# Exoplanet ML — End-to-End (KOI & PS compatible)

This notebook trains:
1. **Type classifier** (multiclass): size-based (and optional thermal) exoplanet classes.
2. **Binary classifier** (optional): *is exoplanet?* (only if your dataset includes a reliable binary label).

It supports **KOI** (`koi_*`) and **Planetary Systems (PS)** (`pl_*`) tables from the NASA Exoplanet Archive.  
**Auto-detects** which one you loaded and maps column names accordingly.



## 1) Requirements


In [65]:

# If needed:
! pip install pandas numpy scikit-learn joblib





## 2) Imports & Constants


In [66]:

import os
from typing import Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.ensemble import HistGradientBoostingClassifier
import joblib

MJUP_TO_MEARTH = 317.828



## 3) Load Dataset (robust CSV/TSV, skip Archive header)


In [67]:

# <<< EDIT PATH IF NEEDED >>>
CSV_PATH = "kepler.csv"  # your downloaded table

# Try CSV (comma) with Archive comments skipped. If it fails, try TSV.
try:
    df = pd.read_csv(CSV_PATH, comment="#", encoding="utf-8-sig", engine="python")
except Exception as e:
    print("CSV read failed, trying TSV...", e)
    df = pd.read_csv(CSV_PATH, sep="\t", comment="#", encoding="utf-8-sig", engine="python")

print(df.shape)
pd.set_option("display.max_columns", 120)
df.head(3)


(9564, 55)


Unnamed: 0,kepid,kepoi_name,kepler_name,koi_disposition,koi_pdisposition,koi_score,koi_fpflag_nt,koi_fpflag_ss,koi_fpflag_co,koi_fpflag_ec,koi_period,koi_period_err1,koi_period_err2,koi_time0bk,koi_time0bk_err1,koi_time0bk_err2,koi_impact,koi_impact_err1,koi_impact_err2,koi_duration,koi_duration_err1,koi_duration_err2,koi_depth,koi_depth_err1,koi_depth_err2,koi_prad,koi_prad_err1,koi_prad_err2,koi_teq,koi_teq_err1,koi_teq_err2,koi_insol,koi_insol_err1,koi_insol_err2,koi_model_snr,koi_tce_plnt_num,koi_tce_delivname,koi_steff,koi_steff_err1,koi_steff_err2,koi_slogg,koi_slogg_err1,koi_slogg_err2,koi_srad,koi_srad_err1,koi_srad_err2,koi_smass,koi_smass_err1,koi_smass_err2,koi_sage,koi_sage_err1,koi_sage_err2,ra,dec,koi_kepmag
0,10797460,K00752.01,Kepler-227 b,CONFIRMED,CANDIDATE,1.0,0,0,0,0,9.488036,2.8e-05,-2.8e-05,170.53875,0.00216,-0.00216,0.146,0.318,-0.146,2.9575,0.0819,-0.0819,615.8,19.5,-19.5,2.26,0.26,-0.15,793.0,,,93.59,29.45,-16.65,35.8,1.0,q1_q17_dr25_tce,5455.0,81.0,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,0.919,0.052,-0.046,,,,291.93423,48.141651,15.347
1,10797460,K00752.02,Kepler-227 c,CONFIRMED,CANDIDATE,0.969,0,0,0,0,54.418383,0.000248,-0.000248,162.51384,0.00352,-0.00352,0.586,0.059,-0.443,4.507,0.116,-0.116,874.8,35.5,-35.5,2.83,0.32,-0.19,443.0,,,9.11,2.87,-1.62,25.8,2.0,q1_q17_dr25_tce,5455.0,81.0,-81.0,4.467,0.064,-0.096,0.927,0.105,-0.061,0.919,0.052,-0.046,,,,291.93423,48.141651,15.347
2,10811496,K00753.01,,CANDIDATE,CANDIDATE,0.0,0,0,0,0,19.89914,1.5e-05,-1.5e-05,175.850252,0.000581,-0.000581,0.969,5.126,-0.077,1.7822,0.0341,-0.0341,10829.0,171.0,-171.0,14.6,3.92,-1.31,638.0,,,39.3,31.04,-10.49,76.3,1.0,q1_q17_dr25_tce,5853.0,158.0,-176.0,4.544,0.044,-0.176,0.868,0.233,-0.078,0.961,0.11,-0.121,,,,297.00482,48.134129,15.436



## 4) Detect Table Type (KOI vs PS) and Map Columns
Maps key columns to a unified set so the rest of the notebook is identical.


In [68]:

def first_present(df, candidates):
    for c in candidates:
        if c in df.columns:
            return c
    return None

IS_KOI = any(c.startswith("koi_") for c in df.columns)
IS_PS  = any(c.startswith("pl_")  for c in df.columns)

# Unified names via aliasing
COL_PL_RADE   = first_present(df, ["pl_rade", "koi_prad"])      # R_⊕
COL_PL_BMASSE = first_present(df, ["pl_bmasse", "koi_smass"])   # M_⊕ (KOI: koi_smass)
COL_PL_BMASSJ = first_present(df, ["pl_bmassj"])                # M_J (often absent in KOI)
COL_PL_EQT    = first_present(df, ["pl_eqt", "koi_teq"])        # K

COL_PERIOD    = first_present(df, ["pl_orbper", "koi_period"])  # days
COL_ST_TEFF   = first_present(df, ["st_teff", "koi_steff"])     # K
COL_ST_RAD    = first_present(df, ["st_rad", "koi_srad"])       # R_⊙
COL_INSOL     = first_present(df, ["koi_insol"])                # KOI-only

print("Detected: KOI?", IS_KOI, "| PS?", IS_PS)
print("Mapped columns:",
      "RADE=",COL_PL_RADE, "BMASSE=",COL_PL_BMASSE, "BMASSJ=",COL_PL_BMASSJ, "EQT=",COL_PL_EQT,
      "PERIOD=",COL_PERIOD, "ST_TEFF=",COL_ST_TEFF, "ST_RAD=",COL_ST_RAD, "INSOL=",COL_INSOL)


Detected: KOI? True | PS? False
Mapped columns: RADE= koi_prad BMASSE= koi_smass BMASSJ= None EQT= koi_teq PERIOD= koi_period ST_TEFF= koi_steff ST_RAD= koi_srad INSOL= koi_insol



## 5) Build Labels (Size & Thermal → Type)
Creates:
- `size_label` from radius/mass
- `thermal_label` from equilibrium temperature
- `type_label` = size or size + thermal (e.g., `joviano_caliente`)


In [69]:

def classify_planet(row: pd.Series) -> str:
    """
    Clasifica el planeta en una de las siguientes clases permitidas:
    earth-like, super earth, ocean world, hot jupiter, gas giant, ice giant, desert world
    (Se eliminan: sub-neptune, neptune-like, jupiter-like, rocky)
    Siempre retorna una clase permitida.
    """
    r  = row.get(COL_PL_RADE,  np.nan)  # Earth radii
    me = row.get(COL_PL_BMASSE, np.nan) # Earth masses
    mj = row.get(COL_PL_BMASSJ, np.nan) # Jupiter masses
    teq = row.get(COL_PL_EQT, np.nan)   # Equilibrium temperature

    if pd.notna(mj) and pd.isna(me):
        me = mj * MJUP_TO_MEARTH

    # Aproximar radio desde masa si falta (topeando en rangos razonables)
    if pd.isna(r) and pd.notna(me):
        if me < 2:
            r = 1.0
        elif me < 10:
            r = 1.7
        elif me < 20:
            r = 2.3
        elif me < 50:
            r = 3.5
        elif me < 150:
            r = 5.0
        else:
            r = 10.0

    # Si no hay radio ni masa, mapear por temperatura a clases válidas
    if pd.isna(r) and pd.isna(me):
        if pd.notna(teq) and teq > 800:
            return "desert world"
        return "ocean world"

    # --- Earth-like --- (relajado + fallback por insolación)
    insol = row.get(COL_INSOL, np.nan) if 'COL_INSOL' in globals() else np.nan
    if pd.notna(r):
        # por temperatura
        if 0.75 <= r <= 1.6 and pd.notna(teq) and 180 <= teq <= 340:
            return "earth-like"
        # por insolación aproximada (si está disponible en KOI)
        if 0.75 <= r <= 1.6 and pd.notna(insol) and 0.3 <= insol <= 2.5:
            return "earth-like"

    # --- Desert World --- (prioridad alta para calientes)
    if pd.notna(r) and r <= 2.5 and pd.notna(teq) and teq > 800:
        return "desert world"

    # --- Super Earth --- (rango específico para tierras calientes)
    if pd.notna(r) and 1.0 <= r <= 2.0:
        # criterio principal: temperatura alta
        if pd.notna(teq) and teq >= 400:
            return "super earth"
        # criterio secundario: insolación alta
        if pd.notna(insol) and insol >= 1.0:
            return "super earth"
        # fallback: rango específico sin teq baja
        if 1.2 <= r <= 1.8 and (pd.isna(teq) or teq >= 350):
            return "super earth"

    # --- Ocean World --- (rango específico para templados)
    if pd.notna(r) and 1.5 <= r <= 2.8:
        # criterio principal: temperatura templada
        if pd.notna(teq) and 200 <= teq < 400:
            return "ocean world"
        # criterio secundario: insolación moderada
        if pd.notna(insol) and 0.3 <= insol < 1.0:
            return "ocean world"
        # criterio terciario: masa moderada
        if pd.notna(me) and 1 <= me <= 6:
            return "ocean world"
        # fallback: rango específico templado
        if 1.8 <= r <= 2.5 and (pd.isna(teq) or 250 <= teq < 450):
            return "ocean world"

    # --- Ice Giant (prioridad absoluta en su rango) ---
    if pd.notna(r) and 3.0 < r <= 6.5:
        # criterio principal: temperatura baja
        if pd.notna(teq) and teq < 320:
            return "ice giant"
        # criterio secundario: insolación muy baja
        if pd.notna(insol) and insol < 0.5:
            return "ice giant"
        # criterio terciario: masa intermedia
        if pd.notna(me) and 5 <= me <= 25:
            return "ice giant"
        # fallback: rango específico sin teq alta
        if 3.5 <= r <= 6.0 and (pd.isna(teq) or teq < 380):
            return "ice giant"
        # prioridad absoluta en rango 4-6
        if 4.0 <= r <= 6.0:
            return "ice giant"

    # --- Hot Jupiter ---
    if pd.notna(r) and r > 6.0 and pd.notna(teq) and teq > 800:
        return "hot jupiter"

    # --- Gas Giant (más restrictivo para evitar solapamiento) ---
    if pd.notna(r) and r > 6.5:
        # solo gigantes muy grandes
        if pd.isna(teq) or teq <= 800:
            return "gas giant"
    elif pd.notna(r) and 4.0 < r <= 6.5:
        # rango medio solo si no es ice giant
        if pd.notna(teq) and teq >= 300:
            return "gas giant"

    # Remapeos explícitos de clases eliminadas según radio/teq
    if pd.notna(r):
        # 2.5-4.0 (antes sub-neptune): usar teq para decidir
        if 2.5 < r <= 4.0:
            if pd.notna(teq) and teq < 320:
                return "ocean world"
            return "super earth"
        # <=2.5 (antes rocky fallback)
        if r <= 2.5:
            if pd.notna(teq) and teq > 800:
                return "desert world"
            # si templado exacto y tamaño tierra ya capturado arriba
            return "super earth"
        # >6 y sin teq ya capturado como gas giant

    # Si solo hay masa (sin radio tras aproximación), decidir sin clases prohibidas
    if pd.isna(r) and pd.notna(me):
        if me < 10:
            return "super earth"
        elif me < 50:
            return "ocean world"
        elif me < 150:
            return "gas giant"
        else:
            return "gas giant"

    # Fallback final permitido
    return "super earth"

def build_type_labels(df: pd.DataFrame) -> pd.Series:
    type_label = df.apply(classify_planet, axis=1)
    return type_label

type_label = build_type_labels(df)
df["type_label"] = type_label

cols_to_show = [c for c in [COL_PL_RADE, COL_PL_BMASSE, COL_PL_BMASSJ, COL_PL_EQT,
                            "type_label"] if c is not None]
df[cols_to_show].head(10)


Unnamed: 0,koi_prad,koi_smass,koi_teq,type_label
0,2.26,0.919,793.0,super earth
1,2.83,0.919,443.0,super earth
2,14.6,0.961,638.0,gas giant
3,33.46,0.836,1395.0,hot jupiter
4,2.75,1.095,1406.0,ocean world
5,3.9,1.053,835.0,super earth
6,2.77,1.053,1160.0,ocean world
7,1.59,1.053,1360.0,desert world
8,39.21,1.358,1342.0,hot jupiter
9,5.76,0.801,600.0,ice giant


In [70]:
df['type_label'].value_counts(dropna=False)

type_label
desert world    2660
super earth     2276
hot jupiter     1873
ocean world     1164
gas giant        982
ice giant        447
earth-like       162
Name: count, dtype: int64

In [71]:
from collections import Counter

# Oversampling para balancear las clases de 'type_label'
from sklearn.utils import resample

# Separar el dataframe por clase
dfs = []
min_count = df['type_label'].value_counts().max()
for label in df['type_label'].unique():
    df_label = df[df['type_label'] == label]
    if len(df_label) < min_count:
        df_label_upsampled = resample(
            df_label,
            replace=True,
            n_samples=min_count,
            random_state=42
        )
        dfs.append(df_label_upsampled)
    else:
        dfs.append(df_label)
df_balanced = pd.concat(dfs).reset_index(drop=True)

print("Distribución de clases tras oversampling:")
print(df_balanced['type_label'].value_counts())


Distribución de clases tras oversampling:
type_label
super earth     2660
gas giant       2660
hot jupiter     2660
ocean world     2660
desert world    2660
ice giant       2660
earth-like      2660
Name: count, dtype: int64



## 6) Feature Selection (Auto for KOI/PS)


In [72]:

def present(df, cols):
    return [c for c in cols if c in df.columns]

# Candidates per table
NUM_KOI = [
    "koi_prad","koi_teq","koi_period","koi_insol","koi_model_snr","koi_score",
    "koi_steff","koi_srad","koi_smass","koi_kepmag",
]
CAT_KOI = [
    # Avoid koi_disposition/koi_pdisposition to prevent leakage if you ever do binary classification
    "koi_tce_delivname",
]

NUM_PS = [
    "pl_rade","pl_bmasse","pl_bmassj","pl_orbper","pl_orbsmax","pl_eqt",
    "st_teff","st_rad","st_mass","st_lum","sy_dist","sy_pnum","sy_snum",
]
CAT_PS = ["discoverymethod","disc_year","discoverylocale","facility"]

num_candidates = NUM_KOI if IS_KOI else NUM_PS
cat_candidates = CAT_KOI if IS_KOI else CAT_PS

num_cols = present(df, num_candidates)
cat_cols = present(df, cat_candidates)

# Fallbacks
if not num_cols and not cat_cols:
    num_cols = df.select_dtypes(include=["number"]).columns.tolist()

print("num_cols:", num_cols)
print("cat_cols:", cat_cols)


num_cols: ['koi_prad', 'koi_teq', 'koi_period', 'koi_insol', 'koi_model_snr', 'koi_score', 'koi_steff', 'koi_srad', 'koi_smass', 'koi_kepmag']
cat_cols: ['koi_tce_delivname']



## 7) Preprocessing Pipelines


In [73]:

transformers = []
if num_cols:
    transformers.append(("num",
                         Pipeline([("imputer", SimpleImputer(strategy="median")),
                                   ("log", FunctionTransformer(np.log1p, feature_names_out="one-to-one")),
                                   ("scaler", StandardScaler())]),
                         num_cols))
if cat_cols:
    transformers.append(("cat",
                         Pipeline([("imputer", SimpleImputer(strategy="most_frequent")),
                                   ("onehot", OneHotEncoder(handle_unknown="ignore", min_frequency=0.01))]),
                         cat_cols))

if not transformers:
    raise RuntimeError("No features available for modeling.")

preprocessor = ColumnTransformer(transformers=transformers,
                                 remainder="drop",
                                 verbose_feature_names_out=False)



## 8) Train/Test Split & Train Type Classifier


In [74]:

df_type = df[df["type_label"].notna()].copy()
X_type  = df_type[num_cols + cat_cols]
y_type  = df_type["type_label"].astype(str)

# Mantener todas las clases (no excluir raras). Usaremos sample_weight para balancear.
vc = y_type.value_counts()
print("Distribución de etiquetas:", vc.to_dict())

# Eliminar explícitamente clases no permitidas por si existieran
DISALLOWED = {"sub-neptune", "neptune-like", "jupiter-like", "rocky"}
mask_allowed = ~y_type.isin(DISALLOWED)
X_type = X_type[mask_allowed]
y_type = y_type[mask_allowed]

# train
Xt_train, Xt_test, yt_train, yt_test = train_test_split(
    X_type, y_type, test_size=0.2, stratify=y_type, random_state=42
)
print("Shapes:", Xt_train.shape, Xt_test.shape)

# pipeline + fit
clf_type = Pipeline([("prep", preprocessor),
                     ("clf", HistGradientBoostingClassifier(random_state=42))])
# sample weights balanceados por clase
class_counts = yt_train.value_counts()
class_weights = (1.0 / class_counts)
sample_weight = yt_train.map(class_weights).values

clf_type.fit(Xt_train, yt_train, clf__sample_weight=sample_weight)

# predict
yt_pred = clf_type.predict(Xt_test)
print("== Type classification report ==")
print(classification_report(yt_test, yt_pred, digits=4))
print("Confusion matrix:\n", confusion_matrix(yt_test, yt_pred))


Distribución de etiquetas: {'desert world': 2660, 'super earth': 2276, 'hot jupiter': 1873, 'ocean world': 1164, 'gas giant': 982, 'ice giant': 447, 'earth-like': 162}
Shapes: (7651, 11) (1913, 11)
== Type classification report ==
              precision    recall  f1-score   support

desert world     0.9981    0.9981    0.9981       532
  earth-like     0.9143    1.0000    0.9552        32
   gas giant     1.0000    1.0000    1.0000       197
 hot jupiter     0.9973    0.9973    0.9973       375
   ice giant     0.9885    0.9663    0.9773        89
 ocean world     0.9829    0.9871    0.9850       233
 super earth     0.9912    0.9868    0.9890       455

    accuracy                         0.9927      1913
   macro avg     0.9818    0.9908    0.9860      1913
weighted avg     0.9928    0.9927    0.9927      1913

Confusion matrix:
 [[531   0   0   0   0   1   0]
 [  0  32   0   0   0   0   0]
 [  0   0 197   0   0   0   0]
 [  0   0   0 374   1   0   0]
 [  0   0   0   1  86   0   2


## 9) (Optional) Binary Classifier — Is Exoplanet?
> Provide your binary label column name if available (1/0). For KOI `koi_disposition` is **not** a clean binary ground truth; avoid leakage.


In [75]:

BINARY_LABEL_COL = 'koi_pdisposition' 

clf_bin = None
if BINARY_LABEL_COL and BINARY_LABEL_COL in df.columns:
    X_bin = df[num_cols + cat_cols]
    y_bin = (df[BINARY_LABEL_COL] == 'CANDIDATE').astype(int)
    

    # train
    Xb_train, Xb_test, yb_train, yb_test = train_test_split(
        X_bin, y_bin, test_size=0.2, stratify=y_bin, random_state=42
    )
    
    # pipeline + fit
    clf_bin = Pipeline([("prep", preprocessor),
                        ("clf", HistGradientBoostingClassifier(random_state=42))])
    clf_bin.fit(Xb_train, yb_train)
    
    # predict
    yb_pred = clf_bin.predict(Xb_test)
    print("== Binary classification report ==")
    print(classification_report(yb_test, yb_pred, digits=4))
    print("Confusion matrix:\n", confusion_matrix(yb_test, yb_pred))
else:
    print("No valid binary label provided; skipping binary model.")


== Binary classification report ==
              precision    recall  f1-score   support

           0     0.9000    0.9278    0.9137       970
           1     0.9233    0.8940    0.9084       943

    accuracy                         0.9111      1913
   macro avg     0.9117    0.9109    0.9111      1913
weighted avg     0.9115    0.9111    0.9111      1913

Confusion matrix:
 [[900  70]
 [100 843]]



## 10) Save Models & Metadata


In [76]:

os.makedirs("modelos", exist_ok=True)
joblib.dump(clf_type, "modelos/clf_exoplanet_type.joblib")
if clf_bin is not None:
    joblib.dump(clf_bin, "modelos/clf_is_exoplanet.joblib")

meta = {
    "num_cols": num_cols,
    "cat_cols": cat_cols,
    "classes_type": sorted(y_type.unique().tolist()),
    "is_koi": bool(IS_KOI),
    "is_ps": bool(IS_PS),
    "col_alias": {
        "R_earth": COL_PL_RADE,
        "M_earth": COL_PL_BMASSE,
        "M_jup": COL_PL_BMASSJ,
        "T_eq": COL_PL_EQT,
        "period_days": COL_PERIOD,
        "st_teff": COL_ST_TEFF,
        "st_rad": COL_ST_RAD,
        "insol": COL_INSOL,
    },
}
joblib.dump(meta, "modelos/metadata.joblib")
print("Saved models & metadata to ./modelos")


Saved models & metadata to ./modelos



## 11) Inference Helpers


In [77]:

def load_models(model_dir: str = "modelos"):
    clf_bin = None
    p_bin = os.path.join(model_dir, "clf_is_exoplanet.joblib")
    if os.path.exists(p_bin):
        clf_bin = joblib.load(p_bin)
    clf_type = joblib.load(os.path.join(model_dir, "clf_exoplanet_type.joblib"))
    meta = joblib.load(os.path.join(model_dir, "metadata.joblib"))
    return clf_bin, clf_type, meta

def predict_exoplanet(example: dict, model_dir: str = "modelos") -> dict:
    clf_bin, clf_type, meta = load_models(model_dir)
    cols = meta["num_cols"] + meta["cat_cols"]
    X = pd.DataFrame([{c: example.get(c, np.nan) for c in cols}])
    out = {}
    if clf_bin is not None:
        proba = clf_bin.predict_proba(X)[0, 1]
        out["is_exoplanet"] = int(proba >= 0.5)
        out["is_exoplanet_proba"] = float(proba)
    proba_type = clf_type.predict_proba(X)[0]
    pred_type = clf_type.predict(X)[0]
    classes = clf_type.named_steps["clf"].classes_
    topk = np.argsort(proba_type)[::-1][:3]
    out["type"] = str(pred_type)
    out["type_top3"] = [(str(classes[i]), float(proba_type[i])) for i in topk]
    return out



## 12) Example Inference


In [78]:

# Example uses either koi_* or pl_* depending on your dataset.
example = {}
if IS_KOI:
    example = {
        "koi_prad": 11.2,
        "koi_teq": 1400,
        "koi_period": 3.5,
        "koi_model_snr": 12.0,
        "koi_steff": 5600.0,
        "koi_srad": 1.0,
        "koi_smass": 5.5,
    }


predict_exoplanet(example, model_dir="modelos")


{'is_exoplanet': 0,
 'is_exoplanet_proba': 0.02408603788990899,
 'type': 'hot jupiter',
 'type_top3': [('hot jupiter', 0.9997216684387114),
  ('gas giant', 0.00012061868642040305),
  ('desert world', 6.999256514607027e-05)]}

In [79]:
import random

# Definir rangos de parámetros para cada clase de tipo planetario
# (nombre_clase, rango_prad, rango_teq, rango_masa)
CLASES_PARAMETROS = [
    ("desert world",      (1.3, 2.5),    (1200, 2000),   (0.5, 8.0)),
    ("hot jupiter",       (8, 40),       (900, 2500),    (80, 400)),
    ("super earth",       (1.0, 2.0),    (400, 900),     (2.0, 12.0)),
    ("gas giant",         (6, 30),       (200, 900),     (50, 300)),
    ("ocean world",       (1.5, 2.8),    (200, 400),     (1.0, 6.0)),
    ("earth-like",        (0.8, 1.5),    (250, 350),     (0.5, 2.5)),
    ("ice giant",         (4.0, 6.5),    (50, 300),      (6, 20)),
]

def generar_ejemplo_por_clase(is_koi=True, clase_idx=0):
    if is_koi:
        nombre_clase, (prad_min, prad_max), (teq_min, teq_max), (mass_min, mass_max) = CLASES_PARAMETROS[clase_idx]
        ejemplo = {
            "koi_prad": round(random.uniform(prad_min, prad_max), 2),
            "koi_teq": round(random.uniform(teq_min, teq_max), 1),
            "koi_period": round(random.uniform(0.5, 500), 3),
            "koi_model_snr": round(random.uniform(10, 100), 2),
            "koi_steff": round(random.uniform(3000, 8000), 1),
            "koi_srad": round(random.uniform(0.1, 10.0), 3),
            "koi_smass": round(random.uniform(mass_min, mass_max), 3),
        }
        return ejemplo

# Probar el modelo con un ejemplo para cada clase
for i, (nombre_clase, _, _, _) in enumerate(CLASES_PARAMETROS):
    ejemplo_random = generar_ejemplo_por_clase(IS_KOI, clase_idx=i)
    resultado = predict_exoplanet(ejemplo_random, model_dir="modelos")
    print(f"Ejemplo para clase '{nombre_clase}': {ejemplo_random}")
    print(f"Resultado: {resultado}\n")

Ejemplo para clase 'desert world': {'koi_prad': 2.32, 'koi_teq': 1963.4, 'koi_period': 209.443, 'koi_model_snr': 80.32, 'koi_steff': 3587.2, 'koi_srad': 0.907, 'koi_smass': 7.272}
Resultado: {'is_exoplanet': 1, 'is_exoplanet_proba': 0.6746771572648834, 'type': 'desert world', 'type_top3': [('desert world', 0.9860101192611376), ('ocean world', 0.01379963973004001), ('super earth', 0.00010342663562939922)]}

Ejemplo para clase 'hot jupiter': {'koi_prad': 17.15, 'koi_teq': 1203.0, 'koi_period': 162.158, 'koi_model_snr': 96.3, 'koi_steff': 6001.0, 'koi_srad': 0.395, 'koi_smass': 169.049}
Resultado: {'is_exoplanet': 0, 'is_exoplanet_proba': 0.28203423017286144, 'type': 'hot jupiter', 'type_top3': [('hot jupiter', 0.9993199136924682), ('gas giant', 0.000397542136019426), ('desert world', 8.462949823534508e-05)]}

Ejemplo para clase 'super earth': {'koi_prad': 1.5, 'koi_teq': 594.5, 'koi_period': 465.214, 'koi_model_snr': 68.63, 'koi_steff': 3046.3, 'koi_srad': 5.607, 'koi_smass': 7.207}
Resu