In [1]:
import os
import pickle
import pandas as pd
from pathlib import Path
from fastapi import FastAPI
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer

# Anchor every project path at the parent of the current working directory.
ROOT_DIR = Path.cwd().resolve().parent
DATA_PATH = ROOT_DIR / "data/penguins_size.csv"
MODELS_DIR = ROOT_DIR.joinpath("models")

# Make sure the "models/" output folder exists (no error if it already does).
MODELS_DIR.mkdir(exist_ok=True, parents=True)

# FastAPI application on which the endpoints of this file are registered.
app = FastAPI()

# Load the raw CSV and lower-case the header so column lookups do not depend
# on the capitalisation used in the file.
df = pd.read_csv(DATA_PATH)
df.columns = df.columns.str.lower()

# The target column is mandatory: fail early with a clear message if absent.
if "species" not in df.columns:
    raise ValueError("El dataset no contiene la columna 'species'. Verifica el formato.")

# Discard every row that contains at least one missing value.
df = df.dropna()

# Target is the species label; all remaining columns are features.
y = df["species"]
X = df.drop(columns=["species"])

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Feature columns, grouped by the preprocessing they receive.
numeric_feats = ["culmen_length_mm", "culmen_depth_mm", "flipper_length_mm", "body_mass_g"]
cat_feats = ["island", "sex"]

# Numeric columns: fill gaps with the column mean, then standardise.
numeric_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler", StandardScaler())
])

# Categorical columns: replace gaps with the literal "missing", then one-hot encode.
#
# No category is dropped on purpose. With handle_unknown="ignore" a value that
# was not seen during fit is encoded as an all-zero row, which is exactly the
# encoding a dropped first category would get. Rows with NaN are removed before
# training, so the "missing" fill value is itself unseen at fit time; combined
# with drop="first" a missing or unknown island/sex at prediction time would
# silently be treated as the first known category. Keeping all columns gives
# unknown/missing inputs their own (all-zero) encoding, and a tree-based
# classifier does not need a reference level removed.
cat_transformer = Pipeline([
    ("imputer", SimpleImputer(strategy="constant", fill_value="missing")),
    ("onehot", OneHotEncoder(sparse_output=False, handle_unknown="ignore"))
])

# Route each group of columns to its transformer; columns not listed are dropped
# (ColumnTransformer's default behaviour for the remainder).
preprocessor = ColumnTransformer([
    ("num", numeric_transformer, numeric_feats),
    ("cat", cat_transformer, cat_feats)
])

# Full pipeline: preprocessing followed by a random forest with a fixed seed.
model = Pipeline([
    ("preprocessor", preprocessor),
    ("classifier", RandomForestClassifier(n_estimators=100, random_state=42))
])

# Fit the whole pipeline (preprocessing + classifier) on the training split.
print("🔄 Entrenando modelo...")
model.fit(X_train, y_train)

# Serialise the fitted pipeline into the models directory.
model_path = MODELS_DIR.joinpath("penguin_classifier_randomforest.pkl")
with model_path.open("wb") as f:
    pickle.dump(model, f)

print(f"✅ Modelo guardado en: {model_path}")

# Endpoint that lists the available models
@app.get("/models/")
def list_available_models():
    """List the pickled models currently stored in ``MODELS_DIR``.

    Returns:
        dict: ``{"available_models": [...]}`` where the list holds the file
        names (not full paths) of every ``*.pkl`` entry directly inside
        ``MODELS_DIR``, sorted alphabetically.
    """
    # glob() yields entries in filesystem order, which is not guaranteed to be
    # stable; sorting keeps the API response deterministic across calls.
    models = sorted(path.name for path in MODELS_DIR.glob("*.pkl"))
    return {"available_models": models}

# Call the endpoint function directly (no HTTP request involved) to show which
# model files are present once the training code above has run.
print("Modelos disponibles:", list_available_models())

ModuleNotFoundError: No module named 'fastapi'