In [1]:
import numpy as np
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, f1_score

# ============================================================
# Load the pre-processed train/test splits
# ============================================================
base_path = '../data'

# NOTE(security): allow_pickle=True lets np.load unpickle object arrays, which
# can execute arbitrary code. Only use it on trusted, locally generated files;
# if the arrays are plain numeric, allow_pickle=False is the safer choice.
X_train = np.load(f'{base_path}/X_train.npy', allow_pickle=True)
X_test = np.load(f'{base_path}/X_test.npy', allow_pickle=True)

# Flatten the targets at load time so the shapes reported below are the ones
# the estimators actually receive (e.g. an (n, 1) column becomes (n,)).
y_train = np.load(f'{base_path}/y_train.npy', allow_pickle=True).ravel()
y_test = np.load(f'{base_path}/y_test.npy', allow_pickle=True).ravel()

print("Formas de los conjuntos:")
print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_test:", X_test.shape)
print("y_test:", y_test.shape)

# Fail fast if features and targets are misaligned instead of erroring later
# inside an estimator's fit/predict call.
assert X_train.shape[0] == y_train.shape[0], "X_train / y_train row counts differ"
assert X_test.shape[0] == y_test.shape[0], "X_test / y_test row counts differ"

Formas de los conjuntos:
X_train: (1600, 4)
y_train: (1600,)
X_test: (400, 4)
y_test: (400,)


In [2]:
# ============================================================
# Train models
# ============================================================
# Candidate classifiers, keyed by the display name used in the reports below.
models = {
    "LogisticRegression": LogisticRegression(max_iter=1000, C=0.8, solver='lbfgs'),
    "RandomForest": RandomForestClassifier(
        n_estimators=100, max_depth=4, random_state=42
    ),
    "XGBoost": XGBClassifier(
        max_depth=3, learning_rate=0.1, n_estimators=100,
        subsample=0.8, colsample_bytree=0.8,
        eval_metric='logloss', random_state=42
    )
}

# Fit each model on the full training split, then report accuracy and F1 on
# both the training and the test split.
for name, model in models.items():
    print(f"\n🚀 Entrenando modelo: {name}...")
    model.fit(X_train, y_train)

    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    acc_train = accuracy_score(y_train, y_pred_train)
    acc_test = accuracy_score(y_test, y_pred_test)

    # f1_score is called without `average`, so it uses the binary default and
    # expects a two-class target.
    f1_train = f1_score(y_train, y_pred_train)
    f1_test = f1_score(y_test, y_pred_test)

    print(f"{name} → Accuracy train: {acc_train:.3f} | test: {acc_test:.3f}")
    print(f"{name} → F1 train: {f1_train:.3f} | test: {f1_test:.3f}")
    print("-" * 60)


🚀 Entrenando modelo: LogisticRegression...
LogisticRegression → Accuracy train: 0.898 | test: 0.938
LogisticRegression → F1 train: 0.884 | test: 0.928
------------------------------------------------------------

🚀 Entrenando modelo: RandomForest...
RandomForest → Accuracy train: 0.949 | test: 0.963
RandomForest → F1 train: 0.944 | test: 0.958
------------------------------------------------------------

🚀 Entrenando modelo: XGBoost...
XGBoost → Accuracy train: 0.994 | test: 0.980
XGBoost → F1 train: 0.993 | test: 0.977
------------------------------------------------------------


In [3]:
print("\n📊 Validación cruzada (5-Fold) — Evaluando robustez...")

# 5-fold cross-validated accuracy on the training split only.
# cross_val_score fits clones of each estimator, so the models already fitted
# on the full training set (and saved below) are left untouched.
for name, model in models.items():
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy')
    print(f"{name} → Accuracy promedio (CV=5): {scores.mean():.3f} ± {scores.std():.3f}")

# ============================================================
# Save the trained models
# ============================================================
# Output file per model; written next to the input data under `base_path`
# so the data directory is configured in a single place.
model_files = {
    "LogisticRegression": "best_model_logreg.pkl",
    "RandomForest": "best_model_rf.pkl",
    "XGBoost": "best_model_xgb.pkl",
}
for model_name, file_name in model_files.items():
    joblib.dump(models[model_name], f"{base_path}/{file_name}")

print("\n✅ Modelos entrenados y guardados correctamente.")


📊 Validación cruzada (5-Fold) — Evaluando robustez...
LogisticRegression → Accuracy promedio (CV=5): 0.896 ± 0.021
RandomForest → Accuracy promedio (CV=5): 0.941 ± 0.012
XGBoost → Accuracy promedio (CV=5): 0.973 ± 0.007

✅ Modelos entrenados y guardados correctamente.
