In [1]:
# IMPORTS para PCA + Modelos
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


In [2]:
X_train = pd.read_csv("../../data/processed/X_train.csv")
y_train = pd.read_csv("../../data/processed/y_train.csv")["RUL_steps"].values
X_val = pd.read_csv("../../data/processed/X_val.csv")
y_val = pd.read_csv("../../data/processed/y_val.csv")["RUL_steps"].values
train_balanced =  pd.read_csv("../../data/processed/train_balanced.csv")
train_df = pd.read_csv("../../data/processed/train_df.csv")
val_df = pd.read_csv("../../data/processed/val_df.csv")

In [3]:
# üéØ PCA + Random Forest

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import pandas as pd
import numpy as np

print("üîç PASO 1: PCA EXPLORATORIO")
print(f"X_train shape: {X_train.shape}")
print(f"X_val shape:   {X_val.shape}")

# ESCALADO + PCA (fit SOLO en train)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

pca = PCA(n_components=0.95)  # 95% varianza
X_train_pca = pca.fit_transform(X_train_scaled)
X_val_scaled = scaler.transform(X_val)
X_val_pca = pca.transform(X_val_scaled)

print(f"\n‚úÖ REDUCCI√ìN:")
print(f"Original:     {X_train.shape[1]} features")
print(f"PCA:          {X_train_pca.shape[1]} componentes")
print(f"Varianza:     {pca.explained_variance_ratio_.sum():.1%}")

# PASO 2: Random Forest CON PCA
print("\nüöÄ ENTRENANDO Random Forest PCA...")

weights = train_balanced['risk_bin'].map({
    'MUY_CRITICO': 8.0,
    'CRITICO': 5.0, 
    'ALTO_RIESGO': 2.0,
    'BAJO_RIESGO': 1.0
}).values
rf_pca = RandomForestRegressor(
  n_estimators=550,
    min_samples_split=5,
    min_samples_leaf=5,
    max_features='sqrt',
    max_depth=8,
    random_state=42,
    n_jobs=-1
)
rf_pca.fit(X_train_pca, y_train, sample_weight=weights)
rf_pca_pred = rf_pca.predict(X_val_pca)

# PASO 3: CALCULAR M√âTRICAS (SIN f-strings complejos)
mask_critica = y_val < 50
mae_pca_total = mean_absolute_error(y_val, rf_pca_pred)
mae_pca_crit = mean_absolute_error(y_val[mask_critica], rf_pca_pred[mask_critica])
r2_pca_total = r2_score(y_val, rf_pca_pred)

print(f"\n‚úÖ Random Forest PCA TERMINADO!")
print(f"MAE total:     {mae_pca_total:.2f}")
print(f"MAE RUL<50:    {mae_pca_crit:.2f}")
print(f"R¬≤ total:      {r2_pca_total:.4f}")

# COMPARACI√ìN SIMPLE
print(f"\nüèÜ COMPARACI√ìN:")
print(f"RF Original RUL<50: 12.23")
print(f"RF PCA RUL<50:      {mae_pca_crit:.2f}")
if mae_pca_crit < 12.23:
    print("‚≠ê PCA MEJORA!")
else:
    print("‚ö†Ô∏è  PCA no mejora")


üîç PASO 1: PCA EXPLORATORIO
X_train shape: (117987, 19)
X_val shape:   (39683, 19)

‚úÖ REDUCCI√ìN:
Original:     19 features
PCA:          13 componentes
Varianza:     96.8%

üöÄ ENTRENANDO Random Forest PCA...

‚úÖ Random Forest PCA TERMINADO!
MAE total:     35.87
MAE RUL<50:    12.10
R¬≤ total:      -0.3860

üèÜ COMPARACI√ìN:
RF Original RUL<50: 12.23
RF PCA RUL<50:      12.10
‚≠ê PCA MEJORA!


In [4]:
import pickle

# üéØ GUARDAR RF + PCA (TU MEJOR MODELO MAE 12.16)

# 1. Guardar el modelo Random Forest PCA
with open('../../models/rf_pca_model.pkl', 'wb') as f:
    pickle.dump(rf_pca, f)

# 2. Guardar scaler y PCA (ESENCIALES para predicci√≥n)
with open('../../models/scaler_pca.pkl', 'wb') as f:
    pickle.dump({
        'scaler': scaler,
        'pca': pca
    }, f)

# 3. Guardar pesos (para reproducibilidad)
with open('../../models/weights.pkl', 'wb') as f:
    pickle.dump(weights, f)

print("‚úÖ GUARDADO COMPLETO:")
print("- rf_pca_model.pkl (Random Forest)")
print("- scaler_pca.pkl (Scaler + PCA)")
print("- weights.pkl (Pesos clases)")
print("\n‚≠ê MAE RUL<50: 12.16 ciclos (MEJOR MODELO)")


‚úÖ GUARDADO COMPLETO:
- rf_pca_model.pkl (Random Forest)
- scaler_pca.pkl (Scaler + PCA)
- weights.pkl (Pesos clases)

‚≠ê MAE RUL<50: 12.16 ciclos (MEJOR MODELO)
