In [1]:
'''
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
# import joblib
import pickle

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler

# =============================================================================
# Paso 1: Registrar información de los mejores modelos en CSV
# =============================================================================
best_model_info = pd.DataFrame({
    'Output': ['p1::W', 'p4::GFF', 'p5::BSP_T', 'p6::BSP_n', 'p7::BSP_Mu', 'p8::MSP_n', 'p9::UWP_Mu'],
    'ModelType': ['PLS', 'LR', 'GPR', 'SVR', 'GPR', 'PLS', 'SVR'],
    'CoP': [0.95, 0.92, 0.94, 0.90, 0.96, 0.93, 0.91],
    'Hyperparameters': ['n_components=9', 'fit_intercept=False', 'length_scale=2.52, noise=0.0388',
                        'C=10, epsilon=0.1', 'length_scale=3.74, noise=0.00211', 'n_components=9', 'C=1, epsilon=0.5']
})
#best_model_info.to_csv('best_models.csv', index=False)
print("Información de los mejores modelos guardada en 'best_models.csv'.")

# =============================================================================
# Paso 2: Generar 10,000 nuevos motores a partir de los rangos de entrada
# =============================================================================
# Se carga el dataset original para obtener los límites de las variables de entrada
data_file = "design_DB_preprocessed_200_Optimizado.csv"  # Ajustar ruta según corresponda.
df = pd.read_csv(data_file)

# Se consideran las variables de entrada que comienzan con 'x' y 'm'
input_cols = [col for col in df.columns if col.startswith('x')] + [col for col in df.columns if col.startswith('m')]
X_min = df[input_cols].min()
X_max = df[input_cols].max()

n_samples = 10000
# Generar nuevos motores de forma uniforme dentro de los rangos observados
X_new = pd.DataFrame({col: np.random.uniform(low=X_min[col], high=X_max[col], size=n_samples) 
                      for col in input_cols})

# =============================================================================
# Paso 3: Preprocesar los nuevos datos con el mismo escalador usado en entrenamiento
# =============================================================================
# Definir la ruta del archivo del ensemble (se asume que figure_path ya está definido)
ensemble_file = os.path.join(model_path, "best_model_ensemble.pkl")
# Cargar el modelo ensemble utilizando pickle
with open(ensemble_file, "rb") as f:
    loaded_ensemble = pickle.load(f)
print("Modelo ensemble cargado correctamente desde:", ensemble_file)


# Escalado de datos
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X)
scaler_Y = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y)

# Obtener las predicciones del ensemble para los datos escalados (por ejemplo, el conjunto completo)
preds_ensemble = loaded_ensemble.predict(X_scaled_df)

# Lista de variables de salida (usando el DataFrame escalado de Y)
output_vars = Y_scaled_df.columns.tolist()

# =============================================================================
# Paso 4: Predecir las variables de salida usando los modelos subrogados óptimos
# =============================================================================
# Se asume que los modelos subrogados para cada variable de salida se guardaron
# en archivos llamados "surrogate_pX.pkl" dentro de la carpeta "Modelos_subrogados"
output_vars = ['p1::W', 'p4::GFF', 'p5::BSP_T', 'p6::BSP_n', 'p7::BSP_Mu', 'p8::MSP_n', 'p9::UWP_Mu']
surrogate_models = {}
predictions = {}

for output in output_vars:
    model_path = os.path.join("Modelos_subrogados", f"surrogate_{output}.pkl")
    # Cargar el modelo subrogado óptimo para la salida 'output'
    surrogate_models[output] = joblib.load(model_path)
    # Predecir la salida para los nuevos datos escalados
    predictions[output] = surrogate_models[output].predict(X_new_scaled)

# Combinar las predicciones en un DataFrame
df_predictions = pd.DataFrame(predictions)

# Combinar las variables de entrada originales y las salidas predichas
motors = pd.concat([X_new, df_predictions], axis=1)
motors.to_csv("generated_motors.csv", index=False)
print("Base de datos de 10,000 motores guardada en 'generated_motors.csv'.")

# =============================================================================
# Paso 5: Filtrar motores válidos según constraints definidos
# =============================================================================
def is_valid_motor(row):
    # Ejemplo de constraints (ajustar según lo indicado en el paper)
    # Se desea que:
    # - p1::W esté entre 0.5 y 0.7
    # - p7::BSP_Mu esté entre 85 y 90
    # - p9::UWP_Mu esté entre 88 y 92
    if (0.5 <= row['p1::W'] <= 0.7) and (85 <= row['p7::BSP_Mu'] <= 90) and (88 <= row['p9::UWP_Mu'] <= 92):
        return True
    return False

motors['Valid'] = motors.apply(is_valid_motor, axis=1)
valid_motors = motors[motors['Valid']]
print(f"Número de motores válidos: {len(valid_motors)}")

# =============================================================================
# Paso 6: Calcular y representar la frontera de Pareto
# =============================================================================
# Objetivos: minimizar p1::W, maximizar p7::BSP_Mu y p9::UWP_Mu
def compute_pareto_front(df, objectives):
    is_dominated = np.zeros(len(df), dtype=bool)
    for i in range(len(df)):
        for j in range(len(df)):
            if i == j:
                continue
            dominates = True
            for obj, sense in objectives.items():
                if sense == 'min':
                    if df.iloc[j][obj] > df.iloc[i][obj]:
                        dominates = False
                        break
                elif sense == 'max':
                    if df.iloc[j][obj] < df.iloc[i][obj]:
                        dominates = False
                        break
            if dominates:
                is_dominated[i] = True
                break
    frontier = df[~is_dominated]
    return frontier

objectives = {'p1::W': 'min', 'p7::BSP_Mu': 'max', 'p9::UWP_Mu': 'max'}
valid_motors_reset = valid_motors.reset_index(drop=True)
pareto_motors = compute_pareto_front(valid_motors_reset, objectives)
print(f"Número de motores en la frontera de Pareto: {len(pareto_motors)}")

# Representación 3D de la frontera de Pareto
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(valid_motors['p1::W'], valid_motors['p7::BSP_Mu'], valid_motors['p9::UWP_Mu'], 
           c='blue', label='Válidos', alpha=0.5)
ax.scatter(motors[~motors['Valid']]['p1::W'], motors[~motors['Valid']]['p7::BSP_Mu'], motors[~motors['Valid']]['p9::UWP_Mu'], 
           c='red', label='No válidos', alpha=0.5)
ax.scatter(pareto_motors['p1::W'], pareto_motors['p7::BSP_Mu'], pareto_motors['p9::UWP_Mu'], 
           c='green', label='Frontera Pareto', s=100, marker='D')
ax.set_xlabel('p1::W')
ax.set_ylabel('p7::BSP_Mu')
ax.set_zlabel('p9::UWP_Mu')
ax.legend()
plt.title('Frontera de Pareto de diseños de motores')
plt.savefig("pareto_frontier.png", dpi=300)
plt.show()

# =============================================================================
# Paso 7: Seleccionar el motor válido óptimo
# =============================================================================
# Se normalizan los objetivos y se define un score compuesto
valid_motors_comp = valid_motors.copy()
for col, sense in [('p1::W', 'min'), ('p7::BSP_Mu', 'max'), ('p9::UWP_Mu', 'max')]:
    col_min = valid_motors_comp[col].min()
    col_max = valid_motors_comp[col].max()
    if sense == 'min':
        valid_motors_comp[col + '_norm'] = 1 - (valid_motors_comp[col] - col_min) / (col_max - col_min)
    else:
        valid_motors_comp[col + '_norm'] = (valid_motors_comp[col] - col_min) / (col_max - col_min)

valid_motors_comp['composite_score'] = (valid_motors_comp['p1::W_norm'] +
                                          valid_motors_comp['p7::BSP_Mu_norm'] +
                                          valid_motors_comp['p9::UWP_Mu_norm'])
optimal_motor = valid_motors_comp.loc[valid_motors_comp['composite_score'].idxmax()]
print("Motor válido óptimo (según score compuesto):")
print(optimal_motor)

optimal_motor.to_frame().T.to_csv("optimal_motor.csv", index=False)
print("El motor óptimo se ha guardado en 'optimal_motor.csv'.")
'''



In [2]:
# Librerías necesarias
import os
import re  # Import the regular expression module

import pandas as pd
import numpy as np
import math
from math import ceil

import matplotlib
matplotlib.use('TKAgg')
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

import warnings

import pickle

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler

In [3]:
# =============================================================================
# Paso 1: Definir rutas, cargar datos y configurar directorios
# =============================================================================
base_path = os.getcwd()  # Se asume que el notebook se ejecuta desde la carpeta 'MOP'
db_path = os.path.join(base_path, "DB_MOP")
fig_path = os.path.join(base_path, "Figuras_MOP")
model_path = os.path.join(base_path, "Modelos_MOP")

# Ruta al archivo de la base de datos
data_file = os.path.join(db_path, "design_DB_preprocessed_400_Optimizado.csv")
print("Ruta de datos:", data_file)

# Ruta donde se guardarán las figuras
figure_path = os.path.join(fig_path, "400_MOT_Optimizado")
if not os.path.exists(figure_path):
    os.makedirs(figure_path)
print("Ruta de figuras:", figure_path)

# Ruta al archivo de los modelos
model_path = os.path.join(model_path, "400_MOT_Optimizado")
print(model_path)
print("Ruta de modelos:", model_path)

# Lectura del archivo CSV
try:
    df = pd.read_csv(data_file)
    print("Archivo cargado exitosamente.")
except FileNotFoundError:
    print("Error: Archivo no encontrado. Revisa la ruta del archivo.")
except pd.errors.ParserError:
    print("Error: Problema al analizar el archivo CSV. Revisa el formato del archivo.")
except Exception as e:
    print(f"Ocurrió un error inesperado: {e}")

# Función para limpiar nombres de archivo inválidos
def clean_filename(name):
    return re.sub(r'[\\/*?:"<>|]', "_", name)

Ruta de datos: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\DB_MOP\design_DB_preprocessed_400_Optimizado.csv
Ruta de figuras: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Figuras_MOP\400_MOT_Optimizado
C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado
Ruta de modelos: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado
Archivo cargado exitosamente.


In [4]:
# =============================================================================
# Paso 2: Preprocesar datos: separar columnas en X, M, P y convertir a numérico
# =============================================================================
X_cols = [col for col in df.columns if col.startswith('x')]
M_cols = [col for col in df.columns if col.startswith('m')]
P_cols = [col for col in df.columns if col.startswith('p')]

X = df[X_cols].copy()
M = df[M_cols].copy()
P = df[P_cols].copy()

for col in X.columns:
    X[col] = pd.to_numeric(X[col], errors='coerce')
for col in M.columns:
    M[col] = pd.to_numeric(M[col], errors='coerce')
for col in P.columns:
    P[col] = pd.to_numeric(P[col], errors='coerce')

In [5]:
# =============================================================================
# Paso 3: Seleccionar variables de entrada y salida
# =============================================================================
# Las variables de salida se toman de P; se eliminan 'p2::Tnom' y 'p3::nnom' si existen.
outputs = [col for col in P.columns]
if 'p2::Tnom' in outputs:
    outputs.remove('p2::Tnom')
if 'p3::nnom' in outputs:
    outputs.remove('p3::nnom')

# Las variables de entrada se obtienen concatenando X y M.
X_M = pd.concat([X, M], axis=1)
features = list(X_M.columns)
print("Variables de entrada:", features)
print("Variables de salida:", outputs)

# Redefinir X y Y usando los nombres de columnas seleccionados
X = df[features]
Y = df[outputs]

Variables de entrada: ['x1::OSD', 'x2::Dint', 'x3::L', 'x4::tm', 'x5::hs2', 'x6::wt', 'x7::Nt', 'x8::Nh', 'm1::Drot', 'm2::Dsh', 'm3::he', 'm4::Rmag', 'm5::Rs', 'm6::GFF']
Variables de salida: ['p1::W', 'p4::GFF', 'p5::BSP_T', 'p6::BSP_n', 'p7::BSP_Mu', 'p8::MSP_n', 'p9::UWP_Mu']


In [6]:
# =============================================================================
# Paso 4: Generar 10,000 nuevos motores a partir de los rangos de entrada
# =============================================================================
# Guardamos los valores máximos y mínimos
X_min = df[features].min()
X_max = df[features].max()

n_samples = 10000
# Generar nuevos motores de forma uniforme dentro de los rangos observados
X_new = pd.DataFrame({col: np.random.uniform(low=X_min[col], high=X_max[col], size=n_samples) 
                      for col in features})

In [26]:
# =============================================================================
# Paso 5: Escalado de datos
# =============================================================================
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X_new)
scaler_Y = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y)


# Crear DataFrames escalados completos (para reentrenamiento final y predicciones)
X_scaled_df = pd.DataFrame(scaler_X.transform(X_new), columns=X_new.columns, index=X_new.index)
Y_scaled_df = pd.DataFrame(scaler_Y.transform(Y), columns=Y.columns, index=Y.index)


In [8]:
# -----------------------------------------------------------------------------
# Definir una clase que encapsule el ensemble de los mejores modelos
# -----------------------------------------------------------------------------
class BestModelEnsemble:
    def __init__(self, model_dict, outputs):
        """
        model_dict: Diccionario que mapea cada variable de salida a una tupla (modelo, índice)
                    donde 'modelo' es el mejor modelo para esa salida y 'índice' es la posición
                    de esa salida en el vector de predicción que produce ese modelo.
        outputs: Lista de nombres de variables de salida, en el orden deseado.
        """
        self.model_dict = model_dict
        self.outputs = outputs

    def predict(self, X):
        """
        Realiza la predicción para cada variable de salida usando el modelo asignado.
        Se espera que cada modelo tenga un método predict que devuelva un array de
        dimensiones (n_samples, n_outputs_model). Si el modelo es univariable, se asume
        que devuelve un array 1D.
        
        :param X: Datos de entrada (array o DataFrame) con la forma (n_samples, n_features).
        :return: Array con la predicción para todas las variables de salida, forma (n_samples, n_outputs).
        """
        n_samples = X.shape[0]
        n_outputs = len(self.outputs)
        preds = np.zeros((n_samples, n_outputs))
        
        # Iterar sobre cada variable de salida
        for output in self.outputs:
            model, idx = self.model_dict[output]
            model_pred = model.predict(X)
            # Si el modelo es univariable, model_pred es 1D; de lo contrario, es 2D
            if model_pred.ndim == 1:
                preds[:, self.outputs.index(output)] = model_pred
            else:
                preds[:, self.outputs.index(output)] = model_pred[:, idx]
        return preds

In [27]:
# =============================================================================
# Paso 6: Preprocesar los nuevos datos con el mismo escalador usado en entrenamiento
# =============================================================================
# Definir la ruta del archivo del ensemble (se asume que figure_path ya está definido)
ensemble_file = os.path.join(model_path, "best_model_ensemble.pkl")
# Cargar el modelo ensemble utilizando pickle
with open(ensemble_file, "rb") as f:
    loaded_ensemble = pickle.load(f)
print("Modelo ensemble cargado correctamente desde:", ensemble_file)

# Obtener las predicciones del ensemble para los datos escalados (por ejemplo, el conjunto completo)
preds_ensemble = loaded_ensemble.predict(X_scaled_df)

# Convertir las predicciones a la escala original
preds_original = scaler_Y.inverse_transform(preds_ensemble)

# Combinar las predicciones en un DataFrame
df_predictions = pd.DataFrame(preds_ensemble, columns=outputs)
print(df_predictions.head())

# Combinar las variables de entrada originales y las salidas predichas
motors = pd.concat([X_new, df_predictions], axis=1)
print(motors.head())
motors.to_csv("generated_motors.csv", index=False)
print("Base de datos de 10,000 motores guardada en 'generated_motors.csv'.")

Modelo ensemble cargado correctamente desde: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado\best_model_ensemble.pkl
      p1::W        p4::GFF  p5::BSP_T  p6::BSP_n  p7::BSP_Mu  p8::MSP_n  \
0  0.658658  126463.385768   0.860282  -0.447577    1.015153   0.598805   
1 -1.448138   -6034.137718  -0.918992   0.743497    0.098881   0.977144   
2  1.282805   78826.701591   2.918427  -1.015112   -0.413248  -1.099346   
3  0.955392  -13743.934211   1.341643  -0.807188    0.111320  -0.923332   
4  0.650779   -3976.764288   0.709440  -0.857263   -0.266586  -1.195770   

   p9::UWP_Mu  
0    0.524473  
1   -1.245760  
2   -0.417232  
3    1.169618  
4    0.204383  
     x1::OSD   x2::Dint      x3::L    x4::tm    x5::hs2    x6::wt     x7::Nt  \
0  47.610895  40.195027  34.329864  2.501234   8.041717  3.094406   6.578554   
1  56.905775  25.878791  12.021049  2.647881  11.504395  3.279891  18.854601   
2  54.983807  39.015605  35.142434  2.7262

In [29]:
# =============================================================================
# Paso 7: Filtrar motores válidos según constraints definidos
# =============================================================================
def is_valid_motor(row):
    # Ejemplo de constraints (ajustar según lo indicado en el paper)
    # Se desea que:
    # - p1::W esté entre 0.5 y 0.7
    # - p7::BSP_Mu esté entre 85 y 90
    # - p9::UWP_Mu esté entre 88 y 92
    if (0.15 <= row['p1::W'] <= 1) and (50 <= row['p7::BSP_Mu'] <= 99) and (50 <= row['p9::UWP_Mu'] <= 99):
        return True
    return False

motors['Valid'] = motors.apply(is_valid_motor, axis=1)
valid_motors = motors[motors['Valid']]
print(f"Número de motores válidos: {len(valid_motors)}")

Número de motores válidos: 0


In [22]:
# =============================================================================
# Paso 6: Calcular y representar la frontera de Pareto
# =============================================================================
# Objetivos: minimizar p1::W, maximizar p7::BSP_Mu y p9::UWP_Mu
def compute_pareto_front(df, objectives):
    is_dominated = np.zeros(len(df), dtype=bool)
    for i in range(len(df)):
        for j in range(len(df)):
            if i == j:
                continue
            dominates = True
            for obj, sense in objectives.items():
                if sense == 'min':
                    if df.iloc[j][obj] > df.iloc[i][obj]:
                        dominates = False
                        break
                elif sense == 'max':
                    if df.iloc[j][obj] < df.iloc[i][obj]:
                        dominates = False
                        break
            if dominates:
                is_dominated[i] = True
                break
    frontier = df[~is_dominated]
    return frontier

objectives = {'p1::W': 'min', 'p7::BSP_Mu': 'max', 'p9::UWP_Mu': 'max'}
valid_motors_reset = valid_motors.reset_index(drop=True)
pareto_motors = compute_pareto_front(valid_motors_reset, objectives)
print(f"Número de motores en la frontera de Pareto: {len(pareto_motors)}")

# Representación 3D de la frontera de Pareto
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(valid_motors['p1::W'], valid_motors['p7::BSP_Mu'], valid_motors['p9::UWP_Mu'], 
           c='blue', label='Válidos', alpha=0.5)
ax.scatter(motors[~motors['Valid']]['p1::W'], motors[~motors['Valid']]['p7::BSP_Mu'], motors[~motors['Valid']]['p9::UWP_Mu'], 
           c='red', label='No válidos', alpha=0.5)
ax.scatter(pareto_motors['p1::W'], pareto_motors['p7::BSP_Mu'], pareto_motors['p9::UWP_Mu'], 
           c='green', label='Frontera Pareto', s=100, marker='D')
ax.set_xlabel('p1::W')
ax.set_ylabel('p7::BSP_Mu')
ax.set_zlabel('p9::UWP_Mu')
ax.legend()
plt.title('Frontera de Pareto de diseños de motores')
plt.savefig("pareto_frontier.png", dpi=300)
plt.show()

Número de motores en la frontera de Pareto: 0


In [None]:
# =============================================================================
# Paso 7: Seleccionar el motor válido óptimo
# =============================================================================
# Se normalizan los objetivos y se define un score compuesto
valid_motors_comp = valid_motors.copy()
for col, sense in [('p1::W', 'min'), ('p7::BSP_Mu', 'max'), ('p9::UWP_Mu', 'max')]:
    col_min = valid_motors_comp[col].min()
    col_max = valid_motors_comp[col].max()
    if sense == 'min':
        valid_motors_comp[col + '_norm'] = 1 - (valid_motors_comp[col] - col_min) / (col_max - col_min)
    else:
        valid_motors_comp[col + '_norm'] = (valid_motors_comp[col] - col_min) / (col_max - col_min)

valid_motors_comp['composite_score'] = (valid_motors_comp['p1::W_norm'] +
                                          valid_motors_comp['p7::BSP_Mu_norm'] +
                                          valid_motors_comp['p9::UWP_Mu_norm'])
optimal_motor = valid_motors_comp.loc[valid_motors_comp['composite_score'].idxmax()]
print("Motor válido óptimo (según score compuesto):")
print(optimal_motor)

optimal_motor.to_frame().T.to_csv("optimal_motor.csv", index=False)
print("El motor óptimo se ha guardado en 'optimal_motor.csv'.")