In [1]:
# Librerías necesarias
import os
import re  # Import the regular expression module

import pandas as pd
import numpy as np
import math
from math import ceil

import matplotlib
matplotlib.use('TKAgg')
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D

import warnings

import pickle

from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler, MinMaxScaler, RobustScaler

In [2]:
# =============================================================================
# Paso 1: Definir rutas, cargar datos y configurar directorios
# =============================================================================
base_path = os.getcwd()  # Se asume que el notebook se ejecuta desde la carpeta 'MOP'
db_path = os.path.join(base_path, "DB_MOP")
fig_path = os.path.join(base_path, "Figuras_MOP")
model_path = os.path.join(base_path, "Modelos_MOP")

# Ruta al archivo de la base de datos
data_file = os.path.join(db_path, "design_DB_preprocessed_1000_Uniforme.csv")
print("Ruta de datos:", data_file)

# Ruta donde se guardarán las figuras
figure_path = os.path.join(fig_path, "1000_MOT_Uniforme")
if not os.path.exists(figure_path):
    os.makedirs(figure_path)
print("Ruta de figuras:", figure_path)

# Ruta al archivo de los modelos
model_path = os.path.join(model_path, "1000_MOT_Uniforme")
print(model_path)
print("Ruta de modelos:", model_path)

# Lectura del archivo CSV
try:
    df = pd.read_csv(data_file)
    print("Archivo cargado exitosamente.")
except FileNotFoundError:
    print("Error: Archivo no encontrado. Revisa la ruta del archivo.")
except pd.errors.ParserError:
    print("Error: Problema al analizar el archivo CSV. Revisa el formato del archivo.")
except Exception as e:
    print(f"Ocurrió un error inesperado: {e}")

# Función para limpiar nombres de archivo inválidos
def clean_filename(name):
    return re.sub(r'[\\/*?:"<>|]', "_", name)

Ruta de datos: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\DB_MOP\design_DB_preprocessed_400_Optimizado.csv
Ruta de figuras: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Figuras_MOP\400_MOT_Optimizado
C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado
Ruta de modelos: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado
Archivo cargado exitosamente.


In [3]:
# =============================================================================
# Paso 2: Preprocesar datos: separar columnas en X, M, P y convertir a numérico
# =============================================================================
X_cols = [col for col in df.columns if col.startswith('x')]
M_cols = [col for col in df.columns if col.startswith('m')]
P_cols = [col for col in df.columns if col.startswith('p')]

X = df[X_cols].copy()
M = df[M_cols].copy()
P = df[P_cols].copy()

for col in X.columns:
    X[col] = pd.to_numeric(X[col], errors='coerce')
for col in M.columns:
    M[col] = pd.to_numeric(M[col], errors='coerce')
for col in P.columns:
    P[col] = pd.to_numeric(P[col], errors='coerce')

In [4]:
# =============================================================================
# Paso 3: Seleccionar variables de entrada y salida
# =============================================================================
# Las variables de salida se toman de P; se eliminan 'p2::Tnom' y 'p3::nnom' si existen.
outputs = [col for col in P.columns]
if 'p2::Tnom' in outputs:
    outputs.remove('p2::Tnom')
if 'p3::nnom' in outputs:
    outputs.remove('p3::nnom')

# Las variables de entrada se obtienen concatenando X y M.
X_M = pd.concat([X, M], axis=1)
features = list(X.columns)
all_features = list(X_M.columns)
print("Variables de entrada:", features)
print("Variables de salida:", outputs)

# Redefinir X y Y usando los nombres de columnas seleccionados
X = df[features]
Y = df[outputs]

Variables de entrada: ['x1::OSD', 'x2::Dint', 'x3::L', 'x4::tm', 'x5::hs2', 'x6::wt', 'x7::Nt', 'x8::Nh']
Variables de salida: ['p1::W', 'p4::GFF', 'p5::BSP_T', 'p6::BSP_n', 'p7::BSP_Mu', 'p8::MSP_n', 'p9::UWP_Mu']


In [5]:
# =============================================================================
# Paso 4: Generar 10,000 nuevos motores a partir de los rangos de entrada
# =============================================================================
# Las restricciones (Boundaries B) se definen sobre las variables de X.
# Por ejemplo: x1::OSD > x2::Dint y 45 < x1::OSD < 60.
# Definir la función check_boundaries escalable: se evalúan todas las condiciones definidas en una lista.
def check_boundaries(row):
    boundaries = [
        lambda r: r['x1::OSD'] > r['x2::Dint'],  # Ejemplo: x1 debe ser mayor que x2
        lambda r: 45.0 < r['x1::OSD'] < 60.0,           # Ejemplo: x1 debe estar entre 45 y 60
        # Aquí se pueden agregar más condiciones según se requiera
    ]
    return all(condition(row) for condition in boundaries)

# Función para generar muestras considerando si la variable debe ser entera
def generate_samples(n_samples):
    data = {}
    for col in X_cols:
        # Si la variable es una de las que deben ser enteras, usar randint
        if col in ['x7::Nt', 'x8::Nh']:
            low = int(np.floor(X_min[col]))
            high = int(np.ceil(X_max[col]))
            # np.random.randint es exclusivo en el extremo superior, por lo que se suma 1
            data[col] = np.random.randint(low=low, high=high+1, size=n_samples)
        else:
            data[col] = np.random.uniform(low=X_min[col], high=X_max[col], size=n_samples)
    return pd.DataFrame(data)

# Guardamos los valores máximos y mínimos
X_min = df[features].min()
X_max = df[features].max()

desired_samples = 10000
valid_samples_list = []
# Generamos muestras en bloques; para aumentar la probabilidad de cumplir las restricciones,
# se genera un bloque mayor al deseado
batch_size = int(desired_samples * 1.5)

# Acumular muestras válidas hasta obtener el número deseado
while sum(len(df_batch) for df_batch in valid_samples_list) < desired_samples:
    X_batch = generate_samples(batch_size)
    X_valid_batch = X_batch[X_batch.apply(check_boundaries, axis=1)]
    valid_samples_list.append(X_valid_batch)

# Concatenar todas las muestras válidas y truncar a desired_samples
valid_samples = pd.concat(valid_samples_list).reset_index(drop=True)
X_new = valid_samples.iloc[:desired_samples].copy()
print(f"Se generaron {len(X_new)} muestras de X que cumplen con las restricciones de Boundaries B (objetivo: {desired_samples}).")
display(X_new.head())

Se generaron 10000 muestras de X que cumplen con las restricciones de Boundaries B (objetivo: 10000).


Unnamed: 0,x1::OSD,x2::Dint,x3::L,x4::tm,x5::hs2,x6::wt,x7::Nt,x8::Nh
0,48.787502,39.8545,17.48669,2.432229,5.110861,4.451907,15,3
1,45.938035,32.797383,19.149395,3.308764,10.909573,4.913136,22,7
2,46.436181,24.314429,30.868695,2.610742,8.663579,2.977565,29,3
3,54.061333,23.606664,30.597196,3.174716,12.098057,2.912746,22,8
4,52.435532,32.593327,15.526898,2.445332,13.533592,4.292427,15,6


In [6]:
# =============================================================================
# Paso 5: Escalado de datos
# =============================================================================
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(X_new)
scaler_Y = StandardScaler()
Y_scaled = scaler_Y.fit_transform(Y)


# Crear DataFrames escalados completos (para reentrenamiento final y predicciones)
X_scaled_df = X_new
Y_scaled_df = Y


In [7]:
# -----------------------------------------------------------------------------
# Definir una clase que encapsule el ensemble de los mejores modelos
# -----------------------------------------------------------------------------
class BestModelEnsemble:
    def __init__(self, model_dict, outputs):
        """
        model_dict: Diccionario que mapea cada variable de salida a una tupla (modelo, índice)
                    donde 'modelo' es el mejor modelo para esa salida y 'índice' es la posición
                    de esa salida en el vector de predicción que produce ese modelo.
        outputs: Lista de nombres de variables de salida, en el orden deseado.
        """
        self.model_dict = model_dict
        self.outputs = outputs

    def predict(self, X):
        """
        Realiza la predicción para cada variable de salida usando el modelo asignado.
        Se espera que cada modelo tenga un método predict que devuelva un array de
        dimensiones (n_samples, n_outputs_model). Si el modelo es univariable, se asume
        que devuelve un array 1D.
        
        :param X: Datos de entrada (array o DataFrame) con la forma (n_samples, n_features).
        :return: Array con la predicción para todas las variables de salida, forma (n_samples, n_outputs).
        """
        n_samples = X.shape[0]
        n_outputs = len(self.outputs)
        preds = np.zeros((n_samples, n_outputs))
        
        # Iterar sobre cada variable de salida
        for output in self.outputs:
            model, idx = self.model_dict[output]
            model_pred = model.predict(X)
            # Si el modelo es univariable, model_pred es 1D; de lo contrario, es 2D
            if model_pred.ndim == 1:
                preds[:, self.outputs.index(output)] = model_pred
            else:
                preds[:, self.outputs.index(output)] = model_pred[:, idx]
        return preds

In [8]:
# =============================================================================
# Paso 6: Preprocesar los nuevos datos con el mismo escalador usado en entrenamiento
# =============================================================================
# Definir la ruta del archivo del ensemble (se asume que figure_path ya está definido)
ensemble_file = os.path.join(model_path, "best_model_ensemble.pkl")
# Cargar el modelo ensemble utilizando pickle
with open(ensemble_file, "rb") as f:
    loaded_ensemble = pickle.load(f)
print("Modelo ensemble cargado correctamente desde:", ensemble_file)

# Obtener las predicciones del ensemble para los datos escalados (por ejemplo, el conjunto completo)
preds_ensemble = loaded_ensemble.predict(X_scaled_df)
display(preds_ensemble[0])

# Convertir las predicciones a la escala original
preds_original = preds_ensemble
display(preds_original[0])

# Combinar las predicciones en un DataFrame
df_predictions = pd.DataFrame(preds_original, columns=outputs)

# Combinar las variables de entrada originales y las salidas predichas
motors = pd.concat([X_new, df_predictions], axis=1)
display(motors.head(15))
# Guardar el DataFrame de los motores generados en formato CSV
model_file = os.path.join(model_path, "generated_motors.csv")
motors.to_csv(model_file, index=False)
print("Base de datos de 10,000 motores guardada en:", model_path)

Modelo ensemble cargado correctamente desde: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado\best_model_ensemble.pkl


array([3.84340875e-01, 5.83854947e+01, 5.98110259e-01, 6.44882556e+03,
       8.77511295e+01, 7.58369172e+03, 8.94966408e+01])

array([3.84340875e-01, 5.83854947e+01, 5.98110259e-01, 6.44882556e+03,
       8.77511295e+01, 7.58369172e+03, 8.94966408e+01])

Unnamed: 0,x1::OSD,x2::Dint,x3::L,x4::tm,x5::hs2,x6::wt,x7::Nt,x8::Nh,p1::W,p4::GFF,p5::BSP_T,p6::BSP_n,p7::BSP_Mu,p8::MSP_n,p9::UWP_Mu
0,48.787502,39.8545,17.48669,2.432229,5.110861,4.451907,15,3,0.384341,58.385495,0.59811,6448.825556,87.75113,7583.691718,89.496641
1,45.938035,32.797383,19.149395,3.308764,10.909573,4.913136,22,7,0.43211,76.801001,0.976885,8155.906406,83.518804,8177.00166,93.693397
2,46.436181,24.314429,30.868695,2.610742,8.663579,2.977565,29,3,0.634403,86.341383,0.931177,5124.290832,74.313144,5927.543678,90.07373
3,54.061333,23.606664,30.597196,3.174716,12.098057,2.912746,22,8,0.733751,74.426522,1.032032,3612.47119,77.923549,5203.404054,92.253033
4,52.435532,32.593327,15.526898,2.445332,13.533592,4.292427,15,6,0.369164,29.430553,0.659899,6465.276726,85.589618,7706.790626,89.467686
5,57.233552,33.449854,30.524701,2.451099,7.686108,2.043411,28,8,0.813764,102.212662,1.295653,3711.822136,75.375145,4722.596482,85.456374
6,45.816208,36.550939,34.875173,3.362772,5.406277,2.913812,19,6,0.693647,82.845505,1.143128,7415.86925,82.836682,7044.888598,85.514292
7,55.82619,37.005829,22.805191,3.204949,6.10125,4.725322,13,4,0.580207,56.831328,0.639957,5466.704224,88.541657,6644.308554,90.230595
8,52.700926,37.392181,31.392797,2.992021,7.785621,3.145387,9,8,0.671264,50.249797,0.956927,4986.426418,88.161095,5950.187398,84.27559
9,48.222993,38.649483,20.451095,2.469039,7.357779,2.738497,14,4,0.402636,39.652555,0.670039,6283.086026,86.300526,7411.089374,87.039777


Base de datos de 10,000 motores guardada en: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado


In [9]:
# =============================================================================
# Paso 7: Filtrar motores válidos según constraints definidos
# =============================================================================
def is_valid_motor(row):
    constraints = [
        lambda r: 0.15 <= r['p1::W'] <= 1,     # p1::W entre 0.15 y 1
        lambda r: 50 <= r['p7::BSP_Mu'] <= 99,    # p7::BSP_Mu entre 50 y 99
        lambda r: 90 <= r['p9::UWP_Mu'] <= 99,     # p9::UWP_Mu entre 50 y 99
        lambda r: r['p4::GFF'] >= 1 and r['p4::GFF'] <= 80/2
        # Puedes agregar más restricciones aquí, por ejemplo:
        # lambda r: r['p4::GFF'] >= 1 and r['p4::GFF'] <= 100,
    ]
    return all(condition(row) for condition in constraints)

motors['Valid'] = motors.apply(is_valid_motor, axis=1)
valid_motors = motors[motors['Valid']]
print(f"Número de motores válidos: {len(valid_motors)}")

Número de motores válidos: 88


In [14]:
##### Ordenar los motores válidos por 'p9::UWP_Mu' de menor a mayor
sorted_motors = valid_motors.sort_values(by='p9::UWP_Mu', ascending=False)
print("Motores válidos ordenados por 'p9::UWP_Mu' (de menor a mayor):")
display(sorted_motors.head())

Motores válidos ordenados por 'p9::UWP_Mu' (de menor a mayor):


Unnamed: 0,x1::OSD,x2::Dint,x3::L,x4::tm,x5::hs2,x6::wt,x7::Nt,x8::Nh,p1::W,p4::GFF,p5::BSP_T,p6::BSP_n,p7::BSP_Mu,p8::MSP_n,p9::UWP_Mu,Valid
6628,55.835624,23.935805,25.201061,2.855758,14.015754,4.867992,17,5,0.594665,39.206611,0.670106,3698.346138,82.332785,5095.262532,92.318893,True
3061,53.200244,24.660976,25.810121,2.825625,12.275898,4.68991,13,5,0.560808,37.777992,0.590023,3793.912494,86.326873,5168.990748,92.127963,True
5203,51.013363,23.788867,22.582429,2.291727,12.277475,4.878327,13,5,0.471088,39.718654,0.521099,6043.785324,86.864672,7321.36841,92.042138,True
7590,57.482965,22.182518,22.299827,2.72939,11.21347,4.87932,10,5,0.561354,39.550519,0.374623,7096.935872,88.90573,8352.063726,91.906829,True
7034,50.76203,25.235529,24.018308,2.749365,13.774323,4.792285,18,4,0.496808,36.257045,0.657251,5003.589738,82.30768,6295.216492,91.661562,True


In [11]:
# =============================================================================
# Paso 6: Calcular y representar la frontera de Pareto
# =============================================================================
# Objetivos: minimizar p1::W, maximizar p7::BSP_Mu y p9::UWP_Mu
def compute_pareto_front(df, objectives):
    is_dominated = np.zeros(len(df), dtype=bool)
    for i in range(len(df)):
        for j in range(len(df)):
            if i == j:
                continue
            dominates = True
            for obj, sense in objectives.items():
                if sense == 'min':
                    if df.iloc[j][obj] > df.iloc[i][obj]:
                        dominates = False
                        break
                elif sense == 'max':
                    if df.iloc[j][obj] < df.iloc[i][obj]:
                        dominates = False
                        break
            if dominates:
                is_dominated[i] = True
                break
    frontier = df[~is_dominated]
    return frontier

objectives = {'p1::W': 'min', 'p7::BSP_Mu': 'max', 'p9::UWP_Mu': 'max'}
valid_motors_reset = valid_motors.reset_index(drop=True)
pareto_motors = compute_pareto_front(valid_motors_reset, objectives)
print(f"Número de motores en la frontera de Pareto: {len(pareto_motors)}")

# Representación 3D de la frontera de Pareto
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(valid_motors['p1::W'], valid_motors['p7::BSP_Mu'], valid_motors['p9::UWP_Mu'], 
           c='blue', label='Válidos', alpha=0.5)
ax.scatter(motors[~motors['Valid']]['p1::W'], motors[~motors['Valid']]['p7::BSP_Mu'], motors[~motors['Valid']]['p9::UWP_Mu'], 
           c='red', label='No válidos', alpha=0.5)
ax.scatter(pareto_motors['p1::W'], pareto_motors['p7::BSP_Mu'], pareto_motors['p9::UWP_Mu'], 
           c='green', label='Frontera Pareto', s=100, marker='D')
ax.set_xlabel('p1::W')
ax.set_ylabel('p7::BSP_Mu')
ax.set_zlabel('p9::UWP_Mu')
ax.legend()
plt.title('Frontera de Pareto de diseños de motores')
plt.show()
'''
figure_file = os.path.join(figure_path, "pareto_frontier.png")
plt.savefig(figure_file, dpi=1080)
plt.close()
print("Figura de comparación guardada en:", figure_file)
'''

Número de motores en la frontera de Pareto: 18


'\nfigure_file = os.path.join(figure_path, "pareto_frontier.png")\nplt.savefig(figure_file, dpi=1080)\nplt.close()\nprint("Figura de comparación guardada en:", figure_file)\n'

In [12]:
# Si existen motores válidos, procedemos a la selección:
if len(valid_motors) > 0:
    # 1. Motor más liviano: mínimo de p1::W
    motor_liviano = valid_motors.loc[valid_motors['p1::W'].idxmin()]
    
    # 2. Motor más eficiente: máximo de p9::UWP_Mu (asumiendo que mayor p9::UWP_Mu indica mayor eficiencia)
    motor_eficiente = valid_motors.loc[valid_motors['p9::UWP_Mu'].idxmax()]
    
    # 3. Motor más eficiente y liviano:
    # Se normalizan p1::W y p9::UWP_Mu en el subconjunto de motores válidos.
    vm = valid_motors.copy()
    # Normalizar p1::W (donde un menor valor es mejor, así que se invertirá)
    vm['p1::W_norm'] = (vm['p1::W'] - vm['p1::W'].min()) / (vm['p1::W'].max() - vm['p1::W'].min())
    # Normalizar p9::UWP_Mu (mayor es mejor)
    vm['p9::UWP_Mu_norm'] = (vm['p9::UWP_Mu'] - vm['p9::UWP_Mu'].min()) / (vm['p9::UWP_Mu'].max() - vm['p9::UWP_Mu'].min())
    
    # Definir un score compuesto: se busca minimizar p1::W (por ello, usamos 1 - normalizado) y maximizar p9::UWP_Mu
    vm['composite_score'] = (1 - vm['p1::W_norm']) + vm['p9::UWP_Mu_norm']
    motor_eficiente_liviano = vm.loc[vm['composite_score'].idxmax()]
    
    # Mostrar las soluciones:
    print("\nMotor más liviano:")
    print(motor_liviano)
    
    print("\nMotor más eficiente:")
    print(motor_eficiente)
    
    print("\nMotor más eficiente y liviano (score compuesto):")
    print(motor_eficiente_liviano)

# Opcional: Guardar cada solución en un CSV separado
    #motor_liviano.to_frame().T.to_csv("motor_mas_liviano.csv", index=False)
    # motor_eficiente.to_frame().T.to_csv("motor_mas_eficiente.csv", index=False)
    # motor_eficiente_liviano.to_frame().T.to_csv("motor_eficiente_y_liviano.csv", index=False)
    # print("\nSoluciones guardadas en CSV.")
else:
    print("No se encontraron motores válidos. Verifique las constraints y el escalado de los datos.")


Motor más liviano:
x1::OSD         45.724361
x2::Dint        30.054559
x3::L           20.580028
x4::tm           3.079317
x5::hs2         13.919314
x6::wt            4.06568
x7::Nt                 14
x8::Nh                  8
p1::W            0.388211
p4::GFF         39.586329
p5::BSP_T        0.833163
p6::BSP_n     8990.012426
p7::BSP_Mu      85.537144
p8::MSP_n     8577.718958
p9::UWP_Mu      90.521735
Valid                True
Name: 5884, dtype: object

Motor más eficiente:
x1::OSD         55.835624
x2::Dint        23.935805
x3::L           25.201061
x4::tm           2.855758
x5::hs2         14.015754
x6::wt           4.867992
x7::Nt                 17
x8::Nh                  5
p1::W            0.594665
p4::GFF         39.206611
p5::BSP_T        0.670106
p6::BSP_n     3698.346138
p7::BSP_Mu      82.332785
p8::MSP_n     5095.262532
p9::UWP_Mu      92.318893
Valid                True
Name: 6628, dtype: object

Motor más eficiente y liviano (score compuesto):
x1::OSD              51.

In [13]:
# =============================================================================
# Paso 7: Seleccionar el motor válido óptimo
# =============================================================================
# Se normalizan los objetivos y se define un score compuesto
valid_motors_comp = valid_motors.copy()
for col, sense in [('p1::W', 'min'), ('p7::BSP_Mu', 'max'), ('p9::UWP_Mu', 'max')]:
    col_min = valid_motors_comp[col].min()
    col_max = valid_motors_comp[col].max()
    if sense == 'min':
        valid_motors_comp[col + '_norm'] = 1 - (valid_motors_comp[col] - col_min) / (col_max - col_min)
    else:
        valid_motors_comp[col + '_norm'] = (valid_motors_comp[col] - col_min) / (col_max - col_min)

valid_motors_comp['composite_score'] = (valid_motors_comp['p1::W_norm'] +
                                          valid_motors_comp['p7::BSP_Mu_norm'] +
                                          valid_motors_comp['p9::UWP_Mu_norm'])
optimal_motor = valid_motors_comp.loc[valid_motors_comp['composite_score'].idxmax()]
print("Motor válido óptimo (según score compuesto):")
print(optimal_motor)

model_file = os.path.join(model_path, "optimal_motor.csv")
optimal_motor.to_frame().T.to_csv(model_file, index=False)
print("El motor óptimo se ha guardado en:", model_path)

Motor válido óptimo (según score compuesto):
x1::OSD              51.013363
x2::Dint             23.788867
x3::L                22.582429
x4::tm                2.291727
x5::hs2              12.277475
x6::wt                4.878327
x7::Nt                      13
x8::Nh                       5
p1::W                 0.471088
p4::GFF              39.718654
p5::BSP_T             0.521099
p6::BSP_n          6043.785324
p7::BSP_Mu           86.864672
p8::MSP_n           7321.36841
p9::UWP_Mu           92.042138
Valid                     True
p1::W_norm            0.739994
p7::BSP_Mu_norm       0.709265
p9::UWP_Mu_norm       0.880645
composite_score       2.329903
Name: 5203, dtype: object
El motor óptimo se ha guardado en: C:\Users\s00244\Documents\GitHub\MotorDesignDataDriven\Notebooks\4.DBG\Modelos_MOP\400_MOT_Optimizado
