In [1]:
##############################################################################
# Surrogate Model Training in IDAES
# ---------------------------------
# Models: ALAMO, PySMO (Polynomial, RBF; the Kriging cell is currently commented out),
#         Keras (Neural Network)
#
# This notebook will:
#  1. Load the data set, sample it, and split it into training/validation sets
#  2. Train each surrogate model separately
#  3. Save the trained surrogates (JSON files for ALAMO/PySMO, a folder for Keras)
##############################################################################


In [1]:
import os
import numpy as np
import pandas as pd
import random as rn
import tensorflow as tf
from enum import Enum
from typing import Set

# IDAES imports
from idaes.core.surrogate.sampling.data_utils import split_training_validation
from idaes.core.surrogate.alamopy import AlamoTrainer
from idaes.core.surrogate.pysmo_surrogate import (
    PysmoPolyTrainer,
    PysmoRBFTrainer,
    PysmoKrigingTrainer,
    PysmoSurrogate,
)
from idaes.core.surrogate.sampling.scaling import OffsetScaler
from idaes.core.surrogate.keras_surrogate import KerasSurrogate

# Reproducibility: pin the environment flags and the seeds of every RNG used below.
# NOTE(review): PYTHONHASHSEED is read by the interpreter at start-up, so assigning it
# here presumably affects only child processes, not this running kernel - confirm.
os.environ["PYTHONHASHSEED"] = "0"
# An empty device list hides all CUDA devices.
# NOTE(review): this is set after `import tensorflow` above; presumably it still takes
# effect because no GPU has been initialised yet - confirm, or set it before the import.
os.environ["CUDA_VISIBLE_DEVICES"] = ""
# Seed NumPy's global RNG.
np.random.seed(46)
# Seed Python's built-in `random` module (imported as `rn`).
rn.seed(1342)
# Seed TensorFlow's global RNG.
tf.random.set_seed(62)


'_BlockData'. The class '_BlockData' has been renamed to 'BlockData'.
(deprecated in 6.7.2) (called from d:\anaconda\envs\my-idaes-env\lib\site-
packages\omlt\block.py:33)


In [2]:
class SurrType(Enum):
    """Surrogate model families referred to in this notebook.

    Each member's value is the text that `surrogate_json_filename` puts in
    front of ``_surrogate.json`` to build that surrogate's file name.
    """

    # ALAMO surrogate
    ALAMO = "alamo"
    # PySMO polynomial regression
    PYSMO_PLY = "pysmo_poly"
    # PySMO radial basis functions
    PYSMO_RBF = "pysmo_rbf"
    # PySMO Kriging
    PYSMO_KRG = "pysmo_kriging"
    # Keras neural network
    KERAS = "keras"
    NDCT = "NDCT"  # placeholder, not implemented here


def surrogate_json_filename(t: SurrType):
    """Return the JSON file name for surrogate type ``t``.

    Args:
        t: a `SurrType` member; only its ``value`` is used.

    Returns:
        The string ``"<value>_surrogate.json"``, a bare file name with no
        directory component.
    """
    return "{}_surrogate.json".format(t.value)


In [3]:
# Load dataset
csv_data = pd.read_csv("reformer-data.csv")  # 2800 data points

# Randomly sample points for training. The explicit random_state makes this cell
# idempotent: re-running it yields the same subset regardless of how much of the
# global NumPy RNG stream has been consumed in the meantime. 46 is the value passed
# to np.random.seed at the top of the notebook.
# NOTE(review): with that seed this should pick the same rows a fresh
# "Restart & Run All" picked before this change, provided nothing else drew from the
# global NumPy RNG in between - confirm if existing cached surrogates must match.
data = csv_data.sample(n=200, random_state=46)

# The first two columns are the surrogate inputs; all remaining columns are outputs.
input_data = data.iloc[:, :2]
output_data = data.iloc[:, 2:]

# Column labels (pandas Index objects) used to configure the trainers.
input_labels = input_data.columns
output_labels = output_data.columns

# Split into training/validation (80 % / 20 %) with a fixed seed.
# n_data is not used elsewhere in the visible cells; it is kept because later
# cells may reference it.
n_data = data[input_labels[0]].size
data_training, data_validation = split_training_validation(data, 0.8, seed=666)

print("Training data size:", data_training.shape)
print("Validation data size:", data_validation.shape)


Training data size: (160, 15)
Validation data size: (40, 15)


  return bound(*args, **kwds)


# ALAMO


In [4]:
import os
import sys
import time
from contextlib import redirect_stdout
from io import StringIO

from idaes.core.surrogate.alamopy import alamo

# Path to the ALAMO executable. The default is the machine-specific location this
# notebook was written for; set the ALAMO_EXECUTABLE environment variable to
# override it without editing the notebook.
alamo.executable = os.environ.get("ALAMO_EXECUTABLE", r"D:\alamo\alamo.exe")

# Buffer that receives ALAMO's long console log (not required to use the surrogate
# API). Inspect it afterwards with stream.getvalue().
stream = StringIO()
# Kept only for backward compatibility with cells that may still reference it;
# sys.stdout is no longer replaced outside the `with` block below.
oldstdout = sys.stdout
fname = surrogate_json_filename(SurrType.ALAMO)

# Train only when no saved surrogate exists, so a full re-run stays cheap.
if not os.path.exists(fname):
    trainer = AlamoTrainer(
        input_labels=input_labels,
        output_labels=output_labels,
        training_dataframe=data_training,
    )
    # Basis-function options handed to ALAMO.
    trainer.config.constant = True
    trainer.config.linfcns = True
    trainer.config.multi2power = [1, 2]
    trainer.config.monomialpower = [2, 3]
    trainer.config.ratiopower = [1, 2]
    # One maxterms entry per output.
    trainer.config.maxterms = [10] * len(output_labels)
    trainer.config.filename = os.path.join(os.getcwd(), "alamo_run.alm")
    trainer.config.overwrite_files = True

    try:
        start = time.time()  # start timing
        # Redirect stdout only while ALAMO runs. The context manager restores
        # sys.stdout even if training raises, so the status messages below (and
        # the output of every later cell) remain visible.
        with redirect_stdout(stream):
            success, alm_surr, msg = trainer.train_surrogate()
        elapsed = time.time() - start  # stop timing
    except FileNotFoundError:
        print("⚠️ ALAMO executable not found.")
    else:
        # Saving happens outside the try block so that a FileNotFoundError raised
        # while writing the JSON file is not misreported as a missing executable.
        if success:
            alm_surr.save_to_file(fname, overwrite=True)
            print(f"✅ ALAMO trained and saved. Training time: {elapsed:.2f} seconds")
        else:
            # Do not write the file: the existence check above would otherwise
            # treat a failed run as a valid cached surrogate on the next run.
            print(f"⚠️ ALAMO training did not succeed: {msg}")
else:
    print("ℹ️ ALAMO surrogate already exists.")


# PySMO Polynomial/RBF/Kriging

In [8]:
import time


def _train_and_save_pysmo(
    trainer_cls,
    surr_type,
    display_name,
    settings,
    in_labels,
    out_labels,
    training_df,
    input_bounds,
):
    """Train one PySMO surrogate and save it to its JSON file, unless it already exists.

    Args:
        trainer_cls: PySMO trainer class to instantiate (e.g. PysmoPolyTrainer).
        surr_type: SurrType member that determines the output file name.
        display_name: label inserted into the printed status messages.
        settings: mapping of trainer.config option name -> value, applied in
            insertion order before training.
        in_labels: list of input column labels.
        out_labels: list of output column labels.
        training_df: DataFrame passed to the trainer as ``training_dataframe``.
        input_bounds: dict of input label -> (lower, upper) stored with the surrogate.

    Returns:
        The new PysmoSurrogate, or None when the JSON file already exists and
        training was skipped.
    """
    fname = surrogate_json_filename(surr_type)
    if os.path.exists(fname):
        print(f"ℹ️ PySMO {display_name} surrogate already exists.")
        return None

    # The timer covers trainer construction, configuration and training.
    start = time.time()
    trainer = trainer_cls(
        input_labels=in_labels,
        output_labels=out_labels,
        training_dataframe=training_df,
    )
    for option, value in settings.items():
        setattr(trainer.config, option, value)
    trained = trainer.train_surrogate()
    elapsed = time.time() - start

    surrogate = PysmoSurrogate(trained, in_labels, out_labels, input_bounds)
    surrogate.save_to_file(fname, overwrite=True)
    print(f"✅ PySMO {display_name} trained and saved. Training time: {elapsed:.2f} seconds")
    return surrogate


# Extract the input and output columns of the training set
# (first two columns are inputs, the rest are outputs).
input_data_train = data_training.iloc[:, :2]
output_data_train = data_training.iloc[:, 2:]
# Plain lists of labels, rebinding the names defined in the data-loading cell.
input_labels = list(input_data_train.columns)
output_labels = list(output_data_train.columns)

# Input bounds (example values only - adjust them to the actual range of your data).
# The first input gets (0.1, 0.8); every other input gets (0.8, 1.2).
bounds = {
    label: (0.1, 0.8) if position == 0 else (0.8, 1.2)
    for position, label in enumerate(input_labels)
}

# ---------- PySMO Polynomial ----------
poly_surr = _train_and_save_pysmo(
    PysmoPolyTrainer,
    SurrType.PYSMO_PLY,
    "Polynomial",
    {
        "maximum_polynomial_order": 6,
        "multinomials": True,
        "training_split": 0.8,
        "number_of_crossvalidations": 10,
    },
    input_labels,
    output_labels,
    data_training,
    bounds,
)

# ---------- PySMO RBF ----------
# NOTE(review): the recorded run of this cell ended in a KeyboardInterrupt during
# RBF training; the "pyomo" solution method may simply be slow for this data set -
# consider checking the other solution methods PySMO offers.
rbf_surr = _train_and_save_pysmo(
    PysmoRBFTrainer,
    SurrType.PYSMO_RBF,
    "RBF",
    {
        "basis_function": "gaussian",
        "solution_method": "pyomo",
        "regularization": True,
    },
    input_labels,
    output_labels,
    data_training,
    bounds,
)

# # ---------- PySMO Kriging ----------
# fname = surrogate_json_filename(SurrType.PYSMO_KRG)
# if not os.path.exists(fname):
#     start = time.time()
#     trainer = PysmoKrigingTrainer(
#         input_labels=input_labels,
#         output_labels=output_labels,
#         training_dataframe=data_training,
#     )
#     trainer.config.numerical_gradients = True
#     trainer.config.regularization = True

#     krg_train = trainer.train_surrogate()
#     elapsed = time.time() - start

#     krg_surr = PysmoSurrogate(krg_train, input_labels, output_labels, bounds)
#     krg_surr.save_to_file(fname, overwrite=True)
#     print(f"✅ PySMO Kriging trained and saved. Training time: {elapsed:.2f} seconds")
# else:
#     print("ℹ️ PySMO Kriging surrogate already exists.")


ℹ️ PySMO Polynomial surrogate already exists.


Parameter estimation method:  pyomo
Basis function:  gaussian
Regularization done:  True
0.001    |     1e-05    |     0.9038345440666218    |     7.786982437506171e+18    |     1729.057438894215    |     7999997.973986604    |     1.7763563895349064e-09
0.001    |     2e-05    |     1.2979352361012746    |     7.786982437506171e+18    |     1729.057438894215    |     3999999.484674922    |     8.881783052749719e-10
0.001    |     5e-05    |     1.7913136984649711    |     7.786982437506171e+18    |     1729.057438894215    |     1600000.3931459538    |     3.552714551759881e-10
0.001    |     7.5e-05    |     1.9604933405454017    |     7.786982437506171e+18    |     1729.057438894215    |     1066667.2618786718    |     2.368477107503146e-10
0.001    |     0.0001    |     2.058250548285959    |     7.786982437506171e+18    |     1729.057438894215    |     800000.6962982253    |     1.776358385492894e-10
0.001    |     0.0002    |     2.

KeyboardInterrupt: 

# Keras neural network

In [6]:
# Train the Keras surrogate only when no saved model exists yet.
if not os.path.exists("keras_surrogate/keras_model.keras"):
    # Network and training settings.
    activation, optimizer, n_hidden_layers, n_nodes_per_layer = "tanh", "Adam", 2, 40
    loss, metrics = "mse", ["mae", "mse"]
    # File that ModelCheckpoint keeps up to date with the best model seen so far.
    checkpoint_path = ".mdl_wts.keras"

    # Scaling
    # NOTE(review): the scalers and x/y are built from input_data/output_data, i.e.
    # the full 200-row sample, which includes the rows held out in data_validation;
    # the other surrogates train on data_training only. Confirm this is intended
    # before comparing the surrogates on data_validation.
    input_scaler = OffsetScaler.create_normalizing_scaler(input_data)
    output_scaler = OffsetScaler.create_normalizing_scaler(output_data)
    x = input_scaler.scale(input_data).to_numpy()
    y = output_scaler.scale(output_data).to_numpy()
    # Input bounds (adjust them to your actual problem).
    bounds = {
        input_labels[0]: (0.1, 0.8),
        input_labels[1]: (0.8, 1.2)
    }

    # Declare the input shape with an explicit Input layer rather than passing
    # `input_dim` to the first Dense layer, which Keras warns about.
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=(len(input_labels),)))
    # n_hidden_layers identical hidden layers (same count as before: one first
    # layer plus n_hidden_layers - 1 further layers).
    for _ in range(n_hidden_layers):
        model.add(tf.keras.layers.Dense(units=n_nodes_per_layer, activation=activation))
    # Linear output layer with one unit per output label.
    model.add(tf.keras.layers.Dense(units=len(output_labels)))

    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)
    mcp_save = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path, save_best_only=True, monitor="val_loss", mode="min"
    )
    model.fit(x=x, y=y, validation_split=0.2, verbose=1, epochs=1000, callbacks=[mcp_save])

    # The checkpoint holds the model with the lowest validation loss, while `model`
    # holds the last-epoch weights. Reload the checkpoint so the surrogate that is
    # saved is the best model rather than whatever the final epoch produced.
    model = tf.keras.models.load_model(checkpoint_path)

    keras_surr = KerasSurrogate(
        model, list(input_labels), list(output_labels), bounds, input_scaler, output_scaler
    )
    keras_surr.save_to_folder("keras_surrogate", "keras_model")
    print("✅ Keras surrogate trained and saved.")
else:
    print("ℹ️ Keras surrogate already exists.")


Epoch 1/1000


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 75ms/step - loss: 0.3746 - mae: 0.5232 - mse: 0.3746 - val_loss: 0.3230 - val_mae: 0.4945 - val_mse: 0.3230
Epoch 2/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - loss: 0.3106 - mae: 0.4715 - mse: 0.3106 - val_loss: 0.2687 - val_mae: 0.4451 - val_mse: 0.2687
Epoch 3/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step - loss: 0.2580 - mae: 0.4245 - mse: 0.2580 - val_loss: 0.2238 - val_mae: 0.3994 - val_mse: 0.2238
Epoch 4/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.2151 - mae: 0.3820 - mse: 0.2151 - val_loss: 0.1868 - val_mae: 0.3573 - val_mse: 0.1868
Epoch 5/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 37ms/step - loss: 0.1801 - mae: 0.3442 - mse: 0.1801 - val_loss: 0.1562 - val_mae: 0.3199 - val_mse: 0.1562
Epoch 6/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35ms/step - loss: 0.151