In [1]:
#!pip install torchvision "flaml[blendsearch,ray]"
#!pip install ray[tune]
#!pip install flaml[tune] 

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
import flaml
from flaml import AutoML
print("FLAML version: {}".format(AutoML.__version__))

# from ray import tune
from flaml import tune
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms

  from .autonotebook import tqdm as notebook_tqdm
2025-08-09 14:31:43,893	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-08-09 14:31:44,154	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.


FLAML version: 2.3.5


In [3]:
# ============ data upload and preperation ============
def load_and_preprocess():
    # data path 
    data_path1 = 'D:/A_sem2/ERP/Simulation/lhs_exps.csv'
    data_path2 = 'D:/A_sem2/ERP/Code/TSA_max_min_mean.xlsx'
    data_path3 = 'D:/A_sem2/ERP/Code/RH2M_max_min_mean.xlsx'
    data_path4 = 'D:/A_sem2/ERP/Code/HIA_max_min_mean.xlsx'

    df1 = pd.read_csv(data_path1, index_col=0, parse_dates=True) 
    df1 = df1.drop(columns=['ALB_ROOF_DIF', 'ALB_IMPROAD_DIF', 'ALB_PERROAD_DIF', 'ALB_WALL_DIF','WIND_HGT_CANYON'])
    df2 = pd.read_excel(data_path2, parse_dates=True)
    df3 = pd.read_excel(data_path3, parse_dates=True)
    df4 = pd.read_excel(data_path4, parse_dates=True)

    df_input = df1.values

    cols_to_select = [0, 2]
    df2_sel = df2.iloc[:, cols_to_select].values
    df3_sel = df3.iloc[:, cols_to_select].values
    df4_sel = df4.iloc[:, cols_to_select].values
    df_output = np.hstack([df2_sel, df3_sel, df4_sel])

    scaler_X = StandardScaler()
    X_scaled = scaler_X.fit_transform(df_input)

    scaler_y = StandardScaler()
    y_scaled = scaler_y.fit_transform(df_output)

    X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
    y_tensor = torch.tensor(y_scaled, dtype=torch.float32)
    return X_tensor, y_tensor


X_tensor, y_tensor = load_and_preprocess()

# ============ MLP frame ===========
class MLPRegressor(nn.Module):
    def __init__(self, input_dim, hidden1=64, hidden2=32, hidden3=16):
        super().__init__()
        self.model = nn.Sequential(
            nn.Linear(input_dim, hidden1),
            nn.ReLU(),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Linear(hidden2, hidden3),
            nn.ReLU(),
            nn.Linear(hidden3, 6)
        )
    def forward(self, x):
        return self.model(x)

# ============ traning ============
def train_mlp(config, checkpoint_dir=None):
    device = "cuda" if torch.cuda.is_available() else "cpu"

    input_dim = X_tensor.shape[1]
    hidden1 = int(config["hidden1"])
    hidden2 = int(config["hidden2"])
    hidden3 = int(config["hidden3"])
    lr = config["lr"]
    batch_size = int(config["batch_size"])
    epochs = int(config["epochs"])

    model = MLPRegressor(input_dim, hidden1, hidden2, hidden3).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    dataset = TensorDataset(X_tensor, y_tensor)
    train_len = int(len(dataset) * 0.8)
    val_len = len(dataset) - train_len
    train_set, val_set = random_split(dataset, [train_len, val_len])

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

    for epoch in range(epochs):
        model.train()
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            outputs = model(xb)
            loss = torch.sqrt(criterion(outputs, yb))  # RMSE loss
            loss.backward()
            optimizer.step()

    model.eval()
    val_loss_sum = 0.0
    with torch.no_grad():
        for xb, yb in val_loader:
            xb, yb = xb.to(device), yb.to(device)
            outputs = model(xb)
            loss = torch.sqrt(criterion(outputs, yb))
            val_loss_sum += loss.item() * xb.size(0)

    val_loss = val_loss_sum / len(val_loader.dataset)
    tune.report(loss=val_loss)  # 向FLAML报告指标

# ============ hyperparameter tune space ============
config = {
    "hidden1": tune.choice([32, 64, 128]),
    "hidden2": tune.choice([16, 32, 64]),
    "hidden3": tune.choice([8, 16, 32]),
    "lr": tune.loguniform(1e-4, 1e-2),
    "batch_size": tune.choice([32, 64, 128]),
    "epochs": tune.choice([50, 100, 150]),
}


  df1 = pd.read_csv(data_path1, index_col=0, parse_dates=True)


In [4]:

# ============ tune ============

analysis = flaml.tune.run(
    train_mlp,
    config=config,
    metric="loss",
    mode="min",
    num_samples=10,
    resources_per_trial={"cpu": 1, "gpu": int(torch.cuda.is_available())},
    local_dir="./flaml_logs",  # 推荐指定日志目录
    verbose=3  # 或者 2，3等级别依次增加输出信息
)

print("Best config is:", analysis.get_best_config(metric="loss", mode="min"))


Best config is: {'hidden1': 128, 'hidden2': 16, 'hidden3': 8, 'lr': 0.00027400436211074016, 'batch_size': 64, 'epochs': 100}


In [5]:

# ============ 用最佳超参训练最终模型 ============
def train_final_model(best_config):
    device = "cuda" if torch.cuda.is_available() else "cpu"
    input_dim = X_tensor.shape[1]

    model = MLPRegressor(
        input_dim,
        int(best_config["hidden1"]),
        int(best_config["hidden2"]),
        int(best_config["hidden3"]),
    ).to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=best_config["lr"])

    dataset = TensorDataset(X_tensor, y_tensor)
    train_len = int(len(dataset) * 0.8)
    val_len = len(dataset) - train_len
    train_set, val_set = random_split(dataset, [train_len, val_len])

    train_loader = DataLoader(train_set, batch_size=int(best_config["batch_size"]), shuffle=True)
    val_loader = DataLoader(val_set, batch_size=int(best_config["batch_size"]), shuffle=False)

    epochs = int(best_config["epochs"])

    for epoch in range(epochs):
      model.train()
      total_loss = 0
      for batch_idx, (xb, yb) in enumerate(train_loader):
        xb, yb = xb.to(device), yb.to(device)
        optimizer.zero_grad()
        output = model(xb)
        loss = torch.sqrt(criterion(output, yb))
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # 每隔一定 batch 打印当前 loss（例如每 10 个 batch）
        if (batch_idx + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Batch [{batch_idx+1}/{len(train_loader)}], Loss: {loss.item():.4f}")

    # 每个 epoch 结束打印平均 loss
    avg_loss = total_loss / len(train_loader)
    print(f"Epoch [{epoch+1}/{epochs}] finished, Average Loss: {avg_loss:.4f}")
    return model
best_config = analysis.get_best_config(metric="loss", mode="min")
model = train_final_model(best_config)

Epoch [1/100], Batch [10/1250], Loss: 1.0062
Epoch [1/100], Batch [20/1250], Loss: 0.9866
Epoch [1/100], Batch [30/1250], Loss: 0.9291
Epoch [1/100], Batch [40/1250], Loss: 1.0140
Epoch [1/100], Batch [50/1250], Loss: 0.8312
Epoch [1/100], Batch [60/1250], Loss: 1.0061
Epoch [1/100], Batch [70/1250], Loss: 1.0256
Epoch [1/100], Batch [80/1250], Loss: 0.9563
Epoch [1/100], Batch [90/1250], Loss: 0.9169
Epoch [1/100], Batch [100/1250], Loss: 0.9976
Epoch [1/100], Batch [110/1250], Loss: 0.9889
Epoch [1/100], Batch [120/1250], Loss: 0.9072
Epoch [1/100], Batch [130/1250], Loss: 0.9770
Epoch [1/100], Batch [140/1250], Loss: 0.8029
Epoch [1/100], Batch [150/1250], Loss: 0.8528
Epoch [1/100], Batch [160/1250], Loss: 0.8333
Epoch [1/100], Batch [170/1250], Loss: 0.7902
Epoch [1/100], Batch [180/1250], Loss: 0.8507
Epoch [1/100], Batch [190/1250], Loss: 0.6879
Epoch [1/100], Batch [200/1250], Loss: 0.7375
Epoch [1/100], Batch [210/1250], Loss: 0.6615
Epoch [1/100], Batch [220/1250], Loss: 0.64

In [6]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

def evaluate_model_inverse_scaler(model, data_loader, device, scaler_y):
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for xb, yb in data_loader:
            xb, yb = xb.to(device), yb.to(device)
            outputs = model(xb)
            y_true.append(yb.cpu().numpy())
            y_pred.append(outputs.cpu().numpy())

    # 拼接所有批次数据
    y_true = np.vstack(y_true)
    y_pred = np.vstack(y_pred)

    # 反标准化
    y_true_orig = scaler_y.inverse_transform(y_true)
    y_pred_orig = scaler_y.inverse_transform(y_pred)

    # 对6个目标变量分别计算指标
    for i in range(y_true_orig.shape[1]):
        rmse = np.sqrt(mean_squared_error(y_true_orig[:, i], y_pred_orig[:, i]))
        r2 = r2_score(y_true_orig[:, i], y_pred_orig[:, i])
        mae = mean_absolute_error(y_true_orig[:, i], y_pred_orig[:, i])
        print(f"Target {i+1}: RMSE={rmse:.4f}, R2={r2:.4f}, MAE={mae:.4f}")


In [7]:
# The data catalogue is stored as a CSV file. Here we read it with pandas.
data_path1 = 'D:/A_sem2/ERP/Simulation/lhs_exps.csv' # Change this to the path of the data file on your system
data_path2 = 'D:/A_sem2/ERP/Code/TSA_max_min_mean.xlsx'
data_path3 = 'D:/A_sem2/ERP/Code/RH2M_max_min_mean.xlsx' # Change this to the path of the data file on your system
data_path4 = 'D:/A_sem2/ERP/Code/HIA_max_min_mean.xlsx'

# Load the data
df1 = pd.read_csv(data_path1, index_col=0, parse_dates=True) 
df1 = df1.drop(columns=['ALB_ROOF_DIF', 'ALB_IMPROAD_DIF', 'ALB_PERROAD_DIF', 'ALB_WALL_DIF','WIND_HGT_CANYON'])
# display(df1)
df2 = pd.read_excel(data_path2, parse_dates=True)
df3 = pd.read_excel(data_path3, parse_dates=True)
df4 = pd.read_excel(data_path4, parse_dates=True)

# 1. upload data
df_input = df1.values  # 输入特征CSV
print(df_input)

# get first and third column of df2, df3, df4
cols_to_select = [0, 2]
# get ccolumn of df
df2_sel = df2.iloc[:, cols_to_select].values
df3_sel = df3.iloc[:, cols_to_select].values
df4_sel = df4.iloc[:, cols_to_select].values
# makke them to be metrics
df_output = np.hstack([df2_sel, df3_sel, df4_sel])

# 2. standardization
scaler_X = StandardScaler()
X_scaled = scaler_X.fit_transform(df_input)
scaler_y = StandardScaler()
# df_output = df_output.reshape(-1,1)
y_scaled = scaler_y.fit_transform(df_output)

# 3. torch tensor
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)
y_tensor = torch.tensor(y_scaled, dtype=torch.float32)

# 4. split to train/test  
train_size = int(0.8 * len(X_tensor))
X_train, X_test = X_tensor[:train_size], X_tensor[train_size:]
y_train, y_test = y_tensor[:train_size], y_tensor[train_size:]

train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)

# 5. DataLoader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=False)
val_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

  df1 = pd.read_csv(data_path1, index_col=0, parse_dates=True)


[[0.86710861 0.99024087 0.15192069 ... 1.12463659 0.0426984  0.35539991]
 [0.68914931 0.83757032 0.18399664 ... 1.68590457 0.78802559 0.36443368]
 [0.44048685 0.90462526 0.21648668 ... 3.77562065 0.21496772 0.58936796]
 ...
 [0.56137753 0.79117553 0.1292929  ... 4.01889391 0.66716056 0.86793034]
 [0.98967886 0.91300226 0.30813693 ... 3.00839137 0.24867129 0.18302804]
 [0.7117039  0.85283726 0.10947204 ... 0.85671413 0.36432908 0.11648628]]


In [8]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Train set performance:")
evaluate_model_inverse_scaler(model, train_loader, device, scaler_y)

print("\nValidation set performance:")
evaluate_model_inverse_scaler(model, val_loader, device, scaler_y)

Train set performance:
Target 1: RMSE=0.3152, R2=0.9691, MAE=0.0721
Target 2: RMSE=0.1124, R2=0.9835, MAE=0.0322
Target 3: RMSE=1.0776, R2=0.9374, MAE=0.7313
Target 4: RMSE=0.5122, R2=0.9859, MAE=0.1975
Target 5: RMSE=0.8575, R2=0.7608, MAE=0.1052
Target 6: RMSE=0.2515, R2=0.8926, MAE=0.0433

Validation set performance:
Target 1: RMSE=0.2656, R2=0.9778, MAE=0.0722
Target 2: RMSE=0.0859, R2=0.9903, MAE=0.0315
Target 3: RMSE=1.0661, R2=0.9384, MAE=0.7306
Target 4: RMSE=0.5146, R2=0.9857, MAE=0.1981
Target 5: RMSE=0.7185, R2=0.8181, MAE=0.1014
Target 6: RMSE=0.2182, R2=0.9178, MAE=0.0425
