In [7]:
from benchmark_funs import *
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data

In [65]:
import tqdm
from pytorch_lamb import Lamb

In [72]:
def fit(model, data, fun):
    criterion = nn.MSELoss()
    # optimizer = optim.LBFGS(model.parameters(), lr=1e-1)
    # optimizer = optim.AdamW(model.parameters(), lr=3e-1)
    # optimizer = optim.AdamW(model.parameters(), lr=3e-1)
    optimizer = Lamb(model.parameters(), lr=3e-0)
    dataset = torch.utils.data.TensorDataset(data, fun(data))
    loader = torch.utils.data.DataLoader(dataset, batch_size=10000, shuffle=True)
    for epoch in range(3):
        box = tqdm.tqdm(enumerate(loader), desc=f"正在拟合{fun.name}")
        for batch_idx, (data, target) in box:
            def closure():
                pred_output = model(data)
                loss = criterion(pred_output.squeeze(), target.squeeze())
                box.set_postfix(loss=loss.detach().numpy().item())
                loss.backward()
                return loss
            closure()
            optimizer.step()
            # optimizer.step(closure=closure)
            optimizer.zero_grad()

In [73]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
def evaluate(opti, p_ind=1000):
    s = 0
    for fun in benchmark_functions:
        assert fun.larger_better == False # 最小化
        interval_len = fun.ub - fun.lb
        # 第一步，拟合 model
        # 万能近似定理：单隐层使用任意压缩函数的前馈神经网络只要隐层数量足够多就能够以任意精度逼近任意可测函数。
        # https://blog.csdn.net/qq_37983752/article/details/115055707
        # model = nn.Sequential(
        #     nn.Linear(fun.dimension, 10),
        #     nn.Sigmoid(), # 压缩函数，将值压缩到0-1之间的连续可导函数
        #     # nn.Linear(1000, 100),  
        #     # nn.Sigmoid(),
        #     nn.Linear(10, 1),    
        # )
        
        model = nn.Sequential(
            nn.Linear(fun.dimension, 10000),
            nn.ReLU(),
            nn.BatchNorm1d(10000), 
            nn.Linear(10000, 1000),
            nn.ReLU(),
            nn.BatchNorm1d(1000), 
            nn.Linear(1000, 100),
            nn.ReLU(),
            nn.BatchNorm1d(100), 
            nn.Linear(100, 10),
            nn.ReLU(),
            nn.BatchNorm1d(10),
            nn.Linear(10, 1),    
        )
        X = nn.init.trunc_normal_(torch.empty(fun.get_budget(), fun.dimension), fun.lb+interval_len/2, interval_len/6, a=fun.lb, b=fun.ub)
        fit(model, X, fun)
        # 第二步，优化 model
        X = nn.init.trunc_normal_(torch.empty(p_ind, fun.dimension), fun.lb+interval_len/2, interval_len/6, a=fun.lb, b=fun.ub)
        X = X.to(device)
        X.requires_grad_(True)
        optimizer = opti([X])
        # box = tqdm.tqdm(range(1000), desc=f"正在优化{fun.name}")
        box = tqdm.tqdm(range(fun.get_budget()//p_ind), desc=f"正在优化{fun.name}的代理模型。")
        best_loss = 1e10
        patience = 0
        for i in box:
            # X[X<fun.lb] = fun.lb
            # X[X>fun.ub] = fun.ub
            loss = fun(X).min() - fun.optival
            # loss = fun(X).sum()
            loss_item = loss.detach().numpy().item()
            box.set_postfix(loss=loss_item)
            if loss_item<best_loss:
                patience = 0
                best_loss = loss_item
            else:
                patience +=1
                if patience>100:
                    break
            loss = model(X).min() - fun.optival
            loss.backward()
            optimizer.step()
            if X.grad.norm()<1e-8:
                break
            optimizer.zero_grad()
            
        s+=loss.detach().numpy().item()
    return s


In [74]:
opti = lambda p: optim.Adam(p, lr=0.1)
evaluate(opti, p_ind=10)

正在拟合Sphere: 30it [00:13,  2.23it/s, loss=1.02e+9]
正在拟合Sphere: 30it [00:13,  2.26it/s, loss=9.66e+8]
正在拟合Sphere: 30it [00:13,  2.26it/s, loss=9.02e+8]
正在优化Sphere的代理模型。:   3%|▎         | 807/30000 [00:10<06:12, 78.35it/s, loss=1.62e+4]
正在拟合Rosenbrock: 13it [00:05,  2.27it/s, loss=7.55e+15]