#### data generation

In [7]:
import numpy as np
import torch

# Synthetic regression data. A fixed seed makes every downstream number
# (splits, fitness values, GA result) repeatable across kernel restarts.
SEED = 42
rng = np.random.default_rng(SEED)

n_samples = 1000
n_features = 22
X = rng.random((n_samples, n_features)) * 100   # features uniformly drawn from [0, 100)
y = rng.random((n_samples, 3)) * 10             # three regression targets, each in [0, 10)

#### train / validation / test split

In [8]:
from sklearn.model_selection import train_test_split

# Step 1: hold out 20% of all samples as the test set
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 2: carve 20% of the remaining samples off as the validation set
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval, test_size=0.2, random_state=42
)

In [9]:
# Report the shape of each split
for label, split in (
    ("train dataset:", X_train),
    ("validation dataset:", X_val),
    ("test dataset:", X_test),
):
    print(label, split.shape)

train dataset: (640, 22)
validation dataset: (160, 22)
test dataset: (200, 22)


#### convert the NumPy arrays to PyTorch tensors

In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset


# Convert every split to a float32 tensor
(
    X_train_tensor, y_train_tensor,
    X_val_tensor, y_val_tensor,
    X_test_tensor, y_test_tensor,
) = (
    torch.tensor(array, dtype=torch.float32)
    for array in (X_train, y_train, X_val, y_val, X_test, y_test)
)

In [13]:
# Mini-batch size shared by all three loaders
batch_size = 64

# Pair each feature tensor with its matching target tensor
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
val_dataset = TensorDataset(X_val_tensor, y_val_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; validation and test keep their order
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [14]:
# Number of mini-batches the training loader yields per epoch
len(train_loader)

10

#### fitness function

In [5]:
def _build_mlp(input_size, hidden_sizes, output_size, activation_cls):
    """Build a fully connected network: Linear + activation per hidden layer, then a linear output layer."""
    layers = []
    in_features = input_size
    for width in hidden_sizes:
        layers.append(nn.Linear(in_features, width))
        layers.append(activation_cls())
        in_features = width
    layers.append(nn.Linear(in_features, output_size))
    return nn.Sequential(*layers)


def MLPNN_fitness_function(params):
    """Train an MLP described by ``params`` and return its validation MSE.

    The function reads the module-level tensors ``X_train_tensor``,
    ``y_train_tensor``, ``X_val_tensor`` and ``y_val_tensor``.

    Parameters
    ----------
    params : sequence of 14 numbers
        [0]      number of hidden layers (1-8)
        [1]      SGD learning rate
        [2]      activation index: 0 = Sigmoid, 1 = ReLU, 2 = Tanh
        [3]      batch-size index into [32, 64, 128, 256]
        [4:12]   widths of hidden layers 1-8 (only the first ``params[0]`` are used)
        [12]     epoch index into [100, 200, 500, 1000, 2000, 5000]
        [13]     L2 weight decay passed to SGD

    Returns
    -------
    float
        Sample-weighted mean squared error on the validation set, or a large
        finite penalty if training diverged (non-finite loss).

    Raises
    ------
    ValueError
        If the number of hidden layers is outside 1-8.
    IndexError
        If a categorical index is outside its option list.
    """
    # Decode the parameter vector
    n_hidden_layers = int(params[0])
    learning_rate = float(params[1])
    activation_cls = [nn.Sigmoid, nn.ReLU, nn.Tanh][int(params[2])]
    batch_size = [32, 64, 128, 256][int(params[3])]
    layer_widths = [int(value) for value in params[4:12]]
    num_epochs = [100, 200, 500, 1000, 2000, 5000][int(params[12])]
    weight_decay = float(params[13])

    if not 1 <= n_hidden_layers <= len(layer_widths):
        raise ValueError("Unsupported hidden_size. Please choose a value between 1 and 8.")
    hidden_sizes = layer_widths[:n_hidden_layers]

    # Returned instead of NaN/inf so that a caller ranking fitness values
    # always receives a finite number it can compare and sum.
    diverged_penalty = 1e6

    # Loaders are rebuilt per call because the batch size is itself a searched parameter
    train_loader = DataLoader(
        TensorDataset(X_train_tensor, y_train_tensor), batch_size=batch_size, shuffle=True
    )
    val_loader = DataLoader(
        TensorDataset(X_val_tensor, y_val_tensor), batch_size=batch_size
    )

    # Layer sizes come from the tensors that are actually fed to the network
    input_size = X_train_tensor.shape[1]
    output_size = y_train_tensor.shape[1]
    model = _build_mlp(input_size, hidden_sizes, output_size, activation_cls)
    optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)
    criterion = nn.MSELoss()

    # Training
    model.train()
    for _ in range(num_epochs):
        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(X_batch), y_batch)
            if not torch.isfinite(loss):
                # Training has blown up; further epochs cannot recover
                return diverged_penalty
            loss.backward()
            optimizer.step()

    # Validation: weight each batch by its size so a short final batch does
    # not count as much as a full one
    model.eval()
    weighted_mse_sum = 0.0
    n_val_samples = 0
    with torch.no_grad():
        for batch_X, batch_y in val_loader:
            batch_mse = criterion(model(batch_X), batch_y).item()
            weighted_mse_sum += batch_mse * batch_X.size(0)
            n_val_samples += batch_X.size(0)

    validation_loss = weighted_mse_sum / n_val_samples
    if not np.isfinite(validation_loss):
        return diverged_penalty
    return validation_loss

# Smoke test: evaluate one hand-picked configuration before launching the search.
hidden_size = 5
learning_rate = 0.001
activation = [nn.ReLU(), nn.Sigmoid(), nn.Tanh()][0]
batch_size = 64
epoch = 100
weight_decay = 0.001

# MLPNN_fitness_function takes ONE sequence of 14 values, and expects the
# activation, batch size and epoch count as indices into its option lists
# rather than as raw values.
sample_params = [
    hidden_size,
    learning_rate,
    [nn.Sigmoid, nn.ReLU, nn.Tanh].index(type(activation)),
    [32, 64, 128, 256].index(batch_size),
    128, 128, 128, 64, 32, 64, 32, 64,      # widths of hidden layers 1-8
    [100, 200, 500, 1000, 2000, 5000].index(epoch),
    weight_decay,
]

result1 = MLPNN_fitness_function(sample_params)

result1

In [6]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from geneticalgorithm import geneticalgorithm as ga


# Search space: one [low, high] row per value, in the order
# MLPNN_fitness_function reads them from its `params` argument.
varbound = np.array([[1, 8],               # number of hidden layers
                     [0.0001, 0.2],        # learning rate
                     [0, 2],               # activation index: 0 = Sigmoid, 1 = ReLU, 2 = Tanh
                     [0, 3],               # batch-size index into [32, 64, 128, 256]
                     [10, 200],            # hidden layer 1 width
                     [10, 200],            # hidden layer 2 width
                     [10, 200],            # hidden layer 3 width
                     [10, 200],            # hidden layer 4 width
                     [10, 200],            # hidden layer 5 width
                     [10, 200],            # hidden layer 6 width
                     [10, 200],            # hidden layer 7 width
                     [10, 200],            # hidden layer 8 width
                     [0, 5],               # epoch index into [100, 200, 500, 1000, 2000, 5000]
                     [0, 0.01]             # L2 weight decay
                     ])

# Only the learning rate and the weight decay are continuous
vartype = np.array(['int', 'real', 'int', 'int', 'int', 'int', 'int', 'int', 'int', 'int', 'int', 'int', 'int', 'real'])

# Configure the genetic algorithm
algorithm_param = {'max_num_iteration': 10, 'population_size': 10, 'elit_ratio': 0.01,
                   'parents_portion': 0.3, 'crossover_probability': 0.5, 'mutation_probability': 0.1,
                   'crossover_type': 'one_point', 'max_iteration_without_improv': None}

# NOTE(review): function_timeout is the budget for a single fitness evaluation;
# the larger epoch options may need more than 200 s — confirm on target hardware.
model = ga(function=MLPNN_fitness_function, dimension=len(varbound), variable_type='real',
           variable_type_mixed=vartype, variable_boundaries=varbound,
           algorithm_parameters=algorithm_param, function_timeout=200)

# Run the search
model.run()

# Best parameter vector found, in the same layout as `varbound`
best_params = model.output_dict['variable']

# Decode the vector with the same option lists MLPNN_fitness_function uses
best_n_layers = int(best_params[0])
best_config = {
    'n_hidden_layers': best_n_layers,
    'learning_rate': float(best_params[1]),
    'activation': ['Sigmoid', 'ReLU', 'Tanh'][int(best_params[2])],
    'batch_size': [32, 64, 128, 256][int(best_params[3])],
    'hidden_sizes': [int(width) for width in best_params[4:4 + best_n_layers]],
    'epochs': [100, 200, 500, 1000, 2000, 5000][int(best_params[12])],
    'weight_decay': float(best_params[13]),
    'validation_mse': model.output_dict['function'],
}
best_config

KeyboardInterrupt: 

In [None]:
# Show the GA object's `param` attribute (presumably the algorithm parameters it was built with — TODO confirm)
model.param

In [None]:
# Show the GA result dictionary; its 'variable' entry is what `best_params` is read from
model.output_dict