In [1]:
import sys
import torch
from itertools import product
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.metrics import mean_squared_error

sys.path.append('../')


from LSTM.LSTM import LSTMModel
from Bi_LSTM.Bi_LSTM import BiLSTMModel
from Bi_LSTM_Attention.BI_LSTM_Attention import BiLSTMAtteionModel
from N_Beats.N_Beats import NBeatsModel
from ARIMA.ARIMA import ARIMA

from utils.train import *

In [13]:
# Load the three workload splits used by the rest of the notebook.
# NOTE(review): load_service_workload is not imported by name in the import cell,
# so it presumably arrives through `from utils.train import *` — confirm.
train_workload, val_workload, test_workload = load_service_workload()

In [15]:
# Preview the training split; the recorded output below shows the frame with its
# middle rows elided, and the 'msinstanceid' column later used for grouping.
train_workload

Unnamed: 0.1,Unnamed: 0,msinstanceid,timestamp,HTTP_MCR,HTTP_RT
10754622,14870,a89d57c159c2dbb084db02cad47925a851f77efe14d9aa...,0,0.500000,0.500000
10754623,37279,a89d57c159c2dbb084db02cad47925a851f77efe14d9aa...,60000,0.500000,0.500000
10754624,59691,a89d57c159c2dbb084db02cad47925a851f77efe14d9aa...,120000,0.500000,0.500000
10754625,82059,a89d57c159c2dbb084db02cad47925a851f77efe14d9aa...,180000,0.500000,0.500000
10754626,104485,a89d57c159c2dbb084db02cad47925a851f77efe14d9aa...,240000,0.500000,0.500000
...,...,...,...,...,...
2550560,16118711,28a57d83329b1a542138e1a882be759fb3fc24a49bdd5e...,42960000,4.946429,4.946429
2550561,16140979,28a57d83329b1a542138e1a882be759fb3fc24a49bdd5e...,43020000,3.666667,3.666667
2550562,16163233,28a57d83329b1a542138e1a882be759fb3fc24a49bdd5e...,43080000,4.250000,4.250000
2550563,16185463,28a57d83329b1a542138e1a882be759fb3fc24a49bdd5e...,43140000,3.800000,3.800000


In [29]:
class PyTorchGridSearchCV(BaseEstimator, RegressorMixin):
    """Scikit-learn style wrapper around one PyTorch forecasting model.

    Despite its name the class does not search anything itself: it stores a
    single hyperparameter combination, builds the matching network in ``fit``
    and runs it in ``predict``.

    Parameters
    ----------
    model_name : str
        Which network to build: "LSTM", "Bi-LSTM", "Bi-LSTM-Attention" or
        "N-Beats".
    input_dim, output_dim, hidden_dim, num_layers : int
        Forwarded to the model constructor (which of them are used depends on
        ``model_name``).
    step_num : int
        Passed to ``create_sequences``; ``step_num + 1`` is passed to
        ``split_array_by_step``. It is also a constructor argument of the
        attention and N-Beats models.
    epochs, batch_size : int
        Forwarded to ``train``.
    device : torch.device or None
        Device the model and prediction inputs are moved to. ``None`` selects
        CUDA when available, otherwise CPU.
    """

    # Names accepted for ``model_name``; anything else is rejected in ``fit``.
    _SUPPORTED_MODELS = ("LSTM", "Bi-LSTM", "Bi-LSTM-Attention", "N-Beats")

    def __init__(self, model_name, input_dim=4, output_dim=4, step_num=5, hidden_dim=32, num_layers=2, epochs=10, batch_size=32, device=None):
        self.model_name = model_name
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.step_num = step_num
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.epochs = epochs
        self.batch_size = batch_size
        self.device = device if device is not None else torch.device(
            "cuda" if torch.cuda.is_available() else "cpu")

    def _build_model(self):
        """Instantiate the network selected by ``model_name`` on ``self.device``.

        Raises
        ------
        ValueError
            If ``model_name`` is not one of the supported names. Without this
            check an unknown name would leave ``self.model`` unset (or leave a
            network from an earlier ``fit`` in place and silently retrain it).
        """
        name = self.model_name
        if name == "LSTM":
            net = LSTMModel(self.input_dim, self.hidden_dim,
                            self.num_layers, self.output_dim)
        elif name == "Bi-LSTM":
            net = BiLSTMModel(self.input_dim, self.hidden_dim,
                              self.num_layers, self.output_dim)
        elif name == "Bi-LSTM-Attention":
            net = BiLSTMAtteionModel(
                self.input_dim, self.step_num, self.output_dim, self.hidden_dim, self.num_layers)
        elif name == "N-Beats":
            net = NBeatsModel(self.step_num, self.output_dim, self.hidden_dim)
        else:
            raise ValueError(
                f"Unsupported model_name {name!r}; expected one of {self._SUPPORTED_MODELS}")
        return net.to(self.device)

    def _make_sequences(self, workload):
        """Turn one workload frame into the ``(x, y)`` pair returned by ``create_sequences``.

        The frame is grouped by the 'msinstanceid' column, each group is passed
        through ``split_array_by_step`` with ``step_num + 1``, the per-group
        results are concatenated along axis 0 and handed to ``create_sequences``.
        NOTE(review): the exact shapes depend on the helpers in utils.train,
        which are not visible here — confirm before relying on them.
        """
        # Group by the msinstanceid column so each instance is processed separately.
        per_instance = workload.groupby('msinstanceid').apply(
            lambda x: split_array_by_step(x, self.step_num+1))
        stacked = np.concatenate(per_instance, axis=0)
        return create_sequences(stacked, self.step_num)

    def fit(self, train_data=None, val_data=None):
        """Build the selected model and train it.

        Parameters
        ----------
        train_data, val_data : DataFrame or None
            Workload frames containing an 'msinstanceid' column. When omitted,
            the notebook-level ``train_workload`` / ``val_workload`` globals are
            used, which is what a bare ``fit()`` call has always done.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``model_name`` is not supported (raised before any data is
            processed).
        """
        self.model = self._build_model()

        if train_data is None:
            train_data = train_workload
        if val_data is None:
            val_data = val_workload

        x_train, y_train = self._make_sequences(train_data)
        x_val, y_val = self._make_sequences(val_data)

        train(x_train, y_train, x_val, y_val,
              self.model, self.epochs, self.batch_size, self.device)
        return self

    def predict(self, test_data=None):
        """Run the fitted model and return its outputs as a NumPy array.

        Parameters
        ----------
        test_data : DataFrame or None
            Workload frame to evaluate on. When omitted, the notebook-level
            ``test_workload`` global is used, matching a bare ``predict()``.

        Side effect: the targets produced by ``create_sequences`` for the
        evaluated frame are stored on ``self.y_test`` so callers can score the
        returned predictions against them.
        """
        if test_data is None:
            test_data = test_workload

        x_test, self.y_test = self._make_sequences(test_data)

        x_pred = torch.tensor(x_test, dtype=torch.float32).to(self.device)
        self.model.eval()
        # Inference only: no gradients are needed.
        with torch.no_grad():
            y_pred = self.model(x_pred)
        return y_pred.cpu().numpy()

    def set_params(self, **params):
        """Overwrite attributes from keyword arguments and return ``self``.

        The instance ``__dict__`` is updated directly, so names are not
        validated: a misspelled hyperparameter creates a new, unused attribute
        instead of raising.
        """
        self.__dict__.update(params)
        return self



In [25]:
def custom_grid_search(model, param_grid):
    """Exhaustively try every combination in ``param_grid`` and keep the best one.

    Parameters
    ----------
    model : callable
        Estimator class (or factory). It is called with one combination as
        keyword arguments; the result must offer ``fit()``, ``predict()`` and,
        after ``predict()``, a ``y_test`` attribute holding the targets.
    param_grid : dict
        Maps each parameter name to the iterable of values to try.

    Returns
    -------
    tuple
        ``(best_model, best_score, best_params)`` for the lowest mean squared
        error; ties keep the earliest combination. If no combination is
        evaluated the result is ``(None, inf, None)``.

    NOTE(review): the score is whatever ``predict()`` / ``y_test`` describe. For
    ``PyTorchGridSearchCV`` in this notebook ``predict()`` defaults to the test
    split, so selection is done on test data — consider scoring on validation.
    """
    names = list(param_grid.keys())
    candidates = [param_grid[name] for name in names]

    best_model, best_score, best_params = None, float('inf'), None

    for combo in product(*candidates):
        params = {name: value for name, value in zip(names, combo)}
        print(f"Training with parameters: {params}")

        # Build a fresh estimator for this combination and train it.
        estimator = model(**params)
        estimator.fit()

        # predict() also populates estimator.y_test, so it must run before scoring.
        predictions = estimator.predict()
        score = mean_squared_error(estimator.y_test, predictions)

        # Strict '<' keeps the first of equally good combinations.
        if score < best_score:
            best_model, best_score, best_params = estimator, score, params

    return best_model, best_score, best_params


In [30]:
# Fixed model dimensions. They are listed in the grid below as single-value
# entries so that editing them here actually reaches the estimator; previously
# they were assigned but never used, and the constructor defaults applied.
input_dim = 4
output_dim = 4

# Search space: 4 * 3 * 3 * 5 * 3 = 540 combinations, each trained from scratch.
param_grid = {
    'model_name': ['LSTM'],
    'input_dim': [input_dim],
    'output_dim': [output_dim],
    'step_num': [2, 3, 5, 7],
    'num_layers': [1, 2, 4],
    'hidden_dim': [16, 32, 64],
    'epochs': [10, 30, 50, 80, 100],
    'batch_size': [16, 32, 64],
}

# Run the custom grid search; the estimator class is passed uninstantiated and
# custom_grid_search builds one instance per combination.
best_model, best_score, best_params = custom_grid_search(PyTorchGridSearchCV, param_grid)

print(f"Best score: {best_score}")
print(f"Best parameters: {best_params}")

Training with parameters: {'model_name': 'LSTM', 'step_num': 2, 'num_layers': 1, 'hidden_dim': 16, 'epochs': 10, 'batch_size': 16}
Epoch 1/10, Loss: 0.025578
Validation Loss: 0.006556
Epoch 2/10, Loss: 0.007390
Validation Loss: 0.003750
Epoch 3/10, Loss: 0.006037
Validation Loss: 0.003565
Epoch 4/10, Loss: 0.005298
Validation Loss: 0.002874
Epoch 5/10, Loss: 0.004480
Validation Loss: 0.002629
Epoch 6/10, Loss: 0.003526
Validation Loss: 0.001925
Epoch 7/10, Loss: 0.002527
Validation Loss: 0.001295
Epoch 8/10, Loss: 0.001886
Validation Loss: 0.001167
Epoch 9/10, Loss: 0.001595
Validation Loss: 0.001088
Epoch 10/10, Loss: 0.001464
Validation Loss: 0.000890
Training with parameters: {'model_name': 'LSTM', 'step_num': 2, 'num_layers': 1, 'hidden_dim': 16, 'epochs': 10, 'batch_size': 32}
Epoch 1/10, Loss: 0.095785
Validation Loss: 0.024086
Epoch 2/10, Loss: 0.020348
Validation Loss: 0.009779
Epoch 3/10, Loss: 0.013167
Validation Loss: 0.006780
Epoch 4/10, Loss: 0.008962
Validation Loss: 0.00

KeyboardInterrupt: 