In [1]:
import numpy as np
from H_Envs.pendulum import  PendulumEnv
import torch
import torch.nn as nn

In [2]:
from Agent import SB3Agent
import copy
from stable_baselines3.common.evaluation import evaluate_policy
import torch
import numpy as np
import torch.nn as nn


from H_Envs.pendulum import PendulumEnv
import gymnasium as gym
from gymnasium.wrappers import TimeLimit
from MBEnvs.mb_pendulum_base import MB_PendulumEnv
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
from sb3_contrib import TRPO


In [7]:
import torch
import torch as th
import numpy as np
import torch.nn as nn
import pandas as pd
from typing import Any, ClassVar, Dict, Optional, Type, TypeVar, Union
from PredictionModel import PredictionModel
from Utils import *
from sklearn.model_selection import train_test_split
import gymnasium as gym
from Agent import BaseAgent
import copy

DATA_MAX = 10000

class FRLClient():
    '''
      FRLClient:
      Train predcition model(predict the state trainsition) by sampled data
      data is sampled by the true environment 
      
      env: true environment
      model: prediction the transition(train the model by supervised learning)
      agent: interactive with the true environment by policy pi with explorating
	  
      params:
	    lr: learning_rate of model
	    hidden_size:
	    device:
    '''
  
    def __init__(
        self,
        env: gym.Env,
        agent: BaseAgent,
        lr: float = 3e-4,
        hidden_size: int = 256,
        device: Union[th.device, str] = "auto",
      ):
        # Initialize true environment
        self.env = env
        self.obs_size = env.observation_space.shape[0]
        # Check if action_space is Discrete
        if isinstance(env.action_space, gym.spaces.Discrete):
            self.action_size = 1
        else:
            self.action_size = env.action_space.shape[0]
        self.hidden_size = hidden_size
        
        # Initialize prediction model (predict the state transition)
        self.model = PredictionModel(self.obs_size, self.action_size, hidden_size).to(device)
        self.agent = agent
        self.policy = self.agent.policy_net

        self.lr = lr
        self.device = device
        self.dataset_X = None
        self.dataset_y = None
        self.dataMax = DATA_MAX
        
    def get_prediction_model(self):
        # Return the prediction model
        return copy.deepcopy(self.model)

    def get_prediction_model_params(self):
        # Return a deepcopy of the model's state dictionary
        return copy.deepcopy(self.model.state_dict())
      
    def update_policy(self, policy_net):
        # update the policy pi by policy parameters sended by server
        self.agent.update_policy_net(policy_net)
        self.policy = self.agent.policy_net

    def train_prediction_model(self, num_data=1000, num_epoch=100, batch_size=32):
        '''
        Train the prediction model with true data
        '''
        self.train_loss_list = []
        self.test_avg_list = []
        # Split the dataset
        train_X, test_X, train_y, test_y = train_test_split(self.dataset_X, self.dataset_y, test_size=0.2)
        
        # Flatten the column "actions"
        train_X = expand_action_column(train_X, action_column_name="actions")
        test_X = expand_action_column(test_X, action_column_name="actions")
        self.train_X = train_X
        self.test_X = test_X
        self.train_y = train_y
        self.test_y = test_y

        model = self.model.to(self.device)
        
        # Define loss function and optimizer
        loss_fn = nn.MSELoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=self.lr)

        overfit = 0
        last_test_avg = 10000

        # Convert data to numpy arrays
        train_X = train_X.to_numpy()
        train_y = train_y.to_numpy()
        test_X = test_X.to_numpy()
        test_y = test_y.to_numpy()

        # Train the model for a given number of epochs
        for t in range(num_epoch):
            train_loss = model_train(train_X, train_y, self.model, loss_fn, optimizer, batch_size)
            test_avg = model_test(test_X, test_y, self.model, loss_fn, batch_size)
            self.train_loss_list.append(train_loss)
            self.test_avg_list.append(test_avg)
            # Early stopping if overfitting occurs
            if test_avg > last_test_avg:
                overfit += 1
            else:
                overfit = 0
                last_test_avg = test_avg
            if overfit >= 10:
                break
        
        self.model = model
        
    def sample_seq_data(self, n, seq_length):
        env = self.env
        
        # observation, info = env.reset()
        # df = pd.DataFrame(observation).T
        # actions = []
        dataset_X_list = []
        dataset_y_list = []
        cnt = 0
        while(cnt < n):
            observation, info = env.reset()
            obs_df = pd.DataFrame(observation).T 
            actions = []
            done = False
            truncated = False
            while done == False and truncated == False:
                action = self.agent.act(observation)
                obs_df[len(obs_df)] = observation
                actions.append(action)
                observation, reward, done, truncated, info = env.step(action)
                cnt += 1
                if cnt >= n:
                    break
            # df_temp["actions"] = actions
            dataset_X_temp, dataset_y_temp = create_sequences_diff(obs_df, action_df, seq_length)
            dataset_X_list.append(dataset_X_temp)
            dataset_y_list.append(dataset_y_temp)
        
            # 展平数据集
        dataset_X = np.concatenate(dataset_X_list, axis=0)
        dataset_y = np.concatenate(dataset_y_list, axis=0)
        self.cur_dataset_X, self.cur_dataset_y = dataset_X, dataset_y
            
        

    # # gymnasium
    # def sample_data(self, n):
    #     '''
    #     Sample data using policy π to train the prediction model
    #     '''
    #     env = self.env
    #     observation, info = env.reset()
    #     df = pd.DataFrame(observation).T
    #     actions = []

    #     # Sample data from the environment
    #     for i in range(int(n * 0.5)):
    #         action = self.agent.act(observation)

    #         if i != 0:
    #             df.loc[len(df)] = observation
    #         actions.append(action)
    #         observation, reward, done, truncated, info = env.step(action)

    #         if done or truncated:
    #             observation, info = env.reset()
    #     # env.close()
        
    #     observation, info = env.reset()
    #     for i in range(int(n * 0.5)):
    #         # action = self.agent.act(observation)
    #         action = env.action_space.sample()
    #         # if i != 0:
    #         df.loc[len(df)] = observation
    #         actions.append(action)
    #         observation, reward, done, truncated, info = env.step(action)

    #         if done or truncated:
    #             observation, info = env.reset()
    #     env.close()

    #     df["actions"] = actions
    #     self.trajetories = df
    #     self.cur_dataset_X, self.cur_dataset_y = process_dfs_diff(df)
    #     self.addData()
        
    def addData(self):
        """
        Add cur_dataset_X and cur_dataset_y to the main dataset_X and dataset_y.
        If the combined dataset exceeds the maximum size (self.dataMax), 
        randomly delete a batch of data to make space for the new data.
        """
        if self.dataset_X is None or self.dataset_y is None:
            # If the main dataset is not initialized, initialize it with the current data
            self.dataset_X = self.cur_dataset_X.copy()
            self.dataset_y = self.cur_dataset_y.copy()
        else:
            # Check if the combined dataset exceeds the maximum size
            if len(self.dataset_X) + len(self.cur_dataset_X) > self.dataMax:
                # Calculate the number of rows to remove
                excess_size = (len(self.dataset_X) + len(self.cur_dataset_X)) - self.dataMax
    
                # Randomly select indices to remove
                drop_indices = np.random.choice(self.dataset_X.index, excess_size, replace=False)
    
                # Drop rows from both dataset_X and dataset_y
                self.dataset_X = self.dataset_X.drop(drop_indices).reset_index(drop=True)
                self.dataset_y = self.dataset_y.drop(drop_indices).reset_index(drop=True)
    
            # Append new data
            self.dataset_X = pd.concat([self.dataset_X, self.cur_dataset_X], ignore_index=True)
            self.dataset_y = pd.concat([self.dataset_y, self.cur_dataset_y], ignore_index=True)
        
    def learn(self, timesteps = 1000, epoch = 10, batch_size = 32):
        self.sample_data(timesteps)
      
        self.train_prediction_model(timesteps, num_epoch = epoch, batch_size=batch_size)
      
    def get_dataset(self):
        return copy.deepcopy(self.train_X), copy.deepcopy(self.train_y),  copy.deepcopy(self.test_X),  copy.deepcopy(self.test_y)
      


def model_train(X, y, model, loss_fn, optimizer, batch_size):
    # Set the model to training mode
    model.train()
    loss_sum = 0

    # Loop through the dataset in batches
    for i in range(round((len(y) / batch_size) + 0)):
        # Convert batch data to torch tensors and move to GPU
        train_X = torch.from_numpy(X[i * batch_size: (i+1) * batch_size]).cuda()
        train_y = torch.from_numpy(y[i * batch_size: (i+1) * batch_size]).cuda()

        loss = 0

        # Compute loss for each sample in the batch
        for k in range(min(batch_size, len(train_X))):
            pred = model.forward(train_X[k].float())
            loss += loss_fn(pred.to(torch.float32), train_y[k].to(torch.float32))
        loss_sum += loss.item()

        # Zero the gradients, perform backpropagation, and update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    
    # Return average loss per sample
    return loss_sum / len(y)

def model_test(X, y, model, loss_fn, batch_size):
    loss_sum = 0
    # Set the model to evaluation mode
    model.eval()

    # Disable gradient calculation
    with torch.no_grad():
        # Loop through the dataset in batches
        for i in range(round((len(y) / batch_size) + 1)):
            # Convert batch data to torch tensors and move to GPU
            test_X = torch.from_numpy(X[i * batch_size: (i+1) * batch_size]).cuda()
            test_y = torch.from_numpy(y[i * batch_size: (i+1) * batch_size]).cuda()

            # Compute loss for each sample in the batch
            for k in range(min(batch_size, len(test_X))):
                pred = model.forward(test_X[k].float())
                
                # loss += loss_fn(pred.to(torch.float32), test_y[k].to(torch.float32))
                loss = loss_fn(pred, test_y[k].float())
                loss_sum += loss.item()
                

    # Compute and print average loss per sample
    loss_sum /= len(y)
    print(f"Avg loss: {loss_sum}!")
    return loss_sum

def create_sequences(data, sequence_length):
    """ 将时间序列数据划分为样本，每个样本长度为 sequence_length """
    sequences = []
    targets = []
    
    for i in range(len(data) - sequence_length):
        # 获取长度为 sequence_length 的序列
        seq = data[i:i+sequence_length]
        # 目标是下一个时间步的数据
        target = data[i+sequence_length]
        sequences.append(seq)
        targets.append(target)
    
    return np.array(sequences), np.array(targets)

def create_sequences_diff(obs_df, action_df, sequence_length):
    """
    生成时间序列样本，并将 action 作为 X 的一部分。
    Y 是 observation 的差值 (next_obs - current_obs)。
    
    obs_df: 包含 observation 的 DataFrame
    action_df: 包含 action 的 DataFrame
    sequence_length: 序列的长度
    """
    sequences = []
    targets = []
    
    # 计算整个 obs_df 的差分
    obs_diff_df = obs_df.diff().dropna().reset_index(drop=True)
    
    # 确保 obs_df 和 action_df 长度相同
    assert len(obs_df) == len(action_df), "Observation and Action dataframes must have the same length."
    
    for i in range(len(obs_diff_df) - sequence_length):  # 确保不会越界
        # 获取长度为 sequence_length 的 observation 和 action 序列
        obs_seq = obs_df.iloc[i:i+sequence_length].values
        action_seq = action_df.iloc[i:i+sequence_length].values
        
        # 将 observation 和 action 合并为 X，action 是 X 的一部分
        sequence = np.concatenate([obs_seq, action_seq], axis=1)
        
        # 获取目标 Y，使用预计算的 obs_diff_df 差分序列
        target = obs_diff_df.iloc[i:i+sequence_length].values
        
        sequences.append(sequence)
        targets.append(target)
    
    return np.array(sequences), np.array(targets)

In [40]:
def create_sequences_diff(obs_df, action_df, sequence_length):
    """
    生成时间序列样本，并将 action 作为 X 的一部分。
    Y 是 observation 的差值 (next_obs - current_obs)。
    
    obs_df: 包含 observation 的 DataFrame
    action_df: 包含 action 的 DataFrame
    sequence_length: 序列的长度
    """
    sequences = []
    targets = []
    
    # 计算整个 obs_df 的差分
    obs_diff_df = obs_df.diff().dropna().reset_index(drop=True)
    
    # 确保 obs_df 和 action_df 长度相同
    assert len(obs_df) == len(action_df) + 1, "Observation and Action dataframes length."
    
    for i in range(len(obs_diff_df) - sequence_length):  # 确保不会越界
        # 获取长度为 sequence_length 的 observation 和 action 序列
        obs_seq = obs_df.iloc[i:i+sequence_length].values
        action_seq = action_df.iloc[i:i+sequence_length].values
        
        # 将 observation 和 action 合并为 X，action 是 X 的一部分
        sequence = np.concatenate([obs_seq, action_seq], axis=1)
        
        # 获取目标 Y，使用预计算的 obs_diff_df 差分序列
        target = obs_diff_df.iloc[i+sequence_length].values
        
        sequences.append(sequence)
        targets.append(target)
    
    return np.array(sequences), np.array(targets)

In [74]:
timesteps_real_per_round = 500
timesteps_fc_per_round = timesteps_real_per_round * 30
epoch_per_round = 10
rounds_num = 40
batch_size_env_model = 128
env_paras = [10.0, 10.0, 10.0]
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [75]:
CLIENTS_NUM = len(env_paras)
env_models = []
MB_env = TimeLimit(MB_PendulumEnv(env_models,device), max_episode_steps = 200)
    
    # Global_RL = PPO("MlpPolicy", MB_env, verbose=1)
Global_RL = TRPO("MlpPolicy", MB_env, verbose=1)
    
train_loss_records = []
test_loss_records = []
    
# env_theta = [0.1, 0.3, 0.5, 0.7, 0.9]
real_envs = []
Clients = []
for i in range(CLIENTS_NUM):
    real_envs.append( TimeLimit(PendulumEnv(g=env_paras[i]), max_episode_steps=200) )
    policy_net = Global_RL
    agent = SB3Agent(policy_net)
    client = FRLClient(real_envs[i], agent, lr = 3e-4, hidden_size = 256, device = device)
    Clients.append(client)
    env_model = copy.deepcopy(client.model)
    env_models.append(env_model)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.


In [44]:
Clients[0].dataset_X

In [77]:
n = 2000
seq_length = 3

env = Clients[0].env
        
# observation, info = env.reset()
# df = pd.DataFrame(observation).T
# actions = []
dataset_X_list = []
dataset_y_list = []
cnt = 0
while(cnt < n):
    observation, info = env.reset()
    obs_df = pd.DataFrame(observation).T 
    actions = []
    done = False
    truncated = False
    while done == False and truncated == False:
        action = env.action_space.sample()
#         print(obs_df)
#         print(len(obs_df))
#         print(observation)
        
        actions.append(action)
        observation, reward, done, truncated, info = env.step(action)
        obs_df.loc[len(obs_df)] = observation
        cnt += 1
        if cnt >= n:
            break
    
    print(obs_df)
    print(len(obs_df))
    print(len(actions))
    action_df = pd.DataFrame(actions)
    print(action_df)
        
            # df_temp["actions"] = actions
    dataset_X_temp, dataset_y_temp = create_sequences_diff(obs_df, action_df, seq_length)
    dataset_X_list.append(dataset_X_temp)
    dataset_y_list.append(dataset_y_temp)
        
            # 展平数据集
dataset_X = np.concatenate(dataset_X_list, axis=0)
dataset_y = np.concatenate(dataset_y_list, axis=0)

            0         1         2
0    0.379969  0.924999  0.399528
1    0.332464  0.943116  1.016949
2    0.261965  0.965077  1.477137
3    0.161589  0.986858  2.055147
4    0.037517  0.999296  2.495488
..        ...       ...       ...
196 -0.170687  0.985325  2.391550
197 -0.321279  0.946985  3.111057
198 -0.483706  0.875231  3.556087
199 -0.648809  0.760951  4.022689
200 -0.801152  0.598461  4.463996

[201 rows x 3 columns]
201
200
            0
0   -0.508860
1   -1.647656
2   -0.971991
3   -1.998682
4    1.081602
..        ...
195  1.348756
196 -0.129912
197 -1.768054
198 -1.265471
199 -0.862714

[200 rows x 1 columns]
            0         1         2
0   -0.905639 -0.424049  0.747747
1   -0.897235 -0.441554  0.388370
2   -0.899665 -0.436580 -0.110724
3   -0.903978 -0.427579 -0.199615
4   -0.911937 -0.410330 -0.379948
..        ...       ...       ...
196 -0.970328  0.241794  3.261554
197 -0.996728  0.080829  3.265942
198 -0.995724 -0.092380  3.468593
199 -0.964240 -0.265029  3.5

In [78]:
dataset_X.shape

(1970, 3, 4)

In [79]:
dataset_y.shape

(1970, 3)

In [80]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # LSTM expects inputs of shape (batch_size, sequence_length, feature_size)
        lstm_out, _ = self.lstm(x)
        # Only need the output from the last time step
        out = lstm_out[:, -1, :]  # shape: (batch_size, hidden_size)
        out = self.fc(out)  # shape: (batch_size, output_size)
        return out

# Example model parameters
input_size = 4  # number of features
hidden_size = 64  # hidden size for LSTM
output_size = 3  # number of output features

model = LSTMModel(input_size, hidden_size, output_size)


In [81]:
from sklearn.model_selection import train_test_split

# 假设 dataset_X 和 dataset_y 是 NumPy 数组
X_train, X_test, y_train, y_test = train_test_split(dataset_X, dataset_y, test_size=0.2, shuffle=False)


In [82]:
X_train.shape

(1576, 3, 4)

In [84]:
# 将数据转换为 PyTorch 张量
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).to(device)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).to(device)

In [89]:
# 定义超参数
batch_size = 32
num_epochs = 10

loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model.cuda()

# 训练模型
for epoch in range(num_epochs):
    train_loss = model_train(X_train, y_train, model, loss_fn, optimizer, batch_size)
    print(f'Epoch {epoch+1}, Train Loss: {train_loss}')

    # 每个 epoch 后评估模型在测试集上的表现
    test_loss = model_test(X_testr, y_test, model, loss_fn, batch_size)
    print(f'Epoch {epoch+1}, Test Loss: {test_loss}')

IndexError: too many indices for tensor of dimension 2

In [91]:
X_train.shape

(1576, 3, 4)

In [92]:
# 转换为 PyTorch 张量
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).cuda()
y_train_tensor = torch.tensor(y_train, dtype=torch.float32).cuda()
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).cuda()
y_test_tensor = torch.tensor(y_test, dtype=torch.float32).cuda()

In [93]:
import torch
import torch.nn as nn

class LSTMModel(nn.Module):
    def __init__(self, input_size, hidden_size, output_size, num_layers):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        lstm_out, _ = self.lstm(x)  # LSTM 输出 (batch_size, seq_len, hidden_size)
        out = lstm_out[:, -1, :]    # 取最后一个时间步的输出 (batch_size, hidden_size)
        out = self.fc(out)          # 通过全连接层 (batch_size, output_size)
        return out


In [98]:
def model_train(X, y, model, loss_fn, optimizer, batch_size):
    # Set the model to training mode
    model.train()
    loss_sum = 0

    # Loop through the dataset in batches
    for i in range(0, len(y), batch_size):
        # Convert batch data to torch tensors and move to GPU
        train_X = torch.tensor(X[i: i + batch_size], dtype=torch.float32).cuda()
        train_y = torch.tensor(y[i: i + batch_size], dtype=torch.float32).cuda()

        # Forward pass
        pred = model(train_X)

        # Compute loss for the entire batch
        loss = loss_fn(pred, train_y)
        loss_sum += loss.item()

        # Zero the gradients, perform backpropagation, and update the weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Return average loss per sample
    return loss_sum / len(y)


def model_test(X, y, model, loss_fn, batch_size):
    loss_sum = 0
    # Set the model to evaluation mode
    model.eval()

    # Disable gradient calculation
    with torch.no_grad():
        # Loop through the dataset in batches
        for i in range(0, len(y), batch_size):
            # Convert batch data to torch tensors and move to GPU
            test_X = torch.tensor(X[i: i + batch_size], dtype=torch.float32).cuda()
            test_y = torch.tensor(y[i: i + batch_size], dtype=torch.float32).cuda()

            # Forward pass
            pred = model(test_X)

            # Compute loss for the entire batch
            loss = loss_fn(pred, test_y)
            loss_sum += loss.item()

    # Compute and print average loss per sample
    avg_loss = loss_sum / len(y)
    print(f"Avg loss: {avg_loss}!")
    return avg_loss


In [129]:
# 超参数设置
input_size = 4  # 每个时间步的特征数
hidden_size = 64  # LSTM 隐藏层大小
output_size = 3  # 标签的特征数
num_layers = 1  # LSTM 层数
batch_size = 32  # 每个批次的样本数
num_epochs = 50  # 训练轮数

# 模型、损失函数和优化器
model = LSTMModel(input_size, hidden_size, output_size, num_layers).cuda()
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# 训练和测试循环
for epoch in range(num_epochs):
    train_loss = model_train(X_train_tensor, y_train_tensor, model, loss_fn, optimizer, batch_size)
    test_loss = model_test(X_test_tensor, y_test_tensor, model, loss_fn, batch_size)

    print(f'Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.8f}, Test Loss: {test_loss:.8f}')


  train_X = torch.tensor(X[i: i + batch_size], dtype=torch.float32).cuda()
  train_y = torch.tensor(y[i: i + batch_size], dtype=torch.float32).cuda()
  test_X = torch.tensor(X[i: i + batch_size], dtype=torch.float32).cuda()
  test_y = torch.tensor(y[i: i + batch_size], dtype=torch.float32).cuda()


Avg loss: 0.0007351414149220523!
Epoch 1/50, Train Loss: 0.00269769, Test Loss: 0.00073514
Avg loss: 0.00039267516576774837!
Epoch 2/50, Train Loss: 0.00099132, Test Loss: 0.00039268
Avg loss: 0.00038170526607841405!
Epoch 3/50, Train Loss: 0.00056791, Test Loss: 0.00038171
Avg loss: 0.00035870245889509995!
Epoch 4/50, Train Loss: 0.00045892, Test Loss: 0.00035870
Avg loss: 0.00034204761374760704!
Epoch 5/50, Train Loss: 0.00040270, Test Loss: 0.00034205
Avg loss: 0.0003361879389373784!
Epoch 6/50, Train Loss: 0.00037524, Test Loss: 0.00033619
Avg loss: 0.0003339337683779183!
Epoch 7/50, Train Loss: 0.00036032, Test Loss: 0.00033393
Avg loss: 0.00033317589872341774!
Epoch 8/50, Train Loss: 0.00035165, Test Loss: 0.00033318
Avg loss: 0.0003330828645621157!
Epoch 9/50, Train Loss: 0.00034661, Test Loss: 0.00033308
Avg loss: 0.00033314858426146095!
Epoch 10/50, Train Loss: 0.00034380, Test Loss: 0.00033315
Avg loss: 0.0003331443353976846!
Epoch 11/50, Train Loss: 0.00034220, Test Loss: 0.

In [117]:
# from PredictionModel import PredictionModel
HIDDEN_SIZE = 64
class PredictionModel(nn.Module):
    def __init__(self, obs_size, action_size, seq_length = 1 ,hidden_size = HIDDEN_SIZE):
        super().__init__()
        self.net = nn.Sequential (
            nn.Linear((obs_size + action_size) * seq_length, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, obs_size),
        )
    
    def forward(self, x):
        return self.net(x)

In [119]:
MLP_model = PredictionModel(obs_size = 3, action_size= 1, seq_length = 3, hidden_size= 64)

In [120]:
MLP_model

PredictionModel(
  (net): Sequential(
    (0): Linear(in_features=12, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [121]:
model

LSTMModel(
  (lstm): LSTM(4, 64, batch_first=True)
  (fc): Linear(in_features=64, out_features=3, bias=True)
)

In [122]:
N, T, F = dataset_X.shape
X_reshaped = dataset_X.reshape(N, T * F)

In [123]:
# 划分数据集
X_train_MLP, X_test_MLP, y_train_MLP, y_test_MLP = train_test_split(X_reshaped, dataset_y, test_size=0.2, random_state=42)

PredictionModel(
  (net): Sequential(
    (0): Linear(in_features=12, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=64, bias=True)
    (3): ReLU()
    (4): Linear(in_features=64, out_features=3, bias=True)
  )
)

In [130]:
MLP_model = PredictionModel(obs_size = 3, action_size= 1, seq_length = 3, hidden_size= 64)
MLP_model.cuda()
loss_fn = nn.MSELoss()
optimizer = optim.Adam(MLP_model.parameters(), lr=0.001)

# 训练 MLP
num_epochs = 50
batch_size = 32
for epoch in range(num_epochs):
    train_loss = model_train(X_train_MLP, y_train_MLP, MLP_model, loss_fn, optimizer, batch_size)
    test_loss = model_test(X_test_MLP, y_test_MLP, MLP_model, loss_fn, batch_size)
    print(f'Epoch {epoch+1}, MLP Train Loss: {train_loss}, MLP Test Loss: {test_loss}')

Avg loss: 0.0007704195429377145!
Epoch 1, MLP Train Loss: 0.0020958492041891676, MLP Test Loss: 0.0007704195429377145
Avg loss: 0.000533101775300533!
Epoch 2, MLP Train Loss: 0.0005555438063329625, MLP Test Loss: 0.000533101775300533
Avg loss: 0.0004862645040573505!
Epoch 3, MLP Train Loss: 0.0004457204075539626, MLP Test Loss: 0.0004862645040573505
Avg loss: 0.00046662119959438514!
Epoch 4, MLP Train Loss: 0.00041107271223560655, MLP Test Loss: 0.00046662119959438514
Avg loss: 0.00045430948390497774!
Epoch 5, MLP Train Loss: 0.0003921814412965917, MLP Test Loss: 0.00045430948390497774
Avg loss: 0.0004485245855515681!
Epoch 6, MLP Train Loss: 0.00038074357594661224, MLP Test Loss: 0.0004485245855515681
Avg loss: 0.00044646360178662435!
Epoch 7, MLP Train Loss: 0.00037199085835972563, MLP Test Loss: 0.00044646360178662435
Avg loss: 0.0004437722781020675!
Epoch 8, MLP Train Loss: 0.00036390443766243794, MLP Test Loss: 0.0004437722781020675
Avg loss: 0.00043883859439927913!
Epoch 9, MLP T

In [115]:
X_train_MLP.shape

(1576, 12)