In [1]:
from models import *
from utils import *
from test import *

import math
import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import itertools

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error
from torch.utils.data import DataLoader, TensorDataset
from sklearn.preprocessing import MinMaxScaler


In [2]:
X, y = LBNL59()
X_columns_to_normalize = X.columns.difference(['date'])
X_scaler = MinMaxScaler()
X[X_columns_to_normalize] = X_scaler.fit_transform(X[X_columns_to_normalize])
y_columns_to_sum = y.columns.difference(['date'])
y['sum'] = y[y_columns_to_sum].sum(axis=1)
y = y[["date", 'sum']]
X = X[X["date"] >= '2018-09-16']
y = y[y["date"] >= '2018-09-16']

X_train = X[X["date"] <= '2020-10-16']
y_train = y[y["date"] <= '2020-10-16']

X_test = X[X["date"] >= '2020-10-16']
y_test = y[y["date"] >= '2020-10-16']

In [3]:
seq_length = 20
predict_length = 1
batch_size = 100

Train_dataset = TimeSeriesDataset_sep(X_train, y_train, seq_length, predict_length = predict_length)
Train_dataloader = DataLoader(Train_dataset, batch_size=batch_size, shuffle=False)

Test_dataset = TimeSeriesDataset_sep(X_test, y_test, seq_length, predict_length = predict_length)
Test_dataloader = DataLoader(Test_dataset, batch_size=batch_size, shuffle=False)


In [4]:
class BiLSTMEncoder(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, num_heads, dropout=0.1):
        super(BiLSTMEncoder, self).__init__()
        # 定义BiLSTM层，注意设置 bidirectional=True
        self.input_projection = nn.Linear(input_size, hidden_size)
        
        # Transformer Encoder Layer
        self.transformer_layer = nn.TransformerEncoderLayer(d_model=hidden_size, nhead=num_heads, dropout=dropout, batch_first=True)
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_layer, num_layers=num_layers)
        # Fully connected layer for multi-step and multi-variable prediction

        self.bilstm = nn.LSTM(input_size + hidden_size, hidden_size, num_layers, batch_first=True, bidirectional=True)
        
        self.fc = nn.Linear(hidden_size*2, output_size)
        
        self.relu = nn.ReLU()
        self.leakyrelu = nn.LeakyReLU()
    def positional_encoding(self, seq_length, d_model, device):
        pe = torch.zeros(seq_length, d_model).to(device)
        position = torch.arange(0, seq_length, dtype=torch.float).unsqueeze(1).to(device)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)).to(device)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe.unsqueeze(0)  # (1, seq_length, d_model)
    def forward(self, x):
        # x shape: (batch_size, seq_length, input_size)
                
        # Transpose for transformer input format
        transformer_out = self.transformer_encoder(self.input_projection(x))
        
        residual_out = torch.cat([x, transformer_out], dim=2)
        bilstm_out, _ = self.bilstm(residual_out)
        
        bilstm_out = self.leakyrelu(bilstm_out)

        out = self.fc(bilstm_out)
        # Reshape to have (batch_size, num_steps, num_variables)
        
        return out
class BiLSTMTransformer(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size, num_heads, predict_length, dropout=0.1):
        super(BiLSTMTransformer, self).__init__()
        self.hidden_size = hidden_size
        
        # BiLSTM Layer
        self.bilstmencoder = BiLSTMEncoder(input_size, hidden_size*2, num_layers, hidden_size, num_heads, dropout=0.1)
        self.relu = nn.ReLU()
        
        # Transformer Decoder Layer
        self.decoder_layer = nn.TransformerDecoderLayer(d_model=hidden_size, nhead=num_heads, dropout=dropout, batch_first=True)
        self.transformer_decoder = nn.TransformerDecoder(self.decoder_layer, num_layers=num_layers*2)

        self.tgt_projection = nn.Linear(output_size, hidden_size)
        self.output = nn.Linear(hidden_size, output_size)

        self.pred_length = predict_length
    def positional_encoding(self, seq_length, d_model, device):
        pe = torch.zeros(seq_length, d_model).to(device)
        position = torch.arange(0, seq_length, dtype=torch.float).unsqueeze(1).to(device)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)).to(device)
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        return pe.unsqueeze(0)  # (1, seq_length, d_model)
        
    def forward(self, external, internal, y):
        #_, seq, size = y.size()
        
        #pe_y = self.positional_encoding(seq, size, y.device)

        #y = y + pe_y  # 将位置编码加到输入上
        X = torch.cat([external, internal], dim=2)
        y = self.tgt_projection(y)

        encoder_output = self.bilstmencoder(X)
        
        output = self.output(self.transformer_decoder(y, encoder_output))
        
        return output
    def predict(self, external, internal):
        src = torch.cat([external, internal], dim=2)
        batch_size, seq_leng, feat = src.shape
        memory = self.bilstmencoder(src)
        tgt = internal[:, -1:, :]
        predictions = []
        for _ in range(self.pred_length):
            output = self.output(self.transformer_decoder(self.tgt_projection(tgt), memory))
            predictions.append(output[:, -1:, :])
            tgt = torch.cat([tgt, output[:, -1:, :]], dim=1)
        return torch.cat(predictions, dim=1)


In [5]:
num_epochs = 50
input_size = 306
output_size = 1
hidden_size = 250
num_layers = 2
num_heads = 10
model = BiLSTMTransformer(input_size, hidden_size, num_layers, output_size, num_heads, predict_length).to(device)

In [7]:
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
teacher_forcing_ratio = 0
criterion = nn.MSELoss()
#criterion = nn.SmoothL1Loss()  # 用于回归任务
optimizer = torch.optim.Adam(model.parameters(), lr = 0.00008)
loss_per_epoch = []
val_mse_per_epoch = []
val_r2_per_epoch = []

for epoch in range(num_epochs):
    model.train()  # 确保模型在训练模式下
    for external, internal, batch_y in Train_dataloader:
        external, internal, batch_y = external.to(device), internal.to(device), batch_y.to(device)

        # 前向传播
        total_loss = 0
        y = internal[:, -1:, :]
        for step in range(predict_length):
            outputs = model(external, internal, y)
            next_pred = outputs[:, -1:, :]
            #outputs = model(batch_X, batch_y.view(batch_y.shape[0], batch_y.shape[2]))
            loss = criterion(next_pred, batch_y[:, step:step+1, :])
            total_loss += loss
            if np.random.rand() < teacher_forcing_ratio:
                next_input = batch_y[:, step:step + 1, :]
            else:
                next_input = next_pred
            y = torch.cat([y, next_input], dim=1)
                
        total_loss = total_loss / predict_length
        # 反向传播和优化
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()

    #scheduler.step()

    loss_per_epoch.append(loss.item())
    teacher_forcing_ratio -= 0.05

    # 评估验证集
    val_loss, (val_mse, val_mae, val_r2, val_mape) = evaluate_Transformer(model, Test_dataloader, criterion, device, [mean_squared_error, mean_absolute_error, r2_score, mean_absolute_percentage_error])
    val_mse_per_epoch.append(val_mse)
    val_r2_per_epoch.append(val_r2)

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss:.4f}, Val MSE: {val_mse:.4f}, Val MAE: {val_mae:.4f}, Val R²: {val_r2:.4f}, Val MAPE: {val_mape:.4f}')


Epoch [1/50], Loss: 68.2781, Val Loss: 13.5826, Val MSE: 13.6297, Val MAE: 2.2416, Val R²: 0.9154, Val MAPE: 0.0961
Epoch [2/50], Loss: 58.9617, Val Loss: 13.5298, Val MSE: 13.5756, Val MAE: 2.2333, Val R²: 0.9157, Val MAPE: 0.0954
Epoch [3/50], Loss: 57.3082, Val Loss: 13.3799, Val MSE: 13.4276, Val MAE: 2.2239, Val R²: 0.9167, Val MAPE: 0.0970
Epoch [4/50], Loss: 50.2730, Val Loss: 13.5955, Val MSE: 13.6449, Val MAE: 2.2355, Val R²: 0.9153, Val MAPE: 0.0957
Epoch [5/50], Loss: 50.0506, Val Loss: 13.9984, Val MSE: 14.0449, Val MAE: 2.3214, Val R²: 0.9128, Val MAPE: 0.1029
Epoch [6/50], Loss: 50.0143, Val Loss: 13.2781, Val MSE: 13.3268, Val MAE: 2.1967, Val R²: 0.9173, Val MAPE: 0.0948
Epoch [7/50], Loss: 50.6144, Val Loss: 13.9553, Val MSE: 14.0025, Val MAE: 2.2977, Val R²: 0.9131, Val MAPE: 0.1014
Epoch [8/50], Loss: 52.9202, Val Loss: 14.0667, Val MSE: 14.1160, Val MAE: 2.3190, Val R²: 0.9124, Val MAPE: 0.1025
Epoch [9/50], Loss: 51.6761, Val Loss: 13.9569, Val MSE: 14.0006, Val MA

KeyboardInterrupt: 

In [None]:

Epoch [38/50], Loss: 73.2371, Val Loss: 12.8126, Val MSE: 12.8590, Val MAE: 2.1686, Val R²: 0.9202, Val MAPE: 0.0943
Epoch [41/50], Loss: 69.2789, Val Loss: 12.9042, Val MSE: 12.9539, Val MAE: 2.1648, Val R²: 0.9196, Val MAPE: 0.0921
