In [1]:
import os
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
import torch.fft
from types import SimpleNamespace

from layers.Embed import DataEmbedding
from layers.Conv_Blocks import Inception_Block_V1

# Hyperparameters of the model 
configs = SimpleNamespace()
configs.seq_len = 100  
configs.pred_len = 1         
configs.label_len = 0
configs.top_k = 2
configs.d_model = 64  
configs.d_ff = 128  
configs.num_kernels = 3
configs.e_layers = 2
configs.enc_in = 2           
configs.c_out = 6            
configs.embed = 'fixed'
configs.freq = 'h'
configs.dropout = 0.3
configs.task_name = 'short_term_forecast'
configs.num_class = 10


In [2]:
def FFT_for_Period(x, k=2):
    x = x.float() 
    xf = torch.fft.rfft(x, dim=1)
    frequency_list = abs(xf).mean(0).mean(-1)
    frequency_list[0] = 0
    _, top_list = torch.topk(frequency_list, k)
    top_list = top_list.detach().cpu().numpy()
    period = [max(1, x.shape[1] // int(t)) for t in top_list]
    return period, abs(xf).mean(-1)[:, top_list]

class TimesBlock(nn.Module):
    def __init__(self, configs):
        super(TimesBlock, self).__init__()
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.k = configs.top_k
        # parameter-efficient design
        self.conv = nn.Sequential(
            Inception_Block_V1(configs.d_model, configs.d_ff,
                               num_kernels=configs.num_kernels),
            nn.GELU(),
            Inception_Block_V1(configs.d_ff, configs.d_model,
                               num_kernels=configs.num_kernels)
        )

    def forward(self, x):
        B, T, N = x.size()
        period_list, period_weight = FFT_for_Period(x, self.k)

        res = []
        for i in range(self.k):
            period = period_list[i]
            # padding
            if (self.seq_len + self.pred_len) % period != 0:
                length = (((self.seq_len + self.pred_len) // period) + 1) * period
                padding = torch.zeros([x.shape[0], (length - (self.seq_len + self.pred_len)), x.shape[2]]).to(x.device)
                out = torch.cat([x, padding], dim=1)
            else:
                length = (self.seq_len + self.pred_len)
                out = x
            # reshape
            out = out.reshape(B, length // period, period, N).permute(0, 3, 1, 2).contiguous()
            # 2D conv: from 1d Variation to 2d Variation
            out = self.conv(out)
            # reshape back
            out = out.permute(0, 2, 3, 1).reshape(B, -1, N)
            res.append(out[:, :(self.seq_len + self.pred_len), :])
        res = torch.stack(res, dim=-1)
        # adaptive aggregation
        period_weight = F.softmax(period_weight, dim=1)
        period_weight = period_weight.unsqueeze(1).unsqueeze(1).repeat(1, T, N, 1)
        res = torch.sum(res * period_weight, -1)
        # residual connection
        res = res + x
        return res


In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GraphResidualBlock(nn.Module):
    def __init__(self, num_nodes, hidden_dim=32):
        super().__init__()
        self.num_nodes = num_nodes

        # Learnable Adjacency Matrix
        self.A_param = nn.Parameter(torch.randn(num_nodes, num_nodes))

        # Node Feature Transformation
        self.fc1 = nn.Linear(1, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        if x.dim() == 3:  
            x = x.squeeze(1)  

        # Normalised adjacency matrix
        A = torch.softmax(self.A_param, dim=-1)  

        h = x.unsqueeze(-1)        
        h = self.fc1(h)             
        h = torch.matmul(A, h)     
        h = F.relu(h)
        h = self.fc2(h).squeeze(-1) 

        return x + h 

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class GRN(nn.Module):
    """ Gated Residual Network """
    def __init__(self, input_dim, hidden_dim, output_dim=None, dropout=0.3):
        super().__init__()
        output_dim = output_dim or input_dim
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, output_dim)
        self.gate = nn.Linear(output_dim, output_dim)
        self.norm = nn.LayerNorm(output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        residual = x
        x = F.gelu(self.fc1(x))
        x = self.dropout(self.fc2(x))
        gate = torch.sigmoid(self.gate(x))
        x = gate * x + (1 - gate) * residual
        return self.norm(x)

In [5]:
class PositionalEncoding(nn.Module):
    """Standard Transformer Position Encoding"""
    def __init__(self, d_model, max_len=1000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float32).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-torch.log(torch.tensor(10000.0)) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0)   
        self.register_buffer("pe", pe)

    def forward(self, x):
        return x + self.pe[:, :x.size(1), :]


In [6]:
class CompactTFT(nn.Module):
    def __init__(self, enc_in, d_model=64, n_heads=4, n_layers=2, dropout=0.3):
        super().__init__()
        self.input_proj = nn.Linear(enc_in, d_model)
        self.pos_encoding = PositionalEncoding(d_model)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model, nhead=n_heads,
            dim_feedforward=d_model*2, dropout=dropout, batch_first=True
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=n_layers)

        # Static Fusion + GRN
        self.static_fuse = GRN(d_model, d_model*2, d_model, dropout=dropout)

        # Output layer normalisation
        self.norm = nn.LayerNorm(d_model)

    def forward(self, x, static_emb=None):
        x = self.input_proj(x)
        x = self.pos_encoding(x)
        x = self.encoder(x)

        if static_emb is not None:
            s = static_emb.unsqueeze(1).repeat(1, x.size(1), 1)
            x = self.static_fuse(x + s)

        return self.norm(x)

In [7]:
# Coordinates of six points (x, y)
coords = torch.tensor([
    [532043.125, 3401273.750],
    [532036.375, 3401250.250],
    [532028.938, 3401220.750],
    [532246.000, 3401357.500],
    [532248.438, 3401325.500],
    [532248.313, 3401293.000]
], dtype=torch.float32)

class Model(nn.Module):
    """
    TimesNet + SimpleTFT + GraphResidualBlock
    x_enc: [B, T, C_enc]
    x_mark_enc: temporal marks (or None)
    x_static: [B, static_dim]
    """
    def __init__(self, configs, static_dim=7):
        super(Model, self).__init__()
        self.configs = configs
        self.task_name = configs.task_name
        self.seq_len = configs.seq_len
        self.pred_len = configs.pred_len
        self.layer = configs.e_layers

        # -------------------------
        # 1. TFT Pre-processing
        # -------------------------
        self.tft = CompactTFT(enc_in=configs.enc_in, d_model=configs.d_model, n_heads=4, n_layers=2, dropout=0.3)

        # -------------------------
        # 2. TimesNet
        # -------------------------
        self.model = nn.ModuleList([TimesBlock(configs) for _ in range(configs.e_layers)])
        self.layer_norm = nn.LayerNorm(configs.d_model)

        # -------------------------
        # 3. static feature projection
        # -------------------------
        self.static_proj = nn.Linear(static_dim, configs.d_model)

        # -------------------------
        # 4. Predictive output
        # -------------------------
        self.projection = nn.Linear(configs.d_model, configs.c_out)
        # -------------------------
        # 5. GraphResidualBlock
        # -------------------------
        self.graph_residual = GraphResidualBlock(num_nodes=configs.c_out, hidden_dim=32)

        self.predict_linear = nn.Linear(self.seq_len, self.pred_len + self.seq_len)

    def _add_static_to_enc(self, enc_out, x_static):
        if x_static is None:
            return enc_out
        static_emb = self.static_proj(x_static)                    # [B, d_model]
        static_emb = static_emb.unsqueeze(1).repeat(1, enc_out.size(1), 1)  # [B, T, d_model]
        return enc_out + static_emb

    def forecast(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None, x_static=None):
        # 1. TFT Pre-processing
        #enc_out = self.tft(x_enc)  # [B, seq_len, d_model]
        enc_out = self.tft(x_enc, static_emb=x_static)  # [B, seq_len, d_model]

        # 2. Add static features
        #enc_out = self._add_static_to_enc(enc_out, x_static)

        # 3. Time-line linear alignment
        enc_out = self.predict_linear(enc_out.permute(0, 2, 1)).permute(0, 2, 1)  # [B, seq_len, d_model]

        # 4. TimesNet blocks
        for i in range(self.layer):
            enc_out = self.layer_norm(self.model[i](enc_out))

        # 5. Projected onto c_out
        dec_out = self.projection(enc_out)  # [B, seq_len, c_out]

        # 6. GraphResidualBlock refinement
        dec_out = self.graph_residual(dec_out[:, -self.pred_len:, :])   
        dec_out = dec_out.unsqueeze(1)   

        return dec_out

    def forward(self, x_enc, x_mark_enc=None, x_dec=None, x_mark_dec=None, mask=None, x_static=None):
        dec_out = self.forecast(x_enc, x_mark_enc, x_dec, x_mark_dec, x_static=x_static)
        return dec_out.squeeze(1)   

In [8]:
# Simple interface/shape testing
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Here we use a small batch to test whether the forward pass can run successfully.
B = 4
dummy_x_enc = torch.randn(B, configs.seq_len, configs.enc_in).to(device)    
dummy_x_mark = None    
dummy_x_dec = None
dummy_x_mark_dec = None
dummy_x_static = torch.randn(B, 1).to(device)   

model = Model(configs, static_dim=1).to(device)
model.eval()

with torch.no_grad():
    out = model(dummy_x_enc, dummy_x_mark, dummy_x_dec, dummy_x_mark_dec, mask=None, x_static=dummy_x_static)
    print("Model output shape:", out.shape)  


Model output shape: torch.Size([4, 6])


In [None]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader

# Path
data_dir = r"D:\DATA"

# Loading data
X_seq_train = np.load(f"{data_dir}/X_seq_train.npy")   
X_static_train = np.load(f"{data_dir}/X_static_train.npy")   
Y_train = np.load(f"{data_dir}/Y_train.npy")  

X_seq_test = np.load(f"{data_dir}/X_seq_test.npy")
X_static_test = np.load(f"{data_dir}/X_static_test.npy")
Y_test = np.load(f"{data_dir}/Y_test.npy")

print("training set:", X_seq_train.shape, X_static_train.shape, Y_train.shape)
print("test set:", X_seq_test.shape, X_static_test.shape, Y_test.shape)

# ========== Min-Max Normalisation ==========
def minmax_scale(train, test):
    min_val = train.min(axis=0, keepdims=True)
    max_val = train.max(axis=0, keepdims=True)
    train_norm = (train - min_val) / (max_val - min_val + 1e-8)
    test_norm = (test - min_val) / (max_val - min_val + 1e-8)
    return train_norm, test_norm, min_val, max_val

# Normalise the three types of data respectively
X_seq_train, X_seq_test, X_seq_min, X_seq_max = minmax_scale(
    X_seq_train.reshape(-1, X_seq_train.shape[-1]),
    X_seq_test.reshape(-1, X_seq_test.shape[-1])
)
X_seq_train = X_seq_train.reshape(-1, configs.seq_len, 2)
X_seq_test = X_seq_test.reshape(-1, configs.seq_len, 2)

X_static_train, X_static_test, X_static_min, X_static_max = minmax_scale(X_static_train, X_static_test)
Y_train, Y_test, Y_min, Y_max = minmax_scale(Y_train, Y_test)

# ==========  Convert to Tensor  ==========
X_seq_train_t = torch.tensor(X_seq_train, dtype=torch.float32)
X_static_train_t = torch.tensor(X_static_train, dtype=torch.float32)
Y_train_t = torch.tensor(Y_train, dtype=torch.float32)

X_seq_test_t = torch.tensor(X_seq_test, dtype=torch.float32)
X_static_test_t = torch.tensor(X_static_test, dtype=torch.float32)
Y_test_t = torch.tensor(Y_test, dtype=torch.float32)

# Composite TensorDataset
train_dataset = TensorDataset(X_seq_train_t, X_static_train_t, Y_train_t)
test_dataset = TensorDataset(X_seq_test_t, X_static_test_t, Y_test_t)

# DataLoader
batch_size = 512
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=10, pin_memory=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=8, pin_memory=True)

训练集: (55085, 100, 2) (55085, 1) (55085, 6)
测试集: (20793, 100, 2) (20793, 1) (20793, 6)


In [None]:
# NOTE Custom loss function

import torch
import torch.nn as nn

coords = torch.tensor([
    [532043.125, 3401273.750],
    [532036.375, 3401250.250],
    [532028.938, 3401220.750],
    [532246.000, 3401357.500],
    [532248.438, 3401325.500],
    [532248.313, 3401293.000]
], dtype=torch.float32)

# Adjacent point pairs (first three in one group, last three in another)
neighbor_pairs = [(0,1), (1,2), (3,4), (4,5)]

# Calculate the distance between each pair of adjacent points
distances = torch.tensor([
    torch.norm(coords[i] - coords[j]).item() for i,j in neighbor_pairs
], dtype=torch.float32)

def continuity_loss(pred, y, beta=0.1, device="cpu"):
    mse = nn.MSELoss()(pred, y)

    # --- Spatial Continuity ---
    spatial_loss = 0.0
    for k, (i, j) in enumerate(neighbor_pairs):
        dij = distances[k].to(device)
        spatial_loss += torch.mean(((pred[:, i] - pred[:, j]) ** 2) / (dij ** 2))

    total_loss = mse + beta * spatial_loss
    return total_loss


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import root_mean_squared_error, mean_absolute_percentage_error, r2_score

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Use of equipment:", device)

model = model.to(device)
optimizer = optim.AdamW(model.parameters(), lr=1e-3)

# ===========================
# GPU + Mixed-Precision Training
# ===========================
def train_one_epoch(model, loader, optimizer, scaler, device):
    model.train()
    total_loss = 0
    total_rmse = 0
    total_mape = 0
    total_r2 = 0
    n_samples = 0

    for x_seq, x_static, y in loader:
        B, seq_len, _ = x_seq.shape
        pred_len = 1

        x_enc = x_seq.to(device)
        x_static = x_static.to(device)
        y = y.to(device)

        x_mark_enc = torch.zeros(B, seq_len, 5, device=device)
        x_dec = torch.zeros(B, pred_len, x_seq.size(-1), device=device)
        x_mark_dec = torch.zeros(B, pred_len, 5, device=device)

        optimizer.zero_grad()

        with torch.amp.autocast(device_type='cuda', dtype=torch.float16):
            out = model(x_enc, x_mark_enc, x_dec, x_mark_dec, x_static=x_static)
            out = out.squeeze(1) 
            loss = continuity_loss(out, y, beta=0.1)

        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        y_np = y.detach().cpu().numpy()
        out_np = out.detach().cpu().numpy()
        rmse = root_mean_squared_error(y_np, out_np)
        #mape = mean_absolute_percentage_error(y_np, out_np)
        r2 = r2_score(y_np, out_np)

        total_loss += loss.item() * B
        total_rmse += rmse * B
        #total_mape += mape * B
        total_r2 += r2 * B
        n_samples += B

    avg_loss = total_loss / n_samples
    avg_rmse = total_rmse / n_samples
    #avg_mape = total_mape / n_samples
    avg_r2 = total_r2 / n_samples

    return avg_loss, avg_rmse, 0, avg_r2


# ===========================
# GPU + Mixed Precision Evaluation
# ===========================
def evaluate(model, loader, device):
    model.eval()
    total_loss = 0
    total_rmse = 0
    total_mape = 0
    total_r2 = 0
    total_max = 0
    n_samples = 0

    with torch.no_grad():
        for x_seq, x_static, y in loader:
            B, seq_len, _ = x_seq.shape
            pred_len = 1

            x_enc = x_seq.to(device)
            x_static = x_static.to(device)
            y = y.to(device)

            x_mark_enc = torch.zeros(B, seq_len, 5, device=device)
            x_dec = torch.zeros(B, pred_len, x_seq.size(-1), device=device)
            x_mark_dec = torch.zeros(B, pred_len, 5, device=device)

            out = model(x_enc, x_mark_enc, x_dec, x_mark_dec, x_static=x_static)
            out = out.squeeze(1)

            loss = continuity_loss(out, y, beta=0.1)

            y_np = y.detach().cpu().numpy()
            out_np = out.detach().cpu().numpy()
            rmse = root_mean_squared_error(y_np, out_np)
            mape = mean_absolute_percentage_error(y_np, out_np)
            #r2 = r2_score(y_np, out_np)
            max_err = np.max(np.abs(y_np - out_np))

            total_loss += loss.item() * B
            total_rmse += rmse * B
            total_mape += mape * B
            #total_r2 += r2 * B
            total_max += max_err * B
            n_samples += B

    avg_loss = total_loss / n_samples
    avg_rmse = total_rmse / n_samples
    avg_mape = total_mape / n_samples
    #avg_r2 = total_r2 / n_samples
    avg_max = total_max / n_samples

    return avg_loss, avg_rmse, avg_mape, 0, avg_max

使用设备: cuda


In [None]:
print(model)

from torchinfo import summary

summary(
    model,
    input_data=(
        torch.randn(2, configs.seq_len, configs.enc_in).to(device),    
        torch.zeros(2, configs.seq_len, 5).long().to(device),         
        torch.zeros(2, 1, configs.enc_in).to(device),                  
        torch.zeros(2, 1, 5).long().to(device),                       
        torch.randn(2, 1).to(device)                                  
    ),
    verbose=1
)


Model(
  (tft): CompactTFT(
    (input_proj): Linear(in_features=2, out_features=64, bias=True)
    (pos_encoding): PositionalEncoding()
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0-1): 2 x TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
          )
          (linear1): Linear(in_features=64, out_features=128, bias=True)
          (dropout): Dropout(p=0.3, inplace=False)
          (linear2): Linear(in_features=128, out_features=64, bias=True)
          (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.3, inplace=False)
          (dropout2): Dropout(p=0.3, inplace=False)
        )
      )
    )
    (static_fuse): GRN(
      (fc1): Linear(in_features=64, out_features=128, bias=True)
      (fc2): Linear(in_features=128, out_fe

Layer (type:depth-idx)                             Output Shape              Param #
Model                                              [2, 6]                    128
├─CompactTFT: 1-1                                  [2, 100, 64]              20,864
│    └─Linear: 2-1                                 [2, 100, 64]              192
│    └─PositionalEncoding: 2-2                     [2, 100, 64]              --
│    └─TransformerEncoder: 2-3                     [2, 100, 64]              --
│    │    └─ModuleList: 3-1                        --                        66,944
│    └─LayerNorm: 2-4                              [2, 100, 64]              128
├─Linear: 1-2                                      [2, 64, 101]              10,201
├─ModuleList: 1-5                                  --                        (recursive)
│    └─TimesBlock: 2-5                             [2, 101, 64]              --
│    │    └─Sequential: 3-2                        [2, 64, 13, 8]            574,016
│    │

In [None]:
import torch
import gc
import pandas as pd
import os

# Initialise GradScaler
scaler = torch.amp.GradScaler()

epochs = 5000
best_val_loss = float("inf")
best_val_me = float("inf")

train_loss_list = []
val_loss_list = []

# Save Path
save_path = r"D:\0DATA"
os.makedirs(save_path, exist_ok=True)
excel_file = os.path.join(save_path, "train_val_loss_1013.xlsx")

for epoch in range(1, epochs+1):
    train_loss, train_rmse, train_mape, train_r2 = train_one_epoch(
        model, train_loader, optimizer, scaler, device
    )

    gc.collect()
    torch.cuda.empty_cache()

    val_loss, val_rmse, val_mape, val_r2, val_max = evaluate(model, test_loader, device)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model, os.path.join(save_path, f"best_val_mse_1013_test.pth"))

    if val_max < best_val_me:
        best_val_me = val_max
        torch.save(model, os.path.join(save_path, f"best_val_max_1013_test.pth"))

    print(
        f"Epoch {epoch:02d}/{epochs} | "
        f"Train Loss: {train_loss:.6f} , Val Loss: {val_loss:.6f} | "
        f"Train RMSE: {train_rmse:.6f} , Train R2: {train_r2:.6f} | "
        f"Val RMSE: {val_rmse:.6f} , Val MAPE: {val_mape:.6f} , Val Max_Err: {val_max:.6f}"
    )

    train_loss_list.append(train_loss)
    val_loss_list.append(val_loss)


df_loss = pd.DataFrame({
    "Train Loss": train_loss_list,
    "Validation Loss": val_loss_list
})
df_loss.to_excel(excel_file, index_label="Epoch")

print("✅ Training complete, best validation set loss:", best_val_loss)
print(f"Training/Validation Loss saved to: {excel_file}")

Epoch 01/5000 | Train Loss: 0.033951 , Val Loss: 0.000241 | Train RMSE: 0.117938 , Train R2: 0.418967 | Val RMSE: 0.013736 , Val MAPE: 0.043159 , Val Max_Err: 0.025704
Epoch 02/5000 | Train Loss: 0.000874 , Val Loss: 0.000190 | Train RMSE: 0.029414 , Train R2: 0.985053 | Val RMSE: 0.011901 , Val MAPE: 0.037411 , Val Max_Err: 0.022272
Epoch 03/5000 | Train Loss: 0.000525 , Val Loss: 0.000154 | Train RMSE: 0.022838 , Train R2: 0.991013 | Val RMSE: 0.010612 , Val MAPE: 0.032805 , Val Max_Err: 0.020332
Epoch 04/5000 | Train Loss: 0.000375 , Val Loss: 0.000239 | Train RMSE: 0.019336 , Train R2: 0.993576 | Val RMSE: 0.014147 , Val MAPE: 0.043972 , Val Max_Err: 0.023385
Epoch 05/5000 | Train Loss: 0.000300 , Val Loss: 0.000158 | Train RMSE: 0.017286 , Train R2: 0.994871 | Val RMSE: 0.011041 , Val MAPE: 0.031559 , Val Max_Err: 0.019408
Epoch 06/5000 | Train Loss: 0.000270 , Val Loss: 0.000123 | Train RMSE: 0.016398 , Train R2: 0.995382 | Val RMSE: 0.009141 , Val MAPE: 0.031299 , Val Max_Err: 0