In [None]:
import os
import sys
import warnings

from joblib import Parallel, delayed
from tqdm import tqdm

# Resolve the project root only once per kernel. The relative path "../.." is
# evaluated against the current working directory, and this cell changes that
# directory below, so recomputing it on a re-run would climb two more levels.
# Assumes the kernel is started two directories below the project root.
if "PROJECT_DIR" not in globals():
    PROJECT_DIR = os.path.abspath("../..")

# Suppress every warning for the rest of the session.
warnings.filterwarnings("ignore")

# Run from the project root and make the project importable from there.
os.chdir(PROJECT_DIR)
if PROJECT_DIR not in sys.path:  # avoid stacking duplicates when the cell is re-run
    sys.path.append(PROJECT_DIR)

In [None]:
from typing import Dict
import numpy as np
import torch
from torch.utils.data import DataLoader
from sklearn.svm import SVR
from basicts.utils import load_pkl, get_regular_settings
from basicts.data import TimeSeriesForecastingDataset
from basicts.metrics import masked_mae, masked_rmse, masked_mape, masked_mse
from basicts.scaler import ZScoreScaler

## Hyper-parameters

In [None]:
# Experiment configuration: every tunable value used by the cells below.
dataset_name = "ETTh1"

# Per-dataset defaults supplied by BasicTS for `dataset_name`.
regular_settings = get_regular_settings(dataset_name)

# History and horizon lengths are fixed to 96 here; the key holding the
# dataset's regular value is kept in the trailing comment for reference.
input_len = 96 # overrides regular_settings['INPUT_LEN']
output_len = 96 # overrides regular_settings['OUTPUT_LEN']
rescale = regular_settings['RESCALE']  # forwarded to ZScoreScaler
null_val = regular_settings['NULL_VAL']  # forwarded to the masked_* metrics
norm_each_channel = regular_settings['NORM_EACH_CHANNEL']  # forwarded to ZScoreScaler
train_val_test_ratio = regular_settings['TRAIN_VAL_TEST_RATIO']  # used by the datasets; first entry also goes to the scaler

# Index (or indices) applied to the third axis of each batch in `preprocessing`;
# None keeps every series (set it to forecast only a subset).
target_time_series = None # for subset forecasting

gpu_num = 1  # NOTE(review): not referenced by any visible cell
batch_size = 128 # only used for collecting data

## Construct Dataset

In [None]:
# The three splits share every constructor argument except `mode`.
dataset_kwargs = dict(
    dataset_name=dataset_name,
    input_len=input_len,
    output_len=output_len,
    train_val_test_ratio=train_val_test_ratio,
)
train_set = TimeSeriesForecastingDataset(mode="train", **dataset_kwargs)
valid_set = TimeSeriesForecastingDataset(mode="valid", **dataset_kwargs)
test_set = TimeSeriesForecastingDataset(mode="test", **dataset_kwargs)

# The training loader shuffles, and the collected training windows are later
# subsampled with a stride, so the shuffle order decides which windows the
# model is fitted on. Seeding a dedicated generator makes that selection
# repeatable under Restart & Run All (re-create the loader to replay it).
SHUFFLE_SEED = 0
shuffle_generator = torch.Generator().manual_seed(SHUFFLE_SEED)

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, generator=shuffle_generator)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

# Z-score scaler configured from the same dataset and the training share of the split.
scaler = ZScoreScaler(
    dataset_name=dataset_name,
    train_ratio=train_val_test_ratio[0],
    norm_each_channel=norm_each_channel,
    rescale=rescale,
)

In [None]:
# Accumulators for the preprocessed training and test batches
# (the validation loader is not consumed in this cell).
Xs_train = []
Ys_train = []

Xs_test = []
Ys_test = []

def preprocessing(input_data, scaler, target_time_series) -> Dict:
    """Scale a batch and optionally keep only selected series.

    Args:
        input_data: mapping with 'target' and 'inputs' entries. The mapping is
            updated in place and also returned.
        scaler: object exposing ``transform``; ``None`` skips scaling.
        target_time_series: index (or indices) applied to the third axis of
            both entries; ``None`` keeps every series.

    Returns:
        The same ``input_data`` mapping with both entries replaced.
    """
    # 'target' is always processed before 'inputs'.
    fields = ('target', 'inputs')

    if scaler is not None:
        for key in fields:
            input_data[key] = scaler.transform(input_data[key])

    if target_time_series is not None:
        for key in fields:
            input_data[key] = input_data[key][:, :, target_time_series, :]

    return input_data

def _collect_first_channel(loader):
    """Preprocess every batch of ``loader`` and stack inputs and targets.

    Batches are concatenated along the first axis, and only index 0 of the
    last axis is kept (as a length-1 axis).
    """
    xs, ys = [], []
    for batch in loader:
        batch = preprocessing(batch, scaler=scaler, target_time_series=target_time_series)
        xs.append(batch['inputs'])
        ys.append(batch['target'])
    return torch.cat(xs, dim=0)[..., [0]], torch.cat(ys, dim=0)[..., [0]]


# Training data: keep every `input_len`-th collected sample to shrink the fit set.
Xs_train, Ys_train = _collect_first_channel(train_loader)
Xs_train = Xs_train[::input_len]
Ys_train = Ys_train[::input_len]

# Test data: every collected sample is kept.
Xs_test, Ys_test = _collect_first_channel(test_loader)

In [None]:
def reshape(data):
    """Flatten a 4-D tensor into a 2-D NumPy array with one row per (sample, series).

    ``data`` is unpacked as (B, L, N, C). Only index 0 of the last axis is
    used; the result has shape (B * N, L), with rows ordered sample-major and
    then by series.
    """
    n_samples, n_steps, n_series, _ = data.shape
    first_channel = data[..., 0]                    # (B, L, N)
    series_major = first_channel.permute(0, 2, 1)   # (B, N, L)
    flat = series_major.reshape(n_samples * n_series, n_steps)
    return flat.cpu().numpy()

In [None]:
# Flatten the model inputs and training targets to 2-D arrays. Ys_test is left
# as a 4-D tensor on purpose: it is compared against the predictions later.
# NOTE: the names are rebound, so this cell cannot be run twice in a row.
Xs_test, Xs_train, Ys_train = map(reshape, (Xs_test, Xs_train, Ys_train))

## Train

In [None]:
# direct forecasting
from sklearn.multioutput import MultiOutputRegressor

# Wrap a polynomial-kernel SVR so that every output column gets its own
# regressor; n_jobs=-1 lets the wrapper use all available workers.
base_regressor = SVR(kernel="poly")
model = MultiOutputRegressor(base_regressor, n_jobs=-1)
model.fit(Xs_train, Ys_train)

## Test

In [None]:
# inference
# Ys_test is unpacked as (B, L, N, C). The predictions are folded back to
# (B, N, L, 1), and Ys_test is moved to the same layout for the metrics below.
# NOTE: Ys_test is rebound here, so re-running this cell swaps its axes again.
B, L, N, C = Ys_test.shape
print(B, N, L, C)
preds_test = torch.tensor(model.predict(Xs_test)).reshape(B, N, L, 1)
Ys_test = Ys_test.permute(0, 2, 1, 3)

In [None]:
# print results
# Scalar metrics printed as plain Python floats.
for label, metric_fn in (("MAE: ", masked_mae), ("MSE: ", masked_mse), ("RMSE: ", masked_rmse)):
    print(label, metric_fn(preds_test, Ys_test, null_val).item())

# MAPE is reported as a percentage with two decimals.
mape_percent = masked_mape(preds_test, Ys_test, null_val) * 100
print("MAPE: {:.2f}%".format(mape_percent))

## Visualization

In [None]:
import matplotlib.pyplot as plt
import numpy as np

print(preds_test.shape)
# preds_test and Ys_test are expected to share the layout (B, N, L, 1) here.
# Choose one index along B and one along N to compare a single predicted
# sequence with its ground truth.

B_index = 0  # index along the B axis
N_index = 0  # index along the N axis

# Pull out the predicted and the true sequence
predicted_values = preds_test[B_index, N_index, :, 0]  # shape: (L,)
true_values = Ys_test[B_index, N_index, :, 0]  # shape: (L,)

# Draw both sequences on a single axis
fig, ax = plt.subplots(figsize=(10, 6))
ax.plot(predicted_values, label='Predicted', color='blue', linestyle='--')
ax.plot(true_values, label='True', color='red', linestyle='-')
ax.set_xlabel('Time Step (L)')
ax.set_ylabel('Value')
ax.set_title(f'Prediction vs True Values (B={B_index}, N={N_index})')
ax.legend()
ax.grid(True)
plt.show()
