In [1]:
import os
import sys
PROJECT_DIR = os.path.abspath("../..")
print(PROJECT_DIR)
os.chdir(PROJECT_DIR)
sys.path.append(PROJECT_DIR)


/home/S22/workspace/BasicTS


In [2]:
from typing import Dict

import torch
import lightgbm as lgb
from torch.utils.data import DataLoader

from basicts.utils import load_pkl, get_regular_settings
from basicts.data import TimeSeriesForecastingDataset
from basicts.metrics import masked_mae, masked_rmse, masked_mape
from basicts.scaler import ZScoreScaler

## Hyper-parameters

In [3]:
# construct configs
dataset_name = "PEMS08"

regular_settings = get_regular_settings(dataset_name)

input_len = regular_settings['INPUT_LEN']
output_len = regular_settings['OUTPUT_LEN']
rescale = regular_settings['RESCALE']
null_val = regular_settings['NULL_VAL']
norm_each_channel = regular_settings['NORM_EACH_CHANNEL']
train_val_test_ratio = regular_settings['TRAIN_VAL_TEST_RATIO']

# target_time_series = None # for subset forecasting

gpu_num = 1
batch_size = 128 # only used for collecting data

# lgm params
params = {
    'boosting_type': 'gbdt',
    'objective': 'regression',
    'metric': 'l2',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
    'bagging_fraction': 0.8,
    'bagging_freq': 5,
    'verbose': 0
}

## Construct Dataset


In [4]:
train_set = TimeSeriesForecastingDataset(dataset_name=dataset_name, input_len=input_len, output_len=output_len, train_val_test_ratio=train_val_test_ratio, mode="train")
valid_set = TimeSeriesForecastingDataset(dataset_name=dataset_name, input_len=input_len, output_len=output_len, train_val_test_ratio=train_val_test_ratio, mode="valid")
test_set = TimeSeriesForecastingDataset(dataset_name=dataset_name, input_len=input_len, output_len=output_len, train_val_test_ratio=train_val_test_ratio, mode="test")

train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

scaler = ZScoreScaler(dataset_name=dataset_name, train_ratio=train_val_test_ratio[0], norm_each_channel=norm_each_channel, rescale=rescale)


In [5]:
# training * validation
Xs_train = []
Ys_train = []
Xs_valid = []
Ys_valid = []
Xs_test = []
Ys_test = []

def preprocessing(input_data, scaler, target_time_series) -> Dict:
    if scaler is not None:
        input_data['target'] = scaler.transform(input_data['target'])
        input_data['inputs'] = scaler.transform(input_data['inputs'])
    if target_time_series is not None:
        input_data['target'] = input_data['target'][:, :, target_time_series, :]
        input_data['inputs'] = input_data['inputs'][:, :, target_time_series, :]
    return input_data

for i, iter_data in enumerate(train_loader):
    iter_data = preprocessing(iter_data, scaler=scaler, target_time_series=target_time_series)
    inputs, target = iter_data['inputs'], iter_data['target']
    Xs_train.append(inputs)
    Ys_train.append(target)

for i, iter_data in enumerate(valid_loader):
    iter_data = preprocessing(iter_data, scaler=scaler, target_time_series=target_time_series)
    inputs, target = iter_data['inputs'], iter_data['target']
    Xs_valid.append(inputs)
    Ys_valid.append(target)

for i, iter_data in enumerate(test_loader):
    iter_data = preprocessing(iter_data, scaler=scaler, target_time_series=target_time_series)
    inputs, target = iter_data['inputs'], iter_data['target']
    Xs_test.append(inputs)
    Ys_test.append(target)

Xs_train = torch.cat(Xs_train, dim=0)[..., [0]]
Ys_train = torch.cat(Ys_train, dim=0)[..., [0]]
Xs_valid = torch.cat(Xs_valid, dim=0)[..., [0]]
Ys_valid = torch.cat(Ys_valid, dim=0)[..., [0]]
Xs_test = torch.cat(Xs_test, dim=0)[..., [0]]
Ys_test = torch.cat(Ys_test, dim=0)[..., [0]]

In [6]:
def reshape(data):
    B, L, N, C = data.shape
    data = data[..., 0].transpose(1, 2).reshape(B*N, L)
    return data


## Train (Direct Multi-Step Forecasting)

In [7]:
# direct forecasting
from sklearn.multioutput import MultiOutputRegressor
model = MultiOutputRegressor(lgb.LGBMRegressor(), n_jobs = -1)
model.fit(reshape(Xs_train), reshape(Ys_train))

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.001915 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3060
[LightGBM] [Info] Number of data points in the train set: 10691, number of used features: 12
[LightGBM] [Info] Start training from score 0.720899
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.142087 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3060
[LightGBM] [Info] Number of data points in the train set: 10691, number of used features: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.145854 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 3060
[LightGBM] [Info] Number of data points in the train set: 10691, number of used features: 12
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testin

## Test (Direct Multi-Step Forecasting)

In [8]:
# inference
preds_test = model.predict(reshape(Xs_test))
B, N, L, C = Ys_test.shape
preds_test = torch.tensor(preds_test).reshape(B, N, L, 1)

In [9]:
# post process
def postprocessing(input_data, scaler) -> Dict:
    if scaler is not None:
        input_data = scaler.inverse_transform(input_data)
    return input_data


In [10]:
prediction = postprocessing(preds_test, scaler=scaler)
real_value = postprocessing(Ys_test, scaler=scaler)

In [11]:
# print results
print("MAE: ", masked_mae(prediction, real_value, null_val).item())
print("RMSE: ", masked_rmse(prediction, real_value, null_val).item())
print("MAPE: {:.2f}%".format(masked_mape(prediction, real_value, null_val) * 100))

MAE:  27.82349904069392
RMSE:  38.74070708373054
MAPE: 10.20%
