In [1]:
# Fetch the EasyTSF sources into ./EasyTSF; later cells read the requirements
# file and the dataset from paths under that directory.
!git clone https://github.com/Olyco/EasyTSF.git

Cloning into 'EasyTSF'...
remote: Enumerating objects: 283, done.[K
remote: Counting objects: 100% (283/283), done.[K
remote: Compressing objects: 100% (194/194), done.[K
remote: Total 283 (delta 142), reused 195 (delta 75), pack-reused 0 (from 0)[K
Receiving objects: 100% (283/283), 1.53 MiB | 5.35 MiB/s, done.
Resolving deltas: 100% (142/142), done.


In [2]:
!pip install -r "EasyTSF/requirements.txt"

Collecting Brotli==1.0.9 (from -r EasyTSF/requirements.txt (line 1))
  Downloading Brotli-1.0.9-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.4 kB)
Collecting certifi==2024.7.4 (from -r EasyTSF/requirements.txt (line 2))
  Downloading certifi-2024.7.4-py3-none-any.whl.metadata (2.2 kB)
Collecting charset-normalizer==3.3.2 (from -r EasyTSF/requirements.txt (line 3))
  Downloading charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (33 kB)
Collecting colorama==0.4.6 (from -r EasyTSF/requirements.txt (line 4))
  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Collecting filelock==3.13.1 (from -r EasyTSF/requirements.txt (line 5))
  Downloading filelock-3.13.1-py3-none-any.whl.metadata (2.8 kB)
Collecting fsspec==2024.6.1 (from -r EasyTSF/requirements.txt (line 6))
  Downloading fsspec-2024.6.1-py3-none-any.whl.metadata (11 kB)
Collecting idna==3.7 (from -r EasyTSF/requirements.txt (line 7))
  Downloading id

In [None]:
!pip install ray

Collecting ray
  Downloading ray-2.45.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (19 kB)
Downloading ray-2.45.0-cp311-cp311-manylinux2014_x86_64.whl (68.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.4/68.4 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ray
Successfully installed ray-2.45.0


In [None]:
# !python EasyTSF/train.py -c EasyTSF/config/reproduce_conf/RMoK/ETTh1_96for96.py

Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type      | Params | Mode 
----------------------------------------------------
0 | model         | DenseRMoK | 130 K  | train
1 | loss_function | MSELoss   | 0      | train
----------------------------------------------------
130 K     Trainable params
0         Non-trainable params
130 K     Total params
0.520     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode
Epoch 0: 100% 132/132 [02:33<00:00,  1.16s/it, v_num=ed_1, train/loss_step=0.404]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0% 0/44 [00:00<?, ?it/s]       [A
Validation DataLoader 0:   0% 0/44 [00:00<?, ?it/s][A
Validation DataLoader 0:   2% 1/44 [00:00<00:13,  3.27it/s][A
Validation DataLoader 0:   5% 2/44 [00:00<00:10,  3.90it/s][A
Validation DataLoader 0:   7% 3/44 [00:00<00:09,  4.17it/s][A
Validation

In [None]:
# List the working directory to confirm the clone created EasyTSF/.
!ls

EasyTSF  sample_data


In [1]:
# Print the current working directory; the relative paths used in this
# notebook (e.g. config['data_root']) are resolved against it.
!pwd

/content


In [2]:
# Experiment configuration consumed by DataInterface(**config).
config = {
    # --- dataset ---
    "dataset_name": "ETTh1",  # file is <data_root>/<dataset_name>.npz
    "var_num": 7,
    "freq": 60,  # sampling interval in minutes (divisor of the "tod" feature)
    "data_split": [8640, 2880, 2880],  # train / val / test row counts

    # --- window sizes (rows of history, rows to predict) ---
    "hist_len": 5,
    "pred_len": 2,

    # --- optimisation settings (not read by the data module) ---
    "batch_size": 64,
    "max_epochs": 10,
    "lr": 0.0001,
    "optimizer": "AdamW",
    "optimizer_betas": (0.95, 0.9),
    "optimizer_weight_decay": 1e-5,
    "lr_scheduler": "StepLR",
    "lr_step_size": 1,
    "lr_gamma": 0.5,
    "gradient_clip_val": 5,
    "val_metric": "val/loss",
    "test_metric": "test/mae",
    "es_patience": 10,

    # --- time features ---
    "norm_time_feature": False,
    "time_feature_cls": ["tod", "dow"],

    "num_workers": 2,

    "model_name": "DenseRMoK",

    "revin_affine": True,

    "data_root": "EasyTSF/dataset",
}

In [7]:
import lightning.pytorch as pl
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

import numpy as np
import pandas as pd
import os

In [64]:
class GeneralTSFDataset(Dataset):
    """Sliding-window dataset over a time series and its time features.

    Sample ``i`` is the tuple ``(var_x, tf_x, var_y, tf_y)``: the ``x`` parts
    are rows ``[i, i + hist_len)`` and the ``y`` parts are the ``pred_len``
    rows that follow, taken from ``variable`` and ``time_feature``
    respectively.

    Args:
        hist_len: number of history rows per sample.
        pred_len: number of rows to predict per sample.
        variable: sequence sliced along its first axis (the series values).
        time_feature: sequence sliced with the same row indices as
            ``variable``.
    """

    def __init__(self, hist_len, pred_len, variable, time_feature):
        self.hist_len = hist_len
        self.pred_len = pred_len
        self.variable = variable
        self.time_feature = time_feature

    def __getitem__(self, index):
        """Return the ``index``-th window as ``(var_x, tf_x, var_y, tf_y)``.

        Negative indices count from the end.

        Raises:
            IndexError: if ``index`` is outside ``[-len(self), len(self))``.
        """
        size = len(self)
        if index < 0:
            index += size
        # Without this check an out-of-range index silently yields truncated
        # or empty windows, and plain iteration over the dataset (which relies
        # on IndexError to stop) would never terminate.
        if not 0 <= index < size:
            raise IndexError(
                f"index {index} is out of range for a dataset of {size} samples"
            )

        hist_start = index
        hist_end = hist_start + self.hist_len
        pred_end = hist_end + self.pred_len

        var_x = self.variable[hist_start:hist_end, ...]
        tf_x = self.time_feature[hist_start:hist_end, ...]

        var_y = self.variable[hist_end:pred_end, ...]
        tf_y = self.time_feature[hist_end:pred_end, ...]

        return var_x, tf_x, var_y, tf_y

    def __len__(self):
        """Number of complete windows; 0 when the series is too short."""
        # Clamp at zero: a negative value would make len() raise ValueError.
        return max(0, len(self.variable) - (self.hist_len + self.pred_len) + 1)


class DataInterface(pl.LightningDataModule):
    """Lightning data module serving windowed train/val/test loaders.

    The series and its timestamps are read once, at construction time, from
    ``<data_root>/<dataset_name>.npz``. Each loader wraps a contiguous slice of
    them in a ``GeneralTSFDataset``.

    Keyword Args:
        num_workers: worker count passed to every ``DataLoader``.
        batch_size: batch size of the train and validation loaders
            (the test loader always uses a batch size of 1).
        hist_len: number of history rows per sample.
        pred_len: number of predicted rows per sample.
        norm_time_feature: when true, each time feature is divided by its
            nominal maximum (e.g. 6 for day-of-week).
        data_split: ``[train_len, val_len, test_len]`` row counts.
        time_feature_cls: feature names, each one of ``"tod"``, ``"dow"``,
            ``"dom"``, ``"doy"``.
        data_root: directory containing the ``.npz`` file.
        dataset_name: stem of the ``.npz`` file.
        freq: sampling interval in minutes (used by the ``"tod"`` feature).
    """

    def __init__(self, **kwargs):
        super().__init__()
        self.num_workers = kwargs['num_workers']
        self.batch_size = kwargs['batch_size']
        self.hist_len = kwargs['hist_len']
        self.pred_len = kwargs['pred_len']
        self.norm_time_feature = kwargs['norm_time_feature']
        # test_len is stored, but the test loader slices to the end of the
        # series rather than using it.
        self.train_len, self.val_len, self.test_len = kwargs['data_split']
        self.time_feature_cls = kwargs['time_feature_cls']

        self.data_path = os.path.join(kwargs['data_root'], "{}.npz".format(kwargs['dataset_name']))
        self.config = kwargs

        self.variable, self.time_feature = self.__read_data__()

    def __read_data__(self):
        """Load the series and build the time-feature matrix.

        Returns:
            ``(variable, time_feature)`` where ``time_feature`` stacks one
            column per entry of ``time_feature_cls`` along the last axis.

        Raises:
            NotImplementedError: for an unknown entry in ``time_feature_cls``.
        """
        # The context manager closes the underlying archive once both arrays
        # have been materialised; a bare np.load() would leave it open.
        with np.load(self.data_path) as data:
            variable = data['variable']
            timestamp = pd.DatetimeIndex(data['timestamp'])

        time_feature = []
        for tf_cls in self.time_feature_cls:
            if tf_cls == "tod":
                # Index of the sample within its day, in units of `freq` minutes.
                size = int((24 * 60) / self.config['freq']) - 1
                feature = np.array((60 * timestamp.hour + timestamp.minute) / self.config['freq'])
                print(feature, feature.shape)
            elif tf_cls == "dow":
                size = 7 - 1
                feature = np.array(timestamp.dayofweek)  # 0 ~ 6
                print(feature, feature.shape)
            elif tf_cls == "dom":
                size = 31 - 1
                feature = np.array(timestamp.day) - 1  # 0 ~ 30
            elif tf_cls == "doy":
                size = 366 - 1
                feature = np.array(timestamp.dayofyear) - 1  # 0 ~ 365
            else:
                raise NotImplementedError(f"unsupported time feature class: {tf_cls!r}")

            time_feature.append(feature / size if self.norm_time_feature else feature)

        return variable, np.stack(time_feature, axis=-1)

    def _window_dataset(self, start, stop=None):
        """Wrap rows ``[start, stop)`` of the series in a ``GeneralTSFDataset``."""
        return GeneralTSFDataset(
            self.hist_len,
            self.pred_len,
            self.variable[start:stop].copy(),
            self.time_feature[start:stop].copy(),
        )

    def train_dataloader(self):
        """Loader over the first ``train_len`` rows; incomplete last batch dropped."""
        dataset = self._window_dataset(0, self.train_len)
        print(f"[0, {self.train_len}]")
        # NOTE(review): the training loader does not shuffle; confirm this is
        # intended beyond inspecting batches by index.
        return DataLoader(
            dataset=dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
            drop_last=True
        )

    def val_dataloader(self):
        """Loader over the validation rows, preceded by ``hist_len`` rows of history."""
        # Start hist_len rows early so the first validation target has a full
        # history; clamp at 0 because a negative start would wrap around to
        # the end of the series.
        start = max(self.train_len - self.hist_len, 0)
        stop = self.train_len + self.val_len
        dataset = self._window_dataset(start, stop)
        print(f"[{start}, {stop}]")
        return DataLoader(
            dataset=dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
            drop_last=False
        )

    def test_dataloader(self):
        """Loader (batch size 1) over everything after the validation rows."""
        # Same hist_len look-back and clamping as the validation loader; the
        # slice runs to the end of the series.
        start = max(self.train_len + self.val_len - self.hist_len, 0)
        print(f"[{start}, {len(self.variable)}]")
        return DataLoader(
            dataset=self._window_dataset(start),
            batch_size=1,
            num_workers=self.num_workers,
            shuffle=False
        )

In [65]:
# Build the data module from `config`; its constructor already reads the .npz
# file and builds the time features.
data_module = DataInterface(**config)
# Trailing expression: shape of the stacked time-feature matrix.
data_module.time_feature.shape

[ 0.  1.  2. ... 21. 22. 23.] (14400,)
[4 4 4 ... 1 1 1] (14400,)


(14400, 2)

In [66]:
# Build the training DataLoader; the call also prints the row range it covers.
train_loader = data_module.train_dataloader()

[0, 8640]


In [67]:
# Number of batches the training loader yields.
len(train_loader)

134

In [51]:
# Walk the training loader and print only batch 133 (the last of the 134
# batches reported by len(train_loader)).
for i, batch in enumerate(train_loader):
    if i != 133:
        continue
    print(batch)

[tensor([[[-0.1441,  0.0584, -0.2379,  ..., -0.0994, -2.0238,  0.0806],
         [-0.5128, -0.2297, -0.5664,  ..., -0.3075, -2.4109,  0.0652],
         [ 0.0518, -0.4541, -0.0126,  ...,  0.0501, -2.4109, -0.0497],
         [-0.5128, -0.1340, -0.4955,  ...,  0.2582, -2.6045,  0.0040],
         [-1.0084, -0.9349, -0.9462,  ..., -0.4267, -2.6045,  0.1956]],

        [[-0.5128, -0.2297, -0.5664,  ..., -0.3075, -2.4109,  0.0652],
         [ 0.0518, -0.4541, -0.0126,  ...,  0.0501, -2.4109, -0.0497],
         [-0.5128, -0.1340, -0.4955,  ...,  0.2582, -2.6045,  0.0040],
         [-1.0084, -0.9349, -0.9462,  ..., -0.4267, -2.6045,  0.1956],
         [-0.6742, -0.2297, -0.6630,  ..., -0.1590, -2.3633,  0.2263]],

        [[ 0.0518, -0.4541, -0.0126,  ...,  0.0501, -2.4109, -0.0497],
         [-0.5128, -0.1340, -0.4955,  ...,  0.2582, -2.6045,  0.0040],
         [-1.0084, -0.9349, -0.9462,  ..., -0.4267, -2.6045,  0.1956],
         [-0.6742, -0.2297, -0.6630,  ..., -0.1590, -2.3633,  0.2263],
 

In [52]:
# Build the validation DataLoader; the call also prints the row range it covers.
val_loader = data_module.val_dataloader()

[8635, 11520]


In [53]:
# Number of batches the validation loader yields.
len(val_loader)

45

In [54]:
# Print the first validation batch, then stop: without the break the loop
# would keep loading every remaining batch only to discard it.
for i, batch in enumerate(val_loader):
    if i == 0:
        print(batch)
        break

[tensor([[[-0.0289, -0.8708, -0.0576,  ...,  0.1986, -2.2665,  0.4409],
         [ 0.1900, -0.8067,  0.1033,  ...,  0.5855, -2.2173,  0.3796],
         [ 0.0747, -0.7105,  0.0711,  ...,  0.4966, -2.3141,  0.4256],
         [ 0.1784, -0.6143,  0.2128,  ...,  0.3178, -2.1697,  0.4716],
         [ 0.3513, -0.4541,  0.2773,  ..., -0.0095, -2.2665,  0.3949]],

        [[ 0.1900, -0.8067,  0.1033,  ...,  0.5855, -2.2173,  0.3796],
         [ 0.0747, -0.7105,  0.0711,  ...,  0.4966, -2.3141,  0.4256],
         [ 0.1784, -0.6143,  0.2128,  ...,  0.3178, -2.1697,  0.4716],
         [ 0.3513, -0.4541,  0.2773,  ..., -0.0095, -2.2665,  0.3949],
         [ 0.5012, -0.1019,  0.5348,  ..., -0.0994, -2.1697,  0.4179]],

        [[ 0.0747, -0.7105,  0.0711,  ...,  0.4966, -2.3141,  0.4256],
         [ 0.1784, -0.6143,  0.2128,  ...,  0.3178, -2.1697,  0.4716],
         [ 0.3513, -0.4541,  0.2773,  ..., -0.0095, -2.2665,  0.3949],
         [ 0.5012, -0.1019,  0.5348,  ..., -0.0994, -2.1697,  0.4179],
 

In [56]:
# Build the test DataLoader; the call also prints the row range it covers.
test_loader = data_module.test_dataloader()

[11515, 14400]


In [58]:
# Number of batches the test loader yields (its batch size is 1).
len(test_loader)

2879

In [61]:
# Walk the test loader and print only batch 2878 (the last of the 2879
# batches reported by len(test_loader)).
for i, batch in enumerate(test_loader):
    if i != 2878:
        continue
    print(batch)

[tensor([[[ 1.1004, -0.0058,  0.8502, -0.0183,  1.8058, -0.3815, -1.8665],
         [ 1.2616,  0.0904,  0.9854,  0.0736,  1.9250, -0.3815, -1.8665],
         [ 1.6535,  0.3789,  1.2882,  0.2029,  2.5200, -0.3323, -1.8665],
         [ 1.7803,  0.5071,  1.3976,  0.2953,  2.6988, -0.2847, -1.8665],
         [ 1.6304,  0.3469,  1.3011,  0.2584,  2.4008, -0.2847, -1.6596]]],
       dtype=torch.float64), tensor([[[17.,  1.],
         [18.,  1.],
         [19.,  1.],
         [20.,  1.],
         [21.,  1.]]], dtype=torch.float64), tensor([[[ 1.3538,  0.2186,  1.0948,  0.1660,  1.8654, -0.3815, -1.6289],
         [ 1.0312,  0.0904,  0.8696,  0.1292,  1.1805, -0.4291, -1.6136]]],
       dtype=torch.float64), tensor([[[22.,  1.],
         [23.,  1.]]], dtype=torch.float64)]


In [62]:
# Open the raw archive directly to inspect what it stores. It is left open on
# purpose: the following cells read individual arrays from `data`.
npz_name = f"{config['dataset_name']}.npz"
data_path = os.path.join(config['data_root'], npz_name)
data = np.load(data_path)
display(data)

NpzFile 'EasyTSF/dataset/ETTh1.npz' with keys: variable, timestamp, mean, std

In [None]:
# Inspect the 'mean' array stored in the archive (7 values; presumably the
# per-variable normalisation mean -- confirm against the dataset builder).
display(data['mean'])

array([ 7.93774225,  2.02103866,  5.0797706 ,  0.74618588,  2.78176239,
        0.78845312, 17.1282617 ])

In [None]:
# Inspect the 'std' array stored in the archive (7 values; presumably the
# per-variable normalisation std -- confirm against the dataset builder).
display(data['std'])

array([5.81274941, 2.09010465, 5.51879358, 1.92637927, 1.02352266,
       0.63023664, 9.17649102])

In [None]:
# Inspect the raw 'timestamp' array stored in the archive.
display(data['timestamp'])

array(['2016-07-01T00:00:00.000000000', '2016-07-01T01:00:00.000000000',
       '2016-07-01T02:00:00.000000000', ...,
       '2018-02-20T21:00:00.000000000', '2018-02-20T22:00:00.000000000',
       '2018-02-20T23:00:00.000000000'], dtype='datetime64[ns]')

In [None]:
# Wrap the raw timestamps in a DatetimeIndex -- the same conversion
# DataInterface.__read_data__ applies before deriving time features.
timestamp = pd.DatetimeIndex(data['timestamp'])
display(timestamp)

DatetimeIndex(['2016-07-01 00:00:00', '2016-07-01 01:00:00',
               '2016-07-01 02:00:00', '2016-07-01 03:00:00',
               '2016-07-01 04:00:00', '2016-07-01 05:00:00',
               '2016-07-01 06:00:00', '2016-07-01 07:00:00',
               '2016-07-01 08:00:00', '2016-07-01 09:00:00',
               ...
               '2018-02-20 14:00:00', '2018-02-20 15:00:00',
               '2018-02-20 16:00:00', '2018-02-20 17:00:00',
               '2018-02-20 18:00:00', '2018-02-20 19:00:00',
               '2018-02-20 20:00:00', '2018-02-20 21:00:00',
               '2018-02-20 22:00:00', '2018-02-20 23:00:00'],
              dtype='datetime64[ns]', length=14400, freq=None)

In [63]:
# Show the last 10 rows of the stored series, to cross-check them against the
# final test batch printed earlier.
print(data['variable'][-10:])

[[ 5.58816043e-01 -1.97616237e-01  3.99585383e-01 -7.38099239e-02
   1.09156112e+00 -3.81528352e-01 -1.86653718e+00]
 [ 7.08487021e-01 -1.66038878e-01  5.09029609e-01 -7.38099239e-02
   1.24006803e+00 -3.32340450e-01 -1.79755656e+00]
 [ 1.04275222e+00  5.83517341e-02  8.24497138e-01 -9.64961077e-05
   1.53805834e+00 -3.81528352e-01 -1.86653718e+00]
 [ 1.10038416e+00 -5.75980432e-03  8.50227435e-01 -1.82652789e-02
   1.80576142e+00 -3.81528352e-01 -1.86653718e+00]
 [ 1.26158159e+00  9.04076173e-02  9.85401786e-01  7.36169317e-02
   1.92495735e+00 -3.81528352e-01 -1.86653718e+00]
 [ 1.65347874e+00  3.78909882e-01  1.28818544e+00  2.02874943e-01
   2.51996148e+00 -3.32340450e-01 -1.86653718e+00]
 [ 1.78026891e+00  5.07133187e-01  1.39762966e+00  2.95276317e-01
   2.69875585e+00 -2.84739233e-01 -1.86653718e+00]
 [ 1.63042616e+00  3.46854113e-01  1.30105059e+00  2.58419557e-01
   2.40076508e+00 -2.84739233e-01 -1.65959533e+00]
 [ 1.35379273e+00  2.18630808e-01  1.09484601e+00  1.66018244e-0