In [1]:
!git clone https://github.com/Olyco/EasyTSF.git

Cloning into 'EasyTSF'...
remote: Enumerating objects: 272, done.[K
remote: Counting objects: 100% (272/272), done.[K
remote: Compressing objects: 100% (183/183), done.[K
remote: Total 272 (delta 135), reused 197 (delta 75), pack-reused 0 (from 0)[K
Receiving objects: 100% (272/272), 1.52 MiB | 3.42 MiB/s, done.
Resolving deltas: 100% (135/135), done.


In [1]:
!pip install -r "EasyTSF/requirements.txt"



In [None]:
!pip install ray

Collecting ray
  Downloading ray-2.45.0-cp311-cp311-manylinux2014_x86_64.whl.metadata (19 kB)
Downloading ray-2.45.0-cp311-cp311-manylinux2014_x86_64.whl (68.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.4/68.4 MB[0m [31m30.1 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ray
Successfully installed ray-2.45.0


In [None]:
# !python EasyTSF/train.py -c EasyTSF/config/reproduce_conf/RMoK/ETTh1_96for96.py

Seed set to 1
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs

  | Name          | Type      | Params | Mode 
----------------------------------------------------
0 | model         | DenseRMoK | 130 K  | train
1 | loss_function | MSELoss   | 0      | train
----------------------------------------------------
130 K     Trainable params
0         Non-trainable params
130 K     Total params
0.520     Total estimated model params size (MB)
15        Modules in train mode
0         Modules in eval mode
Epoch 0: 100% 132/132 [02:33<00:00,  1.16s/it, v_num=ed_1, train/loss_step=0.404]
Validation: |          | 0/? [00:00<?, ?it/s][A
Validation:   0% 0/44 [00:00<?, ?it/s]       [A
Validation DataLoader 0:   0% 0/44 [00:00<?, ?it/s][A
Validation DataLoader 0:   2% 1/44 [00:00<00:13,  3.27it/s][A
Validation DataLoader 0:   5% 2/44 [00:00<00:10,  3.90it/s][A
Validation DataLoader 0:   7% 3/44 [00:00<00:09,  4.17it/s][A
Validation

In [None]:
!ls

EasyTSF  sample_data


In [2]:
!pwd

/content


In [91]:
config = dict(
    dataset_name='ETTh1',
    var_num=7,
    freq=60,
    data_split=[8640, 2880, 2880],

    hist_len=5,
    pred_len=2,

    batch_size=64,
    max_epochs=10,
    lr=0.0001,
    optimizer="AdamW",
    optimizer_betas=(0.95, 0.9),
    optimizer_weight_decay=1e-5,
    lr_scheduler='StepLR',
    lr_step_size=1,
    lr_gamma=0.5,
    gradient_clip_val=5,
    val_metric="val/loss",
    test_metric="test/mae",
    es_patience=10,

    norm_time_feature=False,
    time_feature_cls=["tod", "dow"],

    num_workers=2,

    model_name="DenseRMoK",

    revin_affine=True,

    data_root="EasyTSF/dataset",
)

In [17]:
import lightning.pytorch as pl
from torch.utils.data import DataLoader
from torch.utils.data import Dataset

In [106]:
class GeneralTSFDataset(Dataset):
    def __init__(self, hist_len, pred_len, variable, time_feature):
        self.hist_len = hist_len
        self.pred_len = pred_len
        self.variable = variable
        self.time_feature = time_feature

    def __getitem__(self, index):
        hist_start = index
        hist_end = index + self.hist_len
        pred_end = hist_end + self.pred_len
        if index < 10:
          print(f"[{hist_start}, {hist_end}]")

        var_x = self.variable[hist_start:hist_end, ...]
        tf_x = self.time_feature[hist_start:hist_end, ...]

        var_y = self.variable[hist_end:pred_end, ...]
        tf_y = self.time_feature[hist_end:pred_end, ...]

        # if index < 10:
        #   print(var_x, tf_x)
        #   print(var_y, tf_y)

        return var_x, tf_x, var_y, tf_y

    def __len__(self):
        return len(self.variable) - (self.hist_len + self.pred_len) + 1


class DataInterface(pl.LightningDataModule):

    def __init__(self, **kwargs):
        super().__init__()
        self.num_workers = kwargs['num_workers']
        self.batch_size = kwargs['batch_size']
        self.hist_len = kwargs['hist_len']
        self.pred_len = kwargs['pred_len']
        self.norm_time_feature = kwargs['norm_time_feature']
        self.train_len, self.val_len, self.test_len = kwargs['data_split']
        self.time_feature_cls = kwargs['time_feature_cls']

        self.data_path = os.path.join(kwargs['data_root'], "{}.npz".format(kwargs['dataset_name']))
        self.config = kwargs

        self.variable, self.time_feature = self.__read_data__()

    def __read_data__(self):
        data = np.load(self.data_path)
        variable = data['variable']
        timestamp = pd.DatetimeIndex(data['timestamp'])
        # print(variable, timestamp)

        # time_feature
        time_feature = []
        for tf_cls in self.time_feature_cls:
            if tf_cls == "tod":
                tod_size = int((24 * 60) / self.config['freq']) - 1
                tod = np.array(list(map(lambda x: ((60 * x.hour + x.minute) / self.config['freq']), timestamp)))
                print(tod, tod.shape)
                if self.norm_time_feature:
                    time_feature.append(tod / tod_size)
                else:
                    time_feature.append(tod)
            elif tf_cls == "dow":
                dow_size = 7 - 1
                dow = np.array(timestamp.dayofweek)  # 0 ~ 6
                print(dow, dow.shape)
                if self.norm_time_feature:
                    time_feature.append(dow / dow_size)
                else:
                    time_feature.append(dow)
            elif tf_cls == "dom":
                dom_size = 31 - 1
                dom = np.array(timestamp.day) - 1  # 0 ~ 30
                if self.norm_time_feature:
                    time_feature.append(dom / dom_size)
                else:
                    time_feature.append(dom)
            elif tf_cls == "doy":
                doy_size = 366 - 1
                doy = np.array(timestamp.dayofyear) - 1  # 0 ~ 181
                if self.norm_time_feature:
                    time_feature.append(doy / doy_size)
                else:
                    time_feature.append(doy)
            else:
                raise NotImplementedError

        return variable, np.stack(time_feature, axis=-1)

    def train_dataloader(self):
        dataset=GeneralTSFDataset(
                  self.hist_len,
                  self.pred_len,
                  self.variable[:self.train_len].copy(),
                  self.time_feature[:self.train_len].copy()
              )
        print(dataset)
        return DataLoader(
            dataset=dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
            drop_last=True
        )

    def val_dataloader(self):
        dataset=GeneralTSFDataset(
                  self.hist_len,
                  self.pred_len,
                  self.variable[self.train_len - self.hist_len:self.train_len + self.val_len].copy(),
                  self.time_feature[self.train_len - self.hist_len:self.train_len + self.val_len].copy(),
              )
        return DataLoader(
            dataset=dataset,
            batch_size=self.batch_size,
            num_workers=self.num_workers,
            shuffle=False,
            drop_last=False
        )

    def test_dataloader(self):
        return DataLoader(
            dataset=GeneralTSFDataset(
                self.hist_len,
                self.pred_len,
                self.variable[self.train_len + self.val_len - self.hist_len:].copy(),
                self.time_feature[self.train_len + self.val_len - self.hist_len:].copy(),
            ),
            batch_size=1,
            num_workers=self.num_workers,
            shuffle=False
        )

In [107]:
data_module = DataInterface(**config)
data_module.time_feature.shape

[ 0.  1.  2. ... 21. 22. 23.] (14400,)
[4 4 4 ... 1 1 1] (14400,)


(14400, 2)

In [108]:
train_loader = data_module.train_dataloader()

<__main__.GeneralTSFDataset object at 0x7d6adf642c10>


In [109]:
for i, batch in enumerate(train_loader):
  if i == 0:
      print(i)

[0, 5]
[1, 6]
[2, 7]
[3, 8]
[4, 9]
[5, 10]
[6, 11]
[7, 12]
[8, 13]
[9, 14]
0


In [111]:
val_loader = data_module.val_dataloader()

45

In [112]:
for i, batch in enumerate(val_loader):
  if i == 0:
      print("hi")

[0, 5]
[1, 6]
[2, 7]
[3, 8]
[4, 9]
[5, 10]
[6, 11]
[7, 12]
[8, 13]
[9, 14]
hi


In [13]:
import numpy as np
import pandas as pd
import os

In [8]:
data_path = os.path.join(config['data_root'], "{}.npz".format(config['dataset_name']))
data = np.load(data_path)
display(data)

NpzFile 'EasyTSF/dataset/ETTh1.npz' with keys: variable, timestamp, mean, std

In [9]:
display(data['mean'])

array([ 7.93774225,  2.02103866,  5.0797706 ,  0.74618588,  2.78176239,
        0.78845312, 17.1282617 ])

In [10]:
display(data['std'])

array([5.81274941, 2.09010465, 5.51879358, 1.92637927, 1.02352266,
       0.63023664, 9.17649102])

In [11]:
display(data['timestamp'])

array(['2016-07-01T00:00:00.000000000', '2016-07-01T01:00:00.000000000',
       '2016-07-01T02:00:00.000000000', ...,
       '2018-02-20T21:00:00.000000000', '2018-02-20T22:00:00.000000000',
       '2018-02-20T23:00:00.000000000'], dtype='datetime64[ns]')

In [15]:
timestamp = pd.DatetimeIndex(data['timestamp'])
display(timestamp)

DatetimeIndex(['2016-07-01 00:00:00', '2016-07-01 01:00:00',
               '2016-07-01 02:00:00', '2016-07-01 03:00:00',
               '2016-07-01 04:00:00', '2016-07-01 05:00:00',
               '2016-07-01 06:00:00', '2016-07-01 07:00:00',
               '2016-07-01 08:00:00', '2016-07-01 09:00:00',
               ...
               '2018-02-20 14:00:00', '2018-02-20 15:00:00',
               '2018-02-20 16:00:00', '2018-02-20 17:00:00',
               '2018-02-20 18:00:00', '2018-02-20 19:00:00',
               '2018-02-20 20:00:00', '2018-02-20 21:00:00',
               '2018-02-20 22:00:00', '2018-02-20 23:00:00'],
              dtype='datetime64[ns]', length=14400, freq=None)

In [82]:
print(data['variable'][:10])

[[-0.36312285 -0.0057598  -0.63071223 -0.14752332  1.38857471  0.87514257
   1.46055158]
 [-0.38617567  0.02629596 -0.65010053 -0.16621124  1.32897674  0.92433047
   1.16152666]
 [-0.47838673 -0.13398305 -0.68869591 -0.20306795  0.97236495  0.68156452
   1.16152666]
 [-0.4899131  -0.03781563 -0.68869591 -0.18438003  1.00167546  0.77835355
   0.86261065]
 [-0.44380761 -0.03781563 -0.65010053 -0.14752332  1.06127367  0.77835355
   0.52522672]
 [-0.39770204  0.05835173 -0.64357736 -0.11066662  1.24006803  0.92433047
   0.44088073]
 [-0.13259516  0.44302153 -0.53413316  0.01859142  2.19266043  1.69705608
   0.61720086]
 [-0.08648959  0.60330061 -0.50187973  0.14784946  2.2522584   2.27779023
   0.65555967]
 [-0.40922842  0.4750773  -0.47614948  0.22156286  0.16925618  1.0211195
   0.49460498]
 [-0.58195222  0.25068669 -0.57272854  0.0367602  -0.09942361  0.92433047
   0.03462516]]
