In [1]:
import sys

# Point the first module-search entry at the project directory so the
# `module.*` imports below resolve when the notebook is started from a
# sub-directory of the project.
parent_dir = 'Predict-Future-Sales'
p_sub = sys.path[0]

# Keep every '/'-separated component up to and including the first one named
# `parent_dir`; if no component matches, the whole path is kept.  Each
# component is followed by '/', so the result always ends with a slash.
segments = p_sub.split('/')
if parent_dir in segments:
    segments = segments[:segments.index(parent_dir) + 1]
ride = ''.join(segment + '/' for segment in segments)
sys.path[0] = ride

import numpy as np
import pandas as pd
from pandas import DatetimeIndex
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from torch.utils.data import TensorDataset

from module.lino_module.preprocess import mode_of_freq, expand_and_split,\
                                          time_delay_embedding, src_tgt_split,\
                                          to_torch_dataset
from typing import Tuple, Optional, Union
from numpy import ndarray
from pandas import DataFrame, Series
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


### Time Delay Embedding に対応させた曜日と月時情報をd_modelにconcatしたデータセットを出力する

In [18]:
# from module.lino_module.preprocess import tde_dataset_wm

# Only the index of the sales series is reused: the values are replaced by a
# 0, 1, 2, ... ramp so the embedding layout can be read directly off the output.
data = mode_of_freq(pd.read_csv('../data/sales_train.csv')).item_cnt_day
demo = np.arange(len(data))
ds = pd.Series(data=demo, index=data.index)

# No scaler and no weekday/month features; dilation of 6.
kwrgs = dict(
    data=ds,
    seq=7,
    d_model=4,
    dilation=6,
    src_tgt_seq=(6, 2),
    batch_size=64,
    scaler=None,
    weekly=False,
    monthly=False,
    train_rate=0.9,
)

train, test = tde_dataset_wm(**kwrgs)
# Show the first sample of the first training batch.
src, tgt, y = next(iter(train))
src[0], tgt[0]

(tensor([[ 0.,  7., 14., 21.],
         [ 1.,  8., 15., 22.],
         [ 2.,  9., 16., 23.],
         [ 3., 10., 17., 24.],
         [ 4., 11., 18., 25.],
         [ 5., 12., 19., 26.]]),
 tensor([[ 5., 12., 19., 26.],
         [ 6., 13., 20., 27.]]))

In [15]:
# Same series as above, now scaled with MinMaxScaler and with the weekday and
# month features switched on; no dilation.
kwrgs = dict(
    data=ds,
    seq=7,
    d_model=4,
    dilation=0,
    src_tgt_seq=(6, 2),
    batch_size=64,
    scaler=MinMaxScaler,
    weekly=True,
    monthly=True,
    train_rate=0.9,
)

train, test = tde_dataset_wm(**kwrgs)
# Show the source part of the first sample of the first training batch.
src, tgt, y = next(iter(train))
src[0]

tensor([[0.0000e+00, 9.6805e-04, 1.9361e-03, 2.9042e-03, 1.6667e-01, 3.3333e-01,
         5.0000e-01, 6.6667e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [9.6805e-04, 1.9361e-03, 2.9042e-03, 3.8722e-03, 3.3333e-01, 5.0000e-01,
         6.6667e-01, 8.3333e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [1.9361e-03, 2.9042e-03, 3.8722e-03, 4.8403e-03, 5.0000e-01, 6.6667e-01,
         8.3333e-01, 1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [2.9042e-03, 3.8722e-03, 4.8403e-03, 5.8083e-03, 6.6667e-01, 8.3333e-01,
         1.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [3.8722e-03, 4.8403e-03, 5.8083e-03, 6.7764e-03, 8.3333e-01, 1.0000e+00,
         0.0000e+00, 1.6667e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00],
        [4.8403e-03, 5.8083e-03, 6.7764e-03, 7.7444e-03, 1.0000e+00, 0.0000e+00,
         1.6667e-01, 3.3333e-01, 0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00]])

In [5]:
def tde_dataset_wm(data: Series,
                   seq: int,
                   d_model: int,
                   dilation: int,
                   src_tgt_seq: Tuple[int],
                   batch_size: int,
                   scaler: Optional[Union[StandardScaler, MinMaxScaler]],
                   weekly=True,
                   monthly=True,
                   train_rate=0.9
                   ) -> Tuple[DataLoader]:
    """Entry point: build train/test datasets of time-delay embeddings with
    optional weekday and month features.

    Args:
        data: Series to embed; its ``.index`` supplies the dates used for the
            weekday/month features, so it must expose ``weekday`` and
            ``month`` when those features are enabled.
        seq: Forwarded to ``expand_and_split`` and ``delay_embeddings``.
        d_model: Forwarded to ``delay_embeddings``.
        dilation: Forwarded to ``delay_embeddings``.
        src_tgt_seq: Unpacked positionally into ``src_tgt_split`` after the
            embedded array (the notebook passes a pair such as ``(6, 2)``).
        batch_size: Forwarded to ``to_torch_dataset``.
        scaler: ``None`` for no scaling, otherwise a scaler *class* (not an
            instance): it is instantiated here with no arguments.
        weekly: Append the weekday feature when true.
        monthly: Append the month feature when true.
        train_rate: Forwarded to ``to_torch_dataset``.

    Returns:
        The ``(train, test)`` pair produced by ``to_torch_dataset``.
    """
    # Take the index first: scaling below replaces the Series by a bare array.
    timestamps = data.index

    series = data
    if scaler is not None:
        # Scalers expect a 2-D column; flatten back to 1-D afterwards.
        # NOTE(review): the scaler is fit on the full series, i.e. before any
        # train/test split -- confirm this is intended.
        column = data.values.reshape(-1, 1)
        series = scaler().fit_transform(column).reshape(-1)

    windows, targets = expand_and_split(series, seq)
    embedded, label = delay_embeddings(windows, targets, timestamps,
                                       d_model, dilation, seq,
                                       weekly, monthly)
    src, tgt = src_tgt_split(embedded, *src_tgt_seq)
    train_part, test_part = to_torch_dataset(src, tgt, label,
                                             batch_size, train_rate)
    return train_part, test_part

In [3]:
def delay_embeddings(x: ndarray,
                     y: ndarray,
                     index: DatetimeIndex,
                     d_model: int,
                     dilation: int,
                     seq: int,
                     weekly: bool,
                     monthly: bool):
    """Time-delay-embed ``x`` and concatenate matching weekday/month features.

    ``x`` and ``y`` go through ``time_delay_embedding`` first.  For each
    enabled calendar feature, the 0-1 scaled values taken from ``index`` are
    windowed with ``expand_and_split`` and embedded with the same ``d_model``
    and ``dilation``, then concatenated onto the result along axis 2
    (weekday first, then month).

    ``index`` is only accessed when ``weekly`` or ``monthly`` is true.

    Returns:
        ``(embedded, label)`` where ``label`` is whatever
        ``time_delay_embedding(x, y, ...)`` returned as its second item.
    """
    embedded, label = time_delay_embedding(x, y, d_model, dilation)

    def _with_feature(base, scaled_values):
        # Window and embed the calendar values the same way as the data.
        # Assumes time_delay_embedding returns a single array when its
        # second argument is None -- TODO confirm in module.lino_module.preprocess.
        windows, _ = expand_and_split(scaled_values, seq)
        feature = time_delay_embedding(windows, None, d_model, dilation)
        return np.concatenate((base, feature), axis=2)

    # Weekday feature: 0..6 scaled to 0-1 for positional encoding.
    if weekly:
        embedded = _with_feature(embedded, index.weekday / 6)

    # Month feature: 1..12 shifted and scaled to 0-1 for positional encoding.
    if monthly:
        embedded = _with_feature(embedded, (index.month - 1) / 11)

    return embedded, label