In [1]:
from typing import List, Tuple
import torch
import numpy as np
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def mode_of_freq(data: pd.DataFrame,
                 key='date',
                 freq='D',
                 mode='sum',
                 date_format='%d.%m.%Y'
                 ) -> pd.DataFrame:
    """Aggregate ``data`` over fixed-width time buckets of its ``key`` column.

    Args:
        data: Frame containing a date column named ``key``. The frame passed
            in is left untouched; parsing happens on a copy.
        key: Name of the column that holds the dates.
        freq: pandas offset alias giving the bucket width ('D' = one day).
        mode: Name of the groupby aggregation method to call, e.g. 'sum'.
        date_format: ``strptime`` pattern used to parse the ``key`` column.

    Returns:
        The result of calling ``mode`` on the grouped data (for reductions
        such as 'sum' this is a DataFrame indexed by bucket).

    Raises:
        KeyError: if ``key`` is not a column of ``data``.
        AttributeError: if ``mode`` is not a method of the groupby object.
    """
    # Parse the date column into datetimes on a copy, so the caller's frame
    # keeps its original column and re-running the cell stays idempotent.
    parsed = data.assign(**{key: pd.to_datetime(data[key], format=date_format)})
    # Bucket the rows along the time axis and apply the requested aggregation.
    grouped = parsed.groupby(pd.Grouper(key=key, freq=freq))
    return getattr(grouped, mode)()

In [3]:
def _target_split(ds: pd.Series, d_model: int) -> Tuple[np.ndarray]:
    """2次元にd_modelずらしたデータと正解データを作成する"""
    endpoint = len(ds) - d_model + 1
    d_modeled = np.stack([ds[i: i+d_model] for i in range(0, endpoint)])
    x = d_modeled[:-1]
    y = d_modeled[-1]
    return x, y

In [14]:
def _src_tgt_split(x: np.ndarray, y: np.ndarray, src_seq: int, tgt_seq: int, sliding=1) -> Tuple[np.ndarray]:
    """エンコーダ入力とデコーダ入力を作成する
    引数: 
        x: _target_splitで変換されたデータx
        y: _target_splitで変換されたデータy
        src_seq: エンコーダのシーケンス数
        tgt_seq: デコーダのシーン数
        sliding: エンコーダ入力とデコーダ入力のずれ
    """
    x_row = x.shape[0]
    src_seq = 7
    tgt_seq = 2
    sliding = 1
    pack = [(x[i: i+src_seq],
            x[i+src_seq-(tgt_seq-sliding): i+src_seq+sliding],
            y[i: i+src_seq])
            for i in range(x_row//src_seq)]
    return pack

In [15]:
def _to_torch_dataset(pack, batch_size):
    pack = [torch.from_numpy(i.astype(np.float32)).clone() for i in pack]
    dataset = torch.utils.data.TensorDataset(*pack)
    dataset = torch.utils.data.DataLoader(dataset, batch_size, shuffle=False)
    return dataset

In [16]:
# Read the raw sales table and reduce it to the item_cnt_day column of the
# aggregated frame (mode_of_freq defaults: daily buckets, summed).
data = mode_of_freq(pd.read_csv('../data/sales_train.csv'))['item_cnt_day']
# Slice the series into overlapping windows of width 32.
x, y = _target_split(data, d_model=32)
# Derive the encoder-input / decoder-input / target slices.
pack = _src_tgt_split(x, y, src_seq=7, tgt_seq=2, sliding=1)

# Resume from the cell below.

In [43]:
# Scratch version of the src / tgt / target windowing, written inline so the
# intermediate lists can be inspected.
x_row = x.shape[0]
src_seq = 7
tgt_seq = 2
sliding = 1

src_list, tgt_list, target_list = [], [], []
for i in range(x_row // src_seq):
    src_end = i + src_seq
    tgt_end = src_end + sliding
    # Encoder input: src_seq consecutive rows of x starting at row i.
    src_list.append(x[i:src_end])
    # Decoder input: tgt_seq rows of x, ending `sliding` rows past the encoder window.
    tgt_list.append(x[src_end - (tgt_seq - sliding):tgt_end])
    # Target: the single element of y at index tgt_end.
    # NOTE(review): when tgt_end is past the end of y this slice is empty
    # (the recorded output of target[-3] is an empty array), which makes
    # target_list ragged -- confirm how y should be indexed.
    target_list.append(y[tgt_end:tgt_end + 1])
src, tgt, target = (np.array(rows) for rows in (src_list, tgt_list, target_list))

  src, tgt, target = np.array(src_list), np.array(tgt_list), np.array(target_list)


In [44]:
# Spot-check the third-from-last entry of `target`
# (the recorded output for this cell is an empty float64 array).
target[-3]

array([], dtype=float64)

In [None]:
# NOTE(review): this import sits mid-notebook; the cells below rely on `nn`,
# so it has to run before them. Consider moving it to the first import cell.
import torch.nn as nn

# NOTE(review): `pack` is the list returned by _src_tgt_split (one 3-tuple per
# sample). This three-way unpack only succeeds when that list has exactly three
# entries, and each name would then be bound to a tuple rather than an array,
# so the `.shape` access below would fail -- confirm the intended structure.
src, tgt, y = pack
# Display the shapes of the three pieces.
src.shape, tgt.shape, y.shape

In [None]:
# Encoder smoke test: run a random batch through a 6-layer Transformer encoder
# and print the shape of the resulting memory tensor.
d_model = 32  # feature width; matches the window width used for _target_split
# The layer and the dummy input take their width from d_model rather than
# repeating the literal, so the sizes cannot drift apart.
encoder_layer = nn.TransformerEncoderLayer(d_model=d_model, nhead=8, batch_first=True)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
# Random stand-in for the encoder input, laid out as (batch=64, sequence=7, d_model)
# because batch_first=True.
src = torch.rand(64, 7, d_model)
memory = transformer_encoder(src)
print(memory.shape)

In [None]:
# Decoder smoke test: decode a random target batch against the encoder memory
# and project every position to a single value.
# Relies on `d_model` and `memory` from the encoder cell.
decoder_layer = nn.TransformerDecoderLayer(d_model, nhead=8, batch_first=True)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
# Random stand-in for the decoder input: (batch, sequence=2, d_model). The batch
# size is read from `memory` and the width from d_model, so the tensor always
# agrees with the encoder output and the decoder layer.
tgt = torch.rand(memory.shape[0], 2, d_model)
out = transformer_decoder(tgt, memory)
# Project each d_model-wide position to one scalar.
linear = nn.Linear(d_model, 1)
# NOTE(review): `y` rebinds the name used earlier for the _target_split output.
y = linear(out)
y.shape