In [1]:
import sys

parent_dir = 'Predict-Future-Sales'
p_sub = sys.path[0]

# Rebuild sys.path[0] one '/'-separated component at a time and stop right
# after the project root, so that imports resolve relative to that directory.
# If the root name never appears, every component is kept.
ride = ''
for path in p_sub.split('/'):
    ride += path + '/'
    if path == parent_dir:
        break
sys.path[0] = ride

import datetime
from typing import Tuple

import pandas as pd
import numpy as np
from numpy import ndarray
from sklearn.preprocessing import StandardScaler
import torch
from torch import Tensor

from module.lino_module.preprocess import _mode_of_freq, _src_tgt_split

from typing import Tuple, Optional
from pandas import DataFrame, Series, DatetimeIndex
from numpy import ndarray
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def recurrent_inference(
                    rec_freq: int,
                    model: object,
                    data: pd.DataFrame,
                    seq: int,
                    d_model: int,
                    dilation: int,
                    src_tgt_seq: Tuple[int, int],
                    step_num: int = 1,
                    ) -> pd.Series:
    """Run inference recursively, feeding each prediction back into the input.

    Args:
        rec_freq: Number of inference passes to run.
        model: Trained model; it is handed to ``_inference`` together with
            the source/target arrays.
        data: Original data, passed to ``_mode_of_freq``.
        seq: ``seq`` value used for training.
        d_model: ``d_model`` value used for training.
        dilation: ``dilation`` value used for training.
        src_tgt_seq: ``(src_seq, tgt_seq)`` pair used for training.
        step_num: Number of trailing model outputs kept per pass. Defaults
            to 1, which is the value that used to be hard-coded.

    Returns:
        A Series holding the rounded, inverse-scaled predictions. Its index
        is a daily date range that starts one day after the last index entry
        of the aggregated series.

    Raises:
        ValueError: If ``step_num`` is smaller than 1.
    """
    if step_num < 1:
        # output[-0:] would select the whole output instead of nothing.
        raise ValueError(f'step_num must be >= 1, got {step_num}')

    # Aggregated series from the project helper
    # (original note: per-day summary statistics).
    sum_freq = _mode_of_freq(data).item_cnt_day
    start_idx = sum_freq.index[-1] + datetime.timedelta(1)
    # Scaler fitted on the observed series; reused to undo the scaling of
    # every prediction.
    scs = StandardScaler().fit(sum_freq.values.reshape(-1, 1))
    sum_freq = scs.transform(sum_freq.values.reshape(-1, 1)).reshape(-1)
    # Recursive inference
    src_seq, tgt_seq = src_tgt_seq
    inference_seq = np.array([])
    for _ in range(rec_freq):
        embed = _tde_for_inference(sum_freq, seq, d_model, dilation)
        src, tgt = _src_tgt_split(embed, src_seq, tgt_seq)
        output = _inference(model, src, tgt).reshape(-1)
        pred = output[-step_num:]
        # The scaled prediction extends the series used by the next pass.
        sum_freq = np.append(sum_freq, pred.reshape(-1))
        inversed = scs.inverse_transform(pred.reshape(-1, 1)).reshape(-1)
        inference_seq = np.append(inference_seq, np.round(inversed))
    # Shape the predictions into a date-indexed Series
    end_idx = start_idx + datetime.timedelta(len(inference_seq) - 1)
    index = pd.date_range(start_idx, end_idx)
    return pd.Series(inference_seq, index)


def _inference(model: object, src: Tensor, tgt: Tensor) -> Tensor:
    src = torch.from_numpy(src.astype(np.float32)).T.unsqueeze(0)
    tgt = torch.from_numpy(tgt.astype(np.float32)).T.unsqueeze(0)
    model.eval()
    output = model(src, tgt).detach().numpy()
    return output


def _tde_for_inference(
                    ds: pd.Series,
                    seq: int,
                    d_model: int,
                    dilation: int
                    ) -> ndarray:
    for_array = []
    for i in range(d_model):
        if i != 0:
            for_array.append(ds[-seq - i * (dilation + 1): -i * (dilation + 1)])
        else:
            for_array.append(ds[-seq:])
    time_delay_embedded = np.array([content for content in reversed(for_array)])
    return time_delay_embedded


In [68]:
# Load the raw sales records and aggregate them with the project helper.
data = pd.read_csv('../data/sales_train.csv')
df = _mode_of_freq(data).item_cnt_day

# Stand-in series 0..999 used to make the embedding easy to inspect.
demo = np.arange(1000)
index = demo.copy()

# `.month` of the trailing len(demo) index entries of the aggregated series
# (presumably a DatetimeIndex -- confirm against _mode_of_freq).
monthly_label = np.array(df.index.month)[-len(demo):]

# Repeating 0..6 cycle with one entry per element of `index`. This is a
# positional cycle; it is not derived from calendar weekdays.
weekly_num = list(np.arange(0, 7))
weekly_label = [weekly_num[pos % len(weekly_num)] for pos in range(len(index))]

new = pd.DataFrame(demo, columns=['data'])
new['weekly'] = weekly_label
new['monthly'] = monthly_label

Index(['data', 'weekly', 'monthly'], dtype='object')

In [130]:
# Show the last ten rows of the demo frame to check the label columns.
new.tail(10)

Unnamed: 0,data,weekly,monthly
990,990,3,10
991,991,4,10
992,992,5,10
993,993,6,10
994,994,0,10
995,995,1,10
996,996,2,10
997,997,3,10
998,998,4,10
999,999,5,10


In [142]:
# Time-delay embed every column of `new`, last column first, and stack the
# per-column results into one array with a leading per-column axis.
embeded = []
for column in reversed(new.columns):
    # Bracket access instead of getattr: attribute-style lookup resolves to
    # the DataFrame attribute when a column name collides with one
    # (e.g. 'index', 'count').
    trg = new[column]
    tded = _tde_for_inference(trg, seq=7, d_model=4, dilation=1)
    embeded.append(tded.tolist())
embeded = np.array(embeded)

In [143]:
# Display the stacked embedding (one 2-D block per column of `new`).
embeded

array([[[ 10,  10,  10,  10,  10,  10,  10],
        [ 10,  10,  10,  10,  10,  10,  10],
        [ 10,  10,  10,  10,  10,  10,  10],
        [ 10,  10,  10,  10,  10,  10,  10]],

       [[  0,   1,   2,   3,   4,   5,   6],
        [  2,   3,   4,   5,   6,   0,   1],
        [  4,   5,   6,   0,   1,   2,   3],
        [  6,   0,   1,   2,   3,   4,   5]],

       [[987, 988, 989, 990, 991, 992, 993],
        [989, 990, 991, 992, 993, 994, 995],
        [991, 992, 993, 994, 995, 996, 997],
        [993, 994, 995, 996, 997, 998, 999]]])

In [None]:
class Inferece():
    """Work-in-progress wrapper bundling a model with its inference inputs.

    NOTE: the class name is spelled ``Inferece`` (sic); it is kept unchanged
    so existing references keep working.

    Attributes set by ``__init__``:
        model, ds, seq, d_model, dilation: the constructor arguments.
        df: Frame with the columns ``data`` (values of ``ds``), ``weekly``
            (repeating 0..6 positional cycle) and ``monthly`` (``.month`` of
            the index of ``ds``).
        inference, embedded, time_delay_embedded: initialised to ``None``.
    """

    def __init__(self, model, ds, seq, d_model, dilation):
        """Store the settings and build the labelled frame.

        Args:
            model: Model object; only stored here.
            ds: Series whose index exposes ``.month`` (e.g. a DatetimeIndex).
            seq: Window length used by the embedding.
            d_model: Number of windows in the embedding.
            dilation: Each window is shifted ``dilation + 1`` positions
                further back than the previous one.
        """
        self.model = model
        self.ds = ds
        self.seq = seq
        self.d_model = d_model
        self.dilation = dilation

        # `columns` must be list-like, and building the frame from a dict
        # keeps the values of `ds` regardless of the Series' own name.
        self.df = pd.DataFrame({'data': ds})
        # Positional 0..6 cycle, computed locally so the class does not rely
        # on a notebook-level `weekly_num` variable.
        self.df['weekly'] = np.arange(len(ds)) % 7
        # `.month` is a property, not a method.
        self.df['monthly'] = np.asarray(ds.index.month)
        self.inference = None
        self.embedded = None
        # Filled in by _tde_for_inference().
        self.time_delay_embedded = None

    def predict(self, freq):
        """Placeholder: not implemented yet, always returns ``None``."""
        # TODO: implement recursive prediction; `freq` is currently unused.
        return None

    def _tde_for_inference(self):
        """Time-delay embed the tail of ``self.df['data']``.

        The result is stored in ``self.time_delay_embedded`` with one row per
        window, ordered from the window shifted furthest back to the most
        recent one. Nothing is returned.
        """
        values = self.df['data'].to_numpy()
        stride = self.dilation + 1
        for_array = []
        for i in range(self.d_model):
            if i != 0:
                for_array.append(values[-self.seq - i * stride: -i * stride])
            else:
                # A stop of -0 would select nothing, so the most recent
                # window uses an open-ended slice.
                for_array.append(values[-self.seq:])
        self.time_delay_embedded = np.array(for_array[::-1])
        return None