In [1]:
import sys

# Point sys.path[0] at the project root so that `module.*` imports resolve:
# keep every '/'-separated component of the current first path entry up to and
# including `parent_dir`, each followed by a '/'.  When `parent_dir` does not
# occur, the whole entry is kept (with a trailing '/').
parent_dir = 'Predict-Future-Sales'
p_sub = sys.path[0]

ride = ''
for path in p_sub.split('/'):
    ride += path + '/'
    if path == parent_dir:
        # Reached the project root; deeper components are dropped.
        break
sys.path[0] = ride

import pickle
import datetime

import pandas as pd
import numpy as np
from numpy import ndarray
from sklearn.preprocessing import StandardScaler
import torch
from torch import Tensor

from module.lino_module.preprocess import mode_of_freq, src_tgt_split

from typing import Tuple, Optional
from pandas import DataFrame, Series, DatetimeIndex
from numpy import ndarray
from torch.utils.data import DataLoader

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def inference(model: object, src: ndarray, tgt: ndarray) -> ndarray:
    """Run one forward pass of ``model`` and return the result as a NumPy array.

    Both inputs are cast to float32, converted to tensors, transposed and
    given a leading axis of size 1 before being handed to the model.

    Args:
        model: Object exposing ``eval()`` and callable as ``model(src, tgt)``
            with two tensors, returning a tensor.
        src: NumPy array for the source side (``.astype`` is called on it,
            so a ``torch.Tensor`` is not accepted).
        tgt: NumPy array for the target side, same requirements as ``src``.

    Returns:
        The model output converted with ``.numpy()``.

    Note:
        ``model.eval()`` is called and the model is left in evaluation mode.
    """
    src_tensor = torch.from_numpy(src.astype(np.float32)).T.unsqueeze(0)
    tgt_tensor = torch.from_numpy(tgt.astype(np.float32)).T.unsqueeze(0)
    model.eval()
    # No gradients are needed for inference; skipping graph construction
    # saves memory and time without changing the forward result.
    with torch.no_grad():
        output = model(src_tensor, tgt_tensor)
    # detach() is kept so that a model returning a parameter directly
    # can still be converted to NumPy.
    return output.detach().numpy()


def tde_for_inference(
                    ds: pd.Series,
                    seq: int,
                    d_model: int,
                    dilation: int
                    ) -> ndarray:
    """Build a time-delay embedding from the tail of ``ds``.

    ``d_model`` windows of length ``seq`` are cut from the end of the data.
    Consecutive windows are shifted by ``dilation + 1`` positions; the first
    row of the result is the oldest window and the last row is the most
    recent one (the final ``seq`` values).

    Args:
        ds: One-dimensional data (a Series or anything ``np.asarray``
            accepts).  Slicing is purely positional; the index is ignored.
        seq: Length of every window, must be at least 1.
        d_model: Number of windows (rows of the result).
        dilation: Number of positions skipped between consecutive windows,
            must be non-negative.

    Returns:
        Array with one row per window, i.e. shape ``(d_model, seq)`` for
        one-dimensional input.

    Raises:
        ValueError: If ``seq`` or ``dilation`` is out of range, or if ``ds``
            holds fewer values than the oldest window needs.
    """
    if seq < 1:
        raise ValueError(f'seq must be >= 1, got {seq}')
    if dilation < 0:
        raise ValueError(f'dilation must be >= 0, got {dilation}')

    # Work on the raw values so that slicing never depends on the index type.
    values = np.asarray(ds)
    total = len(values)
    stride = dilation + 1
    required = seq + max(d_model - 1, 0) * stride
    if total < required:
        # Without this check the windows would come out with different
        # lengths and could not form a rectangular array.
        raise ValueError(
            f'need at least {required} values for seq={seq}, '
            f'd_model={d_model}, dilation={dilation}; got {total}'
        )

    # Explicit start/stop positions avoid the special case of a "-0" stop
    # for the most recent window.  Oldest window first.
    windows = [
        values[total - seq - lag * stride: total - lag * stride]
        for lag in range(d_model - 1, -1, -1)
    ]
    return np.array(windows)


In [3]:
# Demo: check the time-delay embedding on an easy-to-read ramp 0..999 with
# weekday / month label columns attached.

# Window settings used for the demo embedding.
seq = 7
d_model = 4
dilation = 2

data = pd.read_csv('../data/sales_train.csv')
ds = mode_of_freq(data).item_cnt_day

demo = np.arange(1000)
index = demo.copy()

# Month labels come from the tail of the real index; weekday labels are a
# plain 0..6 cycle repeated and cut to the demo length.
monthly_label = np.array(ds.index.month)[-demo.size:]
weekly_num = list(np.arange(0, 7))
weekly_label = (weekly_num * (len(index) // 7 + 1))[:len(index)]

new = pd.DataFrame(demo, columns=['data']).assign(
    weekly=weekly_label,
    monthly=monthly_label,
)

display(new.tail(10))

# Embed every column separately, then lay the windows side by side so that
# each row of the result is one time step.
embeded = []
for column in new.columns:
    trg = new[column]
    tded = tde_for_inference(trg, seq, d_model, dilation)
    embeded.append(tded.tolist())
embeded = np.array(embeded).reshape(d_model * new.shape[1], -1).T
embeded

Unnamed: 0,data,weekly,monthly
990,990,3,10
991,991,4,10
992,992,5,10
993,993,6,10
994,994,0,10
995,995,1,10
996,996,2,10
997,997,3,10
998,998,4,10
999,999,5,10


array([[984, 987, 990, 993,   4,   0,   3,   6,  10,  10,  10,  10],
       [985, 988, 991, 994,   5,   1,   4,   0,  10,  10,  10,  10],
       [986, 989, 992, 995,   6,   2,   5,   1,  10,  10,  10,  10],
       [987, 990, 993, 996,   0,   3,   6,   2,  10,  10,  10,  10],
       [988, 991, 994, 997,   1,   4,   0,   3,  10,  10,  10,  10],
       [989, 992, 995, 998,   2,   5,   1,   4,  10,  10,  10,  10],
       [990, 993, 996, 999,   3,   6,   2,   5,  10,  10,  10,  10]])

## Recurrent Inference Class

In [None]:
# Dummy ramp series 0..999 carrying the first dates of the real sales index.
data = pd.read_csv('../data/sales_train.csv')
dummy_data = np.arange(1000)
index = mode_of_freq(data).item_cnt_day.index[:dummy_data.shape[0]]
dummy = pd.Series(data=dummy_data, index=index)

In [None]:
class RecurrentInference():
    """One-step-at-a-time forecaster that feeds each prediction back in.

    Usage: construct with a model and the embedding settings, call the
    instance with a series and a scaler class to prepare the scaled frame,
    then call ``predict`` repeatedly; every call appends one new row.
    """

    def __init__(self, model, seq, d_model, dilation, weekly=True, monthly=True):
        """Store the model and the embedding settings.

        Args:
            model: Model passed unchanged to ``inference``.
            seq: Window length used for the time-delay embedding.
            d_model: Number of delayed windows per column.
            dilation: Positions skipped between consecutive windows.
            weekly: Add a scaled weekday column to the working frame.
            monthly: Add a scaled month column to the working frame.
        """
        self.model = model
        self.seq = seq
        self.d_model = d_model
        self.dilation = dilation
        self.weekly = weekly
        self.monthly = monthly

        # State filled in by __call__ / predict.
        self.origin = None      # series as passed to __call__
        self.df = None          # scaled data plus optional calendar columns
        self.inference = None   # last observed value followed by predictions
        self.embedded = None    # most recent time-delay embedding
        self.last_index = None  # index label of the newest row in df
        self.last_data = None   # last value of the original series
        self.scaler = None      # fitted scaler instance

    def __call__(self, ds, scaler):
        """Fit the scaler on ``ds`` and build the working frame.

        Args:
            ds: Series to forecast from.  A datetime-like index is required
                when ``weekly`` or ``monthly`` is enabled, because
                ``.weekday`` / ``.month`` are read from it.
            scaler: Scaler *class* (not an instance); it is instantiated
                here and must provide ``fit`` and ``transform``.
        """
        self.origin = ds
        reshaped = ds.values.reshape(-1, 1)
        self.scaler = scaler().fit(reshaped)
        scaled_ds = self.scaler.transform(reshaped).reshape(-1)
        # Pass the index itself; wrapping it in a list would create a
        # one-level MultiIndex.
        self.df = pd.DataFrame(scaled_ds, columns=['data'], index=ds.index)
        self.last_index = ds.index[-1]
        # Positional access; ds[-1] would be looked up as a label.
        self.last_data = ds.iloc[-1]
        # NOTE(review): this seed value is in the original scale while the
        # values appended by predict() are model outputs on scaled data --
        # confirm which scale this series is meant to hold.
        self.inference = pd.Series(self.last_data, index=[self.last_index])

        if self.weekly:
            self.df['weekly'] = ds.index.weekday / 6
        if self.monthly:
            self.df['monthly'] = (ds.index.month - 1) / 11

    def predict(self, freq):
        """Predict the next step and append it to the internal state.

        Args:
            freq: Currently unused; kept for interface compatibility.

        Returns:
            One-dimensional array holding the newest ``step_num`` predicted
            value(s).
        """
        step_num = 1  # hard-coded for now; to be reworked later
        self.embedded = self.tde()
        src, tgt = src_tgt_split(self.embedded)
        output = inference(self.model, src, tgt)
        # NOTE(review): assumes the last element(s) of the flattened model
        # output are the newest prediction -- confirm against the model's
        # output layout.
        pred = np.asarray(output).reshape(-1)[-step_num:]
        value = float(pred[-1])

        # Append the prediction one day after the newest row.
        self.last_index = self.last_index + datetime.timedelta(1)
        self.inference.loc[self.last_index] = value

        # Update the dataset: add a ROW (not a column) that matches whichever
        # calendar columns were enabled in __call__.
        row = {'data': value}
        if self.weekly:
            row['weekly'] = self.last_index.weekday() / 6
        if self.monthly:
            # `month` is a property, not a method.
            row['monthly'] = (self.last_index.month - 1) / 11
        self.df.loc[self.last_index] = [row[name] for name in self.df.columns]

        return pred

    def tde(self):
        """Return the time-delay embedding of every column of ``self.df``.

        Each column is embedded with the instance's ``seq``, ``d_model`` and
        ``dilation``; the per-column windows are then placed side by side.

        Returns:
            Array of shape ``(seq, d_model * number_of_columns)``.
        """
        embedded = []
        for column in self.df.columns:
            windows = tde_for_inference(
                self.df[column],
                seq=self.seq,
                d_model=self.d_model,
                dilation=self.dilation,
            )
            embedded.append(windows.tolist())
        n_features = self.d_model * len(self.df.columns)
        return np.array(embedded).reshape(n_features, -1).T


In [None]:
# Load the saved keyword arguments for the trained model.
# NOTE(review): pickle.load executes arbitrary code from the file -- only
# load pickles produced by this project.
with open('./models_mw/seq7_dmodel32_dilation1.pkl', mode='rb') as f:
    kw = pickle.load(f)

# Drop the entries that RecurrentInference does not accept (a missing key
# raises KeyError), and take the raw data out of the dict.
kw.pop('src_tgt_seq')
kw.pop('batch_size')
data = kw.pop('data')

In [None]:
# Prepare the inputs first, then build the recurrent forecaster, fit it on
# the series and run a single prediction step.
scaler = StandardScaler
ds = mode_of_freq(data).item_cnt_day

rec_inference = RecurrentInference(**kw)
rec_inference(ds, scaler)
pred = rec_inference.predict(3)