In [1]:
# Point sys.path[0] at the project root so the in-house `module` package
# can be imported from this notebook.
import sys

parent_dir = 'Predict-Future-Sales'
p_sub = sys.path[0]

# Keep every '/'-separated component up to and including the first one that
# equals `parent_dir`; when it never occurs, the whole path is kept.
components = p_sub.split('/')
if parent_dir in components:
    components = components[:components.index(parent_dir) + 1]

# Each kept component is followed by '/', so the result always ends with '/'.
ride = ''.join(part + '/' for part in components)
sys.path[0] = ride


import os
import pickle
import time

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.optim import Adam
import matplotlib.pyplot as plt

from module.lino_module.preprocess import mode_of_freq, tde_dataset_wm,\
                                          select_device
from module.lino_module.model import TransformerModel, training, learning_plot,\
                                     confirmation, to_time_series,\
                                     confirmation_plot

# Fix the random seeds so that runs are reproducible.
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# `use_deterministic_algorithms` is a function: it has to be called.
# Assigning `True` to it would overwrite the function with a bool and leave
# deterministic mode switched off.
# NOTE(review): on CUDA some ops additionally require the
# CUBLAS_WORKSPACE_CONFIG environment variable in this mode; this notebook
# runs on CPU.
torch.use_deterministic_algorithms(True)

  from .autonotebook import tqdm as notebook_tqdm


## データセットの準備

In [19]:
# Read the raw sales CSV, run it through `mode_of_freq`, and keep only the
# `item_cnt_day` attribute of the result as the series to model.
data = mode_of_freq(pd.read_csv('../data/sales_train.csv')).item_cnt_day

# Hyper-parameters handed to the dataset builder. `d_model` and `step_num`
# are read again further down the notebook.
seq, d_model, dilation, step_num = 7, 24, 2, 3
src_tgt_seq = (5, 4)
# The scaler is passed as a class, not an instance; the save cell reads its
# `__name__` when building the file name.
scaler = StandardScaler

# Key order matters: the save cell iterates this dict in order to build the
# model file name.
kwrgs = dict(
    data=data,
    seq=seq,
    d_model=d_model,
    dilation=dilation,
    src_tgt_seq=src_tgt_seq,
    step_num=step_num,
    daily=False,
    weekly=True,
    monthly=True,
    scaler=scaler,
    batch_size=64,
    # NOTE(review): presumably 1.0 means "no hold-out split", which would
    # explain the `test_loss: nan` lines in the training log - confirm.
    train_rate=1.0,
)

train, test = tde_dataset_wm(**kwrgs)

## Transformerモデルの準備

In [20]:
# device = select_device()  # mps raises an error in the Transformer attention
device = torch.device('cpu')

# Model width: `d_model` multiplied by (number of enabled frequency flags + 1).
enabled_freqs = sum(kwrgs[flag] for flag in ('daily', 'weekly', 'monthly'))
dim = d_model * (enabled_freqs + 1)

transformer = TransformerModel(d_model=dim, nhead=8, device=device)
transformer = transformer.to(device)

# criterion = RMSELoss()  # the paper uses RMSELoss
criterion = nn.MSELoss().to(device)

# Learning rate and the other optimizer settings follow the paper's values.
warmup_steps = 5000
# NOTE(review): `lr` is computed here but never passed to the optimizer,
# which uses a fixed 1e-5 instead. `step_num` is the dataset step from the
# data-preparation cell, not a training-step counter - confirm the intent.
lr = d_model**0.5 * min(step_num**0.5, step_num * warmup_steps**(-1.5))

# NOTE(review): `10e-9` is numerically 1e-8; confirm whether 1e-9 was meant.
adam_options = {'lr': 1e-5, 'betas': (0.95, 0.98), 'eps': 10e-9}
optimizer = Adam(transformer.parameters(), **adam_options)

## 訓練

In [21]:
# Number of training epochs; also embedded in the saved model's file name.
epochs = 100

# Everything the project's `training` helper receives, gathered in one place.
training_kwrgs = dict(
    model=transformer,
    train=train,
    test=test,
    device=device,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
    verbose=10,
    center=50,
)

# `training` returns four values; only the first (the model) is kept.
transformer, _, _, _ = training(**training_kwrgs)

--------------------- start ----------------------
                     epoch_0                      
train_loss:  0.9801 | validation_loss:  0.9036 | test_loss:  nan
                     epoch_10                     
train_loss:  0.5438 | validation_loss:  0.456 | test_loss:  nan
                     epoch_20                     
train_loss:  0.4281 | validation_loss:  0.3524 | test_loss:  nan
                     epoch_30                     
train_loss:  0.3741 | validation_loss:  0.296 | test_loss:  nan
                     epoch_40                     
train_loss:  0.337 | validation_loss:  0.2597 | test_loss:  nan
                     epoch_50                     
train_loss:  0.3043 | validation_loss:  0.2337 | test_loss:  nan
                     epoch_60                     
train_loss:  0.2744 | validation_loss:  0.2174 | test_loss:  nan
                     epoch_70                     
train_loss:  0.2543 | validation_loss:  0.2012 | test_loss:  nan
                     epo

## モデルの保存

In [22]:
# Persist the trained model together with the dataset settings it was built
# from. The settings are also encoded in the file name.

# Drop the loader-only settings. `pop(..., None)` instead of `del` keeps the
# cell re-runnable: a second execution no longer raises KeyError.
kwrgs.pop('batch_size', None)
kwrgs.pop('train_rate', None)

name = ''
for key, value in kwrgs.items():
    # 'data' is the series itself; 'model' is only present when this cell has
    # already run once. Neither belongs in the file name.
    if key in ('data', 'model'):
        continue
    if key == 'scaler':
        # The scaler is stored as a class, so use its name, not its repr.
        name += f'{key}({value.__name__})_'
    else:
        name += f'{key}({value})_'

kwrgs['model'] = transformer
model_dir = './models_wm/'
# Create the target directory on first use so `open` cannot fail on a missing
# folder.
os.makedirs(model_dir, exist_ok=True)
model_name = f'epochs({epochs})' + name + '.pkl'
with open(model_dir + model_name, 'wb') as f:
    pickle.dump(kwrgs, f)