In [1]:
# Override the lookup location for the project's own modules:
# truncate sys.path[0] right after the `parent_dir` component so that
# `module.lino_module...` imports resolve from the repository root.
import sys
parent_dir = 'Predict-Future-Sales'
p_sub = sys.path[0]

# Keep every path component up to and including `parent_dir`;
# when `parent_dir` is absent the whole path is kept.
parts = p_sub.split('/')
if parent_dir in parts:
    parts = parts[:parts.index(parent_dir) + 1]

# Each kept component is followed by '/', so the result always ends with '/'.
ride = ''.join(part + '/' for part in parts)
sys.path[0] = ride


import os
import pickle
import time

import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn as nn
from torch.optim import Adam
import matplotlib.pyplot as plt

from module.lino_module.preprocess import mode_of_freq, tde_dataset_wm,\
                                          select_device
from module.lino_module.model import TransformerModel, training, learning_plot,\
                                     confirmation, to_time_series,\
                                     confirmation_plot

# Fix random seeds so that runs are reproducible
seed = 42
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.backends.cudnn.deterministic = True
# `use_deterministic_algorithms` is a function and must be *called*;
# assigning True to it would only overwrite the attribute and leave
# deterministic mode disabled. `warn_only=True` makes operations without a
# deterministic implementation emit a warning instead of raising.
torch.use_deterministic_algorithms(True, warn_only=True)

  from .autonotebook import tqdm as notebook_tqdm


## データセットの準備

In [2]:
# Load the raw sales table and keep only the `item_cnt_day` column of the
# frame returned by `mode_of_freq` (project helper; its exact resampling
# behavior is not visible here).
data = mode_of_freq(pd.read_csv('../data/sales_train.csv')).item_cnt_day

# Window / embedding hyperparameters
seq = 7
d_model = 24
dilation = 2
# Source and target lengths are 90% and 40% of `seq`, truncated to int
src_tgt_seq = tuple(int(seq * ratio) for ratio in (0.9, 0.4))

# Keyword arguments for `tde_dataset_wm`. Key order is kept stable because
# the model file name is later derived by iterating over this dict.
kwrgs = dict(
    data=data,
    seq=seq,
    d_model=d_model,
    dilation=dilation,
    src_tgt_seq=src_tgt_seq,
    weekly=True,
    monthly=True,
    batch_size=64,
    scaler=StandardScaler,
    train_rate=1.0,
)

# Only the first element of the returned pair is used; the second is discarded.
train, _ = tde_dataset_wm(**kwrgs)

## Transformerモデルの準備

In [3]:
# device = select_device()  # on mps, the Transformer attention raises an error
device = torch.device('cpu')

# Model width is d_model times 1, 2 or 3, growing by one d_model for each of
# the `weekly` / `monthly` flags that is enabled.
dim = d_model * (1 + bool(kwrgs['weekly']) + bool(kwrgs['monthly']))

transformer = TransformerModel(d_model=dim, nhead=8).to(device)
# criterion = RMSELoss()  # the paper uses RMSELoss
criterion = nn.MSELoss()

# Learning rate and the other optimizer settings are based on the paper's values
step_num = 1
warmup_steps = 5000
# NOTE(review): `lr` is computed here but is not passed to the optimizer
# below, which uses a fixed lr=1e-5 — confirm which one is intended.
lr = d_model**0.5 * min(step_num**0.5, step_num * warmup_steps**(-1.5))
optimizer = Adam(
    transformer.parameters(),
    lr=1e-5,
    betas=(0.95, 0.98),
    eps=10e-9,  # i.e. 1e-8
)

## 訓練

In [4]:
# Train the transformer for a fixed number of epochs.
epochs = 100
# Arguments forwarded to the project's `training` helper.
# `verbose` and `center` presumably control the log interval and the width
# of the centered log headers — TODO confirm against `training`.
training_kwrgs = dict(
    model=transformer,
    dataset=train,
    device=device,
    criterion=criterion,
    optimizer=optimizer,
    epochs=epochs,
    verbose=10,
    center=50,
)
# `training` returns the model together with the train / validation loss histories.
transformer, train_loss, validation_loss = training(**training_kwrgs)

--------------------- start ----------------------
                     epoch_0                      
train_loss:  1.1543 | validation_loss:  1.1063 | time:  2.253
                     epoch_10                     
train_loss:  0.5167 | validation_loss:  0.431 | time:  1.871
                     epoch_20                     
train_loss:  0.4029 | validation_loss:  0.314 | time:  2.126
                     epoch_30                     
train_loss:  0.3346 | validation_loss:  0.2612 | time:  2.072
                     epoch_40                     
train_loss:  0.2985 | validation_loss:  0.232 | time:  1.959
                     epoch_50                     
train_loss:  0.2568 | validation_loss:  0.2088 | time:  2.097
                     epoch_60                     
train_loss:  0.2344 | validation_loss:  0.1899 | time:  1.899
                     epoch_70                     
train_loss:  0.2291 | validation_loss:  0.1737 | time:  1.885
                     epoch_80                   

## 訓練データ損失と検証データ損失

## モデルの保存

In [5]:
# Build the file name from the dataset settings as a run of `key(value)_`
# fragments, in the dict's insertion order.
# 'data' is skipped, and so is 'model': that key is added to `kwrgs` below,
# so without skipping it a re-run of this cell would embed the whole model
# repr in the file name.
name_parts = []
for key, value in kwrgs.items():
    if key in ('data', 'model'):
        continue
    # The scaler is a class, so use its name rather than its repr
    label = value.__name__ if key == 'scaler' else value
    name_parts.append(f'{key}({label})_')
name = ''.join(name_parts)

# Store the trained model alongside the settings that produced it
kwrgs['model'] = transformer
model_dir = './models_wm/'
model_name = name + '.pkl'
# Create the output directory up front so the dump cannot fail with
# FileNotFoundError after training has already finished.
os.makedirs(model_dir, exist_ok=True)
with open(model_dir + model_name, 'wb') as f:
    pickle.dump(kwrgs, f)