In [1]:
import sys
# Over Ride Parent Path.
parent_dir_name = 'Predict-Future-Sales'
sys_path = ''
for p in str(sys.path[0]).split('/'):
    if p != parent_dir_name:
        sys_path = sys_path + p + '/'
    else:
        sys_path += parent_dir_name
        break

sys.path[0] = sys_path
sys.path

['/Users/lino/Desktop/Predict-Future-Sales',
 '/Users/lino/.vscode/extensions/ms-toolsai.jupyter-2022.11.1003412109/pythonFiles',
 '/Users/lino/.vscode/extensions/ms-toolsai.jupyter-2022.11.1003412109/pythonFiles/lib/python',
 '/Users/lino/opt/anaconda3/envs/datascience/lib/python39.zip',
 '/Users/lino/opt/anaconda3/envs/datascience/lib/python3.9',
 '/Users/lino/opt/anaconda3/envs/datascience/lib/python3.9/lib-dynload',
 '',
 '/Users/lino/opt/anaconda3/envs/datascience/lib/python3.9/site-packages']

In [2]:
import math
from typing import Optional, List, Tuple

import torch
from torch import Tensor
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import LayerNorm
from torch.nn.init import xavier_uniform_
from torch.nn import TransformerEncoder, TransformerEncoderLayer
from torch.nn import TransformerDecoder, TransformerDecoderLayer

# Mymodule
from module.lino import mode_of_freq, making_dataset

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import numpy as np
import pandas as pd

In [4]:
class TransformerModel(nn.Module):
    """Transformer model.

    Args:
        d_model: encoder/decoder inputsの特徴量数
        nhead: Multi-head Attentionのヘッド数
        nhid: feedforward neural networkの次元数
        nlayers: encoder内のsub-encoder-layerの数
        dropout: ドロップアウト率
        activation: 活性化関数
        use_src_mask: encoderで時系列マスクを適用するか
        cat_embs: 各カテゴリ変数におけるカテゴリ数とembedding次元数
        fc_dims: decoder outputsに対するfeedforward neural networkの次元数
        device: cpu or gpu
    """

    def __init__(
        self,
        d_model: int,
        nhead: int = 8,
        nhid: int = 2048,
        nlayers: int = 6,
        dropout: float = 0.1,
        activation: str = "relu",
        use_src_mask: bool = False,
        fc_dims: Optional[List[int]] = None,
        device: Optional[bool] = None,
    ):
        super(TransformerModel, self).__init__()

        # デバイスの選定
        if device is None:
            self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        else:
            self.device = device

        # アトリビュートの格納先
        self.tgt_mask = None
        self.src_mask = None
        self.use_src_mask = use_src_mask
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        encoder_layers = TransformerEncoderLayer(
            d_model, nhead, nhid, dropout, activation
        )
        encoder_norm = LayerNorm(d_model)
        self.transformer_encoder = TransformerEncoder(
            encoder_layers, nlayers, encoder_norm
        )

        decoder_layers = TransformerDecoderLayer(
            d_model, nhead, nhid, dropout, activation
        )
        decoder_norm = LayerNorm(d_model)
        self.transformer_decoder = TransformerDecoder(
            decoder_layers, nlayers, decoder_norm
        )

        if fc_dims is None:
            fc_dims = []

        if len(fc_dims) > 0:
            fc_layers = []
            for i, hdim in enumerate(fc_dims):
                if i != 0:
                    fc_layers.append(nn.Linear(fc_dims[i - 1], hdim))
                    fc_layers.append(nn.Dropout(dropout))
                else:
                    fc_layers.append(nn.Linear(d_model, hdim))
                    fc_layers.append(nn.Dropout(dropout))

            self.fc = nn.Sequential(*fc_layers)
            self.output = nn.Linear(fc_dims[-1], 1)
        else:
            self.fc = None
            self.output = nn.Linear(d_model, 1)

        self._reset_parameters()

    def _generate_square_subsequent_mask(self, sz):
        """未来の情報を考慮しないためのマスクを生成."""

        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = (
            mask.float()
            .masked_fill(mask == 0, float("-inf"))
            .masked_fill(mask == 1, float(0.0))
        )
        return mask

    def _reset_parameters(self):
        """パラメータを初期化."""

        for p in self.parameters():
            if p.dim() > 1:
                xavier_uniform_(p)

    def forward(
        self,
        src: Optional[Tensor] = None,
        tgt: Optional[Tensor] = None,
        memory: Optional[Tensor] = None,
    ) -> Tensor:
        """Transformerを適用.

        Args:
            src: Encoder input（数値）
            tgt: Decoder input（数値）
            memory: Encoder output
        """

        if src is not None:
            src = Variable(src, requires_grad=True).to(self.device).float()
            src = self.pos_encoder(src)

            if self.use_src_mask:
                if self.src_mask is None or self.src_mask.size(0) != len(src):
                    mask = self._generate_square_subsequent_mask(len(src)).to(
                        self.device
                    )
                    self.src_mask = mask

            memory = self.transformer_encoder(src, mask=self.src_mask)

        if tgt is None:
            return memory
        else:
            tgt = Variable(tgt, requires_grad=True).to(self.device).float()

            tgt = self.pos_encoder(tgt)

            if self.tgt_mask is None or self.tgt_mask.size(0) != len(tgt):
                mask = self._generate_square_subsequent_mask(len(tgt)).to(self.device)
                self.tgt_mask = mask

            decoder_output = self.transformer_decoder(
                tgt, memory, tgt_mask=self.tgt_mask
            )

            fc_input = decoder_output

            if self.fc is not None:
                fc_output = self.fc(fc_input)
            else:
                fc_output = fc_input

            output = self.output(fc_output)

            return output

In [5]:
class PositionalEncoding(nn.Module):
    """Positional Encoding."""

    def __init__(self, d_model, dropout=0.1, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)

        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)
        )
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer("pe", pe)

    def forward(self, x):
        """PositionalEncodingを適用."""

        x = x + self.pe[: x.size(0), :]
        return self.dropout(x)

## データセットの用意

In [6]:
# preprocess
data = pd.read_csv('../data/sales_train.csv')
data = mode_of_freq(data)
pack = making_dataset(data.iloc[:, -1], span=32)
x_train, x_test, y_train, y_test = [torch.from_numpy(trg.astype(np.float32)).clone() for trg in pack]

batch_size = 100
x_batch = (x_train[i*batch_size:(i+1)*batch_size] for i in range(1 + x_train.shape[0]//batch_size))
y_batch = (y_train[i*batch_size:(i+1)*batch_size] for i in range(1 + len(y_train)//batch_size))

### モデルの訓練

In [7]:
from torch.optim import Adam

In [8]:
d_model = 32
transformer = TransformerModel(d_model)
optimizer = Adam(transformer.parameters(), lr=1e-4, betas=[0.9, 0.98], eps=10e-9)
criterion = nn.MSELoss()

In [10]:
x.shape

torch.Size([100, 32])

In [11]:
transformer(x).shape

torch.Size([100, 100, 32])

In [9]:
epoch = 10
train_loss = []
test_loss = []
for i in range(epoch):
    print(f' epoch_{i} '.center(50, '-'))
    for x, y in zip(x_batch, y_batch):
        # モデル訓練
        transformer.train()
        optimizer.zero_grad()
        output = transformer(x)
        loss = criterion(output, y)
        train_loss.append(loss)
        # 勾配計算
        loss.backward()
        optimizer.step()
    print(loss)

-------------------- epoch_0 ---------------------


  return F.mse_loss(input, target, reduction=self.reduction)


RuntimeError: The size of tensor a (32) must match the size of tensor b (100) at non-singleton dimension 2

In [20]:
src.shape

torch.Size([1, 32, 512])

In [24]:
encoder_layer = nn.TransformerEncoderLayer(d_model=512, nhead=8)
transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers=6)
src = torch.rand(1, 512)
memory = transformer_encoder(src)

In [26]:
decoder_layer = nn.TransformerDecoderLayer(d_model=512, nhead=8)
transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=6)
tgt = torch.rand(4, 512)
out = transformer_decoder(tgt, memory)

In [27]:
out.shape

torch.Size([4, 512])