# Transformer

본 Jupyter Notebook은 [DIYA](https://blog.diyaml.com/) 회원들의 자연어처리 스터디를 위해, 아래의 자료를 바탕으로 만들어졌습니다.
* [Transformer Time Series Prediction](https://github.com/oliverguhr/transformer-time-series-prediction)
* [Transformers for Time Series](https://github.com/maxjcohen/transformer)
* [Sequence-to-Sequence Modeling with nn.Transformer and TorchText](https://pytorch.org/tutorials/beginner/transformer_tutorial.html)

본 실습의 구성은 다음과 같습니다.
1. [가상 데이터 생성하기](#Generate-Data)
2. [Transformer 구현](#Transformer-Model)
3. [Transformer 학습](#Train)

## Generate Data

In [None]:
"""
3가지 조화 진동자(harmonic oscillator)로 이루어진 가상 데이터를 생성합니다.
"""
import numpy as np
import torch
from torch.utils.data import Dataset
from sklearn.preprocessing import MinMaxScaler


class ToyDataset(Dataset):
    """Sliding-window dataset over a one-dimensional sequence.

    Item ``i`` is the pair ``(history, target)``: ``history`` holds the
    ``hist_window`` values starting at position ``i`` and ``target`` holds the
    ``output_window`` values that immediately follow. Both are returned as
    float32 tensors with a trailing axis of size 1.

    Args:
        seq: One-dimensional sequence of numbers (ndarray, list or tuple).
        hist_window: Number of past values in each history window.
        output_window: Number of future values in each target window.
    """

    def __init__(self, seq, hist_window, output_window):
        super().__init__()
        # ``np.asarray`` returns an ndarray unchanged and converts plain
        # lists/tuples, which would not support the ``[..., None]`` indexing
        # used in ``__getitem__``.
        self.seq = np.asarray(seq)
        self.hw = hist_window
        self.ow = output_window

    def __len__(self):
        """Return the number of complete (history, target) windows."""
        # A sequence of length n holds n - (hw + ow) + 1 full windows; clamp
        # at zero because ``len()`` must never be negative.
        return max(0, len(self.seq) - self.hw - self.ow + 1)

    def __getitem__(self, idx):
        """Return the ``idx``-th ``(history, target)`` pair.

        Negative indices count from the end.

        Raises:
            IndexError: If ``idx`` is out of range. Raising here also lets
                plain ``for item in dataset`` iteration terminate.
        """
        n_windows = len(self)
        if idx < 0:
            idx += n_windows
        if not 0 <= idx < n_windows:
            raise IndexError("ToyDataset index out of range")

        split = idx + self.hw
        hist = self.seq[idx:split, None]
        preds = self.seq[split:split + self.ow, None]
        return (
            torch.as_tensor(hist, dtype=torch.float32),
            torch.as_tensor(preds, dtype=torch.float32),
        )


def get_data(n_samples=3000, test_ratio=0.1, hist_window=90, output_window=7):
    """Generate the synthetic series and split it into train/test datasets.

    The series is sampled at ``n_samples`` evenly spaced points ``t`` in
    ``[0, 400]`` and is the sum of a linear ramp from 0 to 5, ``sin(t)``,
    ``sin(0.05 * t)`` and ``sin(0.12 * t)`` multiplied by Gaussian noise
    (mean -0.2, standard deviation 0.2). It is then rescaled to ``[-1, 1]``.
    The noise comes from NumPy's global random state, so every call yields a
    different series unless that state is seeded by the caller.

    Args:
        n_samples: Total number of points in the series.
        test_ratio: Fraction of the points, taken from the end of the series,
            that goes into the test split. Must lie in ``[0, 1]``.
        hist_window: History length passed to ``ToyDataset``.
        output_window: Target length passed to ``ToyDataset``.

    Returns:
        Tuple ``(train_data, test_data)`` of ``ToyDataset`` instances.

    Raises:
        ValueError: If ``test_ratio`` is outside ``[0, 1]``.
    """
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1")

    # Named ``t`` rather than ``time`` to avoid shadowing the ``time`` module
    # that this file imports further down.
    t = np.linspace(0, 400, n_samples)
    amplitude = np.linspace(0, 5, n_samples)  # linear trend
    amplitude += np.sin(t) + np.sin(t * 0.05)
    amplitude += np.sin(t * 0.12) * np.random.normal(-0.2, 0.2, len(t))

    # NOTE(review): the scaler is fitted on the full series, test part
    # included, so the test range leaks into the scaling. Kept as-is to
    # preserve the generated values; consider fitting on the train part only.
    scaler = MinMaxScaler(feature_range=(-1, 1))
    amplitude = scaler.fit_transform(amplitude.reshape(-1, 1)).reshape(-1)

    # Slice on an explicit split index: with ``num_test == 0`` the previous
    # ``[:-num_test]`` / ``[-num_test:]`` slices produced an empty train set
    # and put the whole series into the test set.
    num_test = int(n_samples * test_ratio)
    split = n_samples - num_test
    train_data = ToyDataset(amplitude[:split], hist_window, output_window)
    test_data = ToyDataset(amplitude[split:], hist_window, output_window)
    return train_data, test_data

In [None]:
# Plot the synthetic series, with the train and test segments drawn
# back-to-back on a shared sample-index axis.
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style('whitegrid')
train_data, test_data = get_data()
n_train = len(train_data.seq)
n_samples = n_train + len(test_data.seq)

# The test segment starts where the train segment ends.
train_index = np.arange(n_train)
test_index = np.arange(n_train, n_samples)

plt.figure(figsize=(15, 5))
plt.plot(train_index, train_data.seq, label='train')
plt.plot(test_index, test_data.seq, label='test')
plt.legend(loc='upper left')

In [None]:
"""
Transformer를 학습시키기 위한 함수를 정의합니다.
"""
import time
import torch.nn.functional as F
from torch.utils.data import DataLoader

def train(model, epochs=20, lr=1e-3, batch_size=32):
    """Train ``model`` on freshly generated toy data and log each epoch.

    Data comes from ``get_data()`` with its default arguments. The training
    loader is shuffled, the evaluation loader is not. Optimisation uses
    ``torch.optim.AdamW``. After every epoch one line is printed with the
    epoch number, the wall time of the epoch in milliseconds and the mean
    train and evaluation losses.

    Args:
        model: Module to train; it is updated in place.
        epochs: Number of passes over the training data.
        lr: Learning rate for AdamW.
        batch_size: Batch size for both data loaders.
    """
    train_data, test_data = get_data()
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

    optim = torch.optim.AdamW(model.parameters(), lr=lr)
    tmp = "Epoch: {:3d} | Time: {:.4f} ms | Loss/Train: {:.4f} | Loss/Eval: {:.4f}"

    for epoch in range(epochs):
        # perf_counter is monotonic, so the measurement cannot be distorted
        # by system clock adjustments.
        start_time = time.perf_counter()

        # Train single epoch
        loss_train = train_epoch(train_loader, model, optim)

        # Evaluate
        loss_eval = evaluate(test_loader, model)

        # The log line is labelled "ms", so convert seconds to milliseconds.
        elapsed_ms = (time.perf_counter() - start_time) * 1000.0
        print(tmp.format(epoch + 1, elapsed_ms, loss_train, loss_eval))


def train_epoch(dataloader, model, optim):
    """Run one optimisation pass over ``dataloader``.

    For every batch the model output is cut down to its last
    ``targets.size(1)`` positions along axis 1, compared to the targets with
    mean squared error, and one optimiser step is taken.

    Args:
        dataloader: Sized iterable yielding ``(data, targets)`` batches.
        model: Module being trained; switched to training mode.
        optim: Optimiser holding the model's parameters.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.train()
    batch_losses = []
    for inputs, targets in dataloader:
        horizon = targets.size(1)
        predictions = model(inputs)[:, -horizon:]
        batch_loss = F.mse_loss(predictions, targets)
        batch_losses.append(batch_loss.item())

        optim.zero_grad()
        batch_loss.backward()
        optim.step()
    return sum(batch_losses) / len(dataloader)


def evaluate(dataloader, model):
    """Compute the mean per-batch MSE of ``model`` over ``dataloader``.

    The model is put into evaluation mode and gradient tracking is disabled
    for the whole pass. For every batch the model output is cut down to its
    last ``targets.size(1)`` positions along axis 1 before being compared to
    the targets.

    Args:
        dataloader: Sized iterable yielding ``(data, targets)`` batches.
        model: Module to evaluate; left in evaluation mode afterwards.

    Returns:
        Sum of the per-batch losses divided by ``len(dataloader)``.
    """
    model.eval()
    avg_loss = 0.0
    # No backward pass happens here, so avoid building autograd graphs.
    with torch.no_grad():
        for data, targets in dataloader:
            outputs = model(data)[:, -targets.size(1):]
            loss = F.mse_loss(outputs, targets)
            avg_loss += loss.item()
    return avg_loss / len(dataloader)

## Transformer Model

In [None]:
"""TODO
torch.nn.TransformerEncoder를 이용해 Transformer Model을 구현해주세요.
"""
import math
import torch
import torch.nn as nn
import torch.nn.functional as F


class TransformerModel(nn.Module):
    """Exercise placeholder for the Transformer model.

    The body is intentionally empty: per the TODO at the top of this cell,
    the model is to be implemented using ``torch.nn.TransformerEncoder``.
    """
    pass

## Train

In [None]:
"""TODO
model을 정의하고 학습시킵니다.
"""
# Build the model and train it with train()'s default settings.
# `None` is a stand-in for the constructor arguments; replace it with
# whatever the implemented TransformerModel expects.
model = TransformerModel(None)
train(model)