# Transformer Model

## 0. imports

In [1]:
%load_ext jupyter_black

In [2]:
import sys

# Add the parent directory to the import search path. Presumably this is what
# lets the `src` package be imported when the notebook is run from a
# subdirectory of the project -- confirm against the repository layout.
sys.path.append("..")

In [10]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from collections import namedtuple

In [4]:
from src.dataset import ETTDataModule
from src.model import DataEmbedding
from src.model import Attention
from src.model import Encoder, Decoder
from src.model import EncoderLayer, DecoderLayer

## 1. Preliminary setup

In [5]:
# Keyword arguments for the ETT data module, collected in one mapping so the
# same settings can be inspected or reused later in the notebook.
dm_params = dict(
    data_path="../data/ETT-small/ETTh1.csv",
    task="M",
    freq="h",
    target="OT",
    # Window lengths handed to the data module.
    seq_len=96,
    label_len=48,
    pred_len=96,
    # Preprocessing switches.
    use_scaler=True,
    use_time_enc=True,
    batch_size=32,
)

# Build the data module from the settings above.
dm = ETTDataModule(**dm_params)

In [6]:
# One embedding configuration used for both the encoder and decoder inputs.
emb_params = dict(
    c_in=7,
    d_model=512,
    embed_type="time_features",
    freq="h",
    dropout=0.1,
)

# Two separate DataEmbedding instances (encoder first, then decoder), each
# built from the same settings.
enc_embedding, dec_embedding = (DataEmbedding(**emb_params) for _ in range(2))

In [7]:
# Settings passed to every Attention module created in this notebook.
# The None entries are forwarded as-is; how Attention interprets them is
# defined in src.model.
attn_params = dict(
    d_model=512,
    n_heads=8,
    d_keys=None,
    d_values=None,
    scale=None,
    attention_dropout=0.1,
    output_attention=True,
)

# A standalone attention module built from the settings above.
attn_layer = Attention(**attn_params)

In [9]:
# Model-wide sizes. Declared first so the layer settings below can reuse
# d_model instead of repeating the literal.
d_model = 512
num_enc_layers: int = 2
num_dec_layers: int = 1
c_out = 7

# Template keyword arguments for one encoder layer. Kept under its original
# name and with its original keys for any later cell that reads it.
enc_layer_params = {
    "attention": Attention(**attn_params),
    "d_model": d_model,
    "d_ff": 2048,
    "dropout": 0.1,
    "activation": "gelu",
}

# Template keyword arguments for one decoder layer (same remark as above).
dec_layer_params = {
    "self_attention": Attention(**attn_params),
    "cross_attention": Attention(**attn_params),
    "d_model": d_model,
    "d_ff": 2048,
    "dropout": 0.1,
    "activation": "gelu",
}


def _fresh_enc_layer_params():
    """Return encoder-layer kwargs holding a newly constructed attention module.

    Unpacking ``enc_layer_params`` directly for every layer would hand the
    *same* Attention object (and whatever weights it holds) to each
    EncoderLayer, so stacked layers would not be independent.
    """
    return {**enc_layer_params, "attention": Attention(**attn_params)}


def _fresh_dec_layer_params():
    """Return decoder-layer kwargs holding newly constructed attention modules.

    Same reasoning as for the encoder: each DecoderLayer gets its own
    self-attention and cross-attention objects rather than shared ones.
    """
    return {
        **dec_layer_params,
        "self_attention": Attention(**attn_params),
        "cross_attention": Attention(**attn_params),
    }


# Stack of encoder layers, each with its own attention module, followed by a
# LayerNorm over the model dimension.
encoder = Encoder(
    enc_layers=[
        EncoderLayer(**_fresh_enc_layer_params()) for _ in range(num_enc_layers)
    ],
    norm_layer=nn.LayerNorm(d_model),
)

# Stack of decoder layers, a LayerNorm, and a linear projection from d_model
# to the c_out output channels.
decoder = Decoder(
    dec_layers=[
        DecoderLayer(**_fresh_dec_layer_params()) for _ in range(num_dec_layers)
    ],
    norm_layer=nn.LayerNorm(d_model),
    projection=nn.Linear(d_model, c_out),
)

## 2. Transformer

In [16]:
# Transformer hyper-parameters bundled into a single immutable record.
# The field names are given as one whitespace-separated string, which
# namedtuple splits into the individual fields in the order written.
Config = namedtuple(
    "Config",
    "c_in c_out d_model embed_type freq dropout "
    "n_heads d_keys d_values d_ff scale attention_dropout "
    "output_attention activation num_enc_layers num_dec_layers",
)

# Concrete settings; the values mirror the per-component dictionaries used
# earlier in the notebook.
configs = Config(
    # input / output channels and embedding
    c_in=7,
    c_out=7,
    d_model=512,
    embed_type="time_features",
    freq="h",
    dropout=0.1,
    # attention
    n_heads=8,
    d_keys=None,
    d_values=None,
    scale=None,
    attention_dropout=0.1,
    output_attention=True,
    # feed-forward part and layer counts
    d_ff=2048,
    activation="gelu",
    num_enc_layers=2,
    num_dec_layers=1,
)

# TODO: task_name and pred_len still need to be added