In [1]:
import numpy as np
import pandas as pd
from copy import deepcopy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

### Device 세팅

In [2]:
# Use Apple's Metal backend (MPS) when PyTorch reports it as available,
# otherwise fall back to the CPU.
if torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

# Fix the random seed so repeated runs start from the same RNG state.
torch.manual_seed(1)
if device == 'mps':
    torch.mps.manual_seed(1)

print(device)

mps


### 모델 및 학습 정의

In [3]:
# --- Regularisation / optimisation -----------------------------------------
# Dropout probability handed to the attention layers of the model below.
DROP_OUT_R = 0.5
# Presumably the optimizer step size; not referenced in the cells shown here.
LEARNING_RATE = 1e-3

# --- Training-loop sizes (not referenced in the cells shown here) -----------
EPOCHS = 500
BATCH_SIZE = 50
# Presumably the input window length; the model below uses the same value (5).
Seq_length = 5
# Disabled setting, kept for reference:
# VALIDATION_RATE = 0.2

# --- Logging ------------------------------------------------------------------
# Presumably the number of epochs between progress checks -- TODO confirm.
CHECK_INTERVAL = 10
# Disabled setting, kept for reference:
# EARLY_STOP = 30

In [4]:
class PositionalEncoding(nn.Module):
    """Fixed (non-learned) sinusoidal positional encoding.

    Builds a ``(max_len, d_model)`` table where even channels hold
    ``sin(pos / 10000 ** (2i / d_model))`` and odd channels hold the matching
    ``cos`` term. The table is stored as the non-persistent buffer ``P_E``:
    it follows the module through ``.to()`` / ``.cpu()`` / ``.cuda()`` but is
    not written to ``state_dict``, so checkpoint keys stay unchanged.

    Args:
        d_model: Number of channels per position (may be odd).
        max_len: Largest sequence length the table can serve.
        device: Device on which the table is initially created. Optional;
            ``None`` uses PyTorch's default device.
    """

    def __init__(self, d_model, max_len, device=None):
        super().__init__()

        # Position indices as a column vector: (max_len, 1).
        pos = torch.arange(0, max_len, dtype=torch.float, device=device).unsqueeze(dim=1)

        # Even channel indices 0, 2, 4, ... (the "2i" of the formula),
        # shape (ceil(d_model / 2),).
        _2i = torch.arange(0, d_model, step=2, dtype=torch.float, device=device)

        # Shared argument of sin/cos, broadcast to (max_len, ceil(d_model / 2)).
        angles = pos / 10000 ** (_2i / d_model)

        table = torch.zeros(max_len, d_model, device=device)
        table[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd column than even columns,
        # so drop the surplus angle column instead of failing on a shape mismatch.
        table[:, 1::2] = torch.cos(angles[:, : d_model // 2])

        # A buffer is never trained (no gradient) and moves with the module;
        # persistent=False keeps it out of state_dict.
        self.register_buffer("P_E", table, persistent=False)

    def forward(self, x):
        """Return the encoding rows matching the sequence length of ``x``.

        Args:
            x: 3-D tensor whose second dimension is the sequence length.

        Returns:
            Tensor of shape ``(seq_len, d_model)``. It is a slice of the
            table; ``x`` itself is not modified or added to.

        Raises:
            ValueError: If the sequence is longer than ``max_len``.
        """
        _, seq_len, _ = x.size()
        max_len = self.P_E.size(0)
        if seq_len > max_len:
            raise ValueError(
                f"sequence length {seq_len} exceeds max_len {max_len} of the positional encoding"
            )
        return self.P_E[:seq_len, :]

In [5]:
class MC_PE_2MAttn_use_VIT(nn.Module):
    """Two stacked self-attention blocks with positional encoding and a scalar head.

    Pipeline for an input of shape ``(batch, seq_len, d_model)``:
    add positional encoding -> self-attention -> linear (d_model -> hidden_dim)
    -> self-attention -> linear (hidden_dim -> 1) -> flatten to
    ``(batch, seq_len)`` -> linear (seq_len -> 1).

    Args:
        d_model: Channels per time step of the input (default 30).
        seq_len: Number of time steps of the input (default 5).
        hidden_dim: Width after the first linear layer (default 15).
        num_heads: Heads of both attention layers; must divide ``d_model``
            and ``hidden_dim`` (default 5).
        dropout: Attention dropout probability. ``None`` (default) uses the
            notebook-level ``DROP_OUT_R`` at construction time.
    """

    def __init__(self, d_model=30, seq_len=5, hidden_dim=15, num_heads=5, dropout=None):
        super().__init__()

        if dropout is None:
            dropout = DROP_OUT_R

        # Stored so forward() can flatten to (batch, seq_len) without a magic number.
        self.seq_len = seq_len

        self.PE_layer = PositionalEncoding(d_model, seq_len, device)

        self.MA_layer1 = nn.MultiheadAttention(
            embed_dim=d_model,
            num_heads=num_heads,
            batch_first=True,
            dropout=dropout,
        )

        self.FC_layer1 = nn.Linear(d_model, hidden_dim)

        self.MA_layer2 = nn.MultiheadAttention(
            embed_dim=hidden_dim,
            num_heads=num_heads,
            batch_first=True,
            dropout=dropout,
        )

        self.FC_layer2 = nn.Linear(hidden_dim, 1)
        self.FC_layer3 = nn.Linear(seq_len, 1)

    def forward(self, input_x):
        """Run the model.

        Args:
            input_x: Tensor of shape ``(batch, seq_len, d_model)``. It is not
                modified.

        Returns:
            Tuple ``(prediction, pe_x)``: ``prediction`` has shape
            ``(batch, 1)`` and ``pe_x`` is the ``(seq_len, d_model)``
            positional-encoding table that was added to the input.
        """
        pe_x = self.PE_layer(input_x)
        # Out-of-place add: `input_x += pe_x` would overwrite the caller's tensor.
        encoded_x = input_x + pe_x

        ma_y1, _ = self.MA_layer1(encoded_x, encoded_x, encoded_x)
        fc_y1 = self.FC_layer1(ma_y1)

        ma_y2, _ = self.MA_layer2(fc_y1, fc_y1, fc_y1)
        fc_y2 = self.FC_layer2(ma_y2)

        # (batch, seq_len, 1) -> (batch, seq_len) before the final projection.
        fc_y3 = self.FC_layer3(fc_y2.reshape(-1, self.seq_len))
        return fc_y3, pe_x

MC_PE_2MAttn = MC_PE_2MAttn_use_VIT().to(device)

In [6]:
# Show how 150 consecutive integers are laid out as one (1, 5, 30) batch.
torch.arange(150).reshape(1, 5, 30)

tensor([[[  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
           14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,  27,
           28,  29],
         [ 30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,
           44,  45,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,
           58,  59],
         [ 60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,
           74,  75,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,
           88,  89],
         [ 90,  91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103,
          104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117,
          118, 119],
         [120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133,
          134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147,
          148, 149]]])

In [7]:
# Push one dummy (1, 5, 30) float batch through the model; only the second
# return value (the positional-encoding table) is kept.
probe_input = torch.arange(0, 150, dtype=torch.float).reshape(1, 5, 30)
probe_input = probe_input.to(device)
_, pe_check = MC_PE_2MAttn(probe_input)

In [8]:
# Display the positional-encoding table returned by the model's forward pass.
pe_check

tensor([[ 0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,
          0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,
          0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,
          1.0000e+00,  0.0000e+00,  1.0000e+00,  0.0000e+00,  1.0000e+00],
        [ 8.4147e-01,  5.4030e-01,  5.1514e-01,  8.5711e-01,  2.8870e-01,
          9.5742e-01,  1.5783e-01,  9.8747e-01,  8.5664e-02,  9.9632e-01,
          4.6399e-02,  9.9892e-01,  2.5116e-02,  9.9968e-01,  1.3593e-02,
          9.9991e-01,  7.3564e-03,  9.9997e-01,  3.9811e-03,  9.9999e-01,
          2.1544e-03,  1.0000e+00,  1.1659e-03,  1.0000e+00,  6.3096e-04,
          1.0000e+00,  3.4145e-04,  1.0000e+00,  1.8478e-04,  1.0000e+00],
        [ 9.0930e-01, -4.1615e-01,  8.8306e-01,  4.6926e-01,  5.5281e-01,
          8.3331e-01,  3.1170e-01,  

In [19]:
import plotly.express as px

# pe_check is (position, channel). px.imshow draws rows along the y axis and
# columns along the x axis, so y is the position and x is the channel index i.
fig = px.imshow(pe_check.detach().cpu().numpy(), aspect="auto")

# One shared title style for both axes.
axis_title_style = dict(
    title_font_size=30,
    title_font_color='black',
    title_font_family='Courier',
)
fig.update_xaxes(title_text='i', **axis_title_style)
fig.update_yaxes(title_text='Pos', **axis_title_style)

fig.update_layout(width=1100, height=400)
fig.show()