In [32]:
import os
import gc

import pandas as pd
import numpy as np
import torch

from transformers import PatchTSTForPrediction

from torch.utils.data import DataLoader
from datasets import Dataset

In [33]:
data = "coin"

output_dir = "saved_models"
log_dir = os.path.join('logstf', data)

loss_name = "SMAPE"

num_train_epochs = 300
model_num = 1
model_path = "./saved_models"
learning_rate = 1e-6

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [34]:
## target domain
target_X = pd.read_csv(f"../data/{data}/train_input_7.csv").iloc[:, 1:].values.astype(np.float32)
target_y = pd.read_csv(f"../data/{data}/train_output_7.csv").iloc[:, 1:].values.astype(np.float32)

target_X_val = target_X[-round(target_X.shape[0] * 0.2):, :].astype(np.float32)
target_y_val = target_y[-round(target_y.shape[0] * 0.2):].astype(np.float32)
target_X = target_X[:-round(target_X.shape[0] * 0.2), :].astype(np.float32)
target_y = target_y[:-round(target_y.shape[0] * 0.2)].astype(np.float32)

test_X  = pd.read_csv(f"../data/{data}/val_input_7.csv").iloc[:, 1:].values.astype(np.float32)
test_y  = pd.read_csv(f"../data/{data}/val_output_7.csv").iloc[:, 1:].values.astype(np.float32)

In [35]:
def array_to_dataset(X, y):
    X, y = torch.tensor(X), torch.tensor(y)
    X = X.reshape(-1, X.shape[1], 1)
    y = y.reshape(-1, y.shape[1], 1)

    dataset = torch.utils.data.TensorDataset(X, y)

    return dataset

train_dataset = array_to_dataset(target_X, target_y)
val_dataset = array_to_dataset(target_X_val, target_y_val)
test_dataset = array_to_dataset(test_X, test_y)

train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size = 8, shuffle = True)
val_dataloader = torch.utils.data.DataLoader(val_dataset, batch_size = 64)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size = 64)

In [36]:
for k in range(1, model_num+1):
    current_path = os.path.join(model_path, f"model_{loss_name}_{k}.pth")

    backbone_model = PatchTSTForPrediction.from_pretrained(os.path.join(model_path, "PatchTSTBackbone")).to(device)
    backbone_model.load_state_dict(torch.load(current_path))    ## 구조 변경 없이 그대로 로드

In [38]:
def SMAPE(yhat, y):
    numerator = 100*torch.abs(y - yhat)
    denominator = (torch.abs(y) + torch.abs(yhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

In [39]:
optimizer = torch.optim.AdamW(backbone_model.parameters(), lr = learning_rate)
log_data = []

if loss_name == "mse":
    loss_fn = torch.nn.MSELoss()
elif loss_name == "mae":
    loss_fn = torch.nn.L1Loss()
elif loss_name == "SMAPE":
    loss_fn = SMAPE

## early stopping
PATIENCE = 10
best_val_loss = np.inf
patience_counter = 0

for epoc in range(num_train_epochs):
    backbone_model.train()

    total_train_loss = 0

    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        yhat = backbone_model(X).prediction_outputs
        loss = loss_fn(yhat, y)
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()*X.shape[0]

    avg_train_loss = total_train_loss/len(train_dataloader.dataset)

    backbone_model.eval()

    with torch.no_grad():
        yys = []
        yyhats = []

        for XX, yy in val_dataloader:
            XX = XX.to(device)
            yys.append(yy.to(device))
            yyhats.append(backbone_model(XX).prediction_outputs)

        yyhat = torch.concat(yyhats)
        yy = torch.concat(yys)

        val_loss = loss_fn(yyhat, yy).item()

    print(f"Epoch {epoc+1}/{num_train_epochs} | Train Loss: {avg_train_loss:.6f}\t\t Val Loss: {val_loss:.6f}")

    log_data.append({"epoch": epoc, "loss": avg_train_loss, "eval_loss": val_loss})

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_state_dict = backbone_model.state_dict()   ## 저장 없이 결과물만 산출...
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= PATIENCE:
        break

Epoch 1/300 | Train Loss: 3.882582		 Val Loss: 2.746786
Epoch 2/300 | Train Loss: 3.724143		 Val Loss: 2.697295
Epoch 3/300 | Train Loss: 3.630796		 Val Loss: 2.676523
Epoch 4/300 | Train Loss: 3.546190		 Val Loss: 2.659863
Epoch 5/300 | Train Loss: 3.461951		 Val Loss: 2.638196
Epoch 6/300 | Train Loss: 3.426297		 Val Loss: 2.651755
Epoch 7/300 | Train Loss: 3.358912		 Val Loss: 2.652848
Epoch 8/300 | Train Loss: 3.261051		 Val Loss: 2.643697
Epoch 9/300 | Train Loss: 3.271560		 Val Loss: 2.654185
Epoch 10/300 | Train Loss: 3.218434		 Val Loss: 2.664256
Epoch 11/300 | Train Loss: 3.142867		 Val Loss: 2.648431
Epoch 12/300 | Train Loss: 3.113573		 Val Loss: 2.661589
Epoch 13/300 | Train Loss: 3.077688		 Val Loss: 2.654392
Epoch 14/300 | Train Loss: 3.048001		 Val Loss: 2.612900
Epoch 15/300 | Train Loss: 2.999415		 Val Loss: 2.649789
Epoch 16/300 | Train Loss: 2.990046		 Val Loss: 2.674138
Epoch 17/300 | Train Loss: 2.947379		 Val Loss: 2.649256
Epoch 18/300 | Train Loss: 2.924937		 Va

In [27]:
target_y.mean(), target_y_val.mean(), test_y.mean()

(np.float32(151.20018), np.float32(67.18279), np.float32(75.756134))

In [40]:
backbone_model.load_state_dict(best_state_dict)

<All keys matched successfully>

In [41]:
pd.DataFrame(log_data).to_csv(os.path.join(log_dir, f"transfer_{loss_name}_lr{learning_rate}_run{1}.csv"))

In [42]:
with torch.no_grad():
    yys = []
    yyhats = []

    for XX, yy in test_dataloader:
        XX = XX.to(device)
        yys.append(yy.to(device))
        yyhats.append(backbone_model(XX).prediction_outputs)

    yyhat = torch.concat(yyhats)
    yy = torch.concat(yys)

    test_loss = loss_fn(yyhat, yy)

In [43]:
mseLoss = torch.nn.MSELoss()
maeLoss = torch.nn.L1Loss()

def smape(yy, yyhat):
    numerator = 100*abs(yy - yyhat)
    denominator = (abs(yy) + abs(yyhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

print(f"test RMSE: {torch.sqrt(mseLoss(yyhat, yy))}")
print(f"test MAE: {maeLoss(yyhat, yy)}")
print(f"test SMAPE: {smape(yy, yyhat)}")

test RMSE: 3.8674254417419434
test MAE: 1.5500025749206543
test SMAPE: 2.1890923976898193


In [44]:
with torch.no_grad():
    yys = []
    yyhats = []

    for XX, yy in test_dataloader:
        XX = XX.to(device)
        yys.append(yy.to(device))
        yyhats.append(backbone_model(XX).prediction_outputs)

    yyhat = torch.concat(yyhats)
    yy = torch.concat(yys)

    test_loss = loss_fn(yyhat, yy)

In [45]:
mseLoss = torch.nn.MSELoss()
maeLoss = torch.nn.L1Loss()

def smape(yy, yyhat):
    numerator = 100*abs(yy - yyhat)
    denominator = (abs(yy) + abs(yyhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

print(f"test RMSE: {torch.sqrt(mseLoss(yyhat, yy))}")
print(f"test MAE: {maeLoss(yyhat, yy)}")
print(f"test SMAPE: {smape(yy, yyhat)}")

test RMSE: 3.8674254417419434
test MAE: 1.5500025749206543
test SMAPE: 2.1890923976898193


> ????????? 그냥 단순한 Linear head인데...

In [29]:
backbone_model.head

PatchTSTPredictionHead(
  (flatten): Flatten(start_dim=2, end_dim=-1)
  (projection): Linear(in_features=1792, out_features=24, bias=True)
  (dropout): Dropout(p=0.2, inplace=False)
)