In [1]:
import os
import gc

import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader

from transformers import (
    PatchTSTConfig, PatchTSTForPrediction,
    TrainingArguments, Trainer, EarlyStoppingCallback
)
from datasets import Dataset

2025-11-03 11:53:05.856430: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
data = "coin"
loss_name = "mse"
model_num = 1
model_path = "./saved_models"
learning_rate = 1e-7

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
## target domain
target_X = pd.read_csv(f"../data/{data}/train_input_7.csv").iloc[:, 1:].values.astype(np.float32)
target_y = pd.read_csv(f"../data/{data}/train_output_7.csv").iloc[:, 1:].values.astype(np.float32)

target_X_val = target_X[-round(target_X.shape[0] * 0.2):, :].astype(np.float32)
target_y_val = target_y[-round(target_y.shape[0] * 0.2):].astype(np.float32)
target_X = target_X[:-round(target_X.shape[0] * 0.2), :].astype(np.float32)
target_y = target_y[:-round(target_y.shape[0] * 0.2)].astype(np.float32)

test_X  = pd.read_csv(f"../data/{data}/val_input_7.csv").iloc[:, 1:].values.astype(np.float32)
test_y  = pd.read_csv(f"../data/{data}/val_output_7.csv").iloc[:, 1:].values.astype(np.float32)

np.random.seed(2)
random_indices1 = np.random.choice(pd.read_csv("../data/M4_train.csv").iloc[:, (1):].index,
                                   size=target_X.shape[0] * 20, replace=True)

X_data = pd.read_csv("../data/M4_train.csv").iloc[:, 1 + (24 * 0):].loc[random_indices1].values.astype(np.float32)
y_data = pd.read_csv("../data/M4_test.csv").iloc[:, 1:].loc[random_indices1].values.astype(np.float32)

In [6]:
test_X

array([[59.35, 59.85, 60.13, ..., 63.74, 63.76, 63.28],
       [60.58, 60.71, 60.25, ..., 61.8 , 62.09, 62.16],
       [61.69, 60.69, 60.58, ..., 61.15, 61.05, 61.38],
       ...,
       [70.77, 70.95, 70.92, ..., 69.96, 69.96, 69.97],
       [71.76, 71.64, 71.06, ..., 70.55, 70.94, 70.8 ],
       [73.02, 72.86, 72.2 , ..., 71.47, 71.55, 71.3 ]],
      shape=(528, 168), dtype=float32)

In [4]:
def create_hf_dataset(x, y):
    x_list = [s[..., np.newaxis] for s in x]    ## (N, 168) -> (N, 168, 1)
    y_list = [s[..., np.newaxis] for s in y]    ## (N, 24) -> (N, 24, 1)

    data_dict = {
        "past_values": x_list,
        "future_values": y_list
    }

    return Dataset.from_dict(data_dict)

In [16]:
np.random.seed(42)
select = np.random.choice(len(X_data), size=len(X_data), replace=True)
X_bootstrap = X_data[select]
y_bootstrap = y_data[select]

val_split_index = int(len(X_bootstrap) * 0.8)
X_train, X_valid = X_bootstrap[:val_split_index], X_bootstrap[val_split_index:]
y_train, y_valid = y_bootstrap[:val_split_index], y_bootstrap[val_split_index:]

train_dataset = create_hf_dataset(X_train, y_train)
test_dataset = create_hf_dataset(X_valid, y_valid)

In [64]:
for k in range(1, model_num+1):
    current_path = os.path.join(model_path, f"model_{loss_name}_{k}")

    backbone_model = PatchTSTForPrediction.from_pretrained(current_path).to(device)
    backbone = backbone_model.model
    config = backbone_model.config

In [68]:
N_PATCHES = config.patch_length
D_MODEL = config.d_model
T_OUT = target_y.shape[1]
C_NEW = 128

In [24]:
train_dataset = create_hf_dataset(target_X, target_y)
val_dataset = create_hf_dataset(target_X_val, target_y_val)
test_dataset = create_hf_dataset(test_X, test_y)

train_dataset.set_format(type='torch', columns=['past_values', 'future_values'])
val_dataset.set_format(type='torch', columns=['past_values', 'future_values'])
test_dataset.set_format(type='torch', columns=['past_values', 'future_values'])

train_loader = DataLoader(train_dataset, batch_size = 8)
val_loader = DataLoader(val_dataset, batch_size = 64)
test_loader = DataLoader(test_dataset, batch_size = 64)

In [33]:
for batch in val_loader:
    break

In [34]:
batch["past_values"].shape  ## (B, t, 1)

torch.Size([64, 168, 1])

In [65]:
backbone(past_values = batch["past_values"].to("cuda:0")).last_hidden_state.shape    ## (B, 1, 10, 256)

torch.Size([64, 1, 10, 256])

In [124]:
class TransferModel(torch.nn.Module):
    def __init__(self, body, t_out, c_new):
        super().__init__()
        self.body = body
        self.t_out = t_out
        body_out_features = body.encoder.layers[-1].ff[-1].out_features
        self.c_new = c_new
        
        self.flatten = torch.nn.Flatten(start_dim=1, end_dim=-1)
        self.adapter = torch.nn.Linear(body_out_features*10, self.t_out * self.c_new)  ## Dense(128)

        self.head = torch.nn.Sequential(
            torch.nn.Dropout(0.2),
            torch.nn.Linear(128, 64),
            torch.nn.Dropout(0.2),
            torch.nn.Linear(64, 1) 
        )

    def forward(self, x):
        features = self.body(past_values=x).last_hidden_state
        flat_feat = self.flatten(features)  ## (B, body_out_features*10)
        adapted_feat = self.adapter(flat_feat)
        head_input = adapted_feat.view(-1, self.t_out, self.c_new)  ## (B, 24, 128)
        output = self.head(head_input)

        return output

In [139]:
model_instance = TransferModel(backbone, T_OUT, C_NEW).to(device)

In [140]:
model_instance(batch["past_values"].to(device)).shape

torch.Size([16, 24, 1])

In [141]:
optimizer = torch.optim.Adam(model_instance.parameters(), lr=learning_rate)
loss_fn = torch.nn.MSELoss()

In [142]:
batch.keys()

dict_keys(['past_values', 'future_values'])

In [143]:
batch["past_values"].shape

torch.Size([16, 168, 1])

In [144]:
EPOCHS = 2000
PATIENCE = 10
best_val_loss = np.inf
patience_counter = 0
BEST_MODEL_PATH = "./result/test"

model_instance.to(device)

for epoch in range(EPOCHS):
    model_instance.train()
    total_train_loss = 0
    
    for batch in train_loader:
        X_batch, y_batch = batch["past_values"].to(device), batch["future_values"].to(device)
        
        optimizer.zero_grad()
        outputs = model_instance(X_batch)
        loss = loss_fn(outputs, y_batch)
        loss.backward()
        optimizer.step()
        
        total_train_loss += loss.item()*batch["past_values"].shape[0]

    train_loss = total_train_loss / train_dataset.num_rows

    # evaluation
    model_instance.eval()
    total_val_loss = 0
    
    with torch.no_grad():
        for val_batch in val_loader:
            X_val_batch, y_val_batch = val_batch["past_values"].to(device), val_batch["future_values"].to(device)
            
            val_outputs = model_instance(X_val_batch)
            val_loss = loss_fn(val_outputs, y_val_batch)
            total_val_loss += val_loss.item()*val_batch["past_values"].shape[0]

    val_loss = total_val_loss / val_dataset.num_rows
    
    print(f"Epoch {epoch+1}/{EPOCHS} - Train Loss: {train_loss:.6f} - Val Loss: {val_loss:.6f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        # Keras의 restore_best_weights=True (모델 저장)
        torch.save(model_instance.state_dict(), BEST_MODEL_PATH) 
        print(f"  > Val loss 개선. 베스트 모델 저장. (Loss: {best_val_loss:.6f})")
        patience_counter = 0 # 인내심 초기화
    else:
        patience_counter += 1
        print(f"  > Val loss 개선 없음. (Patience: {patience_counter}/{PATIENCE})")
    
    if patience_counter >= PATIENCE:
        print(f"--- 조기 종료 (Epoch {epoch+1}) ---")
        break

print("--- 훈련 종료 ---")
# Keras의 restore_best_weights (베스트 모델 불러오기)
model_instance.load_state_dict(torch.load(BEST_MODEL_PATH))
print(f"가장 좋았던 모델(Val Loss: {best_val_loss:.6f})을 로드했습니다.")

Epoch 1/2000 - Train Loss: 26554.466702 - Val Loss: 4728.464389
  > Val loss 개선. 베스트 모델 저장. (Loss: 4728.464389)
Epoch 2/2000 - Train Loss: 26254.817364 - Val Loss: 4611.591836
  > Val loss 개선. 베스트 모델 저장. (Loss: 4611.591836)
Epoch 3/2000 - Train Loss: 25960.967809 - Val Loss: 4496.641962
  > Val loss 개선. 베스트 모델 저장. (Loss: 4496.641962)
Epoch 4/2000 - Train Loss: 25678.530838 - Val Loss: 4383.305945
  > Val loss 개선. 베스트 모델 저장. (Loss: 4383.305945)
Epoch 5/2000 - Train Loss: 25393.768440 - Val Loss: 4271.443834
  > Val loss 개선. 베스트 모델 저장. (Loss: 4271.443834)
Epoch 6/2000 - Train Loss: 25089.297174 - Val Loss: 4160.696126
  > Val loss 개선. 베스트 모델 저장. (Loss: 4160.696126)
Epoch 7/2000 - Train Loss: 24814.659719 - Val Loss: 4050.924530
  > Val loss 개선. 베스트 모델 저장. (Loss: 4050.924530)
Epoch 8/2000 - Train Loss: 24541.910852 - Val Loss: 3942.260525
  > Val loss 개선. 베스트 모델 저장. (Loss: 3942.260525)
Epoch 9/2000 - Train Loss: 24252.273091 - Val Loss: 3834.227241
  > Val loss 개선. 베스트 모델 저장. (Loss: 3834.

In [145]:
model_instance(batch["past_values"].to(device)).shape

torch.Size([2, 24, 1])

In [146]:
preds = []

for batch in test_loader:
    preds.append(model_instance(batch["past_values"].to(device)))

In [147]:
torch.tensor(test_y)

tensor([[63.7900, 63.6200, 64.2500,  ..., 61.8000, 62.0900, 62.1600],
        [61.2900, 62.5900, 61.7500,  ..., 61.1500, 61.0500, 61.3800],
        [60.7600, 60.8100, 61.4700,  ..., 59.1000, 59.0200, 59.1000],
        ...,
        [69.9900, 69.9200, 70.2000,  ..., 70.5500, 70.9400, 70.8000],
        [70.9300, 70.8000, 71.2400,  ..., 71.4700, 71.5500, 71.3000],
        [71.0000, 70.8500, 71.0100,  ...,  0.0000,  0.0000,  0.0000]])

In [148]:
torch.sqrt(torch.mean((torch.tensor(test_y) - torch.concat(preds, axis = 0).squeeze().data.to('cpu'))**2))

tensor(17.5024)

In [149]:
torch.mean(abs(torch.tensor(test_y) - torch.concat(preds, axis = 0).squeeze().data.to('cpu')))

tensor(13.7543)

In [150]:
test_X.mean()

np.float32(75.789345)

> `16, 13` 수준으로 나옴. 일단 훨씬 나아지긴 했음