In [1]:
import os
import gc

import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader

from transformers import (
    PatchTSTConfig, PatchTSTForPrediction,
    TrainingArguments, Trainer, EarlyStoppingCallback
)
from datasets import Dataset

2025-11-02 19:02:56.928551: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
data = "coin"
output_dir = "./saved_models"
LOG_DIR = "./logstf/coin"
loss_name = "mse"
learning_rate = 5e-5

In [3]:
## target domain
target_X = pd.read_csv(f"../data/{data}/train_input_7.csv").iloc[:, 1:].values.astype(np.float32)

np.random.seed(2)
random_indices1 = np.random.choice(pd.read_csv("../data/M4_train.csv").iloc[:, (1):].index,
                                   size=target_X.shape[0] * 20, replace=True)

X_data = pd.read_csv("../data/M4_train.csv").iloc[:, 1 + (24 * 0):].loc[random_indices1].values.astype(np.float32)
y_data = pd.read_csv("../data/M4_test.csv").iloc[:, 1:].loc[random_indices1].values.astype(np.float32)

In [None]:
TSTconfig = PatchTSTConfig(
    num_input_channels = 1,
    context_length = X_data.shape[1],
    prediction_length = y_data.shape[1],

    patch_length = 16,
    patch_stride = 16,
    d_model = 256,
    num_attention_heads = 8,
    num_hidden_layers = 8,
    ffn_dim = 512,
    dropout = 0.2,
    head_dropout = 0.2,
    pooling_type = None,
    channel_attention = False,
    scaling = "std",
    loss = loss_name,
    pre_norm = True,
    do_mask_input = False
)

In [5]:
model = PatchTSTForPrediction(TSTconfig)

In [6]:
np.random.seed(42)
select = np.random.choice(len(X_data), size=len(X_data), replace=True)
X_bootstrap = X_data[select]
y_bootstrap = y_data[select]

val_split_index = int(len(X_bootstrap) * 0.8)
X_train, X_valid = X_bootstrap[:val_split_index], X_bootstrap[val_split_index:]
y_train, y_valid = y_bootstrap[:val_split_index], y_bootstrap[val_split_index:]

In [7]:
def create_hf_dataset(x, y):
    x_list = [s[..., np.newaxis] for s in x]    ## (N, 168) -> (N, 168, 1)
    y_list = [s[..., np.newaxis] for s in y]    ## (N, 24) -> (N, 24, 1)

    data_dict = {
        "past_values": x_list,
        "future_values": y_list
    }

    return Dataset.from_dict(data_dict)

In [8]:
train_dataset = create_hf_dataset(X_train, y_train)
test_dataset = create_hf_dataset(X_valid, y_valid)

In [9]:
training_args = TrainingArguments(
    output_dir = output_dir,
    overwrite_output_dir = True,
    learning_rate = learning_rate,
    num_train_epochs = 2000,
    do_eval = True,
    eval_strategy = "epoch",
    per_device_train_batch_size = 256,
    per_device_eval_batch_size = 256,
    dataloader_num_workers = 16,
    save_strategy = "epoch",
    logging_strategy = "epoch",
    save_total_limit = 1,
    load_best_model_at_end = True,
    metric_for_best_model = "eval_loss",
    greater_is_better = False,
    label_names = ["future_values"]
)

early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience = 10,
    early_stopping_threshold = 0.001
)

In [10]:
trainer = Trainer(
    model = model,
    args = training_args,
    train_dataset = train_dataset,
    eval_dataset = test_dataset,
    callbacks = [early_stopping_callback]
)

In [11]:
trainer.train()

Epoch,Training Loss,Validation Loss
1,1585352.0,1517283.625
2,961546.8696,799644.6875
3,833653.913,720968.6875
4,755879.0435,655159.125
5,717851.1304,606954.5
6,658094.0,590459.5
7,682773.5217,582107.375
8,593815.7826,592338.6875
9,574113.4783,594332.875
10,507859.7826,526616.8125


TrainOutput(global_step=3818, training_loss=293287.6998428497, metrics={'train_runtime': 274.6969, 'train_samples_per_second': 84223.74, 'train_steps_per_second': 334.915, 'total_flos': 4147332083951616.0, 'train_loss': 293287.6998428497, 'epoch': 83.0})

In [None]:
log_data = trainer.state.log_history
df = pd.DataFrame(log_data)

df_train = df[df['loss'].notna()][['epoch', 'loss']]
df_eval = df[df['eval_loss'].notna()][['epoch', 'eval_loss']]

final_df = pd.merge(df_train, df_eval, on="epoch", how="outer")

In [None]:
final_df.to_csv(os.path.join(LOG_DIR, f"pretrain_{loss_name}_model{}.csv"))

Unnamed: 0,epoch,loss,eval_loss
0,1.0,1.585352e+06,1.517284e+06
1,2.0,9.615469e+05,7.996447e+05
2,3.0,8.336539e+05,7.209687e+05
3,4.0,7.558790e+05,6.551591e+05
4,5.0,7.178511e+05,6.069545e+05
...,...,...,...
78,79.0,1.297128e+05,3.029985e+05
79,80.0,1.200142e+05,3.016360e+05
80,81.0,1.163667e+05,2.924721e+05
81,82.0,1.133141e+05,2.971394e+05


In [21]:
# 1. 훈련된 베스트 모델 로드
best_model_path = "./pretrained/MAE/checkpoint-17376" # 베스트 모델 경로
best_model = PatchTSTForPrediction.from_pretrained(best_model_path)
best_model.eval()

# 2. test_dataset으로 DataLoader 생성
# (test_dataset은 'past_values'와 'future_values'를 포함하는 Hf Dataset)
test_dataset.set_format(type='torch', columns=['past_values', 'future_values'])
test_loader = DataLoader(test_dataset, batch_size=64)

total_real_mae = 0
total_samples = 0

print("--- '실제 MAE' (Unscaled) 계산 시작 ---")

with torch.no_grad():
    for batch in test_loader:
        # 모델의 forward pass 실행
        outputs = best_model(
            past_values=batch['past_values'],
            # [참고] 'future_values'를 전달하지 않아도 예측은 가능합니다.
            # (전달하면 outputs.loss도 계산해줌)
        )

        if isinstance(outputs.prediction_outputs, tuple):
            unscaled_preds = outputs.prediction_outputs[0]
        else:
            unscaled_preds = outputs.prediction_outputs # (튜플이 아닌 경우 대비)

        unscaled_labels = batch['future_values']
        
        #    (배치 전체의 평균 MAE)
        real_mae = torch.abs(unscaled_preds - unscaled_labels).mean()
        
        # (정확한 계산을 위해 배치 크기 가중 평균)
        total_real_mae += real_mae.item() * len(batch['future_values'])
        total_samples += len(batch['future_values'])

final_real_mae = total_real_mae / total_samples
print(f"--- 훈련된 모델의 '실제 MAE' (Unscaled) ---")
print(f"Final Real MAE: {final_real_mae}")

--- '실제 MAE' (Unscaled) 계산 시작 ---
--- 훈련된 모델의 '실제 MAE' (Unscaled) ---
Final Real MAE: 4368.487526203752


In [22]:
from tbparse import SummaryReader

2025-11-01 22:57:34.252156: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [34]:
# 로그 파일이 있는 폴더 경로
log_dir = "./logs/events.out.tfevents.1761999330.cd0dd4fe3564.737145.0"

# 1. 로그 폴더를 읽습니다.
reader = SummaryReader(log_dir)

# 2. 스칼라 값(loss 등)을 DataFrame으로 변환합니다.
df_scalars = reader.scalars

In [8]:
# 훈련된 베스트 모델 로드
best_model_path = "./pretrained/checkpoint-12851" # 베스트 모델 경로
best_model = PatchTSTForPrediction.from_pretrained(best_model_path)
best_model.eval()

# 2. test_dataset으로 DataLoader 생성
# (test_dataset은 'past_values'와 'future_values'를 포함하는 Hf Dataset)
test_dataset.set_format(type='torch', columns=['past_values', 'future_values'])
test_loader = DataLoader(test_dataset, batch_size=64)

total_real_mae = 0
total_samples = 0

with torch.no_grad():
    for batch in test_loader:
        # 모델의 forward pass 실행
        outputs = best_model(
            past_values=batch['past_values']   ## attribute
        )

        if isinstance(outputs.prediction_outputs, tuple):
            unscaled_preds = outputs.prediction_outputs[0]
        else:
            unscaled_preds = outputs.prediction_outputs # (튜플이 아닌 경우 대비)

        unscaled_labels = batch['future_values']    ## label
        
        #    (배치 전체의 평균 MAE)
        real_mae = torch.abs(unscaled_preds - unscaled_labels).mean()
        
        # (정확한 계산을 위해 배치 크기 가중 평균)
        total_real_mae += real_mae.item() * len(batch['future_values'])
        total_samples += len(batch['future_values'])

final_real_mae = total_real_mae / total_samples
print(f"--- 훈련된 모델의 '실제 MAE' (Unscaled) ---")
print(f"Final Real MAE: {final_real_mae}")

--- 훈련된 모델의 '실제 MAE' (Unscaled) ---
Final Real MAE: 360.81136456037126


In [55]:
total_real_mse = 0
total_samples = 0
loss_fn = torch.nn.MSELoss()

with torch.no_grad():
    for batch in test_loader:
        # 모델의 forward pass 실행
        outputs = best_model(
            past_values=batch['past_values']   ## attribute
        )

        if isinstance(outputs.prediction_outputs, tuple):
            unscaled_preds = outputs.prediction_outputs[0]
        else:
            unscaled_preds = outputs.prediction_outputs # (튜플이 아닌 경우 대비)

        unscaled_labels = batch['future_values']    ## label
        
        real_mse = loss_fn(unscaled_preds, unscaled_labels)
        
        # (정확한 계산을 위해 배치 크기 가중 평균)
        total_real_mae += real_mse.item() * len(batch['future_values'])
        total_samples += len(batch['future_values'])

final_real_mae = total_real_mae / total_samples
print(f"--- 훈련된 모델의 '실제 MSE' (Unscaled) ---")
print(f"Final Real MSE: {final_real_mae}")

--- 훈련된 모델의 '실제 MSE' (Unscaled) ---
Final Real MSE: 697501.4553393184


In [None]:
final_real_mae**0.5

835.1655257129082

In [52]:
unscaled_preds = outputs.prediction_outputs
loss = torch.nn.MSELoss()(unscaled_preds, unscaled_labels)

In [53]:
loss

tensor(222041.7812, grad_fn=<MseLossBackward0>)

In [20]:
y_data.mean()

np.float32(4472.5435)

In [None]:
torch.cuda.empty_cache()
gc.collect()

In [8]:
best_model_path = "./pretrained/checkpoint-12851" # 베스트 모델 경로
best_model = PatchTSTForPrediction.from_pretrained(best_model_path)

In [27]:
MLP_head = torch.nn.Sequential(
    torch.nn.Flatten(start_dim=2, end_dim=-1),
    torch.nn.Linear(1280, 128),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(128, 64),
    torch.nn.Dropout(0.2),
    torch.nn.Linear(64, 1)
)

In [36]:
model_instance = torch.nn.Sequential(
    best_model.model,
    MLP_head
)
optimizr = torch.optim.Adam(model_instance.parameters(), lr = 1e-6)
loss_fn = torch.nn.MSELoss()

In [None]:
model_file = os.path.join('saved_models', f'model_{lossf}_{i}.h5')
base_loaded = PatchTSTForPrediction.from_pretrained(model_file)

In [37]:
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience = 10,
    early_stopping_threshold = 0.001
)

In [42]:
X = torch.tensor(X_train)