In [5]:
import os

import pandas as pd
import numpy as np
from transformers import (
    EarlyStoppingCallback,
    PatchTSTConfig,
    PatchTSTForPrediction,
    Trainer,
    TrainingArguments,
)

from datasets import Dataset

2025-11-01 16:05:50.161959: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-11-01 16:05:50.254372: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX512_FP16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-11-01 16:05:51.714147: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [7]:
data = "coin"

In [8]:
target_X = pd.read_csv(f"../data/{data}/train_input_7.csv").iloc[:, 1:].values.astype(np.float32)
target_y = pd.read_csv(f"../data/{data}/train_output_7.csv").iloc[:, 1:].values.astype(np.float32)

X_train = target_X[:-round(target_X.shape[0] * 0.2), :].astype(np.float32)  ## ??? 왜 있는 거임?
y_train = target_y[:-round(target_y.shape[0] * 0.2)].astype(np.float32)
target_X_val = target_X[-round(target_X.shape[0] * 0.2):, :].astype(np.float32)
target_y_val = target_y[-round(target_y.shape[0] * 0.2):].astype(np.float32)

test_X  = pd.read_csv(f"../data/{data}/val_input_7.csv").iloc[:, 1:].values.astype(np.float32)
test_y  = pd.read_csv(f"../data/{data}/val_output_7.csv").iloc[:, 1:].values.astype(np.float32)

In [9]:
np.random.seed(2)
random_indices1 = np.random.choice(pd.read_csv("../data/M4_train.csv").iloc[:, (1):].index,
                                   size=target_X.shape[0] * 20, replace=True)
X_train = pd.read_csv("../data/M4_train.csv").iloc[:, 1 + (24 * 0):].loc[random_indices1].values.astype(np.float32)
y_train = pd.read_csv("../data/M4_test.csv").iloc[:, 1:].loc[random_indices1].values.astype(np.float32)

In [10]:
X_train.shape[1], y_train.shape[1]

(168, 24)

In [11]:
def create_hf_dataset(data: np.array):
    data_list = [series for series in data]
    data_dict = {"target_values": data_list}
    return Dataset.from_dict(data_dict)

train_dataset = create_hf_dataset(X_train)
test_dataset = create_hf_dataset(y_train)

In [28]:
train_dataset

Dataset({
    features: ['target_values'],
    num_rows: 14460
})

In [15]:
TSTconfig = PatchTSTConfig(
    num_input_channels = 1,
    context_length = X_train.shape[1] + y_train.shape[1],
    prediction_length = y_train.shape[1],
    patch_length = 16,
    patch_stride = 16,

    ## masking
    mask_type = "random",
    random_mask_ratio = 0.4,

    ## TF setting
    d_model = 128,
    num_attention_heads = 16,
    num_hidden_layers=3,
    ffn_dim=256,
    dropout=0.2,
    head_dropout=0.2,
    pooling_type=None,
    channel_attention=False,
    scaling="std",
    loss="mse",
    pre_norm=True,
    norm_type="batchnorm"
)

In [16]:
model = PatchTSTForPrediction(TSTconfig)

In [24]:
X_train.shape

(14460, 168)

In [None]:
y_train.shape

(14460, 24)

In [25]:
select = np.random.choice(len(X_train), size=len(X_train), replace=True)

X_bootstrap = X_train[select]
y_bootstrap = y_train[select]

val_split_index = int(len(X_bootstrap) * 0.8)
X_train_split, X_val_split = X_bootstrap[:val_split_index], X_bootstrap[val_split_index:]
y_train_split, y_val_split = y_bootstrap[:val_split_index], y_bootstrap[val_split_index:]

In [27]:
X_val_split

array([[2644.  , 2635.  , 2714.  , ..., 3657.  , 3493.  , 3486.  ],
       [1000.  , 1004.  , 1000.  , ..., 1304.  , 1309.  , 1342.  ],
       [ 780.  , 1230.  ,  790.  , ..., 1480.  , 1380.  ,  930.  ],
       ...,
       [7986.6 , 7945.7 , 7716.9 , ..., 5505.4 , 5457.3 , 5541.9 ],
       [4702.  , 4437.  , 4562.  , ..., 3619.  , 3750.  , 3668.  ],
       [ 603.23,  416.09,  589.32, ..., 1576.78, 1253.44, 1070.77]],
      dtype=float32)

In [30]:
training_args = TrainingArguments(
    output_dir="./pretrained/patchtst/output/",
    overwrite_output_dir=True,
    learning_rate=0.0001,
    num_train_epochs=100,
    do_eval=True,
    eval_strategy="epoch",
    per_device_train_batch_size=64,
    per_device_eval_batch_size=64,
    dataloader_num_workers=16,
    save_strategy="epoch",
    logging_strategy="epoch",
    save_total_limit=3,
    logging_dir="./pretrained/patchtst/logs/",  # Make sure to specify a logging directory
    load_best_model_at_end=True,  # Load the best model when training ends
    metric_for_best_model="eval_loss",  # Metric to monitor for early stopping
    greater_is_better=False,  # For loss
    label_names=["future_values"],
)

# Create the early stopping callback
early_stopping_callback = EarlyStoppingCallback(
    early_stopping_patience=10,  # Number of epochs with no improvement after which to stop
    early_stopping_threshold=0.0001,  # Minimum improvement required to consider as improvement
)

# define trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
    callbacks=[early_stopping_callback],
    # compute_metrics=compute_metrics,
)

# pretrain
trainer.train()

ValueError: No columns in the dataset match the model's forward method signature: (past_values, past_observed_mask, future_values, output_hidden_states, output_attentions, return_dict, label_ids, label, future_values). The following columns have been ignored: [target_values]. Please check the dataset and model. You may need to set `remove_unused_columns=False` in `TrainingArguments`.

In [9]:
model

PatchTSTModel(
  (scaler): PatchTSTScaler(
    (scaler): PatchTSTStdScaler()
  )
  (patchifier): PatchTSTPatchify()
  (masking): Identity()
  (encoder): PatchTSTEncoder(
    (embedder): PatchTSTEmbedding(
      (input_embedding): Linear(in_features=16, out_features=128, bias=True)
    )
    (positional_encoder): PatchTSTPositionalEncoding(
      (positional_dropout): Identity()
    )
    (layers): ModuleList(
      (0-2): 3 x PatchTSTEncoderLayer(
        (self_attn): PatchTSTAttention(
          (k_proj): Linear(in_features=128, out_features=128, bias=True)
          (v_proj): Linear(in_features=128, out_features=128, bias=True)
          (q_proj): Linear(in_features=128, out_features=128, bias=True)
          (out_proj): Linear(in_features=128, out_features=128, bias=True)
        )
        (dropout_path1): Identity()
        (norm_sublayer1): PatchTSTBatchNorm(
          (batchnorm): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        )
        (