In [1]:
import os
import glob
import gc

import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader

from transformers import PatchTSTForPrediction

from datasets import Dataset

2025-11-04 22:13:16.795728: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
data = "coin"

output_dir = "saved_models"
log_dir = os.path.join('logstf', data)

os.makedirs(log_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok = True)

loss_name = "SMAPE"

learning_rate = 5e-5
num_train_epochs = 400

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
## target domain
target_X = pd.read_csv(f"../data/{data}/train_input_7.csv").iloc[:, 1:].values.astype(np.float32)

np.random.seed(2)
random_indices1 = np.random.choice(pd.read_csv("../data/M4_train.csv").iloc[:, (1):].index,
                                   size=target_X.shape[0] * 20, replace=True)

X_data = pd.read_csv("../data/M4_train.csv").iloc[:, 1 + (24 * 0):].loc[random_indices1].values.astype(np.float32)
y_data = pd.read_csv("../data/M4_test.csv").iloc[:, 1:].loc[random_indices1].values.astype(np.float32)

In [4]:
## bootstrap
np.random.seed(42)
select = np.random.choice(len(X_data), size=len(X_data), replace=True)
X_bootstrap = X_data[select]
y_bootstrap = y_data[select]

val_split_index = int(len(X_bootstrap) * 0.8)

def to_tensor_and_reshape(array):
    result = torch.tensor(array)
    result = result.reshape(-1, result.shape[1], 1)

    return result

X_train, X_valid = to_tensor_and_reshape(X_bootstrap[:val_split_index]), to_tensor_and_reshape(X_bootstrap[val_split_index:])
y_train, y_valid = to_tensor_and_reshape(y_bootstrap[:val_split_index]), to_tensor_and_reshape(y_bootstrap[val_split_index:])

## setting dataloader
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size = 256, shuffle = True, num_workers = 16)

test_dataset = torch.utils.data.TensorDataset(X_valid, y_valid)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size = 256, num_workers = 16)

In [5]:
backbone_model = PatchTSTForPrediction.from_pretrained(os.path.join(output_dir, "PatchTSTBackbone")).to(device)

In [6]:
## custom loss function
def SMAPE(yhat, y):
    numerator = 100*torch.abs(y - yhat)
    denominator = (torch.abs(y) + torch.abs(yhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

def MAPE(yhat, y):
    return torch.mean(100*torch.abs((y - yhat) / y))

class MASE(torch.nn.Module):
    def __init__(self, training_data, period = 1):
        super().__init__()
        ## 원본 코드 구현, 사실상 MAE와 동일, 잘못 짜여진 코드, 일단은 하던대로 할 것.
        self.scale = torch.mean(torch.abs(torch.tensor(training_data[period:] - training_data[:-period])))
    
    def forward(self, yhat, y):
        error = torch.abs(y - yhat)
        return torch.mean(error) / self.scale

In [None]:
if loss_name == "mse":
    loss_fn = torch.nn.MSELoss()
    lr = learning_rate
elif loss_name == "mae":
    loss_fn = torch.nn.L1Loss() ## 2배면 잘 작동
    lr = learning_rate * 2
elif loss_name == "SMAPE":
    loss_fn = SMAPE             ## 4배면 잘 작동
    lr = learning_rate * 4
elif loss_name == "mape":
    loss_fn = MAPE              ## 2배면 잘 작동
    lr = learning_rate * 2
elif loss_name == "MASE":
    loss_fn = MASE(y_data, y_data.shape[1])
    lr = learning_rate * 14  ## 학습률 정상화... 그래도 잘 안됨
else:
    raise Exception("Your loss name is not valid.")

optimizer = torch.optim.AdamW(backbone_model.parameters(), lr = lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = num_train_epochs)
log_data = []

## early stopping
PATIENCE = 10
best_val_loss = np.inf
patience_counter = 0

for epoc in range(num_train_epochs):
    backbone_model.train()

    total_train_loss = 0

    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        yhat = backbone_model(X).prediction_outputs
        loss = loss_fn(yhat, y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(backbone_model.parameters(), max_norm = 1.0)
        optimizer.step()

        total_train_loss += loss.item()*X.shape[0]

    avg_train_loss = total_train_loss / len(train_dataloader.dataset)

    backbone_model.eval()

    with torch.no_grad():
        yys = []
        yyhats = []

        for XX, yy in test_dataloader:
            XX = XX.to(device)
            yys.append(yy.to(device))
            yyhats.append(backbone_model(XX).prediction_outputs)

        yyhat = torch.concat(yyhats)
        yy = torch.concat(yys)

        val_loss = loss_fn(yyhat, yy)

    print(f"Epoch {epoc+1}/{num_train_epochs} | Train Loss: {avg_train_loss:.6f}\t\t Val Loss: {val_loss:.6f}")

    log_data.append({"epoch": epoc, "loss": avg_train_loss, "eval_loss": val_loss.item()})

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(backbone_model.state_dict(), os.path.join(output_dir, f"model_{loss_name}_{1}.pth"))
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= PATIENCE:
        break

    scheduler.step()

Epoch 1/400 | Train Loss: 12.145416		 Val Loss: 11.327583
Epoch 2/400 | Train Loss: 10.011508		 Val Loss: 9.162341
Epoch 3/400 | Train Loss: 9.643101		 Val Loss: 9.140893
Epoch 4/400 | Train Loss: 9.299493		 Val Loss: 8.922081
Epoch 5/400 | Train Loss: 9.067393		 Val Loss: 8.389436
Epoch 6/400 | Train Loss: 8.689944		 Val Loss: 8.493913
Epoch 7/400 | Train Loss: 8.589314		 Val Loss: 9.540770
Epoch 8/400 | Train Loss: 8.371845		 Val Loss: 8.119756
Epoch 9/400 | Train Loss: 8.215777		 Val Loss: 8.044712
Epoch 10/400 | Train Loss: 8.005516		 Val Loss: 8.354805
Epoch 11/400 | Train Loss: 7.795427		 Val Loss: 7.873311
Epoch 12/400 | Train Loss: 7.736048		 Val Loss: 7.944956
Epoch 13/400 | Train Loss: 7.654992		 Val Loss: 7.823000
Epoch 14/400 | Train Loss: 7.477790		 Val Loss: 8.021585
Epoch 15/400 | Train Loss: 7.485159		 Val Loss: 7.544950
Epoch 16/400 | Train Loss: 7.269270		 Val Loss: 7.458125
Epoch 17/400 | Train Loss: 7.163961		 Val Loss: 7.437646
Epoch 18/400 | Train Loss: 7.032316		

In [8]:
## save log
pd.DataFrame(log_data).to_csv(os.path.join(log_dir, f"pretrain_{loss_name}_model{1}.csv"))

## load best model
backbone_model.load_state_dict(torch.load(os.path.join(output_dir, f"model_{loss_name}_{1}.pth")))

<All keys matched successfully>

In [9]:
yyhats = []
yys = []

with torch.no_grad():
    for XX, yy in test_dataloader:
        XX = XX.to(device)
        yys.append(yy.to(device))
        yyhats.append(backbone_model(XX).prediction_outputs)

In [10]:
yyhat, yy = torch.concat(yyhats).squeeze(), torch.concat(yys).squeeze()

In [11]:
mseLoss = torch.nn.MSELoss()
maeLoss = torch.nn.L1Loss()

def smape(yy, yyhat):
    numerator = 100*abs(yy - yyhat)
    denominator = (abs(yy) + abs(yyhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

print(f"test RMSE: {torch.sqrt(mseLoss(yyhat, yy))}")
print(f"test MAE: {maeLoss(yyhat, yy)}")
print(f"test SMAPE: {smape(yy, yyhat)}")

test RMSE: 675.2691650390625
test MAE: 224.3711395263672
test SMAPE: 5.346951961517334


In [12]:
yyhat = pd.DataFrame(yyhat.to("cpu"))
yyhat.columns = [f"{i}A" for i in range(yyhat.shape[1])]
yy = pd.DataFrame(yy.to("cpu"))
yy.columns = [f"{i}B" for i in range(yyhat.shape[1])]

In [13]:
val_result = pd.concat([yyhat, yy], axis = 1).sort_index(axis = 1)
val_result.columns = [f"prediction_{(i+1)//2}" if i%2 == 1 else f"ground_truth_{(i+1)//2}" for i in range(1, val_result.shape[1]+1)]
val_result.to_csv(os.path.join(log_dir, f"prediction_val_results_{loss_name}_model{1}.csv"), index = False)

In [14]:
val_result

Unnamed: 0,prediction_1,ground_truth_1,prediction_2,ground_truth_2,prediction_3,ground_truth_3,prediction_4,ground_truth_4,prediction_5,ground_truth_5,...,prediction_20,ground_truth_20,prediction_21,ground_truth_21,prediction_22,ground_truth_22,prediction_23,ground_truth_23,prediction_24,ground_truth_24
0,1752.652344,1745.800049,1792.139893,1869.199951,1801.667236,1854.199951,1800.565430,1863.800049,1823.670654,1853.800049,...,1770.563965,1833.199951,1770.020996,1851.099976,1792.988770,1854.900024,1779.965698,1854.199951,1797.135132,1870.400024
1,10133.837891,9980.000000,10864.724609,10070.000000,10841.255859,10190.000000,10977.914062,10200.000000,10914.329102,10210.000000,...,10184.203125,9940.000000,10418.611328,9960.000000,10535.654297,9950.000000,10472.678711,9970.000000,10483.978516,10050.000000
2,1474.902954,1445.834961,1497.290283,1433.120972,1471.403198,1439.545044,1466.879883,1441.876953,1501.551147,1440.109009,...,1473.575684,1441.203003,1490.659790,1435.150024,1477.021240,1420.425049,1508.834961,1441.050049,1493.636108,1425.350952
3,6539.148926,6546.056641,6636.816895,6936.538574,6688.670410,6724.092285,6591.450684,6635.496582,6903.552734,6887.514160,...,6298.084961,6130.813477,6609.665039,6259.539551,6578.859863,6557.993164,6899.544434,6951.220703,6670.315430,7044.781250
4,4707.916992,4723.200195,5158.883301,5204.700195,5172.888672,5262.500000,5273.964844,5392.399902,5322.962891,5476.600098,...,4826.148438,5034.799805,4918.047852,5064.700195,5080.344727,5092.600098,5072.863281,5120.899902,5091.442383,5156.399902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2887,8843.205078,8830.000000,8878.408203,8800.000000,8889.751953,8780.000000,8757.480469,8710.000000,8800.332031,8790.000000,...,8858.031250,8810.000000,8791.740234,8690.000000,8805.286133,8710.000000,8900.219727,8810.000000,8906.587891,8810.000000
2888,970.803955,958.630005,1075.812622,1087.250000,1078.613159,1063.640015,1061.907227,1064.489990,1059.749146,1038.270020,...,1060.049194,1048.619995,1073.467407,1077.099976,1043.175537,1073.660034,1044.657593,1079.089966,1060.717041,1105.900024
2889,5688.923340,5577.244629,6481.854004,6268.951660,5857.679688,6390.646484,5947.586914,6173.075195,5833.675781,5789.339844,...,5318.253418,5299.357910,4812.864258,4838.526367,4834.447266,4506.627441,6158.281250,6024.740234,6011.474609,5739.643066
2890,6552.569336,6588.406250,6458.676758,6459.303711,6349.005859,6185.271484,6411.419922,6149.212402,6464.003418,6352.244629,...,6563.725586,6698.416016,6566.213867,6632.667480,6541.937988,6713.057617,6542.205566,6679.114746,6482.615723,6442.051758


In [15]:
torch.cuda.empty_cache()
gc.collect()

234