In [43]:
import os
import glob
import gc

import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader

from transformers import PatchTSTForPrediction

from datasets import Dataset

In [44]:
data = "coin"

output_dir = "saved_models"
log_dir = os.path.join('logstf', data)

os.makedirs(log_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok = True)

loss_name = "MASE"

learning_rate = 5e-5
num_train_epochs = 400

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [45]:
## target domain
target_X = pd.read_csv(f"../data/{data}/train_input_7.csv").iloc[:, 1:].values.astype(np.float32)

np.random.seed(2)
random_indices1 = np.random.choice(pd.read_csv("../data/M4_train.csv").iloc[:, (1):].index,
                                   size=target_X.shape[0] * 20, replace=True)

X_data = pd.read_csv("../data/M4_train.csv").iloc[:, 1 + (24 * 0):].loc[random_indices1].values.astype(np.float32)
y_data = pd.read_csv("../data/M4_test.csv").iloc[:, 1:].loc[random_indices1].values.astype(np.float32)

In [46]:
## bootstrap
np.random.seed(42)
select = np.random.choice(len(X_data), size=len(X_data), replace=True)
X_bootstrap = X_data[select]
y_bootstrap = y_data[select]

val_split_index = int(len(X_bootstrap) * 0.8)

def to_tensor_and_reshape(array):
    result = torch.tensor(array)
    result = result.reshape(-1, result.shape[1], 1)

    return result

X_train, X_valid = to_tensor_and_reshape(X_bootstrap[:val_split_index]), to_tensor_and_reshape(X_bootstrap[val_split_index:])
y_train, y_valid = to_tensor_and_reshape(y_bootstrap[:val_split_index]), to_tensor_and_reshape(y_bootstrap[val_split_index:])

## setting dataloader
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size = 256, shuffle = True, num_workers = 16)

test_dataset = torch.utils.data.TensorDataset(X_valid, y_valid)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size = 256, num_workers = 16)

In [47]:
backbone_model = PatchTSTForPrediction.from_pretrained(os.path.join(output_dir, "PatchTSTBackbone")).to(device)

In [48]:
## custom loss function
def SMAPE(yhat, y):
    numerator = 100*torch.abs(y - yhat)
    denominator = (torch.abs(y) + torch.abs(yhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

def MAPE(yhat, y):
    return torch.mean(100*torch.abs((y - yhat) / y))

class MASE(torch.nn.Module):
    def __init__(self, training_data, period = 1):
        super().__init__()
        self.scale = torch.mean(torch.abs(torch.tensor(training_data[:, period:] - training_data[:, :-period])))    ## 모든 훈련 데이터에 대한 평균 스케일 계산
    
    def forward(self, yhat, y):
        error = torch.abs(y - yhat)
        return torch.mean(error / self.scale)

In [None]:
if loss_name == "mse":
    loss_fn = torch.nn.MSELoss()
    lr = learning_rate
else:
    lr = learning_rate*2
    if loss_name == "mae":
        loss_fn = torch.nn.L1Loss()
    elif loss_name == "SMAPE":
        loss_fn = SMAPE
    elif loss_name == "mape":
        loss_fn = MAPE
    elif loss_name == "MASE":
        loss_fn = MASE(y_data, 1)
    else:
        raise Exception("Your loss name is not valid.")

optimizer = torch.optim.AdamW(backbone_model.parameters(), lr = lr)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = num_train_epochs)
log_data = []

## early stopping
PATIENCE = 15
best_val_loss = np.inf
patience_counter = 0

for epoc in range(num_train_epochs):
    backbone_model.train()

    total_train_loss = 0

    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        yhat = backbone_model(X).prediction_outputs
        loss = loss_fn(yhat, y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(backbone_model.parameters(), max_norm = 1.0)
        optimizer.step()

        total_train_loss += loss.item()*X.shape[0]

    avg_train_loss = total_train_loss / len(train_dataloader.dataset)

    backbone_model.eval()

    with torch.no_grad():
        yys = []
        yyhats = []

        for XX, yy in test_dataloader:
            XX = XX.to(device)
            yys.append(yy.to(device))
            yyhats.append(backbone_model(XX).prediction_outputs)

        yyhat = torch.concat(yyhats)
        yy = torch.concat(yys)

        val_loss = loss_fn(yyhat, yy)

    print(f"Epoch {epoc+1}/{num_train_epochs} | Train Loss: {avg_train_loss:.6f}\t\t Val Loss: {val_loss:.6f}")

    log_data.append({"epoch": epoc, "loss": avg_train_loss, "eval_loss": val_loss.item()})

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(backbone_model.state_dict(), os.path.join(output_dir, f"model_{loss_name}_{1}.pth"))
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= PATIENCE:
        break

    scheduler.step()

Epoch 1/400 | Train Loss: 2.575711		 Val Loss: 2.446912
Epoch 2/400 | Train Loss: 2.001803		 Val Loss: 1.757823
Epoch 3/400 | Train Loss: 1.911106		 Val Loss: 1.880399
Epoch 4/400 | Train Loss: 1.855843		 Val Loss: 1.679141
Epoch 5/400 | Train Loss: 1.786365		 Val Loss: 1.622896
Epoch 6/400 | Train Loss: 1.737312		 Val Loss: 1.597142
Epoch 7/400 | Train Loss: 1.698749		 Val Loss: 1.600184
Epoch 8/400 | Train Loss: 1.653931		 Val Loss: 1.535675
Epoch 9/400 | Train Loss: 1.605308		 Val Loss: 1.560574
Epoch 10/400 | Train Loss: 1.574386		 Val Loss: 1.536518
Epoch 11/400 | Train Loss: 1.549072		 Val Loss: 1.624128
Epoch 12/400 | Train Loss: 1.508551		 Val Loss: 1.454669
Epoch 13/400 | Train Loss: 1.479817		 Val Loss: 1.534351
Epoch 14/400 | Train Loss: 1.444158		 Val Loss: 1.466596
Epoch 15/400 | Train Loss: 1.408554		 Val Loss: 1.524756
Epoch 16/400 | Train Loss: 1.410807		 Val Loss: 1.380227
Epoch 17/400 | Train Loss: 1.363850		 Val Loss: 1.451997
Epoch 18/400 | Train Loss: 1.351522		 Va

In [None]:
## save log
pd.DataFrame(log_data).to_csv(os.path.join(log_dir, f"pretrain_{loss_name}_model{1}.csv"))

## load best model
backbone_model.load_state_dict(torch.load(os.path.join(output_dir, f"model_{loss_name}_{1}.pth")))

<All keys matched successfully>

In [None]:
yyhats = []
yys = []

with torch.no_grad():
    for XX, yy in test_dataloader:
        XX = XX.to(device)
        yys.append(yy.to(device))
        yyhats.append(backbone_model(XX).prediction_outputs)

In [None]:
yyhat, yy = torch.concat(yyhats).squeeze(), torch.concat(yys).squeeze()

In [None]:
mseLoss = torch.nn.MSELoss()
maeLoss = torch.nn.L1Loss()

def smape(yy, yyhat):
    numerator = 100*abs(yy - yyhat)
    denominator = (abs(yy) + abs(yyhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

print(f"test RMSE: {torch.sqrt(mseLoss(yyhat, yy))}")
print(f"test MAE: {maeLoss(yyhat, yy)}")
print(f"test SMAPE: {smape(yy, yyhat)}")

test RMSE: 635.1454467773438
test MAE: 219.265869140625
test SMAPE: 5.3464579582214355


In [None]:
yyhat = pd.DataFrame(yyhat.to("cpu"))
yyhat.columns = [f"{i}A" for i in range(yyhat.shape[1])]
yy = pd.DataFrame(yy.to("cpu"))
yy.columns = [f"{i}B" for i in range(yyhat.shape[1])]

In [None]:
val_result = pd.concat([yyhat, yy], axis = 1).sort_index(axis = 1)
val_result.columns = [f"prediction_{(i+1)//2}" if i%2 == 1 else f"ground_truth_{(i+1)//2}" for i in range(1, val_result.shape[1]+1)]
val_result.to_csv(os.path.join(log_dir, f"prediction_val_results_{loss_name}_model{1}.csv"), index = False)

In [None]:
val_result

Unnamed: 0,prediction_1,ground_truth_1,prediction_2,ground_truth_2,prediction_3,ground_truth_3,prediction_4,ground_truth_4,prediction_5,ground_truth_5,...,prediction_20,ground_truth_20,prediction_21,ground_truth_21,prediction_22,ground_truth_22,prediction_23,ground_truth_23,prediction_24,ground_truth_24
0,1764.744507,1745.800049,1810.076660,1869.199951,1817.699951,1854.199951,1791.267700,1863.800049,1827.923828,1853.800049,...,1797.349365,1833.199951,1779.291992,1851.099976,1775.888306,1854.900024,1769.098755,1854.199951,1791.199585,1870.400024
1,9977.685547,9980.000000,10281.222656,10070.000000,10415.597656,10190.000000,10491.425781,10200.000000,10621.905273,10210.000000,...,10064.826172,9940.000000,10103.845703,9960.000000,9935.583008,9950.000000,9991.794922,9970.000000,10191.219727,10050.000000
2,1441.031860,1445.834961,1438.576416,1433.120972,1439.517456,1439.545044,1429.690430,1441.876953,1457.640625,1440.109009,...,1454.538086,1441.203003,1436.432129,1435.150024,1434.387817,1420.425049,1436.371948,1441.050049,1436.618286,1425.350952
3,6617.981445,6546.056641,6946.009277,6936.538574,6761.762695,6724.092285,6713.710938,6635.496582,6884.118652,6887.514160,...,6133.299805,6130.813477,6287.193359,6259.539551,6580.555176,6557.993164,6948.881836,6951.220703,7040.197754,7044.781250
4,4700.222168,4723.200195,5200.006836,5204.700195,5216.735352,5262.500000,5293.151855,5392.399902,5397.792969,5476.600098,...,5059.943359,5034.799805,5011.688477,5064.700195,5056.430176,5092.600098,5083.967285,5120.899902,5077.353516,5156.399902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2887,8784.845703,8830.000000,8848.226562,8800.000000,8803.655273,8780.000000,8695.550781,8710.000000,8765.865234,8790.000000,...,8852.857422,8810.000000,8764.726562,8690.000000,8791.627930,8710.000000,8825.441406,8810.000000,8794.834961,8810.000000
2888,955.064087,958.630005,1091.725464,1087.250000,1054.492676,1063.640015,1070.163818,1064.489990,1043.964966,1038.270020,...,1051.307373,1048.619995,1070.507568,1077.099976,1062.181030,1073.660034,1076.685791,1079.089966,1096.553223,1105.900024
2889,5602.021973,5577.244629,6176.615234,6268.951660,5921.100586,6390.646484,5889.652832,6173.075195,5650.695801,5789.339844,...,5242.411133,5299.357910,4735.445801,4838.526367,4700.993652,4506.627441,5949.955078,6024.740234,5968.816406,5739.643066
2890,6617.987793,6588.406250,6340.265625,6459.303711,6303.109375,6185.271484,6259.520508,6149.212402,6340.421387,6352.244629,...,6517.442871,6698.416016,6486.223145,6632.667480,6477.447266,6713.057617,6432.340820,6679.114746,6318.266602,6442.051758


In [22]:
torch.cuda.empty_cache()
gc.collect()

1007