In [11]:
import os
import glob
import gc

import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader

from transformers import PatchTSTForPrediction

from datasets import Dataset

In [12]:
data = "coin"

output_dir = "saved_models"
log_dir = os.path.join('logstf', data)

os.makedirs(log_dir, exist_ok=True)
os.makedirs(output_dir, exist_ok = True)

loss_name = "mse"

learning_rate = 5e-5
num_train_epochs = 200

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
## target domain
target_X = pd.read_csv(f"../data/{data}/train_input_7.csv").iloc[:, 1:].values.astype(np.float32)

np.random.seed(2)
random_indices1 = np.random.choice(pd.read_csv("../data/M4_train.csv").iloc[:, (1):].index,
                                   size=target_X.shape[0] * 20, replace=True)

X_data = pd.read_csv("../data/M4_train.csv").iloc[:, 1 + (24 * 0):].loc[random_indices1].values.astype(np.float32)
y_data = pd.read_csv("../data/M4_test.csv").iloc[:, 1:].loc[random_indices1].values.astype(np.float32)

In [14]:
## bootstrap
np.random.seed(42)
select = np.random.choice(len(X_data), size=len(X_data), replace=True)
X_bootstrap = X_data[select]
y_bootstrap = y_data[select]

val_split_index = int(len(X_bootstrap) * 0.8)

def to_tensor_and_reshape(array):
    result = torch.tensor(array)
    result = result.reshape(-1, result.shape[1], 1)

    return result

X_train, X_valid = to_tensor_and_reshape(X_bootstrap[:val_split_index]), to_tensor_and_reshape(X_bootstrap[val_split_index:])
y_train, y_valid = to_tensor_and_reshape(y_bootstrap[:val_split_index]), to_tensor_and_reshape(y_bootstrap[val_split_index:])

## setting dataloader
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size = 256, shuffle = True, num_workers = 16)

test_dataset = torch.utils.data.TensorDataset(X_valid, y_valid)
test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size = 256, num_workers = 16)

In [15]:
backbone_model = PatchTSTForPrediction.from_pretrained(os.path.join(output_dir, "PatchTSTBackbone")).to(device)

In [16]:
optimizer = torch.optim.AdamW(backbone_model.parameters(), lr = learning_rate)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = num_train_epochs)
log_data = []

if loss_name == "mse":
    loss_fn = torch.nn.MSELoss()
elif loss_name == "mae":
    loss_fn = torch.nn.L1Loss()
# elif loss_name

## early stopping
PATIENCE = 15
best_val_loss = np.inf
patience_counter = 0

for epoc in range(num_train_epochs):
    backbone_model.train()

    total_train_loss = 0

    for X, y in train_dataloader:
        X, y = X.to(device), y.to(device)

        optimizer.zero_grad()
        yhat = backbone_model(X).prediction_outputs
        loss = loss_fn(yhat, y)
        loss.backward()
        torch.nn.utils.clip_grad_norm_(backbone_model.parameters(), max_norm = 1.0)
        optimizer.step()

        total_train_loss += loss.item()

    avg_train_loss = total_train_loss / len(train_dataloader)

    backbone_model.eval()

    with torch.no_grad():
        yys = []
        yyhats = []

        for XX, yy in test_dataloader:
            XX = XX.to(device)
            yys.append(yy.to(device))
            yyhats.append(backbone_model(XX).prediction_outputs)

        yyhat = torch.concat(yyhats)
        yy = torch.concat(yys)

        val_loss = loss_fn(yyhat, yy)

    print(f"Epoch {epoc+1}/{num_train_epochs} | Train Loss: {avg_train_loss:.6f}\t\t Val Loss: {val_loss:.6f}")

    log_data.append({"epoch": epoc, "loss": avg_train_loss, "eval_loss": val_loss.item()})

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(backbone_model.state_dict(), os.path.join(output_dir, f"model_{loss_name}_{1}.pth"))
        patience_counter = 0
    else:
        patience_counter += 1

    if patience_counter >= PATIENCE:
        break

    scheduler.step()

Epoch 1/200 | Train Loss: 1940915.179348		 Val Loss: 1165182.000000
Epoch 2/200 | Train Loss: 1032320.980978		 Val Loss: 810952.750000
Epoch 3/200 | Train Loss: 872731.099185		 Val Loss: 733173.375000
Epoch 4/200 | Train Loss: 783761.773098		 Val Loss: 657705.625000
Epoch 5/200 | Train Loss: 739468.938859		 Val Loss: 600945.312500
Epoch 6/200 | Train Loss: 676805.575408		 Val Loss: 630928.625000
Epoch 7/200 | Train Loss: 639079.065897		 Val Loss: 545971.750000
Epoch 8/200 | Train Loss: 542701.792120		 Val Loss: 600936.937500
Epoch 9/200 | Train Loss: 550070.450408		 Val Loss: 551084.437500
Epoch 10/200 | Train Loss: 508315.013587		 Val Loss: 554443.625000
Epoch 11/200 | Train Loss: 484492.355978		 Val Loss: 518444.500000
Epoch 12/200 | Train Loss: 485839.133152		 Val Loss: 510741.781250
Epoch 13/200 | Train Loss: 449760.745245		 Val Loss: 508716.781250
Epoch 14/200 | Train Loss: 477128.488111		 Val Loss: 510219.093750
Epoch 15/200 | Train Loss: 402925.199389		 Val Loss: 490926.437500
E

In [17]:
## save log
pd.DataFrame(log_data).to_csv(os.path.join(log_dir, f"pretrain_{loss_name}_model{1}.csv"))

## load best model
backbone_model.load_state_dict(torch.load(os.path.join(output_dir, f"model_{loss_name}_{1}.pth")))

<All keys matched successfully>

<torch.utils.data.dataloader.DataLoader at 0x7f2f51ff2840>

In [37]:
yys[0].shape

torch.Size([256, 24, 1])

In [39]:
len(test_dataloader)

12

In [71]:
yyhats = []
yys = []

with torch.no_grad():
    for XX, yy in test_dataloader:
        XX = XX.to(device)
        yys.append(yy.to(device))
        yyhats.append(backbone_model(XX).prediction_outputs)

In [89]:
yyhat, yy = torch.concat(yyhats).squeeze(), torch.concat(yys).squeeze()

In [90]:
yyhat = pd.DataFrame(yyhat.to("cpu"))
yyhat.columns = [f"{i}A" for i in range(yyhat.shape[1])]
yy = pd.DataFrame(yy.to("cpu"))
yy.columns = [f"{i}B" for i in range(yyhat.shape[1])]

1

In [105]:
val_result = pd.concat([yyhat, yy], axis = 1).sort_index(axis = 1)
val_result.columns = [f"prediction_{(i+1)//2}" if i%2 == 1 else f"ground_truth_{(i+1)//2}" for i in range(1, val_result.shape[1]+1)]
val_result.to_csv(os.path.join(log_dir, f"prediction_val_results_{loss_name}_model{1}.csv"), index = False)

In [102]:
val_result

Unnamed: 0,prediction_1,ground_truth_1,prediction_2,ground_truth_2,prediction_3,ground_truth_3,prediction_4,ground_truth_4,prediction_5,ground_truth_5,...,prediction_20,ground_truth_20,prediction_21,ground_truth_21,prediction_22,ground_truth_22,prediction_23,ground_truth_23,prediction_24,ground_truth_24
0,1707.848877,1745.800049,1734.792236,1869.199951,1742.787720,1854.199951,1742.532227,1863.800049,1741.122925,1853.800049,...,1782.030518,1833.199951,1752.822144,1851.099976,1737.945923,1854.900024,1730.955444,1854.199951,1735.780029,1870.400024
1,9779.167969,9980.000000,10503.027344,10070.000000,10413.624023,10190.000000,10503.994141,10200.000000,10352.125000,10210.000000,...,10159.966797,9940.000000,10064.224609,9960.000000,9982.077148,9950.000000,9934.011719,9970.000000,10219.075195,10050.000000
2,1464.434570,1445.834961,1465.694580,1433.120972,1462.740845,1439.545044,1478.380981,1441.876953,1485.951416,1440.109009,...,1470.554688,1441.203003,1465.894409,1435.150024,1465.591187,1420.425049,1463.738525,1441.050049,1471.424438,1425.350952
3,6840.120605,6546.056641,6810.293945,6936.538574,6597.047852,6724.092285,6590.900391,6635.496582,6789.518555,6887.514160,...,6373.643555,6130.813477,6514.244629,6259.539551,6702.911133,6557.993164,6620.116699,6951.220703,6860.129883,7044.781250
4,4431.148438,4723.200195,4997.003906,5204.700195,5002.770996,5262.500000,5179.929688,5392.399902,5096.360352,5476.600098,...,4705.217773,5034.799805,4628.752930,5064.700195,4815.059570,5092.600098,4739.848633,5120.899902,4808.609375,5156.399902
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2887,8804.451172,8830.000000,8885.068359,8800.000000,8857.950195,8780.000000,8766.936523,8710.000000,8753.163086,8790.000000,...,8936.829102,8810.000000,8830.230469,8690.000000,8805.250000,8710.000000,8899.399414,8810.000000,8882.827148,8810.000000
2888,948.323975,958.630005,1034.923950,1087.250000,1048.290894,1063.640015,1046.935547,1064.489990,1038.391357,1038.270020,...,1018.620239,1048.619995,1033.620850,1077.099976,1021.959717,1073.660034,1027.536987,1079.089966,1030.504395,1105.900024
2889,5970.575684,5577.244629,6280.669922,6268.951660,5848.233887,6390.646484,5752.697754,6173.075195,6050.154785,5789.339844,...,5600.000977,5299.357910,4989.262695,4838.526367,5203.392090,4506.627441,6041.491211,6024.740234,5888.279785,5739.643066
2890,6513.104004,6588.406250,6442.052734,6459.303711,6390.719727,6185.271484,6389.219238,6149.212402,6503.473145,6352.244629,...,6600.451660,6698.416016,6508.057617,6632.667480,6494.035645,6713.057617,6566.168457,6679.114746,6510.375977,6442.051758


In [19]:
mseLoss = torch.nn.MSELoss()
maeLoss = torch.nn.L1Loss()

def smape(yy, yyhat):
    numerator = 100*abs(yy - yyhat)
    denominator = (abs(yy) + abs(yyhat))/2
    smape = torch.mean(numerator / denominator)
    return smape

In [20]:
print(f"test RMSE: {torch.sqrt(mseLoss(yyhat, yy))}")
print(f"test MAE: {maeLoss(yyhat, yy)}")
print(f"test SMAPE: {smape(yy, yyhat)}")

test RMSE: 554.2002563476562
test MAE: 233.8564453125
test SMAPE: 6.526806831359863


In [16]:
torch.cuda.empty_cache()
gc.collect()

585