In [1]:
import json

# Parse the run configuration; the file handle is only needed for the parse
# itself, so every value is pulled out of `data` after the file is closed.
with open("./params.json", mode = "r", encoding = "utf-8") as f:
    data = json.load(f)

# Seed and output location of the saved model.
seed_val = data["seed_val"]
model_path = data["model_path"]

# One dataset location per split, stored under the "dataset_path" mapping.
dataset_path_train, dataset_path_val, dataset_path_test = (
    data["dataset_path"][split] for split in ("train", "validation", "test")
)

# Windowing parameters, later forwarded to the statistics helper and to
# WindowedIterableDataset.
num_single_sample_timesteps = data["num_single_sample_timesteps"]
window_stride = data["window_stride"]
input_window_length = data["input_window_length"]
label_window_length = data["label_window_length"]

# Column selections for the model inputs and the prediction targets.
input_features = data["input_features"]
label_features = data["label_features"]

# Architecture hyperparameters, later forwarded to the model constructor.
positional_encoding_max_len = data["positional_encoding_max_len"]
embedding_dim = data["embedding_dim"]
num_attention_head = data["num_attention_head"]
num_encoder_layers = data["num_encoder_layers"]
num_decoder_layers = data["num_decoder_layers"]
position_wise_nn_dim = data["position_wise_nn_dim"]
dropout = data["dropout"]

# Optimisation settings (DataLoader batch size, epoch count, Adam step size).
batch_size = data["batch_size"]
epochs = data["epochs"]
learning_rate = data["learning_rate"]

In [2]:
import random
import numpy as np
import torch

# Seed the Python, NumPy and PyTorch random number generators with the
# configured value before anything stochastic is created.
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
from torch.utils.data import DataLoader
from torchmetrics.regression import R2Score

from utils.pipeline.Data import get_mean_std_respected_temporal, WindowedIterableDataset
from utils.pipeline.Model import TimeSeriesHuggingFaceTransformer
from utils.pipeline.Run import train, validate
from utils.pipeline.Monitor import Overfit

KeyboardInterrupt: 

In [None]:
def _build_split(dataset_path, stats):
    """Create the windowed dataset and its DataLoader for one data split.

    All three splits share the same feature lists, windowing parameters and
    batch size, so building them through a single helper keeps them in sync.

    Args:
        dataset_path: Location of the split, forwarded to
            WindowedIterableDataset.
        stats: Value returned by get_mean_std_respected_temporal, forwarded
            to WindowedIterableDataset (presumably used for normalisation;
            confirm in utils.pipeline.Data).

    Returns:
        A ``(dataset, data_loader)`` tuple for the split.
    """
    dataset = WindowedIterableDataset(
        dataset_path = dataset_path,
        stats = stats,
        input_features = input_features,
        label_features = label_features,
        num_single_sample_timesteps = num_single_sample_timesteps,
        stride = window_stride,
        input_window_length = input_window_length,
        label_window_length = label_window_length
    )

    # num_workers / prefetch_factor / persistent_workers stay at their
    # DataLoader defaults. Pinned memory is only requested when the target
    # device is CUDA; on a CPU-only machine DataLoader ignores the flag and
    # emits a warning.
    loader = DataLoader(
        dataset,
        batch_size = batch_size,
        pin_memory = device.type == "cuda"
    )

    return dataset, loader


##### TRAIN #####
# Statistics are computed from the training split only and then reused for
# the validation and test splits below.
stats = get_mean_std_respected_temporal(
    dataset_path = dataset_path_train,
    cols = input_features,
    num_single_sample_timesteps = num_single_sample_timesteps,
    input_window_len = input_window_length,
    label_window_len = label_window_length,
    window_stride = window_stride
)

df_train, data_loader_train = _build_split(dataset_path_train, stats)

##### VALIDATION #####
df_val, data_loader_val = _build_split(dataset_path_val, stats)

##### TEST #####
df_test, data_loader_test = _build_split(dataset_path_test, stats)

In [None]:
# Build the transformer from the configured hyperparameters and move it to
# the selected device.
model_kwargs = dict(
    input_dim = len(input_features),
    output_dim = len(label_features),
    d_model = embedding_dim,
    num_head = num_attention_head,
    num_encoder_layers = num_encoder_layers,
    num_decoder_layers = num_decoder_layers,
    position_wise_ffn_dim = position_wise_nn_dim,
    dropout = dropout,
    positional_encoding_max_len = positional_encoding_max_len
)
model = TimeSeriesHuggingFaceTransformer(**model_kwargs).to(device)

# Created for the early-stopping check, which is currently disabled (see the
# commented-out lines at the end of the epoch loop).
overfit_monitor = Overfit()

# Count only the parameters that will receive gradients.
num_trainable_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print(f"Number of trainable parameters in the model: {num_trainable_params}\n")

# Mean-squared-error objective optimised with Adam at the configured rate.
criterion = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)


def _new_r2():
    """Return a fresh uniformly-averaged R2Score metric on the active device."""
    return R2Score(multioutput = "uniform_average").to(device)


# One overall metric per phase, plus one metric per label timestep.
train_r2 = _new_r2()
val_r2 = _new_r2()

train_per_timestep_r2 = [_new_r2() for _ in range(label_window_length)]
val_per_timestep_r2 = [_new_r2() for _ in range(label_window_length)]

overfit_count = 0

for epoch in range(epochs):
    # Arguments that are identical for the training and validation passes.
    shared_kwargs = dict(
        model = model,
        criterion = criterion,
        device = device,
        epoch = epoch,
        total_epochs = epochs
    )

    # Only the first returned value (the loss) is kept; the second is ignored.
    train_loss, _ = train(
        optimizer = optimizer,
        r2 = train_r2,
        per_timestep_r2 = train_per_timestep_r2,
        data_loader = data_loader_train,
        **shared_kwargs
    )

    val_loss, _ = validate(
        r2 = val_r2,
        per_timestep_r2 = val_per_timestep_r2,
        data_loader = data_loader_val,
        **shared_kwargs
    )

    # Early stopping is disabled for now:
    # if(overfit_monitor.check(epoch = epoch, train_loss = train_loss, val_loss = val_loss)):
    #     break

Number of trainable parameters in the model: 28977290



Epoch 1/5: 0it [00:00, ?it/s]Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.58.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.
Epoch 1/5: 9it [00:03,  2.80it/s, train_loss=0.469782]


KeyboardInterrupt: 

In [None]:
# Persist the trained model to the path configured in params.json.
# NOTE(review): this pickles the whole module object rather than only its
# state_dict, so loading it later requires the model class to be importable
# under the same module path. Consider saving model.state_dict() if consumers
# of this file can be updated accordingly.
torch.save(obj = model, f = model_path)