In [1]:
import json

# Read the run configuration once; the file handle is only needed for parsing.
with open("./params.json", mode = "r", encoding = "utf-8") as f:
    data = json.load(f)

# Unpack the configuration into module-level names used by the later cells.

# -- reproducibility / artefact location
seed_val = data["seed_val"]
model_path = data["model_path"]

# -- dataset splits
dataset_path_train = data["dataset_path"]["train"]
dataset_path_val = data["dataset_path"]["validation"]
dataset_path_test = data["dataset_path"]["test"]

# -- windowing
num_single_sample_timesteps = data["num_single_sample_timesteps"]
window_stride = data["window_stride"]
input_window_length = data["input_window_length"]
label_window_length = data["label_window_length"]

# -- feature columns
input_features = data["input_features"]
label_features = data["label_features"]

# -- model architecture
relative_attention_num_buckets = data["relative_attention_num_buckets"]
embedding_dim = data["embedding_dim"]
num_attention_head = data["num_attention_head"]
num_encoder_layers = data["num_encoder_layers"]
num_decoder_layers = data["num_decoder_layers"]
position_wise_nn_dim = data["position_wise_nn_dim"]
dropout = data["dropout"]

# -- optimisation
batch_size = data["batch_size"]
epochs = data["epochs"]
learning_rate = data["learning_rate"]

In [2]:
import random
import numpy as np
import torch

# Seed every random number generator this notebook relies on from the
# configured value (Python stdlib, NumPy, PyTorch).
random.seed(seed_val)
np.random.seed(seed_val)
torch.manual_seed(seed_val)

# Run on the GPU when one is visible to PyTorch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
from torch.utils.data import DataLoader
from torchmetrics.regression import R2Score
from datetime import datetime

from utils.pipeline.Data import get_mean_std_respected_temporal, WindowedIterableDataset
from utils.pipeline.Model import TimeSeriesHuggingFaceTransformer
from utils.pipeline.Run import train, validate
from utils.pipeline.Monitor import Overfit

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
##### HELPERS #####
def _compute_train_stats(cols):
    """Compute normalisation statistics for ``cols`` on the training split.

    Thin wrapper around ``get_mean_std_respected_temporal`` that pins the
    dataset path to the training file and forwards the windowing settings
    loaded from ``params.json``.

    Args:
        cols: Feature column names to compute statistics for.

    Returns:
        Whatever ``get_mean_std_respected_temporal`` returns for ``cols``.
    """
    return get_mean_std_respected_temporal(
        dataset_path = dataset_path_train,
        cols = cols,
        num_single_sample_timesteps = num_single_sample_timesteps,
        input_window_len = input_window_length,
        label_window_len = label_window_length,
        window_stride = window_stride
    )


def _build_split(dataset_path):
    """Create the windowed dataset and its ``DataLoader`` for one split.

    Every split is built with the same statistics (``input_stats`` /
    ``output_stats``, both computed on the training split), the same feature
    lists and the same windowing settings, so only the path differs.

    Args:
        dataset_path: Path of the split's dataset file.

    Returns:
        A ``(dataset, data_loader)`` tuple.
    """
    dataset = WindowedIterableDataset(
        dataset_path = dataset_path,
        input_stats = input_stats,
        label_stats = output_stats,
        input_features = input_features,
        label_features = label_features,
        num_single_sample_timesteps = num_single_sample_timesteps,
        stride = window_stride,
        input_window_length = input_window_length,
        label_window_length = label_window_length
    )
    loader = DataLoader(
        dataset,
        batch_size = batch_size,
        # num_workers = 0,
        # prefetch_factor = 12,
        # persistent_workers = False,
        # Pinned host memory only helps host-to-GPU copies; requesting it on a
        # CPU-only machine buys nothing, so follow the selected device.
        pin_memory = (device.type == "cuda")
    )
    return dataset, loader


##### NORMALISATION STATISTICS (training split only) #####
input_stats = _compute_train_stats(input_features)

# Reuse the input statistics for the labels only when both feature lists are
# identical *including order*.
# NOTE(review): whether the returned stats are positional or keyed by column
# name is not visible from this notebook; recomputing whenever the ordered
# lists differ is correct in either case.
if list(input_features) == list(label_features):
    output_stats = input_stats
else:
    output_stats = _compute_train_stats(label_features)


##### TRAIN #####
df_train, data_loader_train = _build_split(dataset_path_train)

##### VALIDATION #####
df_val, data_loader_val = _build_split(dataset_path_val)

##### TEST #####
df_test, data_loader_test = _build_split(dataset_path_test)

In [5]:
# Instantiate the time-series transformer from the hyper-parameters loaded
# from params.json and move it to the selected device.
model = TimeSeriesHuggingFaceTransformer(
    input_window_len = input_window_length,
    output_window_len = label_window_length,
    input_dim = len(input_features),
    output_dim = len(label_features),
    d_model = embedding_dim,
    num_head = num_attention_head,
    num_encoder_layers = num_encoder_layers,
    # Use the decoder depth from the config; this was previously wired to
    # num_encoder_layers, so the configured decoder depth was silently ignored.
    num_decoder_layers = num_decoder_layers,
    position_wise_ffn_dim = position_wise_nn_dim,
    relative_attention_num_buckets = relative_attention_num_buckets,
    dropout = dropout
).to(device)

# Monitor object for the (currently commented-out) early-stopping check in the
# epoch loop.
overfit_monitor = Overfit()

# Report the number of parameters that will receive gradient updates.
num_trainable_params = sum(
    param.numel() for param in model.parameters() if param.requires_grad
)
print(f"Number of trainable parameters in the model: {num_trainable_params}\n")

# Gaussian negative log-likelihood loss (with the constant term included),
# optimised with Adam at the configured learning rate.
criterion = torch.nn.GaussianNLLLoss(full = True)
optimizer = torch.optim.Adam(model.parameters(), lr = learning_rate)


def _new_r2_metric():
    """Return a fresh uniformly-averaged R2 metric placed on ``device``."""
    return R2Score(multioutput = "uniform_average").to(device)


# One aggregate metric per phase ...
train_r2 = _new_r2_metric()
val_r2 = _new_r2_metric()

# ... plus one metric per label-window position for each phase.
train_per_timestep_r2 = [_new_r2_metric() for _ in range(label_window_length)]
val_per_timestep_r2 = [_new_r2_metric() for _ in range(label_window_length)]

# Not read anywhere in the visible cells.
overfit_count = 0

# One training pass followed by one validation pass per epoch; the values from
# the last epoch remain bound afterwards and are logged by the final cell.
for epoch in range(epochs):
    # Keyword arguments common to both the training and the validation pass.
    shared_args = dict(
        model = model,
        criterion = criterion,
        device = device,
        epoch = epoch,
        total_epochs = epochs
    )

    train_loss, train_r2_value = train(
        optimizer = optimizer,
        r2 = train_r2,
        per_timestep_r2 = train_per_timestep_r2,
        data_loader = data_loader_train,
        **shared_args
    )

    val_loss, val_r2_value = validate(
        r2 = val_r2,
        per_timestep_r2 = val_per_timestep_r2,
        data_loader = data_loader_val,
        **shared_args
    )

    # Early stopping is disabled for now.
    # if(overfit_monitor.check(epoch = epoch, train_loss = train_loss, val_loss = val_loss)):
    #     break

Number of trainable parameters in the model: 1171589



Epoch 1/5: 758it [00:45, 16.50it/s, train_loss=-0.537813]


Epoch [1/5], Train Loss: -0.191257, Train R2: 0.929041

Worst 5 Time-Steps Train R2:
    Time-step 1: R2 = 0.771168
    Time-step 2: R2 = 0.911358
    Time-step 3: R2 = 0.922869
    Time-step 50: R2 = 0.925059
    Time-step 49: R2 = 0.926885
Best 5 Time-Steps Train R2:
    Time-step 14: R2 = 0.936603
    Time-step 11: R2 = 0.936576
    Time-step 10: R2 = 0.936552
    Time-step 16: R2 = 0.936543
    Time-step 13: R2 = 0.936541



Epoch 1/5: 219it [00:34,  6.34it/s, val_loss=20.009001]


Epoch [1/5], Val Loss: 24.158529, Val R2: 0.548052

Worst 5 Time-Steps Val R2:
    Time-step 50: R2 = 0.145167
    Time-step 49: R2 = 0.158402
    Time-step 48: R2 = 0.168296
    Time-step 47: R2 = 0.179113
    Time-step 46: R2 = 0.191940
Best 5 Time-Steps Val R2:
    Time-step 1: R2 = 0.970884
    Time-step 2: R2 = 0.959593
    Time-step 3: R2 = 0.946424
    Time-step 4: R2 = 0.933629
    Time-step 5: R2 = 0.920661

-----------------------------------------------------------------



Epoch 2/5: 758it [00:46, 16.43it/s, train_loss=-0.633574]


Epoch [2/5], Train Loss: -0.563941, Train R2: 0.971946

Worst 5 Time-Steps Train R2:
    Time-step 1: R2 = 0.926785
    Time-step 2: R2 = 0.962892
    Time-step 3: R2 = 0.968839
    Time-step 50: R2 = 0.970784
    Time-step 49: R2 = 0.971162
Best 5 Time-Steps Train R2:
    Time-step 10: R2 = 0.974614
    Time-step 11: R2 = 0.974583
    Time-step 13: R2 = 0.974509
    Time-step 17: R2 = 0.974459
    Time-step 9: R2 = 0.974442



Epoch 2/5: 219it [00:34,  6.32it/s, val_loss=24.147175]


Epoch [2/5], Val Loss: 28.373122, Val R2: 0.605758

Worst 5 Time-Steps Val R2:
    Time-step 50: R2 = 0.209721
    Time-step 49: R2 = 0.224350
    Time-step 48: R2 = 0.236432
    Time-step 47: R2 = 0.249503
    Time-step 46: R2 = 0.263944
Best 5 Time-Steps Val R2:
    Time-step 1: R2 = 0.976499
    Time-step 2: R2 = 0.965820
    Time-step 3: R2 = 0.954632
    Time-step 4: R2 = 0.943979
    Time-step 5: R2 = 0.933189

-----------------------------------------------------------------



Epoch 3/5: 758it [00:45, 16.52it/s, train_loss=-0.732729]


Epoch [3/5], Train Loss: -0.642471, Train R2: 0.974484

Worst 5 Time-Steps Train R2:
    Time-step 1: R2 = 0.938717
    Time-step 2: R2 = 0.966983
    Time-step 3: R2 = 0.972133
    Time-step 50: R2 = 0.973547
    Time-step 49: R2 = 0.973763
Best 5 Time-Steps Train R2:
    Time-step 10: R2 = 0.976565
    Time-step 11: R2 = 0.976562
    Time-step 12: R2 = 0.976537
    Time-step 13: R2 = 0.976530
    Time-step 8: R2 = 0.976505



Epoch 3/5: 219it [00:34,  6.44it/s, val_loss=24.340273]


Epoch [3/5], Val Loss: 35.993988, Val R2: 0.618299

Worst 5 Time-Steps Val R2:
    Time-step 50: R2 = 0.236048
    Time-step 49: R2 = 0.249873
    Time-step 48: R2 = 0.261797
    Time-step 47: R2 = 0.274604
    Time-step 46: R2 = 0.288494
Best 5 Time-Steps Val R2:
    Time-step 1: R2 = 0.980266
    Time-step 2: R2 = 0.969724
    Time-step 3: R2 = 0.959285
    Time-step 4: R2 = 0.949201
    Time-step 5: R2 = 0.938734

-----------------------------------------------------------------



Epoch 4/5: 758it [00:45, 16.52it/s, train_loss=-0.754862]


Epoch [4/5], Train Loss: -0.682100, Train R2: 0.975702

Worst 5 Time-Steps Train R2:
    Time-step 1: R2 = 0.944690
    Time-step 2: R2 = 0.969180
    Time-step 3: R2 = 0.973633
    Time-step 50: R2 = 0.974931
    Time-step 49: R2 = 0.975028
Best 5 Time-Steps Train R2:
    Time-step 10: R2 = 0.977539
    Time-step 13: R2 = 0.977535
    Time-step 9: R2 = 0.977486
    Time-step 12: R2 = 0.977480
    Time-step 14: R2 = 0.977454



Epoch 4/5: 219it [00:34,  6.43it/s, val_loss=29.293171]


Epoch [4/5], Val Loss: 34.325264, Val R2: 0.632112

Worst 5 Time-Steps Val R2:
    Time-step 50: R2 = 0.265137
    Time-step 49: R2 = 0.278811
    Time-step 48: R2 = 0.290247
    Time-step 47: R2 = 0.302938
    Time-step 46: R2 = 0.316487
Best 5 Time-Steps Val R2:
    Time-step 1: R2 = 0.982434
    Time-step 2: R2 = 0.972560
    Time-step 3: R2 = 0.962665
    Time-step 4: R2 = 0.952701
    Time-step 5: R2 = 0.942405

-----------------------------------------------------------------



Epoch 5/5: 758it [00:45, 16.61it/s, train_loss=-0.684590]


Epoch [5/5], Train Loss: -0.711945, Train R2: 0.976520

Worst 5 Time-Steps Train R2:
    Time-step 1: R2 = 0.948087
    Time-step 2: R2 = 0.970400
    Time-step 3: R2 = 0.974444
    Time-step 50: R2 = 0.975829
    Time-step 49: R2 = 0.976004
Best 5 Time-Steps Train R2:
    Time-step 9: R2 = 0.978234
    Time-step 14: R2 = 0.978193
    Time-step 17: R2 = 0.978150
    Time-step 12: R2 = 0.978098
    Time-step 8: R2 = 0.978094



Epoch 5/5: 219it [00:34,  6.38it/s, val_loss=26.970810]

Epoch [5/5], Val Loss: 33.265032, Val R2: 0.637597

Worst 5 Time-Steps Val R2:
    Time-step 50: R2 = 0.258892
    Time-step 49: R2 = 0.274275
    Time-step 48: R2 = 0.287786
    Time-step 47: R2 = 0.302049
    Time-step 46: R2 = 0.317016
Best 5 Time-Steps Val R2:
    Time-step 1: R2 = 0.981694
    Time-step 2: R2 = 0.971604
    Time-step 3: R2 = 0.961999
    Time-step 4: R2 = 0.952513
    Time-step 5: R2 = 0.942870

-----------------------------------------------------------------






In [6]:
# Persist the trained model.
# NOTE(review): this pickles the whole module object rather than its
# state_dict, so loading it requires the model class to be importable under
# the same module path (and unpickling is only safe for trusted files).
# Kept as-is because any existing loading code expects this format.
torch.save(model, model_path)

# Free-form record of the experiment set-up.
# NOTE(review): the hyper-parameter values below are typed by hand and are not
# derived from params.json, so they must be kept in sync manually.
description = '''
dataset: random
bos_projector: non-linear (1 LeakyReLU)
bos_input: encoder hidden state of last input time-step
positional encoding: sin, cos
Loss: Gaussian negative log likelihood -> mean and var pred for each time-step
seed_val: 7
num_single_sample_timesteps: 1000
input_window_len: 50
label_window_len: 50
window_stride: 10
relative_attention_num_buckets: 64
embedding_dim: 128
num_attention_head: 8
num_encoder_layers: 5
num_decoder_layers: 5
position_wise_nn_dim: 64
dropout: 0.5
batch_size: 128
epochs: 5
learning_rate: 0.0005
psi_e, b_e, psi_plus, b_plus, u_list, eta_list -> psi_e, b_e, psi_plus, b_plus, u_list

'''

# Append this run to the experiment log: timestamp, model path, the metrics of
# the last completed epoch, then the description above. The encoding is
# explicit (as for the params.json read) so the log does not depend on the
# platform's default encoding.
with open("./results/experiments.txt", mode = "a", encoding = "utf-8") as f:
    f.write(str(datetime.now()).replace(" ", "-") + "\n")
    f.write(f"{model_path}\n\n")
    f.write(f"Training Loss: {train_loss}, Training R2: {train_r2_value}\n")
    f.write(f"Validation Loss: {val_loss}, Validation R2: {val_r2_value}\n")
    f.write(description)
    f.write("==========================================\n")