In [2]:
import sys

# Put the project's source directory on the module search path so the
# project-local imports in the next cell can be resolved.
# NOTE(review): those imports use the `src.` package prefix, which requires the
# *parent* of `src/` to be on sys.path — confirm this entry is what actually
# makes them resolve (it may only work because of the notebook's working dir).
project_src_dir = '/home/gaurang/glycemic_control/code/src/'
sys.path.insert(2, project_src_dir)

In [3]:
import src.dataset_old as ds
import src.utils as utils
from torch.utils.data import DataLoader
import torch
import datetime
import src.models.transformer as tst
import numpy as np

from pathlib import Path

In [4]:
# ---------------------------------------------------------------------------
# Data / loader settings
# ---------------------------------------------------------------------------
test_size = 0.1                      # fraction of rows held out from the end of the data
batch_size = 128                     # samples per DataLoader batch
target_col_name = "FCR_N_PriceEUR"   # column to forecast
timestamp_col = "timestamp"          # column handed to the reader as the timestamp column
# Earliest date intended to be kept.
# NOTE(review): not referenced by any cell visible to this review — confirm it is applied.
cutoff_date = datetime.datetime(year=2017, month=1, day=1)

# ---------------------------------------------------------------------------
# Transformer architecture
# NOTE(review): the model-construction cell does not pass these values
# explicitly; presumably they mirror the constructor defaults — confirm.
# ---------------------------------------------------------------------------
dim_val = 512
n_heads = 8
n_encoder_layers = 4
n_decoder_layers = 4
in_features_encoder_linear_layer = 2048
in_features_decoder_linear_layer = 2048
batch_first = True

# ---------------------------------------------------------------------------
# Sequence geometry
# ---------------------------------------------------------------------------
enc_seq_len = 153             # number of time steps given to the encoder
output_sequence_length = 48   # target length; with hourly data, 48 steps = 2 days ahead
# Length of each sub-sequence sliced out of the series: encoder input plus target.
window_size = enc_seq_len + output_sequence_length
step_size = 1                 # how many time steps the moving window advances per sample
max_seq_len = enc_seq_len     # NOTE(review): unused in the visible cells — confirm it is needed

In [5]:
# Columns fed to the model: the target column first, then any exogenous columns.
# Every entry of `exogenous_vars` must be a string naming a column of the data.
exogenous_vars = []
input_variables = [target_col_name, *exogenous_vars]

# The target is the first entry of `input_variables`, hence index 0 in batched trg_y.
target_idx = 0

# Number of input features per time step.
input_size = len(input_variables)

In [6]:
# Read the raw data; `timestamp_col` is handed to the reader as the timestamp column.
data = utils.read_data(fpath_data=Path('/home/gaurang/glycemic_control/data/glycaemia_project_csvs/test_data.csv'), timestamp_col_name=timestamp_col)

# Hold out the last `test_size` fraction of rows as test data and keep the rest
# for training.
# The stop index is computed explicitly instead of slicing with `[:-n]`: when the
# rounded test count is 0 (tiny data set or test_size == 0), `data[:-0]` equals
# `data[:0]` and would silently yield an EMPTY training set.
n_test_rows = round(len(data) * test_size)
# Clamp at 0 so an oversized test fraction yields an empty training set (as the
# original negative slice did) rather than wrapping around to a negative stop.
n_training_rows = max(len(data) - n_test_rows, 0)
training_data = data[:n_training_rows]

Reading file in /home/gaurang/glycemic_control/data/glycaemia_project_csvs/test_data.csv


In [7]:
# Build the list of (start_idx, end_idx) pairs used to slice the time series
# into chunks of `window_size` steps, advancing `step_size` steps between
# consecutive chunks. Only training data is passed in, so the indices refer to
# training rows only.
training_indices = utils.get_indices_entire_sequence(
    data=training_data,
    window_size=window_size,
    step_size=step_size,
)

In [8]:
# Convert the selected input columns to a float tensor and wrap it in the
# project's custom dataset class, which uses `training_indices` to cut out
# individual samples.
# NOTE: this rebinds `training_data` from the sliced data frame to the dataset
# object, so re-running this cell without first re-running the data-loading
# cell will likely fail (the column selection below expects the data frame).
training_tensor = torch.tensor(training_data[input_variables].values).float()
training_data = ds.TransformerDataset(
    data=training_tensor,
    indices=training_indices,
    enc_seq_len=enc_seq_len,
    target_seq_len=output_sequence_length,
)

From get_src_trg: data size = torch.Size([41387, 1])


In [9]:
# Wrap the dataset in a DataLoader that yields batches of `batch_size` samples.
# No shuffle argument is given. NOTE: `training_data` is rebound once more,
# from the dataset to the loader.
training_data = DataLoader(training_data, batch_size=batch_size)

# Pull the first batch (and its index) to use as a sample input for the model.
batch_stream = enumerate(training_data)
i, batch = next(batch_stream)

# Each batch unpacks into three tensors: encoder input, decoder input, target.
src, trg, trg_y = batch

In [12]:
# Sanity check on the target batch; the recorded output is torch.Size([128, 48]),
# i.e. (batch_size, output_sequence_length).
trg_y.shape

torch.Size([128, 48])

In [9]:
# Instantiate the project's time-series transformer with one input feature per
# entry in `input_variables` and a single predicted feature.
# NOTE(review): `dec_seq_len` is set to the encoder length (`enc_seq_len`), not
# to `output_sequence_length` — confirm this is intended.
# NOTE(review): dim_val, n_heads, layer counts and the linear-layer sizes from
# the hyperparameter cell are not passed here; presumably the constructor
# defaults are relied upon — confirm they match.
model = tst.TimeSeriesTransformer(
    input_size=len(input_variables),
    dec_seq_len=enc_seq_len,
    batch_first=batch_first,
    num_predicted_features=1,
)

In [10]:
# Mask for the decoder that is handed to the model as `src_mask`, built from
# output_sequence_length (dim1) and enc_seq_len (dim2).
# NOTE(review): only two dimensions are given to the helper, so the result is
# presumably 2-D [output_sequence_length, enc_seq_len] rather than
# [batch_size*n_heads, output_sequence_length, enc_seq_len] — confirm against
# utils.generate_square_subsequent_mask.
src_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=enc_seq_len,
)

In [12]:
# Mask for the decoder that is handed to the model as `tgt_mask`, built with
# output_sequence_length for both dimensions.
# NOTE(review): only two dimensions are given to the helper, so the result is
# presumably 2-D [output_sequence_length, output_sequence_length] rather than
# [batch_size*n_heads, output_sequence_length, output_sequence_length] —
# confirm against utils.generate_square_subsequent_mask.
tgt_mask = utils.generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=output_sequence_length,
)

In [13]:
# Run one forward pass on the first batch as a smoke test of the model wiring:
# `src` is the encoder input and `trg` the decoder input taken from the batch.
# NOTE(review): the pass runs with gradient tracking on and without an explicit
# model.eval(); fine for a shape check, but confirm before reusing the values.
forward_inputs = {
    "src": src,
    "tgt": trg,
    "src_mask": src_mask,
    "tgt_mask": tgt_mask,
}
output = model(**forward_inputs)

In [14]:
# Sanity check on the model output; the recorded output is torch.Size([128, 48, 1]),
# i.e. (batch_size, output_sequence_length, num_predicted_features).
output.shape

torch.Size([128, 48, 1])

tensor([[-0.9502],
        [-0.7927],
        [-0.7944],
        [-0.6271],
        [-0.4181],
        [-1.0290],
        [-0.3800],
        [-0.3112],
        [-0.5555],
        [-0.0844],
        [-0.8001],
        [-0.9051],
        [-0.8896],
        [-0.7853],
        [-0.6157],
        [-0.4375],
        [-0.6047],
        [-0.6153],
        [-0.6599],
        [-0.6641],
        [-0.2943],
        [-0.3266],
        [-0.3369],
        [-0.2158],
        [-0.5407],
        [-0.5734],
        [-0.4010],
        [-0.2437],
        [-0.5715],
        [-0.5381],
        [-0.3746],
        [-0.3853],
        [-0.4931],
        [-0.4329],
        [-0.6245],
        [-0.1371],
        [-0.4367],
        [-0.7469],
        [-0.4439],
        [-0.5834],
        [-0.4872],
        [-0.2352],
        [-0.8837],
        [-0.6376],
        [-0.5532],
        [-0.1492],
        [-0.9295],
        [-0.0112]], grad_fn=<SelectBackward0>)