In [1]:
import sys
sys.path.insert(2, '/home/gaurang/glycemic_control/code/src/')

In [2]:
import pandas as pd
import torch
from torch.utils.data import DataLoader

from src.constants import CONTINUOUS_COVARIATES_PROCESSED, STATIC_COLS, TARGET_COL
from src.dataset import get_normalizing_scaler, df_to_patient_tensors, TransformerDataset
from src.utils import get_patient_indices, generate_square_subsequent_mask
from src.models.transformer import TimeSeriesTransformer


In [3]:
# Hyperparams
batch_size = 128

## Params
dim_val = 512
n_heads = 8
n_decoder_layers = 4
n_encoder_layers = 4
enc_seq_len = 3 # length of input given to encoder
output_sequence_length = 1 # target sequence length. If hourly data and length = 48, you predict 2 days ahead
window_size = enc_seq_len + output_sequence_length # used to slice data into sub-sequences
step_size = 1 # Step size, i.e. how many time steps does the moving window move at each step
in_features_encoder_linear_layer = 2048
in_features_decoder_linear_layer = 2048
max_seq_len = enc_seq_len
batch_first = True

In [4]:
# Define input variables 
exogenous_vars =  CONTINUOUS_COVARIATES_PROCESSED + STATIC_COLS # should contain strings. Each string must correspond to a column name
input_variables = TARGET_COL + exogenous_vars
target_idx = 0 # index position of target in batched trg_y

input_size = len(input_variables)

In [5]:
# Read data
df_train = pd.read_csv('/home/gaurang/glycemic_control/data/glycaemia_project_csvs/processed_data/train_test_splits/v1_patients_split/train.csv')
df_valid = pd.read_csv('/home/gaurang/glycemic_control/data/glycaemia_project_csvs/processed_data/train_test_splits/v1_patients_split/valid.csv')
df_test = pd.read_csv('/home/gaurang/glycemic_control/data/glycaemia_project_csvs/processed_data/train_test_splits/v1_patients_split/test.csv')


In [6]:
#df to patient tensor

scaler = get_normalizing_scaler(df_train[input_variables])
# X_train, y_train = df_to_patient_tensors(df_train, feature_cols=input_variables, target_col=TARGET_COL, scaler=scaler)
X_valid, y_valid = df_to_patient_tensors(df_valid, feature_cols=input_variables, target_col=TARGET_COL, scaler=scaler)
# X_test, y_test = df_to_patient_tensors(df_test, feature_cols=input_variables, target_col=TARGET_COL, scaler=scaler)

In [7]:
valid_indices, num_valid_samples = get_patient_indices(y_valid, input_seq_len=enc_seq_len, forecast_len=output_sequence_length, step_size=step_size)

In [8]:
valid_dataset = TransformerDataset(data=X_valid, labels=y_valid, indices=valid_indices, num_samples=num_valid_samples, enc_seq_len=enc_seq_len, target_seq_len=output_sequence_length)

In [9]:
valid_data_loader = DataLoader(valid_dataset, batch_size)

In [10]:
i, batch = next(enumerate(valid_data_loader))

In [11]:
src, trg, trg_y = batch

In [12]:
print(src.shape, trg.shape, trg_y.shape)

torch.Size([128, 3, 16]) torch.Size([128, 1, 1]) torch.Size([128, 1])


In [13]:
model = TimeSeriesTransformer(
    input_size=len(input_variables),
    dec_seq_len=enc_seq_len,
    batch_first=batch_first,
    num_predicted_features=1
    )

In [14]:
# Make src mask for decoder with size:
# [batch_size*n_heads, output_sequence_length, enc_seq_len]
src_mask = generate_square_subsequent_mask(
    dim1=output_sequence_length,
    dim2=enc_seq_len
    )

In [15]:
# Make tgt mask for decoder with size:
# [batch_size*n_heads, output_sequence_length, output_sequence_length]
tgt_mask = generate_square_subsequent_mask( 
    dim1=output_sequence_length,
    dim2=output_sequence_length
    )

In [16]:
output = model(
    src=src,
    tgt=trg,
    src_mask=src_mask,
    tgt_mask=tgt_mask
    )

In [19]:
output

tensor([[[ 7.6556e-01]],

        [[ 7.1866e-01]],

        [[ 1.1937e+00]],

        [[ 2.1025e-01]],

        [[ 3.7781e-01]],

        [[ 4.6313e-01]],

        [[ 5.0239e-01]],

        [[-1.8414e-01]],

        [[-8.5029e-02]],

        [[ 3.7096e-02]],

        [[ 5.1956e-01]],

        [[ 6.6049e-01]],

        [[ 1.1405e+00]],

        [[ 1.0789e+00]],

        [[ 1.0786e-01]],

        [[ 2.9880e-01]],

        [[ 3.8904e-01]],

        [[ 7.8342e-01]],

        [[ 8.8981e-01]],

        [[ 7.3211e-01]],

        [[ 1.5768e-02]],

        [[ 1.9226e-01]],

        [[ 4.9389e-01]],

        [[ 1.8585e-01]],

        [[ 7.5977e-01]],

        [[ 2.0207e-01]],

        [[ 5.5178e-01]],

        [[ 1.5317e-01]],

        [[ 4.6693e-01]],

        [[ 6.4167e-01]],

        [[-5.7679e-02]],

        [[ 2.5332e-01]],

        [[ 9.8534e-01]],

        [[ 8.5729e-02]],

        [[ 6.9226e-02]],

        [[ 3.2191e-01]],

        [[-8.4041e-02]],

        [[ 3.7734e-01]],

        [[-1

In [36]:
prev_len = len(X_train[0]) + len(X_train[1])

In [37]:
prev_len

34

In [38]:
sample_id = 7
start_pos, end_pos = train_indices_patient[sample_id]

In [65]:
y_train_patient[start_pos:end_pos]

tensor([[221.],
        [251.],
        [215.],
        [190.]], dtype=torch.float64)

In [45]:
torch.equal(stacked_train[prev_len+start_pos:prev_len+end_pos], X_train_patient[start_pos:end_pos])

True