In [1]:
from torch_geometric_temporal.dataset import ChickenpoxDatasetLoader
from torch_geometric_temporal.signal import temporal_signal_split, StaticGraphTemporalSignal

  from .autonotebook import tqdm as notebook_tqdm


## Demo

In [2]:
loader = ChickenpoxDatasetLoader()

dataset = loader.get_dataset()
train_dataset, test_dataset = temporal_signal_split(dataset, train_ratio=0.2)

In [3]:
a = dataset.__getitem__(1)

In [4]:
a

Data(x=[20, 4], edge_index=[2, 102], edge_attr=[102], y=[20])

In [5]:
import torch
import torch.nn.functional as F
from torch_geometric_temporal.nn.recurrent import DCRNN
from torch_geometric.nn.pool import global_mean_pool
import numpy as np

class RecurrentGCN(torch.nn.Module):
    def __init__(self, node_features):
        super(RecurrentGCN, self).__init__()
        self.recurrent = DCRNN(node_features, 32, 1)
        self.linear = torch.nn.Linear(32, 1)
        self.batch = torch.as_tensor(np.zeros(16), dtype=torch.int64)


    def forward(self, x, edge_index, edge_weight):
        h = self.recurrent(x, edge_index, edge_weight)
        h = F.relu(h)
        h = self.linear(h)
        h = global_mean_pool(h, size=1, batch=self.batch)
        return h

In [6]:
torch.as_tensor(np.zeros(20), dtype=torch.int64)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [7]:
model = RecurrentGCN(node_features=20)
x = torch.rand(20,4)
x.shape

torch.Size([20, 4])

In [8]:
model.eval()
cost = 0
for time, snapshot in enumerate(test_dataset):
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
    cost = cost + torch.mean((y_hat-snapshot.y)**2)
cost = cost / (time+1)
cost = cost.item()
print("MSE: {:.4f}".format(cost))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (20x36 and 52x32)

## mimic data

In [6]:
import argparse
import gc
import logging
import time
from pathlib import Path
from typing import Any, Callable, Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader

from src.constants import CONTINUOUS_COVARIATES_PROCESSED, STATIC_COLS, TARGET_COL
from src.dataset import GraphDataset, df_to_patient_tensors, get_normalizing_scaler, stack_dataset_featues_target, get_adjacency_coo
# from src.models.transformer import TimeSeriesTransformer
from src.utils import (
    EarlyStopping,
    generate_square_subsequent_mask,
    get_patient_indices,
    get_timestamp,
)

GLOBAL_SEED = 123
device = torch.device("cpu")

In [7]:
fpath_train = "/home/gaurang/glycemic_control/data/glycaemia_project_csvs/processed_data/train_test_splits/v1_patient_split_mini/train_mini.csv"
fpath_valid = "/home/gaurang/glycemic_control/data/glycaemia_project_csvs/processed_data/train_test_splits/v1_patient_split_mini/valid_mini.csv"
fpath_test = "/home/gaurang/glycemic_control/data/glycaemia_project_csvs/processed_data/train_test_splits/v1_patient_split_mini/test_mini.csv"

In [8]:
df_train = pd.read_csv(fpath_train)
df_valid = pd.read_csv(fpath_valid)
df_test = pd.read_csv(fpath_test)

In [9]:
np.random.seed(GLOBAL_SEED)
torch.manual_seed(GLOBAL_SEED)

# Hyperparams
lr = 1e-3
num_epochs = 20

# Params
enc_seq_len = 6  # length of input given to encoder
output_sequence_length = 1  # how many future glucose values to predict
step_size = 1  # Step size, i.e. how many time steps does the moving window move at each step
batch_first = True

# Define input variables
exogenous_vars = (
    CONTINUOUS_COVARIATES_PROCESSED + STATIC_COLS
)  # Each element must correspond to a column name
input_variables = TARGET_COL + exogenous_vars

input_size = len(input_variables)
num_predicted_features = 1


print(
    f"Time series params: \nInput sequence lenght: {enc_seq_len} \nOutput sequence lenght:"
    f" {output_sequence_length} \nStep size: {step_size}"
)

# df to patient tensor
scaler_X = get_normalizing_scaler(df_train[input_variables].values, "")
scaler_y = get_normalizing_scaler(df_train[TARGET_COL].values, "")
X_train, y_train = df_to_patient_tensors(
    df_train, feature_cols=input_variables, target_col=TARGET_COL, scaler_x=scaler_X, scaler_y=scaler_y
)
X_valid, y_valid = df_to_patient_tensors(
    df_valid, feature_cols=input_variables, target_col=TARGET_COL, scaler_x=scaler_X, scaler_y=scaler_y
)
X_test, y_test = df_to_patient_tensors(
    df_train, feature_cols=input_variables, target_col=TARGET_COL, scaler_x=scaler_X, scaler_y=None
)

# get subsequence indices
indices_train, num_samples_train = get_patient_indices(
    y_train, input_seq_len=enc_seq_len, forecast_len=output_sequence_length, step_size=step_size
)
indices_valid, num_samples_valid = get_patient_indices(
    y_valid, input_seq_len=enc_seq_len, forecast_len=output_sequence_length, step_size=step_size
)
indices_test, num_samples_test = get_patient_indices(
    y_valid, input_seq_len=enc_seq_len, forecast_len=output_sequence_length, step_size=step_size
)
print(
    f"Number of training samples: {num_samples_train}"
    f" \nNumber of valid samples: {num_samples_valid}"
    f" \nNumber of test samples: {num_samples_test}"
)

# create datasets

dataset_train = GraphDataset(
    data=X_train,
    labels=y_train,
    indices=indices_train,
    num_samples=num_samples_train,
    enc_seq_len=enc_seq_len,
    target_seq_len=output_sequence_length,
)
dataset_valid = GraphDataset(
    data=X_valid,
    labels=y_valid,
    indices=indices_valid,
    num_samples=num_samples_valid,
    enc_seq_len=enc_seq_len,
    target_seq_len=output_sequence_length,
)
dataset_test = GraphDataset(
    data=X_test,
    labels=y_test,
    indices=indices_test,
    num_samples=num_samples_test,
    enc_seq_len=enc_seq_len,
    target_seq_len=output_sequence_length,
)

Time series params: 
Input sequence lenght: 6 
Output sequence lenght: 1 
Step size: 1
Number of training samples: 65 
Number of valid samples: 43 
Number of test samples: 43


In [10]:
X_train, y_train = stack_dataset_featues_target(dataset_train)
X_valid, y_valid = stack_dataset_featues_target(dataset_valid)
X_test, y_test = stack_dataset_featues_target(dataset_test)

In [14]:
adj_matrix = np.ones(shape = (input_size, input_size))
for i in range(adj_matrix.shape[0]):
    adj_matrix[i,i] = 0
edge_index = get_adjacency_coo(adj_matrix)
edge_weights = np.ones(edge_index.shape[1])

In [15]:
dataloader_train = StaticGraphTemporalSignal(edge_index=edge_index, edge_weight=edge_weights, features=X_train, targets=y_train)
dataloader_valid = StaticGraphTemporalSignal(edge_index=edge_index, edge_weight=edge_weights, features=X_valid, targets=y_valid)
dataloader_test = StaticGraphTemporalSignal(edge_index=edge_index, edge_weight=edge_weights, features=X_test, targets=y_test)

In [16]:
a = dataloader_train.__getitem__(9)
print(a)

Data(x=[16, 6], edge_index=[2, 240], edge_attr=[240], y=[1])


In [17]:
model = RecurrentGCN(node_features = enc_seq_len)

optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

model.train()

for epoch in range(100):
    cost = 0
    for time, snapshot in enumerate(dataloader_train):
        y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
        cost = cost + torch.mean((y_hat - snapshot.y)**2)
    cost = cost/(time+1)
    if (epoch + 1) % 25 == 0:
        print(f"Epoch {epoch} : loss {cost.item()}")
    cost.backward()
    optimizer.step()
    optimizer.zero_grad()

Epoch 24 : loss 0.9826641082763672
Epoch 49 : loss 0.8792507648468018
Epoch 74 : loss 0.7558406591415405
Epoch 99 : loss 0.6309853792190552


In [18]:
model.eval()
cost = 0
for time, snapshot in enumerate(dataloader_valid):
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr)
    cost = cost + torch.mean((y_hat-snapshot.y)**2)
cost = cost / (time+1)
cost = cost.item()
print("MSE: {:.4f}".format(cost))

MSE: 5.0916


In [19]:
model.eval()
cost = 0
preds = []
actual = []
for time, snapshot in enumerate(dataloader_test):
    y_hat = model(snapshot.x, snapshot.edge_index, snapshot.edge_attr).detach().numpy()
    rescaled_y_hat = scaler_y.inverse_transform(y_hat)
    preds.append(rescaled_y_hat[0][0])
    actual.append(snapshot.y.detach().numpy()[0])

In [20]:
pd.DataFrame(data=zip(preds, actual), columns=['pred', 'actual'])

Unnamed: 0,pred,actual
0,135.970352,131.0
1,119.439888,129.0
2,109.98156,129.0
3,112.259453,114.0
4,119.747116,115.0
5,113.877556,110.0
6,106.873589,92.0
7,106.339058,130.0
8,131.561691,100.0
9,119.392303,97.0


In [63]:
df_res = df_test[TARGET_COL]
df_res['y_hat'] = ys
df_res

ValueError: Length of values (53) does not match length of index (77)