In [None]:
# Notebook setup: imports plus path handling so the repository's `lib` and
# `utils` packages (one directory above this notebook) can be imported.
# NOTE: `os.chdir('..')` is relative to the current working directory, so
# running this cell a second time in the same kernel moves up one more level
# and breaks the relative paths used later ('./output', 'presentation/...').
# Run it exactly once per kernel session.
import sys
import os
import pickle
import gzip
sys.path.insert(1, '..')
os.chdir('..')

import seaborn as sns
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import darts
from darts import metrics

# NOTE(review): later cells use `models`, `torch`, `load_data`, `Gluformer`,
# `LatentODEWrapper` and the `SamplingDatasetInference*` classes without an
# explicit import; presumably they are provided by these star imports -- confirm.
from lib.gluformer.model import *
from lib.latent_ode.trainer_glunet import *
from utils.darts_processing import *
from utils.darts_dataset import *

# Glucose plots

In [None]:
# Load the Weinstock data (both covariate flags enabled); the cells below plot
# its uninterpolated glucose readings for a few patients.
formatter, series, scalers = load_data(
    seed=0,
    study_file=None,
    dataset='weinstock',
    use_covs=True,
    use_static_covs=True,
)

In [None]:
# Build `plot_data`: one long-format frame per patient for the first three
# patients that have more than three distinct segments (`id_segment` values).
selected_patients = []
for _, patient_data in formatter.data.groupby('id'):
    # keep only patients with more than 3 unique segments
    if patient_data['id_segment'].nunique() > 3:
        selected_patients.append(patient_data)
        if len(selected_patients) == 3:
            break

plot_data = []
for patient_data in selected_patients:
    # hours elapsed since this patient's earliest timestamp
    elapsed = patient_data['time'] - patient_data['time'].min()
    elapsed_hours = elapsed.dt.total_seconds() / 3600
    # `assign` returns a new frame, so the groupby slice is never written to
    # (assigning the column in place on the slice raises SettingWithCopyWarning)
    patient_long = patient_data.assign(time=elapsed_hours).melt(
        id_vars=['time', 'id_segment'],
        value_vars=['gl'],
        var_name='y',
        value_name='x',
    )
    plot_data.append(patient_long)


In [None]:
# Plot the glucose trace of each selected patient in its own panel, colouring
# every segment (`id_segment`) differently.
sns.set_style('whitegrid')
colors = ['#000000', '#003DFD', '#b512b8', '#11a9ba', '#0d780f', '#f77f07', '#ba0f0f']
fig, axes = plt.subplots(3, 1, figsize=(10, 10))
for i, patient_data in enumerate(plot_data):
    # A list palette must have one entry per hue level: depending on the
    # seaborn version a length mismatch either warns or raises.  Build a list
    # of exactly the right length, cycling through the base colours if needed.
    n_segments = patient_data['id_segment'].nunique()
    segment_palette = [colors[k % len(colors)] for k in range(n_segments)]
    sns.lineplot(x='time', y='x', hue='id_segment',
                 data=patient_data, ax=axes[i],
                 palette=segment_palette)
    axes[i].set_ylabel('Glucose (mg/dL)')
    # the per-segment legend carries no information here, so drop it
    legend = axes[i].get_legend()
    if legend is not None:
        legend.remove()
    # only the last plotted panel gets an x-axis label
    is_last_panel = i == len(plot_data) - 1
    axes[i].set_xlabel('Time (hours)' if is_last_panel else '')
    

# Split and interpolation example plots

In [None]:
import matplotlib.pyplot as plt
from matplotlib.patches import Rectangle
from matplotlib.lines import Line2D

# Background colours marking the data splits, and their legend labels
colors = ['#FEE1C7', '#C8FEC7', '#DAF9FE', '#FEF4FF']
labels = ['Train', 'Validation', 'ID Test', 'OOD Test']

# NOTE(review): `ids` and `id_series` are not defined in any cell visible in
# this part of the notebook; they must already exist in the kernel -- confirm
# where they are created.
shown_ids = ids[:3]

# One panel per patient: the first two are shaded as train / validation /
# ID test, the last one is shown entirely as OOD test.
fig, axs = plt.subplots(3, 1, figsize=(20, 15))
for i, patient_id in enumerate(shown_ids):
    # draw (at most) the first three series of this patient in black
    for j, patient_series in enumerate(id_series[patient_id][:3]):
        patient_series.plot(ax=axs[i], color='black')

    if i < len(shown_ids) - 1:  # Only create rectangles for the first two plots
        y_min, y_max = axs[i].get_ylim()
        x_min, x_max = axs[i].get_xlim()
        total_width = x_max - x_min
        first_width = total_width * 2/3
        second_width = (total_width - first_width) / 2

        # Exactly three spans tile the axis (2/3 + 1/6 + 1/6).  Iterating over
        # all four colours placed a fourth ('OOD Test') rectangle at the same
        # offset as the 'ID Test' one, painting over it; the OOD colour is
        # reserved for the last panel instead.
        left = x_min
        split_widths = [first_width, second_width, second_width]
        for width, color in zip(split_widths, colors[:3]):
            rect = Rectangle((left, y_min), width, y_max - y_min,
                             facecolor=color, alpha=0.5)
            axs[i].add_patch(rect)
            left += width
    else:  # Last plot: whole background in the 'OOD Test' colour
        axs[i].set_facecolor(colors[3])

    axs[i].set_ylabel('Glucose (mg/dL)')
    axs[i].set_xlabel('')
    axs[i].set_title(f'Glucose data for patient {i+1}')
    axs[i].tick_params(axis='x', rotation=0)
    if axs[i].get_legend() is not None:
        axs[i].get_legend().remove()
    # increase font size
    for item in ([axs[i].title, axs[i].xaxis.label, axs[i].yaxis.label] +
                axs[i].get_xticklabels() + axs[i].get_yticklabels()):
        item.set_fontsize(22)

# Create a custom legend for the colors
legend_elements = [Rectangle((0, 0), 1, 1, facecolor=color, edgecolor=color, label=label)
                   for color, label in zip(colors, labels)]
fig.legend(handles=legend_elements, loc='lower center', ncol=len(colors), fontsize=22, frameon=True)

# save as pdf (opaque background: transparent=False)
plt.savefig('presentation/plots/glucose_data_split_plot.pdf', bbox_inches='tight', transparent=False)


# Example plots of other time series

In [None]:
# import example datasets from darts (temperature, air passengers, heart rate)
from darts.datasets import TemperatureDataset
from darts.datasets import AirPassengersDataset
from darts.datasets import HeartRateDataset

In [None]:
# Plot three example darts datasets, one per panel, with enlarged fonts
fig, axs = plt.subplots(3, 1, figsize=(30, 15))
dataset_names = ['Temperature', 'AirPassengers', 'HeartRate']
yaxis = ['Temperature (C)', 'AirPassengers', 'HeartRate (bpm)']
example_datasets = [TemperatureDataset(), AirPassengersDataset(), HeartRateDataset()]
for ax, dataset, panel_title, y_label in zip(axs, example_datasets, dataset_names, yaxis):
    dataset.load().plot(ax=ax)
    ax.set_ylabel(y_label)
    ax.set_xlabel('')
    ax.tick_params(axis='x', rotation=0)
    ax.set_title(panel_title)
    # drop the automatically created legend, if any
    panel_legend = ax.get_legend()
    if panel_legend is not None:
        panel_legend.remove()
    # enlarge title, axis labels and tick labels
    text_items = [ax.title, ax.xaxis.label, ax.yaxis.label]
    text_items += ax.get_xticklabels() + ax.get_yticklabels()
    for text_item in text_items:
        text_item.set_fontsize(22)
# save as pdf
plt.savefig('presentation/plots/ts_data.pdf', bbox_inches='tight')

# Save forecasts of all models: no covariates, ID test

In [None]:
# Per-model settings: the darts model class ('darts'), the inference dataset
# class for the deep models ('darts_data'), and the covariate options that are
# forwarded to `load_data`.
past_no_covs = {'use_covs': False, 'use_static_covs': False, 'cov_type': 'past'}
model_params = {
    'transformer': {
        'darts': models.TransformerModel,
        'darts_data': SamplingDatasetInferencePast,
        **past_no_covs,
    },
    'nhits': {
        'darts': models.NHiTSModel,
        'darts_data': SamplingDatasetInferencePast,
        **past_no_covs,
    },
    'tft': {
        'darts': models.TFTModel,
        'darts_data': SamplingDatasetInferenceMixed,
        'use_covs': False,
        'use_static_covs': True,
        'cov_type': 'mixed',
    },
    'xgboost': {'darts': models.XGBModel, **past_no_covs},
    'linreg': {'darts': models.LinearRegressionModel, **past_no_covs},
}
# datasets to evaluate
datasets = ['weinstock', 'dubosson', 'colas', 'iglu', 'hall']
# results, keyed by '<model>_<dataset>'
save_trues = {}
save_forecasts = {}
# For every (model, dataset) pair: load the data, restore or refit the model,
# forecast on the test split and store forecasts / true values on the original
# glucose scale under the key '<model>_<dataset>'.
for model_name in model_params.keys():
    cfg = model_params[model_name]
    for dataset in datasets:
        result_key = f'{model_name}_{dataset}'
        print(f'Testing {model_name} for {dataset}')
        formatter, series, scalers = load_data(seed=0, study_file=None, dataset=dataset,
                                               use_covs=cfg['use_covs'],
                                               use_static_covs=cfg['use_static_covs'],
                                               cov_type=cfg['cov_type'])
        hp = formatter.params[model_name]          # model-specific hyper-parameters
        horizon = formatter.params['length_pred']  # forecast length
        target_scaler = scalers['target']
        test_target = series['test']['target']

        if model_name in ['tft', 'transformer', 'nhits']:
            # deep models: restore the best checkpoint from ./output
            model = cfg['darts'](input_chunk_length=hp['in_len'],
                                 output_chunk_length=horizon)
            model = model.load_from_checkpoint(f'tensorboard_{model_name}_{dataset}',
                                               work_dir='./output', best=True)
            # dataset used for inference
            test_dataset = cfg['darts_data'](target_series=test_target,
                                             n=horizon,
                                             input_chunk_length=hp['in_len'],
                                             output_chunk_length=horizon,
                                             use_static_covariates=cfg['use_static_covs'],
                                             max_samples_per_ts=None)
            # predictions (20 samples for tft, a single one otherwise)
            forecasts = model.predict_from_dataset(n=horizon,
                                                   input_series_dataset=test_dataset,
                                                   verbose=True,
                                                   num_samples=20 if model_name == 'tft' else 1)
            save_forecasts[result_key] = target_scaler.inverse_transform(forecasts)
            # matching true values, taken from the inference dataset
            true_samples = [test_dataset.evalsample(i) for i in range(len(test_dataset))]
            save_trues[result_key] = target_scaler.inverse_transform(true_samples)
        elif model_name in ('xgboost', 'linreg'):
            # regression models: refit on the training split
            if model_name == 'xgboost':
                same_name_kwargs = {name: hp[name]
                                    for name in ('subsample', 'min_child_weight',
                                                 'colsample_bytree', 'max_depth',
                                                 'gamma', 'n_estimators')}
                model = cfg['darts'](lags=hp['in_len'],
                                     learning_rate=hp['lr'],
                                     reg_alpha=hp['alpha'],
                                     reg_lambda=hp['lambda_'],
                                     random_state=0,
                                     **same_name_kwargs)
            else:
                model = cfg['darts'](lags=hp['in_len'],
                                     output_chunk_length=horizon)
            model.fit(series['train']['target'])
            # rolling forecasts over the test series, without retraining
            forecasts = model.historical_forecasts(test_target,
                                                   forecast_horizon=horizon,
                                                   stride=1,
                                                   retrain=False,
                                                   verbose=(model_name == 'xgboost'),
                                                   last_points_only=False,
                                                   start=formatter.params["max_length_input"])
            save_forecasts[result_key] = [target_scaler.inverse_transform(forecast)
                                          for forecast in forecasts]
            # true values: the whole test series
            save_trues[result_key] = target_scaler.inverse_transform(test_target)

            
 


In [None]:
# Forecast with the Gluformer and Latent ODE models on the test split of every
# dataset and store forecasts / true values under '<model>_<dataset>'.
datasets = ['weinstock', 'dubosson', 'colas', 'iglu', 'hall']
# Prefer the GPU but fall back to the CPU so the cell also runs on machines
# without CUDA (the gluformer weights are loaded with `map_location` and the
# latent ODE loader receives the device explicitly).
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(f'Running inference on {device}')

for dataset in datasets:
    print(f'Testing {dataset}')
    formatter, series, scalers = load_data(seed=0, study_file=None, dataset=dataset, use_covs=True, use_static_covs=True)
    target_scaler = scalers['target']
    # define dataset for inference: gluformer
    dataset_test_glufo = SamplingDatasetInferenceDual(target_series=series['test']['target'],
                                                      covariates=series['test']['future'],
                                                      input_chunk_length=formatter.params['gluformer']['in_len'],
                                                      output_chunk_length=formatter.params['length_pred'],
                                                      use_static_covariates=True,
                                                      array_output_only=True)
    # define dataset for inference: latent ode (differs only in the input length)
    dataset_test_latod = SamplingDatasetInferenceDual(target_series=series['test']['target'],
                                                      covariates=series['test']['future'],
                                                      input_chunk_length=formatter.params['latentode']['in_len'],
                                                      output_chunk_length=formatter.params['length_pred'],
                                                      use_static_covariates=True,
                                                      array_output_only=True)
    # load model: gluformer (feature counts are read from the last training series)
    num_dynamic_features = series['train']['future'][-1].n_components
    num_static_features = series['train']['static'][-1].n_components
    glufo = Gluformer(d_model = formatter.params['gluformer']['d_model'],
                      n_heads = formatter.params['gluformer']['n_heads'],
                      d_fcn = formatter.params['gluformer']['d_fcn'],
                      r_drop = 0.2,
                      activ = 'relu',
                      num_enc_layers = formatter.params['gluformer']['num_enc_layers'],
                      num_dec_layers = formatter.params['gluformer']['num_dec_layers'],
                      distil = True,
                      len_seq = formatter.params['gluformer']['in_len'],
                      label_len = formatter.params['gluformer']['in_len'] // 3,
                      len_pred = formatter.params['length_pred'],
                      num_dynamic_features = num_dynamic_features,
                      num_static_features = num_static_features,)
    glufo.to(device)
    glufo.load_state_dict(torch.load(f'./output/tensorboard_gluformer_{dataset}/model.pt', map_location=torch.device(device)))
    # load model: latent ode
    latod = LatentODEWrapper(device = device,
                             latents = formatter.params['latentode']['latents'],
                             rec_dims = formatter.params['latentode']['rec_dims'],
                             rec_layers = formatter.params['latentode']['rec_layers'],
                             gen_layers = formatter.params['latentode']['gen_layers'],
                             units = formatter.params['latentode']['units'],
                             gru_units = formatter.params['latentode']['gru_units'],)
    latod.load(f'./output/tensorboard_latentode_{dataset}/model.ckpt', device)
    # get predictions: gluformer
    print('Gluformer')
    forecasts, _ = glufo.predict(dataset_test_glufo,
                                 batch_size=8,
                                 num_samples=10,
                                 device=device,
                                 use_tqdm=True)
    # The models return raw arrays, so the scaling is undone by hand.
    # NOTE(review): presumably the inverse of a min-max transform
    # `x * scale_ + min_` -- confirm against the scaler class.
    forecasts = (forecasts - target_scaler.min_) / target_scaler.scale_
    trues = [dataset_test_glufo.evalsample(i) for i in range(len(dataset_test_glufo))]
    trues = target_scaler.inverse_transform(trues)
    save_forecasts[f'gluformer_{dataset}'] = forecasts
    save_trues[f'gluformer_{dataset}'] = trues
    # get predictions: latent ode
    print('Latent ODE')
    forecasts = latod.predict(dataset_test_latod,
                              batch_size=32,
                              num_samples=20,
                              device=device,
                              use_tqdm=True,)
    forecasts = (forecasts - target_scaler.min_) / target_scaler.scale_
    trues = [dataset_test_latod.evalsample(i) for i in range(len(dataset_test_latod))]
    trues = target_scaler.inverse_transform(trues)
    save_forecasts[f'latentode_{dataset}'] = forecasts
    save_trues[f'latentode_{dataset}'] = trues



In [None]:
# Persist forecasts and true values as gzip-compressed pickles
outputs = [
    ('./presentation/data/compressed_forecasts.pkl', save_forecasts),
    ('./presentation/data/compressed_trues.pkl', save_trues),
]
for out_path, payload in outputs:
    with gzip.open(out_path, 'wb') as file:
        pickle.dump(payload, file)


# Compute day and night-time distribution

In [None]:
# Reload the cached forecasts and true values written by the cell above.
# NOTE: unpickling can execute arbitrary code -- only load files that this
# notebook produced itself.
forecasts_path = './presentation/data/compressed_forecasts.pkl'
trues_path = './presentation/data/compressed_trues.pkl'
with gzip.open(forecasts_path, 'rb') as file:
    save_forecasts = pickle.load(file)
with gzip.open(trues_path, 'rb') as file:
    save_trues = pickle.load(file)

In [None]:
# Split the per-forecast MAE of every model / dataset into day-time and
# night-time lists, keyed by '<model>_<dataset>'.
day_errors = {}
night_errors = {}
datasets = ['weinstock', 'dubosson', 'colas', 'iglu', 'hall']
# NOTE(review): this rebinds `models`, which the forecasting cell above uses as
# a module (`models.TransformerModel`); re-running that cell after this one
# would fail.  The name is kept because later cells read this list.
models = ['gluformer', 'latentode', 'tft', 'nhits', 'linreg', 'xgboost', 'transformer']


def _record_error(key, hour, error):
    """Append `error` to the day list if 9 <= hour < 21, else to the night list."""
    bucket = day_errors if 9 <= hour < 21 else night_errors
    bucket[key].append(error)


for dataset in datasets:
    for model in models:
        key = f'{model}_{dataset}'
        # start with empty lists for this pair
        day_errors[key] = []
        night_errors[key] = []
        forecasts = save_forecasts[key]
        trues = save_trues[key]
        if model in ['xgboost', 'linreg']:
            # one list of rolling forecasts per test series; the hour is taken
            # from the start of each forecast
            for i, fs in enumerate(forecasts):
                t = trues[i]
                for f in fs:
                    hour = f.start_time().hour
                    _record_error(key, hour, metrics.mae(f, t))
        elif model in ['tft', 'nhits', 'transformer']:
            # one forecast per true sample; the hour is taken from the true sample
            for i, f in enumerate(forecasts):
                t = trues[i]
                hour = t.start_time().hour
                _record_error(key, hour, metrics.mae(f, t))
        else:
            # forecasts are arrays: average over the sample dimension, then
            # compute the MAE against the true values by hand
            for i, t in enumerate(trues):
                hour = t.start_time().hour
                t = t.values().squeeze()
                if model == 'gluformer':
                    f = np.mean(forecasts[i, :, :], axis=1)
                elif model == 'latentode':
                    f = np.mean(forecasts[:, i, :, 0], axis=0)
                _record_error(key, hour, np.mean(np.abs(f - t)))

In [None]:
# Persist the day / night error lists as gzip-compressed pickles
error_outputs = [
    ('./presentation/data/compressed_day_errors.pkl', day_errors),
    ('./presentation/data/compressed_night_errors.pkl', night_errors),
]
for out_path, payload in error_outputs:
    with gzip.open(out_path, 'wb') as file:
        pickle.dump(payload, file)

In [None]:
# Stand-alone figure: day vs night MAE distribution of the transformer model
# on the weinstock dataset.
fig, ax = plt.subplots(figsize=(6, 6))
day_errors_list = day_errors['transformer_weinstock']
night_errors_list = night_errors['transformer_weinstock']
# normalized histograms with 35 bins
sns.histplot(day_errors_list, stat='density', alpha=0.5,
        color='blue', label='Day Errors', ax=ax, bins=35)
sns.histplot(night_errors_list, stat='density', alpha=0.5,
        color='darkgreen', label='Night Errors', ax=ax, bins=35)
# density estimates of the two lists of values
sns.kdeplot(day_errors_list, color='blue', linestyle='-', ax=ax)
sns.kdeplot(night_errors_list, color='darkgreen', linestyle='-', ax=ax)
# A single panel always gets both axis labels.  (The grid-layout conditions on
# `i`, `j` and `len(models)` that were copied here read loop variables left
# over from other cells, which blanked the labels or raised NameError
# depending on kernel state.)
ax.set_ylabel('Density')
ax.set_xlabel('MAE')
ax.legend()
# increase font size of axis labels, tick labels and legend
for item in ([ax.xaxis.label, ax.yaxis.label] +
                ax.get_xticklabels() + ax.get_yticklabels()):
        item.set_fontsize(16)
if ax.get_legend() is not None:
        for item in ax.get_legend().get_texts():
                item.set_fontsize(12)
# save as pdf
plt.tight_layout()
plt.savefig("./presentation/plots/night_vs_day_error_transformer_weinstock.pdf")

In [None]:
# Grid of day vs night MAE distributions: one row per model, one column per
# dataset, each panel showing normalized histograms (35 bins) and density
# estimates.
fig, axs = plt.subplots(len(models), len(datasets), figsize=(30, 40))
last_row = len(models) - 1
for i, model in enumerate(models):
    for j, dataset in enumerate(datasets):
        ax = axs[i, j]
        day_errors_list = day_errors[f'{model}_{dataset}']
        night_errors_list = night_errors[f'{model}_{dataset}']
        styled_errors = [(day_errors_list, 'blue', 'Day Errors'),
                         (night_errors_list, 'darkgreen', 'Night Errors')]
        # histograms first, density curves on top
        for values, color, label in styled_errors:
            sns.histplot(values, stat='density', alpha=0.5,
                         color=color, label=label, ax=ax, bins=35)
        for values, color, _ in styled_errors:
            sns.kdeplot(values, color=color, linestyle='-', ax=ax)
        # y label only in the first column, x label only in the last row
        ax.set_ylabel('Density' if j == 0 else '')
        ax.set_xlabel('MAE' if i == last_row else '')
        ax.set_title(f'{model.upper()} - {dataset.upper()}')
        # keep the legend in the top-left panel only
        ax.legend()
        if not (i == 0 and j == 0):
            ax.get_legend().remove()
        # enlarge axis labels and tick labels, then the legend entries (if kept)
        text_items = [ax.xaxis.label, ax.yaxis.label]
        text_items += ax.get_xticklabels() + ax.get_yticklabels()
        for item in text_items:
            item.set_fontsize(16)
        panel_legend = ax.get_legend()
        if panel_legend is not None:
            for item in panel_legend.get_texts():
                item.set_fontsize(12)
# save as pdf
plt.tight_layout()
plt.savefig("./presentation/plots/night_vs_day_error.pdf")

# Tail simulation to check Gaussianity