# Synthetic Data Experiments

In [None]:
import pickle as pkl
import pandas as pd

import numpy as np
import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('module://ipykernel.pylab.backend_inline')

import seaborn as sns
import torch

import NESDE_SDE
import lstm_SDE
from NESDE import general_utils as utils

# Widen the notebook container and the output area so wide tables and figures fit.
# `display` and `HTML` come from the public `IPython.display` module; importing
# `display` from `IPython.core.display` is deprecated.
from IPython.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))
display(HTML("<style>.output_result { max-width:90% !important; }</style>"))
# Compact numeric printing: 3 decimals, 150-character lines, no scientific notation.
np.set_printoptions(precision=3, linewidth=150, suppress=True)
torch.set_printoptions(linewidth=150)
# Prefer the GPU, but fall back to the CPU so the notebook still runs on machines
# without CUDA instead of failing when `device` is first used.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
import os
# Output folders used by the NESDE and LSTM experiment cells below.
res_dirs = ['results_NESDE', 'results_LSTM']
for rdir in res_dirs:
    try:
        os.mkdir(rdir)
    except FileExistsError:
        # Only "already there" is an expected condition; any other failure
        # (permissions, missing parent, ...) must surface instead of being
        # misreported as an existing directory.
        print("Directory '" + rdir + "' exists!" )

In [None]:
def add_disc_time(df, time_col):
    """Annotate stacked trajectories with per-step time gaps and step counters.

    ``df`` holds several trajectories stacked one after another. A new
    trajectory starts at the first row and at every row whose ``time_col``
    value is smaller than the value in the previous row.

    Two columns are added to ``df`` **in place**:

    * ``time_diff`` -- time elapsed since the previous row of the same
      trajectory; for the first row of a trajectory it is that row's own time.
    * ``n_smps`` -- 1-based position of the row within its trajectory.

    Rows are processed positionally, so the frame does not need a default
    ``RangeIndex``. An empty frame is returned with the two (empty) columns.

    Args:
        df: DataFrame containing a numeric time column.
        time_col: Name of the time column.

    Returns:
        The same ``df`` object, with ``time_diff`` and ``n_smps`` added.
    """
    time_arr = df[time_col].to_numpy()
    n_rows = len(time_arr)
    if n_rows == 0:
        # Nothing to segment; still add the columns so the schema is stable.
        df['time_diff'] = time_arr.copy()
        df['n_smps'] = np.arange(0)
        return df

    steps = np.diff(time_arr)

    # A trajectory boundary is wherever time goes backwards (plus row 0).
    is_start = np.ones(n_rows, dtype=bool)
    is_start[1:] = steps < 0

    # Gap to the previous row, except at trajectory starts where the absolute
    # time of the row is used. Copy so the frame's own data is never mutated.
    time_diffs = time_arr.copy()
    time_diffs[1:] = steps
    time_diffs[is_start] = time_arr[is_start]

    # Running maximum of the start positions gives, for each row, the position
    # of the first row of its trajectory.
    positions = np.arange(n_rows)
    last_start = np.maximum.accumulate(np.where(is_start, positions, 0))
    n_smps = positions - last_start + 1

    df['time_diff'] = time_diffs
    df['n_smps'] = n_smps
    return df

def calc_smp_mean(df, target_col, name):
    """Average ``target_col`` per within-trajectory step index.

    For every integer step ``k`` from 1 up to the largest value found in
    ``df['n_smps']``, and only if at least one row has ``n_smps == k``, the
    mean of ``target_col`` over those rows is computed.

    Args:
        df: DataFrame with an ``n_smps`` column and a ``target_col`` column.
        target_col: Name of the column to average.
        name: Label stored in the ``model`` column of the result.

    Returns:
        DataFrame with columns ``model``, ``n_smps`` and ``target_col``,
        one row per step index that occurs in ``df``.
    """
    step_col = df['n_smps']
    highest_step = int(step_col.max())

    per_step = []
    for k in range(1, highest_step + 1):
        rows = step_col == k
        if rows.any():
            per_step.append((k, df.loc[rows, target_col].mean()))

    steps = np.array([k for k, _ in per_step])
    means = np.array([m for _, m in per_step])
    return pd.DataFrame({'model': name, 'n_smps': steps, target_col: means})

## Out-of-Distribution Experiments

### Complex

In [None]:
# TRAIN: seed count, dataset size and the 80/20 train/validation split derived from it
do_train = True
n_seeds = 5
n_train = [1000]
ts = int(0.8 * n_train[0])
vs = int(0.2 * n_train[0])

n_epochs_max = 800

# TEST: test-set size and the dataset / output-file prefixes for this experiment
N_TEST = 1000
n_test = N_TEST
data_path = "./Data/SDE_ood_complex"
out_prefix = 'ood_complex'

Train NESDE

In [None]:
%%time
# Run the NESDE multi-experiment driver on the dataset at `data_path`, passing
# `n_seeds` as its first argument. The two returned objects are bound to the
# OOD-test and regular-test results, in that order (presumably per-time tables --
# confirm in NESDE_SDE). `outs_path` is the file prefix that the "Process Results"
# cell reads the `*_table_res_per_t*_exp{i}.csv` files from.
res_per_t_ood_nesde, res_per_t_nesde = NESDE_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=True, valid_by_nll=True, 
    stationary_model=True, n_epochs=n_epochs_max, patience=60, batch_size=20, gamma=1, lr=3e-3,
    device=device, sep_datasets=True, data_path=data_path, outs_path="./results_NESDE/" + out_prefix + "_", 
    ood_test=True, random_samples=3, log_rate=10, do_train=do_train, complex=True, skip_first=True)

Train LSTM

In [None]:
%%time
# Run the LSTM baseline driver with the same data split as NESDE. Unlike the NESDE
# call it is trained and validated with `train_by_nll=False, valid_by_nll=False`
# and uses a larger batch size (50). Outputs use the `./results_LSTM/` prefix.
res_per_t_ood_lstm, res_per_t_lstm = lstm_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=False, valid_by_nll=False, 
    n_epochs=n_epochs_max, patience=60, batch_size=50, gamma=1, lr=3e-3, device=device, sep_datasets=True, 
    data_path=data_path, outs_path="./results_LSTM/" + out_prefix + "_", ood_test=True, random_samples=5, 
    log_rate=10, do_train=do_train, skip_first=True)

Process Results

In [None]:
# Load the per-seed result tables of both models (regular and OOD test) and
# annotate each with the within-trajectory step counter.
lstm, nesde, lstm_ood, nesde_ood = [], [], [], []
for i in range(n_seeds):
    lstm.append(add_disc_time(pd.read_csv(f'results_LSTM/{out_prefix}_table_res_per_t_exp{i}.csv'), 'time'))
    lstm_ood.append(add_disc_time(pd.read_csv(f'results_LSTM/{out_prefix}_table_res_per_t_ood_exp{i}.csv'), 'ood_time'))
    nesde.append(add_disc_time(pd.read_csv(f'results_NESDE/{out_prefix}_table_res_per_t_exp{i}.csv'), 'time'))
    nesde_ood.append(add_disc_time(pd.read_csv(f'results_NESDE/{out_prefix}_table_res_per_t_ood_exp{i}.csv'), 'ood_time'))

# Per-seed mean error at each step index, interleaved as NESDE, LSTM, NESDE, ...
models = (('NESDE', nesde, nesde_ood), ('LSTM', lstm, lstm_ood))
res_l = [calc_smp_mean(regular[i], 'MSE', label)
         for i in range(n_seeds) for label, regular, _ in models]
res_ood_l = [calc_smp_mean(ood[i], 'OOD_MSE', label)
             for i in range(n_seeds) for label, _, ood in models]
res = pd.concat(res_l)
ood_res = pd.concat(res_ood_l)

Plots

In [None]:
# Regular test
# Per-step MSE of both models on the regular test set, one line per model.
res_per_t = res.copy()
res_per_t = res_per_t.reset_index()
res_per_t = res_per_t.drop(['index'],axis=1)


# Single-panel figure created through the project helper `utils.Axes`.
axs = utils.Axes(1,1,(3.8,3),fontsize=15)
a = 0

axs[a].tick_params(axis='x', labelsize=13)
axs[a].tick_params(axis='y', labelsize=13)
sns.lineplot(data=res_per_t, hue='model', x='n_smps', y='MSE', markers=True, dashes=False, style='model', ax=axs[a])
axs.labs(a, 'Observation within trajectory', 'MSE')
axs[a].set_yscale('log')

axs[a].legend(fontsize=14)

# Explicit tick positions on the log axis.
axxx = np.array([0.2,0.3,0.4,0.5])
axs[a].set_yticks(axxx)

# Every position in `axxx` is in the allowed tuple, so all ticks get a label here.
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.2,0.3,0.4,0.5) else None) for i in axxx], fontsize=13)


plt.tight_layout();

In [None]:
# OOD test
# Per-step MSE of both models on the out-of-distribution test set.
res_per_t = ood_res.copy()
res_per_t = res_per_t.reset_index()
res_per_t = res_per_t.drop(['index'],axis=1)

axs = utils.Axes(1,1,(3.8,3),fontsize=15)
a = 0

axs[a].tick_params(axis='x', labelsize=13)
axs[a].tick_params(axis='y', labelsize=13)
sns.lineplot(data=res_per_t, hue='model', x='n_smps', y='OOD_MSE', markers=True, dashes=False, style='model', ax=axs[a])
axs.labs(a, 'Observation within trajectory', 'MSE')
axs[a].set_yscale('log')

axs[a].legend(fontsize=14)

# Ticks every 0.2 from 0.2 to 2.8 on the log axis.
axxx = np.arange(0.2,2.81,0.2)
axs[a].set_yticks(axxx)

# Only 0.2, 0.4, 0.8, 1.6 and 2.8 get a text label; the other positions are passed as None.
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.2,0.4,0.8,1.6,2.8) else None) for i in axxx], fontsize=13)
# NOTE(review): `a` is not read again in this cell after the increment.
a += 1

plt.tight_layout();

### Real

In [None]:
# TRAIN: seed count, dataset size and the 80/20 train/validation split derived from it
do_train = True
n_seeds = 5
n_train = [1000]
ts = int(0.8 * n_train[0])
vs = int(0.2 * n_train[0])
n_epochs_max = 800

# TEST: test-set size and the dataset / output-file prefixes for this experiment
N_TEST = 1000
n_test = N_TEST
data_path = "./Data/SDE_ood_real"
out_prefix = 'ood_real'

Train NESDE

In [None]:
%%time
# Same NESDE driver call as in the "Complex" OOD section, but with
# `complex=False` and `random_samples=5`, on the dataset at `data_path`.
# The two returned objects are bound to the OOD-test and regular-test results,
# in that order (presumably per-time tables -- confirm in NESDE_SDE).
res_per_t_ood_nesde, res_per_t_nesde = NESDE_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=True, valid_by_nll=True, 
    stationary_model=True, n_epochs=n_epochs_max, patience=60, batch_size=20, gamma=1, lr=3e-3,
    device=device, sep_datasets=True, data_path=data_path, outs_path="./results_NESDE/" + out_prefix + "_", 
    ood_test=True, random_samples=5, log_rate=10, do_train=do_train, complex=False, skip_first=True)

Train LSTM

In [None]:
%%time
# LSTM baseline driver on the same data split; called with
# `train_by_nll=False, valid_by_nll=False` and batch size 50.
# Outputs use the `./results_LSTM/` prefix followed by `out_prefix`.
res_per_t_ood_lstm, res_per_t_lstm = lstm_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=False, valid_by_nll=False, 
    n_epochs=n_epochs_max, patience=60, batch_size=50, gamma=1, lr=3e-3, device=device, sep_datasets=True, 
    data_path=data_path, outs_path="./results_LSTM/" + out_prefix + "_", ood_test=True, random_samples=5, 
    log_rate=10, do_train=do_train, skip_first=True)

Process Results

In [None]:
# Load the per-seed result tables of both models (regular and OOD test) and
# annotate each with the within-trajectory step counter.
lstm, nesde, lstm_ood, nesde_ood = [], [], [], []
for i in range(n_seeds):
    lstm.append(add_disc_time(pd.read_csv(f'results_LSTM/{out_prefix}_table_res_per_t_exp{i}.csv'), 'time'))
    lstm_ood.append(add_disc_time(pd.read_csv(f'results_LSTM/{out_prefix}_table_res_per_t_ood_exp{i}.csv'), 'ood_time'))
    nesde.append(add_disc_time(pd.read_csv(f'results_NESDE/{out_prefix}_table_res_per_t_exp{i}.csv'), 'time'))
    nesde_ood.append(add_disc_time(pd.read_csv(f'results_NESDE/{out_prefix}_table_res_per_t_ood_exp{i}.csv'), 'ood_time'))

# Per-seed mean error at each step index, interleaved as NESDE, LSTM, NESDE, ...
models = (('NESDE', nesde, nesde_ood), ('LSTM', lstm, lstm_ood))
res_l = [calc_smp_mean(regular[i], 'MSE', label)
         for i in range(n_seeds) for label, regular, _ in models]
res_ood_l = [calc_smp_mean(ood[i], 'OOD_MSE', label)
             for i in range(n_seeds) for label, _, ood in models]
res = pd.concat(res_l)
ood_res = pd.concat(res_ood_l)

Plots

In [None]:
# Regular test
# Per-step MSE of both models on the regular test set, one line per model.
res_per_t = res.copy()
res_per_t = res_per_t.reset_index()
res_per_t = res_per_t.drop(['index'],axis=1)


# Single-panel figure created through the project helper `utils.Axes`.
axs = utils.Axes(1,1,(3.8,3),fontsize=15)
a = 0

axs[a].tick_params(axis='x', labelsize=13)
axs[a].tick_params(axis='y', labelsize=13)
sns.lineplot(data=res_per_t, hue='model', x='n_smps', y='MSE', markers=True, dashes=False, style='model', ax=axs[a])
axs.labs(a, 'Observation within trajectory', 'MSE')
axs[a].set_yscale('log')

axs[a].legend(fontsize=14)

# Explicit tick positions on the log axis.
axxx = np.array([0.2,0.3,0.4,0.5])
axs[a].set_yticks(axxx)

# Every position in `axxx` is in the allowed tuple, so all ticks get a label here.
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.2,0.3,0.4,0.5) else None) for i in axxx], fontsize=13)


plt.tight_layout();

In [None]:
# OOD test
# Per-step MSE of both models on the out-of-distribution test set.
res_per_t = ood_res.copy()
res_per_t = res_per_t.reset_index()
res_per_t = res_per_t.drop(['index'],axis=1)

axs = utils.Axes(1,1,(3.8,3),fontsize=15)
a = 0

axs[a].tick_params(axis='x', labelsize=13)
axs[a].tick_params(axis='y', labelsize=13)
sns.lineplot(data=res_per_t, hue='model', x='n_smps', y='OOD_MSE', markers=True, dashes=False, style='model', ax=axs[a])
axs.labs(a, 'Observation within trajectory', 'MSE')
axs[a].set_yscale('log')

axs[a].legend(fontsize=14)

# Ticks every 0.2 from 0.2 to 2.8 on the log axis.
axxx = np.arange(0.2,2.81,0.2)
axs[a].set_yticks(axxx)

# Only 0.2, 0.4, 0.8, 1.6 and 2.8 get a text label; the other positions are passed as None.
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.2,0.4,0.8,1.6,2.8) else None) for i in axxx], fontsize=13)
# NOTE(review): `a` is not read again in this cell after the increment.
a += 1

plt.tight_layout();

## Data Efficiency Experiment

### Complex

In [None]:
# TRAIN: candidate training-set sizes, each run with a single seed
n_train = [100, 200, 400, 800]
n_seeds = 1
do_train = True

n_epochs_max = 800

# TEST: test-set size and the dataset / output-file prefixes for this experiment
N_TEST = 1000
n_test = N_TEST
out_prefix = 'efficiency_complex'
data_path = "./Data/SDE_efficiency_complex"

Train NESDE

In [None]:
%%time
# Run the NESDE driver once per training-set size. For each size, 80% is passed
# as `train_size` and 20% as `valid_size`; the dataset path and the output prefix
# both carry the size as a suffix. Only the second return value is kept, keyed by
# the size as a string.
res_per_t_nesde = {}
for tsize in n_train:
    print("Data size: ", tsize)
    ts = int(0.8*tsize)
    vs = int(0.2*tsize)
    # `ood_test=False`: the first return value is discarded.
    _, res_ = NESDE_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=True, valid_by_nll=True, stationary_model=True,
    n_epochs=n_epochs_max, patience=100, batch_size=20, gamma=1, lr=3e-3,
    device=device, sep_datasets=False,
    data_path=data_path + "_" + str(tsize),
    outs_path="./results_NESDE/" + out_prefix + "_" + str(tsize) + "_",
    ood_test=False, random_samples=3, log_rate=10, do_train=do_train, complex=True)
    res_per_t_nesde[str(tsize)] = res_

Train LSTM

In [None]:
%%time
# Run the LSTM baseline once per training-set size with the same 80/20 split.
# The batch size is 50 for sizes above 400 and 20 otherwise. Only the second
# return value is kept, keyed by the size as a string.
res_per_t_lstm = {}
for tsize in n_train:
    print("Data size: ", tsize)
    ts = int(0.8*tsize)
    vs = int(0.2*tsize)
    _, res_ = lstm_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=False, valid_by_nll=False, 
    n_epochs=n_epochs_max, patience=100, batch_size=50 if tsize > 400 else 20, gamma=1, lr=3e-3, 
    device=device, sep_datasets=False, data_path=data_path + "_" + str(tsize), 
    outs_path="./results_LSTM/" + out_prefix + "_" + str(tsize) + "_", ood_test=False,
    random_samples=5, log_rate=10, do_train=do_train)
    res_per_t_lstm[str(tsize)] = res_

Process Results

In [None]:
# Load the single-seed (exp0) result table of each model for every training-set size.
lstm = []
nesde = []
for tsize in n_train:
    lstm.append(pd.read_csv('results_LSTM/' + out_prefix + '_' + str(tsize) + '_table_res_per_t_exp0.csv'))
    nesde.append(pd.read_csv('results_NESDE/' + out_prefix + '_' + str(tsize) + '_table_res_per_t_exp0.csv'))


# Keep only MSE/NLL and tag every row with its model and training-set size.
# `DataFrame.append` was removed in pandas 2.0, so the per-size frames are
# collected in lists and concatenated once.
nesde_frames = []
lstm_frames = []
for i, tsize in enumerate(n_train):
    nesde_frames.append(pd.DataFrame(dict(model='NESDE', MSE=nesde[i]['MSE'], NLL=nesde[i]['NLL'], nsmp=tsize)))
    lstm_frames.append(pd.DataFrame(dict(model='LSTM', MSE=lstm[i]['MSE'], NLL=lstm[i]['NLL'], nsmp=tsize)))
# `pd.concat` rejects an empty list; fall back to an empty frame as before.
res_per_t_nesde = pd.concat(nesde_frames) if nesde_frames else pd.DataFrame()
res_per_t_lstm = pd.concat(lstm_frames) if lstm_frames else pd.DataFrame()

# NESDE rows first, then LSTM rows, with a fresh 0..n-1 index.
df = pd.concat((res_per_t_nesde,res_per_t_lstm))
df = df.reset_index(drop=True)
 

Plots

In [None]:
# MSE as a function of training-set size (`nsmp`), one line per model.
res_per_t = df.copy()

axs = utils.Axes(1,1,(3.8,3),fontsize=15)
a = 0

axs[a].tick_params(axis='x', labelsize=13)
axs[a].tick_params(axis='y', labelsize=13)
sns.lineplot(data=res_per_t, hue='model', x='nsmp', y='MSE', markers=True, dashes=False, style='model', ax=axs[a])

axs.labs(a, 'Train size', 'MSE')

axs[a].legend(fontsize=14)

axs[a].set_yscale('log')

# Ticks every 0.2 from 0.2 up to (but excluding) 2.0 on the log axis.
axxx = np.arange(0.2,2.,0.2)
axs[a].set_yticks(axxx)

# Only 0.2, 0.4, 0.8 and 1.6 get a text label; the other positions are passed as None.
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.2,0.4,0.8,1.6) else None) for i in axxx], fontsize=13)
plt.minorticks_off()
plt.tight_layout();

### Real

In [None]:
# TRAIN: candidate training-set sizes, each run with a single seed
n_train = [100, 200, 400, 800]
n_seeds = 1
do_train = True

n_epochs_max = 800

# TEST: test-set size and the dataset / output-file prefixes for this experiment
N_TEST = 1000
n_test = N_TEST
out_prefix = 'efficiency_real'
data_path = "./Data/SDE_efficiency_real"

Train NESDE

In [None]:
%%time
# Run the NESDE driver once per training-set size (`complex=False`,
# `random_samples=5`). For each size, 80% is passed as `train_size` and 20% as
# `valid_size`; the dataset path and output prefix carry the size as a suffix.
# Only the second return value is kept, keyed by the size as a string.
res_per_t_nesde = {}
for tsize in n_train:
    print("Data size: ", tsize)
    ts = int(0.8*tsize)
    vs = int(0.2*tsize)
    # `ood_test=False`: the first return value is discarded.
    _, res_ = NESDE_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=True, valid_by_nll=True, stationary_model=True,
    n_epochs=n_epochs_max, patience=100, batch_size=20, gamma=1, lr=3e-3,
    device=device, sep_datasets=False,
    data_path=data_path + "_" + str(tsize),
    outs_path="./results_NESDE/" + out_prefix + "_" + str(tsize) + "_",
    ood_test=False, random_samples=5, log_rate=10, do_train=do_train, complex=False)
    res_per_t_nesde[str(tsize)] = res_

Train LSTM

In [None]:
%%time
# Run the LSTM baseline once per training-set size with the same 80/20 split.
# The batch size is 50 for sizes above 400 and 20 otherwise. Only the second
# return value is kept, keyed by the size as a string.
res_per_t_lstm = {}
for tsize in n_train:
    print("Data size: ", tsize)
    ts = int(0.8*tsize)
    vs = int(0.2*tsize)
    _, res_ = lstm_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=False, valid_by_nll=False, 
    n_epochs=n_epochs_max, patience=100, batch_size=50 if tsize > 400 else 20, gamma=1, lr=3e-3, 
    device=device, sep_datasets=False, data_path=data_path + "_" + str(tsize), 
    outs_path="./results_LSTM/" + out_prefix + "_" + str(tsize) + "_", ood_test=False,
    random_samples=5, log_rate=10, do_train=do_train)
    res_per_t_lstm[str(tsize)] = res_

Process Results

In [None]:
# Load the single-seed (exp0) result table of each model for every training-set size.
lstm = []
nesde = []
for tsize in n_train:
    lstm.append(pd.read_csv('results_LSTM/' + out_prefix + '_' + str(tsize) + '_table_res_per_t_exp0.csv'))
    nesde.append(pd.read_csv('results_NESDE/' + out_prefix + '_' + str(tsize) + '_table_res_per_t_exp0.csv'))


# Keep only MSE/NLL and tag every row with its model and training-set size.
# `DataFrame.append` was removed in pandas 2.0, so the per-size frames are
# collected in lists and concatenated once.
nesde_frames = []
lstm_frames = []
for i, tsize in enumerate(n_train):
    nesde_frames.append(pd.DataFrame(dict(model='NESDE', MSE=nesde[i]['MSE'], NLL=nesde[i]['NLL'], nsmp=tsize)))
    lstm_frames.append(pd.DataFrame(dict(model='LSTM', MSE=lstm[i]['MSE'], NLL=lstm[i]['NLL'], nsmp=tsize)))
# `pd.concat` rejects an empty list; fall back to an empty frame as before.
res_per_t_nesde = pd.concat(nesde_frames) if nesde_frames else pd.DataFrame()
res_per_t_lstm = pd.concat(lstm_frames) if lstm_frames else pd.DataFrame()

# NESDE rows first, then LSTM rows, with a fresh 0..n-1 index.
df = pd.concat((res_per_t_nesde,res_per_t_lstm))
df = df.reset_index(drop=True)
 

Plots

In [None]:
# MSE as a function of training-set size (`nsmp`), one line per model.
res_per_t = df.copy()
axs = utils.Axes(1,1,(3.8,3),fontsize=15)
a = 0

axs[a].tick_params(axis='x', labelsize=13)
axs[a].tick_params(axis='y', labelsize=13)
sns.lineplot(data=res_per_t, hue='model', x='nsmp', y='MSE', markers=True, dashes=False, style='model', ax=axs[a])
axs.labs(a, 'Train size', 'MSE')
axs[a].set_yscale('log')
axs[a].legend(fontsize=14)
# Explicit tick positions on the log axis.
axxx = np.array([0.4,0.8,1.6])
axs[a].set_yticks(axxx)

# Every position in `axxx` is in the allowed tuple, so all ticks get a label here.
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.4,0.8,1.6) else None) for i in axxx], fontsize=13)
plt.minorticks_off()
# NOTE(review): `a` is not read again in this cell after the increment.
a += 1

plt.tight_layout();

## Regular Observations Experiment

### Complex

In [None]:
# TRAIN: single seed, dataset size and the 80/20 train/validation split derived from it
do_train = True
n_seeds = 1
n_train = [1000]
ts = int(0.8 * n_train[0])
vs = int(0.2 * n_train[0])

n_epochs_max = 800

# TEST: test-set size and the dataset / output-file prefixes for this experiment
N_TEST = 1000
n_test = N_TEST
data_path = "./Data/SDE_regular_complex"
out_prefix = 'regular_complex'

Train NESDE

In [None]:
%%time
# Run the NESDE driver on the regular-observations dataset (`sep_datasets=False`).
# The two returned objects are bound to the OOD-test and regular-test results, in
# that order; the "Process Results" cell below adds a `model` column to both and
# concatenates them with the LSTM results.
res_per_t_ood_nesde, res_per_t_nesde = NESDE_SDE.run_multi_experiments(
    n_seeds, train_size=ts, valid_size=vs, test_size=n_test, train_by_nll=True, valid_by_nll=True, 
    stationary_model=True, n_epochs=n_epochs_max, patience=60, batch_size=20, gamma=1, lr=3e-3,
    device=device, sep_datasets=False, data_path=data_path, outs_path="./results_NESDE/" + out_prefix + "_", 
    ood_test=True, random_samples=3, log_rate=10, do_train=do_train, complex=True, skip_first=True)

Train LSTMs

In [None]:
%%time

# Train one LSTM baseline per observation sparsity (1, 8, 50). The step value
# `dt = 100 // sparsity` is forwarded as `const_steps_res`, and the LSTM is
# selected through `lstm=True` on the NESDE_SDE driver.
# NOTE(review): the first positional argument is `len(n_train)` here, whereas every
# other driver call passes `n_seeds`; both equal 1 with the current settings --
# confirm which one is intended.
lstm_ood_tables, lstm_tables = [], []
for sparsity in (1,8,50):
    dt = 100 // sparsity

    res_ood_, res_ = NESDE_SDE.run_multi_experiments(
        len(n_train), train_size=ts, valid_size=vs, test_size=n_test, const_steps_res=dt, lstm=True,
        n_epochs=n_epochs_max, patience=100, batch_size=20, gamma=1, lr=3e-3,
        device=device, sep_datasets=False,
        data_path=data_path,
        outs_path="./results_LSTM/" + out_prefix + f"_sp{sparsity:d}_",
        ood_test=True, do_train=do_train)

    # Label the rows so the sparsity variants can be told apart in the plots.
    res_ood_['model'] = f'LSTM (1:{sparsity})'
    res_['model'] = f'LSTM (1:{sparsity})'
    lstm_ood_tables.append(res_ood_)
    lstm_tables.append(res_)

# Concatenate once instead of growing a DataFrame (seeded with an empty frame)
# inside the loop; row order is unchanged.
res_per_t_ood_lstm = pd.concat(lstm_ood_tables)
res_per_t_lstm = pd.concat(lstm_tables)

Process Results

In [None]:
# Label the NESDE tables (in place) so they can be told apart from the LSTM variants.
res_per_t_ood_nesde['model'] = 'NESDE'
res_per_t_nesde['model'] = 'NESDE'

# Stack NESDE rows on top of the LSTM rows, then replace the index with a fresh
# 0..n-1 range by resetting it and dropping the resulting `index` column.
res_per_t = (
    pd.concat((res_per_t_nesde, res_per_t_lstm))
    .reset_index()
    .drop(['index'], axis=1)
)
res_per_t_ood = (
    pd.concat((res_per_t_ood_nesde, res_per_t_ood_lstm))
    .reset_index()
    .drop(['index'], axis=1)
)

Plots

In [None]:
# Regular test
# MSE over time for NESDE and the LSTM sparsity variants, one line per `model`.

axs = utils.Axes(1,1,(6,5),fontsize=18)
a = 0

axs[a].tick_params(axis='x', labelsize=18)
axs[a].tick_params(axis='y', labelsize=18)
sns.lineplot(data=res_per_t, hue='model', x='time', y='MSE', markers=True, dashes=False, style='model', ax=axs[a])
axs.labs(a, 'time', 'MSE')
axs[a].set_yscale('log')
# Fixed view window: time 1..10 on x, 0.13..1.15 on the log y axis.
axs[a].set_xlim((1,10))
axs[a].set_ylim((0.13,1.15))
axs[a].legend(fontsize=17)
# Ticks every 0.1; only 0.2, 0.4, 0.6 and 1.0 get a text label, the rest are passed as None.
axs[a].set_yticks(np.arange(0.2,1.2,0.1))
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.2,0.4,0.6,1.0) else None) for i in np.arange(0.2,1.2,0.1)], fontsize=18)
# NOTE(review): `a` is not read again in this cell after the increment.
a += 1

plt.tight_layout();

In [None]:
# OOD test
# OOD MSE over time for NESDE and the LSTM sparsity variants, one line per `model`.

axs = utils.Axes(1,1,(6,5),fontsize=18)
a = 0

axs[a].tick_params(axis='x', labelsize=18)
axs[a].tick_params(axis='y', labelsize=18)
sns.lineplot(data=res_per_t_ood, hue='model', x='ood_time', y='OOD_MSE', markers=True, dashes=False, style='model', ax=axs[a])
axs.labs(a, 'time', 'MSE')
axs[a].set_yscale('log')
# Fixed view window: time 1..10 on x, 0.14..2.4 on the log y axis.
axs[a].set_xlim((1,10))
axs[a].set_ylim((0.14,2.4))
axs[a].legend(fontsize=17)
# Ticks every 0.1; only 0.2, 0.4, 0.6, 1.0 and 2.0 get a text label, the rest are passed as None.
axs[a].set_yticks(np.arange(0.2,2.2,0.1))
axs[a].set_yticklabels([(f'{np.round(i,1):.1f}' if np.round(i,1) in (0.2,0.4,0.6,1.0,2.0) else None) for i in np.arange(0.2,2.2,0.1)], fontsize=18)
# NOTE(review): `a` is not read again in this cell after the increment.
a += 1

plt.tight_layout();