#### Latent ODEs for Irregularly-Sampled Time Series
#### Author: Yulia Rubanova

In [1]:
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
from random import SystemRandom
from sklearn import model_selection
from sklearn.preprocessing import RobustScaler
import random

import torch
import torch.nn as nn
import torch.optim as optim

import utils
from utils import compute_loss_all_batches

from ode_rnn import *
from ode_func import ODEFunc, ODEFunc_w_Poisson
from diffeq_solver import DiffeqSolver

In [2]:
# Seed NumPy, PyTorch and the stdlib `random` module with the same value (0)
# so that repeated runs of the notebook draw the same random numbers.
for _seed_rng in (np.random.seed, torch.manual_seed, random.seed):
    _seed_rng(0)

In [3]:
# Experiment switches.
USE_NOISY_DATA = False    # selects which "x*" covariate columns are kept
USE_MIXED_EFFECT = True   # selects the "_mixed" variant of the target column
timestep_skip = 1         # keep rows whose 'temps' is a multiple of this value

# Human-readable name of the effect type chosen by USE_MIXED_EFFECT.
RE = {True: 'Mixed', False: 'Fixed'}[USE_MIXED_EFFECT]

In [4]:
# Run on the first CUDA device when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Locations of the training simulation and of the held-out test set,
# relative to the notebook's working directory.
CSV_FILE = "../../../data/synthetic_bph_1/Simulations/simulation1.csv"
CSV_Dtest = "../../../data/synthetic_bph_1/Simulations/01_test.csv"

In [5]:
# Both files use ';' as the field separator and ',' as the decimal mark,
# so the same parsing options are shared by the two reads.
_csv_format = {"sep": ";", "decimal": ","}
data = pd.read_csv(CSV_FILE, **_csv_format)
dtest = pd.read_csv(CSV_Dtest, **_csv_format)

In [6]:
# Covariate columns: names starting with "x" for which BOTH the "contains '_'"
# test and the "contains 'obs'" test give the same answer as USE_NOISY_DATA.
x_labels = []
for col in data.columns:
    if not col.startswith("x"):
        continue
    has_underscore = "_" in col
    has_obs = 'obs' in col
    if has_underscore is USE_NOISY_DATA and has_obs is USE_NOISY_DATA:
        x_labels.append(col)

# 'x8' is always part of the inputs, even when the filter did not select it.
if 'x8' not in x_labels:
    x_labels.append('x8')
# (A check that exactly 8 covariates were found is intentionally disabled.)

# Target column: an observed ("_obs") column starting with "y" whose
# "_mixed" marker matches USE_MIXED_EFFECT.
y_labels = []
for col in data.columns:
    is_observed_y = col.startswith("y") and ("_obs" in col)
    if is_observed_y and (("_mixed" in col) is USE_MIXED_EFFECT):
        y_labels.append(col)
# Exactly one target column is expected.
assert len(y_labels) == 1

print(x_labels)
print(y_labels)

['x1', 'x2', 'x3', 'x4', 'x5', 'x6', 'x7', 'x8']
['y_mixed_obs']


In [7]:
# Model sizes.
input_dim = 8          # number of input features fed to the model
output_dim = 1         # number of predicted outputs
n_ode_gru_dims = 10    # size passed to the ODE function / ODE-RNN as latent dim

# Likelihood and optimisation settings.
obsrv_std = 0.01
lr = 1e-2
niters = 15_000

# Batch dictionaries; they are filled in by later cells.
train_dict, test_dict = {}, {}

In [8]:
# Keep only the rows on the sub-sampled time grid, then work on a copy with
# every row containing a missing value removed.
data = data[data['temps'] % timestep_skip == 0]
data_train = data.copy().dropna()

# Fit one robust scaler on the covariates and one on the target, and replace
# the columns of the training frame with their scaled values.
scaler_x = RobustScaler()
scaler_y = RobustScaler()
data_train.loc[:, x_labels] = scaler_x.fit_transform(data_train[x_labels])
data_train.loc[:, y_labels] = scaler_y.fit_transform(data_train[y_labels])

# One array per individual, stacked along a new leading dimension.
# torch.stack requires every individual to contribute an array of the same shape.
groupby = data_train.groupby('individus')[x_labels].apply(np.array)
input_train = torch.stack([torch.Tensor(rows) for rows in groupby])

groupby = data_train.groupby('individus')[y_labels].apply(np.array)
target_train = torch.stack([torch.Tensor(rows) for rows in groupby])

# Observation times ('temps') of each individual, stacked the same way.
groupby = data_train.groupby('individus')['temps'].apply(np.array)
observed_tp = torch.stack([torch.Tensor(times) for times in groupby])



In [9]:
# Same row selection as for the training data: keep the sub-sampled time grid
# and drop every row that contains a missing value.
dtest = dtest[dtest['temps'] % timestep_skip == 0]
dtest_norm = dtest.copy()
dtest_norm = dtest_norm.dropna()

# Scale the test set with the scalers that were fitted on the TRAINING data.
# The previous code called `fit_transform` on `scaler_x` / `scaler_y` here,
# which re-fitted (and therefore overwrote) the training statistics on the
# test set and put train and test inputs on different scales.
# The `*_test` names are kept for any later cell that refers to them; they
# are aliases of the train-fitted scalers.
scaler_x_test = scaler_x
dtest_norm.loc[:, x_labels] = scaler_x.transform(dtest_norm[x_labels])

scaler_y_test = scaler_y
dtest_norm.loc[:, y_labels] = scaler_y.transform(dtest_norm[y_labels])

# One array per individual, stacked along a new leading dimension.
# torch.stack requires every individual to contribute an array of the same shape.
groupby = dtest_norm.groupby('individus')[x_labels].apply(np.array)
input_test = [torch.Tensor(x) for x in groupby]
input_test = torch.stack(input_test)
groupby = dtest_norm.groupby('individus')[y_labels].apply(np.array)
target_test = [torch.Tensor(x) for x in groupby]
target_test = torch.stack(target_test)

In [10]:
# Assemble the training batch consumed by `model.compute_all_losses`.
#
# The model is moved to `device`, so every tensor of the batch is placed on
# the same device; leaving them on the CPU (as before) causes a device
# mismatch as soon as CUDA is available. On a CPU-only machine `.to(device)`
# is a no-op.
#
# The time grid of the first individual is used for the whole batch.
# NOTE(review): this assumes all individuals share the same 'temps' grid -- confirm.
train_tp = observed_tp[0].to(device)

train_dict["tp_to_predict"] = train_tp
train_dict["observed_data"] = input_train.to(device)
train_dict["observed_tp"] = train_tp
train_dict["data_to_predict"] = target_train.to(device)
train_dict["mode"] = None
train_dict['labels'] = None
# All-ones masks: every input and every target entry is treated as observed.
train_dict["observed_mask"] = torch.ones(input_train.shape, device=device)
train_dict["mask_predicted_data"] = torch.ones(target_train.shape, device=device)

In [11]:

# Network used by the ODE function; it maps n_ode_gru_dims -> n_ode_gru_dims
# and is requested with 2 layers of 25 units and Tanh non-linearities.
ode_func_net = utils.create_net(
    n_ode_gru_dims,
    n_ode_gru_dims,
    n_layers=2,
    n_units=25,
    nonlinear=nn.Tanh,
)

# ODE function wrapping that network, placed on the target device.
rec_ode_func = ODEFunc(
    input_dim=input_dim,
    latent_dim=n_ode_gru_dims,
    ode_func_net=ode_func_net,
    device=device,
)
rec_ode_func = rec_ode_func.to(device)

# Solver configured with the "euler" method and the given tolerances.
# NOTE(review): the literal 10 presumably is the latent size and would then
# have to track n_ode_gru_dims -- confirm against the DiffeqSolver signature.
solver_tolerances = {"odeint_rtol": 1e-3, "odeint_atol": 1e-4}
z0_diffeq_solver = DiffeqSolver(
    input_dim,
    rec_ode_func,
    "euler",
    10,
    device=device,
    **solver_tolerances,
)

# ODE-RNN configured without any classification head options enabled.
ode_rnn_options = {
    "device": device,
    "z0_diffeq_solver": z0_diffeq_solver,
    "n_gru_units": 25,
    "concat_mask": True,
    "obsrv_std": obsrv_std,
    "use_binary_classif": False,
    "classif_per_tp": False,
    "n_labels": 1,
    "train_classif_w_reconstr": False,
}
model = ODE_RNN(input_dim, n_ode_gru_dims, output_dim, **ode_rnn_options)
model = model.to(device)

In [None]:
# Adamax over all model parameters, starting from the configured learning rate.
optimizer = optim.Adamax(model.parameters(), lr=lr)

# The whole training set is used as a single batch.
num_batches = 1

# Iterations are numbered from 1; the range ends at num_batches * (niters + 1)
# exclusive.
for itr in range(1, num_batches * (niters + 1)):
    optimizer.zero_grad()
    # Apply the decay step (rate 0.999) with lr / 10 passed as the lower bound.
    utils.update_learning_rate(optimizer, decay_rate=0.999, lowest=lr / 10)

    # Forward pass on the training batch, then back-propagate its "loss" entry.
    train_res = model.compute_all_losses(train_dict, n_traj_samples=500)
    training_loss = train_res["loss"]
    training_loss.backward()
    optimizer.step()

In [None]:
# Assemble the evaluation batch consumed by `model.compute_all_losses`.
#
# The time grid is taken from the TEST set itself. The previous code reused
# `observed_tp[0]`, i.e. the training grid, which is only correct when both
# files happen to share identical observation times.
# As for the training batch, the grid of the first individual is used.
# NOTE(review): this assumes all test individuals share the same 'temps' grid -- confirm.
test_tp_by_individual = dtest_norm.groupby('individus')['temps'].apply(np.array)
test_tp = torch.Tensor(test_tp_by_individual.iloc[0]).to(device)

# Every tensor is placed on `device` so that it matches the model; on a
# CPU-only machine `.to(device)` is a no-op.
test_dict["tp_to_predict"] = test_tp
test_dict["observed_data"] = input_test.to(device)
test_dict["observed_tp"] = test_tp
test_dict["data_to_predict"] = target_test.to(device)
test_dict["mode"] = None
test_dict['labels'] = None
# All-ones masks: every input and every target entry is treated as observed.
test_dict["observed_mask"] = torch.ones(input_test.shape, device=device)
test_dict["mask_predicted_data"] = torch.ones(target_test.shape, device=device)

In [None]:
# Evaluate on the held-out batch. No gradient is needed here, so the forward
# pass runs under `torch.no_grad()`; this avoids recording an autograd graph
# for all 500 trajectory samples.
with torch.no_grad():
    test_res = model.compute_all_losses(test_dict, n_traj_samples=500)

# Show every returned entry, then the "loss" entry on its own.
print(test_res)
print(test_res["loss"])