# Import this (important)

In [None]:
# Install the latest version of author's repo neural ode implementation
!git clone https://github.com/rtqichen/torchdiffeq.git
!cd torchdiffeq && pip install -e .
!ls torchdiffeq/torchdiffeq

Cloning into 'torchdiffeq'...
remote: Enumerating objects: 1138, done.[K
remote: Counting objects: 100% (434/434), done.[K
remote: Compressing objects: 100% (194/194), done.[K
remote: Total 1138 (delta 256), reused 400 (delta 240), pack-reused 704[K
Receiving objects: 100% (1138/1138), 8.29 MiB | 8.71 MiB/s, done.
Resolving deltas: 100% (682/682), done.
Obtaining file:///content/torchdiffeq
Installing collected packages: torchdiffeq
  Running setup.py develop for torchdiffeq
Successfully installed torchdiffeq-0.2.2
_impl  __init__.py


# New Section

# Rough

## Libraries

In [276]:
import os
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
import matplotlib.pyplot as plt

import time
import datetime
import argparse
import numpy as np
import pandas as pd
from random import SystemRandom
from sklearn import model_selection

import torch
import torch.nn as nn
from torch.nn.functional import relu
import torch.optim as optim


In [None]:
import lib.utils as utils
from lib.plotting import *

from lib.rnn_baselines import *
from lib.ode_rnn import *
from lib.create_latent_ode_model import create_LatentODE_model
from lib.parse_datasets import parse_datasets
from lib.ode_func import ODEFunc, ODEFunc_w_Poisson
from lib.diffeq_solver import DiffeqSolver
from mujoco_physics import HopperPhysics

from lib.utils import compute_loss_all_batches


import os
import numpy as np

import torch
import torch.nn as nn

import lib.utils as utils
from lib.diffeq_solver import DiffeqSolver
from generate_timeseries import Periodic_1d
from torch.distributions import uniform

from torch.utils.data import DataLoader
from mujoco_physics import HopperPhysics
from physionet import PhysioNet, variable_time_collate_fn, get_data_min_max
from person_activity import PersonActivity, variable_time_collate_fn_activity

from sklearn import model_selection
import random

## Parameters

In [None]:
n = 1000  # size of dataset
niters = 500  # number of iterations
lr = 1e-3  # Starting Learning Rate
b = 50  # batch_size
viz = True  # show plots while training
# save: path for saving checkpoints
# load: ID of the experiment to load for evaluation
random_seed = 1991 # random seed
# dataset = Dataset to load. Available: physionet, activity, hopper, periodic
sample_tp = None # number of time points to sub-sample.  If > 1, subsample exact number of points. If the number is in [0,1], take a percentage of available points per time series. If None, do not subsample
c = None # Cut out the section of the timeline of the specified length (in number of points
# quantization: Quantization on the physionet dataset." "Value 1 means quantization by 1 hour, value 0.1 means quantization by 0.1 hour = 6 min
latent_ode = None # latent ode with ode-rnn encoder
# z0-encoder = odernn # Type of encoder for Latent ODE model: odernn or rnn
# classic-rnn
# rnn-cell
# input-decay
# ode-rnn # Run ODE-RNN baseline: RNN-style that sees true points at every point. Used for interpolation only.
# rnn-vae  # Run RNN baseline: seq2seq model with sampling of the h0 and ELBO loss.
latents = 10 # size of the latent state
rec_dims = 20 # dimensionality of the recognition model (ODE or RNN)
rec_layers = 1 # Number of layers in ODE func in recognition ODE
gen_layers = 1 # Number of layers in ODE func in generative ODE
units = 100 # Number of units per layer in ODE func
gru_units = 100 # Number of units per layer in each of GRU update networks
poisson = 'store_true' # Model poisson-process likelihood for the density of events in addition to reconstruction
classif = 'store_true' # Include binary classification loss -- used for Physionet dataset for hospiral mortality
linear_classif = 'store_true' # If using a classifier, use a linear classifier instead of 1-layer NN
extrap = 'store_true' # Set extrapolation mode. If this flag is not set, run interpolation mode
timepoints = 100 # total number of timepoints
max_tp = 5. # We subsample points in the interval [0, args.max_tp]
noise_weight = 0.01 # Noise amplitude for generated traejctories
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [None]:
torch.manual_seed(random_seed)
np.random.seed(random_seed)

<torch._C.Generator at 0x7f03000d14b0>

## Data

In [None]:
from torch.distributions import uniform
n_total_tp = 100
max_t_extrap = max_tp/timepoints * 100

In [None]:
distribution = uniform.Uniform(torch.Tensor([0.0]),torch.Tensor([max_t_extrap]))
time_steps_extrap =  distribution.sample(torch.Size([n_total_tp-1]))[:,0]
print(time_steps_extrap)
print(time_steps_extrap.shape)
time_steps_extrap = torch.cat((torch.Tensor([0.0]), time_steps_extrap))
print(time_steps_extrap.shape)
time_steps_extrap = torch.sort(time_steps_extrap)[0]
print(time_steps_extrap.shape)

tensor([3.7851, 3.3443, 0.1417, 1.3734, 3.0917, 0.3541, 2.0763, 0.4173, 3.6215,
        3.9683, 3.6055, 3.8517, 1.5262, 1.1137, 3.9579, 1.0563, 3.9331, 4.4630,
        3.0525, 4.4004, 2.8379, 1.0336, 0.1935, 2.1825, 0.0623, 0.8038, 0.1491,
        2.8308, 4.4699, 4.7802, 0.0876, 2.9323, 0.6113, 4.8144, 0.1380, 0.2273,
        0.5352, 1.8769, 0.4140, 0.9900, 1.5855, 2.3460, 0.2538, 1.5439, 4.2902,
        3.2616, 2.5595, 4.6672, 3.3973, 2.9913, 1.8209, 4.3530, 3.2576, 3.6419,
        2.3678, 1.2531, 1.5027, 0.8094, 2.2725, 3.1943, 0.9322, 0.1816, 3.5082,
        1.3179, 3.5005, 3.7536, 4.8195, 0.2850, 4.3183, 3.9178, 1.1724, 2.6610,
        1.8388, 0.0661, 2.3603, 1.7746, 4.1492, 0.1078, 4.0930, 2.2240, 2.0680,
        1.2015, 3.2396, 1.0108, 2.0369, 0.2006, 1.9468, 0.9870, 0.1718, 2.8426,
        3.3702, 4.5436, 1.2397, 1.8489, 4.0469, 2.2679, 2.1030, 1.8386, 1.4328])
torch.Size([99])
torch.Size([100])
torch.Size([100])


# Run Model of the author

In [None]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Author: Yulia Rubanova
###########################

import os
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
import matplotlib.pyplot as plt

import time
import datetime
import argparse
import numpy as np
import pandas as pd
from random import SystemRandom
from sklearn import model_selection

import torch
import torch.nn as nn
from torch.nn.functional import relu
import torch.optim as optim

import lib.utils as utils
from lib.plotting import *

from lib.rnn_baselines import *
from lib.ode_rnn import *
from lib.create_latent_ode_model import create_LatentODE_model
from lib.parse_datasets import parse_datasets
from lib.ode_func import ODEFunc, ODEFunc_w_Poisson
from lib.diffeq_solver import DiffeqSolver
from mujoco_physics import HopperPhysics

from lib.utils import compute_loss_all_batches

# Generative model for noisy data based on ODE
parser = argparse.ArgumentParser('Latent ODE')
parser.add_argument('-n',  type=int, default=100, help="Size of the dataset")
parser.add_argument('--niters', type=int, default=300)
parser.add_argument('--lr',  type=float, default=1e-2, help="Starting learning rate.")
parser.add_argument('-b', '--batch-size', type=int, default=50)
parser.add_argument('--viz', action='store_true', help="Show plots while training")

parser.add_argument('--save', type=str, default='experiments/', help="Path for save checkpoints")
parser.add_argument('--load', type=str, default=None, help="ID of the experiment to load for evaluation. If None, run a new experiment.")
parser.add_argument('-r', '--random-seed', type=int, default=1991, help="Random_seed")

parser.add_argument('--dataset', type=str, default='periodic', help="Dataset to load. Available: physionet, activity, hopper, periodic")
parser.add_argument('-s', '--sample-tp', type=float, default=None, help="Number of time points to sub-sample."
	"If > 1, subsample exact number of points. If the number is in [0,1], take a percentage of available points per time series. If None, do not subsample")

parser.add_argument('-c', '--cut-tp', type=int, default=None, help="Cut out the section of the timeline of the specified length (in number of points)."
	"Used for periodic function demo.")

parser.add_argument('--quantization', type=float, default=0.1, help="Quantization on the physionet dataset."
	"Value 1 means quantization by 1 hour, value 0.1 means quantization by 0.1 hour = 6 min")

parser.add_argument('--latent-ode', action='store_true', help="Run Latent ODE seq2seq model")
parser.add_argument('--z0-encoder', type=str, default='odernn', help="Type of encoder for Latent ODE model: odernn or rnn")

parser.add_argument('--classic-rnn', action='store_true', help="Run RNN baseline: classic RNN that sees true points at every point. Used for interpolation only.")
parser.add_argument('--rnn-cell', default="gru", help="RNN Cell type. Available: gru (default), expdecay")
parser.add_argument('--input-decay', action='store_true', help="For RNN: use the input that is the weighted average of impirical mean and previous value (like in GRU-D)")

parser.add_argument('--ode-rnn', action='store_true', help="Run ODE-RNN baseline: RNN-style that sees true points at every point. Used for interpolation only.")

parser.add_argument('--rnn-vae', action='store_true', help="Run RNN baseline: seq2seq model with sampling of the h0 and ELBO loss.")

parser.add_argument('-l', '--latents', type=int, default=6, help="Size of the latent state")
parser.add_argument('--rec-dims', type=int, default=20, help="Dimensionality of the recognition model (ODE or RNN).")

parser.add_argument('--rec-layers', type=int, default=1, help="Number of layers in ODE func in recognition ODE")
parser.add_argument('--gen-layers', type=int, default=1, help="Number of layers in ODE func in generative ODE")

parser.add_argument('-u', '--units', type=int, default=100, help="Number of units per layer in ODE func")
parser.add_argument('-g', '--gru-units', type=int, default=100, help="Number of units per layer in each of GRU update networks")

parser.add_argument('--poisson', action='store_true', help="Model poisson-process likelihood for the density of events in addition to reconstruction.")
parser.add_argument('--classif', action='store_true', help="Include binary classification loss -- used for Physionet dataset for hospiral mortality")

parser.add_argument('--linear-classif', action='store_true', help="If using a classifier, use a linear classifier instead of 1-layer NN")
parser.add_argument('--extrap', action='store_true', help="Set extrapolation mode. If this flag is not set, run interpolation mode.")

parser.add_argument('-t', '--timepoints', type=int, default=100, help="Total number of time-points")
parser.add_argument('--max-t',  type=float, default=5., help="We subsample points in the interval [0, args.max_tp]")
parser.add_argument('--noise-weight', type=float, default=0.01, help="Noise amplitude for generated traejctories")


args = parser.parse_args()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
file_name = os.path.basename(__file__)[:-3]
utils.makedirs(args.save)

#####################################################################################################

if __name__ == '__main__':
	torch.manual_seed(args.random_seed)
	np.random.seed(args.random_seed)

	experimentID = args.load
	if experimentID is None:
		# Make a new experiment ID
		experimentID = int(SystemRandom().random()*100000)
	ckpt_path = os.path.join(args.save, "experiment_" + str(experimentID) + '.ckpt')

	start = time.time()
	print("Sampling dataset of {} training examples".format(args.n))
	
	input_command = sys.argv
	ind = [i for i in range(len(input_command)) if input_command[i] == "--load"]
	if len(ind) == 1:
		ind = ind[0]
		input_command = input_command[:ind] + input_command[(ind+2):]
	input_command = " ".join(input_command)

	utils.makedirs("results/")

	##################################################################
	data_obj = parse_datasets(args, device)
	input_dim = data_obj["input_dim"]

	classif_per_tp = False
	if ("classif_per_tp" in data_obj):
		# do classification per time point rather than on a time series as a whole
		classif_per_tp = data_obj["classif_per_tp"]

	if args.classif and (args.dataset == "hopper" or args.dataset == "periodic"):
		raise Exception("Classification task is not available for MuJoCo and 1d datasets")

	n_labels = 1
	if args.classif:
		if ("n_labels" in data_obj):
			n_labels = data_obj["n_labels"]
		else:
			raise Exception("Please provide number of labels for classification task")

	##################################################################
	# Create the model
	obsrv_std = 0.01
	if args.dataset == "hopper":
		obsrv_std = 1e-3 

	obsrv_std = torch.Tensor([obsrv_std]).to(device)

	z0_prior = Normal(torch.Tensor([0.0]).to(device), torch.Tensor([1.]).to(device))

	if args.rnn_vae:
		if args.poisson:
			print("Poisson process likelihood not implemented for RNN-VAE: ignoring --poisson")

		# Create RNN-VAE model
		model = RNN_VAE(input_dim, args.latents, 
			device = device, 
			rec_dims = args.rec_dims, 
			concat_mask = True, 
			obsrv_std = obsrv_std,
			z0_prior = z0_prior,
			use_binary_classif = args.classif,
			classif_per_tp = classif_per_tp,
			linear_classifier = args.linear_classif,
			n_units = args.units,
			input_space_decay = args.input_decay,
			cell = args.rnn_cell,
			n_labels = n_labels,
			train_classif_w_reconstr = (args.dataset == "physionet")
			).to(device)


	elif args.classic_rnn:
		if args.poisson:
			print("Poisson process likelihood not implemented for RNN: ignoring --poisson")

		if args.extrap:
			raise Exception("Extrapolation for standard RNN not implemented")
		# Create RNN model
		model = Classic_RNN(input_dim, args.latents, device, 
			concat_mask = True, obsrv_std = obsrv_std,
			n_units = args.units,
			use_binary_classif = args.classif,
			classif_per_tp = classif_per_tp,
			linear_classifier = args.linear_classif,
			input_space_decay = args.input_decay,
			cell = args.rnn_cell,
			n_labels = n_labels,
			train_classif_w_reconstr = (args.dataset == "physionet")
			).to(device)
	elif args.ode_rnn:
		# Create ODE-GRU model
		n_ode_gru_dims = args.latents
				
		if args.poisson:
			print("Poisson process likelihood not implemented for ODE-RNN: ignoring --poisson")

		if args.extrap:
			raise Exception("Extrapolation for ODE-RNN not implemented")

		ode_func_net = utils.create_net(n_ode_gru_dims, n_ode_gru_dims, 
			n_layers = args.rec_layers, n_units = args.units, nonlinear = nn.Tanh)

		rec_ode_func = ODEFunc(
			input_dim = input_dim, 
			latent_dim = n_ode_gru_dims,
			ode_func_net = ode_func_net,
			device = device).to(device)

		z0_diffeq_solver = DiffeqSolver(input_dim, rec_ode_func, "euler", args.latents, 
			odeint_rtol = 1e-3, odeint_atol = 1e-4, device = device)
	
		model = ODE_RNN(input_dim, n_ode_gru_dims, device = device, 
			z0_diffeq_solver = z0_diffeq_solver, n_gru_units = args.gru_units,
			concat_mask = True, obsrv_std = obsrv_std,
			use_binary_classif = args.classif,
			classif_per_tp = classif_per_tp,
			n_labels = n_labels,
			train_classif_w_reconstr = (args.dataset == "physionet")
			).to(device)
	elif args.latent_ode:
		model = create_LatentODE_model(args, input_dim, z0_prior, obsrv_std, device, 
			classif_per_tp = classif_per_tp,
			n_labels = n_labels)
	else:
		raise Exception("Model not specified")

	##################################################################

	if args.viz:
		viz = Visualizations(device)

	##################################################################
	
	#Load checkpoint and evaluate the model
	if args.load is not None:
		utils.get_ckpt_model(ckpt_path, model, device)
		exit()

	##################################################################
	# Training

	log_path = "logs/" + file_name + "_" + str(experimentID) + ".log"
	if not os.path.exists("logs/"):
		utils.makedirs("logs/")
	logger = utils.get_logger(logpath=log_path, filepath=os.path.abspath(__file__))
	logger.info(input_command)

	optimizer = optim.Adamax(model.parameters(), lr=args.lr)

	num_batches = data_obj["n_train_batches"]

	for itr in range(1, num_batches * (args.niters + 1)):
		optimizer.zero_grad()
		utils.update_learning_rate(optimizer, decay_rate = 0.999, lowest = args.lr / 10)

		wait_until_kl_inc = 10
		if itr // num_batches < wait_until_kl_inc:
			kl_coef = 0.
		else:
			kl_coef = (1-0.99** (itr // num_batches - wait_until_kl_inc))

		batch_dict = utils.get_next_batch(data_obj["train_dataloader"])
		train_res = model.compute_all_losses(batch_dict, n_traj_samples = 3, kl_coef = kl_coef)
		train_res["loss"].backward()
		optimizer.step()

		n_iters_to_viz = 1
		if itr % (n_iters_to_viz * num_batches) == 0:
			with torch.no_grad():

				test_res = compute_loss_all_batches(model, 
					data_obj["test_dataloader"], args,
					n_batches = data_obj["n_test_batches"],
					experimentID = experimentID,
					device = device,
					n_traj_samples = 3, kl_coef = kl_coef)

				message = 'Epoch {:04d} [Test seq (cond on sampled tp)] | Loss {:.6f} | Likelihood {:.6f} | KL fp {:.4f} | FP STD {:.4f}|'.format(
					itr//num_batches, 
					test_res["loss"].detach(), test_res["likelihood"].detach(), 
					test_res["kl_first_p"], test_res["std_first_p"])
		 	
				logger.info("Experiment " + str(experimentID))
				logger.info(message)
				logger.info("KL coef: {}".format(kl_coef))
				logger.info("Train loss (one batch): {}".format(train_res["loss"].detach()))
				logger.info("Train CE loss (one batch): {}".format(train_res["ce_loss"].detach()))
				
				if "auc" in test_res:
					logger.info("Classification AUC (TEST): {:.4f}".format(test_res["auc"]))

				if "mse" in test_res:
					logger.info("Test MSE: {:.4f}".format(test_res["mse"]))

				if "accuracy" in train_res:
					logger.info("Classification accuracy (TRAIN): {:.4f}".format(train_res["accuracy"]))

				if "accuracy" in test_res:
					logger.info("Classification accuracy (TEST): {:.4f}".format(test_res["accuracy"]))

				if "pois_likelihood" in test_res:
					logger.info("Poisson likelihood: {}".format(test_res["pois_likelihood"]))

				if "ce_loss" in test_res:
					logger.info("CE loss: {}".format(test_res["ce_loss"]))

			torch.save({
				'args': args,
				'state_dict': model.state_dict(),
			}, ckpt_path)


			# Plotting
			if args.viz:
				with torch.no_grad():
					test_dict = utils.get_next_batch(data_obj["test_dataloader"])

					print("plotting....")
					if isinstance(model, LatentODE) and (args.dataset == "periodic"): #and not args.classic_rnn and not args.ode_rnn:
						plot_id = itr // num_batches // n_iters_to_viz
						viz.draw_all_plots_one_dim(test_dict, model, 
							plot_name = file_name + "_" + str(experimentID) + "_{:03d}".format(plot_id) + ".png",
						 	experimentID = experimentID, save=True)
						plt.pause(0.01)
	torch.save({
		'args': args,
		'state_dict': model.state_dict(),
	}, ckpt_path)



usage: Latent ODE [-h] [-n N] [--niters NITERS] [--lr LR] [-b BATCH_SIZE]
                  [--viz] [--save SAVE] [--load LOAD] [-r RANDOM_SEED]
                  [--dataset DATASET] [-s SAMPLE_TP] [-c CUT_TP]
                  [--quantization QUANTIZATION] [--latent-ode]
                  [--z0-encoder Z0_ENCODER] [--classic-rnn]
                  [--rnn-cell RNN_CELL] [--input-decay] [--ode-rnn]
                  [--rnn-vae] [-l LATENTS] [--rec-dims REC_DIMS]
                  [--rec-layers REC_LAYERS] [--gen-layers GEN_LAYERS]
                  [-u UNITS] [-g GRU_UNITS] [--poisson] [--classif]
                  [--linear-classif] [--extrap] [-t TIMEPOINTS]
                  [--max-t MAX_T] [--noise-weight NOISE_WEIGHT]
Latent ODE: error: unrecognized arguments: -f /root/.local/share/jupyter/runtime/kernel-b3795be3-3477-4bee-97ed-b38edd553d61.json


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


# Break it down

In [2]:
# Install the latest version of author's repo neural ode implementation
!git clone https://github.com/rtqichen/torchdiffeq.git
!cd torchdiffeq && pip install -e .
!ls torchdiffeq/torchdiffeq

fatal: destination path 'torchdiffeq' already exists and is not an empty directory.
Obtaining file:///content/torchdiffeq
Installing collected packages: torchdiffeq
  Attempting uninstall: torchdiffeq
    Found existing installation: torchdiffeq 0.2.2
    Can't uninstall 'torchdiffeq'. No files were found to uninstall.
  Running setup.py develop for torchdiffeq
Successfully installed torchdiffeq-0.2.2
_impl  __init__.py


## Libraries

In [296]:
# run_models.py
import os
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
import matplotlib.pyplot as plt

import time
import datetime
import argparse
import numpy as np
import pandas as pd
from random import SystemRandom
from sklearn import model_selection

import torch
import torch.nn as nn
from torch.nn.functional import relu
import torch.optim as optim

import lib.utils as utils
from lib.plotting import *

from lib.rnn_baselines import *
from lib.ode_rnn import *
from lib.create_latent_ode_model import create_LatentODE_model
from lib.parse_datasets import parse_datasets
from lib.ode_func import ODEFunc, ODEFunc_w_Poisson
from lib.diffeq_solver import DiffeqSolver
from mujoco_physics import HopperPhysics

from lib.utils import compute_loss_all_batches

import sys
# print(sys.argv[1:])

## Parameters

In [297]:
# Generative model for noisy data based on ODE
parser = argparse.ArgumentParser('Latent ODE')
parser.add_argument('-n',  type=int, default=100, help="Size of the dataset")
parser.add_argument('--niters', type=int, default=300)
parser.add_argument('--lr',  type=float, default=1e-2, help="Starting learning rate.")
parser.add_argument('-b', '--batch-size', type=int, default=50)
parser.add_argument('--viz', action='store_true', help="Show plots while training")

parser.add_argument('--save', type=str, default='experiments/', help="Path for save checkpoints")
parser.add_argument('--load', type=str, default=None, help="ID of the experiment to load for evaluation. If None, run a new experiment.")
parser.add_argument('-r', '--random-seed', type=int, default=1991, help="Random_seed")

parser.add_argument('--dataset', type=str, default='hopper', help="Dataset to load. Available: physionet, activity, hopper, periodic")
parser.add_argument('-s', '--sample-tp', type=float, default=None, help="Number of time points to sub-sample."
	"If > 1, subsample exact number of points. If the number is in [0,1], take a percentage of available points per time series. If None, do not subsample")

parser.add_argument('-c', '--cut-tp', type=int, default=None, help="Cut out the section of the timeline of the specified length (in number of points)."
	"Used for periodic function demo.")

parser.add_argument('--quantization', type=float, default=0.1, help="Quantization on the physionet dataset."
	"Value 1 means quantization by 1 hour, value 0.1 means quantization by 0.1 hour = 6 min")

parser.add_argument('--latent-ode', default = True, action='store_true', help="Run Latent ODE seq2seq model")
parser.add_argument('--z0-encoder', type=str, default='odernn', help="Type of encoder for Latent ODE model: odernn or rnn")

parser.add_argument('--classic-rnn', action='store_true', help="Run RNN baseline: classic RNN that sees true points at every point. Used for interpolation only.")
parser.add_argument('--rnn-cell', default="gru", help="RNN Cell type. Available: gru (default), expdecay")
parser.add_argument('--input-decay', action='store_true', help="For RNN: use the input that is the weighted average of impirical mean and previous value (like in GRU-D)")

parser.add_argument('--ode-rnn', action='store_true', help="Run ODE-RNN baseline: RNN-style that sees true points at every point. Used for interpolation only.")

parser.add_argument('--rnn-vae', action='store_true', help="Run RNN baseline: seq2seq model with sampling of the h0 and ELBO loss.")

parser.add_argument('-l', '--latents', type=int, default=6, help="Size of the latent state")
parser.add_argument('--rec-dims', type=int, default=20, help="Dimensionality of the recognition model (ODE or RNN).")

parser.add_argument('--rec-layers', type=int, default=1, help="Number of layers in ODE func in recognition ODE")
parser.add_argument('--gen-layers', type=int, default=1, help="Number of layers in ODE func in generative ODE")

parser.add_argument('-u', '--units', type=int, default=100, help="Number of units per layer in ODE func")
parser.add_argument('-g', '--gru-units', type=int, default=100, help="Number of units per layer in each of GRU update networks")

parser.add_argument('--poisson', action='store_true', help="Model poisson-process likelihood for the density of events in addition to reconstruction.")
parser.add_argument('--classif', action='store_true', help="Include binary classification loss -- used for Physionet dataset for hospiral mortality")

parser.add_argument('--linear-classif', action='store_true', help="If using a classifier, use a linear classifier instead of 1-layer NN")
parser.add_argument('--extrap', action='store_true', help="Set extrapolation mode. If this flag is not set, run interpolation mode.")

parser.add_argument('-t', '--timepoints', type=int, default=100, help="Total number of time-points")
parser.add_argument('--max-t',  type=float, default=5., help="We subsample points in the interval [0, args.max_tp]")
parser.add_argument('--noise-weight', type=float, default=0.01, help="Noise amplitude for generated traejctories")

sys.argv = ['-f']
args = parser.parse_args()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# file_name = os.path.basename(__file__)[:-3]
utils.makedirs(args.save)  # saves in 'experiments/' folder

## Manual seed, experimentID

In [298]:
torch.manual_seed(args.random_seed)
np.random.seed(args.random_seed)

experimentID = args.load  # None
print(f"experimentID is {experimentID}")

experimentID is None


In [299]:
if experimentID is None:
		# Make a new experiment ID
		experimentID = int(SystemRandom().random()*100000) # from random import SystemRandom
print(f"experimentID is {experimentID}")
ckpt_path = os.path.join(args.save, "experiment_" + str(experimentID) + '.ckpt') 
print(f"ckpt_path is {ckpt_path}")

experimentID is 34150
ckpt_path is experiments/experiment_34150.ckpt


In [300]:
start = time.time()
print("Sampling dataset of {} training examples".format(args.n))  # n is size of the dataset

Sampling dataset of 100 training examples


In [301]:
print(f"type of args is {type(args)}")
print(f"args is {str(args)}")

type of args is <class 'argparse.Namespace'>
args is Namespace(batch_size=50, classic_rnn=False, classif=False, cut_tp=None, dataset='hopper', extrap=False, gen_layers=1, gru_units=100, input_decay=False, latent_ode=True, latents=6, linear_classif=False, load=None, lr=0.01, max_t=5.0, n=100, niters=300, noise_weight=0.01, ode_rnn=False, poisson=False, quantization=0.1, random_seed=1991, rec_dims=20, rec_layers=1, rnn_cell='gru', rnn_vae=False, sample_tp=None, save='experiments/', timepoints=100, units=100, viz=False, z0_encoder='odernn')


In [302]:
input_command = sys.argv
print(f"input_command is {input_command}")


input_command is ['-f']


In [303]:
ind = [i for i in range(len(input_command)) if input_command[i] == "--load"]
print(f"ind is {ind}")
print(f"len(ind) is {len(ind)}")

ind is []
len(ind) is 0


In [304]:
if len(ind) == 1:
		ind = ind[0]
		input_command = input_command[:ind] + input_command[(ind+2):]
input_command = " ".join(input_command)
print(f"input_command is {input_command}")
utils.makedirs("results/")


input_command is -f


## parse_datasets()

In [305]:
data_obj = parse_datasets(args, device)


Using the function parse_datasets from parse_datasets.py
Inputs are (args, device)
dataset_name is hopper


args.timepoints is 100
args.extrap is False
n_total_tp = args.timepoints + args.extrap  --> is 100


args.max_t is 5.0
max_t_extrap = args.max_t / args.timepoints * n_total_tp --> 5.0


if dataset_name == 'hopper'
root is data
Let's download
we will check os.path.exists(os.path.join(self.data_folder, self.training_file)). If it exists, return
we will check os.path.exists(os.path.join(self.data_folder, self.training_file)). If it exists, return
data_file is data/HopperPhysics/training.pt
Using torch.Tensor(torch.load(data_file)).to(device) to get self.data
self.data.shape is torch.Size([10000, 200, 14])


Using normalize_data()
data.shape is torch.Size([10000, 200, 14])
reshaped = data.reshape(-1, data.size(-1))
reshaped.shape is torch.Size([2000000, 14])
att_min = torch.min(reshaped, 0)[0]
att_min.shape is torch.Size([14])
att_max = torch.max(reshaped, 0)[0]
att_max.shape is torc

In [306]:
input_dim = data_obj["input_dim"]
n_train_batches = data_obj['n_train_batches']
n_test_batches = data_obj['n_test_batches']
print(f"input_dim is {input_dim}")
print(f"n_train_batches is {n_train_batches}")
print(f"n_test_batches is {n_test_batches}")

input_dim is 14
n_train_batches is 2
n_test_batches is 1


In [307]:
data_obj.keys()

dict_keys(['dataset_obj', 'train_dataloader', 'test_dataloader', 'input_dim', 'n_train_batches', 'n_test_batches'])

## Create Model

In [308]:
classif_per_tp = False
if ("classif_per_tp" in data_obj):
  # do classification per time point rather than on a time series as a whole
  classif_per_tp = data_obj["classif_per_tp"]

if args.classif and (args.dataset == "hopper" or args.dataset == "periodic"):
  raise Exception("Classification task is not available for MuJoCo and 1d datasets")

n_labels = 1
if args.classif:
  if ("n_labels" in data_obj):
    n_labels = data_obj["n_labels"]
  else:
    raise Exception("Please provide number of labels for classification task")


In [309]:
print(f"n_labels is {n_labels}")

n_labels is 1


In [310]:
obsrv_std = 0.01
if args.dataset == "hopper":
  obsrv_std = 1e-3 

obsrv_std = torch.Tensor([obsrv_std]).to(device)
print(f"obsrv_std is {obsrv_std}")
z0_prior = Normal(torch.Tensor([0.0]).to(device), torch.Tensor([1.]).to(device))
print(f"z0_prior is {z0_prior}")

obsrv_std is tensor([0.0010])
z0_prior is Normal(loc: tensor([0.]), scale: tensor([1.]))


In [311]:
if args.latent_ode:
    model = create_LatentODE_model(args, input_dim, z0_prior, obsrv_std, device, 
            classif_per_tp = classif_per_tp,
            n_labels = n_labels)
else:
	raise Exception("Model not specified")

Inside create_LatentODE_model
dim is 6


Making ode_func_net
Inside create_net function
n_input for layers is 6
n_units for layers is 100
n_layers is 1
nonlinear is <class 'torch.nn.modules.activation.Tanh'>
for i in range(n_layers):
    layers.append(nonlinear())
    layers.append(nn.Linear(n_units, n_units))
layers.append(nonlinear())
layers.append(nn.Linear(n_units, n_outputs))
n_outputs is 6
Making gen_ode_func
Inside ODEFunc class
input_dim is 14
latent_dim is 6
Inside init_network_weights function
std is 0.1
Making ode_func_net for odernn
Inside create_net function
n_input for layers is 20
n_units for layers is 100
n_layers is 1
nonlinear is <class 'torch.nn.modules.activation.Tanh'>
for i in range(n_layers):
    layers.append(nonlinear())
    layers.append(nn.Linear(n_units, n_units))
layers.append(nonlinear())
layers.append(nn.Linear(n_units, n_outputs))
n_outputs is 20
Making rec_ode_func using ODEFunc
Inside ODEFunc class
input_dim is 28
latent_dim is 20
Inside init_network_w

In [312]:
file_name = os.path.abspath('')
log_path = "logs/" + file_name + "_" + str(experimentID) + ".log"
if not os.path.exists("logs/"):
    utils.makedirs("logs/")
logger = utils.get_logger(logpath=log_path, filepath=os.path.abspath(''))
logger.info(input_command)

optimizer = optim.Adamax(model.parameters(), lr=args.lr)
print(f"optimizer is {optimizer}")
num_batches = data_obj["n_train_batches"]
print(f"num_batches is {num_batches}")

/content
/content
/content
/content
/content
/content
/content
/content
/content
/content
-f
-f
-f
-f
-f
-f
-f
-f
-f
-f


optimizer is Adamax (
Parameter Group 0
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0
)
num_batches is 2


In [313]:
for itr in range(1, num_batches * (args.niters + 1)):
    optimizer.zero_grad()
    utils.update_learning_rate(optimizer, decay_rate = 0.999, lowest = args.lr / 10)
    print(f"learning rate is {optimizer.param_groups[0]['lr']}")
    wait_until_kl_inc = 10
    if itr // num_batches < wait_until_kl_inc:
        kl_coef = 0.
    else:
        kl_coef = (1-0.99** (itr // num_batches - wait_until_kl_inc))
    print(f"kl_coef is {kl_coef}")
    batch_dict = utils.get_next_batch(data_obj["train_dataloader"])
    train_res = model.compute_all_losses(batch_dict, n_traj_samples = 1, kl_coef = kl_coef)
    train_res["loss"].backward()
    optimizer.step()
    n_iters_to_viz = 1

    if itr % (n_iters_to_viz * num_batches) == 0:
    	with torch.no_grad():
            test_res = compute_loss_all_batches(model, 
					data_obj["test_dataloader"], args,
					n_batches = data_obj["n_test_batches"],
					experimentID = experimentID,
					device = device,
					n_traj_samples = 1, kl_coef = kl_coef)
            message = 'Epoch {:04d} [Test seq (cond on sampled tp)] | Loss {:.6f} | Likelihood {:.6f} | KL fp {:.4f} | FP STD {:.4f}|'.format(
					itr//num_batches, 
					test_res["loss"].detach(), test_res["likelihood"].detach(), 
					test_res["kl_first_p"], test_res["std_first_p"])
            
            logger.info("Experiment " + str(experimentID))
            logger.info(message)
            logger.info("KL coef: {}".format(kl_coef))
            logger.info("Train loss (one batch): {}".format(train_res["loss"].detach()))
            logger.info("Train CE loss (one batch): {}".format(train_res["ce_loss"].detach()))

            if "auc" in test_res:
                logger.info("Classification AUC (TEST): {:.4f}".format(test_res["auc"]))

            if "mse" in test_res:
                logger.info("Test MSE: {:.4f}".format(test_res["mse"]))

            if "accuracy" in train_res:
                logger.info("Classification accuracy (TRAIN): {:.4f}".format(train_res["accuracy"]))

            if "accuracy" in test_res:
                logger.info("Classification accuracy (TEST): {:.4f}".format(test_res["accuracy"]))

            if "pois_likelihood" in test_res:
                logger.info("Poisson likelihood: {}".format(test_res["pois_likelihood"]))

            if "ce_loss" in test_res:
                logger.info("CE loss: {}".format(test_res["ce_loss"]))


            torch.save({
				'args': args,
				'state_dict': model.state_dict(),
			}, ckpt_path)
    
torch.save({
    'args': args,
    'state_dict': model.state_dict(),
}, ckpt_path)



learning rate is 0.00999
kl_coef is 0.0
Using basic_collate_fn function
Inputs are (batch, time_steps, args = args, device = device, data_type = 'train')


AttributeError: ignored

In [39]:
t1 = torch.tensor([1,2,3,4]) 
t2 = torch.tensor([5,6,7,8]) 
t3 = torch.tensor([9,10,11,12])

t = torch.stack(
    (t1,t2,t3)
    ,dim=-1
)

In [57]:
utils.split_last_dim(t)

(tensor([[1],
         [2],
         [3],
         [4]]), tensor([[ 5,  9],
         [ 6, 10],
         [ 7, 11],
         [ 8, 12]]))

In [53]:
from lib import utils as utils

In [54]:
utils.split_last_dim(t)

(tensor([[1],
         [2],
         [3],
         [4]]), tensor([[ 5,  9],
         [ 6, 10],
         [ 7, 11],
         [ 8, 12]]))

# For Stock Data

In [8]:
# Install the latest version of author's repo neural ode implementation
!git clone https://github.com/rtqichen/torchdiffeq.git
!cd torchdiffeq && pip install -e .
!ls torchdiffeq/torchdiffeq



fatal: destination path 'torchdiffeq' already exists and is not an empty directory.
Obtaining file:///content/torchdiffeq
Installing collected packages: torchdiffeq
  Attempting uninstall: torchdiffeq
    Found existing installation: torchdiffeq 0.2.2
    Can't uninstall 'torchdiffeq'. No files were found to uninstall.
  Running setup.py develop for torchdiffeq
Successfully installed torchdiffeq-0.2.2
_impl  __init__.py  __pycache__


## Libraries

In [10]:
# run_models.py
import os
import sys
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot
import matplotlib.pyplot as plt

import time
import datetime
import argparse
import numpy as np
import pandas as pd
from random import SystemRandom
from sklearn import model_selection

import torch
import torch.nn as nn
from torch.nn.functional import relu
import torch.optim as optim

import lib.utils as utils
# from lib.plotting import *

from lib.rnn_baselines import *
# from lib.ode_rnn import *
# from lib.create_latent_ode_model import create_LatentODE_model
# from lib.parse_datasets import parse_datasets
# from lib.ode_func import ODEFunc, ODEFunc_w_Poisson
from lib.diffeq_solver import DiffeqSolver
# from mujoco_physics import HopperPhysics

from lib.utils import compute_loss_all_batches

import sys
# print(sys.argv[1:])

## Parameters

In [11]:
# Generative model for noisy data based on ODE
parser = argparse.ArgumentParser('Latent ODE')
parser.add_argument('-n',  type=int, default=100, help="Size of the dataset")
parser.add_argument('--niters', type=int, default=10)
parser.add_argument('--lr',  type=float, default=1e-2, help="Starting learning rate.")
parser.add_argument('-b', '--batch-size', type=int, default=50)
parser.add_argument('--viz', action='store_true', help="Show plots while training")

parser.add_argument('--save', type=str, default='experiments/', help="Path for save checkpoints")
parser.add_argument('--load', type=str, default=None, help="ID of the experiment to load for evaluation. If None, run a new experiment.")
parser.add_argument('-r', '--random-seed', type=int, default=1991, help="Random_seed")

parser.add_argument('--dataset', type=str, default='hopper', help="Dataset to load. Available: physionet, activity, hopper, periodic")
parser.add_argument('-s', '--sample-tp', type=float, default=None, help="Number of time points to sub-sample."
	"If > 1, subsample exact number of points. If the number is in [0,1], take a percentage of available points per time series. If None, do not subsample")

parser.add_argument('-c', '--cut-tp', type=int, default=None, help="Cut out the section of the timeline of the specified length (in number of points)."
	"Used for periodic function demo.")

parser.add_argument('--quantization', type=float, default=0.1, help="Quantization on the physionet dataset."
	"Value 1 means quantization by 1 hour, value 0.1 means quantization by 0.1 hour = 6 min")

parser.add_argument('--latent-ode', default = True, action='store_true', help="Run Latent ODE seq2seq model")
parser.add_argument('--z0-encoder', type=str, default='odernn', help="Type of encoder for Latent ODE model: odernn or rnn")

parser.add_argument('--classic-rnn', action='store_true', help="Run RNN baseline: classic RNN that sees true points at every point. Used for interpolation only.")
parser.add_argument('--rnn-cell', default="gru", help="RNN Cell type. Available: gru (default), expdecay")
parser.add_argument('--input-decay', action='store_true', help="For RNN: use the input that is the weighted average of impirical mean and previous value (like in GRU-D)")

parser.add_argument('--ode-rnn', action='store_true', help="Run ODE-RNN baseline: RNN-style that sees true points at every point. Used for interpolation only.")

parser.add_argument('--rnn-vae', action='store_true', help="Run RNN baseline: seq2seq model with sampling of the h0 and ELBO loss.")

parser.add_argument('-l', '--latents', type=int, default=6, help="Size of the latent state")
parser.add_argument('--rec-dims', type=int, default=20, help="Dimensionality of the recognition model (ODE or RNN).")

parser.add_argument('--rec-layers', type=int, default=1, help="Number of layers in ODE func in recognition ODE")
parser.add_argument('--gen-layers', type=int, default=1, help="Number of layers in ODE func in generative ODE")

parser.add_argument('-u', '--units', type=int, default=100, help="Number of units per layer in ODE func")
parser.add_argument('-g', '--gru-units', type=int, default=100, help="Number of units per layer in each of GRU update networks")

parser.add_argument('--poisson', action='store_true', help="Model poisson-process likelihood for the density of events in addition to reconstruction.")
parser.add_argument('--classif', action='store_true', help="Include binary classification loss -- used for Physionet dataset for hospiral mortality")

parser.add_argument('--linear-classif', action='store_true', help="If using a classifier, use a linear classifier instead of 1-layer NN")
parser.add_argument('--extrap', default = False, action='store_true', help="Set extrapolation mode. If this flag is not set, run interpolation mode.")

parser.add_argument('-t', '--timepoints', type=int, default=100, help="Total number of time-points")
parser.add_argument('--max-t',  type=float, default=5., help="We subsample points in the interval [0, args.max_tp]")
parser.add_argument('--noise-weight', type=float, default=0.01, help="Noise amplitude for generated traejctories")

sys.argv = ['-f']
args = parser.parse_args()

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# file_name = os.path.basename(__file__)[:-3]
utils.makedirs(args.save)  # saves in 'experiments/' folder

## Manual seed, experimentID

In [12]:
torch.manual_seed(args.random_seed)
np.random.seed(args.random_seed)

experimentID = args.load  # None
print(f"experimentID is {experimentID}")

if experimentID is None:
		# Make a new experiment ID
		experimentID = int(SystemRandom().random()*100000) # from random import SystemRandom
print(f"experimentID is {experimentID}")
ckpt_path = os.path.join(args.save, "experiment_" + str(experimentID) + '.ckpt') 
print(f"ckpt_path is {ckpt_path}")

start = time.time()
print("Sampling dataset of {} training examples".format(args.n))  # n is size of the dataset

print(f"args is {str(args)}")

input_command = sys.argv
print(f"input_command is {input_command}")

ind = [i for i in range(len(input_command)) if input_command[i] == "--load"]
print(f"ind is {ind}")
print(f"len(ind) is {len(ind)}")

if len(ind) == 1:
		ind = ind[0]
		input_command = input_command[:ind] + input_command[(ind+2):]
input_command = " ".join(input_command)
print(f"input_command is {input_command}")
utils.makedirs("results/")


experimentID is None
experimentID is 46595
ckpt_path is experiments/experiment_46595.ckpt
Sampling dataset of 100 training examples
args is Namespace(batch_size=50, classic_rnn=False, classif=False, cut_tp=None, dataset='hopper', extrap=False, gen_layers=1, gru_units=100, input_decay=False, latent_ode=True, latents=6, linear_classif=False, load=None, lr=0.01, max_t=5.0, n=100, niters=10, noise_weight=0.01, ode_rnn=False, poisson=False, quantization=0.1, random_seed=1991, rec_dims=20, rec_layers=1, rnn_cell='gru', rnn_vae=False, sample_tp=None, save='experiments/', timepoints=100, units=100, viz=False, z0_encoder='odernn')
input_command is ['-f']
ind is []
len(ind) is 0
input_command is -f


## Get Dataset

In [13]:
!pip install yfinance



In [14]:
# 1. Get Open and Close Price of asset (o, c) for each trading day.
# libraries
from pandas_datareader import data as pdr
import yfinance as yf
import os

print(f"Get Open and Close Price of Assets")
def download_raw_stock_data(filepath, tickers, start, end, period = '1d'):
    """
    Download Stock tickers
    :Parameters:
        filepath: str
            path to store the raw data
        tickers : str, list
            List of tickers to download
        period: str
            the frequency at which to gather the data; common options would include ‘1d’ (daily), ‘1mo’ (monthly), ‘1y’ (yearly)
        start: str
            the date to start gathering the data. For example ‘2010–1–1’
        end: str
            the date to end gathering the data. For example ‘2020–1–25’
    
    """
    #define the ticker symbol
    tickerSymbol = tickers

    #get data on this ticker
    tickerData = yf.Ticker(tickerSymbol)

    #get the historical prices for this ticker
    tickerDf = tickerData.history(period=period, start=start, end=end)
    tickerDf.to_csv(filepath)

dict_tickers = {
    'Apple': 'AAPL',
    'Microsoft': 'MSFT',
    'Google': 'GOOG',
    'Bitcoin': 'BTC-USD',
    'Facebook': 'FB',
    'Walmart': 'WMT',
    'Amazon': 'AMZN',
    'CVS': 'CVS',
    'Berkshire': 'BRK-B',
    'ExxonMobil': 'XOM',
    'AtandT': 'T',
    'Costco': 'COST',
    'Walgreens': 'WBA',
    'Kroger': 'KR',
    'JPMorgan': 'JPM',
    'Verizon': 'VZ',
    'FordMotor': 'F',
    'GeneralMotors': 'GM',
    'Dell': 'DELL',
    'BankOfAmerica': 'BAC',
    'Target': 'TGT',
    'GeneralElectric': 'GE',
    'JohnsonandJohnson': 'JNJ',
    'Nvidia': 'NVDA',
    'Intel': 'INTC',
}

path = f"raw-stock-data/data-2000-2021"
if not os.path.exists(path):
    # https://appdividend.com/2021/07/03/how-to-create-directory-if-not-exist-in-python/
    # Create a new directory
    os.makedirs(path)
    print(f"{path} directory is created")
period = '1d'
start='2000-1-1'
end='2021-8-31'
for tickerName, ticker in dict_tickers.items():
    tickerName = tickerName
    ticker = ticker
    filepath = f"{path}/{tickerName}.csv"
    download_raw_stock_data(filepath, ticker, start, end, period)


print('\n')

print(f"The size of each asset")
import pandas as pd
for tickerName in dict_tickers.keys():
    df = pd.read_csv(f"{path}/{tickerName}.csv")
    print(f"{tickerName} size: {len(df)}")

# 2. Get weekly data.
# 3. Transform $d_{i}$ to sequences of lag * len($d_{i}$) length.

def stockDataTransformer(filepath):
    df = pd.read_csv(filepath)
    df.set_index('Date', inplace=True)
    df1 = df[['Open', 'Close']].copy()
    data = df1.values
    n_samples = data.shape[0]//10*10
    reshape_number = n_samples*data.shape[1]//10
    data1 = data[:n_samples].reshape((reshape_number, 10))
    return data1

def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """
    Frame a time series as a supervised learning dataset.
    Arguments:
        data: Sequence of observations as a list or NumPy array.
        n_in: Number of lag observations as input (X).
        n_out: Number of observations as output (y).
        dropnan: Boolean whether or not to drop rows with NaN values.
    Returns:
        Pandas DataFrame of series framed for supervised learning.
    """
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg

def split_data(perc_train, perc_valid, lag, data_orig, data_m1, n_features_orig, n_features_median):
    values = data_m1
    
    sizeOfReframed = len(data_m1)
    len_train = int(perc_train*sizeOfReframed) # int(sizeOfReframed - len_test) # - len_valid)
    train_data_orig = data_orig[:len_train, :]
    # valid = values[len_train:len_valid+len_train, :]
    test_data_orig = data_orig[len_train:, :]  # [len_valid+len_train:, :]
    # n_features = n_features
    
    train_data_ml = values[:len_train, :]
    test_data_ml = values[len_train:, :] 
    # split into input and outputs
    n_obs = lag * n_features_orig
    n_obs_median = (lag+forecast) * n_features_median
    train_X, train_y = train_data_orig[:, :n_obs], train_data_ml[:, :n_obs_median]
    test_X, test_y = test_data_orig[:, :n_obs], test_data_ml[:, :n_obs_median]
    # valid_X, valid_y = valid[:, :n_obs], valid[:, -1]
    print(train_X.shape, len(train_X), train_y.shape)
    
    # reshape input to be 3D [samples, features, lag]
    train_X = train_X.reshape((train_X.shape[0], n_features_orig, lag))
    test_X = test_X.reshape((test_X.shape[0], n_features_orig, lag))
    # valid_X = valid_X.reshape((valid_X.shape[0], lag, n_features))
    print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)  # , valid_X.shape, valid_y.shape)
    
    # Get the reconstruction train_y, test_y and extrapolated train_y, test_y
    train_y_recon, train_y_extrapol = train_y[:, :lag], train_y[:, lag:]
    test_y_recon, test_y_extrapol = test_y[:, :lag], test_y[:, lag:]
    dataload = {
        'train_data_orig': train_data_orig,
        'test_data_orig': test_data_orig,
        'train_data_ml': train_data_ml,
        'test_data_ml': test_data_ml,
        # 'valid': valid,
        'train_X': train_X,
        'train_y': train_y,
        'test_X': test_X,
        'test_y': test_y,
        'n_features_orig': n_features_orig,
        'n_features_median': n_features_median,
        'n_obs': n_obs,
        'n_obs_median': n_obs_median,
        # 'valid_X': valid_X,
        # 'valid_y': valid_y,
        'train_y_recon': train_y_recon,
        'train_y_extrapol': train_y_extrapol,
        'test_y_recon': test_y_recon,
        'test_y_extrapol': test_y_extrapol
    }
    
    return dataload

def get_median(array, axis = 1):
    # https://numpy.org/doc/stable/reference/generated/numpy.median.html
    return np.median(array, axis = axis).reshape(data_size, 1)  #, keepdims=True)

from pandas import concat
import numpy as np
week_sequence = {}
median_data_dict = {}
lag = 5
for tickerName in dict_tickers.keys():
    filepath = f"{path}/{tickerName}.csv"
    # Get the data in the required format
    data = stockDataTransformer(filepath)
    # # Total Data Size
    data_size = data.shape[0]
    print(f"{tickerName} data.shape {data.shape}")
    data_orig = series_to_supervised(data, lag).values
    print(f'{tickerName} Data Original after series to supervised on data {data_orig.shape}')
    week_sequence[tickerName] = data_orig

    median_data = get_median(data)
    print(f'{tickerName} Median data')
    # Median data for each week
    print(f"{tickerName} median_data.shape {median_data.shape}")
    # print(pd.DataFrame(median_data, columns = ['median_stockprice_week']).head(10))
    print('\n')

    # Convert median_data to (n_samples, 5) matrix
    data_m1 = series_to_supervised(median_data, lag).values
    median_data_dict[tickerName] = data_m1
    print(f'{tickerName} Median data after series to supervised')
    print(f"{tickerName} data_m1.shape {data_m1.shape}")
    # print(pd.DataFrame(data_m1, columns = [f"week i+{i}" for i in range(1, lag+forecast+1)]))
    print('\n')




Get Open and Close Price of Assets


The size of each asset
Apple size: 5450
Microsoft size: 5450
Google size: 4288
Bitcoin size: 2537
Facebook size: 2336
Walmart size: 5450
Amazon size: 5450
CVS size: 5450
Berkshire size: 5450
ExxonMobil size: 5450
AtandT size: 5450
Costco size: 5450
Walgreens size: 5450
Kroger size: 5450
JPMorgan size: 5450
Verizon size: 5450
FordMotor size: 5450
GeneralMotors size: 2713
Dell size: 1268
BankOfAmerica size: 5450
Target size: 5450
GeneralElectric size: 5450
JohnsonandJohnson size: 5450
Nvidia size: 5451
Intel size: 5450
Apple data.shape (1090, 10)
Apple Data Original after series to supervised on data (1085, 60)
Apple Median data
Apple median_data.shape (1090, 1)


Apple Median data after series to supervised
Apple data_m1.shape (1085, 6)


Microsoft data.shape (1090, 10)
Microsoft Data Original after series to supervised on data (1085, 60)
Microsoft Median data
Microsoft median_data.shape (1090, 1)


Microsoft Median data after series to supervised
Mi

In [15]:
import numpy as np
data = week_sequence['Apple']
# 4. Bundle all sequences together
for tickerName in week_sequence.keys():
    if tickerName != 'Apple':
        data1 = week_sequence[tickerName]
        data = np.concatenate((data, data1))
        print(f"data.shape {data.shape}")

import numpy as np
data_m = median_data_dict['Apple']
# 4. Bundle all sequences together
for tickerName in median_data_dict.keys():
    if tickerName != 'Apple':
        data1 = median_data_dict[tickerName]
        data_m = np.concatenate((data_m, data1))
        print(f"data.shape {data_m.shape}")

data_df = pd.DataFrame(data)
data_df.to_csv(f"all_assets_sequences.csv")

data_m_df = pd.DataFrame(data_m)
data_m_df.to_csv(f"median_sequences.csv")

data.shape (2170, 60)
data.shape (3021, 60)
data.shape (3522, 60)
data.shape (3983, 60)
data.shape (5068, 60)
data.shape (6153, 60)
data.shape (7238, 60)
data.shape (8323, 60)
data.shape (9408, 60)
data.shape (10493, 60)
data.shape (11578, 60)
data.shape (12663, 60)
data.shape (13748, 60)
data.shape (14833, 60)
data.shape (15918, 60)
data.shape (17003, 60)
data.shape (17540, 60)
data.shape (17787, 60)
data.shape (18872, 60)
data.shape (19957, 60)
data.shape (21042, 60)
data.shape (22127, 60)
data.shape (23206, 60)
data.shape (24291, 60)
data.shape (2170, 6)
data.shape (3021, 6)
data.shape (3522, 6)
data.shape (3983, 6)
data.shape (5068, 6)
data.shape (6153, 6)
data.shape (7238, 6)
data.shape (8323, 6)
data.shape (9408, 6)
data.shape (10493, 6)
data.shape (11578, 6)
data.shape (12663, 6)
data.shape (13748, 6)
data.shape (14833, 6)
data.shape (15918, 6)
data.shape (17003, 6)
data.shape (17540, 6)
data.shape (17787, 6)
data.shape (18872, 6)
data.shape (19957, 6)
data.shape (21042, 6)
data

## Parse Datasets

### Libraries

In [16]:
import os
import numpy as np

import torch
import torch.nn as nn

import lib.utils as utils
from lib.diffeq_solver import DiffeqSolver
# from generate_timeseries import Periodic_1d
from torch.distributions import uniform

from torch.utils.data import DataLoader
# from mujoco_physics import HopperPhysics

from sklearn import model_selection
import random


### HopperPhysics class for our data

#### Libraries

In [17]:

import os
import numpy as np
import torch
from lib.utils import get_dict_template
import lib.utils as utils




#### Class

In [18]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Authors: Yulia Rubanova and Ricky Chen
###########################

import os
import numpy as np
import torch
from lib.utils import get_dict_template
import lib.utils as utils
from torchvision.datasets.utils import download_url
import pandas as pd

class HopperPhysics(object):

    T = 6
    print(f"We are inside HopperPhysics class")
    print(f"T is {T}")
    D = 10
    print(f"dim is {D}")

    median_dim = 1
    print(f"median_dim is {median_dim}")

    # n_training_samples = 10000
    # print(f"n_training_samples is {n_training_samples}")

    training_file = 'training.csv'
    print(f"training_file name is {training_file}")

    def __init__(self, trainfile, medianfile, root, download = True, generate=False, device = torch.device("cpu")):
        print(f"root is {root}")
        self.root = root
        if download:
            print(f"Let's load dataset")
            X = pd.read_csv(os.path.join(root, trainfile), index_col=0)
            y = pd.read_csv(os.path.join(root, medianfile), index_col=0)

        if not self._check_exists():
            raise RuntimeError('Dataset not found.' + ' You can use download=True to download it')

        data_file = os.path.join(self.data_folder, self.training_file)
        median_file = os.path.join(self.data_folder, 'median_sequences.csv')
        print(f"data_file is {data_file}")
        # print(f"Using torch.Tensor(torch.load(data_file)).to(device) to get self.data")
        # self.data = torch.Tensor(X.values).to(device) # https://pytorch.org/docs/stable/generated/torch.load.html
        # print(f"self.data.shape is {self.data.shape}")
        # self.median_data = torch.Tensor(y.values).to(device)
        # print(f"self.median_data.shape is {self.median_data.shape}")

        # self.data = torch.reshape(self.data, (self.data.shape[0], T, D))
        # self.median_data = torch.reshape(self.median_data, (self.median_data.shape[0], T, median_dim))
        

        # print(f"self.data.shape is {self.data.shape}")
        # print(f"self.median_data.shape is {self.median_data.shape}")
        # print("\n")
        # self.data, self.data_min, self.data_max = utils.normalize_data(self.data)
        # self.median_data, self.median_data_min, self.median_data_max = utils.normalize_data(self.median_data)
        # self.dataset = torch.cat((self.data, self.median_data), dim=1)
        self.dataset = list(zip(X.values.astype(np.float32), y.values.astype(np.float32)))
        # print(f"self.dataset.shape is {self.dataset.shape}")
        # self.dataset = list(zip(self.data, self.median_data))
        # self.dataset = torch.Tensor(self.dataset).to(device)
        # print(self.dataset[0])

        self.device =device
        self.n_training_samples = len(self.dataset)
        print(f"self.n_training_samples is {self.n_training_samples}")

    def _check_exists(self):
        print(f"we will check os.path.exists(os.path.join(self.data_folder, self.training_file)). If it exists, return")
        return os.path.exists(os.path.join(self.data_folder, self.training_file))

    @property
    def data_folder(self):
        return self.root

        # def __getitem__(self, index):
        #     return self.data[index]

    def get_dataset(self):
        return self.dataset

    def __len__(self):
        return len(self.dataset)

    def size(self, ind = None):
        if ind is not None:
            return self.data.shape[ind]
            return self.data.shape
                
    def __repr__(self):
        fmt_str = 'Dataset ' + self.__class__.__name__ + '\n'
        fmt_str += '    Number of datapoints: {}\n'.format(self.__len__())
        fmt_str += '    Root Location: {}\n'.format(self.root)
        return fmt_str



We are inside HopperPhysics class
T is 6
dim is 10
median_dim is 1
training_file name is training.csv


#### parse_dataset

In [19]:
import os
import numpy as np

import torch
import torch.nn as nn

import lib.utils as utils
from lib.diffeq_solver import DiffeqSolver
# from generate_timeseries import Periodic_1d
from torch.distributions import uniform

from torch.utils.data import DataLoader

from sklearn import model_selection
import random



In [20]:
def parse_datasets(args, device):
    
	# batch the data and median_data together and don't shuffle
	def basic_collate_fn(batch, time_steps, args = args, device = device, data_type = "train"):
    	# batch looks like [(x0+y0), ... ]
		# data = batch[:, :int(T*D)] 
		# error is TypeError: list indices must be integers or slices, not tuple
		# print(type(batch))
		# print(f"T is {T}")
		# print(f"D is {D}")
		# batch = torch.stack(batch)
		# print(f"batch.shape is {batch.shape}")
		# data = batch[:, :int(T*D)] 
		# print(f"data.shape is {data.shape}")
		# # error is TypeError: list indices must be integers or slices, not tuple
		# median_data = batch[:, int(T*D):]
		# print(f"median_data.shape is {median_data.shape}")

		# # median_data = torch.cat(batch[:, int(T*D):], dim=0)
		data_list, median_data_list = [], []
		for (data, median) in batch:
			data_list.append(data)
			median_data_list.append(median)
		data = torch.tensor(data_list).to(device)
		median_data = torch.tensor(median_data_list).to(device)
		print(f"data.shape is {data.shape}")
		print(f"median_data.shape is {median_data.shape}")

		data_dict = {
			"data": data, 
			"median_data": median_data,
			"time_steps": time_steps}
		
		# data_dict = utils.split_and_subsample_batch(data_dict, args, data_type = data_type)
		clone_data = data_dict["data"].clone().to(device)
		clone_median_data = data_dict["median_data"].clone().to(device)
		clone_data = torch.reshape(clone_data, (clone_data.size(0), T, D))
		print(f"clone_data.shape is {clone_data.shape}")
		clone_median_data = torch.reshape(clone_median_data, (clone_median_data.size(0), T, 1))
		print(f"clone_median_data.shape is {clone_median_data.shape}")
		data_dict = {"observed_data": clone_data,
			"observed_tp": data_dict["time_steps"].clone(),
			"data_to_predict": clone_median_data,
			"tp_to_predict": data_dict["time_steps"].clone()}
		print(f"data_dict has keys as {data_dict.keys()}")
		print(f"data_dict['observed_data'].shape is {data_dict['observed_data'].shape}")
		data_dict["observed_mask"] = None 
		data_dict["mask_predicted_data"] = None 
		data_dict["labels"] = None 
		data_dict["mode"] = "interp"
		print(f"data_dict['mode'] is {data_dict['mode']}")
		return data_dict

	dataset_name = args.dataset
	print(f"dataset_name is {dataset_name}")
	n_total_tp = args.timepoints
	max_t_extrap = args.max_t / args.timepoints * n_total_tp

	##################################################################
	# MuJoCo dataset
	if dataset_name == "hopper":
		dataset_obj = HopperPhysics(trainfile, medianfile, root='data', download=True, generate=False, device = device)
		dataset = dataset_obj.get_dataset()
		# print(f"dataset.shape is {dataset.shape}")
		# dataset = dataset.to(device)
		n_tp_data = T
		print(f"n_tp_data is {n_tp_data}")

		# Time steps that are used later on for exrapolation
		time_steps = torch.arange(start=0, end = n_tp_data, step=1).float().to(device)
		time_steps = time_steps / len(time_steps)

		# dataset = dataset.to(device)
		time_steps = time_steps.to(device)
		print(f"time_steps is {time_steps}")

		# if not args.extrap:
		# 	# Creating dataset for interpolation
		# 	# sample time points from different parts of the timeline, 
		# 	# so that the model learns from different parts of hopper trajectory
		# 	n_traj = len(dataset)
		# 	n_tp_data = dataset.shape[1]
		# 	n_reduced_tp = args.timepoints

		# 	# sample time points from different parts of the timeline, 
		# 	# so that the model learns from different parts of hopper trajectory
		# 	start_ind = np.random.randint(0, high=n_tp_data - n_reduced_tp +1, size=n_traj)
		# 	end_ind = start_ind + n_reduced_tp
		# 	sliced = []
		# 	for i in range(n_traj):
		# 		  sliced.append(dataset[i, start_ind[i] : end_ind[i], :])
		# 	dataset = torch.stack(sliced).to(device)
		# 	time_steps = time_steps[:n_reduced_tp]

		# Split into train and test by the time sequences
		# train_y, test_y = utils.split_train_test(dataset, train_fraq = 0.8)
		n_samples = len(dataset)
		print(f"n_samples is {n_samples}")
		train_freq = 0.8
		train_y = dataset[:int(n_samples*train_freq)]
		test_y = dataset[int(n_samples*train_freq):]
		# print(f"train_y.shape is {train_y.shape}")
		# print(f"test_y.shape is {test_y.shape}")

		

		input_dim = D
		print(f"input_dim is {input_dim}")

		batch_size = min(args.batch_size, args.n)
		print(f"batch_size is {batch_size}")
		train_dataloader = DataLoader(train_y, batch_size = batch_size, shuffle=False,
			collate_fn= lambda batch: basic_collate_fn(batch, time_steps, data_type = "train"))
		print(f"train_dataloader.batch_size is {train_dataloader.batch_size}")
		test_dataloader = DataLoader(test_y, batch_size = n_samples, shuffle=False,
			collate_fn= lambda batch: basic_collate_fn(batch, time_steps, data_type = "test"))
		print(f"test_dataloader.batch_size is {test_dataloader.batch_size}")
		
		data_objects = {"dataset_obj": dataset_obj, 
					"train_dataloader": utils.inf_generator(train_dataloader), 
					"test_dataloader": utils.inf_generator(test_dataloader),
					"input_dim": input_dim,
					"n_train_batches": len(train_dataloader),
					"n_test_batches": len(test_dataloader)}
		return data_objects




In [21]:
trainfile = 'training.csv'
medianfile = 'median_sequences.csv'
T = 6
print(f"T is {T}")
D = 10
print("dim is {D}")

median_dim = 1
print("median_dim is {median_dim}")
parse_datasets(args, device)

T is 6
dim is {D}
median_dim is {median_dim}
dataset_name is hopper
root is data
Let's load dataset
we will check os.path.exists(os.path.join(self.data_folder, self.training_file)). If it exists, return
data_file is data/training.csv
self.n_training_samples is 24291
n_tp_data is 6
time_steps is tensor([0.0000, 0.1667, 0.3333, 0.5000, 0.6667, 0.8333])
n_samples is 24291
input_dim is 10
batch_size is 50
train_dataloader.batch_size is 50
test_dataloader.batch_size is 24291


{'dataset_obj': Dataset HopperPhysics
     Number of datapoints: 24291
     Root Location: data,
 'input_dim': 10,
 'n_test_batches': 1,
 'n_train_batches': 389,
 'test_dataloader': <generator object inf_generator at 0x7fc4cc5d87d0>,
 'train_dataloader': <generator object inf_generator at 0x7fc4cc5d89d0>}

In [22]:
data_obj = parse_datasets(args, device)
input_dim = data_obj["input_dim"]
print(f"input_dim is {input_dim}")

classif_per_tp = False
if ("classif_per_tp" in data_obj):
    # do classification per time point rather than on a time series as a whole
    classif_per_tp = data_obj["classif_per_tp"]

if args.classif and (args.dataset == "hopper" or args.dataset == "periodic"):
    raise Exception("Classification task is not available for MuJoCo and 1d datasets")

dataset_name is hopper
root is data
Let's load dataset
we will check os.path.exists(os.path.join(self.data_folder, self.training_file)). If it exists, return
data_file is data/training.csv
self.n_training_samples is 24291
n_tp_data is 6
time_steps is tensor([0.0000, 0.1667, 0.3333, 0.5000, 0.6667, 0.8333])
n_samples is 24291
input_dim is 10
batch_size is 50
train_dataloader.batch_size is 50
test_dataloader.batch_size is 24291
input_dim is 10


## n_labels

In [23]:
n_labels = 1
if args.classif:
    if ("n_labels" in data_obj):
        n_labels = data_obj["n_labels"]
    else:
        raise Exception("Please provide number of labels for classification task")

##################################################################
# Create the model
obsrv_std = 0.01
if args.dataset == "hopper":
    obsrv_std = 1e-3 

obsrv_std = torch.Tensor([obsrv_std]).to(device)

z0_prior = Normal(torch.Tensor([0.0]).to(device), torch.Tensor([1.]).to(device))

## args.latent_ode

#### encoder_decoder.py

In [24]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Author: Yulia Rubanova
###########################

import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import relu
import lib.utils as utils
from torch.distributions import Categorical, Normal
import lib.utils as utils
from torch.nn.modules.rnn import LSTM, GRU
from lib.utils import get_device


# GRU description: 
# http://www.wildml.com/2015/10/recurrent-neural-network-tutorial-part-4-implementing-a-grulstm-rnn-with-python-and-theano/
class GRU_unit(nn.Module):
	def __init__(self, latent_dim, input_dim, 
		update_gate = None,
		reset_gate = None,
		new_state_net = None,
		n_units = 100,
		device = torch.device("cpu")):
		super(GRU_unit, self).__init__()
		print(f"Inside GRU unit")
		print(f"\n")
		print(f"self.update_gate = nn.Sequential(" +
			   " nn.Linear(latent_dim * 2 + input_dim, n_units)," +
			   " nn.Tanh()," +
			   " nn.Linear(n_units, latent_dim)," +
			   " nn.Sigmoid())")
		print(f"self.reset_gate = nn.Sequential(" +
			   " nn.Linear(latent_dim * 2 + input_dim, n_units)," +
			   " nn.Tanh()," +
			   " nn.Linear(n_units, latent_dim)," +
			   " nn.Sigmoid())")
		print(f"self.new_state_net = nn.Sequential(" +
			   " nn.Linear(latent_dim * 2 + input_dim, n_units)," +
			   " nn.Tanh()," +
			   " nn.Linear(n_units, latent_dim)," +
			   " nn.Sigmoid())")

		if update_gate is None:
			self.update_gate = nn.Sequential(
			   nn.Linear(latent_dim * 2 + input_dim, n_units),
			   nn.Tanh(),
			   nn.Linear(n_units, latent_dim),
			   nn.Sigmoid())
			utils.init_network_weights(self.update_gate)
		else: 
			self.update_gate  = update_gate

		if reset_gate is None:
			self.reset_gate = nn.Sequential(
			   nn.Linear(latent_dim * 2 + input_dim, n_units),
			   nn.Tanh(),
			   nn.Linear(n_units, latent_dim),
			   nn.Sigmoid())
			utils.init_network_weights(self.reset_gate)
		else: 
			self.reset_gate  = reset_gate

		if new_state_net is None:
			self.new_state_net = nn.Sequential(
			   nn.Linear(latent_dim * 2 + input_dim, n_units),
			   nn.Tanh(),
			   nn.Linear(n_units, latent_dim * 2))
			utils.init_network_weights(self.new_state_net)
		else: 
			self.new_state_net  = new_state_net


	def forward(self, y_mean, y_std, x, masked_update = False):
		y_concat = torch.cat([y_mean, y_std, x], -1)  

		update_gate = self.update_gate(y_concat)
		reset_gate = self.reset_gate(y_concat)
		concat = torch.cat([y_mean * reset_gate, y_std * reset_gate, x], -1)
		
		new_state, new_state_std = utils.split_last_dim(self.new_state_net(concat))
		new_state_std = new_state_std.abs()

		new_y = (1-update_gate) * new_state + update_gate * y_mean
		new_y_std = (1-update_gate) * new_state_std + update_gate * y_std

		assert(not torch.isnan(new_y).any())

		# if masked_update:
    	# 		# IMPORTANT: assumes that x contains both data and mask
		# 	# update only the hidden states for hidden state only if at least one feature is present for the current time point
		# 	n_data_dims = x.size(-1)//2
		# 	mask = x[:, :, n_data_dims:]
		# 	utils.check_mask(x[:, :, :n_data_dims], mask)
			
		# 	mask = (torch.sum(mask, -1, keepdim = True) > 0).float()

		# 	assert(not torch.isnan(mask).any())

		# 	new_y = mask * new_y + (1-mask) * y_mean
		# 	new_y_std = mask * new_y_std + (1-mask) * y_std

		# 	if torch.isnan(new_y).any():
		# 		print("new_y is nan!")
		# 		print(mask)
		# 		print(y_mean)
		# 		print(prev_new_y)
		# 		exit()

		new_y_std = new_y_std.abs()
		print(f"new_y.shape is {new_y.shape}")
		print(f"new_y_std.shape is {new_y_std.shape}")
		return new_y, new_y_std

class Encoder_z0_ODE_RNN(nn.Module):
	# Derive z0 by running ode backwards.
	# For every y_i we have two versions: encoded from data and derived from ODE by running it backwards from t_i+1 to t_i
	# Compute a weighted sum of y_i from data and y_i from ode. Use weighted y_i as an initial value for ODE runing from t_i to t_i-1
	# Continue until we get to z0
	def __init__(self, latent_dim, input_dim, z0_diffeq_solver = None, 
		z0_dim = None, GRU_update = None, 
		n_gru_units = 100, 
		device = torch.device("cpu")):
		
		super(Encoder_z0_ODE_RNN, self).__init__()

		if z0_dim is None:
			self.z0_dim = latent_dim
		else:
			self.z0_dim = z0_dim

		if GRU_update is None:
			self.GRU_update = GRU_unit(latent_dim, input_dim, 
				n_units = n_gru_units, 
				device=device).to(device)
		else:
			self.GRU_update = GRU_update

		self.z0_diffeq_solver = z0_diffeq_solver
		self.latent_dim = latent_dim
		self.input_dim = input_dim
		self.device = device
		self.extra_info = None

		self.transform_z0 = nn.Sequential(
		   nn.Linear(latent_dim * 2, 100),
		   nn.Tanh(),
		   nn.Linear(100, self.z0_dim * 2),)
		utils.init_network_weights(self.transform_z0)


	def forward(self, data, time_steps, run_backwards = True, save_info = False):
		# data, time_steps -- observations and their time stamps
		# IMPORTANT: assumes that 'data' already has mask concatenated to it 
		assert(not torch.isnan(data).any())
		assert(not torch.isnan(time_steps).any())

		n_traj, n_tp, n_dims = data.size()
		if len(time_steps) == 1:
			prev_y = torch.zeros((1, n_traj, self.latent_dim)).to(self.device)
			prev_std = torch.zeros((1, n_traj, self.latent_dim)).to(self.device)

			xi = data[:,0,:].unsqueeze(0)

			last_yi, last_yi_std = self.GRU_update(prev_y, prev_std, xi)
			extra_info = None
		else:
			
			last_yi, last_yi_std, _, extra_info = self.run_odernn(
				data, time_steps, run_backwards = run_backwards,
				save_info = save_info)

		means_z0 = last_yi.reshape(1, n_traj, self.latent_dim)
		std_z0 = last_yi_std.reshape(1, n_traj, self.latent_dim)

		mean_z0, std_z0 = utils.split_last_dim( self.transform_z0( torch.cat((means_z0, std_z0), -1)))
		std_z0 = std_z0.abs()
		if save_info:
			self.extra_info = extra_info

		return mean_z0, std_z0


	def run_odernn(self, data, time_steps, 
		run_backwards = True, save_info = False):
		# IMPORTANT: assumes that 'data' already has mask concatenated to it 

		n_traj, n_tp, n_dims = data.size()
		extra_info = []

		t0 = time_steps[-1]
		if run_backwards:
			t0 = time_steps[0]

		device = get_device(data)

		prev_y = torch.zeros((1, n_traj, self.latent_dim)).to(device)
		prev_std = torch.zeros((1, n_traj, self.latent_dim)).to(device)

		prev_t, t_i = time_steps[-1] + 0.01,  time_steps[-1]

		interval_length = time_steps[-1] - time_steps[0]
		minimum_step = interval_length / 50

		#print("minimum step: {}".format(minimum_step))

		# assert(not torch.isnan(data).any())
		# assert(not torch.isnan(time_steps).any())

		latent_ys = []
		# Run ODE backwards and combine the y(t) estimates using gating
		time_points_iter = range(0, len(time_steps))
		if run_backwards:
			time_points_iter = reversed(time_points_iter)

		for i in time_points_iter:
			if (prev_t - t_i) < minimum_step:
				time_points = torch.stack((prev_t, t_i))
				inc = self.z0_diffeq_solver.ode_func(prev_t, prev_y) * (t_i - prev_t)

				# assert(not torch.isnan(inc).any())

				ode_sol = prev_y + inc
				ode_sol = torch.stack((prev_y, ode_sol), 2).to(device)

				# assert(not torch.isnan(ode_sol).any())
			else:
				n_intermediate_tp = max(2, ((prev_t - t_i) / minimum_step).int())

				time_points = utils.linspace_vector(prev_t, t_i, n_intermediate_tp)
				ode_sol = self.z0_diffeq_solver(prev_y, time_points)

				# assert(not torch.isnan(ode_sol).any())

			if torch.mean(ode_sol[:, :, 0, :]  - prev_y) >= 0.001:
				print("Error: first point of the ODE is not equal to initial value")
				print(torch.mean(ode_sol[:, :, 0, :]  - prev_y))
				exit()
			#assert(torch.mean(ode_sol[:, :, 0, :]  - prev_y) < 0.001)

			yi_ode = ode_sol[:, :, -1, :]
			xi = data[:,i,:].unsqueeze(0)
			
			yi, yi_std = self.GRU_update(yi_ode, prev_std, xi)

			prev_y, prev_std = yi, yi_std			
			prev_t, t_i = time_steps[i],  time_steps[i-1]

			latent_ys.append(yi)

			if save_info:
				d = {"yi_ode": yi_ode.detach(), #"yi_from_data": yi_from_data,
					 "yi": yi.detach(), "yi_std": yi_std.detach(), 
					 "time_points": time_points.detach(), "ode_sol": ode_sol.detach()}
				extra_info.append(d)

		latent_ys = torch.stack(latent_ys, 1)

		# assert(not torch.isnan(yi).any())
		# assert(not torch.isnan(yi_std).any())

		return yi, yi_std, latent_ys, extra_info



class Decoder(nn.Module):
	def __init__(self, latent_dim, input_dim):
		super(Decoder, self).__init__()
		# decode data from latent space where we are solving an ODE back to the data space

		decoder = nn.Sequential(
		   nn.Linear(latent_dim, input_dim),)

		utils.init_network_weights(decoder)	
		self.decoder = decoder

	def forward(self, data):
		return self.decoder(data)




#### likelihood_eval.py

In [25]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Author: Yulia Rubanova
###########################

import gc
import numpy as np
import sklearn as sk
import numpy as np
#import gc
import torch
import torch.nn as nn
from torch.nn.functional import relu

import lib.utils as utils
from lib.utils import get_device
# from lib.encoder_decoder import *
# from lib.likelihood_eval import *

from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions.normal import Normal
from torch.distributions import kl_divergence, Independent


def gaussian_log_likelihood(mu_2d, data_2d, obsrv_std, indices = None):
	n_data_points = mu_2d.size()[-1]

	if n_data_points > 0:
		gaussian = Independent(Normal(loc = mu_2d, scale = obsrv_std.repeat(n_data_points)), 1)
		log_prob = gaussian.log_prob(data_2d) 
		log_prob = log_prob / n_data_points 
	else:
		log_prob = torch.zeros([1]).to(get_device(data_2d)).squeeze()
	return log_prob


# def poisson_log_likelihood(masked_log_lambdas, masked_data, indices, int_lambdas):
# 	# masked_log_lambdas and masked_data 
# 	n_data_points = masked_data.size()[-1]

# 	if n_data_points > 0:
# 		log_prob = torch.sum(masked_log_lambdas) - int_lambdas[indices]
# 		#log_prob = log_prob / n_data_points
# 	else:
# 		log_prob = torch.zeros([1]).to(get_device(masked_data)).squeeze()
# 	return log_prob

def compute_masked_likelihood(mu, data, mask, likelihood_func):
	# Compute the likelihood per patient and per attribute so that we don't priorize patients with more measurements
	n_traj_samples, n_traj, n_timepoints, n_dims = data.size()

	res = []
	for i in range(n_traj_samples):
		for k in range(n_traj):
			for j in range(n_dims):
				data_masked = torch.masked_select(data[i,k,:,j], mask[i,k,:,j].bool())
				
				#assert(torch.sum(data_masked == 0.) < 10)

				mu_masked = torch.masked_select(mu[i,k,:,j], mask[i,k,:,j].bool())
				log_prob = likelihood_func(mu_masked, data_masked, indices = (i,k,j))
				res.append(log_prob)
	# shape: [n_traj*n_traj_samples, 1]

	res = torch.stack(res, 0).to(get_device(data))
	res = res.reshape((n_traj_samples, n_traj, n_dims))
	# Take mean over the number of dimensions
	res = torch.mean(res, -1) # !!!!!!!!!!! changed from sum to mean
	res = res.transpose(0,1)
	return res


def masked_gaussian_log_density(mu, data, obsrv_std, mask = None):
	# these cases are for plotting through plot_estim_density
	if (len(mu.size()) == 3):
		# add additional dimension for gp samples
		mu = mu.unsqueeze(0)

	if (len(data.size()) == 2):
		# add additional dimension for gp samples and time step
		data = data.unsqueeze(0).unsqueeze(2)
	elif (len(data.size()) == 3):
		# add additional dimension for gp samples
		data = data.unsqueeze(0)

	n_traj_samples, n_traj, n_timepoints, n_dims = mu.size()

	assert(data.size()[-1] == n_dims)

	# Shape after permutation: [n_traj, n_traj_samples, n_timepoints, n_dims]
	if mask is None:
		mu_flat = mu.reshape(n_traj_samples*n_traj, n_timepoints * n_dims)
		n_traj_samples, n_traj, n_timepoints, n_dims = data.size()
		data_flat = data.reshape(n_traj_samples*n_traj, n_timepoints * n_dims)
	
		res = gaussian_log_likelihood(mu_flat, data_flat, obsrv_std)
		res = res.reshape(n_traj_samples, n_traj).transpose(0,1)
	else:
		# Compute the likelihood per patient so that we don't priorize patients with more measurements
		func = lambda mu, data, indices: gaussian_log_likelihood(mu, data, obsrv_std = obsrv_std, indices = indices)
		res = compute_masked_likelihood(mu, data, mask, func)
	return res



def mse(mu, data, indices = None):
	n_data_points = mu.size()[-1]

	if n_data_points > 0:
		mse = nn.MSELoss()(mu, data)
	else:
		mse = torch.zeros([1]).to(get_device(data)).squeeze()
	return mse


def compute_mse(mu, data, mask = None):
	# these cases are for plotting through plot_estim_density
	if (len(mu.size()) == 3):
		# add additional dimension for gp samples
		mu = mu.unsqueeze(0)

	if (len(data.size()) == 2):
		# add additional dimension for gp samples and time step
		data = data.unsqueeze(0).unsqueeze(2)
	elif (len(data.size()) == 3):
		# add additional dimension for gp samples
		data = data.unsqueeze(0)

	n_traj_samples, n_traj, n_timepoints, n_dims = mu.size()
	assert(data.size()[-1] == n_dims)

	# Shape after permutation: [n_traj, n_traj_samples, n_timepoints, n_dims]
	if mask is None:
		mu_flat = mu.reshape(n_traj_samples*n_traj, n_timepoints * n_dims)
		n_traj_samples, n_traj, n_timepoints, n_dims = data.size()
		data_flat = data.reshape(n_traj_samples*n_traj, n_timepoints * n_dims)
		res = mse(mu_flat, data_flat)
	else:
		# Compute the likelihood per patient so that we don't priorize patients with more measurements
		res = compute_masked_likelihood(mu, data, mask, mse)
	return res




def compute_poisson_proc_likelihood(truth, pred_y, info, mask = None):
	# Compute Poisson likelihood
	# https://math.stackexchange.com/questions/344487/log-likelihood-of-a-realization-of-a-poisson-process
	# Sum log lambdas across all time points
	if mask is None:
		poisson_log_l = torch.sum(info["log_lambda_y"], 2) - info["int_lambda"]
		# Sum over data dims
		poisson_log_l = torch.mean(poisson_log_l, -1)
	else:
		# Compute likelihood of the data under the predictions
		truth_repeated = truth.repeat(pred_y.size(0), 1, 1, 1)
		mask_repeated = mask.repeat(pred_y.size(0), 1, 1, 1)

		# Compute the likelihood per patient and per attribute so that we don't priorize patients with more measurements
		int_lambda = info["int_lambda"]
		f = lambda log_lam, data, indices: poisson_log_likelihood(log_lam, data, indices, int_lambda)
		poisson_log_l = compute_masked_likelihood(info["log_lambda_y"], truth_repeated, mask_repeated, f)
		poisson_log_l = poisson_log_l.permute(1,0)
		# Take mean over n_traj
		#poisson_log_l = torch.mean(poisson_log_l, 1)
		
	# poisson_log_l shape: [n_traj_samples, n_traj]
	return poisson_log_l

	



#### base_models.py

In [26]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Author: Yulia Rubanova
###########################

import numpy as np
import torch
import torch.nn as nn
from torch.nn.functional import relu

import lib.utils as utils

# from lib.likelihood_eval import *

from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions.normal import Normal
from torch.nn.modules.rnn import GRUCell, LSTMCell, RNNCellBase

from torch.distributions.normal import Normal
from torch.distributions import Independent
from torch.nn.parameter import Parameter


def create_classifier(z0_dim, n_labels):
	return nn.Sequential(
			nn.Linear(z0_dim, 300),
			nn.ReLU(),
			nn.Linear(300, 300),
			nn.ReLU(),
			nn.Linear(300, n_labels),)

class VAE_Baseline(nn.Module):
	def __init__(self, input_dim, latent_dim, 
		z0_prior, device,
		obsrv_std = 0.01, 
		use_binary_classif = False,
		classif_per_tp = False,
		use_poisson_proc = False,
		linear_classifier = False,
		n_labels = 1,
		train_classif_w_reconstr = False):

		super(VAE_Baseline, self).__init__()
		
		self.input_dim = input_dim
		self.latent_dim = latent_dim
		self.device = device
		self.n_labels = n_labels

		self.obsrv_std = torch.Tensor([obsrv_std]).to(device)

		self.z0_prior = z0_prior
		self.use_binary_classif = use_binary_classif
		self.classif_per_tp = classif_per_tp
		self.use_poisson_proc = use_poisson_proc
		self.linear_classifier = linear_classifier
		self.train_classif_w_reconstr = train_classif_w_reconstr

		z0_dim = latent_dim
		if use_poisson_proc:
			z0_dim += latent_dim

		if use_binary_classif: 
			if linear_classifier:
				self.classifier = nn.Sequential(
					nn.Linear(z0_dim, n_labels))
			else:
				self.classifier = create_classifier(z0_dim, n_labels)
			utils.init_network_weights(self.classifier)


	def get_gaussian_likelihood(self, truth, pred_y, mask = None):
		# pred_y shape [n_traj_samples, n_traj, n_tp, n_dim]
		# truth shape  [n_traj, n_tp, n_dim]
		n_traj, n_tp, n_dim = truth.size()

		# Compute likelihood of the data under the predictions
		truth_repeated = truth.repeat(pred_y.size(0), 1, 1, 1)
		
		if mask is not None:
			mask = mask.repeat(pred_y.size(0), 1, 1, 1)
		log_density_data = masked_gaussian_log_density(pred_y, truth_repeated, 
			obsrv_std = self.obsrv_std, mask = mask)
		log_density_data = log_density_data.permute(1,0)
		log_density = torch.mean(log_density_data, 1)

		# shape: [n_traj_samples]
		return log_density


	def get_mse(self, truth, pred_y, mask = None):
		# pred_y shape [n_traj_samples, n_traj, n_tp, n_dim]
		# truth shape  [n_traj, n_tp, n_dim]
		n_traj, n_tp, n_dim = truth.size()

		# Compute likelihood of the data under the predictions
		truth_repeated = truth.repeat(pred_y.size(0), 1, 1, 1)
		
		if mask is not None:
			mask = mask.repeat(pred_y.size(0), 1, 1, 1)

		# Compute likelihood of the data under the predictions
		log_density_data = compute_mse(pred_y, truth_repeated, mask = mask)
		# shape: [1]
		return torch.mean(log_density_data)


	def compute_all_losses(self, batch_dict, n_traj_samples = 1, kl_coef = 1.):
		# Condition on subsampled points
		# Make predictions for all the points
		pred_y, info = self.get_reconstruction(batch_dict["tp_to_predict"], 
			batch_dict["observed_data"], batch_dict["observed_tp"], 
			mask = batch_dict["observed_mask"], n_traj_samples = n_traj_samples,
			mode = batch_dict["mode"])

		#print("get_reconstruction done -- computing likelihood")
		fp_mu, fp_std, fp_enc = info["first_point"]
		fp_std = fp_std.abs()
		fp_distr = Normal(fp_mu, fp_std)

		# assert(torch.sum(fp_std < 0) == 0.)

		kldiv_z0 = kl_divergence(fp_distr, self.z0_prior)

		if torch.isnan(kldiv_z0).any():
			print(fp_mu)
			print(fp_std)
			raise Exception("kldiv_z0 is Nan!")

		# Mean over number of latent dimensions
		# kldiv_z0 shape: [n_traj_samples, n_traj, n_latent_dims] if prior is a mixture of gaussians (KL is estimated)
		# kldiv_z0 shape: [1, n_traj, n_latent_dims] if prior is a standard gaussian (KL is computed exactly)
		# shape after: [n_traj_samples]
		kldiv_z0 = torch.mean(kldiv_z0,(1,2))

		# Compute likelihood of all the points
		rec_likelihood = self.get_gaussian_likelihood(
			batch_dict["data_to_predict"], pred_y,
			mask = batch_dict["mask_predicted_data"])

		mse = self.get_mse(
			batch_dict["data_to_predict"], pred_y,
			mask = batch_dict["mask_predicted_data"])

		pois_log_likelihood = torch.Tensor([0.]).to(get_device(batch_dict["data_to_predict"]))
		if self.use_poisson_proc:
			pois_log_likelihood = compute_poisson_proc_likelihood(
				batch_dict["data_to_predict"], pred_y, 
				info, mask = batch_dict["mask_predicted_data"])
			# Take mean over n_traj
			pois_log_likelihood = torch.mean(pois_log_likelihood, 1)

		################################
		# Compute CE loss for binary classification on Physionet
		device = get_device(batch_dict["data_to_predict"])
		ce_loss = torch.Tensor([0.]).to(device)
		if (batch_dict["labels"] is not None) and self.use_binary_classif:

			if (batch_dict["labels"].size(-1) == 1) or (len(batch_dict["labels"].size()) == 1):
				ce_loss = compute_binary_CE_loss(
					info["label_predictions"], 
					batch_dict["labels"])
			else:
				ce_loss = compute_multiclass_CE_loss(
					info["label_predictions"], 
					batch_dict["labels"],
					mask = batch_dict["mask_predicted_data"])

		# IWAE loss
		loss = - torch.logsumexp(rec_likelihood -  kl_coef * kldiv_z0,0)
		if torch.isnan(loss):
			loss = - torch.mean(rec_likelihood - kl_coef * kldiv_z0,0)
			
		if self.use_poisson_proc:
			loss = loss - 0.1 * pois_log_likelihood 

		if self.use_binary_classif:
			if self.train_classif_w_reconstr:
				loss = loss +  ce_loss * 100
			else:
				loss =  ce_loss

		results = {}
		results['pred_y'] = pred_y
		results['true_y'] = batch_dict["data_to_predict"]
		results["loss"] = torch.mean(loss)
		results["likelihood"] = torch.mean(rec_likelihood).detach()
		results["mse"] = torch.mean(mse).detach()
		results["pois_likelihood"] = torch.mean(pois_log_likelihood).detach()
		results["ce_loss"] = torch.mean(ce_loss).detach()
		results["kl_first_p"] =  torch.mean(kldiv_z0).detach()
		results["std_first_p"] = torch.mean(fp_std).detach()

		if batch_dict["labels"] is not None and self.use_binary_classif:
			results["label_predictions"] = info["label_predictions"].detach()

		return results





#### latent_ode.py

In [27]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Author: Yulia Rubanova
###########################

import numpy as np
import sklearn as sk
import numpy as np
#import gc
import torch
import torch.nn as nn
from torch.nn.functional import relu

import lib.utils as utils
from lib.utils import get_device
# from lib.encoder_decoder import *
# from lib.likelihood_eval import *

from torch.distributions.multivariate_normal import MultivariateNormal
from torch.distributions.normal import Normal
from torch.distributions import kl_divergence, Independent
# from lib.base_models import VAE_Baseline



class LatentODE(VAE_Baseline):
	def __init__(self, input_dim, latent_dim, encoder_z0, decoder, diffeq_solver, 
		z0_prior, device, obsrv_std = None, 
		use_binary_classif = False, use_poisson_proc = False,
		linear_classifier = False,
		classif_per_tp = False,
		n_labels = 1,
		train_classif_w_reconstr = False):

		super(LatentODE, self).__init__(
			input_dim = input_dim, latent_dim = latent_dim, 
			z0_prior = z0_prior, 
			device = device, obsrv_std = obsrv_std, 
			use_binary_classif = use_binary_classif,
			classif_per_tp = classif_per_tp, 
			linear_classifier = linear_classifier,
			use_poisson_proc = use_poisson_proc,
			n_labels = n_labels,
			train_classif_w_reconstr = train_classif_w_reconstr)
		
		print("LatentODE: input_dim: {}, latent_dim: {}, use_binary_classif: {}, use_poisson_proc: {}".format(input_dim, latent_dim, use_binary_classif, use_poisson_proc))

		self.encoder_z0 = encoder_z0
		self.diffeq_solver = diffeq_solver
		self.decoder = decoder
		self.use_poisson_proc = use_poisson_proc

	def get_reconstruction(self, time_steps_to_predict, truth, truth_time_steps, 
		mask = None, n_traj_samples = 1, run_backwards = True, mode = None):

		if isinstance(self.encoder_z0, Encoder_z0_ODE_RNN) or \
			isinstance(self.encoder_z0, Encoder_z0_RNN):

			truth_w_mask = truth
			# if mask is not None:
    		# 		truth_w_mask = torch.cat((truth, mask), -1)
			first_point_mu, first_point_std = self.encoder_z0(
				truth_w_mask, truth_time_steps, run_backwards = run_backwards)

			means_z0 = first_point_mu.repeat(n_traj_samples, 1, 1)
			sigma_z0 = first_point_std.repeat(n_traj_samples, 1, 1)
			first_point_enc = utils.sample_standard_gaussian(means_z0, sigma_z0)

		else:
			raise Exception("Unknown encoder type {}".format(type(self.encoder_z0).__name__))
		
		first_point_std = first_point_std.abs()
		assert(torch.sum(first_point_std < 0) == 0.)

		if self.use_poisson_proc:
			n_traj_samples, n_traj, n_dims = first_point_enc.size()
			# append a vector of zeros to compute the integral of lambda
			zeros = torch.zeros([n_traj_samples, n_traj,self.input_dim]).to(get_device(truth))
			first_point_enc_aug = torch.cat((first_point_enc, zeros), -1)
			means_z0_aug = torch.cat((means_z0, zeros), -1)
		else:
			first_point_enc_aug = first_point_enc
			means_z0_aug = means_z0
			
		assert(not torch.isnan(time_steps_to_predict).any())
		assert(not torch.isnan(first_point_enc).any())
		assert(not torch.isnan(first_point_enc_aug).any())

		# Shape of sol_y [n_traj_samples, n_samples, n_timepoints, n_latents]
		sol_y = self.diffeq_solver(first_point_enc_aug, time_steps_to_predict)

		if self.use_poisson_proc:
			sol_y, log_lambda_y, int_lambda, _ = self.diffeq_solver.ode_func.extract_poisson_rate(sol_y)

			assert(torch.sum(int_lambda[:,:,0,:]) == 0.)
			assert(torch.sum(int_lambda[0,0,-1,:] <= 0) == 0.)

		pred_x = self.decoder(sol_y)

		all_extra_info = {
			"first_point": (first_point_mu, first_point_std, first_point_enc),
			"latent_traj": sol_y.detach()
		}

		if self.use_poisson_proc:
			# intergral of lambda from the last step of ODE Solver
			all_extra_info["int_lambda"] = int_lambda[:,:,-1,:]
			all_extra_info["log_lambda_y"] = log_lambda_y

		if self.use_binary_classif:
			if self.classif_per_tp:
				all_extra_info["label_predictions"] = self.classifier(sol_y)
			else:
				all_extra_info["label_predictions"] = self.classifier(first_point_enc).squeeze(-1)

		return pred_x, all_extra_info


	def sample_traj_from_prior(self, time_steps_to_predict, n_traj_samples = 1):
		# input_dim = starting_point.size()[-1]
		# starting_point = starting_point.view(1,1,input_dim)

		# Sample z0 from prior
		starting_point_enc = self.z0_prior.sample([n_traj_samples, 1, self.latent_dim]).squeeze(-1)

		starting_point_enc_aug = starting_point_enc
		if self.use_poisson_proc:
			n_traj_samples, n_traj, n_dims = starting_point_enc.size()
			# append a vector of zeros to compute the integral of lambda
			zeros = torch.zeros(n_traj_samples, n_traj,self.input_dim).to(self.device)
			starting_point_enc_aug = torch.cat((starting_point_enc, zeros), -1)

		sol_y = self.diffeq_solver.sample_traj_from_prior(starting_point_enc_aug, time_steps_to_predict, 
			n_traj_samples = 3)

		if self.use_poisson_proc:
			sol_y, log_lambda_y, int_lambda, _ = self.diffeq_solver.ode_func.extract_poisson_rate(sol_y)
		
		return self.decoder(sol_y)




#### ode_func.py

In [28]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Author: Yulia Rubanova
###########################

import numpy as np
import torch
import torch.nn as nn
from torch.nn.utils.spectral_norm import spectral_norm

import lib.utils as utils

#####################################################################################################

class ODEFunc(nn.Module):
	def __init__(self, input_dim, latent_dim, ode_func_net, device = torch.device("cpu")):
		"""
		input_dim: dimensionality of the input
		latent_dim: dimensionality used for ODE. Analog of a continous latent state
		"""
		print(f"Inside ODEFunc class")
		super(ODEFunc, self).__init__()

		self.input_dim = input_dim
		print(f"input_dim is {input_dim}")
		print(f"latent_dim is {latent_dim}")
		self.device = device

		utils.init_network_weights(ode_func_net)
		self.gradient_net = ode_func_net

	def forward(self, t_local, y, backwards = False):
		"""
		Perform one step in solving ODE. Given current data point y and current time point t_local, returns gradient dy/dt at this time point

		t_local: current time point
		y: value at the current time point
		"""
		grad = self.get_ode_gradient_nn(t_local, y)
		if backwards:
			grad = -grad
		return grad

	def get_ode_gradient_nn(self, t_local, y):
		return self.gradient_net(y)

	def sample_next_point_from_prior(self, t_local, y):
		"""
		t_local: current time point
		y: value at the current time point
		"""
		return self.get_ode_gradient_nn(t_local, y)

#### create_latent_ode_model.py

In [29]:
###########################
# Latent ODEs for Irregularly-Sampled Time Series
# Author: Yulia Rubanova
###########################

import os
import numpy as np

import torch
import torch.nn as nn
from torch.nn.functional import relu

import lib.utils as utils
# from lib.latent_ode import LatentODE
# from lib.encoder_decoder import *
from lib.diffeq_solver import DiffeqSolver

from torch.distributions.normal import Normal
# from lib.ode_func import ODEFunc, ODEFunc_w_Poisson

#####################################################################################################

def create_LatentODE_model(args, input_dim, z0_prior, obsrv_std, device, 
	classif_per_tp = False, n_labels = 1):

	dim = args.latents
	print(f"Inside create_LatentODE_model")
	print(f"dim is {dim}")
	print("\n")
	if args.poisson:
		lambda_net = utils.create_net(dim, input_dim, 
			n_layers = 1, n_units = args.units, nonlinear = nn.Tanh)

		# ODE function produces the gradient for latent state and for poisson rate
		ode_func_net = utils.create_net(dim * 2, args.latents * 2, 
			n_layers = args.gen_layers, n_units = args.units, nonlinear = nn.Tanh)

		gen_ode_func = ODEFunc_w_Poisson(
			input_dim = input_dim, 
			latent_dim = args.latents * 2,
			ode_func_net = ode_func_net,
			lambda_net = lambda_net,
			device = device).to(device)
	else:
		dim = args.latents
		print(f"Making ode_func_net") 
		ode_func_net = utils.create_net(dim, args.latents, 
			n_layers = args.gen_layers, n_units = args.units, nonlinear = nn.Tanh)
		print(f"Making gen_ode_func")

		gen_ode_func = ODEFunc(
			input_dim = input_dim, 
			latent_dim = args.latents, 
			ode_func_net = ode_func_net,
			device = device).to(device)

	z0_diffeq_solver = None
	n_rec_dims = args.rec_dims
	enc_input_dim = int(input_dim)  # we concatenate the mask
	gen_data_dim = 1
	print(f"gen_data_dim is {gen_data_dim}")

	z0_dim = args.latents
	if args.poisson:
		z0_dim += args.latents # predict the initial poisson rate

	if args.z0_encoder == "odernn":
		print(f"Making ode_func_net for odernn")
		ode_func_net = utils.create_net(n_rec_dims, n_rec_dims, 
			n_layers = args.rec_layers, n_units = args.units, nonlinear = nn.Tanh)

		print(f"Making rec_ode_func using ODEFunc")
		rec_ode_func = ODEFunc(
			input_dim = enc_input_dim, 
			latent_dim = n_rec_dims,
			ode_func_net = ode_func_net,
			device = device).to(device)

		z0_diffeq_solver = DiffeqSolver(enc_input_dim, rec_ode_func, "euler", args.latents, 
			odeint_rtol = 1e-3, odeint_atol = 1e-4, device = device)
		
		encoder_z0 = Encoder_z0_ODE_RNN(n_rec_dims, enc_input_dim, z0_diffeq_solver, 
			z0_dim = z0_dim, n_gru_units = args.gru_units, device = device).to(device)

	elif args.z0_encoder == "rnn":
		encoder_z0 = Encoder_z0_RNN(z0_dim, enc_input_dim,
			lstm_output_size = n_rec_dims, device = device).to(device)
	else:
		raise Exception("Unknown encoder for Latent ODE model: " + args.z0_encoder)

	decoder = Decoder(args.latents, gen_data_dim).to(device)

	diffeq_solver = DiffeqSolver(gen_data_dim, gen_ode_func, 'dopri5', args.latents, 
		odeint_rtol = 1e-3, odeint_atol = 1e-4, device = device)

	model = LatentODE(
		input_dim = gen_data_dim, 
		latent_dim = args.latents, 
		encoder_z0 = encoder_z0, 
		decoder = decoder, 
		diffeq_solver = diffeq_solver, 
		z0_prior = z0_prior, 
		device = device,
		obsrv_std = obsrv_std,
		use_poisson_proc = args.poisson, 
		use_binary_classif = args.classif,
		linear_classifier = args.linear_classif,
		classif_per_tp = classif_per_tp,
		n_labels = n_labels,
		train_classif_w_reconstr = (args.dataset == "physionet")
		).to(device)

	return model


In [30]:
if args.latent_ode:
    model = create_LatentODE_model(args, input_dim, z0_prior, obsrv_std, device, 
        classif_per_tp = classif_per_tp,
        n_labels = n_labels)
else:
	raise Exception("Model not specified")

Inside create_LatentODE_model
dim is 6


Making ode_func_net
Inside create_net function
n_input for layers is 6
n_units for layers is 100
n_layers is 1
nonlinear is <class 'torch.nn.modules.activation.Tanh'>
for i in range(n_layers):
    layers.append(nonlinear())
    layers.append(nn.Linear(n_units, n_units))
layers.append(nonlinear())
layers.append(nn.Linear(n_units, n_outputs))
n_outputs is 6
Making gen_ode_func
Inside ODEFunc class
input_dim is 10
latent_dim is 6
Inside init_network_weights function
std is 0.1
gen_data_dim is 1
Making ode_func_net for odernn
Inside create_net function
n_input for layers is 20
n_units for layers is 100
n_layers is 1
nonlinear is <class 'torch.nn.modules.activation.Tanh'>
for i in range(n_layers):
    layers.append(nonlinear())
    layers.append(nn.Linear(n_units, n_units))
layers.append(nonlinear())
layers.append(nn.Linear(n_units, n_outputs))
n_outputs is 20
Making rec_ode_func using ODEFunc
Inside ODEFunc class
input_dim is 10
latent_dim is 20
Ins

## Training

In [31]:
file_name = os.path.abspath('')
log_path = "logs/" + file_name + "_" + str(experimentID) + ".log"
if not os.path.exists("logs/"):
    utils.makedirs("logs/")
logger = utils.get_logger(logpath=log_path, filepath=os.path.abspath(''))
logger.info(input_command)

optimizer = optim.Adamax(model.parameters(), lr=args.lr)
print(f"optimizer is {optimizer}")
num_batches = data_obj["n_train_batches"]
print(f"num_batches is {num_batches}")

/content
-f


optimizer is Adamax (
Parameter Group 0
    betas: (0.9, 0.999)
    eps: 1e-08
    lr: 0.01
    weight_decay: 0
)
num_batches is 389


In [None]:
import lib.utils as utils
for itr in range(1, num_batches * (args.niters + 1)):
    optimizer.zero_grad()
    utils.update_learning_rate(optimizer, decay_rate = 0.999, lowest = args.lr / 10)
    print(f"learning rate is {optimizer.param_groups[0]['lr']}")
    wait_until_kl_inc = 10
    if itr // num_batches < wait_until_kl_inc:
        kl_coef = 0.
    else:
        kl_coef = (1-0.99** (itr // num_batches - wait_until_kl_inc))
    print(f"kl_coef is {kl_coef}")
    batch_dict = utils.get_next_batch(data_obj["train_dataloader"])
    train_res = model.compute_all_losses(batch_dict, n_traj_samples = 1, kl_coef = kl_coef)
    train_res["loss"].backward()
    optimizer.step()
    n_iters_to_viz = 1

    if itr % (n_iters_to_viz * num_batches) == 0:
    	with torch.no_grad():
            test_res = compute_loss_all_batches(model, 
					data_obj["test_dataloader"], args,
					n_batches = data_obj["n_test_batches"],
					experimentID = experimentID,
					device = device,
					n_traj_samples = 1, kl_coef = kl_coef)
            message = 'Epoch {:04d} [Test seq (cond on sampled tp)] | Loss {:.6f} | Likelihood {:.6f} | KL fp {:.4f} | FP STD {:.4f}|'.format(
					itr//num_batches, 
					test_res["loss"].detach(), test_res["likelihood"].detach(), 
					test_res["kl_first_p"], test_res["std_first_p"])
            
            logger.info("Experiment " + str(experimentID))
            logger.info(message)
            logger.info("KL coef: {}".format(kl_coef))
            logger.info("Train loss (one batch): {}".format(train_res["loss"].detach()))
            logger.info("Train CE loss (one batch): {}".format(train_res["ce_loss"].detach()))

            if "auc" in test_res:
                logger.info("Classification AUC (TEST): {:.4f}".format(test_res["auc"]))

            if "mse" in test_res:
                logger.info("Test MSE: {:.4f}".format(test_res["mse"]))

            if "accuracy" in train_res:
                logger.info("Classification accuracy (TRAIN): {:.4f}".format(train_res["accuracy"]))

            if "accuracy" in test_res:
                logger.info("Classification accuracy (TEST): {:.4f}".format(test_res["accuracy"]))

            if "pois_likelihood" in test_res:
                logger.info("Poisson likelihood: {}".format(test_res["pois_likelihood"]))

            if "ce_loss" in test_res:
                logger.info("CE loss: {}".format(test_res["ce_loss"]))


            torch.save({
				'args': args,
				'state_dict': model.state_dict(),
			}, ckpt_path)
    
torch.save({
    'args': args,
    'state_dict': model.state_dict(),
}, ckpt_path)



[1;30;43mStreaming output truncated to the last 5000 lines.[0m
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_d

Experiment 46595
Epoch 0001 [Test seq (cond on sampled tp)] | Loss 1890767232.000000 | Likelihood -1890767232.000000 | KL fp 301.9954 | FP STD 1.5072|
KL coef: 0.0
Train loss (one batch): 183904544.0
Train CE loss (one batch): 0.0
Test MSE: 3781.5347
Poisson likelihood: 0.0
CE loss: 0.0


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 

Experiment 46595
Epoch 0002 [Test seq (cond on sampled tp)] | Loss 1856517120.000000 | Likelihood -1856517120.000000 | KL fp 354.8120 | FP STD 1.8834|
KL coef: 0.0
Train loss (one batch): 406027968.0
Train CE loss (one batch): 0.0
Test MSE: 3713.0352
Poisson likelihood: 0.0
CE loss: 0.0


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape

Experiment 46595
Epoch 0003 [Test seq (cond on sampled tp)] | Loss 1157321728.000000 | Likelihood -1157321728.000000 | KL fp 186.3289 | FP STD 1.3046|
KL coef: 0.0
Train loss (one batch): 102907720.0
Train CE loss (one batch): 0.0
Test MSE: 2314.6436
Poisson likelihood: 0.0
CE loss: 0.0


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 10, 20])
Should be shape: [n_traj_samples, n_tr

# Rough

In [None]:
X = pd.read_csv(os.path.join('data', trainfile), index_col=0)
y = pd.read_csv(os.path.join('data', medianfile), index_col=0)

print(X.head())
print("\n")
print(y.head())
print("\n")

# result = pd.concat([X, y], axis=1, join='inner')
# print(result.head())
# data = torch.Tensor(X.values).to(device)
# median_data = torch.Tensor(y.values).to(device)

In [538]:
dataset = list(zip(X.values, y.values))
len(dataset)

24291

In [539]:
n_samples = len(data)
len(data[:int(n_samples * 0.8)])

19432

In [540]:
data, data_min, data_max = utils.normalize_data(data)
median_data, median_data_min, median_data_max = utils.normalize_data(median_data)
dataset = torch.cat((data, median_data), dim=1)
print(f"self.dataset.shape is {dataset.shape}")

Using normalize_data()
data.shape is (24291, 60)
reshaped = data.reshape(-1, data.size(-1))


TypeError: ignored

In [541]:
dataset[:, :int(60)].type

TypeError: ignored

In [542]:
torch.stack(dataset)

TypeError: ignored

In [34]:
data_obj["train_dataloader"].__next__()

data.shape is torch.Size([50, 60])
median_data.shape is torch.Size([50, 6])
clone_data.shape is torch.Size([50, 6, 10])
clone_median_data.shape is torch.Size([50, 6, 1])
data_dict has keys as dict_keys(['observed_data', 'observed_tp', 'data_to_predict', 'tp_to_predict'])
data_dict['observed_data'].shape is torch.Size([50, 6, 10])
data_dict['mode'] is interp


{'data_to_predict': tensor([[[0.2281],
          [0.2568],
          [0.2712],
          [0.3028],
          [0.3268],
          [0.3148]],
 
         [[0.2568],
          [0.2712],
          [0.3028],
          [0.3268],
          [0.3148],
          [0.2966]],
 
         [[0.2712],
          [0.3028],
          [0.3268],
          [0.3148],
          [0.2966],
          [0.2889]],
 
         [[0.3028],
          [0.3268],
          [0.3148],
          [0.2966],
          [0.2889],
          [0.2954]],
 
         [[0.3268],
          [0.3148],
          [0.2966],
          [0.2889],
          [0.2954],
          [0.3174]],
 
         [[0.3148],
          [0.2966],
          [0.2889],
          [0.2954],
          [0.3174],
          [0.3014]],
 
         [[0.2966],
          [0.2889],
          [0.2954],
          [0.3174],
          [0.3014],
          [0.3246]],
 
         [[0.2889],
          [0.2954],
          [0.3174],
          [0.3014],
          [0.3246],
          [0.3386]],

In [39]:
model.compute_all_losses(data_obj["train_dataloader"].__next__())

data.shape is torch.Size([50, 60])
median_data.shape is torch.Size([50, 6])
clone_data.shape is torch.Size([50, 6, 10])
clone_median_data.shape is torch.Size([50, 6, 1])
data_dict has keys as dict_keys(['observed_data', 'observed_tp', 'data_to_predict', 'tp_to_predict'])
data_dict['observed_data'].shape is torch.Size([50, 6, 10])
data_dict['mode'] is interp
inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([9, 1, 50, 20])
pred_y.shape after permute is torch.Size([1, 50, 9, 20])
Should be shape: [n_traj_samples, n_traj, n_tp, n_dim]


inside utils' split_last_dim function
last_dim = data.size()[-1] is 20
new_y.shape is torch.Size([1, 50, 20])
new_y_std.shape is torch.Size([1, 50, 20])
first_point.shape is torch.Size([1, 50, 20])
pred_y.shape from odeint is torch.Size([10, 1, 50, 20])
pred_y.shape after 

{'ce_loss': tensor(0.),
 'kl_first_p': tensor(1.3633),
 'likelihood': tensor(-1140880.3750),
 'loss': tensor(1140881.7500, grad_fn=<MeanBackward0>),
 'mse': tensor(2.2818),
 'pois_likelihood': tensor(0.),
 'std_first_p': tensor(0.1712)}

In [35]:
utils.get_next_batch(data_obj["train_dataloader"])

data.shape is torch.Size([50, 60])
median_data.shape is torch.Size([50, 6])
clone_data.shape is torch.Size([50, 6, 10])
clone_median_data.shape is torch.Size([50, 6, 1])
data_dict has keys as dict_keys(['observed_data', 'observed_tp', 'data_to_predict', 'tp_to_predict'])
data_dict['observed_data'].shape is torch.Size([50, 6, 10])
data_dict['mode'] is interp
data_dict['observed_data'].size() is torch.Size([50, 6, 10])


IndexError: ignored