# Multivariate Time Series Generation - MVDTW Evaluation

## If Using Google Colab

Mount your Google Drive if you are running this notebook on Colab.

In [None]:
# Mount Google Drive under /content/drive so the project folder used below is
# reachable. Only meaningful inside Google Colab, where `google.colab` exists.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Directories

In [None]:
import os

# Absolute location of the project inside the mounted Google Drive.
path = '/content/drive/My Drive/MV_GAN_Journal/Multivariate_time_series_gen/'
# Work from the project root so the relative directories below resolve.
os.chdir(path)

# Input data and experiment output folders, relative to the project root.
data_dir, results_dir = './Data', './Results/your_result_dir'

## Import Dependencies

In [None]:
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
import random

import json as js
import math
import numpy as np
from tqdm import tqdm

from model import Generator

from torch.autograd.variable import Variable

For Plotting

In [None]:
# Colour-blind-friendly palette (hex RGB strings), one entry per plotted series.
CB_color_cycle = [
    '#377eb8',
    '#ff7f00',
    '#4daf4a',
    '#f781bf',
    '#a65628',
    '#984ea3',
    '#999999',
    '#e41a1c',
    '#dede00',
]

### R package for fast MVDTW execution

In [None]:
import rpy2.robjects.numpy2ri
from rpy2.robjects.packages import importr
rpy2.robjects.numpy2ri.activate()
import rpy2.robjects as robj

In [None]:
%load_ext rpy2.ipython

  from pandas.core.index import Index as PandasIndex


In [None]:
%%R
install.packages("dtw")

In [None]:
# Set up our R namespaces
# `R` is rpy2's entry point to the embedded R session; attribute access on it
# (e.g. R.matrix, R.dtw) looks the name up in R.
# `DTW` imports the R 'dtw' package installed in the cell above, which loads
# its functions into that session.
R = rpy2.robjects.r
DTW = importr('dtw')

## GPU

In [None]:
# Pick the first CUDA device when one is available, otherwise fall back to CPU.
cuda = torch.cuda.is_available()
device = torch.device('cuda:0') if cuda else torch.device('cpu')

# Report which device the notebook will run on.
if not cuda:
    print('Using: CPU')
else:
    print('Using: ' + str(torch.cuda.get_device_name(device)))

# Float tensor constructor matching the selected device.
Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

Using: Tesla T4


## Function Definitions

In [None]:
def load_gen(file):
    """Return whatever object `torch.load` deserialises from `file`.

    NOTE: `torch.load` unpickles its input, so only pass files you trust.
    """
    return torch.load(file)

def noise(batch_size, features):
    """Draw standard-normal noise of shape (2, batch_size, features) on `device`."""
    shape = (2, batch_size, features)
    # Sample first, then move to `device`, so the draw order matches the original.
    return torch.randn(*shape).to(device)

def uni_noise(batch_size, features):
    """Draw standard-normal noise of shape (batch_size, features) on `device`."""
    shape = (batch_size, features)
    # Sample first, then move to `device`, so the draw order matches the original.
    return torch.randn(*shape).to(device)

def load_params(filename):
    """Parse the JSON file at `filename` and return the decoded object."""
    with open(filename, 'r') as handle:
        return js.load(handle)

def load_data(filename, batch_size):
    """Load a saved tensor and wrap it in a DataLoader.

    When the second axis has exactly 501 entries, the last entry along that
    axis is dropped before batching.

    Returns:
        (data_loader, num_batches): the DataLoader and its length in batches.
    """
    series = torch.load(filename)
    second_axis_len = len(series[0, :, 0])
    if second_axis_len == 501:
        series = series[:, :-1, :]

    loader = torch.utils.data.DataLoader(series, batch_size=batch_size)
    return loader, len(loader)

In [None]:
def load_single_gen(file, params):
    """Rebuild a Generator from a saved state dict and sample one batch from it.

    Args:
        file: path to a generator state dict saved with `torch.save`.
        params: dict providing 'seq_len', 'batch_size', 'hidden_nodes_g',
            'layers' and 'tanh_layer' for the Generator constructor.

    Returns:
        (gen_data, test_noise_sample): the detached generator output and the
        noise tensor that produced it.
    """
    generator = Generator(
        params['seq_len'],
        params['batch_size'],
        hidden_dim=params['hidden_nodes_g'],
        num_layers=params['layers'],
        tanh_output=params['tanh_layer'],
    ).to(device)
    # map_location lets checkpoints saved on a GPU load on a CPU-only host.
    generator.load_state_dict(torch.load(file, map_location=device))
    generator.eval()
    h_g = generator.init_hidden()

    # generate noise
    test_noise_sample = noise(params['batch_size'], params['seq_len'])
    # generate batch of data; no autograd graph is needed for evaluation
    with torch.no_grad():
        gen_data = generator(test_noise_sample, h_g).detach()

    return gen_data, test_noise_sample

## DTW_d Programmatically

In [None]:
from numba.typed import List
from numba import njit

In [None]:
def distance_matrix(Q, C):
    """Return the pairwise squared-difference matrix of two 1-D series.

    Entry [i, j] equals (Q[j] - C[i]) ** 2, so the result has shape
    (len(C), len(Q)).

    Args:
        Q: 1-D sequence of numbers (indexes the columns).
        C: 1-D sequence of numbers (indexes the rows).

    Returns:
        A float64 `np.matrix` of shape (len(C), len(Q)).

    Raises:
        ValueError: if either input is not one-dimensional.
    """
    q = np.asarray(Q)
    c = np.asarray(C)
    if q.ndim != 1 or c.ndim != 1:
        raise ValueError("distance_matrix expects one-dimensional series")

    # Broadcasting builds the whole table at once instead of a Python double
    # loop. The arithmetic stays in the input dtype and is widened to float64
    # afterwards, which matches storing each element into a float64 matrix.
    squared = (q[np.newaxis, :] - c[:, np.newaxis]) ** 2
    return np.asmatrix(squared.astype(np.float64))


## Plot the Distance Cost Plot
def distance_cost_plot(distances):
    """Draw `distances` as a red heat map with an inverted y-axis and a colour bar."""
    plt.imshow(distances, cmap='Reds', interpolation='nearest')
    current_axes = plt.gca()
    current_axes.invert_yaxis()
    plt.xlabel("X")
    plt.ylabel("Y")
    plt.grid()
    plt.colorbar()

def accumulated_costs(Q, C, distances):
    """Build the DTW accumulated-cost table for a local distance matrix.

    Args:
        Q: series whose length gives the number of columns.
        C: series whose length gives the number of rows.
        distances: local cost matrix indexed as [row, col].

    Returns:
        A float array of shape (len(C), len(Q)) where each cell is its local
        cost plus the cheapest of its left, upper and upper-left neighbours.
    """
    n_rows, n_cols = len(C), len(Q)
    acc = np.zeros((n_rows, n_cols))
    acc[0, 0] = distances[0, 0]

    # Border cells have a single predecessor: a running sum along the edge.
    for col in range(1, n_cols):
        acc[0, col] = acc[0, col - 1] + distances[0, col]
    for row in range(1, n_rows):
        acc[row, 0] = acc[row - 1, 0] + distances[row, 0]

    # Interior cells extend the cheapest of their three predecessors.
    for row in range(1, n_rows):
        above = acc[row - 1]
        current = acc[row]
        for col in range(1, n_cols):
            cheapest = min(above[col - 1], above[col], current[col - 1])
            current[col] = cheapest + distances[row, col]

    return acc

def backtrack(Q, C, accumulated_cost, plotting=True):
    """Trace the warping path from the last cell of the cost table to [0, 0].

    Args:
        Q: series whose length gives the number of columns.
        C: series whose length gives the number of rows.
        accumulated_cost: table indexed as [row, col], as produced by
            `accumulated_costs`.
        plotting: unused; kept so existing callers keep working.

    Returns:
        List of [col, row] pairs, from [len(Q)-1, len(C)-1] down to [0, 0],
        with every step moving to an adjacent cell.

    Raises:
        ValueError: if either series is empty.
    """
    if len(Q) == 0 or len(C) == 0:
        raise ValueError("backtrack requires non-empty series")

    i = len(C) - 1
    j = len(Q) - 1
    path = [[j, i]]
    # Continue until BOTH indices reach zero. The previous `and` condition
    # stopped at the first border, which skipped the remaining border cells
    # and duplicated [0, 0] when the origin was reached diagonally.
    while i > 0 or j > 0:
        if i == 0:
            j -= 1
        elif j == 0:
            i -= 1
        else:
            best = min(
                accumulated_cost[i - 1, j - 1],
                accumulated_cost[i - 1, j],
                accumulated_cost[i, j - 1],
            )
            # Tie-break order kept from the original: up, then left, then diagonal.
            if accumulated_cost[i - 1, j] == best:
                i -= 1
            elif accumulated_cost[i, j - 1] == best:
                j -= 1
            else:
                i -= 1
                j -= 1
        path.append([j, i])

    return path

def path_cost(Q, C, accumulated_cost, distances):
    """Trace the warping path and sum the local distances along it.

    Args:
        Q: series whose length gives the number of columns.
        C: series whose length gives the number of rows.
        accumulated_cost: table indexed as [row, col].
        distances: local cost matrix indexed as [row, col].

    Returns:
        (path, cost): `path` is a list of [col, row] pairs from the last cell
        down to [0, 0]; `cost` is the sum of `distances` over those cells,
        each counted once.

    Raises:
        ValueError: if either series is empty.
    """
    if len(Q) == 0 or len(C) == 0:
        raise ValueError("path_cost requires non-empty series")

    i = len(C) - 1
    j = len(Q) - 1
    path = [[j, i]]
    # Continue until BOTH indices reach zero. The previous `and` condition
    # stopped at the first border, so the cost either skipped border cells or
    # counted distances[0, 0] twice.
    while i > 0 or j > 0:
        if i == 0:
            j -= 1
        elif j == 0:
            i -= 1
        else:
            best = min(
                accumulated_cost[i - 1, j - 1],
                accumulated_cost[i - 1, j],
                accumulated_cost[i, j - 1],
            )
            # Tie-break order kept from the original: up, then left, then diagonal.
            if accumulated_cost[i - 1, j] == best:
                i -= 1
            elif accumulated_cost[i, j - 1] == best:
                j -= 1
            else:
                i -= 1
                j -= 1
        path.append([j, i])

    cost = 0
    # Path entries are [col, row]; `distances` is indexed [row, col].
    for col, row in path:
        cost = cost + distances[row, col]

    return (path, cost)

def distance_DTWd(Q, C):
    """Return the dependent-DTW local cost matrix of two multivariate series.

    Both inputs are indexed as [dimension][time]. Entry [i, j] is the sum over
    dimensions m of (Q[m][j] - C[m][i]) ** 2.

    Args:
        Q: 2-D array-like, shape (dimensions, len_q).
        C: 2-D array-like, shape (dimensions, len_c); needs at least as many
            dimensions as Q.

    Returns:
        A float64 `np.matrix` of shape (len_c, len_q).

    Raises:
        ValueError: if an input is not two-dimensional or Q has no dimensions.
        IndexError: if C has fewer dimensions than Q.
    """
    q = np.asarray(Q)
    c = np.asarray(C)
    if q.ndim != 2 or c.ndim != 2 or q.shape[0] == 0:
        raise ValueError("distance_DTWd expects non-empty 2-D series indexed [dimension][time]")

    total = None
    for m in range(q.shape[0]):
        # One broadcasted table per dimension replaces the inner Python loops.
        squared = (q[m][np.newaxis, :] - c[m][:, np.newaxis]) ** 2
        # Accumulate dimension by dimension in the input dtype, the same order
        # the element-wise loop used.
        total = squared if total is None else total + squared

    return np.asmatrix(total.astype(np.float64))

def DTW_i(Q, C):
    """Independent multivariate DTW: sum of per-dimension DTW costs.

    Args:
        Q: series indexed as [dimension][time].
        C: series indexed as [dimension][time]; needs at least as many
            dimensions as Q.

    Returns:
        The sum, over the dimensions of Q, of the warping-path cost between
        Q[d] and C[d]. Returns 0 when Q has no dimensions.
    """
    total = 0
    for dim, q_dim in enumerate(Q):
        c_dim = C[dim]
        distance = distance_matrix(q_dim, c_dim)
        acc_costs = accumulated_costs(q_dim, c_dim, distance)
        # path_cost traces the path itself, so the separate backtrack call
        # (whose result was discarded) is dropped.
        _, cost = path_cost(q_dim, c_dim, acc_costs, distance)
        total += cost
    return total

def DTW_d(Q, C):
    """Dependent multivariate DTW: one warping path shared by all dimensions.

    Args:
        Q: series indexed as [dimension][time].
        C: series indexed as [dimension][time].

    Returns:
        The warping-path cost computed on the summed-over-dimensions squared
        distance matrix from `distance_DTWd`.

    Raises:
        ValueError: if Q has no dimensions.
    """
    if len(Q) == 0:
        raise ValueError("DTW_d requires at least one dimension")

    # The distance matrix already combines every dimension, so one alignment
    # is enough. The previous loop repeated this identical computation once
    # per dimension and then took the minimum of equal values.
    # Assumes every dimension of a series has the same length (true for the
    # 2-D arrays passed by evaluate_dtw_sample); only the length of Q[0]/C[0]
    # is used below.
    distance = distance_DTWd(Q, C)
    acc_costs = accumulated_costs(Q[0], C[0], distance)
    _, cost = path_cost(Q[0], C[0], acc_costs, distance)
    return cost

In [None]:
def evaluate_dtw_sample(gen, real):
    """Return the mean DTW_d cost between index-matched generated and real samples.

    Sample i of `gen` is compared with sample i of `real`. Each sample has its
    two axes swapped before being handed to `DTW_d` as a numpy array.
    """
    scores = []
    for idx in tqdm(range(len(real))):
        real_series = real[idx].permute(1, 0)
        fake_series = gen[idx].permute(1, 0)
        fake_np = fake_series.detach().cpu().numpy()
        real_np = real_series.detach().cpu().numpy()
        # Dependent DTW; swap in DTW_i(fake_np, real_np) for the independent variant.
        scores.append(DTW_d(fake_np, real_np))

    return np.mean(scores)

In [None]:
def run_DTW_d(data_dir, results_dir, params):
    """Compute a DTW_d score per epoch for every minibatch-discrimination layer.

    For each value in params['minibatch_layer'] and each epoch in
    range(params['epochs']), the generator checkpoint for that epoch is
    loaded, one batch is generated, and the first generated sample is compared
    with the first sample of one randomly chosen real batch.

    Args:
        data_dir: directory containing the file named by params['data'].
        results_dir: directory containing 'MBD_<layer>/gen/generator_state_<epoch>.pt'.
        params: dict with 'data', 'batch_size', 'minibatch_layer', 'epochs'
            plus the keys `load_single_gen` needs.

    Returns:
        A list with one entry per MBD layer, each a list of per-epoch scores.

    Raises:
        ValueError: if the dataset yields no batches.
    """
    data_loader, num_batches = load_data(data_dir+'/'+params['data'], params['batch_size'])
    if num_batches == 0:
        raise ValueError('dataset produced no batches')
    MBD_dtw = []
    print('data_loaded')
    # Iterate through MBD layers
    for mb in params['minibatch_layer']:
        print("MBD_Layer: "+str(mb))
        D = []
        # Generate batch of data for every epoch
        for e in tqdm(range(0, params['epochs'])):
            file = (results_dir+'/MBD_'+str(mb)+'/gen/generator_state_%s.pt' % (e))
            gen_data, sample_noise = load_single_gen(file, params)
            # Pick one real batch at random. With more than two batches the
            # first and last are excluded, as before; with one or two batches
            # that range is empty (randint used to raise), so draw from all.
            if num_batches > 2:
                random_int = np.random.randint(low=1, high=num_batches-1)
            else:
                random_int = np.random.randint(low=0, high=num_batches)
            dtw_result = []
            for n_batch, sample_data in enumerate(data_loader):
                if n_batch == random_int:
                    # compare the first generated sample with the first real sample of this batch
                    dtw_result.append(evaluate_dtw_sample(gen_data[:1, :, :], sample_data[:1, :, :]))
                    # only one batch is used, so stop iterating the loader
                    break
            # mean of dtw for this epoch
            D.append(np.mean(dtw_result))
        MBD_dtw.append(D)
    return MBD_dtw

If you want to run just one evaluation

In [None]:
def run_DTW_d_short(data_dir, results_dir, params):
    """Run a single DTW_d evaluation on saved generated samples.

    Loads generated samples from '<results_dir>/Your_GAN/your_clf.pt' and real
    samples from '<data_dir>/ecg_mit_nsnr.pt', then compares the first 50 of
    each, pairing them by index.

    Args:
        data_dir: directory holding the real-data tensor file.
        results_dir: directory holding the generated-sample tensor file.
        params: unused; kept so existing callers keep working.

    Returns:
        The mean DTW_d cost over the compared sample pairs.
    """
    # The DataLoader built here previously was never used, so the extra
    # dataset load was removed.
    gen_data = torch.load(results_dir+'/Your_GAN/your_clf.pt')
    sample_data = torch.load(data_dir+'/ecg_mit_nsnr.pt')
    print('data_loaded')

    # Compare the first 50 generated samples with the first 50 real samples
    dtw_result = evaluate_dtw_sample(gen_data[:50, :, :], sample_data[:50, :, :])

    return dtw_result

This function was used in previous work. It is not used in this work.

In [None]:
def plot_dtw(results_dir, params, CB):
    """Plot per-epoch DTW values, one subplot per row of 'DTW_dependent.csv'.

    Args:
        results_dir: directory containing 'DTW_dependent.csv'; the figure is
            saved there as 'DTW_dependent.svg'.
        params: dict with 'minibatch_layer' (its length sets the number of
            subplots) and 'epochs' (length of the x-axis).
        CB: sequence of colours; only CB[0] is used.
    """
    # ndmin=2 keeps a single-row (one layer) file two-dimensional, so the loop
    # below always iterates over layers rather than over scalar values.
    dtw = np.loadtxt(results_dir+'/DTW_dependent.csv', delimiter=',', ndmin=2)
    # Share a X axis with each column of subplots
    fig, axes = plt.subplots(len(params['minibatch_layer']), 1, sharex='col', figsize=(10, 7))
    # With a single subplot matplotlib returns a bare Axes; normalise to a 1-D array.
    axes = np.atleast_1d(axes).ravel()
    plt.subplots_adjust(hspace=0.4)

    epoch_range = np.arange(0, params['epochs'], 1)
    for j in range(len(dtw)):
        axes[j].plot(epoch_range, dtw[j], c=CB[0])
        axes[j].set_title('MBD Layer: ' + str(j))
        axes[j].set_ylabel('DTW Value')
    # Label the bottom subplot, however many there are (was hard-coded to axes[4]).
    axes[-1].set_xlabel('Epoch')
    plt.savefig(results_dir+'/DTW_dependent.svg', format='svg')

R Multivariate DTW package

In [None]:
"""directories = ['LSGAN_MBD_Training', 'LSGAN_ARR_MBD_Training', 
               'SOFTDTWGAN_MBD_Training', 'SOFTDTWGAN_ARR_MBD_Training',
               'LSGAN_lNSR-RDTW_Training', 'LSGAN_lARR-RDTW_Training',
               'Loss-SenseGAN_NSRfdtw', 'Loss-SenseGAN_Arrfdtw',
               'Loss-SenseGAN_Arr']

files = ['nsr_45_clf.pt', 'arr_46_clf.pt',
         'nsr_19_clf.pt', 'arr_42_clf.pt',
         'nsr_28_clf.pt', 'arr_37_clf.pt',
         'nsr_15_clf.pt', 'arr_43_clf.pt',
         'arr_33_clf.pt']"""

def R_DTW(clf_path, data_file, params, n_real=200):
    """Mean multivariate DTW distance between real and generated samples via R's dtw.

    Every one of the first `n_real` real samples is aligned against every
    generated sample, and the 'distance' field of each alignment is averaged.

    Args:
        clf_path: path to the saved tensor of generated samples.
        data_file: path to the saved tensor of real samples.
        params: unused; kept so existing callers keep working.
        n_real: maximum number of real samples to use (default 200, the value
            that used to be hard-coded). Capped at the number available.

    Returns:
        The mean of all pairwise alignment distances.
    """
    fake = torch.load(clf_path)
    real = torch.load(data_file)

    # Never index past the end of the real set (range(200) used to raise
    # IndexError for smaller datasets).
    n_used = min(n_real, len(real))

    # NOTE(review): each sample is transposed before being handed to R.
    # R's dtw is understood to treat matrix rows as time steps; confirm the
    # transposed orientation is the intended one.

    # The step pattern and the R copies of the generated samples do not depend
    # on the real sample, so build them once instead of once per pair.
    step_pattern = R.rabinerJuangStepPattern(4, "c")
    queries = []
    for j in range(len(fake)):
        query = fake[j, :, :].detach().cpu().numpy().transpose()
        rq, cq = query.shape
        # converting numpy matrices to R matrices
        queries.append(R.matrix(query, nrow=rq, ncol=cq))

    dtw_dist = []
    for i in range(n_used):
        template = real[i, :, :].detach().cpu().numpy().transpose()
        rt, ct = template.shape
        templateR = R.matrix(template, nrow=rt, ncol=ct)

        for queryR in queries:
            # Calculate the alignment vector and corresponding distance
            alignment = R.dtw(templateR, queryR, keep=True, step_pattern=step_pattern,
                              open_begin=True, open_end=True)
            dist = alignment.rx('distance')[0][0]
            dtw_dist.append(dist)

    return np.mean(dtw_dist)

## Main

Here we use R's Multivariate DTW (`R_DTW`) for speed of computation. You can use our own DTW implementation via `run_DTW_d_short` or `run_DTW_d` instead, but it takes considerably longer to execute.

In [None]:
# Experiment configuration saved alongside the results.
params = load_params(results_dir + '/parameters.json')

# Generated samples to score and the real data to score them against.
clf_path = results_dir + '/Your_GAN/your_clf.pt'
data_file = data_dir + '/ecg_mit_nsnr.pt'

# Fast path: R's dtw package. The pure-Python alternative is:
#dtw = run_DTW_d_short(data_dir, results_dir, params)
dtw_result = np.asarray(R_DTW(clf_path, data_file, params))
print(dtw_result)

torch.Size([200, 500, 2])
3.5474548241094386
