In [1]:
# Imports
import numpy as np
from utils.normalisation import normalise_arrays

# Generate a small dataset for checking normalisation by hand
data_in = np.arange(0, 4, 1).reshape(-1, 2)
data_out = np.arange(1, 5, 1).reshape(-1, 2)

# ShiftScale function and normtype

# Check default shift scale should throw out the same guy
print("ShiftScale")
print(normalise_arrays([data_in, data_out], norm_type="ShiftScale")) #( array([[0, 1], [2, 3]]), array([[1, 2], [3, 4]]) ), 0, 1)
# Check shift scale for some standard example scalar example
print(normalise_arrays([data_in, data_out], norm_type="ShiftScale", shift=2, scale=4)) #( array([[0, 2], [4, 6]]), array([[2, 4], [6, 8]]) ), 2, 4)
# Check shift scale for some example with array shifts and scales
print(normalise_arrays([data_in, data_out], norm_type="ShiftScale", shift=[0, 1], scale=[1, 2])) #( array([[0, 4], [2, 8]]), array([[1, 6], [3, 10]]) ), [0,1], [1,2]) 
# Check that unshift and unscaling gives the original datas
print(normalise_arrays([ np.array([[0, 4], [2, 8]]), np.array([[1, 6], [3, 10]]) ], norm_type="ShiftScale", shift=[0, -2], scale=[1, 1/2])) #( array([[0, 1], [2, 3]]), array([[1, 2], [3, 4]]) ), [0, -1], [1, 1/2])

# NormStd
print("NormStd")
print(normalise_arrays([data_in, data_out], norm_type="NormStd")) # ([ [[1, -1], [1, 1]], [[0, 0], [2, 2]], [-1, -2], [1, 1])

# MinMax 
print("MinMax")
# Check the default MinMax works
print(normalise_arrays([data_in, data_out], norm_type="MinMax")) # ([ [[0, 0], [1, 1]], [[1/2, 1/2], [3/2, 3/2] ], [0, -1], [1/2, 1/2])
# Check that a custom MinMax works
print(normalise_arrays([data_in, data_out], norm_type="MinMax", minmax_range=(0, 2))) # ([ [[0, 0], [2, 2]], [[1, 1], [3, 3] ], [0, -1], [1, 1])

# ScaleL2
print("ScaleL2")
scalel2 = normalise_arrays([data_in, data_out], norm_type="ScaleL2")
print(scalel2) # ([ [[0, 1/sqrt(13)], [2/sqrt(13), 3/sqrt(13)]], [[1/sqrt(13), 2/sqrt(13)], [3/sqrt(13), 4/sqrt(13)] ], 0, 1/sqrt(13))
# Check the mean is scaled but not the same
print(np.sqrt(13)*np.mean(scalel2[0][0], axis=0), np.mean(data_in, axis=0))
print(np.sqrt(13)*np.mean(scalel2[0][1], axis=0), np.mean(data_out, axis=0))
# Check that the max norm of the first array is 1
print(np.max([np.linalg.norm(z) for z in scalel2[0][0]]))

# ScaleL2Shift
print("ScaleL2Shift")
scalel2shift = normalise_arrays([data_in, data_out], norm_type="ScaleL2Shift")
print(scalel2shift) # ([ [[-1, -1], [1, 1]], [[-1/sqrt(2), -1/sqrt(2)], [1/sqrt(2), 1/sqrt(2)] ], [-1, -2], 1/sqrt(2))
# Check the mean of the first array is now 0 and the mean of the second array is shifted in mean correctly
print(np.mean(scalel2shift[0][0], axis=0), np.mean(data_in, axis=0))
print(np.mean(scalel2shift[0][1], axis=0), scalel2shift[2]*(np.mean(data_out, axis=0) + scalel2shift[1]))
# Check that the max norm of the first array is 1
print(np.max([np.linalg.norm(z) for z in scalel2[0][0]]))

# Check None really does nothing
print(normalise_arrays([data_in, data_out], norm_type=None)) #( array([[0, 1], [2, 3]]), array([[1, 2], [3, 4]]) ), 0, 1)

ShiftScale
([array([[0, 1],
       [2, 3]]), array([[1, 2],
       [3, 4]])], 0, 1)
([array([[ 8, 12],
       [16, 20]]), array([[12, 16],
       [20, 24]])], 2, 4)
([array([[0, 4],
       [2, 8]]), array([[ 1,  6],
       [ 3, 10]])], [0, 1], [1, 2])
([array([[0., 1.],
       [2., 3.]]), array([[1., 2.],
       [3., 4.]])], [0, -2], [1, 0.5])
NormStd
([array([[-1., -1.],
       [ 1.,  1.]]), array([[0., 0.],
       [2., 2.]])], array([-1., -2.]), array([1., 1.]))
MinMax
([array([[0., 0.],
       [1., 1.]]), array([[0.5, 0.5],
       [1.5, 1.5]])], array([-0., -1.]), array([0.5, 0.5]))
([array([[0., 0.],
       [2., 2.]]), array([[1., 1.],
       [3., 3.]])], array([-0., -1.]), array([1., 1.]))
ScaleL2
([array([[0.        , 0.2773501 ],
       [0.5547002 , 0.83205029]]), array([[0.2773501 , 0.5547002 ],
       [0.83205029, 1.10940039]])], 0, 0.2773500981126146)
[1. 2.] [1. 2.]
[2. 3.] [2. 3.]
1.0
ScaleL2Shift
([array([[-0.70710678, -0.70710678],
       [ 0.70710678,  0.70710678]]), arr

In [2]:
# Check error functions

from utils.errors import calculate_mse, calculate_nmse, calculate_wasserstein1err
from utils.normalisation import normalise_arrays

data_true = np.zeros((2, 2))
data1 = np.arange(0, 4, 1).reshape(-1, 2)
data2 = np.arange(1, 5, 1).reshape(-1, 2)

# Check mse without shifting and scaling
print(calculate_mse(data_true, data1))  # 3.5
print(np.mean(np.mean(data1**2, axis=0)))

# Check mse with shifting and scaling gives back the same as if the unshifted unscaled data was used
data_normed, shift, scale = normalise_arrays([data1, data2], norm_type="ShiftScale", shift=1, scale=2)
print(calculate_mse(data_normed[0], data_normed[1], shift, scale))
print(calculate_mse(data1, data2))

# Check nmse 
print(calculate_nmse(data1, data_true)) # 1.0

# Check nmse with shifting and scaling gives back the same as if the unshifted unscaled data was used
data_normed, shift, scale = normalise_arrays([data1, data2], norm_type="ShiftScale", shift=1, scale=2)
print(calculate_nmse(data_normed[0], data_normed[1], shift, scale))
print(calculate_nmse(data1, data2))

# Check nmse with shifting and scaling gives back the same as if the unshifted unscaled data was used
data_normed, shift, scale = normalise_arrays([data1, data2], norm_type="ShiftScale", shift=1, scale=2)
print(calculate_nmse(data_normed[0], data_normed[1], shift, scale))
print(calculate_nmse(data1, data2))

3.5
3.5
1.0
1.0
1.0
0.35
0.35
0.35
0.35


In [8]:
# Check that the cv code splits data and normalises correctly

import numpy as np
from utils.crossvalidation import CrossValidate
from utils.normalisation import normalise_arrays

# Define a small dataset
data_in = np.arange(0, 5, 1).reshape(-1, 1)
data_out = np.arange(1, 6, 1).reshape(-1, 1)

# Check the data splitting - with one train-validation set
CV = CrossValidate(validation_parameters=[2, 2, 0], validation_type="rolling", task="PathContinue")
print(CV.split_data_to_folds(data_in, data_out))

# With multiple train-validation set
CV = CrossValidate(validation_parameters=[2, 2, 1], validation_type="rolling", task="PathContinue")
print(CV.split_data_to_folds(data_in, data_out))

# Check with normalisation that its correct
# Normalise arrays normally 
print(normalise_arrays([data_in[0:2], data_in[2:]], norm_type="MinMax")[0])
print(normalise_arrays([data_out[0:2], data_out[2:]], norm_type="ShiftScale", shift=1, scale=2)[0])
# Check that the splitting datafodls normalises it correctly
CV = CrossValidate(validation_parameters=[2, 2, 0], validation_type="rolling", task="PathContinue",
                   norm_type_in="MinMax", norm_type_target="ShiftScale", shift_target=1, scale_target=2)
print(CV.split_data_to_folds(data_in, data_out))


[([array([[0],
       [1],
       [2]]), array([[1],
       [2],
       [3]]), array([[3],
       [4]]), array([[4],
       [5]])], 0, 1)]
[([array([[0],
       [1]]), array([[1],
       [2]]), array([[2],
       [3]]), array([[3],
       [4]])], 0, 1), ([array([[1],
       [2]]), array([[2],
       [3]]), array([[3],
       [4]]), array([[4],
       [5]])], 0, 1)]
[array([[0.],
       [1.]]), array([[2.],
       [3.],
       [4.]])]
[array([[4],
       [6]]), array([[ 8],
       [10],
       [12]])]
[([array([[0. ],
       [0.5],
       [1. ]]), array([[0.5],
       [1. ],
       [1.5]]), array([[1.5],
       [2. ]]), array([[2. ],
       [2.5]])], array([-0.]), array([0.5]))]


In [16]:
# Sets the default math computation in numpy to not parallelise (might be MKL)
import os
os.environ['OPENBLAS_NUM_THREADS'] = '1'    

import numpy as np
from time import time
from datagen.data_generate_dde import dde_rk45
from systems.ddes import mackeyglass
from utils.crossvalidation import CrossValidate
from utils.normalisation import normalise_arrays
from estimators.volt_funcs import Volterra
from estimators.ngrc_funcs import NGRC

from crossvalidation2 import CrossValidate as CrossValidate2
# Create the MG dataset
def init(t):
    return 1.2

mg_args = {'delay': 17, 'a': 0.2, 'b': 0.1, 'n': 10 }

h = 0.02
n_intervals = 200
slicing = int(1 / h)

data = dde_rk45(n_intervals, init, mackeyglass, h, mg_args)[1][::slicing]

# Define the training and washout size
ntrain = 3000
washout = 2

# Construct training input and teacher, testing input and teacher
training_input_orig = data[0:ntrain-1] 
training_teacher_orig = data[1:ntrain]

# Normalise training arrays if necessary
normalisation_output = normalise_arrays([training_input_orig, training_teacher_orig], norm_type=None)
training_input, training_teacher = normalisation_output[0]

# Define the range of parameters for which you want to cross validate over
ndelay_range = [2] 
deg_range = [2]
reg_range = [0.001]
param_ranges = [ndelay_range, deg_range, reg_range]

# Define additional input parameters
param_add = [washout, True]

# Instantiate CV, split dataset, crossvalidate in parallel
CV = CrossValidate(validation_parameters=[2000, 500, 100], validation_type="rolling", manage_remainder=False, 
                    task="PathContinue", norm_type_in=None, 
                    error_type="meansquare", log_interval=100)
cv_datasets = CV.split_data_to_folds(training_input, training_teacher)
print(cv_datasets[-1][0][0])
min_error, best_parameters = CV.crossvalidate(NGRC, cv_datasets, param_ranges, param_add, 
                                                num_processes=1, chunksize=1)    
CV = CrossValidate2(validation_parameters=[2000, 500, 100], validation_type="rolling", 
                    task="PathContinue", norm_type=None, 
                    error_type="meansquare", log_interval=100)
cv_datasets2 = CV.split_data_to_folds(training_input, training_teacher)
min_error2, best_parameters2 = CV.crossvalidate(NGRC, cv_datasets, param_ranges, param_add, 
                                                num_processes=1, chunksize=1) 
print(cv_datasets[-1][0][0])    

[[1.23588789]
 [1.23674888]
 [1.22177835]
 ...
 [1.05405358]
 [1.00188219]
 [0.95218652]]
[[1.23588789]
 [1.23674888]
 [1.22177835]
 ...
 [1.05405358]
 [1.00188219]
 [0.95218652]]


In [10]:
min_error

0.055381877141539596

In [11]:
min_error2

0.055357373291253745