# Optimize distance: forecasting of Lorenz data as a function of the loss-function type (RMSE vs. CRPS)

This notebook accompanies the following publication:
Paul Platzer, Arthur Avenas, Bertrand Chapron, Lucas Drumetz, Alexis Mouche, Léo Vinour. Distance Learning for Analog Methods. 2024. [⟨hal-04841334⟩](https://hal.science/hal-04841334)

It is used to run optimization algorithms for numerical experiments with Lorenz system data. In particular, we investigate how the optimal distance depends on the type of loss function (i.e. CRPS or RMSE).

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from tqdm.notebook import tqdm
from sklearn.neighbors import NearestNeighbors
import sys
sys.path.append('../../functions/.')
from analogs import apply_transform, find_analogues, compute_weights, compute_diffs, compute_mae_mad, compute_error
from distance_learning import compute_gradient_MSE, compute_gradient_CRPS, compute_regularization, learn_distance

In [2]:
# Locations, relative to this notebook, of the optimization outputs and of the Lorenz input data.
output_folder = '../../output/lorenz/'
data_folder = '../../data/lorenz/'

# Load catalog

In [3]:
# Read the Lorenz catalog. np.load keeps the .npz archive open until it is
# closed, so a context manager is used to release the file handle as soon as
# every needed array has been extracted (arrays are materialized on access).
with np.load(data_folder + 'catalog_small.npz') as npzfile:
    traj_norm = npzfile['traj_norm']  # trajectory array; sub-sampled below to build train_x / train_y
    stds = npzfile['stds']
    tau = npzfile['tau']              # tau/dt gives the sub-sampling step used below
    dt = npzfile['dt']
    Ntrain = npzfile['Ntrain']        # number of training samples kept after sub-sampling
    h_max = npzfile['h_max']

# Load RMSE optimization results

In [4]:
# Read the results of the RMSE optimization (one optimal transform per horizon).
# The archive is opened in a context manager so its file handle is closed once
# the arrays have been extracted.
with np.load(output_folder + 'optim_lorenz_horizon_variable.npz') as npzfile:

    hh_ind_done = npzfile['hh_ind_done']
    horizon = npzfile['horizon']
    transform_optim_zzvar_RMSE = npzfile['transform_optim_zzvar']
    # NOTE(review): this `k` is overwritten by `k = 200` in the CRPS
    # optimization cell below, so the value loaded here is not the one used.
    k = npzfile['k']

# CRPS optimization starting from RMSE optimization

In [5]:
## Define training explanatory variable and forecast horizon
i_h = 3  # position, within hh_ind_done, of the forecast horizon used for the CRPS experiment
stride = int(tau/dt)  # sub-sampling step shared by train_x and train_y
train_x = traj_norm[::stride][:Ntrain]
hh_ind_crps = hh_ind_done[i_h]
horizon_crps = dt * hh_ind_crps

## Set parameters for optimization
k = 200  # number of analogues (replaces the `k` loaded with the RMSE results)
nn_algo = 'kd_tree'
loo = True
corr_length_train = 0
regul_coef = [0]
n_epoch = 100
learning_rate_factor_crps = 2e2
Itrain = np.arange(len(train_x))
Ntar = 10**4 # smaller target set for optimization as CRPS computation has a higher computational cost
if Ntar > len(train_x):
    # Itar indexes rows of the training set, so it cannot exceed its size.
    raise ValueError("Ntar (%d) exceeds the number of training samples (%d)" % (Ntar, len(train_x)))
Itar = np.arange(Ntar)

### Run optimization ###

# Target variable: column index 2 of the trajectory, shifted by the forecast
# horizon and sub-sampled like train_x, reshaped as a column vector.
train_y = (traj_norm[hh_ind_crps::stride, 2][:Ntrain])[:, np.newaxis]

# Evaluate the CRPS obtained with the RMSE-optimal transform: it is both the
# starting point of the optimization and the scale of the learning rate.
train_X = apply_transform(train_x, transform_optim_zzvar_RMSE[i_h], Itrain)

nn = NearestNeighbors(algorithm=nn_algo,
                      n_neighbors=k + 1 + 2*corr_length_train)  # leave-one-out procedure + anticipating time-correlated data
nn.fit(train_X)

# `loo` and `corr_length_train` are passed from the settings above (instead of
# being hard-coded) so that the neighbour search, the initial error and the
# learning run always use the same configuration.
crps_init = compute_error(train_X[Itar], train_y, Itrain, Itar, k, nn,
                          loo=loo, corr_length_train=corr_length_train,
                          vector_out=False, error_type='CRPS')

learning_rate = learning_rate_factor_crps / crps_init

# The empty list is the sixth positional argument of learn_distance
# (presumably the test indices, hence "CRPS(test) = [nan]" in the log -- TODO confirm).
result = learn_distance(train_x, train_y, transform_optim_zzvar_RMSE[i_h], Itrain, Itar, [],
                        k=k, nn_algo=nn_algo, error_type='CRPS', n_epoch=n_epoch,
                        learning_rate=learning_rate, regul_coef=regul_coef,
                        loo=loo, corr_length_train=corr_length_train,
                        batch_size=Ntar, verbose_batch=True)

# Keep the last element of the first output and a copy of the third output
# (presumably the final transform and the CRPS values -- verify against learn_distance).
transform_optim_crps = result[0][-1].copy()
crps_optim = result[2].copy()

Starting distance-learning algorithm with the following parameters:
Error type = CRPS
Transformation type = matrix (general linear transformation)
Number of analogues = 200
Learning rate = 26601.291838246227
Number of Epochs = 100
Mini-batch size = 10000
Regularization = [0]


  0%|          | 0/100 [00:00<?, ?it/s]

epoch 1/100   |   iter. 1/1 : 
CRPS(batch) = 0.00751843 ;  CRPS(train) = 0.00751843 ;  CRPS(test) = [nan]
epoch 2/100   |   iter. 1/1 : 
CRPS(batch) = 0.00693950 ;  CRPS(train) = 0.00693950 ;  CRPS(test) = [nan]
epoch 3/100   |   iter. 1/1 : 
CRPS(batch) = 0.00651957 ;  CRPS(train) = 0.00651957 ;  CRPS(test) = [nan]
epoch 4/100   |   iter. 1/1 : 
CRPS(batch) = 0.00619344 ;  CRPS(train) = 0.00619344 ;  CRPS(test) = [nan]
epoch 5/100   |   iter. 1/1 : 
CRPS(batch) = 0.00593058 ;  CRPS(train) = 0.00593058 ;  CRPS(test) = [nan]
epoch 6/100   |   iter. 1/1 : 
CRPS(batch) = 0.00571670 ;  CRPS(train) = 0.00571670 ;  CRPS(test) = [nan]
epoch 7/100   |   iter. 1/1 : 
CRPS(batch) = 0.00560993 ;  CRPS(train) = 0.00560993 ;  CRPS(test) = [nan]
epoch 8/100   |   iter. 1/1 : 
CRPS(batch) = 0.00569505 ;  CRPS(train) = 0.00569505 ;  CRPS(test) = [nan]
epoch 9/100   |   iter. 1/1 : 
CRPS(batch) = 0.00525186 ;  CRPS(train) = 0.00525186 ;  CRPS(test) = [nan]
epoch 10/100   |   iter. 1/1 : 
CRPS(batch) = 

# Save

In [20]:
# Collect the arrays to persist under their archive keys, then write them
# in a single call to the CRPS results file.
saved_arrays = dict(
    transform_optim_crps=transform_optim_crps,
    transform_optim_rmse=transform_optim_zzvar_RMSE[i_h],
    crps_optim=crps_optim,
    horizon=horizon_crps,
    hh_ind=hh_ind_crps,
    learning_rate_factor_crps=learning_rate_factor_crps,
)
np.savez(output_folder + 'optim_lorenz_crps.npz', **saved_arrays)