# Compute score of baseline analog forecasting of Lorenz data as a function of horizon and variable

This notebook accompanies the following publication:
Paul Platzer, Arthur Avenas, Bertrand Chapron, Lucas Drumetz, Alexis Mouche, Léo Vinour. Distance Learning for Analog Methods. 2024. [⟨hal-04841334⟩](https://hal.science/hal-04841334)

It computes the score of the baseline analog forecast of the Lorenz system, i.e. the forecast obtained with an unoptimized distance, at different forecast horizons and for different output variables.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from tqdm.notebook import tqdm
from sklearn.neighbors import NearestNeighbors
import sys
sys.path.append('../../functions/.')
from analogs import apply_transform, find_analogues, compute_weights, compute_diffs, compute_mae_mad, compute_error

In [2]:
# Input and output locations, given relative to the notebook's working directory.
data_folder, output_folder = (
    '../../data/lorenz/',    # where the catalog file is read from
    '../../output/lorenz/',  # where the baseline scores are written
)

# Load catalog

In [3]:
# Read every catalog field inside a context manager: np.load on an .npz archive
# returns a lazy NpzFile that keeps the underlying file open until it is closed.
# Each field is materialised as an array when indexed, so all of them remain
# usable after the `with` block has closed the archive.
with np.load(data_folder + 'catalog_small.npz') as npzfile:
    traj_norm = npzfile['traj_norm']  # sliced along its first axis to build inputs and targets
    stds = npzfile['stds']            # not used by the cells shown in this notebook
    tau = npzfile['tau']              # tau/dt is the subsampling step applied to traj_norm
    dt = npzfile['dt']                # also converts horizon indices into horizon values
    Ntrain = npzfile['Ntrain']        # number of subsampled points kept for training
    h_max = npzfile['h_max']          # not used by the cells shown in this notebook

# Compute baseline score for all horizons

In [8]:
## Define training explanatory variable and forecast horizon
stride = int(tau/dt)                         # subsampling step of the catalog, computed once
train_x = traj_norm[::stride][:Ntrain]
hh_ind = np.arange( int( 0.56 / dt ) )[1:]   # horizon offsets in samples; offset 0 is dropped
horizon = dt * hh_ind                        # same horizons expressed in units of dt

## Set parameters
transform_matrix = np.eye(3)   # identity matrix -- presumably the "unoptimized distance" baseline
k = 200                        # number of analogs (nearest neighbours)
nn_algo = 'kd_tree'
corr_length_train = 0
Itrain = np.arange(len(train_x))

## Fit the nearest-neighbour search once
# The explanatory variables do not depend on the horizon, so the transformed
# catalog and the fitted search structure are shared by all iterations below
# instead of being rebuilt for every horizon.
train_X = apply_transform(train_x, transform_matrix, Itrain)
nn = NearestNeighbors( algorithm = nn_algo ,
                          n_neighbors = k + 1 + 2*corr_length_train ) # leave-one-out procedure + anticipating time-correlated data
nn.fit(train_X)

## Empty lists to store baseline scores
hh_ind_done = []   # horizon indices actually processed (useful if the loop is interrupted)
mse0_whole = []    # scores when forecasting all three variables
mse0_zzvar = []    # scores when forecasting the third variable only

for i, h_ind in enumerate(tqdm(hh_ind)):

    ### Compute baseline score ###

    print('')
    print('Horizon = '+str(h_ind*dt))

    # Targets: the subsampled trajectory shifted forward by h_ind samples.
    train_y_whole = traj_norm[h_ind::stride][:Ntrain]

    ## First case : output variables =  all 3 variables

    train_y = train_y_whole

    # corr_length_train is passed explicitly so that it always matches the
    # value used above to size the neighbour search.
    mse_baseline = compute_error(train_X, train_y, Itrain, Itrain, k, nn, loo=True, corr_length_train=corr_length_train, vector_out=False, error_type='MSE')

    mse0_whole.append( mse_baseline.copy() )

    print('Baseline score for forecast of (x,y,z) = ' + str(mse_baseline)[:10])

    ## Second case : output variable = z variable

    # Column 2 only, kept two-dimensional with a single output column.
    train_y = train_y_whole[:,2][:,np.newaxis]

    mse_baseline = compute_error(train_X, train_y, Itrain, Itrain, k, nn, loo=True, corr_length_train=corr_length_train, vector_out=False, error_type='MSE')

    print('Baseline score for forecast of (z) = ' + str(mse_baseline)[:10])

    mse0_zzvar.append( mse_baseline.copy() )
    hh_ind_done.append( h_ind.copy() )

  0%|          | 0/55 [00:00<?, ?it/s]


Horizon = 0.01
Baseline score for forecast of (x,y,z) = 0.00112896
Baseline score for forecast of (z) = 0.00045717

Horizon = 0.02
Baseline score for forecast of (x,y,z) = 0.00112688
Baseline score for forecast of (z) = 0.00044188

Horizon = 0.03
Baseline score for forecast of (x,y,z) = 0.00113670
Baseline score for forecast of (z) = 0.00044037

Horizon = 0.04
Baseline score for forecast of (x,y,z) = 0.00115704
Baseline score for forecast of (z) = 0.00045114

Horizon = 0.05
Baseline score for forecast of (x,y,z) = 0.00118645
Baseline score for forecast of (z) = 0.00047147

Horizon = 0.06
Baseline score for forecast of (x,y,z) = 0.00122345
Baseline score for forecast of (z) = 0.00049775

Horizon = 0.07
Baseline score for forecast of (x,y,z) = 0.00126664
Baseline score for forecast of (z) = 0.00052607

Horizon = 0.08
Baseline score for forecast of (x,y,z) = 0.00131489
Baseline score for forecast of (z) = 0.00055292

Horizon = 0.09
Baseline score for forecast of (x,y,z) = 0.00136744
Base

In [5]:
# Turn the per-horizon Python lists filled by the loop into NumPy arrays,
# all three in a single unpacking assignment.
hh_ind_done, mse0_whole, mse0_zzvar = (
    np.array(values) for values in (hh_ind_done, mse0_whole, mse0_zzvar)
)

In [6]:
# Write the baseline scores to a single .npz archive in the output folder.
# Each dict key becomes the name of an array inside the archive.
baseline_scores = {
    'hh_ind_done': hh_ind_done,   # horizon indices that were processed
    'horizon': horizon,           # horizon values (dt * horizon index)
    'mse0_whole': mse0_whole,     # scores for the forecast of all three variables
    'mse0_zzvar': mse0_zzvar,     # scores for the forecast of the z variable only
    'k': k,                       # number of analogs used
}
np.savez(output_folder + 'score_baseline_lorenz_horizon_variable.npz', **baseline_scores)