# Re-run the best `n_a` and `n_b` on a larger dataset

In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from matplotlib import pyplot as plt

from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, WhiteKernel, RationalQuadratic, Product

from scipy.sparse import csr_matrix

from data import load_narx_data, load_data
from util_fun import calculate_error_nrms, use_NARX_model_in_simulation, plot_NRMS_Pred_vs_Sim

### Change settings for optimization

In [2]:
# Model orders passed to the NARX data loader and the simulation helper.
# Presumably n_a = number of past outputs, n_b = number of past inputs -- TODO confirm against load_narx_data.
n_a = 7
n_b = 3

# split; [training, validation, test]
Split = [0.6, 0.2, 0.2]

# total number of points to consider from the larger dataset (starting from index 0)
total_number_of_points = [1000, 3000, 5000, 10000]

# Values swept for the Gaussian-process `n_restarts_optimizer` setting.
restart = [1, 5, 10]

In [3]:
# Echo the model orders so the cell output records which configuration was run.
print(f'Going to use na={n_a} and nb={n_b}')

Going to use na=7 and nb=3


In [4]:
# Result grids: one row per dataset size, one column per restart setting.
result_shape = (len(total_number_of_points), len(restart))

# Initialise with NaN rather than np.ndarray(shape), which returns uninitialised
# memory: if the sweep is interrupted or a slot is never written, the results
# table then shows NaN instead of arbitrary garbage values.
NRMS_train_pred = np.full(result_shape, np.nan)
NRMS_train_sim = np.full(result_shape, np.nan)
NRMS_val_pred = np.full(result_shape, np.nan)
NRMS_val_sim = np.full(result_shape, np.nan)
NRMS_test_pred = np.full(result_shape, np.nan)
NRMS_test_sim = np.full(result_shape, np.nan)

In [5]:
# Sweep over dataset sizes and optimizer-restart counts: fit a Gaussian-process
# NARX model for every combination and record prediction and simulation NRMS
# on the train / validation / test sections.

# Fixed seed for the random hyper-parameter restarts, so re-running this cell
# reproduces the same fitted kernels.
GPR_RANDOM_STATE = 0

for i, points in enumerate(total_number_of_points):
    # The data depends only on `points`, not on the restart count, so it is
    # loaded once per dataset size instead of once per (points, restarts) pair.
    # NOTE(review): assumes the loaders are deterministic and that the helpers
    # below do not modify the loaded arrays in place -- confirm in data / util_fun.
    Xtrain_NARX, Ytrain_NARX = load_narx_data(n_a, n_b, points, section="train", split=Split, as_tensor=False)
    Xval_NARX, Yval_NARX = load_narx_data(n_a, n_b, points, section="validation", split=Split, as_tensor=False)
    Xtest_NARX, Ytest_NARX = load_narx_data(n_a, n_b, points, section="test", split=Split, as_tensor=False)

    # Raw sequences used for free-run simulation.
    Xtrain, Ytrain = load_data(section="train", split=Split, total_number_of_points=points, as_tensor=False)
    Xval, Yval = load_data(section="validation", split=Split, total_number_of_points=points, as_tensor=False)
    Xtest, Ytest = load_data(section="test", split=Split, total_number_of_points=points, as_tensor=False)

    # Dense training arrays for the fit. The previous csr_matrix(...).toarray()
    # round trip only produced dense copies; the target is kept as an (N, 1)
    # column, which is the shape that round trip yielded (assumes Ytrain_NARX
    # is 1-D or a single row -- TODO confirm).
    # Variable names are kept unchanged in case other cells reference them.
    XtrainNARXSparse = np.asarray(Xtrain_NARX)
    YtrainNARXSparse = np.asarray(Ytrain_NARX).reshape(-1, 1)

    for j, res in enumerate(restart):
        print(f"Running {points} points with {res} restarts \n")

        kernel = RBF(length_scale_bounds=[1,30]) + WhiteKernel(noise_level_bounds=[1e-7,1e-1])
        # kernel = RationalQuadratic() + WhiteKernel()
        reg = GaussianProcessRegressor(kernel, n_restarts_optimizer=res, random_state=GPR_RANDOM_STATE)

        reg.fit(XtrainNARXSparse, YtrainNARXSparse)

        # Kernel with the optimised hyper-parameters.
        print(reg.kernel_)

        # Prediction: evaluate the model on the pre-built NARX regressors.
        Ytrain_pred, Ytrain_pred_std = reg.predict(Xtrain_NARX, return_std=True)
        NRMS_train_pred[i, j] = calculate_error_nrms(Ytrain_pred, Ytrain_NARX)
        print(f'Train prediction NRMS: {NRMS_train_pred[i,j]:.2f} %')

        Yval_pred, Yval_pred_std = reg.predict(Xval_NARX, return_std=True)
        NRMS_val_pred[i, j] = calculate_error_nrms(Yval_pred, Yval_NARX)
        print(f'Validation prediction NRMS: {NRMS_val_pred[i,j]:.2f} %')

        Ytest_pred, Ytest_pred_std = reg.predict(Xtest_NARX, return_std=True)
        NRMS_test_pred[i, j] = calculate_error_nrms(Ytest_pred, Ytest_NARX)
        print(f'Test prediction NRMS: {NRMS_test_pred[i,j]:.2f} %')

        # Simulation
        def sim_model(u, y):
            """Predict one value from the concatenation of `u` and `y`.

            `u` and `y` are supplied by use_NARX_model_in_simulation
            (presumably the past inputs and past outputs -- TODO confirm).
            They are joined into a single one-row feature matrix for `reg`.
            """
            return reg.predict(np.concatenate([u, y])[None, :])[0]

        Ytrain_sim = use_NARX_model_in_simulation(Xtrain, sim_model, n_a, n_b)
        NRMS_train_sim[i, j] = calculate_error_nrms(Ytrain_sim, Ytrain)
        print(f'Train simulation NRMS: {NRMS_train_sim[i,j]:.2f} %')

        Yval_sim = use_NARX_model_in_simulation(Xval, sim_model, n_a, n_b)
        NRMS_val_sim[i, j] = calculate_error_nrms(Yval_sim, Yval)
        print(f'Validation simulation NRMS: {NRMS_val_sim[i,j]:.2f} %')

        Ytest_sim = use_NARX_model_in_simulation(Xtest, sim_model, n_a, n_b)
        NRMS_test_sim[i, j] = calculate_error_nrms(Ytest_sim, Ytest)
        print(f'Test simulation NRMS: {NRMS_test_sim[i,j]:.2f} %')

Using 600 datapoints for training set
RBF(length_scale=7.5) + WhiteKernel(noise_level=9.36e-06)
Train prediction NRMS: 0.79 %
Validation prediction NRMS: 1.78 %
Test prediction NRMS: 0.96 %
Using 600 datapoints for training set
Train simulation NRMS: 7.56 %
Validation simulation NRMS: 30.22 %
Test simulation NRMS: 21.00 %
Using 600 datapoints for training set
RBF(length_scale=7.5) + WhiteKernel(noise_level=9.36e-06)
Train prediction NRMS: 0.79 %
Validation prediction NRMS: 1.78 %
Test prediction NRMS: 0.96 %
Using 600 datapoints for training set
Train simulation NRMS: 7.56 %
Validation simulation NRMS: 30.22 %
Test simulation NRMS: 21.00 %
Using 600 datapoints for training set
RBF(length_scale=7.5) + WhiteKernel(noise_level=9.36e-06)
Train prediction NRMS: 0.79 %
Validation prediction NRMS: 1.78 %
Test prediction NRMS: 0.96 %
Using 600 datapoints for training set
Train simulation NRMS: 7.56 %
Validation simulation NRMS: 30.22 %
Test simulation NRMS: 21.00 %
Using 1800 datapoints for tr

## Final results

In [6]:
# Flatten each (dataset size x restart setting) grid row by row into one
# column per data section, for prediction and simulation separately.
df_pred = pd.DataFrame({
    'Train': NRMS_train_pred.ravel(),
    'Validation': NRMS_val_pred.ravel(),
    'Test': NRMS_test_pred.ravel(),
})
df_sim = pd.DataFrame({
    'Train': NRMS_train_sim.ravel(),
    'Validation': NRMS_val_sim.ravel(),
    'Test': NRMS_test_sim.ravel(),
})

# Row labels in the same order as ravel(): dataset size outermost, restarts innermost.
index = [
    f'{points} points; {res} restarts'
    for points in total_number_of_points
    for res in restart
]

# Side-by-side table with a top-level 'Pred' / 'Sim' column group, rounded to 2 decimals.
df_combined = pd.concat([df_pred, df_sim], axis=1, keys=['Pred', 'Sim']).round(2)
df_combined.index = index

# Display the DataFrame
display(df_combined)


Unnamed: 0_level_0,Pred,Pred,Pred,Sim,Sim,Sim
Unnamed: 0_level_1,Train,Validation,Test,Train,Validation,Test
1000 points; 1 restarts,0.79,1.78,0.96,7.56,30.22,21.0
1000 points; 5 restarts,0.79,1.78,0.96,7.56,30.22,21.0
1000 points; 10 restarts,0.79,1.78,0.96,7.56,30.22,21.0
3000 points; 1 restarts,0.77,0.7,0.98,6.07,20.47,20.7
3000 points; 5 restarts,0.77,0.7,0.98,6.07,20.47,20.7
3000 points; 10 restarts,0.77,0.7,0.98,6.07,20.47,20.7
5000 points; 1 restarts,0.73,0.7,0.77,5.58,6.34,10.23
5000 points; 5 restarts,0.73,0.7,0.77,5.58,6.34,10.23
5000 points; 10 restarts,0.73,0.7,0.77,5.58,6.34,10.23
10000 points; 1 restarts,0.65,0.86,0.65,5.18,17.23,10.17
