In [None]:
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.ticker as ticker

from os import path
from glob import glob
import sys
import csv
sys.path.insert(0, path.abspath('./'))

from src import workdir, parse_model_parameter_file
from src.emulator_BAND import EmulatorBAND
from src.emulator import Emulator


# Define functions to compare multiple GP emulators 

In [None]:
def rms_abs_prediction_err(emu_pred,vali_true):
    """Root-mean-square relative prediction error for every observable.

    For each observable (column) the relative deviation
    ``(emu_pred - vali_true) / vali_true`` is squared, averaged over all
    test points (rows) and the square root is taken.  Despite the
    historical name, the error is *relative* to the validation value.

    Parameters
    ----------
    emu_pred : array of shape (n_test_points, n_observables)
        Emulator predictions.
    vali_true : array indexable as [test point, observable]
        Validation values; only the part overlapping ``emu_pred`` is used.

    Returns
    -------
    numpy.ndarray of shape (n_observables,)
        One RMS relative error per observable.  Entries are ``nan``/``inf``
        where a validation value is zero or when there are no test points.
    """
    emu_pred = np.asarray(emu_pred, dtype=float)
    n_points, n_obs = emu_pred.shape[0], emu_pred.shape[1]
    # Restrict the validation data to the region the emulator predicted, as
    # the element-wise loop this replaces only ever touched these entries.
    vali_true = np.asarray(vali_true, dtype=float)[:n_points, :n_obs]
    relative_deviation = (emu_pred - vali_true) / vali_true
    # Column-wise mean replaces the per-observable scratch array and the
    # explicit double loop.
    return np.sqrt(np.mean(relative_deviation**2., axis=0))

def how_honest_is_GP(emu_pred,emu_pred_err,vali_true):
    """Root-mean-square of the uncertainty-normalised residuals per observable.

    For each observable (column) the residual ``emu_pred - vali_true`` is
    divided by the emulator's own predicted uncertainty ``emu_pred_err``,
    squared, averaged over all test points (rows) and square-rooted.

    Parameters
    ----------
    emu_pred : array of shape (n_test_points, n_observables)
        Emulator predictions.
    emu_pred_err : array indexable as [test point, observable]
        Emulator uncertainty for each prediction.
    vali_true : array indexable as [test point, observable]
        Validation values.

    Returns
    -------
    numpy.ndarray of shape (n_observables,)
        One value per observable; ``nan``/``inf`` where a predicted
        uncertainty is zero or when there are no test points.
    """
    emu_pred = np.asarray(emu_pred, dtype=float)
    n_points, n_obs = emu_pred.shape[0], emu_pred.shape[1]
    # Only the entries overlapping ``emu_pred`` are used, matching the
    # element-wise loop this replaces.
    emu_pred_err = np.asarray(emu_pred_err, dtype=float)[:n_points, :n_obs]
    vali_true = np.asarray(vali_true, dtype=float)[:n_points, :n_obs]
    normalised_residual = (emu_pred - vali_true) / emu_pred_err
    return np.sqrt(np.mean(normalised_residual**2., axis=0))

def train_multiple_emulators(training_set, model_par, number_test_points, logFlag, parameterTrafoPCAFlag):
    """Build three emulators on the same data and collect validation statistics.

    The emulators are ``EmulatorBAND`` with ``method='PCGP'``,
    ``EmulatorBAND`` with ``method='PCSK'`` and ``Emulator`` with ``npc=4``.
    Each is validated through its ``testEmulatorErrors`` method; entries
    0, 1 and 2 of that method's result are used here as prediction,
    predicted uncertainty and validation value (naming taken from this
    notebook -- TODO confirm against the emulator classes).

    Returns four 3-tuples (one entry per emulator, in the order above):
    the standardised residuals per observable, their first four moments
    ``(mean, variance, skewness, excess kurtosis)`` per observable, the RMS
    relative prediction errors and the honesty measure.
    """
    shared_options = dict(logTrafo=logFlag, parameterTrafoPCA=parameterTrafoPCAFlag)

    # All three emulators are constructed before any of them is validated.
    pcgp = EmulatorBAND(training_set, model_par, method='PCGP', **shared_options)
    pcsk = EmulatorBAND(training_set, model_par, method='PCSK', **shared_options)
    scikit_gp = Emulator(training_set, model_par, npc = 4, **shared_options)

    # Note the different keyword name used by the two emulator classes.
    validation_runs = [
        pcgp.testEmulatorErrors(number_test_points=number_test_points),
        pcsk.testEmulatorErrors(number_test_points=number_test_points),
        scikit_gp.testEmulatorErrors(nTestPoints=number_test_points),
    ]

    predictions, prediction_errors, truths = [], [], []
    for run in validation_runs:
        # The fourth entry (validation uncertainty) is read but not used.
        pred, pred_err, truth, _truth_err = (run[k] for k in range(4))
        predictions.append(pred)
        prediction_errors.append(pred_err)
        truths.append(truth)

    # Assuming all datasets have the same number of observables
    n_obs = truths[0].shape[1]

    def _first_four_moments(sample):
        """Mean, variance, skewness and excess kurtosis of ``sample``."""
        centre = np.mean(sample)
        deviation = sample - centre
        variance = np.mean(deviation**2.)
        skewness = np.mean(deviation**3.) / variance**(3./2.)
        kurtosis = np.mean(deviation**4.) / variance**(4./2.) - 3.
        return (centre, variance, skewness, kurtosis)

    rel_errors = tuple(
        rms_abs_prediction_err(pred, truth)
        for pred, truth in zip(predictions, truths)
    )
    honesty = tuple(
        how_honest_is_GP(pred, pred_err, truth)
        for pred, pred_err, truth in zip(predictions, prediction_errors, truths)
    )

    # Residuals in units of the emulator's own predicted uncertainty.
    residuals = tuple(
        [(pred[:, k] - truth[:, k]) / pred_err[:, k] for k in range(n_obs)]
        for pred, pred_err, truth in zip(predictions, prediction_errors, truths)
    )
    moments = tuple(
        [_first_four_moments(sample) for sample in per_emulator]
        for per_emulator in residuals
    )

    return residuals, moments, rel_errors, honesty


Functions for writing the output of the different GPs to file

In [None]:
def write_output_to_csv_uncertainties(filename, data):
    """Write every element of ``data`` as its own single-column CSV row.

    ``filename`` is opened for writing (an existing file is overwritten).
    """
    with open(filename, 'w', newline='') as handle:
        csv.writer(handle).writerows([value] for value in data)

def write_output_to_csv_moments_and_X(filename, data):
    """Write every element of ``data`` (itself a sequence) as one CSV row.

    ``filename`` is opened for writing (an existing file is overwritten).
    """
    with open(filename, 'w', newline='') as handle:
        csv.writer(handle).writerows(data)

def train_multiple_emulators_and_write_to_csv(training_set, model_par, number_test_points, output_file, logFlag, parameterPCAFlag):
    """Run ``train_multiple_emulators`` and dump every result to its own file.

    Twelve files are written.  Each name starts with
    ``output_file + '_<number_test_points>'`` and ends with one of
    ``_X<k>_obs.dat``, ``_moments_X<k>.dat``, ``_pred_err_obs_<k>.dat`` or
    ``_GP_honesty_obs_<k>.dat`` for emulator index ``k`` = 1, 2, 3.
    """
    residuals, moments, rel_errors, honesty = train_multiple_emulators(
        training_set, model_par, number_test_points, logFlag, parameterPCAFlag
    )
    stem = output_file + f'_{number_test_points}'

    # Standardised residuals, one row per observable
    for k, per_emulator in enumerate(residuals, start=1):
        write_output_to_csv_moments_and_X(stem + f'_X{k}_obs.dat', per_emulator)

    # First four moments of the residuals, one row per observable
    for k, per_emulator in enumerate(moments, start=1):
        write_output_to_csv_moments_and_X(stem + f'_moments_X{k}.dat', per_emulator)

    # RMS relative prediction error, one value per line
    for k, per_emulator in enumerate(rel_errors, start=1):
        write_output_to_csv_uncertainties(stem + f'_pred_err_obs_{k}.dat', per_emulator)

    # Honesty measure, one value per line
    for k, per_emulator in enumerate(honesty, start=1):
        write_output_to_csv_uncertainties(stem + f'_GP_honesty_obs_{k}.dat', per_emulator)

Read functions for the different file types

In [None]:
def read_emulator_file_errors(filename):
    """Return the file's lines as a list of floats (one number per line)."""
    with open(filename, 'r') as handle:
        return [float(raw_line.strip()) for raw_line in handle]

def read_emulator_file_moments(filename):
    """Return one list of floats per line of a comma-separated file."""
    with open(filename, 'r') as handle:
        return [
            [float(token) for token in raw_line.strip().split(',')]
            for raw_line in handle
        ]

def read_multiple_emulator_errors_files(number_test_points_list,foldername,filename):
    """Load the prediction-error and honesty files of all three emulators.

    For every entry ``i`` of ``number_test_points_list`` the files
    ``./<foldername>/<filename>_<i>_pred_err_obs_<k>.dat`` and
    ``./<foldername>/<filename>_<i>_GP_honesty_obs_<k>.dat`` (``k`` = 1, 2, 3)
    are read with ``read_emulator_file_errors``.

    Returns ``(errors, honesty)``; each is a 3-tuple (one entry per
    emulator) of lists holding one file's content per element of
    ``number_test_points_list``.
    """
    def _collect(kind):
        per_emulator = ([], [], [])
        for n_points in number_test_points_list:
            prefix = f"./{foldername}/{filename}_{n_points}"
            for slot, bucket in enumerate(per_emulator, start=1):
                bucket.append(read_emulator_file_errors(f"{prefix}_{kind}_obs_{slot}.dat"))
        return per_emulator

    # All prediction-error files are read before the honesty files.
    errors = _collect("pred_err")
    honesty = _collect("GP_honesty")
    return errors, honesty

def read_multiple_moments_files(number_test_points_list,foldername,filename):
    """Load the moment files of all three emulators.

    For every entry ``i`` of ``number_test_points_list`` the files
    ``./<foldername>/<filename>_<i>_moments_X<k>.dat`` (``k`` = 1, 2, 3) are
    read with ``read_emulator_file_moments``.  Returns a 3-tuple of lists,
    one per emulator.
    """
    per_emulator = ([], [], [])
    for n_points in number_test_points_list:
        prefix = f"./{foldername}/{filename}_{n_points}"
        for slot, bucket in enumerate(per_emulator, start=1):
            bucket.append(read_emulator_file_moments(f"{prefix}_moments_X{slot}.dat"))
    return per_emulator

def read_multiple_X_files(number_test_points_list,foldername,filename):
    """Load the standardised-residual files of all three emulators.

    For every entry ``i`` of ``number_test_points_list`` the files
    ``./<foldername>/<filename>_<i>_X<k>_obs.dat`` (``k`` = 1, 2, 3) are read
    with ``read_emulator_file_moments``.  Returns a 3-tuple of lists, one
    per emulator.
    """
    per_emulator = ([], [], [])
    for n_points in number_test_points_list:
        prefix = f"./{foldername}/{filename}_{n_points}"
        for slot, bucket in enumerate(per_emulator, start=1):
            bucket.append(read_emulator_file_moments(f"{prefix}_X{slot}_obs.dat"))
    return per_emulator


Generate the data files for a different number of test points

In [None]:
# Model design file passed to every emulator.
model_par = "../data/modelDesign_3DMCGlauber.txt"

# Training sets and the matching output-file stems; the lists are paired by
# position in the generation loops below.  The *_LOGdNdy stems are used for
# the runs with the log transformation switched on.
# NOTE(review): the plotting cells further down also read files with the stem
# "19p6PHOBOS_dNdeta_emu_out", which is not produced from these lists -- confirm
# where those files come from.
training_set_list_dNdy = ["../data/AuAu7.7_dNdy.pkl","../data/AuAu19p6_dNdy.pkl","../data/AuAu200_dNdy.pkl","../data/AuAu200_PHOBOSdNdeta.pkl"]
output_file_list_dNdy = ["./emulator_output/7p7_dNdy_emu_out","./emulator_output/19p6_dNdy_emu_out","./emulator_output/200_dNdy_emu_out","./emulator_output/200PHOBOS_dNdeta_emu_out"]
output_file_list_LOGdNdy = ["./emulator_output/7p7_LOGdNdy_emu_out","./emulator_output/19p6_LOGdNdy_emu_out","./emulator_output/200_LOGdNdy_emu_out","./emulator_output/200PHOBOS_LOGdNdeta_emu_out"]

training_set_list_pTvn = ["../data/AuAu7.7_pTvn.pkl","../data/AuAu19p6_pTvn.pkl","../data/AuAu200_pTvn.pkl", "../data/AuAu200_PHOBOSv2eta.pkl"]
output_file_list_pTvn = ["./emulator_output/7p7_pTvn_emu_out","./emulator_output/19p6_pTvn_emu_out","./emulator_output/200_pTvn_emu_out","./emulator_output/200PHOBOS_vn_emu_out"]

In [None]:
# dNdy data sets, no log transformation, no parameter PCA
for n_test_points in range(900, 0, -100):
    for training_file, output_stem in zip(training_set_list_dNdy, output_file_list_dNdy):
        train_multiple_emulators_and_write_to_csv(training_file, model_par, n_test_points, output_stem, False, False)

In [None]:
# dNdy data sets with the log transformation switched on, no parameter PCA
for n_test_points in range(900, 0, -100):
    for training_file, output_stem in zip(training_set_list_dNdy, output_file_list_LOGdNdy):
        train_multiple_emulators_and_write_to_csv(training_file, model_par, n_test_points, output_stem, True, False)

In [None]:
# pTvn data sets, no log transformation, no parameter PCA
for n_test_points in range(900, 0, -100):
    for training_file, output_stem in zip(training_set_list_pTvn, output_file_list_pTvn):
        train_multiple_emulators_and_write_to_csv(training_file, model_par, n_test_points, output_stem, False, False)

Read the files for different numbers of test points and plot the data

In [None]:
# Select the emulator-output family to inspect: keep exactly one
# ``filename_prefix`` assignment active.
filename_prefix = "7p7_dNdy_emu_out"
##filename_prefix = "19p6_dNdy_emu_out"
##filename_prefix = "200_dNdy_emu_out"
##filename_prefix = "200PHOBOS_dNdeta_emu_out"

##filename_prefix = "7p7_LOGdNdy_emu_out"
##filename_prefix = "19p6_LOGdNdy_emu_out"
##filename_prefix = "200_LOGdNdy_emu_out"
##filename_prefix = "200PHOBOS_LOGdNdeta_emu_out"

##filename_prefix = "7p7_pTvn_emu_out"
##filename_prefix = "19p6_pTvn_emu_out"
##filename_prefix = "200_pTvn_emu_out"
##filename_prefix = "200PHOBOS_vn_emu_out"

# Folder the generation cells above write to ("./emulator_output/...").
# The read helpers take (number_test_points_list, foldername, filename); the
# folder argument was missing here, which raised a TypeError.
output_folder = "emulator_output"
number_test_points_list = [900,800,700,600,500,400,300,200,100]

err, hon = read_multiple_emulator_errors_files(number_test_points_list, output_folder, filename_prefix)
#mom1, mom2, mom3 = read_multiple_moments_files(number_test_points_list, output_folder, filename_prefix)
#X1, X2, X3 = read_multiple_X_files(number_test_points_list, output_folder, filename_prefix)

Quick summary:
- The AuAu7.7_dNdy data set does not discard any of the 1100 training points.
- The AuAu7.7_pTvn data set discards 40 of the 1100 training points due to large statistical errors.
- The AuAu19p6_dNdy data set does not discard any of the 1100 training points.
- The AuAu19p6_pTvn data set discards 5 of the 1100 training points due to large statistical errors.
- The AuAu19p6_PHOBOS_dNdeta data set does not discard any of the 1100 training points.
- The AuAu200_dNdy data set does not discard any of the 1100 training points.
- The AuAu200_pTvn data set discards 46 of the 1100 training points due to large statistical errors.
- The AuAu200_PHOBOS_dNdeta data set does not discard any of the 1100 training points.
- The AuAu200_PHOBOSv2eta data set does not discard any of the 1100 training points.

In [None]:
# Alternative x-axis lists kept for reference -- presumably the number of
# training points left for each test-point count and data set (TODO confirm).
#[60,160,260,360,460,560,660,760,860]
#[195,295,395,495,595,695,795,895,995]
#[200,300,400,500,600,700,800,900,1000]
#[154,254,354,454,554,654,754,854,954]
#[99,199,299,399,499,599,699,799,899]
# NOTE(review): plot_emulator_errors_combined (and the two plot helpers in the
# commented-out calls) are neither defined nor imported in the cells above;
# they must be available before this cell can run on a fresh kernel.
# NOTE(review): only two x-values ([100,200]) are passed here, while `err`
# was read for nine test-point counts in the previous cell -- verify that the
# lengths are meant to differ.
plot_emulator_errors_combined(err[0],err[1],err[2],[100,200],"./AuAu7p7_emu_uncertainty_dNdy",(7,3))
#plot_emulator_moments_combined(mom1,mom2,mom3,[200,300,400,500,600,700,800,900,1000,1070],"./AuAu19p6_emu_moment_LOGdNdy",(7,3))
#plot_emulator_X_combined(X1[-1],X2[-1],X3[-1],"./AuAu19p6_emu_Xhist_LOGdNdy_1070trainingpoints",(7,3))

In [None]:
def perform_power_law_fit_rel_errors(err1, err2, err3, number_training_points, number_observables):
    """Fit a straight line in log-log space to each emulator's errors and print it.

    ``err1``, ``err2`` and ``err3`` are sequences with one entry per element
    of ``number_training_points``; every entry is indexable by observable.
    For each observable index below ``number_observables`` a first-degree
    ``np.polyfit`` of log(error) against log(number of training points) is
    done per emulator and slope and intercept are printed.  Nothing is
    returned.
    """
    log_n_train = np.log(number_training_points)

    for obs in range(number_observables):
        log_errors = [
            np.log([entry[obs] for entry in per_emulator])
            for per_emulator in (err1, err2, err3)
        ]
        # All three fits are finished before anything is printed.
        fits = [np.polyfit(log_n_train, log_err, 1) for log_err in log_errors]

        for fit_number, coefficients in enumerate(fits, start=1):
            print(f"Observable {obs + 1} - Fit {fit_number}: slope={coefficients[0]}, intercept={coefficients[1]}")


    

Final plot functions

In [None]:
# Data sets shown in the summary figures.  The first six go into the upper
# panel of each figure, the remaining ones into the lower panel.
filename_prefix_list = ["7p7_dNdy_emu_out","7p7_pTvn_emu_out","19p6_dNdy_emu_out","19p6_pTvn_emu_out","200_dNdy_emu_out","200_pTvn_emu_out","19p6PHOBOS_dNdeta_emu_out","200PHOBOS_dNdeta_emu_out","200PHOBOS_vn_emu_out"]
foldername = 'emulator_output_PCA'

# One list per emulator (1: PCGP, 2: PCSK, 3: Scikit GP); every entry is the
# result for one data set, read for 100 test points.
err1_list = []
err2_list = []
err3_list = []
hon1_list = []
hon2_list = []
hon3_list = []
for file in filename_prefix_list:
    err, hon = read_multiple_emulator_errors_files([100],foldername,file)
    err1_list.append(err[0])
    err2_list.append(err[1])
    err3_list.append(err[2])
    hon1_list.append(hon[0])
    hon2_list.append(hon[1])
    hon3_list.append(hon[2])

colors = ['r','b','g','magenta','teal','orange','purple','grey','seagreen']
labels = [r'${\rm d}N/{\rm d}y|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
          r'$\langle p_{\rm T}\rangle|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{7.7\;\mathrm{GeV}}$',
          r'${\rm d}N/{\rm d}y|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
          r'$\langle p_{\rm T}\rangle|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{19.6\;\mathrm{GeV}}$',
          r'${\rm d}N/{\rm d}y|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$',
          r'$\langle p_{\rm T}\rangle|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{200\;\mathrm{GeV}}$',
          r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{19.6\;\mathrm{GeV}}$',
          r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{200\;\mathrm{GeV}}$',
          r'$v_2^{\rm ch}(\eta)|^{200\;\mathrm{GeV}}$'
          ]

# Marker and legend name of the three emulators, in the order of the
# err1/err2/err3 (hon1/hon2/hon3) lists.
_SUMMARY_MARKERS = ('o', 's', '^')
_SUMMARY_NAMES = ('PCGP', 'PCSK', 'Scikit GP')


def _scatter_summary_panel(ax, metric_lists, dataset_indices, colors):
    """Scatter |metric| of all three emulators for the given data sets on ``ax``.

    Observables of consecutive data sets are placed side by side on a
    running x index; a dashed vertical line separates neighbouring data
    sets.  Returns the running index at every data-set boundary (starting
    with 0).
    """
    boundaries = [0]
    idx_observable = 0
    for n_done, dataset in enumerate(dataset_indices, start=1):
        for obs in range(len(metric_lists[0][dataset][-1])):
            for values, color, marker in zip(metric_lists, colors, _SUMMARY_MARKERS):
                ax.scatter(idx_observable, np.abs(values[dataset][-1][obs]), color=color, marker=marker, s=10)
            idx_observable += 1

        boundaries.append(idx_observable)
        # No separator after the last data set of the panel.
        if n_done < len(dataset_indices):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    return boundaries


def _plot_emulator_summary(metric_lists, ylabel, outfile, labels, colors,
                           legend_borderpad, legend_borderaxespad, n_top=6):
    """Draw the two-panel emulator comparison for one metric and save it.

    ``metric_lists`` holds one list per emulator; the first ``n_top`` data
    sets are drawn in the upper panel, the rest in the lower one.  The
    figure is written to ``outfile`` and ``(fig, axs)`` is returned.
    """
    fig, axs = plt.subplots(2, 1, figsize=(12, 6))
    plt.rc('font', size=18)

    all_datasets = range(len(metric_lists[0]))
    panels = [
        (axs[0], all_datasets[:n_top], labels[:n_top]),
        (axs[1], all_datasets[n_top:], labels[n_top:]),
    ]
    boundaries = [
        _scatter_summary_panel(ax, metric_lists, datasets, colors)
        for ax, datasets, _ in panels
    ]

    # One tick per data set, placed at the centre of its observable range.
    for (ax, _, tick_labels), bounds in zip(panels, boundaries):
        ax.set_xticks([(start + end) // 2 for start, end in zip(bounds[:-1], bounds[1:])])
        ax.set_xticklabels(tick_labels, rotation=0, ha='center', fontsize=15)

    legend_elements = [
        plt.Line2D([0], [0], marker=marker, color=color, markersize=10, label=name, linestyle='None')
        for marker, color, name in zip(_SUMMARY_MARKERS, colors, _SUMMARY_NAMES)
    ]
    axs[0].legend(handles=legend_elements, loc='upper right', ncols=3,
                  borderpad=legend_borderpad, borderaxespad=legend_borderaxespad)

    for (ax, _, _), bounds in zip(panels, boundaries):
        ax.set_xlim([-1, bounds[-1] + 1])
        ax.set_ylabel(ylabel)
        # The formatter/locator must be set after the scale: changing the
        # scale installs its own defaults.
        ax.set_yscale('log')
        ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
        ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
    plt.tight_layout()
    plt.savefig(outfile)
    return fig, axs


# prediction-error plot
fig, axs = _plot_emulator_summary(
    (err1_list, err2_list, err3_list), r"$\mathcal{E}$", "err_full_training_set_PCA.pdf",
    labels, colors, legend_borderpad=0.12, legend_borderaxespad=0.25,
)

# honesty plot
fig, axs = _plot_emulator_summary(
    (hon1_list, hon2_list, hon3_list), r"$\mathcal{H}$", "hon_full_training_set_PCA.pdf",
    labels, colors, legend_borderpad=0.02, legend_borderaxespad=0.1,
)


In [None]:
# Data sets for the comparison of emulators trained with and without the log
# transformation.  The first three go into the upper panel of each figure,
# the remaining ones into the lower panel.
filename_prefix_list = ["7p7_dNdy_emu_out","19p6_dNdy_emu_out","200_dNdy_emu_out","19p6PHOBOS_dNdeta_emu_out","200PHOBOS_dNdeta_emu_out"]
filename_prefix_list_LOG = ["7p7_LOGdNdy_emu_out","19p6_LOGdNdy_emu_out","200_LOGdNdy_emu_out","19p6PHOBOS_LOGdNdeta_emu_out","200PHOBOS_LOGdNdeta_emu_out"]
foldername = 'emulator_output_PCA'

# Emulator to show: 0 = PCGP, 1 = PCSK, 2 = Scikit GP
emulator = 2

err1_list = []
err2_list = []
err3_list = []
hon1_list = []
hon2_list = []
hon3_list = []
for file in filename_prefix_list:
    err, hon = read_multiple_emulator_errors_files([100],foldername,file)
    err1_list.append(err[0])
    err2_list.append(err[1])
    err3_list.append(err[2])
    hon1_list.append(hon[0])
    hon2_list.append(hon[1])
    hon3_list.append(hon[2])

err1_list_LOG = []
err2_list_LOG = []
err3_list_LOG = []
hon1_list_LOG = []
hon2_list_LOG = []
hon3_list_LOG = []
for file in filename_prefix_list_LOG:
    err, hon = read_multiple_emulator_errors_files([100],foldername,file)
    err1_list_LOG.append(err[0])
    err2_list_LOG.append(err[1])
    err3_list_LOG.append(err[2])
    hon1_list_LOG.append(hon[0])
    hon2_list_LOG.append(hon[1])
    hon3_list_LOG.append(hon[2])

colors = ['r','b','g','magenta','teal','orange','purple','grey','seagreen']
labels = [r'${\rm d}N/{\rm d}y|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
          r'${\rm d}N/{\rm d}y|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
          r'${\rm d}N/{\rm d}y|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$',
          r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{19.6\;\mathrm{GeV}}$',
          r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{200\;\mathrm{GeV}}$',
          ]

# emulator index -> (legend name, tag used in the output file name, marker)
_LOG_COMPARISON_STYLES = {
    0: ('PCGP', 'PCGP', 'o'),
    1: ('PCSK', 'PCSK', 's'),
    2: ('Scikit GP', 'Scikit', '^'),
}
if emulator not in _LOG_COMPARISON_STYLES:
    # Previously an unknown index silently produced empty, unsaved figures.
    raise ValueError(f"emulator must be 0, 1 or 2, got {emulator!r}")


def _scatter_plain_vs_log_panel(ax, plain, logged, dataset_indices, color, marker):
    """Scatter one emulator's metric without (filled) and with (hollow) log trafo.

    Observables of consecutive data sets are placed side by side on a
    running x index; a dashed vertical line separates neighbouring data
    sets.  Returns the running index at every data-set boundary (starting
    with 0).
    """
    boundaries = [0]
    idx_observable = 0
    for n_done, dataset in enumerate(dataset_indices, start=1):
        for obs in range(len(plain[dataset][-1])):
            ax.scatter(idx_observable, plain[dataset][-1][obs], color=color, marker=marker, s=10)
            ax.scatter(idx_observable, logged[dataset][-1][obs], color=color, marker=marker, s=10, facecolors='None')
            idx_observable += 1

        boundaries.append(idx_observable)
        # No separator after the last data set of the panel.
        if n_done < len(dataset_indices):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    return boundaries


def _plot_plain_vs_log(plain, logged, emulator, ylabel, file_stem, labels, colors,
                       legend_panel, legend_loc, n_top=3):
    """Draw the two-panel plain-vs-log comparison for one emulator and save it.

    ``plain`` and ``logged`` hold the selected emulator's metric per data
    set.  The first ``n_top`` data sets are drawn in the upper panel, the
    rest in the lower one.  The legend is attached to panel
    ``legend_panel`` at ``legend_loc``.  The figure is saved as
    ``<file_stem>_full_training_set_LOG_<tag>_PCA.pdf`` and ``(fig, axs)``
    is returned.
    """
    legend_name, file_tag, marker = _LOG_COMPARISON_STYLES[emulator]
    color = colors[emulator]

    fig, axs = plt.subplots(2, 1, figsize=(12, 6))
    plt.rc('font', size=18)

    all_datasets = range(len(plain))
    panels = [
        (axs[0], all_datasets[:n_top], labels[:n_top]),
        (axs[1], all_datasets[n_top:], labels[n_top:]),
    ]
    boundaries = [
        _scatter_plain_vs_log_panel(ax, plain, logged, datasets, color, marker)
        for ax, datasets, _ in panels
    ]

    # One tick per data set, placed at the centre of its observable range.
    for (ax, _, tick_labels), bounds in zip(panels, boundaries):
        ax.set_xticks([(start + end) // 2 for start, end in zip(bounds[:-1], bounds[1:])])
        ax.set_xticklabels(tick_labels, rotation=0, ha='center', fontsize=15)

    legend_elements = [
        plt.Line2D([0], [0], marker=marker, color=color, markersize=10, label=legend_name, linestyle='None'),
        plt.Line2D([0], [0], marker=marker, color=color, markersize=10, label=legend_name + ' log', linestyle='None', fillstyle='none')
    ]
    axs[legend_panel].legend(handles=legend_elements, loc=legend_loc, ncols=3, borderpad=0.12, borderaxespad=0.25)

    for (ax, _, _), bounds in zip(panels, boundaries):
        ax.set_xlim([-1, bounds[-1] + 1])
        ax.set_ylabel(ylabel)
        # The formatter/locator must be set after the scale: changing the
        # scale installs its own defaults.
        ax.set_yscale('log')
        ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
        ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
    plt.tight_layout()
    plt.savefig(f"{file_stem}_full_training_set_LOG_{file_tag}_PCA.pdf")
    return fig, axs


# prediction-error plot
fig, axs = _plot_plain_vs_log(
    (err1_list, err2_list, err3_list)[emulator],
    (err1_list_LOG, err2_list_LOG, err3_list_LOG)[emulator],
    emulator, r"$\mathcal{E}$", "err", labels, colors,
    legend_panel=0, legend_loc='upper right',
)

# honesty plot
fig, axs = _plot_plain_vs_log(
    (hon1_list, hon2_list, hon3_list)[emulator],
    (hon1_list_LOG, hon2_list_LOG, hon3_list_LOG)[emulator],
    emulator, r"$\mathcal{H}$", "hon", labels, colors,
    legend_panel=1, legend_loc='upper left',
)


Compare the PCSK with itself, once with the uncertainty included and once without. Then do the same comparison with the PCGP emulator to see if the emulators are similar in that case. 

In [None]:
def read_multiple_emulator_errors_files_PCSK_noERR(number_test_points_list,filename):
    """Read the PCSK error and honesty files stored in ``./emulator_output_PCSK_0ERR``.

    For every entry ``n`` of ``number_test_points_list`` two files are passed to
    ``read_emulator_file_errors``: ``{filename}_{n}_pred_err_obs_2.dat`` and
    ``{filename}_{n}_GP_honesty_obs_2.dat``. All prediction-error files are
    read first, then all honesty files.

    Returns:
        ``(pred_err_list, honesty_list)`` -- two lists holding one reader
        result per entry of ``number_test_points_list``, in the same order.
    """
    pred_err_list = [
        read_emulator_file_errors(f"./emulator_output_PCSK_0ERR/{filename}_{n_test}_pred_err_obs_2.dat")
        for n_test in number_test_points_list
    ]
    honesty_list = [
        read_emulator_file_errors(f"./emulator_output_PCSK_0ERR/{filename}_{n_test}_GP_honesty_obs_2.dat")
        for n_test in number_test_points_list
    ]
    return pred_err_list, honesty_list

def read_multiple_emulator_errors_files_PCSK_2ERR(number_test_points_list,filename):
    """Read the PCSK error and honesty files stored in ``./emulator_output_PCSK_2ERR``.

    For every entry ``n`` of ``number_test_points_list`` two files are passed to
    ``read_emulator_file_errors``: ``{filename}_{n}_pred_err_obs_2.dat`` and
    ``{filename}_{n}_GP_honesty_obs_2.dat``. All prediction-error files are
    read first, then all honesty files.

    Returns:
        ``(pred_err_list, honesty_list)`` -- two lists holding one reader
        result per entry of ``number_test_points_list``, in the same order.
    """
    pred_err_list = [
        read_emulator_file_errors(f"./emulator_output_PCSK_2ERR/{filename}_{n_test}_pred_err_obs_2.dat")
        for n_test in number_test_points_list
    ]
    honesty_list = [
        read_emulator_file_errors(f"./emulator_output_PCSK_2ERR/{filename}_{n_test}_GP_honesty_obs_2.dat")
        for n_test in number_test_points_list
    ]
    return pred_err_list, honesty_list

# Folder of the default emulator output and the file prefix of every data set.
# The first six entries go to the top panel of the figures below, the last three to the bottom panel.
foldername = 'emulator_output'
filename_prefix_list = [
    "7p7_dNdy_emu_out",
    "7p7_pTvn_emu_out",
    "19p6_dNdy_emu_out",
    "19p6_pTvn_emu_out",
    "200_dNdy_emu_out",
    "200_pTvn_emu_out",
    "19p6PHOBOS_dNdeta_emu_out",
    "200PHOBOS_dNdeta_emu_out",
    "200PHOBOS_vn_emu_out",
]

# PCSK results read from ./emulator_output_PCSK_0ERR (one entry per data set).
err2_list_0ERR, hon2_list_0ERR = [], []
for prefix in filename_prefix_list:
    err_0ERR, hon_0ERR = read_multiple_emulator_errors_files_PCSK_noERR([200,100],prefix)
    err2_list_0ERR.append(err_0ERR)
    hon2_list_0ERR.append(hon_0ERR)

# Default run: keep only element [1] of what the generic reader returns
# (plotted and labelled as PCSK below).
err2_list, hon2_list = [], []
for prefix in filename_prefix_list:
    err_all, hon_all = read_multiple_emulator_errors_files([200,100],foldername,prefix)
    err2_list.append(err_all[1])
    hon2_list.append(hon_all[1])

# PCSK results read from ./emulator_output_PCSK_2ERR (one entry per data set).
err2_list_2ERR, hon2_list_2ERR = [], []
for prefix in filename_prefix_list:
    err_2ERR, hon_2ERR = read_multiple_emulator_errors_files_PCSK_2ERR([200,100],prefix)
    err2_list_2ERR.append(err_2ERR)
    hon2_list_2ERR.append(hon_2ERR)

colors = ['r','b','g','magenta','teal','orange','purple','grey','seagreen']
# One x-tick label per data set, in the order of filename_prefix_list.
labels = [
    r'${\rm d}N/{\rm d}y|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
    r'$\langle p_{\rm T}\rangle|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{7.7\;\mathrm{GeV}}$',
    r'${\rm d}N/{\rm d}y|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
    r'$\langle p_{\rm T}\rangle|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{19.6\;\mathrm{GeV}}$',
    r'${\rm d}N/{\rm d}y|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$',
    r'$\langle p_{\rm T}\rangle|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{200\;\mathrm{GeV}}$',
    r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{19.6\;\mathrm{GeV}}$',
    r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{200\;\mathrm{GeV}}$',
    r'$v_2^{\rm ch}(\eta)|^{200\;\mathrm{GeV}}$',
]

# Error measure E per observable for the three PCSK variants (0 sigma, 1 sigma, 2 sigma),
# drawn on two stacked panels: data sets 0-5 on top, the remaining ones below.
fig, axs = plt.subplots(2, 1, figsize=(12, 6))
plt.rc('font', size=18)

# (values, colour, extra scatter kwargs) in drawing order; open markers for the 0/2 sigma runs.
pcsk_series = (
    (err2_list_0ERR, colors[4], {'facecolors': 'None'}),
    (err2_list, colors[1], {}),
    (err2_list_2ERR, colors[3], {'facecolors': 'None'}),
)
# Data-set indices shown on the top and on the bottom panel.
panel_datasets = (range(min(6, len(err2_list))), range(6, len(err2_list)))

panel_boundaries = []
for ax, dataset_ids in zip(axs, panel_datasets):
    idx_observable = 0          # running x position inside this panel
    boundaries = [0]            # x positions at which a data set starts/ends
    for position, dataset in enumerate(dataset_ids, start=1):
        # [-1] selects the last entry of the test-point list passed when loading.
        for obs in range(len(err2_list[dataset][-1])):
            for values, color, extra in pcsk_series:
                ax.scatter(idx_observable, values[dataset][-1][obs], color=color, marker='s', s=10, **extra)
            idx_observable += 1
        boundaries.append(idx_observable)
        # Dashed separator after every data set except the last one of the panel.
        if position < len(dataset_ids):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    panel_boundaries.append(boundaries)
dataset_boundaries, dataset_boundaries1 = panel_boundaries

# Put each data-set label at the (integer) midpoint of its block of observables.
average_indices1 = [(start + end) // 2 for start, end in zip(dataset_boundaries[:-1], dataset_boundaries[1:])]
average_indices2 = [(start + end) // 2 for start, end in zip(dataset_boundaries1[:-1], dataset_boundaries1[1:])]
axs[0].set_xticks(average_indices1)
axs[1].set_xticks(average_indices2)
axs[0].set_xticklabels(labels[:6], rotation=0, ha='center', fontsize=15)
axs[1].set_xticklabels(labels[6:], rotation=0, ha='center', fontsize=15)

# All three series are drawn with square markers, so every legend handle uses 's' as well
# (the 0 sigma handle previously showed a diamond that appears nowhere in the plot).
legend_elements = [
    plt.Line2D([0], [0], marker='s', color=colors[4], markersize=10, label=r'PCSK $0\sigma$', linestyle='None', fillstyle='none'),
    plt.Line2D([0], [0], marker='s', color=colors[1], markersize=10, label=r'PCSK $1\sigma$', linestyle='None'),
    plt.Line2D([0], [0], marker='s', color=colors[3], markersize=10, label=r'PCSK $2\sigma$', linestyle='None', fillstyle='none')
]
axs[0].legend(handles=legend_elements, loc='upper right', ncols=3, borderpad=0.12, borderaxespad=0.25)

for ax, boundaries in zip(axs, (dataset_boundaries, dataset_boundaries1)):
    ax.set_xlim([-1, boundaries[-1] + 1])
    ax.set_ylabel(r"$\mathcal{E}$")
    ax.set_yscale('log')
    # Plain-number tick labels at 1, 2 and 5 times each power of ten.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
    ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
plt.tight_layout()
plt.savefig("err_full_training_set_PCSK_ERR_noPCA.pdf")

# Honesty measure H per observable for the three PCSK variants (0 sigma, 1 sigma, 2 sigma),
# drawn on two stacked panels: data sets 0-5 on top, the remaining ones below.
fig, axs = plt.subplots(2, 1, figsize=(12, 6))
plt.rc('font', size=18)

# (values, colour, extra scatter kwargs) in drawing order; open markers for the 0/2 sigma runs.
pcsk_series = (
    (hon2_list_0ERR, colors[4], {'facecolors': 'None'}),
    (hon2_list, colors[1], {}),
    (hon2_list_2ERR, colors[3], {'facecolors': 'None'}),
)
# Data-set indices shown on the top and on the bottom panel.
panel_datasets = (range(min(6, len(hon2_list))), range(6, len(hon2_list)))

panel_boundaries = []
for ax, dataset_ids in zip(axs, panel_datasets):
    idx_observable = 0          # running x position inside this panel
    boundaries = [0]            # x positions at which a data set starts/ends
    for position, dataset in enumerate(dataset_ids, start=1):
        # [-1] selects the last entry of the test-point list passed when loading.
        for obs in range(len(hon2_list[dataset][-1])):
            for values, color, extra in pcsk_series:
                ax.scatter(idx_observable, values[dataset][-1][obs], color=color, marker='s', s=10, **extra)
            idx_observable += 1
        boundaries.append(idx_observable)
        # Dashed separator after every data set except the last one of the panel.
        if position < len(dataset_ids):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    panel_boundaries.append(boundaries)
dataset_boundaries, dataset_boundaries1 = panel_boundaries

# Put each data-set label at the (integer) midpoint of its block of observables.
average_indices1 = [(start + end) // 2 for start, end in zip(dataset_boundaries[:-1], dataset_boundaries[1:])]
average_indices2 = [(start + end) // 2 for start, end in zip(dataset_boundaries1[:-1], dataset_boundaries1[1:])]
axs[0].set_xticks(average_indices1)
axs[1].set_xticks(average_indices2)
axs[0].set_xticklabels(labels[:6], rotation=0, ha='center', fontsize=15)
axs[1].set_xticklabels(labels[6:], rotation=0, ha='center', fontsize=15)

legend_elements = [
    plt.Line2D([0], [0], marker='s', color=colors[4], markersize=10, label=r'PCSK $0\sigma$', linestyle='None', fillstyle='none'),
    plt.Line2D([0], [0], marker='s', color=colors[1], markersize=10, label=r'PCSK $1\sigma$', linestyle='None'),
    plt.Line2D([0], [0], marker='s', color=colors[3], markersize=10, label=r'PCSK $2\sigma$', linestyle='None', fillstyle='none')
]
axs[0].legend(handles=legend_elements, loc='upper right', ncols=3, borderpad=0.05, borderaxespad=0.15)

for ax, boundaries in zip(axs, (dataset_boundaries, dataset_boundaries1)):
    ax.set_xlim([-1, boundaries[-1] + 1])
    ax.set_ylabel(r"$\mathcal{H}$")
    ax.set_yscale('log')
    # Plain-number tick labels at 1, 2 and 5 times each power of ten.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
    ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
plt.tight_layout()
plt.savefig("hon_full_training_set_PCSK_ERR_noPCA.pdf")


Comparison of PCSK without uncertainty ($0\sigma$) and PCGP:

In [None]:
def read_multiple_emulator_errors_files_PCSKERR(number_test_points_list,filename):
    """Read the PCSK error and honesty files stored in ``./emulator_output_PCSK_0ERR``.

    For every entry ``n`` of ``number_test_points_list`` two files are passed to
    ``read_emulator_file_errors``: ``{filename}_{n}_pred_err_obs_2.dat`` and
    ``{filename}_{n}_GP_honesty_obs_2.dat``. All prediction-error files are
    read first, then all honesty files.

    Returns:
        ``(pred_err_list, honesty_list)`` -- two lists holding one reader
        result per entry of ``number_test_points_list``, in the same order.
    """
    pred_err_list = [
        read_emulator_file_errors(f"./emulator_output_PCSK_0ERR/{filename}_{n_test}_pred_err_obs_2.dat")
        for n_test in number_test_points_list
    ]
    honesty_list = [
        read_emulator_file_errors(f"./emulator_output_PCSK_0ERR/{filename}_{n_test}_GP_honesty_obs_2.dat")
        for n_test in number_test_points_list
    ]
    return pred_err_list, honesty_list

# Folder of the default emulator output and the file prefix of every data set.
# The first six entries go to the top panel of the figures below, the last three to the bottom panel.
foldername = 'emulator_output'
filename_prefix_list = [
    "7p7_dNdy_emu_out",
    "7p7_pTvn_emu_out",
    "19p6_dNdy_emu_out",
    "19p6_pTvn_emu_out",
    "200_dNdy_emu_out",
    "200_pTvn_emu_out",
    "19p6PHOBOS_dNdeta_emu_out",
    "200PHOBOS_dNdeta_emu_out",
    "200PHOBOS_vn_emu_out",
]

# Default run: keep only element [0] of what the generic reader returns
# (plotted and labelled as PCGP below).
err1_list, hon1_list = [], []
for prefix in filename_prefix_list:
    err_all, hon_all = read_multiple_emulator_errors_files([200,100],foldername,prefix)
    err1_list.append(err_all[0])
    hon1_list.append(hon_all[0])

# PCSK results read from ./emulator_output_PCSK_0ERR (one entry per data set).
err2_list_0ERR, hon2_list_0ERR = [], []
for prefix in filename_prefix_list:
    err_0ERR, hon_0ERR = read_multiple_emulator_errors_files_PCSKERR([200,100],prefix)
    err2_list_0ERR.append(err_0ERR)
    hon2_list_0ERR.append(hon_0ERR)

colors = ['r','b','g','magenta','teal','orange','purple','grey','seagreen']
# One x-tick label per data set, in the order of filename_prefix_list.
labels = [
    r'${\rm d}N/{\rm d}y|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
    r'$\langle p_{\rm T}\rangle|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{7.7\;\mathrm{GeV}}$',
    r'${\rm d}N/{\rm d}y|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
    r'$\langle p_{\rm T}\rangle|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{19.6\;\mathrm{GeV}}$',
    r'${\rm d}N/{\rm d}y|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$',
    r'$\langle p_{\rm T}\rangle|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$' + '\\\n' + r'$v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{200\;\mathrm{GeV}}$',
    r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{19.6\;\mathrm{GeV}}$',
    r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{200\;\mathrm{GeV}}$',
    r'$v_2^{\rm ch}(\eta)|^{200\;\mathrm{GeV}}$',
]

# Error measure E per observable for PCGP (filled circles) and PCSK 0 sigma (open squares),
# drawn on two stacked panels: data sets 0-5 on top, the remaining ones below.
fig, axs = plt.subplots(2, 1, figsize=(12, 6))
plt.rc('font', size=18)

# (values, colour, marker, extra scatter kwargs) in drawing order.
emulator_series = (
    (err1_list, colors[0], 'o', {}),
    (err2_list_0ERR, colors[3], 's', {'facecolors': 'None'}),
)
# Data-set indices shown on the top and on the bottom panel.
panel_datasets = (range(min(6, len(err1_list))), range(6, len(err1_list)))

panel_boundaries = []
for ax, dataset_ids in zip(axs, panel_datasets):
    idx_observable = 0          # running x position inside this panel
    boundaries = [0]            # x positions at which a data set starts/ends
    for position, dataset in enumerate(dataset_ids, start=1):
        # [-1] selects the last entry of the test-point list passed when loading.
        for obs in range(len(err1_list[dataset][-1])):
            for values, color, marker, extra in emulator_series:
                ax.scatter(idx_observable, values[dataset][-1][obs], color=color, marker=marker, s=10, **extra)
            idx_observable += 1
        boundaries.append(idx_observable)
        # Dashed separator after every data set except the last one of the panel.
        if position < len(dataset_ids):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    panel_boundaries.append(boundaries)
dataset_boundaries, dataset_boundaries1 = panel_boundaries

# Put each data-set label at the (integer) midpoint of its block of observables.
average_indices1 = [(start + end) // 2 for start, end in zip(dataset_boundaries[:-1], dataset_boundaries[1:])]
average_indices2 = [(start + end) // 2 for start, end in zip(dataset_boundaries1[:-1], dataset_boundaries1[1:])]
axs[0].set_xticks(average_indices1)
axs[1].set_xticks(average_indices2)
axs[0].set_xticklabels(labels[:6], rotation=0, ha='center', fontsize=15)
axs[1].set_xticklabels(labels[6:], rotation=0, ha='center', fontsize=15)

legend_elements = [
    plt.Line2D([0], [0], marker='o', color=colors[0], markersize=10, label='PCGP', linestyle='None'),
    plt.Line2D([0], [0], marker='s', color=colors[3], markersize=10, label=r'PCSK $0\sigma$', linestyle='None', fillstyle='none')
]
axs[0].legend(handles=legend_elements, loc='upper right', ncols=3, borderpad=0.12, borderaxespad=0.25)

for ax, boundaries in zip(axs, (dataset_boundaries, dataset_boundaries1)):
    ax.set_xlim([-1, boundaries[-1] + 1])
    ax.set_ylabel(r"$\mathcal{E}$")
    ax.set_yscale('log')
    # Plain-number tick labels at 1, 2 and 5 times each power of ten.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
    ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
plt.tight_layout()
plt.savefig("err_full_training_set_PCSK_0ERR_PCGP_noPCA.pdf")

# Honesty measure H per observable for PCGP (filled circles) and PCSK 0 sigma (open squares),
# drawn on two stacked panels: data sets 0-5 on top, the remaining ones below.
fig, axs = plt.subplots(2, 1, figsize=(12, 6))
plt.rc('font', size=18)

# (values, colour, marker, extra scatter kwargs) in drawing order.
emulator_series = (
    (hon1_list, colors[0], 'o', {}),
    (hon2_list_0ERR, colors[3], 's', {'facecolors': 'None'}),
)
# Data-set indices shown on the top and on the bottom panel.
panel_datasets = (range(min(6, len(hon1_list))), range(6, len(hon1_list)))

panel_boundaries = []
for ax, dataset_ids in zip(axs, panel_datasets):
    idx_observable = 0          # running x position inside this panel
    boundaries = [0]            # x positions at which a data set starts/ends
    for position, dataset in enumerate(dataset_ids, start=1):
        # [-1] selects the last entry of the test-point list passed when loading.
        for obs in range(len(hon1_list[dataset][-1])):
            for values, color, marker, extra in emulator_series:
                ax.scatter(idx_observable, values[dataset][-1][obs], color=color, marker=marker, s=10, **extra)
            idx_observable += 1
        boundaries.append(idx_observable)
        # Dashed separator after every data set except the last one of the panel.
        if position < len(dataset_ids):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    panel_boundaries.append(boundaries)
dataset_boundaries, dataset_boundaries1 = panel_boundaries

# Put each data-set label at the (integer) midpoint of its block of observables.
average_indices1 = [(start + end) // 2 for start, end in zip(dataset_boundaries[:-1], dataset_boundaries[1:])]
average_indices2 = [(start + end) // 2 for start, end in zip(dataset_boundaries1[:-1], dataset_boundaries1[1:])]
axs[0].set_xticks(average_indices1)
axs[1].set_xticks(average_indices2)
axs[0].set_xticklabels(labels[:6], rotation=0, ha='center', fontsize=15)
axs[1].set_xticklabels(labels[6:], rotation=0, ha='center', fontsize=15)

legend_elements = [
    plt.Line2D([0], [0], marker='o', color=colors[0], markersize=10, label='PCGP', linestyle='None'),
    plt.Line2D([0], [0], marker='s', color=colors[3], markersize=10, label=r'PCSK $0\sigma$', linestyle='None', fillstyle='none')
]
# Here the legend sits in the bottom panel.
axs[1].legend(handles=legend_elements, loc='upper left', ncols=3, borderpad=0.12, borderaxespad=0.25)

for ax, boundaries in zip(axs, (dataset_boundaries, dataset_boundaries1)):
    ax.set_xlim([-1, boundaries[-1] + 1])
    ax.set_ylabel(r"$\mathcal{H}$")
    ax.set_yscale('log')
    # Plain-number tick labels at 1, 2 and 5 times each power of ten.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
    ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
plt.tight_layout()
plt.savefig("hon_full_training_set_PCSK_0ERR_PCGP_noPCA.pdf")

In [None]:
# File prefix of every data set; the first five go to the top panel, the last four to the bottom panel.
filename_prefix_list = [
    "7p7_dNdy_emu_out",
    "7p7_pTvn_emu_out",
    "19p6_dNdy_emu_out",
    "19p6_pTvn_emu_out",
    "19p6PHOBOS_dNdeta_emu_out",
    "200_dNdy_emu_out",
    "200_pTvn_emu_out",
    "200PHOBOS_dNdeta_emu_out",
    "200PHOBOS_vn_emu_out",
]
# Prefixes of the 'LOG' variants; datasets_LOG holds, in the same order,
# the index in filename_prefix_list that each of them belongs to.
filename_prefix_list_LOG = [
    "7p7_LOGdNdy_emu_out",
    "19p6_LOGdNdy_emu_out",
    "19p6PHOBOS_LOGdNdeta_emu_out",
    "200_LOGdNdy_emu_out",
    "200PHOBOS_LOGdNdeta_emu_out",
]
datasets_LOG = [0,2,4,5,7]
foldername = 'emulator_output'

# Elements [0], [1], [2] of the reader output are stored separately
# (plotted below as PCGP, PCSK and Scikit GP).
err1_list, err2_list, err3_list = [], [], []
hon1_list, hon2_list, hon3_list = [], [], []
for prefix in filename_prefix_list:
    err, hon = read_multiple_emulator_errors_files([900,800,700,600,500,400,300,200,100],foldername,prefix)
    err1_list.append(err[0])
    err2_list.append(err[1])
    err3_list.append(err[2])
    hon1_list.append(hon[0])
    hon2_list.append(hon[1])
    hon3_list.append(hon[2])

# Same split for the 'LOG' variants.
err1_list_LOG, err2_list_LOG, err3_list_LOG = [], [], []
hon1_list_LOG, hon2_list_LOG, hon3_list_LOG = [], [], []
for prefix in filename_prefix_list_LOG:
    err, hon = read_multiple_emulator_errors_files([900,800,700,600,500,400,300,200,100],foldername,prefix)
    err1_list_LOG.append(err[0])
    err2_list_LOG.append(err[1])
    err3_list_LOG.append(err[2])
    hon1_list_LOG.append(hon[0])
    hon2_list_LOG.append(hon[1])
    hon3_list_LOG.append(hon[2])

colors = ['r','b','g','magenta','teal','orange','purple','grey','seagreen']
# One x-tick label per data set, in the order of filename_prefix_list.
labels = [
    r'${\rm d}N/{\rm d}y|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
    r'$\langle p_{\rm T}\rangle|^{7.7\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$, $v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{7.7\;\mathrm{GeV}}$',
    r'${\rm d}N/{\rm d}y|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p}$',
    r'$\langle p_{\rm T}\rangle|^{19.6\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$, $v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{19.6\;\mathrm{GeV}}$',
    r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{19.6\;\mathrm{GeV}}$',
    r'${\rm d}N/{\rm d}y|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$',
    r'$\langle p_{\rm T}\rangle|^{200\;\mathrm{GeV}}_{\pi^+,K^+,p,\bar{p}}$, $v_{2/3}^{\rm ch}\lbrace 2\rbrace|^{200\;\mathrm{GeV}}$',
    r'${\rm d}N_{\rm ch}/{\rm d}\eta|^{200\;\mathrm{GeV}}$',
    r'$v_2^{\rm ch}(\eta)|^{200\;\mathrm{GeV}}$',
]

# Error measure E per observable for PCGP, PCSK and Scikit GP (filled markers) and,
# for the data sets listed in datasets_LOG, their 'LOG' variants (open markers).
# Data sets 0-4 are drawn on the top panel, the remaining ones on the bottom panel.
fig, axs = plt.subplots(2, 1, figsize=(12, 6))
plt.rc('font', size=18)

# (values, LOG-variant values, colour, marker) per emulator. The same marker is used on
# both panels so that the bottom panel agrees with the legend (PCSK was drawn as 'x' there).
emulator_series = (
    (err1_list, err1_list_LOG, colors[0], 'o'),
    (err2_list, err2_list_LOG, colors[1], 's'),
    (err3_list, err3_list_LOG, colors[2], '^'),
)
# Data-set indices shown on the top and on the bottom panel.
panel_datasets = (range(min(5, len(err1_list))), range(5, len(err1_list)))

idx_dataset_LOG = 0             # position in the *_LOG lists; keeps counting across both panels
panel_boundaries = []
for ax, dataset_ids in zip(axs, panel_datasets):
    idx_observable = 0          # running x position inside this panel
    boundaries = [0]            # x positions at which a data set starts/ends
    for position, dataset in enumerate(dataset_ids, start=1):
        has_log_variant = dataset in datasets_LOG
        # [-1] selects the last entry of the test-point list passed when loading.
        for obs in range(len(err1_list[dataset][-1])):
            for values, values_LOG, color, marker in emulator_series:
                ax.scatter(idx_observable, np.abs(values[dataset][-1][obs]), color=color, marker=marker, s=10)
            if has_log_variant:
                for values, values_LOG, color, marker in emulator_series:
                    ax.scatter(idx_observable, np.abs(values_LOG[idx_dataset_LOG][-1][obs]), color=color, marker=marker, s=10, facecolors='None')
            idx_observable += 1
        boundaries.append(idx_observable)
        if has_log_variant:
            idx_dataset_LOG += 1
        # Dashed separator after every data set except the last one of the panel.
        if position < len(dataset_ids):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    panel_boundaries.append(boundaries)
dataset_boundaries, dataset_boundaries1 = panel_boundaries

# Put each data-set label at the (integer) midpoint of its block of observables.
average_indices1 = [(start + end) // 2 for start, end in zip(dataset_boundaries[:-1], dataset_boundaries[1:])]
average_indices2 = [(start + end) // 2 for start, end in zip(dataset_boundaries1[:-1], dataset_boundaries1[1:])]
axs[0].set_xticks(average_indices1)
axs[1].set_xticks(average_indices2)
axs[0].set_xticklabels(labels[:5], rotation=18, ha='right')
axs[1].set_xticklabels(labels[5:], rotation=18, ha='right')

legend_elements = [
    plt.Line2D([0], [0], marker='o', color=colors[0], markersize=10, label='PCGP', linestyle='None'),
    plt.Line2D([0], [0], marker='s', color=colors[1], markersize=10, label='PCSK', linestyle='None'),
    plt.Line2D([0], [0], marker='^', color=colors[2], markersize=10, label='Scikit GP', linestyle='None')
]
axs[0].legend(handles=legend_elements, loc='upper right', ncols=3, borderpad=0.12, borderaxespad=0.25)

for ax, boundaries in zip(axs, (dataset_boundaries, dataset_boundaries1)):
    ax.set_xlim([-1, boundaries[-1] + 1])
    ax.set_ylabel(r"$\mathcal{E}$")
    ax.set_yscale('log')
    # Plain-number tick labels at 1, 2 and 5 times each power of ten.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
    ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
plt.tight_layout()
plt.savefig("err_full_training_noPCA.pdf")

# plot honesty
# Honesty measure H per observable for PCGP, PCSK and Scikit GP (filled markers) and,
# for the data sets listed in datasets_LOG, their 'LOG' variants (open markers).
# Data sets 0-4 are drawn on the top panel, the remaining ones on the bottom panel.
fig, axs = plt.subplots(2, 1, figsize=(12, 6))
plt.rc('font', size=18)

# (values, LOG-variant values, colour, marker) per emulator. The same marker is used on
# both panels so that the bottom panel agrees with the legend (PCSK was drawn as 'x' there).
emulator_series = (
    (hon1_list, hon1_list_LOG, colors[0], 'o'),
    (hon2_list, hon2_list_LOG, colors[1], 's'),
    (hon3_list, hon3_list_LOG, colors[2], '^'),
)
# Data-set indices shown on the top and on the bottom panel.
panel_datasets = (range(min(5, len(hon1_list))), range(5, len(hon1_list)))

idx_dataset_LOG = 0             # position in the *_LOG lists; keeps counting across both panels
panel_boundaries = []
for ax, dataset_ids in zip(axs, panel_datasets):
    idx_observable = 0          # running x position inside this panel
    boundaries = [0]            # x positions at which a data set starts/ends
    for position, dataset in enumerate(dataset_ids, start=1):
        has_log_variant = dataset in datasets_LOG
        # [-1] selects the last entry of the test-point list passed when loading.
        for obs in range(len(hon1_list[dataset][-1])):
            for values, values_LOG, color, marker in emulator_series:
                ax.scatter(idx_observable, np.abs(values[dataset][-1][obs]), color=color, marker=marker, s=10)
            if has_log_variant:
                for values, values_LOG, color, marker in emulator_series:
                    ax.scatter(idx_observable, np.abs(values_LOG[idx_dataset_LOG][-1][obs]), color=color, marker=marker, s=10, facecolors='None')
            idx_observable += 1
        boundaries.append(idx_observable)
        if has_log_variant:
            idx_dataset_LOG += 1
        # Dashed separator after every data set except the last one of the panel.
        if position < len(dataset_ids):
            ax.axvline(x=idx_observable, color='k', linestyle='--', zorder=(-10))
    panel_boundaries.append(boundaries)
dataset_boundaries, dataset_boundaries1 = panel_boundaries

# Put each data-set label at the (integer) midpoint of its block of observables.
average_indices1 = [(start + end) // 2 for start, end in zip(dataset_boundaries[:-1], dataset_boundaries[1:])]
average_indices2 = [(start + end) // 2 for start, end in zip(dataset_boundaries1[:-1], dataset_boundaries1[1:])]
axs[0].set_xticks(average_indices1)
axs[1].set_xticks(average_indices2)
axs[0].set_xticklabels(labels[:5], rotation=18, ha='right')
axs[1].set_xticklabels(labels[5:], rotation=18, ha='right')

legend_elements = [
    plt.Line2D([0], [0], marker='o', color=colors[0], markersize=10, label='PCGP', linestyle='None'),
    plt.Line2D([0], [0], marker='s', color=colors[1], markersize=10, label='PCSK', linestyle='None'),
    plt.Line2D([0], [0], marker='^', color=colors[2], markersize=10, label='Scikit GP', linestyle='None')
]
axs[0].legend(handles=legend_elements, loc='upper right', ncols=3, borderpad=0.12, borderaxespad=0.25)

for ax, boundaries in zip(axs, (dataset_boundaries, dataset_boundaries1)):
    ax.set_xlim([-1, boundaries[-1] + 1])
    ax.set_ylabel(r"$\mathcal{H}$")
    ax.set_yscale('log')
    # Plain-number tick labels at 1, 2 and 5 times each power of ten.
    ax.yaxis.set_major_formatter(ticker.FuncFormatter(lambda x, _: '{:g}'.format(x)))
    ax.yaxis.set_major_locator(ticker.LogLocator(subs=[1.0, 2.0, 5.0]))  # Ensure at least two labels
plt.tight_layout()
plt.savefig("hon_full_training_noPCA.pdf")

Quick summary of how many of the 1100 training points are excluded for each data set:
- The AuAu7.7_dNdy data set does not exclude any of the 1100 training points.
- The AuAu7.7_pTvn data set excludes 40 of the 1100 training points because of large statistical errors.
- The AuAu19p6_dNdy data set does not exclude any of the 1100 training points.
- The AuAu19p6_pTvn data set excludes 5 of the 1100 training points because of large statistical errors.
- The AuAu19p6_PHOBOS_dNdeta data set does not exclude any of the 1100 training points.
- The AuAu200_dNdy data set does not exclude any of the 1100 training points.
- The AuAu200_pTvn data set excludes 46 of the 1100 training points because of large statistical errors.
- The AuAu200_PHOBOS_dNdeta data set does not exclude any of the 1100 training points.
- The AuAu200_PHOBOSv2eta data set does not exclude any of the 1100 training points.

In [None]:
# File prefix of every data set (note: the order differs from the previous cell).
filename_prefix_list = ["7p7_dNdy_emu_out",
                        "7p7_pTvn_emu_out",
                        "19p6_dNdy_emu_out",
                        "19p6_pTvn_emu_out",
                        "200_dNdy_emu_out",
                        "200_pTvn_emu_out",
                        "19p6PHOBOS_dNdeta_emu_out",
                        "200PHOBOS_dNdeta_emu_out",
                        "200PHOBOS_vn_emu_out"]
# Prefixes of the 'LOG' variants; datasets_LOG holds, in the same order,
# the index in filename_prefix_list that each of them belongs to.
filename_prefix_list_LOG = ["7p7_LOGdNdy_emu_out",
                            "19p6_LOGdNdy_emu_out",
                            "200_LOGdNdy_emu_out",
                            "19p6PHOBOS_LOGdNdeta_emu_out",
                            "200PHOBOS_LOGdNdeta_emu_out"]
datasets_LOG = [0,2,4,6,7]
colors = ['r','b','g','magenta','teal','orange','purple','grey','seagreen']
foldername = 'emulator_output_PCA'

# Elements [0], [1], [2] of the reader output are stored separately
# (plotted below as PCGP, PCSK and Scikit GP).
err1_list = []
err2_list = []
err3_list = []
hon1_list = []
hon2_list = []
hon3_list = []
for file in filename_prefix_list:
    err, hon = read_multiple_emulator_errors_files([900,800,700,600,500,400,300,200,100,30],foldername,file)
    err1_list.append(err[0])
    err2_list.append(err[1])
    err3_list.append(err[2])
    hon1_list.append(hon[0])
    hon2_list.append(hon[1])
    hon3_list.append(hon[2])

# Same split for the 'LOG' variants.
err1_list_LOG = []
err2_list_LOG = []
err3_list_LOG = []
hon1_list_LOG = []
hon2_list_LOG = []
hon3_list_LOG = []
for file in filename_prefix_list_LOG:
    err, hon = read_multiple_emulator_errors_files([900,800,700,600,500,400,300,200,100,30],foldername,file)
    err1_list_LOG.append(err[0])
    err2_list_LOG.append(err[1])
    err3_list_LOG.append(err[2])
    hon1_list_LOG.append(hon[0])
    hon2_list_LOG.append(hon[1])
    hon3_list_LOG.append(hon[2])

# Number of valid training points per data set, one row per entry of filename_prefix_list and
# one column per test-point count above (1100 minus test points, minus the excluded points:
# 40 for 7p7_pTvn, 5 for 19p6_pTvn, 46 for 200_pTvn, see the summary above).
# The rows follow THIS cell's data-set order: the reduced counts for 200_pTvn are in row 5;
# they previously sat in row 6, i.e. on 19p6PHOBOS_dNdeta, which excludes no points.
numberValidTrainingPoints = [
    [200,300,400,500,600,700,800,900,1000,1070],  # 7p7_dNdy
    [160,260,360,460,560,660,760,860,960,1030],   # 7p7_pTvn (40 excluded)
    [200,300,400,500,600,700,800,900,1000,1070],  # 19p6_dNdy
    [195,295,395,495,595,695,795,895,995,1065],   # 19p6_pTvn (5 excluded)
    [200,300,400,500,600,700,800,900,1000,1070],  # 200_dNdy
    [154,254,354,454,554,654,754,854,954,1024],   # 200_pTvn (46 excluded)
    [200,300,400,500,600,700,800,900,1000,1070],  # 19p6PHOBOS_dNdeta
    [200,300,400,500,600,700,800,900,1000,1070],  # 200PHOBOS_dNdeta
    [200,300,400,500,600,700,800,900,1000,1070]   # 200PHOBOS_vn
]

# One figure per (data set, observable): error measure E against the number of training points.
# Filled markers show the default runs; open markers show the 'LOG' variant where one exists.
# (values, LOG-variant values, colour, marker, legend label) per emulator
emulator_series = (
    (err1_list, err1_list_LOG, colors[0], 'o', 'PCGP'),
    (err2_list, err2_list_LOG, colors[1], 's', 'PCSK'),
    (err3_list, err3_list_LOG, colors[2], '^', 'Scikit GP'),
)

idx_dataset_LOG = 0  # position of the current data set inside the *_LOG lists
for d in range(len(filename_prefix_list)):
    numTrain = numberValidTrainingPoints[d]
    has_log_variant = d in datasets_LOG
    for o in range(len(err1_list[d][0])):
        fig, axs = plt.subplots(1, 1, figsize=(6, 4))
        plt.rc('font', size=15)
        for i, n_train in enumerate(numTrain):
            for values, values_LOG, color, marker, name in emulator_series:
                axs.scatter(n_train, values[d][i][o], color=color, marker=marker, s=14)
            if has_log_variant:
                for values, values_LOG, color, marker, name in emulator_series:
                    axs.scatter(n_train, values_LOG[idx_dataset_LOG][i][o], color=color, marker=marker, s=14, facecolors='None')
        legend_elements = [
            plt.Line2D([0], [0], marker=marker, color=color, markersize=10, label=name, linestyle='None')
            for values, values_LOG, color, marker, name in emulator_series
        ]
        axs.legend(handles=legend_elements, loc='upper right', ncols=1, borderpad=0.12, borderaxespad=0.25, fontsize=12)
        axs.set_ylabel(r"$\mathcal{E}$")
        axs.set_xlabel(r"training points")
        #axs.set_yscale('log')
        plt.tight_layout()
        plt.savefig(f"./err_training_points_PCA/err_dataset{d}_obs{o}_PCA.pdf")
    # Advance to the next LOG entry once all observables of a LOG data set are drawn.
    if has_log_variant:
        idx_dataset_LOG += 1

# One figure per (data set, observable): honesty measure H against the number of training points.
# Filled markers show the default runs; open markers show the 'LOG' variant where one exists.
# (values, LOG-variant values, colour, marker, legend label) per emulator
emulator_series = (
    (hon1_list, hon1_list_LOG, colors[0], 'o', 'PCGP'),
    (hon2_list, hon2_list_LOG, colors[1], 's', 'PCSK'),
    (hon3_list, hon3_list_LOG, colors[2], '^', 'Scikit GP'),
)

idx_dataset_LOG = 0  # position of the current data set inside the *_LOG lists
for d in range(len(filename_prefix_list)):
    numTrain = numberValidTrainingPoints[d]
    has_log_variant = d in datasets_LOG
    # Size the loop from the honesty data that is actually indexed below
    # (it was previously taken from err1_list).
    numObs = len(hon1_list[d][0])
    for o in range(numObs):
        fig, axs = plt.subplots(1, 1, figsize=(6, 4))
        plt.rc('font', size=15)
        for i, n_train in enumerate(numTrain):
            for values, values_LOG, color, marker, name in emulator_series:
                axs.scatter(n_train, values[d][i][o], color=color, marker=marker, s=14)
            if has_log_variant:
                for values, values_LOG, color, marker, name in emulator_series:
                    axs.scatter(n_train, values_LOG[idx_dataset_LOG][i][o], color=color, marker=marker, s=14, facecolors='None')
        legend_elements = [
            plt.Line2D([0], [0], marker=marker, color=color, markersize=10, label=name, linestyle='None')
            for values, values_LOG, color, marker, name in emulator_series
        ]
        axs.legend(handles=legend_elements, loc='upper right', ncols=1, borderpad=0.12, borderaxespad=0.25, fontsize=12)
        axs.set_ylabel(r"$\mathcal{H}$")
        axs.set_xlabel(r"training points")
        #axs.set_yscale('log')
        plt.tight_layout()
        plt.savefig(f"./hon_training_points_PCA/hon_dataset{d}_obs{o}_PCA.pdf")
    # Advance to the next LOG entry once all observables of a LOG data set are drawn.
    if has_log_variant:
        idx_dataset_LOG += 1

### Old functions

In [None]:
def plot_emulator_vs_truth(vali_data,vali_data_err,emu_pred,emu_pred_err):
    """Plot emulator predictions against validation values, one figure per observable.

    Parameters
    ----------
    vali_data : 2-D array, indexed [validation point, observable]
        Reference values, drawn on the x axis.
    vali_data_err : 2-D array, indexed like ``vali_data``
        Used as-is for the horizontal error bars.
    emu_pred : 2-D array, indexed like ``vali_data``
        Emulator predictions, drawn on the y axis.
    emu_pred_err : 3-D array, indexed [validation point, observable, observable]
        Only the diagonal entry ``[:, k, k]`` is read; its square root is the
        vertical error bar of observable ``k``.

    Returns
    -------
    None. One new pyplot figure is created per observable and left open.
    """
    # Bug fix: the shape was read from `vali_data_1`, which is not a parameter
    # of this function (NameError, or a stale notebook global). Use the argument.
    _, nObs = vali_data.shape

    for obsIdx in range(nObs):
        truth = vali_data[:, obsIdx]
        prediction = emu_pred[:, obsIdx]

        plt.figure()
        ax = plt.axes([0.12, 0.12, 0.83, 0.83])
        plt.errorbar(truth, prediction,
                     yerr=np.sqrt(emu_pred_err[:, obsIdx, obsIdx]),
                     xerr=vali_data_err[:, obsIdx],
                     marker="o", linestyle="")
        # y = x reference line; its span is hard-coded to [-200, 200].
        plt.plot([-200, 200], [-200, 200], '--k')
        # Axis limits follow the data with a margin of 1 on each side.
        plt.xlim([truth.min() - 1, truth.max() + 1])
        plt.ylim([prediction.min() - 1, prediction.max() + 1])
        ax.set_aspect('equal')
        ax.text(0.05, 0.95, "obs {}".format(obsIdx), fontsize=20, transform=ax.transAxes, verticalalignment='top')
        plt.xlabel("truth")
        plt.ylabel("emulator results")

def plot_emulator_vs_truth_relative(vali_data,vali_data_err,emu_pred,emu_pred_err):
    """Plot the normalised emulator residual per validation point, one figure per observable.

    For observable ``k`` the plotted quantity is
    ``(emu_pred - vali_data) / sqrt(vali_data_err**2 + emu_pred_err[:, k, k])``,
    i.e. ``vali_data_err`` is squared while the diagonal entry of
    ``emu_pred_err`` is added without squaring.

    Parameters
    ----------
    vali_data : 2-D array, indexed [validation point, observable]
    vali_data_err : 2-D array, indexed like ``vali_data``
    emu_pred : 2-D array, indexed like ``vali_data``
    emu_pred_err : 3-D array, indexed [validation point, observable, observable]
        Only the diagonal entry ``[:, k, k]`` is read.

    Returns
    -------
    None. One new pyplot figure is created per observable and left open.
    """
    nValidationPoints, nObs = vali_data.shape

    for obsIdx in range(nObs):
        residual = emu_pred[:, obsIdx] - vali_data[:, obsIdx]
        combined_sigma = np.sqrt(vali_data_err[:, obsIdx]**2. + emu_pred_err[:, obsIdx, obsIdx])

        plt.figure()
        ax = plt.axes([0.12, 0.12, 0.83, 0.83])
        plt.plot(range(nValidationPoints), residual / combined_sigma,
                 marker="o", linestyle="")
        # Shaded band marks the region between -2 and +2 on the y axis.
        plt.fill_between([-nValidationPoints, nValidationPoints], [2, 2], [-2, -2], color='g', alpha=0.2)
        plt.xlim([-1, nValidationPoints + 1])
        plt.ylim([-4, 4])
        ax.text(0.05, 0.95, "obs {}".format(obsIdx), fontsize=20, transform=ax.transAxes, verticalalignment='top')
        plt.xlabel("test point")
        # Raw string: a plain string literal here contains an invalid escape
        # sequence (backslash followed by "s"). The runtime text is unchanged.
        plt.ylabel(r"relative diff. [$\sigma$]")

def plot_histogram_emulator_vs_truth_relative(vali_data_list,emu_pred_list,emu_pred_err_list,dataset_labels=None):
    """Histogram A = (prediction - truth) / sqrt(err[:, k, k]) per observable, overlaying all datasets.

    Each of the first three arguments may be a single array or a list of
    arrays; a non-list value is wrapped into a one-element list. For every
    observable one figure is created containing one step histogram per dataset,
    a standard normal density for comparison, and a second legend listing the
    first four moments of each histogram.

    Parameters
    ----------
    vali_data_list : array or list of 2-D arrays, indexed [validation point, observable]
    emu_pred_list : array or list of 2-D arrays, indexed like the validation data
    emu_pred_err_list : array or list of 3-D arrays, indexed [point, observable, observable]
        Only the diagonal entry ``[:, k, k]`` is read.
    dataset_labels : None, single label, or list of labels
        ``None`` means "no labels" for every dataset.

    Raises
    ------
    ValueError
        If the four lists do not have the same length (previously the extra
        entries were silently dropped by ``zip``).
    """
    if not isinstance(vali_data_list, list):
        vali_data_list = [vali_data_list]
    if not isinstance(emu_pred_list, list):
        emu_pred_list = [emu_pred_list]
    if not isinstance(emu_pred_err_list, list):
        emu_pred_err_list = [emu_pred_err_list]
    if dataset_labels is None:
        # Bug fix: a bare [None] made zip() stop after the first dataset.
        dataset_labels = [None] * len(vali_data_list)
    elif not isinstance(dataset_labels, list):
        dataset_labels = [dataset_labels]

    list_lengths = {
        len(vali_data_list), len(emu_pred_list),
        len(emu_pred_err_list), len(dataset_labels),
    }
    if len(list_lengths) != 1:
        raise ValueError(
            "vali_data_list, emu_pred_list, emu_pred_err_list and dataset_labels "
            "must contain the same number of entries"
        )

    nObs = vali_data_list[0].shape[1]  # Assuming all datasets have the same number of observables

    # Standard normal density used as the reference curve (same for every figure).
    x = np.linspace(-6, 6, 300)
    y = (1 / (np.sqrt(2*np.pi))) * np.exp(-0.5 * x**2)

    for obsIdx in range(nObs):
        fig, ax = plt.subplots()
        ax.set_xlabel("A = (GP-truth)/GPerr")
        ax.set_ylabel("p(A)")

        moment_labels = []
        handles = []
        for vali_data, emu_pred, emu_pred_err, label in zip(vali_data_list, emu_pred_list, emu_pred_err_list, dataset_labels):
            A = (emu_pred[:, obsIdx] - vali_data[:, obsIdx]) / np.sqrt(emu_pred_err[:, obsIdx, obsIdx])
            ax.hist(A, bins=25, density=True, histtype='step', label=label)

            # Compute first four moments of the histogram
            mean = np.mean(A)
            variance = np.mean((A - mean)**2.)
            skewness = np.mean((A - mean)**3.) / variance**(3./2.)
            # NOTE(review): this is the plain fourth standardised moment (nothing
            # is subtracted), although it is printed under the symbol γ₂.
            kurtosis = np.mean((A - mean)**4.) / variance**(4./2.)
            moment_string = f'μ={mean:.2f}, σ²= {variance:.2f}, γ₁={skewness:.2f}, γ₂={kurtosis:.2f}'
            # Legend labels must be strings; a (label, text) tuple would be
            # shown as its repr.
            if label is None:
                moment_labels.append(moment_string)
            else:
                moment_labels.append(f"{label}: {moment_string}")

            # Invisible patch: serves only as a handle for the moments legend.
            handles.append(mpatches.Patch(color='none', label=label))

        ax.plot(x, y, color = 'black', zorder = 2, linewidth = 2, label = 'Normal dist.')

        ax.text(0.05, 0.8, "obs {}".format(obsIdx), fontsize=20, transform=ax.transAxes, verticalalignment='top')
        ax.legend()

        # Add a separate legend for the moment strings
        ax2 = ax.twinx()
        ax2.legend(handles, moment_labels, loc='upper left', fontsize=6)
        ax2.axis('off')  # Turn off the axis for the second legend
        plt.tight_layout()
        plt.show()

In [None]:
def plot_emulator_errors_combined(err1, err2, err3, number_training_points, filename, plotformat):
    """Plot the error of three emulators against the number of training points on a grid of panels.

    Panel (i, j) shows observable ``idx = i * cols + j`` on log-log axes, with
    one curve each for "PCGP" (``err1``), "PCSK" (``err2``) and "Scikit GP"
    (``err3``). The figure is saved to ``filename + ".pdf"`` and then shown.

    Parameters
    ----------
    err1, err2, err3 : sequences with one entry per value in ``number_training_points``
        Each entry is indexed by observable (``entry[idx]``).
    number_training_points : sequence
        x values shared by all curves.
    filename : str
        Output path without the ".pdf" extension.
    plotformat : sequence
        ``plotformat[0]`` is the number of rows, ``plotformat[1]`` the number of columns.
    """
    rows = plotformat[0]
    cols = plotformat[1]
    # squeeze=False keeps `axs` two-dimensional, so axs[i, j] also works for a
    # single row or a single column (the default squeezes those cases).
    fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(10, 20), squeeze=False)

    curves = ((err1, "PCGP"), (err2, "PCSK"), (err3, "Scikit GP"))

    for i in range(rows):  # Rows
        for j in range(cols):  # Columns
            idx = i * cols + j
            panel = axs[i, j]
            for errors, label in curves:
                panel.plot(number_training_points, [err[idx] for err in errors], label=label)
            panel.set_title(f"Observable {idx+1}")
            if i == 0 and j == 0:
                # A single legend in the first panel serves the whole grid.
                panel.legend()
            panel.set_xscale('log')
            panel.set_yscale('log')
            if i == rows-1:  # Set x label for bottom row
                panel.set_xlabel("Training Points")
            if j == 0:  # Set y label for leftmost column
                panel.set_ylabel("Relative Uncertainty")

    plt.tight_layout()
    plt.savefig(filename+".pdf")
    plt.show()

def plot_emulator_moments_combined(mom1, mom2, mom3, number_training_points, filename, plotformat):
    """Plot four moments of three emulators against the number of training points.

    Four figures are produced in turn, one per moment index 0..3, labelled
    "Mean X", "Variance X", "Skewness X" and "Excess Kurtosis X". They are
    saved as ``filename + "_mean.pdf"``, ``"_variance.pdf"``,
    ``"_skewness.pdf"`` and ``"_kurtosis.pdf"``. In each figure, panel (i, j)
    shows observable ``idx = i * cols + j`` with one curve each for "PCGP"
    (``mom1``), "PCSK" (``mom2``) and "Scikit GP" (``mom3``).

    Parameters
    ----------
    mom1, mom2, mom3 : sequences with one entry per value in ``number_training_points``
        Each entry is indexed as ``entry[observable][moment index]``.
    number_training_points : sequence
        x values shared by all curves.
    filename : str
        Output path prefix; the moment-specific suffix is appended.
    plotformat : sequence
        ``plotformat[0]`` is the number of rows, ``plotformat[1]`` the number of columns.
    """
    rows = plotformat[0]
    cols = plotformat[1]

    emulators = ((mom1, "PCGP"), (mom2, "PCSK"), (mom3, "Scikit GP"))
    # (index into the per-observable moment entry, y-axis label, file suffix)
    moment_specs = (
        (0, "Mean X", "_mean.pdf"),
        (1, "Variance X", "_variance.pdf"),
        (2, "Skewness X", "_skewness.pdf"),
        (3, "Excess Kurtosis X", "_kurtosis.pdf"),
    )

    for moment_idx, ylabel, suffix in moment_specs:
        # squeeze=False keeps `axs` two-dimensional, so axs[i, j] also works
        # for a single row or a single column.
        fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(10, 20), squeeze=False)
        for i in range(rows):  # Rows
            for j in range(cols):  # Columns
                idx = i * cols + j
                panel = axs[i, j]
                for moments, label in emulators:
                    panel.plot(number_training_points, [mom[idx][moment_idx] for mom in moments], label=label)
                panel.set_title(f"Observable {idx+1}")
                if i == 0 and j == 0:
                    panel.legend()
                if i == rows-1:  # Set x label for bottom row
                    panel.set_xlabel("Training Points")
                if j == 0:  # Set y label for leftmost column
                    panel.set_ylabel(ylabel)

        plt.tight_layout()
        plt.savefig(filename + suffix)
        plt.show()
        # Close this specific figure. plt.clf() only clears whatever figure is
        # current at that moment and does not release the grid created above.
        plt.close(fig)

def plot_emulator_X_combined(X1, X2, X3, filename, plotformat):
    """Histogram the samples of three emulators per observable on a grid of panels.

    Panel (i, j) shows observable ``idx = i * cols + j`` with one density-
    normalised step histogram each for "PCGP" (``X1[idx]``), "PCSK"
    (``X2[idx]``) and "Scikit GP" (``X3[idx]``), plus a standard normal
    density drawn underneath for comparison. The figure is saved to
    ``filename + ".pdf"`` and then shown.

    Parameters
    ----------
    X1, X2, X3 : sequences indexed by observable
        ``X[idx]`` is the sample passed to ``hist`` for that observable.
    filename : str
        Output path without the ".pdf" extension.
    plotformat : sequence
        ``plotformat[0]`` is the number of rows, ``plotformat[1]`` the number of columns.
    """
    rows = plotformat[0]
    cols = plotformat[1]
    # squeeze=False keeps `axs` two-dimensional, so axs[i, j] also works for a
    # single row or a single column (the default squeezes those cases).
    fig, axs = plt.subplots(nrows=rows, ncols=cols, figsize=(10, 20), squeeze=False)

    # Standard normal density evaluated on [-5, 5].
    x = np.linspace(-5, 5, 300)
    y = (1 / (np.sqrt(2*np.pi))) * np.exp(-0.5 * x**2)

    samples = ((X1, "PCGP"), (X2, "PCSK"), (X3, "Scikit GP"))

    for i in range(rows):  # Rows
        for j in range(cols):  # Columns
            idx = i * cols + j
            panel = axs[i, j]
            for values, label in samples:
                panel.hist(values[idx], density=True, histtype='step', label=label)
            panel.set_title(f"Observable {idx+1}")
            if i == 0 and j == 0:
                panel.legend()
            if i == rows-1:  # Set x label for bottom row
                panel.set_xlabel("X")
            if j == 0:  # Set y label for leftmost column
                panel.set_ylabel("p(X)")

            # Drawn after legend() was called, so the reference curve is not
            # listed in the legend; the negative zorder keeps it behind the histograms.
            panel.plot(x, y, color = 'black', zorder = -2, linewidth = 2, label = 'Normal dist.')

    plt.tight_layout()
    plt.savefig(filename+".pdf")
    plt.show()
