In [1]:
%matplotlib inline
import numpy as np

import os
from os import path
import sys
import dill
import pickle
sys.path.insert(0, path.abspath('./'))

from src.emulator_BAND import EmulatorBAND
from src.emulator import Emulator


# Split each dataset into a training pickle file and a second pickle file with the test points

Use the last 5 posterior points for the second file and the 1000 LHC points + 95 posterior points for the training of the emulators.

In [None]:
# Input directory with the full pickle files and output directory for the split files.
path_data = '../data/'
path_output = './separate_training_posterior_data_1095/'

# One pickle file per collision system / observable set.
datasets = [
    'AuAu7.7_dNdy.pkl', 'AuAu7.7_pTvn.pkl',
    'AuAu19p6_dNdy.pkl', 'AuAu19p6_PHOBOSdNdeta.pkl', 'AuAu19p6_pTvn.pkl',
    'AuAu200_dNdy.pkl', 'AuAu200_PHOBOSdNdeta.pkl', 'AuAu200_PHOBOSv2eta.pkl',
    'AuAu200_pTvn.pkl',
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

def check_file_length(filename, expected_length):
    """Print whether a pickled object in ``path_output`` has the expected length.

    Parameters
    ----------
    filename : str
        Name of the pickle file; it is appended directly to the module-level
        ``path_output`` string.
    expected_length : int
        Number of entries the unpickled object is expected to contain.

    Returns
    -------
    None
        The verdict is only printed; a mismatch does not raise.
    """
    with open(f"{path_output}{filename}", 'rb') as handle:
        loaded = pickle.load(handle)

    actual_length = len(loaded)
    if actual_length != expected_length:
        print(f"{filename} does not have the correct length. Expected: {expected_length}, Actual: {actual_length}")
        return
    print(f"{filename} has the correct length: {expected_length}")

for dataset in datasets:
    # Load every design point stored for this observable set.
    with open(path_data + dataset, "rb") as source_file:
        data = pickle.load(source_file)

    print(f"The dataset {dataset} has length {len(data)}.")

    # Order the event IDs numerically: the first 1095 go to the training file,
    # the following five (positions 1095..1099) go to the posterior file.
    sorted_event_ids = sorted(data, key=int)
    first_data = {}
    for event_id in sorted_event_ids[:1095]:
        first_data[event_id] = data[event_id]
    second_data = {}
    for event_id in sorted_event_ids[1095:1100]:
        second_data[event_id] = data[event_id]

    # Output names reuse the part of the input name in front of the first ".p".
    stem = dataset.split(".p")[0]
    train_name = f'{stem}_train.pkl'
    posterior_name = f'{stem}_posterior.pkl'

    with open(f'{path_output}{train_name}', 'wb') as train_file:
        pickle.dump(first_data, train_file)

    with open(f'{path_output}{posterior_name}', 'wb') as posterior_file:
        pickle.dump(second_data, posterior_file)

    # Re-read both files and report whether they hold the expected number of points.
    check_file_length(train_name, 1095)
    check_file_length(posterior_name, 5)

# Perform the training of the emulators for all datasets

After training the emulators, we save them with `dill` so that they can be reloaded from file for the MCMC later.

Perform the training without the `parameterTrafoPCA` (for $\zeta/s(T)$, $\eta/s(\mu_B)$, $\langle y_{\rm loss}\rangle(y_{\rm init})$).

In [None]:
model_par = '../data/modelDesign_3DMCGlauber.txt'
# NOTE(review): the split cell of this notebook writes its *_train.pkl files to
# './separate_training_posterior_data_1095/'. Confirm that the directory below
# is really the intended training input.
path_input = './separate_training_posterior_data/'
path_output = './trained_emulators_no_PCA/'

# Training files, one per collision system / observable set.
datasets_train = [
    f'{stem}_train.pkl'
    for stem in (
        'AuAu7.7_dNdy', 'AuAu7.7_pTvn',
        'AuAu19p6_dNdy', 'AuAu19p6_PHOBOSdNdeta', 'AuAu19p6_pTvn',
        'AuAu200_dNdy', 'AuAu200_PHOBOSdNdeta', 'AuAu200_PHOBOSv2eta',
        'AuAu200_pTvn',
    )
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

for dataset in datasets_train:
    training_file = f'{path_input}{dataset}'

    # Three emulators per dataset, all without log and without parameter PCA transformation.
    emu1 = EmulatorBAND(training_file, model_par, method='PCGP', logTrafo=False, parameterTrafoPCA=False)
    emu1.trainEmulatorAutoMask()
    emu2 = EmulatorBAND(training_file, model_par, method='PCSK', logTrafo=False, parameterTrafoPCA=False)
    emu2.trainEmulatorAutoMask()
    emu3 = Emulator(training_file, model_par, npc = 4, logTrafo=False, parameterTrafoPCA=False)
    emu3.trainEmulatorAutoMask()

    # Serialise each trained emulator with dill; the file name carries the method label.
    file_stem = dataset.split(".p")[0]
    for label, emulator in (('PCGP', emu1), ('PCSK', emu2), ('Scikit', emu3)):
        with open(f'{path_output}{file_stem}_{label}_trained.sav', 'wb') as f:
            dill.dump(emulator, f)

Perform the training with the `parameterTrafoPCA` (for $\zeta/s(T)$, $\eta/s(\mu_B)$, $\langle y_{\rm loss}\rangle(y_{\rm init})$).

In [None]:
model_par = '../data/modelDesign_3DMCGlauber.txt'
# NOTE(review): the split cell of this notebook writes its *_train.pkl files to
# './separate_training_posterior_data_1095/'. Confirm that the directory below
# is really the intended training input.
path_input = './separate_training_posterior_data/'
path_output = './trained_emulators_PCA/'

# Training files, one per collision system / observable set.
datasets_train = [
    f'{stem}_train.pkl'
    for stem in (
        'AuAu7.7_dNdy', 'AuAu7.7_pTvn',
        'AuAu19p6_dNdy', 'AuAu19p6_PHOBOSdNdeta', 'AuAu19p6_pTvn',
        'AuAu200_dNdy', 'AuAu200_PHOBOSdNdeta', 'AuAu200_PHOBOSv2eta',
        'AuAu200_pTvn',
    )
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

for dataset in datasets_train:
    training_file = f'{path_input}{dataset}'

    # Three emulators per dataset, without log transformation but with parameterTrafoPCA enabled.
    emu1 = EmulatorBAND(training_file, model_par, method='PCGP', logTrafo=False, parameterTrafoPCA=True)
    emu1.trainEmulatorAutoMask()
    emu2 = EmulatorBAND(training_file, model_par, method='PCSK', logTrafo=False, parameterTrafoPCA=True)
    emu2.trainEmulatorAutoMask()
    emu3 = Emulator(training_file, model_par, npc = 4, logTrafo=False, parameterTrafoPCA=True)
    emu3.trainEmulatorAutoMask()

    # Serialise each trained emulator with dill; the file name carries the method label.
    file_stem = dataset.split(".p")[0]
    for label, emulator in (('PCGP', emu1), ('PCSK', emu2), ('Scikit', emu3)):
        with open(f'{path_output}{file_stem}_{label}_trained.sav', 'wb') as f:
            dill.dump(emulator, f)

Generate an 'experimental' dataset from one of the posterior points for closure testing.

In [3]:
# The posterior files are read from and the merged test point is written to the same folder.
path_data = './separate_training_posterior_data_1095/'
path_output = './separate_training_posterior_data_1095/'

datasets_posterior = [
    f'{stem}_posterior.pkl'
    for stem in (
        'AuAu200_dNdy', 'AuAu200_PHOBOSdNdeta', 'AuAu200_PHOBOSv2eta', 'AuAu200_pTvn',
        'AuAu19p6_dNdy', 'AuAu19p6_PHOBOSdNdeta', 'AuAu19p6_pTvn',
        'AuAu7.7_dNdy', 'AuAu7.7_pTvn',
    )
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

event_data = []
for dataset in datasets_posterior:
    with open(path_data + dataset, "rb") as pf:
        data = pickle.load(pf)

    # Keep only the fifth key in sorted order (slice 4:5); the code below
    # assumes this key is '1099'.
    selected_ids = sorted(data)[4:5]
    test_data = {event_id: data[event_id] for event_id in selected_ids}
    event_data.append(test_data)

# Append the 'obs' columns of every further dataset to those of the first one,
# so that the first entry ends up holding all observables of event '1099'.
combined_event = event_data[0]['1099']
for event_dict in event_data[1:]:
    combined_event['obs'] = np.concatenate(
        (combined_event['obs'], event_dict['1099']['obs']), axis=1
    )

# Store the merged single-event dictionary.
with open(f'{path_output}example_data_test_point1099.pkl', 'wb') as pf1:
    pickle.dump(event_data[0], pf1)


In [2]:
# The posterior files are read from and the merged test point is written to the same folder.
path_data = './separate_training_posterior_data_1095/'
path_output = './separate_training_posterior_data_1095/'

datasets_posterior = [
    f'{stem}_posterior.pkl'
    for stem in (
        'AuAu200_dNdy', 'AuAu200_PHOBOSdNdeta', 'AuAu200_PHOBOSv2eta', 'AuAu200_pTvn',
        'AuAu19p6_dNdy', 'AuAu19p6_PHOBOSdNdeta', 'AuAu19p6_pTvn',
        'AuAu7.7_dNdy', 'AuAu7.7_pTvn',
    )
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

event_data = []
for dataset in datasets_posterior:
    with open(path_data + dataset, "rb") as pf:
        data = pickle.load(pf)

    # Keep only the third key in sorted order (slice 2:3); the code below
    # assumes this key is '1097'.
    selected_ids = sorted(data)[2:3]
    test_data = {event_id: data[event_id] for event_id in selected_ids}
    event_data.append(test_data)

# Append the 'obs' columns of every further dataset to those of the first one,
# so that the first entry ends up holding all observables of event '1097'.
combined_event = event_data[0]['1097']
for event_dict in event_data[1:]:
    combined_event['obs'] = np.concatenate(
        (combined_event['obs'], event_dict['1097']['obs']), axis=1
    )

# Store the merged single-event dictionary.
with open(f'{path_output}example_data_test_point1097.pkl', 'wb') as pf1:
    pickle.dump(event_data[0], pf1)


Generate the test point for the logarithmic training.

In [5]:
# The posterior files are read from and the log-transformed test point is written to the same folder.
path_data = './separate_training_posterior_data_1095/'
path_output = './separate_training_posterior_data_1095/'

datasets_posterior = [
    f'{stem}_posterior.pkl'
    for stem in (
        'AuAu200_dNdy', 'AuAu200_PHOBOSdNdeta', 'AuAu200_PHOBOSv2eta', 'AuAu200_pTvn',
        'AuAu19p6_dNdy', 'AuAu19p6_PHOBOSdNdeta', 'AuAu19p6_pTvn',
        'AuAu7.7_dNdy', 'AuAu7.7_pTvn',
    )
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

event_data = []
for dataset in datasets_posterior:
    with open(path_data + dataset, "rb") as pf:
        data = pickle.load(pf)

    # Keep only the fifth key in sorted order (slice 4:5); the rest of this
    # cell assumes this key is '1099'.
    selected_ids = sorted(data)[4:5]
    test_data = {event_id: data[event_id] for event_id in selected_ids}
    event_data.append(test_data)

# Log-transform the multiplicity datasets. The indices refer to positions in
# `datasets_posterior`: 0, 1, 4, 5 and 7 are the dNdy / PHOBOSdNdeta files.
datasets_to_modify = [0,1,4,5,7]
for i in datasets_to_modify:
    obs = event_data[i]['1099']['obs']
    # Row 1 must be rescaled with the *untransformed* row 0, so it is updated
    # before row 0 is overwritten with its logarithm. Doing it the other way
    # round divides by log(value) instead of value.
    # (Presumably row 0 = value and row 1 = its uncertainty, i.e.
    # sigma_log = sigma / value -- TODO confirm.)
    obs[1,:] = np.abs(obs[1,:]/obs[0,:] + 1e-30)
    obs[0,:] = np.log(np.abs(obs[0,:]) + 1e-30)

# Append the 'obs' columns of every further dataset to those of the first one,
# so that the first entry ends up holding all observables of event '1099'.
combined_event = event_data[0]['1099']
for event_dict in event_data[1:]:
    combined_event['obs'] = np.concatenate(
        (combined_event['obs'], event_dict['1099']['obs']), axis=1
    )

# Store the merged single-event dictionary.
with open(f'{path_output}example_data_test_point1099_LOG.pkl', 'wb') as pf1:
    pickle.dump(event_data[0], pf1)

Generate one dataset from all of the training and posterior points.

In [8]:
# Read the full pickle files and write a single merged file with all observables.
path_data = '../data/'
path_output = './full_data_one_pkl/'

# Despite the variable name, these are the full datasets (no '_posterior' suffix).
datasets_posterior = [
    'AuAu200_dNdy.pkl', 'AuAu200_PHOBOSdNdeta.pkl', 'AuAu200_PHOBOSv2eta.pkl',
    'AuAu200_pTvn.pkl',
    'AuAu19p6_dNdy.pkl', 'AuAu19p6_PHOBOSdNdeta.pkl', 'AuAu19p6_pTvn.pkl',
    'AuAu7.7_dNdy.pkl', 'AuAu7.7_pTvn.pkl',
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

event_data = []
for dataset in datasets_posterior:
    with open(path_data + dataset, "rb") as pf:
        data = pickle.load(pf)

    # Copy all events of this dataset into a new dict, in sorted key order.
    sorted_keys = sorted(data)
    test_data = {event_id: data[event_id] for event_id in sorted_keys}
    event_data.append(test_data)

# `sorted_keys` still holds the keys of the last file read above; the merge
# therefore relies on every dataset providing exactly these event IDs.
merged = event_data[0]
for other in event_data[1:]:
    for event in sorted_keys:
        # Append this dataset's 'obs' columns to the merged event.
        merged[event]['obs'] = np.concatenate(
            (merged[event]['obs'], other[event]['obs']), axis=1
        )

# Store the merged dictionary with all points and all observables.
with open(f'{path_output}all_points_all_observables.pkl', 'wb') as pf1:
    pickle.dump(event_data[0], pf1)

# Delete parameters 16 and 17 (bulk_max_rhob2, bulk_max_rhob4) from the pkl files

In [None]:
# Originals are read from `path_data`; the reduced copies go to a separate folder.
path_data = '../data/'
path_output = '../data_new/'

datasets = [
    'AuAu7.7_dNdy.pkl', 'AuAu7.7_pTvn.pkl',
    'AuAu19p6_dNdy.pkl', 'AuAu19p6_PHOBOSdNdeta.pkl', 'AuAu19p6_pTvn.pkl',
    'AuAu200_dNdy.pkl', 'AuAu200_PHOBOSdNdeta.pkl', 'AuAu200_PHOBOSv2eta.pkl',
    'AuAu200_pTvn.pkl',
    'AuAu7.7_logdNdy.pkl',
    'AuAu19p6_logdNdy.pkl', 'AuAu19p6_logPHOBOSdNdeta.pkl',
    'AuAu200_logdNdy.pkl', 'AuAu200_logPHOBOSdNdeta.pkl',
]

# Create the output folder on first use.
output_folder_missing = not os.path.exists(path_output)
if output_folder_missing:
    os.makedirs(path_output)

for dataset in datasets:
    with open(path_data + dataset, "rb") as pf:
        data = pickle.load(pf)

    print(f"The dataset {dataset} has length {len(data)}.")

    # Rebuild the dictionary with the event IDs in numerical order.
    sorted_event_ids = sorted(data, key=int)
    first_data = {event_id: data[event_id] for event_id in sorted_event_ids}

    # The first event serves as the reference for the before/after report.
    reference_key = f'{sorted_event_ids[0]}'
    print("Parameters before =", len(first_data[reference_key]['parameter']))

    # Drop the entries at indices 16 and 17 from every event's 'parameter' array.
    for event_id in sorted_event_ids:
        entry = first_data[f'{event_id}']
        entry['parameter'] = np.delete(entry['parameter'], [16, 17])

    print("Parameters after =", len(first_data[reference_key]['parameter']))

    # Write the reduced dataset under the same file name into the new folder.
    file_stem = dataset.split(".p")[0]
    with open(f'{path_output}{file_stem}.pkl', 'wb') as pf1:
        pickle.dump(first_data, pf1)