In [None]:
%matplotlib inline
import numpy as np

import os
from os import path
from glob import glob
import sys
import dill
import pickle
sys.path.insert(0, path.abspath('./'))

from src import workdir, parse_model_parameter_file
from src.emulator_BAND import EmulatorBAND
from src.emulator import Emulator


# Split each dataset into two pickle files: one with the training points and one with the test points

Use the first 1000 points (the LHC points, ordered by event ID) for the training of the emulators and the following 100 points (the posterior points) for the second file.

In [None]:
# Folder with the input pickle files and folder receiving the train/posterior split.
# Both are used by plain string concatenation below, so keep the trailing slash.
path_data = '../data/'
path_output = './separate_training_posterior_data/'

# Input pickle files that are split into a training part and a posterior part.
datasets = ['AuAu7.7_dNdy.pkl',
            'AuAu7.7_pTvn.pkl',
            'AuAu19p6_dNdy.pkl',
            'AuAu19p6_PHOBOSdNdeta.pkl',
            'AuAu19p6_pTvn.pkl',
            'AuAu200_dNdy.pkl',
            'AuAu200_PHOBOSdNdeta.pkl',
            'AuAu200_PHOBOSv2eta.pkl',
            'AuAu200_pTvn.pkl']

# Create the output folder if it does not exist yet. exist_ok=True replaces the
# separate existence check and avoids the gap between checking and creating.
os.makedirs(path_output, exist_ok=True)

def check_file_length(filename, expected_length, directory=None):
    """Print whether a pickled object contains the expected number of entries.

    Parameters
    ----------
    filename : str
        Name of the pickle file inside ``directory``.
    expected_length : int
        Value that ``len()`` of the unpickled object is compared against.
    directory : str, optional
        Folder that contains the file. If omitted, the notebook-level
        ``path_output`` is looked up at call time (the original behaviour).
        Other cells rebind ``path_output``, so pass the folder explicitly
        when calling this function outside the cell that defines it.

    Raises
    ------
    FileNotFoundError
        If the file does not exist in the selected folder.
    """
    if directory is None:
        directory = path_output

    # NOTE: unpickling can execute arbitrary code; only use on trusted files.
    with open(os.path.join(directory, filename), 'rb') as f:
        data = pickle.load(f)

    actual_length = len(data)
    if actual_length == expected_length:
        print(f"{filename} has the correct length: {expected_length}")
    else:
        print(f"{filename} does not have the correct length. Expected: {expected_length}, Actual: {actual_length}")

# Number of leading points (ordered by event ID) written to the training file
# and number of subsequent points written to the posterior file.
n_train_points = 1000
n_posterior_points = 100

for dataset in datasets:
    current_path = path_data + dataset
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(current_path, "rb") as pf:
        data = pickle.load(pf)

    print(f"The dataset {dataset} has length {len(data)}.")

    # Order the event IDs numerically (they are converted with int()), then
    # slice: the first block is the training set, the next block the posterior set.
    sorted_event_ids = sorted(data.keys(), key=int)
    train_ids = sorted_event_ids[:n_train_points]
    posterior_ids = sorted_event_ids[n_train_points:n_train_points + n_posterior_points]
    first_data = {event_id: data[event_id] for event_id in train_ids}
    second_data = {event_id: data[event_id] for event_id in posterior_ids}

    # Strip only the real file extension. Splitting on ".p" would cut at the
    # first ".p" anywhere in the name, which is fragile for names with dots.
    stem = os.path.splitext(dataset)[0]
    train_file = f'{stem}_train.pkl'
    posterior_file = f'{stem}_posterior.pkl'

    # Save separated data to pickle files
    with open(f'{path_output}{train_file}', 'wb') as pf1:
        pickle.dump(first_data, pf1)

    with open(f'{path_output}{posterior_file}', 'wb') as pf2:
        pickle.dump(second_data, pf2)

    # Re-read both files and report whether they hold the expected number of points.
    check_file_length(train_file, n_train_points)
    check_file_length(posterior_file, n_posterior_points)

# Perform the training of the emulators for all datasets

After training the emulators, we save them with `dill`, so that they can be reloaded from file for the MCMC later.

Perform the training without the `parameterTrafoPCA` (for $\zeta/s(T)$).

In [None]:
# Model-design file passed to every emulator, folder with the training pickles
# and folder that receives the trained emulators (trailing slashes are required,
# the paths are combined by string concatenation).
model_par = '../data/modelDesign_3DMCGlauber.txt'
path_input = './separate_training_posterior_data/'
path_output = './trained_emulators_no_PCA/'

datasets_train = ['AuAu7.7_dNdy_train.pkl',
                  'AuAu7.7_pTvn_train.pkl',
                  'AuAu19p6_dNdy_train.pkl',
                  'AuAu19p6_PHOBOSdNdeta_train.pkl',
                  'AuAu19p6_pTvn_train.pkl',
                  'AuAu200_dNdy_train.pkl',
                  'AuAu200_PHOBOSdNdeta_train.pkl',
                  'AuAu200_PHOBOSv2eta_train.pkl',
                  'AuAu200_pTvn_train.pkl']

# Create the output folder if needed (no separate existence check required).
os.makedirs(path_output, exist_ok=True)

for dataset in datasets_train:
    training_file = f'{path_input}{dataset}'

    # Train three emulators on the same training file, all with
    # parameterTrafoPCA switched off: EmulatorBAND with method 'PCGP',
    # EmulatorBAND with method 'PCSK', and Emulator with npc=4
    # (npc is presumably the number of principal components - confirm in src.emulator).
    emu1 = EmulatorBAND(training_file, model_par, method='PCGP', logTrafo=False, parameterTrafoPCA=False)
    emu1.trainEmulatorAutoMask()
    emu2 = EmulatorBAND(training_file, model_par, method='PCSK', logTrafo=False, parameterTrafoPCA=False)
    emu2.trainEmulatorAutoMask()
    emu3 = Emulator(training_file, model_par, npc = 4, logTrafo=False, parameterTrafoPCA=False)
    emu3.trainEmulatorAutoMask()

    # Strip only the real extension; splitting on ".p" would cut at the first
    # ".p" anywhere in the file name.
    stem = os.path.splitext(dataset)[0]

    # Serialize each trained emulator with dill so it can be reloaded later.
    for tag, emulator in (('PCGP', emu1), ('PCSK', emu2), ('Scikit', emu3)):
        with open(f'{path_output}{stem}_{tag}_trained.sav', 'wb') as f:
            dill.dump(emulator, f)

Perform the training with the `parameterTrafoPCA` (for $\zeta/s(T)$, $\eta/s(\mu_B)$, $\langle y_{\rm loss}\rangle(y_{\rm init})$).

In [None]:
# Model-design file passed to every emulator, folder with the training pickles
# and folder that receives the trained emulators (trailing slashes are required,
# the paths are combined by string concatenation).
model_par = '../data/modelDesign_3DMCGlauber.txt'
path_input = './separate_training_posterior_data/'
path_output = './trained_emulators_PCA/'

datasets_train = ['AuAu7.7_dNdy_train.pkl',
                  'AuAu7.7_pTvn_train.pkl',
                  'AuAu19p6_dNdy_train.pkl',
                  'AuAu19p6_PHOBOSdNdeta_train.pkl',
                  'AuAu19p6_pTvn_train.pkl',
                  'AuAu200_dNdy_train.pkl',
                  'AuAu200_PHOBOSdNdeta_train.pkl',
                  'AuAu200_PHOBOSv2eta_train.pkl',
                  'AuAu200_pTvn_train.pkl']

# Create the output folder if needed (no separate existence check required).
os.makedirs(path_output, exist_ok=True)

for dataset in datasets_train:
    training_file = f'{path_input}{dataset}'

    # Train three emulators on the same training file, all with
    # parameterTrafoPCA switched on: EmulatorBAND with method 'PCGP',
    # EmulatorBAND with method 'PCSK', and Emulator with npc=4
    # (npc is presumably the number of principal components - confirm in src.emulator).
    emu1 = EmulatorBAND(training_file, model_par, method='PCGP', logTrafo=False, parameterTrafoPCA=True)
    emu1.trainEmulatorAutoMask()
    emu2 = EmulatorBAND(training_file, model_par, method='PCSK', logTrafo=False, parameterTrafoPCA=True)
    emu2.trainEmulatorAutoMask()
    emu3 = Emulator(training_file, model_par, npc = 4, logTrafo=False, parameterTrafoPCA=True)
    emu3.trainEmulatorAutoMask()

    # Strip only the real extension; splitting on ".p" would cut at the first
    # ".p" anywhere in the file name.
    stem = os.path.splitext(dataset)[0]

    # Serialize each trained emulator with dill so it can be reloaded later.
    for tag, emulator in (('PCGP', emu1), ('PCSK', emu2), ('Scikit', emu3)):
        with open(f'{path_output}{stem}_{tag}_trained.sav', 'wb') as f:
            dill.dump(emulator, f)

Generate an 'experimental' dataset from one of the posterior points for testing.

In [None]:
# The posterior pickles are read from, and the combined example file is written
# to, the same folder (trailing slash required: paths are concatenated as strings).
path_data = './separate_training_posterior_data/'
path_output = './separate_training_posterior_data/'

# The order of this list fixes the column order of the concatenated 'obs' array.
datasets_posterior = [
            'AuAu200_dNdy_posterior.pkl',
            'AuAu200_PHOBOSdNdeta_posterior.pkl',
            'AuAu200_PHOBOSv2eta_posterior.pkl',
            'AuAu200_pTvn_posterior.pkl',
            'AuAu19p6_dNdy_posterior.pkl',
            'AuAu19p6_PHOBOSdNdeta_posterior.pkl',
            'AuAu19p6_pTvn_posterior.pkl',
            'AuAu7.7_dNdy_posterior.pkl',
            'AuAu7.7_pTvn_posterior.pkl']

# Create the output folder if needed (no separate existence check required).
os.makedirs(path_output, exist_ok=True)

first_event = []
for dataset in datasets_posterior:
    current_path = path_data + dataset
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(current_path, "rb") as pf:
        data = pickle.load(pf)

    # Pick the event with the numerically smallest ID. Comparing the keys as
    # integers matches the ordering used when the posterior files were split;
    # a plain string sort would only agree while all IDs have the same length.
    first_id = min(data.keys(), key=int)
    test_data = {first_id: data[first_id]}
    first_event.append(test_data)

# Every entry of first_event is a one-item dict {event_id: event_record}.
# Read the event ID from the data instead of hard-coding '1000', then join the
# 'obs' arrays of all datasets along axis 1 in a single concatenate call.
reference_id = next(iter(first_event[0]))
obs_blocks = [next(iter(event_dict.values()))['obs'] for event_dict in first_event]
first_event[0][reference_id]['obs'] = np.concatenate(obs_blocks, axis=1)

# Save the combined event (first dataset's record with the extended 'obs') to a pickle file
with open(f'{path_output}example_data_test.pkl', 'wb') as pf1:
    pickle.dump(first_event[0], pf1)


# Delete the parameters at array indices 16 and 17 (`bulk_max_rhob2`, `bulk_max_rhob4`) from the pkl files

In [None]:
# Input folder with the original pickles and output folder for the reduced copies
# (trailing slashes required: paths are concatenated as strings).
path_data = '../data/'
path_output = '../data_new/'
datasets = ['AuAu7.7_dNdy.pkl',
            'AuAu7.7_pTvn.pkl',
            'AuAu19p6_dNdy.pkl',
            'AuAu19p6_PHOBOSdNdeta.pkl',
            'AuAu19p6_pTvn.pkl',
            'AuAu200_dNdy.pkl',
            'AuAu200_PHOBOSdNdeta.pkl',
            'AuAu200_PHOBOSv2eta.pkl',
            'AuAu200_pTvn.pkl',
            'AuAu7.7_logdNdy.pkl',
            'AuAu19p6_logdNdy.pkl',
            'AuAu19p6_logPHOBOSdNdeta.pkl',
            'AuAu200_logdNdy.pkl',
            'AuAu200_logPHOBOSdNdeta.pkl'
            ]

# Array indices removed from every event's 'parameter' entry.
parameter_indices_to_delete = [16, 17]

# Create the output folder if needed (no separate existence check required).
os.makedirs(path_output, exist_ok=True)

for dataset in datasets:
    current_path = path_data + dataset
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(current_path, "rb") as pf:
        data = pickle.load(pf)

    print(f"The dataset {dataset} has length {len(data)}.")

    # Rebuild the dict with the event IDs in numerical order.
    sorted_event_ids = sorted(data.keys(), key=int)
    first_data = {event_id: data[event_id] for event_id in sorted_event_ids}

    # Iterate over the keys themselves rather than re-deriving them through an
    # f-string, which would miss any key that is not already a string.
    for event_id in sorted_event_ids:
        # np.delete is called without `axis`, so it works on the flattened
        # array; this assumes 'parameter' is one-dimensional - TODO confirm.
        first_data[event_id]['parameter'] = np.delete(
            first_data[event_id]['parameter'], parameter_indices_to_delete)

    # Strip only the real extension; splitting on ".p" would cut at the first
    # ".p" anywhere in the file name.
    stem = os.path.splitext(dataset)[0]

    # Save new data to pickle files
    with open(f'{path_output}{stem}.pkl', 'wb') as pf1:
        pickle.dump(first_data, pf1)