In [37]:
from py21cmemu import Emulator
import numpy as np
from scipy.stats import qmc
from expandLHS import ExpandLHS
import pandas as pd

# Seed NumPy's legacy global random state.
# NOTE(review): SciPy's qmc engines appear to build their own Generator from
# their own seed argument rather than reading this global state, so this line
# alone may not make the Latin hypercube draws reproducible — confirm.
np.random.seed(42)

In [45]:
def lhs_sampler(n_samples, num_rounds, lower_boundaries, upper_boundaries, column, seed=None):
    """Draw a Latin hypercube design, grow it round by round, and scale it to the bounds.

    The first round draws ``n_samples`` points in the unit hypercube with
    ``scipy.stats.qmc.LatinHypercube`` (``optimization='random-cd'``). Every
    further round wraps the current sample in ``ExpandLHS`` and requests
    ``n_samples`` more points with ``optimize='discrepancy'``. The discrepancy
    of the sample is printed after each round.

    Parameters
    ----------
    n_samples : int
        Number of points drawn in the first round and requested from
        ``ExpandLHS`` in each later round.
    num_rounds : int
        Total number of rounds, including the initial draw. Must be >= 1.
    lower_boundaries, upper_boundaries : sequence of float
        Per-dimension bounds handed to ``qmc.scale``; the length of
        ``lower_boundaries`` sets the dimensionality of the design.
    column : sequence of str
        Column labels of the returned DataFrame, one per dimension.
    seed : optional
        Forwarded to ``qmc.LatinHypercube`` so the initial draw can be
        reproduced. The default ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        The final sample scaled to the requested bounds.
        NOTE(review): presumably ``n_samples * num_rounds`` rows, assuming
        ``ExpandLHS`` returns the existing points plus the new ones — confirm.

    Raises
    ------
    ValueError
        If ``num_rounds`` is smaller than 1.
    """
    # Previously num_rounds <= 0 skipped the loop and quietly returned the
    # initial draw, which hides a mis-specified round count.
    if num_rounds < 1:
        raise ValueError(f'num_rounds must be at least 1, got {num_rounds}')

    # The engine is seeded explicitly: relying on np.random.seed() elsewhere
    # does not pin the QMC engine's own random generator.
    sampler = qmc.LatinHypercube(d=len(lower_boundaries), optimization='random-cd', seed=seed)
    sample = sampler.random(n=n_samples)

    print('Unprogressed sample discrepancy:', qmc.discrepancy(sample))

    # Rounds are numbered from 2 because the initial draw counts as round 1.
    for round_number in range(2, num_rounds + 1):
        # NOTE(review): any randomness inside ExpandLHS is not controlled by
        # `seed` here — check whether it exposes its own seeding.
        eLHS = ExpandLHS(sample)
        sample = eLHS(n_samples, optimize='discrepancy')

        print(f'Progressed sample {round_number} discrepancy:', qmc.discrepancy(sample))

    # Map the unit-hypercube sample onto the requested parameter ranges.
    scaled_sample = qmc.scale(sample, lower_boundaries, upper_boundaries)
    return pd.DataFrame(scaled_sample, columns=column)


In [46]:
# Parameter names and their reference values, listed in the same order.
keys = ['F_STAR10', 'ALPHA_STAR', 'F_ESC10', 'ALPHA_ESC', 'M_TURN', 't_STAR', 'L_X','NU_X_THRESH', 'X_RAY_SPEC_INDEX']
test_param = [-0.98454527, 0.84028646, -1.01608287, 0.03414988, 9.02499104, 0.45168016, 40.0, 500.0, 1.0]

# Name -> value mapping, also kept as a one-row DataFrame.
input_dict = dict(zip(keys, test_param))
df = pd.DataFrame([input_dict])

# Sampling box: each reference value minus/plus 10% of its absolute value.
reference_row = df.iloc[0]
half_width = reference_row.abs() * 0.1
lower_boundaries = (reference_row - half_width).tolist()
upper_boundaries = (reference_row + half_width).tolist()


In [47]:
# Timing note: training 5000 samples for 2 rounds (i.e. 10000 samples with
# 9 parameters each) took 21 minutes and 41.2 seconds on my laptop without
# any speed-up algorithm.
# NOTE(review): the "8960" suffix presumably states the intended row count;
# 3 rounds of 2986 would give 8958 if each round adds n_samples rows — confirm.
training_samples, training_rounds = 2986, 3

training_data_8960 = lhs_sampler(
    n_samples=training_samples,
    num_rounds=training_rounds,
    lower_boundaries=lower_boundaries,
    upper_boundaries=upper_boundaries,
    column=keys,
)




Unprogressed sample discrepancy: 0.00026687041190154304
Progressed sample 2 discrepancy: 0.0002930396888563003
Progressed sample 3 discrepancy: 0.00023005730821834547


In [48]:
# Validation design: same bounds and parameter names as the training design.
# NOTE(review): the "2240" suffix presumably states the intended row count;
# 3 rounds of 746 would give 2238 if each round adds n_samples rows — confirm.
validation_samples, validation_rounds = 746, 3

validation_data_2240 = lhs_sampler(
    n_samples=validation_samples,
    num_rounds=validation_rounds,
    lower_boundaries=lower_boundaries,
    upper_boundaries=upper_boundaries,
    column=keys,
)


Unprogressed sample discrepancy: 0.0011381400807661812
Progressed sample 2 discrepancy: 0.0012183326947861062
Progressed sample 3 discrepancy: 0.0009916127487028525


In [49]:
# Test design: same bounds and parameter names as the training design.
# NOTE(review): the "2800" suffix presumably states the intended row count;
# 3 rounds of 933 would give 2799 if each round adds n_samples rows — confirm.
test_samples, test_rounds = 933, 3

test_data_2800 = lhs_sampler(
    n_samples=test_samples,
    num_rounds=test_rounds,
    lower_boundaries=lower_boundaries,
    upper_boundaries=upper_boundaries,
    column=keys,
)

Unprogressed sample discrepancy: 0.0008844806732533073
Progressed sample 2 discrepancy: 0.0008613728707795865
Progressed sample 3 discrepancy: 0.0006754072906347552


In [51]:
# Write each sampled design to its own HDF5 file; mode='w' replaces any
# existing file of the same name.
training_data_8960.to_hdf('training_data_8960.h5', key='Set8960', mode='w')
validation_data_2240.to_hdf('validation_data_2240.h5', key='Set2240', mode='w')
test_data_2800.to_hdf('test_data_2800.h5', key='Set2800', mode='w')

In [54]:
# Convert every design to a list with one {column name: value} dict per row.
training_dict = training_data_8960.to_dict(orient='records')
validation_dict = validation_data_2240.to_dict(orient='records')
test_dict = test_data_2800.to_dict(orient='records')


In [52]:
# Create the py21cmemu emulator using its default constructor arguments.
emu = Emulator()

2026-02-12 17:48:42.376771: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2026-02-12 17:48:42.681448: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2026-02-12 17:48:44.746743: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2026-02-12 17:48:44.746819: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2026-02-12 17:48:44.752936: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

In [None]:
# TODO: use batching — pass training_dict to emu.predict in chunks instead of
# all records in a single call.
# NOTE(review): this cell has no execution count, so the single-call version
# presumably never ran to completion — confirm.
normed_input_params, output, output_errors = emu.predict(training_dict, verbose = True)



: 

In [30]:
# Load a stored parameter table from HDF5 and turn it into one dict per row.
# NOTE(review): 'training_data1_hdf5.h5' / 'Set1' are not written by any cell
# visible here — presumably left over from an earlier run; confirm the file
# exists or point this at one of the files saved by this notebook.
hdf5_data = pd.read_hdf('training_data1_hdf5.h5', key='Set1')
hdf5_emu = hdf5_data.to_dict(orient='records')