DVARS code

In [4]:
from bayesian_optimization import BayesianOptimization
import pandas as pd 
from botorch.test_functions import Ackley
from botorch.test_functions import Levy 
from botorch.test_functions import Rosenbrock
from varstool import DVARS
import seaborn as sns
import matplotlib.pyplot as plt

# Variables that determine the size of the dataset
N_INITIAL = 50
EPOCHS = 10
BATCH_SIZE = 5
DIM = 2
LOWER = -10
UPPER = 10

optimizer = BayesianOptimization(fun=Levy(dim=DIM,negate=True), 
                                 batch_size=BATCH_SIZE, 
                                 dim=DIM, 
                                 epochs=EPOCHS, 
                                 n_init=N_INITIAL, 
                                 lower_bound=LOWER,
                                 upper_bound=UPPER,
                                 seed=3,
                                 acqf_type='qUCB')

x_max, y_max = optimizer.run()
data = optimizer.get_data()
full = optimizer.format(data, dim=DIM, n_init=N_INITIAL, batch_size=BATCH_SIZE, epochs=EPOCHS)
display(full)

vars_data = full.drop(columns=['Batch'])

display(vars_data)
vars_data.to_csv('vars_data.csv', index=False)
h = 0.3
experiment = DVARS(data_file = 'vars_data.csv',
                   outvarname = 'y',
                   ivars_range = h,
                   phi0=1,
                   phi_max=1e6,
                   tol = 1e-6,
                   correlation_func_type='linear',
                   report_verbose=False)

experiment.run()
display(experiment.ratios)

Unnamed: 0,Batch,x1,x2,y
0,0.0,-8.630697,-5.509461,-15.177179
1,0.0,2.429707,4.461679,-3.034724
2,0.0,9.223992,-0.138533,-40.431662
3,0.0,-0.581540,9.872013,-10.784934
4,0.0,-4.725866,-2.768824,-6.485489
...,...,...,...,...
95,10.0,-4.200538,7.433987,-6.014844
96,10.0,0.829132,0.503622,-0.053322
97,10.0,3.667881,9.389405,-7.052950
98,10.0,8.585462,-2.151338,-18.928258


Unnamed: 0,x1,x2,y
0,-8.630697,-5.509461,-15.177179
1,2.429707,4.461679,-3.034724
2,9.223992,-0.138533,-40.431662
3,-0.581540,9.872013,-10.784934
4,-4.725866,-2.768824,-6.485489
...,...,...,...
95,-4.200538,7.433987,-6.014844
96,0.829132,0.503622,-0.053322
97,3.667881,9.389405,-7.052950
98,8.585462,-2.151338,-18.928258


Unnamed: 0,x1,x2
0.3,0.897739,0.102261


In [22]:
from bayesian_optimization import BayesianOptimization
import pandas as pd 
from botorch.test_functions import Levy 
from varstool import DVARS
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import numpy.linalg as linalg

N_INITIAL = 50
EPOCHS = 10
BATCH_SIZE = 5
DIM = 2
LOWER = -10
UPPER = 10
REPETITIONS = 20  

all_ratios = []

for i in range(REPETITIONS):
    try:
        optimizer = BayesianOptimization(
            fun=Levy(dim=DIM, negate=True), 
            batch_size=BATCH_SIZE, 
            dim=DIM, 
            epochs=EPOCHS, 
            n_init=N_INITIAL, 
            lower_bound=LOWER,
            upper_bound=UPPER,
            seed=i,
            acqf_type='qUCB'
        )

        x_max, y_max = optimizer.run()
        data = optimizer.get_data()
        full = optimizer.format(data, dim=DIM, n_init=N_INITIAL, batch_size=BATCH_SIZE, epochs=EPOCHS)
        
        vars_data = full.drop(columns=['Batch'])
        vars_data.to_csv('vars_data.csv', index=False)
        
        h = 0.3
        experiment = DVARS(
            data_file='vars_data.csv',
            outvarname='y',
            ivars_range=h,
            phi0=1,
            phi_max=1e6,
            tol=1e-6,
            correlation_func_type='linear',
            report_verbose=False
        )
        
        experiment.run()
        all_ratios.append(experiment.ratios)

    except linalg.LinAlgError:
        print("LinAlgError encountered, skipping this repetition.")


if all_ratios:  
    ratios_array = np.array(all_ratios)
    mean_ratios = np.mean(ratios_array, axis=0)
    std_ratios = np.std(ratios_array, axis=0)
    print(ratios_array)
    print("Mean Ratios:", mean_ratios)
    print("Standard Deviation of Ratios:", std_ratios)
else:
    print("No valid data to process.")

LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
LinAlgError encountered, skipping this repetition.
[[[0.90161907 0.09838093]]

 [[0.91453217 0.08546783]]

 [[0.88491622 0.11508378]]

 [[0.90743325 0.09256675]]

 [[0.88708447 0.11291553]]

 [[0.89868119 0.10131881]]

 [[0.90003798 0.09996202]]

 [[0.8967867  0.1032133 ]]]
Mean Ratios: [[0.89888638 0.10111362]]
Standard Deviation of Ratios: [[0.00912095 0.00912095]]


VARS code

In [16]:
import torch
DIM = 2
LOWER = -10
UPPER = 10

# Initialize the Levy function
fun = Levy(dim=DIM, negate=True)
fun.bounds[0, :].fill_(LOWER)  # Bounds of the function
fun.bounds[1, :].fill_(UPPER)

def wrapped_levy_function(x):
    tensor_input = torch.tensor(x.values, dtype=torch.float32)
    
    if tensor_input.dim() == 1:
        tensor_input = tensor_input.unsqueeze(0)
    
    result = fun(tensor_input)
    
    return result.item()


df = pd.DataFrame([[1, 1]], columns=['x1', 'x2'])
output = wrapped_levy_function(df)
print(output)

-7.64274186065882e-15


In [17]:
import numpy as np
import pandas as pd
from varstool import VARS, Model

my_parameters = { 'x1': [ -10, 10 ], 
                  'x2': [ -10, 10 ], }


levy_model = Model(wrapped_levy_function)

# Initialize the VARS object
experiment_1 = VARS(parameters     = my_parameters,
                    num_stars      = 100,
                    delta_h        = 0.1,
                    ivars_scales   = (0.1, 0.3, 0.5),
                    sampler        = 'sobol_seq',
                    seed           = 123456789,
                    model          = levy_model,
                    bootstrap_flag = True,
                    bootstrap_size = 100,
                    bootstrap_ci   = 0.9,
                    report_verbose = True,
                    )

# Run the analysis
experiment_1.run_online()
cols = experiment_1.parameters.keys()
df = experiment_1.ivars[cols]

row_sums = df.sum(axis=1)

normalized_df = df.div(row_sums, axis=0)
display(normalized_df)

model evaluation: 100%|██████████| 1900/1900 [00:00<00:00, 3972.57it/s]
building pairs: 100%|██████████| 200/200 [00:00<00:00, 327.90it/s]
VARS analysis: 100%|██████████| 10/10 [00:00<00:00, 219.16it/s]
factor ranking: 100%|██████████| 2/2 [00:00<00:00, 111.29it/s]
bootstrapping: 100%|██████████| 100/100 [00:15<00:00,  6.56it/s]
  result_bs_sobol_ranking[param].eq(st_factor_ranking[param][0]).sum() / bootstrap_size)


In [18]:
import matplotlib.pyplot as plt
cols = experiment_1.parameters.keys()
df = experiment_1.ivars[cols]

row_sums = df.sum(axis=1)

normalized_df = df.div(row_sums, axis=0)
display(normalized_df)

Unnamed: 0,x1,x2
0.1,0.985902,0.014098
0.3,0.962494,0.037506
0.5,0.953577,0.046423


In [19]:
import numpy as np
import pandas as pd
from varstool import VARS, Model

my_parameters = {'x1': [-10, 10], 'x2': [-10, 10]}
levy_model = Model(wrapped_levy_function)

def run_experiment(seed):
    experiment = VARS(
        parameters=my_parameters,
        num_stars=100,
        delta_h=0.1,
        ivars_scales=(0.1, 0.3, 0.5),
        sampler='sobol_seq',
        seed=seed,
        model=levy_model,
        bootstrap_flag=True,
        bootstrap_size=100,
        bootstrap_ci=0.9,
        report_verbose=True,
    )
    experiment.run_online()
    cols = experiment.parameters.keys()
    df = experiment.ivars[cols]
    row_sums = df.sum(axis=1)
    normalized_df = df.div(row_sums, axis=0)
    return normalized_df

In [20]:
num_repeats = 10
results = []

for i in range(num_repeats):
    seed = 123456789 + i  # Use different seeds
    normalized_df = run_experiment(seed)
    results.append(normalized_df)

all_results = pd.concat(results)

mean_df = all_results.groupby(all_results.index).mean()
std_df = all_results.groupby(all_results.index).std()

display(mean_df)
display(std_df)

model evaluation: 100%|██████████| 1900/1900 [00:00<00:00, 2577.23it/s]
building pairs: 100%|██████████| 200/200 [00:00<00:00, 359.19it/s]
VARS analysis: 100%|██████████| 10/10 [00:00<00:00, 261.71it/s]
factor ranking: 100%|██████████| 2/2 [00:00<00:00, 377.22it/s]
bootstrapping: 100%|██████████| 100/100 [00:14<00:00,  6.97it/s]
  result_bs_sobol_ranking[param].eq(st_factor_ranking[param][0]).sum() / bootstrap_size)
model evaluation: 100%|██████████| 1900/1900 [00:00<00:00, 4956.09it/s]
building pairs: 100%|██████████| 200/200 [00:00<00:00, 369.58it/s]
VARS analysis: 100%|██████████| 10/10 [00:00<00:00, 294.65it/s]
factor ranking: 100%|██████████| 2/2 [00:00<00:00, 690.82it/s]
bootstrapping: 100%|██████████| 100/100 [00:14<00:00,  7.10it/s]
  result_bs_sobol_ranking[param].eq(st_factor_ranking[param][0]).sum() / bootstrap_size)
model evaluation: 100%|██████████| 1900/1900 [00:00<00:00, 4824.82it/s]
building pairs: 100%|██████████| 200/200 [00:00<00:00, 359.63it/s]
VARS analysis: 100%|█

Unnamed: 0,x1,x2
0.1,0.985902,0.014098
0.3,0.962494,0.037506
0.5,0.953577,0.046423


Unnamed: 0,x1,x2
0.1,2.605319e-10,2.605318e-10
0.3,6.424707e-10,6.424707e-10
0.5,7.945072e-10,7.945072e-10
