In [11]:
# Project helpers are star-imported first so that the explicit imports below
# always take precedence if a helper module happens to export the same name.
from data_utils import *
from model_utils import *

import json
import math
import os

import numpy as np
from tqdm import tqdm

In [2]:
# Location of the training CSV. The default is the original author's local
# checkout; set the BOCS_TRAIN_PATH environment variable to point the notebook
# at a different copy of the dataset without editing this cell.
TRAIN_PATH = os.environ.get(
    'BOCS_TRAIN_PATH',
    '/Users/carlo/VS_Projects/DiscreteBO/Combinatorial_BO-1/Datasets/ai-drug-discovery/training-set.csv',
)

## Load Data

In [3]:
# Column of the training CSV that is used as the response
targetVar = 'sol'

# Read the processed feature frame and the response from the same CSV file
df_X = getProcessedDF(order=5, binaryFlag=True, df_path=TRAIN_PATH)
df_y = getY(label=targetVar, df_path=TRAIN_PATH)

# Plain NumPy views of the full dataset (features are cast to int32)
X = df_X.to_numpy(dtype='int32')
y = df_y.to_numpy()

# d: number of feature columns
# p: 1 + d + C(d, 2), i.e. the count of subsets of the d columns of size 0, 1
#    or 2 (presumably intercept + linear + pairwise terms -- TODO confirm)
d = df_X.shape[1]
p = sum(math.comb(d, k) for k in range(3))

## Setup Oracle

In [4]:
# d and p are already derived from X in the data-loading cell, so they are not
# recomputed here.

# Oracle that evaluates f_bin against the loaded dataset (X, y).
# NOTE(review): the experiment loop further down rebinds `o1` to a new Oracle
# (sigma_2=0.0) at the start of every repetition, so this instance
# (sigma_2=0.2) only matters for code that runs before that loop.
o1 = Oracle(fun=lambda x: f_bin(x, X=X, y=y), N_total=5000, seed=0, sigma_2=0.2)

## Initialize BO Models and Run Experiments

In [None]:
%%time

# Compare BOCS against random search over several independent repetitions and
# store, per repetition, the best objective value seen after each update.

N_EXPERIMENTS = 16    # number of independent repetitions
N_ITERATIONS  = 120   # update() calls per optimiser in each repetition
N_INIT        = 10    # size of the initial design D_0 used to fit BOCS
OUT_DIR       = './data/BOCS_EXP'

# Create the output directory before the (long) run: a missing directory would
# otherwise only surface at the very end and discard all results.
os.makedirs(OUT_DIR, exist_ok=True)

# logging dicts: repetition index -> best-so-far trace (length N_ITERATIONS)
dictRS, dictBOCS = {}, {}

for it in range(N_EXPERIMENTS):
    # fresh oracle per repetition; the same instance is handed to both optimisers
    o1 = Oracle(fun=lambda x: f_bin(x, X=X, y=y), N_total=5000, seed=it*190, sigma_2=0.0)

    # optimisers under comparison
    bocs = BOCS(variant='SDP', oracle=o1, N=250, B=50, T=2, lambd=0.5, mode='min', seed=it*185)
    rs   = RandomSearch(oracle=o1, d=d, seed=it*1478)

    # initial D_0: N_INIT distinct rows of the dataset. A generator seeded by
    # the repetition index is used instead of the unseeded global NumPy RNG,
    # so the starting design is reproducible like every other seeded component.
    rng_init = np.random.default_rng(it)
    idx_0 = rng_init.choice(len(X), N_INIT, replace=False)
    X0 = X[idx_0,:]
    y0 = y[idx_0]

    # fit BOCS on initial data
    bocs.fit(X0, y0)

    # running minima; the leading inf is a sentinel that is dropped on storage
    yList_BOCS = [np.inf]
    yList_RS   = [np.inf]

    for k in tqdm(range(N_ITERATIONS)):
        # Each list is non-increasing by construction, so its last entry is
        # already its minimum; no need to rescan the whole list every step.
        x_new, y_new = bocs.update()
        yList_BOCS.append(min(yList_BOCS[-1], y_new))

        x_new, y_new = rs.update()
        yList_RS.append(min(yList_RS[-1], y_new))

    # Cast to built-in float: json.dump rejects NumPy scalar types other than
    # float64 (the exact type returned by update() is not visible here).
    dictRS[it]   = [float(v) for v in yList_RS[1:]]
    dictBOCS[it] = [float(v) for v in yList_BOCS[1:]]

# store dictionaries (json.dump writes the integer keys as strings)
with open(f'{OUT_DIR}/dictRS_{N_EXPERIMENTS}_00.json', 'w') as f:
    json.dump(dictRS, f)
with open(f'{OUT_DIR}/dictBOCS_{N_EXPERIMENTS}_00.json', 'w') as f:
    json.dump(dictBOCS, f)

 63%|██████████████████████████▌               | 76/120 [02:36<01:35,  2.16s/it]