## Setup

The first step is to make sure the `ART` library is available in your kernel.

Clone the corresponding git repository:


`git clone https://github.com/JBEI/AutomatedRecommendationTool.git`  

or pull the latest version. 

Information about licensing ART is available at https://github.com/JBEI/ART.



We can then add the library to the path and do the necessary imports:


In [3]:
import sys
sys.path.append('../AutomatedRecommendationTool')        # Make sure this is the location for the ART library 
    
from art.core import *                                  


## User parameters

Define the true response functions:

In [None]:
def true_function(x, dimension):
    """Evaluate the negated, dimension-averaged quartic benchmark response.

    Each of the first ``dimension`` components of ``x`` contributes
    ``x_i**4 - 16*x_i**2 + 5*x_i``.  The contributions are summed, scaled by
    ``1/dimension`` and the sign is flipped.

    Parameters
    ----------
    x : sequence or array
        Indexed as ``x[i]`` for ``i`` in ``range(dimension)``.  Each entry
        only needs to support ``**``, ``*`` and ``+``, so scalars and NumPy
        arrays (evaluated element-wise) both work.
    dimension : int
        Number of leading components of ``x`` to include; also the divisor
        of the sum.

    Returns
    -------
    float or array
        ``-(1/dimension) * sum_i (x_i**4 - 16*x_i**2 + 5*x_i)``.
    """
    total = 0.
    for idx in range(dimension):
        coord = x[idx]
        total += coord ** 4 - 16 * coord ** 2 + 5 * coord

    # Average over the dimensions, then negate
    total *= 1 / dimension
    return -1. * total

In [4]:
def f1(x, dimension):
    """Evaluate benchmark function 1.

    Computes ``25 - (mean_i (x_i - 5)**2 + exp(-sum_i x_i**2))`` over the
    first ``dimension`` components of ``x``.

    Parameters
    ----------
    x : sequence or array
        Indexed as ``x[i]`` for ``i`` in ``range(dimension)``.  Entries may
        be scalars or NumPy arrays (evaluated element-wise).
    dimension : int
        Number of leading components of ``x`` to include; also the divisor
        of the shifted quadratic term.

    Returns
    -------
    float or array
        The function value(s).
    """
    shifted_sq_sum = 0.   # running sum of (x_i - 5)^2
    sq_sum = 0.           # running sum of x_i^2
    for idx in range(dimension):
        coord = x[idx]
        shifted_sq_sum += (coord - 5)**2
        sq_sum += coord**2

    penalty = 1/dimension * shifted_sq_sum + np.exp(-sq_sum)
    return -1*penalty + 25

In [6]:
# Number of input variables; later cells use it to size arrays and to name the variables x_1 ... x_dim
dim = 10


In [7]:
# Lookup tables keyed by benchmark function number (1 or 2)

# Optimal response value of each function
global_optimum_value = {
    1: 25.,
    2: 78.332,
}

# Location of each optimum: the same coordinate repeated in all `dim` dimensions
global_optimum = {
    1: np.full(dim, 5.),
    2: np.full(dim, -2.903534),
}

# Lower and upper bound, shared by all input variables of a function
lb = {1: -5, 2: -5}
ub = {1: 10, 2: 5}

## Assign input parameters

In [10]:
# Which benchmark function to optimise, how many cycles to run,
# and the alpha value used in each cycle (one entry per cycle)
func_number = 1
n_cycles = 2
alphas = [None, 1.]

# Replace the per-function lookup tables by the entries of the selected function.
# NOTE: after this cell the names below no longer refer to dictionaries, so the
# cell cannot be run a second time without first re-running the cell that
# defines the lookup tables.
lb, ub = lb[func_number], ub[func_number]
global_optimum = global_optimum[func_number]
global_optimum_value = global_optimum_value[func_number]

In [12]:
# Additional variables for plotting models
# NOTE: `n_points` is also read by the later cell that draws the Latin hypercube
# sample, where it is used as the number of samples.
n_points = 50
# Evenly spaced grid between the lower and upper bound along each of the two plotted axes
x1 = np.linspace(lb,ub,n_points)
x2 = np.linspace(lb,ub,n_points)
# 2-D coordinate matrices used by the surface/contour plots
X1, Y1 = np.meshgrid(x1, x2)

# pred_mean = np.zeros((n_points,n_points))
# pred_std = np.zeros((n_points,n_points))
# n_draws = 5000 # for sampling posterior samples

In [11]:
# Write the bounds table (one row per input variable) to CSV;
# its path is later handed to ART through art_params['bounds_file']
bounds_file = '../data/sim_data_bounds.csv'
df = pd.DataFrame({
    'Variable': ['x_' + str(i) for i in range(1, dim + 1)],
    'Min': lb*np.ones(dim),
    'Max': ub*np.ones(dim),
    'Scaling': np.ones(dim),
}).set_index('Variable')
df.to_csv(path_or_buf=bounds_file)


Plot functions for D=2

In [None]:
# Plot both benchmark functions as 3-D surfaces over a 2-D grid and save each figure as a PNG.
# NOTE(review): this cell rebinds the notebook-level `dim` (set to 10 earlier) to 2 and,
# further down, rebinds `x1`/`x2`; cells executed afterwards see these values — confirm
# that this is intended.
# `cmap` and `plt` are not defined in this cell; they must already exist in the kernel.
dim = 2
# 1
# Stacking the mesh gives x[0] = X1 and x[1] = Y1, so f1 is evaluated element-wise on the grid
Z1 = f1(np.array([X1, Y1]),dim)
# Function value at (5, 5); used as the upper z-limit of the plot
f_max1 = f1([5,5],dim)
# Lower z-limit; also the height at which the z-contours and the marker are drawn
min_z1 = -100

fig = plt.figure(figsize=(4, 4.5), dpi=300)

ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.plot_surface(X1, Y1, Z1, rstride=10, cstride=1, cmap=cmap, alpha=0.5,edgecolor='None')
# Red marker at (5, 5), placed on the bottom plane
ax.scatter(5, 5, min_z1, s=50, c='r',edgecolor='r')
ax.set_zlim3d(min_z1, f_max1)
# Contour projections onto the bottom plane (z) and onto the planes x = -5 and y = 10
cset = ax.contour(X1, Y1, Z1, zdir='z', offset=min_z1, cmap=cmap)
cset = ax.contour(X1, Y1, Z1, zdir='x', offset=-5, cmap=cmap)
cset = ax.contour(X1, Y1, Z1, zdir='y', offset=10, cmap=cmap)
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_zlabel('$F_E(x)$')
plt.tight_layout()
fig.savefig(f'F_1.png')

# 2
# NOTE(review): `f2` is not defined in any cell shown here (the quartic benchmark
# above is named `true_function`) — confirm that `f2` exists in the kernel.
fig = plt.figure(figsize=(4, 4.5), dpi=300)
# Separate 50 x 50 grid on [-5, 5] for the second function (overwrites x1 and x2)
x1 = np.linspace(-5,5,50)
x2 = np.linspace(-5,5,50)
X2, Y2 = np.meshgrid(x1, x2)
Z2 = f2(np.array([X2, Y2]),dim)
# Function value at (-2.903534, -2.903534); used as the upper z-limit of the plot
f_max2 = f2([-2.903534, -2.903534],dim)
# Lower z-limit; also the height at which the z-contours and the marker are drawn
min_z2 = -250
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.plot_surface(X2, Y2, Z2, rstride=10, cstride=1, cmap=cmap, alpha=0.5,edgecolor='None')
ax.set_zlim3d(min_z2, f_max2)
# Red marker at (-2.903534, -2.903534), placed on the bottom plane
ax.scatter(-2.903534, -2.903534, min_z2, s=50, c='r',edgecolor='r')
# Contour projections onto the bottom plane (z) and onto the planes x = -5 and y = 5
cset = ax.contour(X2, Y2, Z2, zdir='z', offset=min_z2, cmap=cmap)
cset = ax.contour(X2, Y2, Z2, zdir='x', offset=-5, cmap=cmap)
cset = ax.contour(X2, Y2, Z2, zdir='y', offset=5, cmap=cmap)
ax.set_xlabel('$x_1$')
ax.set_ylabel('$x_2$')
ax.set_zlabel('$F_M(x)$')
plt.tight_layout()
fig.savefig(f'F_2.png')


In [None]:
# Default training-data path (the cycle loop assigns `data_file` again for each cycle)
data_file = f'../data/sim_data_training.csv'

# Settings passed to ART's RecommendationEngine as keyword arguments (**art_params);
# the cycle loop adds an 'alpha' entry before each run
art_params = {
    'bounds_file': bounds_file,
    'input_var': ['x_' + str(i) for i in range(1, dim + 1)],
    'response_var': ['y'],
    'objective': 'maximize',
    'threshold': 0.2,
    'verbose': 1,
    'seed': 42,
    'recommend': False,
    'output_directory': f'../results/',
}


Save simulated cycle 1 data into EDD-style files

In [None]:
def save_edd_csv(X, y, file_name):
    """Write inputs and responses to an EDD-style (long format) CSV file.

    The output has one row per (line, variable) pair and three columns:
    ``Line Name``, ``Type`` and ``0.0``.  Input variables are named
    ``x_1`` ... ``x_dim``, the response is named ``y`` and the lines are
    named ``'0'`` ... ``'n_points - 1'``.

    Parameters
    ----------
    X : array-like of shape (n_points, dim)
        Input values, one row per line.  Lists, NumPy arrays and pandas
        DataFrames are accepted; only the values are used, any labels are
        ignored.
    y : array-like of length n_points
        Response value of each line.  For a pandas Series/DataFrame only
        the values are used (no index alignment).
    file_name : str or path-like
        Destination handed to ``DataFrame.to_csv``.
    """
    import numpy as np  # function-scope import: the helper works regardless of what the star import provides

    # Work on plain arrays.  Passing a labelled pandas object straight to the
    # DataFrame constructor / column assignment below would align it against
    # the new string labels and silently fill the table with NaN.
    X = np.asarray(X)
    if isinstance(y, (pd.Series, pd.DataFrame)):
        y = y.values

    n_points = X.shape[0]
    dim = X.shape[1]

    cols = ['x_' + str(i) for i in range(1, dim + 1)]
    line_names = [str(i) for i in range(n_points)]

    # Wide table: one row per line, one column per input variable plus the response
    df = pd.DataFrame(X, index=line_names, columns=cols)
    df['y'] = y
    df.index.name = 'Line Name'

    # Wide -> long: every (line, variable) pair becomes its own row
    edd_df = pd.melt(df.reset_index(), id_vars=['Line Name'], value_vars=list(df.columns))
    # The value column is labelled with the float 0.0, written as "0.0" in the CSV header
    edd_df.columns = ['Line Name', 'Type', 0.0]
    edd_df.set_index('Line Name').to_csv(file_name)


In [None]:
# Draw an initial design, evaluate f1 on it and save the result as an EDD-style CSV.
# NOTE(review): the bounds are hard-coded here (rebinding the notebook-level lb/ub),
# the number of samples reuses `n_points` from the plotting-grid cell, and both the
# function (f1) and the file name suffix are fixed regardless of `func_number` —
# confirm this is intended.
lb, ub = -5, 10
# Rescale the lhs sample to [lb, ub]
# (assumes lhs returns an (n_points, dim) array of values in [0, 1] — TODO confirm)
x1 = lb + lhs(dim, samples=n_points, criterion='maximin')*(ub - lb)
# f1 indexes its first axis by dimension, hence the transpose
y1 = f1(x1.T, dim)
file_name = '../data/simulated_data/' + str(dim) + 'dim_benchmark_f1.csv'
save_edd_csv(x1, y1, file_name)


Generate training data set using ART, LHS

In [None]:
# Run the simulated optimisation cycles: for each cycle load the data, build the ART
# engine, evaluate its recommendations with the true function, record metrics and
# write the augmented data set for the next cycle.
# NOTE(review): `init_cycle`, `run`, `bad_initial_dir`, `func`, `mae_score_train`,
# `mae_score_test`, `best_prediction`, `std`, `prob_success` and `results` are not
# defined in the cells shown here — they must already exist in the kernel.
for cycle in range(init_cycle - 1, n_cycles):

    print(f'Run {run}\nCycle {cycle + 1}\nAlpha = {alphas[cycle]}')

    # Cycle 1 starts from the simulated benchmark file; later cycles read the
    # data file written at the end of the previous cycle
    if cycle == 0:
        data_file = f'../data/simulated_data/{bad_initial_dir}{str(dim)}dim_benchmark_f{str(func_number)}.csv'
    else:
        data_file = f'{art_params["output_directory"]}/Data_Cycle{str(cycle + 1)}.csv'

    # Per-cycle alpha setting
    art_params['alpha'] = alphas[cycle]

    # NOTE(review): art_params['recommend'] is set to False in the parameter cell, yet
    # art.recommendations is read below — confirm the engine still provides it.
    df = utils.load_study(data_file=data_file)
    art = RecommendationEngine(df, **art_params)

    # Save recommendations for the next cycle
    file_path = f'{art.outDir}/recommendations_Cycle{str(cycle + 1)}.csv'
    art.recommendations.to_csv(path_or_buf=file_path, sep='\t')

    # Evaluate models
    # First without arguments (recorded as the train score), then on the recommended
    # points with responses computed by the true function (recorded as the test score)
    art.evaluate_models()
    mae_score_train[cycle] = art.model_df[0]['MAE']['Ensemble Model']
    # All columns but the last are treated as inputs; the last column is used below
    # as the predicted response
    X_test = art.recommendations.values[:, :-1]
    y_test = func(X_test.T, dim).reshape(-1, art.num_response_var)
    art.evaluate_models(X_test, y_test)
    mae_score_test[cycle] = art.model_df[0]['MAE']['Ensemble Model']

    # error[run, cycle] = np.abs(art.recommendations.values[0, -1] - global_optimum_value)
    # Largest value in the last column of the recommendations
    best_prediction[cycle] = np.max(art.recommendations.values[:, -1])
    # presumably the posterior predictive std at the first recommendation — verify against ART
    std[cycle] = art.post_pred_stats(art.recommendations.values[0, :-1])[1][0][0]
    cumulative_success_prob = art.calculate_success_prob(current_best=art.find_current_best())
    prob_success[:, cycle] = cumulative_success_prob[0]

    # Save data for the next cycle
    # (existing data plus the recommendations with their true-function responses)
    X = np.concatenate((art.X, X_test))
    y = np.concatenate((art.y, y_test))
    file_name = f'{art.outDir}/Data_Cycle{str(cycle + 2)}.csv'
    # This is the `utils` version, which takes the input variable names as an extra
    # argument, not the three-argument save_edd_csv defined in this notebook
    utils.save_edd_csv(X, y, art.input_var, file_name)

    # For the first run, plot all models for each cycle
    if run == 1:
        plot.all_models_benchmark(art, func, dim, X1, Y1, lb, ub, global_optimum,
                                  num_models=8, cycle=cycle, alpha=alphas[cycle])

    # For each run, plot all models for the first cycle
    # NOTE(review): this call passes cycle + 1 while the call above passes cycle — confirm
    if cycle == 0:
        plot.all_models_benchmark(art, func, dim, X1, Y1, lb, ub, global_optimum,
                                  num_models=8, cycle=cycle + 1, alpha=alphas[cycle])

    # For each run and cycle save the metrics
    for name, value in results.items():
        file_name = f'{art.outDir}/{name}.csv'
        np.savetxt(file_name, value, fmt='%4.4f', delimiter=',', newline='\n')
    
# For each run save the metrics
# NOTE: this writes to the same paths as the save inside the loop, using the `art`
# object left over from the last executed cycle.
for name, value in results.items():
    file_name = f'{art.outDir}/{name}.csv'
    np.savetxt(file_name, value, fmt='%4.4f', delimiter=',', newline='\n')