In [3]:
# import external packages
import numpy as np
import cvxpy as cp
import mosek
from sklearn.model_selection import train_test_split
import time

# import internal packages
import phi_divergence as phi
from iter_gen_and_eval_alg import iter_gen_and_eval_alg

The toy problem we examine is as follows:

\begin{align}\label{toy_model_2}
    \begin{split}
        \max_{\mathbf{0} \leq \mathbf{x} \leq \mathbf{10}}\{\mathbf{e}^T \mathbf{x}: \mathbf{\xi}^T \mathbf{x} \leq 1,~\sum_{j=1}^{k-1}x_j-x_k\leq -1,~\mathbf{x}\leq 10\}.
    \end{split}
\end{align}


Here $\mathbf{x} \in \mathbb{R}^k$ is the decision vector and $\mathbf{\xi}\in [-1,1]^{k}$ is the uncertain parameter, which we assume to be uniformly distributed on $[-1,1]^{k}$.

We would like to obtain solutions for which we can make the following probabilistic guarantee regarding their feasibility: $$\mathbb{P}^*(\mathbf{\xi}^T \mathbf{x} \leq 1)\geq 1 - \epsilon$$

In [4]:
# problem specific functions:
def generate_data(random_seed, N, **kwargs):
    """Draw N scenarios with components uniformly distributed on [-1, 1].

    Seeds NumPy's global random generator with ``random_seed`` before sampling,
    so repeated calls with the same arguments return the same array.

    Parameters
    ----------
    random_seed : int
        Seed passed to ``np.random.seed``.
    N : int
        Number of scenarios (rows).
    **kwargs
        ``dim_x`` (int, default 2): number of components per scenario (columns).
        Other keywords are ignored.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, dim_x)``.
    """
    n_components = kwargs.get('dim_x', 2)
    np.random.seed(random_seed)
    return np.random.uniform(-1, 1, size=(N, n_components))

def generate_data_with_nominal(random_seed, N, **kwargs):
    """Draw N scenarios whose first row is the all-zero nominal scenario.

    Seeds NumPy's global random generator with ``random_seed``, samples ``N - 1``
    scenarios uniformly on [-1, 1] and puts the nominal (zero) scenario in front.

    Parameters
    ----------
    random_seed : int
        Seed passed to ``np.random.seed``.
    N : int
        Total number of rows returned, including the nominal scenario.
    **kwargs
        ``dim_x`` (int, default 2): number of components per scenario.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(N, dim_x)``; row 0 is all zeros.
    """
    n_components = kwargs.get('dim_x', 2)
    np.random.seed(random_seed)
    nominal_row = np.zeros((1, n_components), dtype=int)
    sampled_rows = np.random.uniform(-1, 1, size=(N - 1, n_components))  # the N-1 random scenarios
    return np.vstack((nominal_row, sampled_rows))  # nominal case comes first

def solve_P_SCP(S, **kwargs):
    """Solve the sampled convex program (SCP) of the toy problem for scenario set ``S``.

    The deterministic constraints ``sum_{j<k} x_j - x_k <= -1`` and ``0 <= x <= 10``
    are always imposed. Every scenario ``xi`` in ``S`` adds the sampled constraint
    ``xi^T x - 1 <= 0``, i.e. the same scalar expression that ``unc_func`` evaluates.

    Parameters
    ----------
    S : sequence of array-like
        Scenarios, each of length ``dim_x``. May be empty.
    **kwargs
        ``dim_x`` (int, default 2): number of decision variables.
        ``time_limit`` (float, default 2*60*60): time budget in seconds covering
        both the problem setup and the solve.

    Returns
    -------
    tuple
        ``(x.value, prob.value)`` as reported by cvxpy. The problem is stated as a
        minimization of ``-sum(x)``, so ``prob.value`` is the negated sum of ``x``.
        ``(None, None)`` is returned when the time budget is used up during setup
        or when the solver raises ``cp.error.SolverError``.
    """
    dim_x = kwargs.get('dim_x', 2)
    x = cp.Variable(dim_x, nonneg = True)
    setup_time_start = time.time()
    constraints = [cp.sum(x[0:(dim_x-1)]) <= x[dim_x-1]-1, x<=10]
    for scenario in S:
        # xi^T x <= 1: sum the component-wise products so that each scenario yields one
        # scalar constraint (without the sum, every component would be bounded separately).
        constraints.append(cp.sum(cp.multiply(scenario, x)) - 1 <= 0)
    obj = cp.Minimize(- cp.sum(x)) # formulate as a minimization problem
    prob = cp.Problem(obj,constraints)
    # whatever remains of the budget after setup is handed to the solver
    time_limit = kwargs.get('time_limit', 2*60*60) - (time.time() - setup_time_start)
    if time_limit < 0:
        print("Error: did not provide sufficient time for setting up & solving problem")
        return (None, None)
    try:
#         prob.solve(solver=cp.MOSEK, mosek_params = {mosek.dparam.optimizer_max_time: time_limit})
        prob.solve(solver=cp.GUROBI, verbose=False, TimeLimit=time_limit)
    except cp.error.SolverError:
        return (None, None)
    return (x.value, prob.value)

def unc_func(x, data, **kwargs):
    """Evaluate the uncertain constraint ``data . x - 1`` for solution ``x``.

    Parameters
    ----------
    x : array-like
        Solution vector.
    data : array-like
        Scenario(s); passed as the first argument of ``np.dot``.
    **kwargs
        Accepted but not used.

    Returns
    -------
    The value of ``np.dot(data, x) - 1``.
    """
    lhs = np.dot(data, x)
    return lhs - 1

def analytic_eval(x, problem_info):
    """Return ``1/2 + 1/(2*x_k)``, where ``x_k`` is the last of the ``dim_x`` entries of ``x``.

    Same closed-form expression as ``get_true_prob``; here the dimension is read
    from ``problem_info['dim_x']``.
    NOTE(review): presumably the analytic probability that the uncertain constraint
    holds for ``x`` — confirm the derivation and its validity range.
    """
    last_entry = x[problem_info['dim_x'] - 1]
    return 0.5 + 1 / (2 * last_entry)
    
def get_true_prob(x, dim_x):
    """Return ``1/2 + 1/(2*x_k)``, where ``x_k = x[dim_x - 1]``.

    NOTE(review): presumably the analytic ("true") probability that the uncertain
    constraint holds for ``x`` — confirm the derivation and its validity range.
    """
    last_entry = x[dim_x - 1]
    return 0.5 + 1 / (2 * last_entry)
    
def solve_toyproblem_true_prob(beta, dim_x):
    """Solve the toy problem with the probability requirement stated in closed form.

    Maximizes ``sum(x)`` subject to ``0 <= x <= 10``, ``sum_{j<k} x_j <= x_k - 1`` and
    ``(1 - 2*beta)*x_k + 1 >= 0``. For ``x_k > 0`` the last constraint is
    ``1/2 + 1/(2*x_k) >= beta`` (the expression of ``get_true_prob``) rearranged.

    Parameters
    ----------
    beta : float
        Required probability level.
    dim_x : int
        Number of decision variables.

    Returns
    -------
    tuple
        ``(x.value, prob.value)`` as reported by cvxpy after solving with MOSEK.
    """
    x = cp.Variable(dim_x, nonneg = True)
    prob_level_constr = (1-2*beta)*x[dim_x-1] + 1 >= 0
    ordering_constr = cp.sum(x[0:(dim_x-1)]) <= x[dim_x-1]-1
    upper_bound_constr = x<=10
    prob = cp.Problem(cp.Maximize(cp.sum(x)),
                      [prob_level_constr, ordering_constr, upper_bound_constr])
    prob.solve(solver=cp.MOSEK)
    return x.value, prob.value

In [5]:
# problem instance settings: number of decision variables and the time limit
# (in seconds) that solve_P_SCP reads from its keyword arguments
dim_x = 2
problem_instance = dict(dim_x=dim_x, time_limit=10*60)

In [6]:
# generate and split data into train and test
random_seed = 0
N_total = 10000
data = generate_data(random_seed, N_total, dim_x=dim_x)

# N_train is a sample count, so keep it an integer (N_total / 2 would give the float 5000.0);
# the split itself is still specified as the fraction N_train / N_total.
N_train = N_total // 2
data_train, data_test = train_test_split(data, train_size=(N_train/N_total), random_state=random_seed)

In [None]:
# algorithm parameters chosen for this run
risk_param_epsilon, conf_param_alpha = 0.10, 0.05  # epsilon enters desired_rhs = 1 - epsilon below
add_strategy, remove_strategy = 'random_vio', 'random_any'  # passed to iter_gen_and_eval_alg

In [None]:
# functions and settings handed to the algorithm for generating & evaluating solutions
solve_SCP = solve_P_SCP
eval_unc_obj = None
# one uncertain constraint, evaluated with unc_func at level 1 - epsilon
eval_unc_constr = [
    dict(function=unc_func,
         info=dict(risk_measure='probability', # must be either 'probability' or 'expectation'
                   desired_rhs=1 - risk_param_epsilon)),
]

In [None]:
# run the algorithm
# Build the algorithm object from the SCP solver, the problem settings, the evaluation
# info defined above and the train/test data.
alg = iter_gen_and_eval_alg(solve_SCP, problem_instance, eval_unc_obj, eval_unc_constr, 
                            data_train, data_test, conf_param_alpha=conf_param_alpha,
                            add_strategy=add_strategy ,remove_strategy=remove_strategy,
                            verbose=True)

stop_criteria={'max_elapsed_time': 0.5*60} # in seconds (time provided to search algorithm)

# NOTE(review): the meaning of the five returned values is taken from their names only —
# confirm against iter_gen_and_eval_alg.run.
(best_sol, runtime, num_iter, pareto_frontier, S_history) = alg.run(stop_criteria=stop_criteria)

In [None]:
# display the runtime returned by alg.run
runtime

In [None]:
# display the iteration count returned by alg.run
num_iter

In [None]:
# NOTE(review): `solutions` is not defined in any cell visible above (alg.run returns
# best_sol, runtime, num_iter, pareto_frontier and S_history) — confirm the intended variable.
len(solutions)

In [None]:
# display the best solution returned by alg.run
best_sol

In [None]:
#OPTIONAL:
# Draw a separate evaluation sample with a seed different from the train/test data.
# generate_data reads the dimension from the 'dim_x' keyword (any other keyword is ignored),
# and `dim_x` is the dimension variable defined in this notebook.
N_eval = 50000
data_eval = generate_data(random_seed + 99, N_eval, dim_x=dim_x)

In [None]:
# Relative gap (in %) between the objective of solve_toyproblem_true_prob and the
# objective of the algorithm's best solution.
# NOTE(review): `problem_info` and `k` are not defined in any cell visible above (the
# visible cells use `problem_instance` and `dim_x`) — confirm where they should come from.
beta = problem_info['desired_prob_guarantee_beta']
x_true, obj_true = solve_toyproblem_true_prob(beta, k)
obj_alg = best_sol['obj']
obj_gap_true =  100*(obj_true - obj_alg)/obj_true
obj_gap_true

In [None]:
# Determine optimal solution given data_test
# and report the relative gap (in %) between its objective and the algorithm's best objective.
# NOTE(review): `util` and `phi_dot` are not defined in the visible cells, `phi_div` only in a
# later cell, and this call passes a different argument list than the other two calls to
# util.compute_opt_given_data in this notebook. The assignment also overwrites `runtime`
# from alg.run.
runtime, opt_x, opt_sum_y, opt_obj, opt_lb = util.compute_opt_given_data(conf_param_alpha, beta, phi_div, phi_dot, data_test)
obj_alg = best_sol['obj']
obj_gap_opt = 100*(opt_obj - obj_alg)/opt_obj
obj_gap_opt

In [None]:
# Plot the first eight solutions (i = 0..7) on the training data; the loop stops at i = 8.
# NOTE(review): `solutions`, `dataio`, `k`, `alpha`, `beta` and `N_test` are not defined in
# the cells visible above this one.
for i,sol in enumerate(solutions):
    if i<=7:
        Z_arr = data_train[sol['scenario_set']]
        true_prob = get_true_prob(sol['sol'], k)
        if i == 0:
            # the two training-data calls differ only in the 11th positional argument
            # (True here, False below)
            dataio.plot_iter(i, data_train, Z_arr, sol['sol'], sol['obj'], 
                             sol['p_train'], sol['lb_train'], true_prob,
                             True, "png", True, N_train, alpha, beta)
        else:
            dataio.plot_iter(i, data_train, Z_arr, sol['sol'], sol['obj'], 
                             sol['p_train'], sol['lb_train'], true_prob,
                             True, "png", False, N_train, alpha, beta)
        
            # NOTE(review): the test-data plot sits inside the else branch, so it is
            # skipped for i == 0 — confirm that this is intended.
            dataio.plot_iter(i, data_test, None, sol['sol'], sol['obj'], 
                             sol['p_test'], sol['lb_test'], true_prob,
                             True, "png", False, N_test, alpha, beta)
    else:
        break

In [None]:
# NOTE(review): `dataio` and `pareto_solutions` are not defined in the cells visible above
# (alg.run returns `pareto_frontier`) — confirm.
dataio.plot_pareto_curve(pareto_solutions, beta, None, None, None, None)

In [None]:
# NOTE(review): `dataio` and `solutions` are not defined in the cells visible above — confirm.
dataio.plot_obj_over_time(solutions, best_sol, None, None, None, None)

In [None]:
# NOTE(review): `dataio` and `solutions` are not defined in the cells visible above — confirm.
dataio.plot_size_set_over_time(solutions, best_sol, None, None, None, None)

In [None]:
# Plot final solution found by algorithm
# Z_values are the training scenarios indexed by best_sol['scenario_set'].
# NOTE(review): `dataio`, `N`, `alpha` and `beta` are not defined in the cells visible above.
name = 'Strategy: '+ str(add_strategy)
save_plot = False
plot_type = "eps"
show_legend = True
Z_values = data_train[best_sol['scenario_set']]
dataio.plot_solution(name, data_train, Z_values, best_sol['sol'], 
              best_sol['obj'], best_sol['lb_test'], save_plot, plot_type, show_legend, N, alpha, beta)

In [None]:
# Compute optimal solution with true probability constraint
# and evaluate it on data_test: empirical violation frequency p and lower bound lb.
# NOTE(review): `uncertain_constraint`, `numeric_precision`, `phi_dot`, `par`, `phi_div`, `rs`,
# `scipy`, `k`, `alpha`, `beta` and `N_test` are not defined/imported in the cells visible above.
prob_true = beta
[x_true, obj_true] = solve_toyproblem_true_prob(prob_true, k)
constr = uncertain_constraint(data_test, x_true)
vio = constr[constr>(0+numeric_precision)]
# The violations are counted on data_test, so the frequency is taken over the test sample
# size N_test (the same size that enters the radius r below), not over N_train.
p_vio = len(vio)/N_test
p = np.array([1-p_vio, p_vio])
r = phi_dot/(2*N_test)*scipy.stats.chi2.ppf(1-alpha, 1)
lb = rs.compute_lb(p, r, par, phi_div)
print(p)
print(lb)
print(obj_true)

In [None]:
# Plot the solution of the true-probability problem (x_true, obj_true, lb) on data_test.
# NOTE(review): `dataio`, `N`, `alpha` and `beta` are not defined in the cells visible above.
name = "TrueProb="+str(prob_true)
save_plot = False
plot_type = "eps"
show_legend = True
dataio.plot_solution(name, data_test, None, x_true, obj_true, lb, save_plot, plot_type, show_legend, N, alpha, beta)

In [None]:
# Determine optimal solution given data_test
# NOTE(review): `util`, `par` and `time_limit_mosek` are not defined in the cells visible above,
# and the argument list differs from the other calls to util.compute_opt_given_data in this
# notebook. The assignment overwrites `runtime`.
runtime, opt_x, opt_sum_y, opt_obj, opt_lb = util.compute_opt_given_data(alpha, beta, par, phi_div, data_test, time_limit_mosek)

In [None]:
# display the runtime returned by util.compute_opt_given_data
runtime

In [None]:
# Plot optimal solution given data_test
# NOTE(review): `dataio`, `N`, `alpha` and `beta` are not defined in the cells visible above.
name = 'Opt_given_test_data'
save_plot = False
plot_type = "eps"
show_legend = True
dataio.plot_solution(name, data_test, None, opt_x, opt_obj, opt_lb, save_plot, plot_type, show_legend, N, alpha, beta)

In [None]:
# Compute solution via Campi method
# NOTE(review): with the signature generate_data(random_seed, N, **kwargs) defined in this
# notebook, this call uses `k` as the random seed and `N_campi` as the sample size, with the
# default dim_x — confirm the intended arguments. `N_campi`, `util` and `time_limit_mosek`
# are not defined in the cells visible above. The assignment overwrites `data` and `runtime`.
data = generate_data(k, N_campi)
runtime, campi_x, campi_obj, campi_true_prob, Z_arr = util.solve_with_campi_N(alpha, beta, numeric_precision, data, time_limit_mosek)

In [None]:
# Plot Campi solution
# The lower-bound argument is passed as the constant 0 here.
# NOTE(review): `dataio` and `N` are not defined in the cells visible above.
name = 'Campi method'
save_plot = False
plot_type = "eps"
show_legend = True
dataio.plot_solution(name, data, Z_arr, campi_x, campi_obj, 0, save_plot, plot_type, show_legend, N, alpha, beta)

In [None]:
# Compute Garatti2022 solution
# Settings for the Garatti2022 runs; this cell redefines k, dim_x, beta and alpha for all
# cells executed after it.
k = 1000
dim_x = k
beta = 0.95
# NOTE(review): 10e-6 equals 1e-5, not 1e-6 — confirm the intended confidence parameter.
alpha = 10e-6
time_limit_solve = 5*60
numeric_precision = 1e-6

# NOTE(review): `util` is not imported in the cells visible above.
set_sizes, time_determine_set_sizes = util.Garatti2022_determine_set_sizes(dim_x, beta, alpha)

In [None]:
# display the set sizes returned by util.Garatti2022_determine_set_sizes
set_sizes

In [None]:
# Run util.solve_with_Garatti2022 once per random seed (1..10) and collect the results.
# NOTE(review): `util` and `uncertain_constraint` are not defined in the cells visible above.
random_seed_settings = [i for i in range(1, 11)]

# Store output in lists
import statistics as s
vec_obj = []
vec_true_prob = []
vec_mean_size_S = []
vec_max_size_S = []
vec_num_iter = []
vec_time = []
vec_data = []

run_count = 0
for random_seed in random_seed_settings:
    # `set_sizes` is both an argument and a return value, so each run receives the
    # set sizes returned by the previous run.
    (x, obj, j, s_j, set_sizes, 
     time_main_solves, 
     time_determine_supp) = util.solve_with_Garatti2022(dim_x, set_sizes, solve_SCP, uncertain_constraint, 
                               generate_data, random_seed, time_limit_solve,
                               numeric_precision)
    
    true_prob = get_true_prob(x, k)
    total_time = time_determine_set_sizes + time_main_solves + time_determine_supp
    # mean over set_sizes[0..j]; set_sizes[j] is taken as the maximum
    mean_size_S = s.mean([set_sizes[i] for i in range(0,j+1)])
    max_size_S = set_sizes[j]
    
    # add to output data:
    vec_obj.append(obj)
    vec_true_prob.append(true_prob)
    vec_mean_size_S.append(mean_size_S)
    vec_max_size_S.append(max_size_S)
    vec_num_iter.append(j)
    vec_time.append(total_time)
    # NOTE(review): vec_data receives the same value as vec_max_size_S — confirm intent.
    vec_data.append(max_size_S)
    
    run_count += 1
    print(run_count)

In [None]:
# Now output mean and std in proper string format
import statistics as s

# (values, number of decimals) pairs, printed in this order as "mean (std)"
for _values, _ndigits in [(vec_obj, 3), (vec_true_prob, 3), (vec_mean_size_S, 1),
                          (vec_max_size_S, 1), (vec_num_iter, 1), (vec_time, 1),
                          (vec_data, 1)]:
    _mean_txt = f'{round(s.mean(_values),_ndigits):.{_ndigits}f}'
    _std_txt = f'{round(s.stdev(_values),_ndigits):.{_ndigits}f}'
    print(_mean_txt + " (" + _std_txt + ")")

In [None]:
# Reload the `util` module so that edits to its source are picked up without restarting the kernel.
# NOTE(review): `util` is not imported in the cells visible above.
import importlib
importlib.reload(util)

In [None]:
# Compute Calafiore2016 solution
# Settings for the Calafiore2016 runs; this cell redefines k, dim_x, beta, alpha and N_eval
# for all cells executed after it.
k = 10
dim_x = k
beta = 0.95
# NOTE(review): 10e-6 equals 1e-5, not 1e-6 — confirm the intended confidence parameter.
alpha = 10e-6
time_limit_solve = 5*60
numeric_precision = 1e-6

scale_eps_prime = 0.7
N_eval = 10000

# NOTE(review): `util` is not imported in the cells visible above.
N, time_determine_set_sizes = util.determine_N_calafiore2016(dim_x, beta, alpha, scale_eps_prime, N_eval)

In [None]:
# display N as returned by util.determine_N_calafiore2016
N

In [None]:
# Run util.solve_with_calafiore2016 once per random seed (1..10) and collect the results.
# NOTE(review): `util` and `uncertain_constraint` are not defined in the cells visible above.
random_seed_settings = [i for i in range(1, 11)]

# Store output in lists
import statistics as s
vec_obj = []
vec_true_prob = []
vec_mean_size_S = []
vec_max_size_S = []
vec_num_iter = []
vec_time = []
vec_data = []

run_count = 0
for random_seed in random_seed_settings:
    (x, obj, iter_j, 
     total_train_data_used, 
     total_test_data_used, 
     total_time) = util.solve_with_calafiore2016(N, N_eval, scale_eps_prime, dim_x, beta, alpha, solve_SCP, uncertain_constraint, 
                                            generate_data, random_seed, time_limit_solve,
                                            numeric_precision)
    
    true_prob = get_true_prob(x, k)
    total_time = total_time  # self-assignment, no effect
    # both the mean and the maximum set size are recorded as N for this method
    mean_size_S = N
    max_size_S = N
    total_data_used = total_train_data_used + total_test_data_used
    
    # add to output data:
    vec_obj.append(obj)
    vec_true_prob.append(true_prob)
    vec_mean_size_S.append(mean_size_S)
    vec_max_size_S.append(max_size_S)
    vec_num_iter.append(iter_j)
    vec_time.append(total_time)
    vec_data.append(total_data_used)
    
    run_count += 1
    print(run_count)

In [None]:
# Now output mean and std in proper string format
import statistics as s

# (values, number of decimals) pairs, printed in this order as "mean (std)"
for _values, _ndigits in [(vec_obj, 3), (vec_true_prob, 3), (vec_mean_size_S, 1),
                          (vec_max_size_S, 1), (vec_num_iter, 1), (vec_time, 1),
                          (vec_data, 1)]:
    _mean_txt = f'{round(s.mean(_values),_ndigits):.{_ndigits}f}'
    _std_txt = f'{round(s.stdev(_values),_ndigits):.{_ndigits}f}'
    print(_mean_txt + " (" + _std_txt + ")")

# The following cells are used to obtain output and write to latex tables

In [None]:
# Experiment: for every problem size k and random seed, run the gen-and-eval algorithm and,
# for each prefix of its solution list (first n_sol solutions), compare the best solution in
# that prefix with the true-probability optimum and with the optimum given data_test.
# Results are collected in output_data and written to output/<output_file_name>.txt after every run.
# NOTE(review): `util`, `rs` and `uncertain_constraint` are not defined/imported in the
# cells visible above.
output_file_name = 'new_output_data'

# Raw strings keep the LaTeX backslashes literal without relying on invalid escape sequences.
headers = ['$k$', 'seed', r'$n_{\mathcal{X}}$',
           'Obj.~(RS)', 'Obj.~(TP)', r'Gap TP.~(\%)',
           r'Obj.~($\mathcal{D}^{\text{test}}_{N_2}$)', r'Gap $\mathcal{D}^{\text{test}}_{N_2}$ (\%)',
           'Time', r'$|\mathcal{X}|$']

# Write headers to .txt file
with open(r'output/headers_'+output_file_name+'.txt','w+') as f:
    f.write(str(headers))

output_data = {}

# Variable parameter values
k_settings = [2]#, 10, 100]#, 1000]
random_seed_settings = [i for i in range(1, 4)]
n_sol_settings = [1, 100, 500, 1000, 5000, 10000]

# Fixed parameter values
N_total = 500
p_train = 0.5
risk_measure = 'chance_constraint' # options: 'chance_constraint', 'exp_constraint'
alpha = 0.01
beta = 0.90

# LB-related parameters
par = 1
phi_div = phi.mod_chi2_cut
phi_dot = 2
numeric_precision = 1e-6 # To correct for floating-point math operations

# RS-related parameters
time_limit_search = 15*60
time_limit_solve = 5*60 # in seconds
max_nr_solutions = 10000 # for easy problems with long time limits, we may want extra restriction
add_strategy = 'random_vio'
remove_strategy = 'random_any'
clean_strategy = (30000, 'random_inactive')
add_remove_threshold = 0.0 # controls the ambiguity around adding/removing
use_tabu = False

N_train = round(p_train * N_total)
N_test = N_total - N_train

run_count = 0
for k in k_settings:

    # Compute true opt
    x_true, obj_true = solve_toyproblem_true_prob(beta, k)

    for random_seed in random_seed_settings:

        # Draw one sample of N_total scenarios and split it into N_train / N_test rows.
        # generate_data takes (random_seed, N, dim_x=...); drawing train and test separately
        # with the same seed would make both sets start from the same random numbers.
        data = generate_data(random_seed, N_total, dim_x=k)
        data_train, data_test = train_test_split(data, train_size=N_train, random_state=random_seed)

        # compute opt given data_test
        runtime_opt, x_opt, sum_y, obj_opt, p_min_opt = util.compute_opt_given_data(alpha, beta, par, phi_div, phi_dot,
                                                                               data_test, time_limit_solve)

        # Gen and eval algorithm
        (runtime, num_iter, solutions,
         best_sol, pareto_solutions) = rs.gen_and_eval_alg(data_train, data_test, beta, alpha, time_limit_search, time_limit_solve,
                                                    max_nr_solutions, add_strategy, remove_strategy, clean_strategy,
                                                    add_remove_threshold, use_tabu,
                                                    phi_div, phi_dot, numeric_precision,
                                                    solve_SCP, uncertain_constraint, risk_measure, random_seed)

        for i,n_sol in enumerate(n_sol_settings):
            if i == 0 and len(solutions) == 0:
                # no solution was found at all: record placeholders and stop for this run
                output_data[(k, random_seed, n_sol)] = [np.nan,
                                                         obj_true,
                                                         np.nan,
                                                         obj_opt,
                                                         np.nan,
                                                         runtime,
                                                         0]
                break

            # an entry is recorded for n_sol only if more solutions exist than the previous setting
            elif i == 0 or (i > 0 and len(solutions) > n_sol_settings[i-1]):

                sub_solutions = solutions[0:n_sol]
                # Stored as time_found (not `time`) so that the imported `time` module,
                # which solve_P_SCP calls as time.time(), is not overwritten at notebook level.
                time_found = sub_solutions[-1]['time']
                best_in_sub_sol = {'sol': None}
                for sol_info in sub_solutions:
                    obj = sol_info['obj']
                    lb = sol_info['lb_test']
                    # while the incumbent's lower bound is below beta, prefer a higher lower bound
                    if best_in_sub_sol['sol'] is None or (best_in_sub_sol['lb_test'] < beta and lb > best_in_sub_sol['lb_test']):
                        best_in_sub_sol = sol_info
                    # otherwise prefer a better objective at lb >= beta, or a higher lower bound
                    # with an objective that is at least as good
                    elif ((lb >= beta and obj > best_in_sub_sol['obj'])
                          or (lb > best_in_sub_sol['lb_test'] and obj >= best_in_sub_sol['obj'])):
                        best_in_sub_sol = sol_info

                obj_rs = best_in_sub_sol['obj']
                obj_gap_true = 100*(obj_true - obj_rs)/obj_true
                obj_gap_opt = 100*(obj_opt - obj_rs)/obj_opt

                output_data[(k, random_seed, n_sol)] = [obj_rs,
                                                         obj_true,
                                                         obj_gap_true,
                                                         obj_opt,
                                                         obj_gap_opt,
                                                         time_found,
                                                         len(sub_solutions)]

        # checkpoint all results collected so far after every (k, seed) run
        with open(r'output/'+output_file_name+'.txt','w+') as f:
            f.write(str(output_data))

        run_count += 1
        print("Completed run: " + str(run_count))

In [None]:
def _format_output_entry(position, value):
    """Format one entry of an output_data row according to its position in the row.

    Positions 0-4 are written with two decimals ('-' for NaN), position 5 is
    written with zero decimals, and any later entry is passed through unchanged.
    """
    if position < 5:
        return '-' if np.isnan(value) else f'{round(value,2):.2f}'
    if position == 5:
        return f'{round(value,0):.0f}'
    return value


# string version of output_data, same keys, ready for the LaTeX table writer
output_data_str = {
    key: [_format_output_entry(pos, entry) for pos, entry in enumerate(row)]
    for key, row in output_data.items()
}

In [None]:
# Column headers (LaTeX) for the output table.
# Raw strings keep the backslashes literal without relying on invalid escape sequences
# such as '\m' or '\%'; the resulting string values are unchanged.
headers = ['$k$', 'seed', 'remove strategy', r'$n_{\mathcal{X}}$',
           'Obj.~(RS)', 'Obj.~(TP)', r'Gap TP.~(\%)',
           r'Obj.~($\mathcal{D}^{\text{test}}_{N_2}$)', r'Gap $\mathcal{D}^{\text{test}}_{N_2}$ (\%)',
           'Time', r'$|\mathcal{X}|$']

In [None]:
# Write the formatted results as a LaTeX table.
# NOTE(review): `dataio` is not imported in the cells visible above; the meaning of the
# first argument (4) cannot be determined from this notebook.
dataio.write_output_to_latex(4, headers, output_data_str)

In [None]:
# base name (without directory and extension) of the files written/read by the cells below
output_file_name = 'new_output'

In [None]:
# Write headers + output to .txt file
# Only the headers are written here (as str(headers)); the write of output_data is commented out.
with open(r'output/headers_'+output_file_name+'.txt','w+') as f:
    f.write(str(headers))

# with open(r'output/'+output_file_name+'.txt','w+') as f:
#     f.write(str(output_data))

In [None]:
# base name of an existing results file to be read back by the cells below
output_file_name = 'eval_gap_as_L_to_inf_k=[2,10]'

In [None]:
# Read from .txt file
# The loop keeps only the last line of the file, which is then evaluated as a Python expression.
# NOTE(security): eval() executes arbitrary code contained in the file — only use it on files
# written by this notebook.
# NOTE(review): a dict written with str() may contain `nan` entries, which eval() can only
# resolve if a name `nan` exists in the namespace — confirm.
file_path = 'output/'+output_file_name+'.txt'
dic = ''
with open(file_path,'r') as f:
         for i in f.readlines():
            dic=i #string
output_data_read = eval(dic)
output_data_read

In [None]:
# replace the in-memory results by the ones read from file (output_data is overwritten)
output_data = output_data_read
output_data

In [None]:
# Read from .txt file
# Reads the headers file that belongs to output_file_name; only the last line is kept and
# evaluated as a Python expression.
# NOTE(security): eval() executes arbitrary code contained in the file — only use it on files
# written by this notebook.
file_path = 'output/headers_'+output_file_name+'.txt'
dic = ''
with open(file_path,'r') as f:
         for i in f.readlines():
            dic=i #string
output_data_headers_read = eval(dic)

In [None]:
# replace the in-memory headers by the ones read from file (headers is overwritten)
headers = output_data_headers_read
headers

In [None]:
# NOTE(review): this writes `output_data_str`, which is built from `output_data` in an earlier
# cell and is not recomputed after `output_data` was replaced by the data read from file —
# confirm that the formatting cell is re-run first. `dataio` is not imported in the visible cells.
dataio.write_output_to_latex(3, headers, output_data_str)

In [None]:
# Aggregate the results in `output_data` per (N_train, N_test) setting into two table rows:
#   'Single Split Run'      - statistics over all result columns of the selected data seeds
#   'Best of 10 Split Runs' - per data seed, the feasible column picked by idxmax, then
#                             statistics over the data seeds
# Result rows are addressed by position; judging from the variable names they are
# 0: objective, 1: gap, 2: lb_train, 3: lb_test, 4: true probability, 6: number of
# test-feasible solutions found, 8: time — NOTE(review): confirm against the code that
# produced the loaded output_data.
# NOTE(review): keys are filtered as (N_train, N_test, data seed, ...), which differs from the
# (k, seed, n_sol) keys built by the experiment cell in this notebook, so this cell presumably
# expects output_data loaded from a different results file — confirm.
import pandas as pd

k = 1000  # not referenced again in this cell
beta = 0.9
N_total_settings = [100]
p_train_settings = [0.25, 0.5, 0.75]
random_seed_data_settings = [i for i in range(1, 7)]
random_seed_split_settings = [i for i in range(1, 11)]  # not referenced again in this cell

output_data_agg = {}
for N_total in N_total_settings:
    for p_train in p_train_settings:
        
        N_train = round(p_train * N_total)
        N_test = N_total - N_train
        
        # one column per matching key, one row per result position
        df = pd.DataFrame({key: pd.Series(val) for key, val in output_data.items() if (key[0] == N_train
                                                                                       and key[1] == N_test
                                                                                       and key[2] in random_seed_data_settings)})
        df = df.astype(float)
        df_agg = df.agg(["mean","std"], axis="columns")

        # columns whose row 3 reaches beta
        df_feas = df.loc[:,df.iloc[3,:] >= beta]
        df_feas_agg = df_feas.agg(["mean","std"], axis="columns")

        # fraction of columns with row 3 (resp. row 4) >= beta
        prob_FF = sum(df.iloc[3,:] >= beta) / len(df.columns)
        true_prob_FF = sum(df.iloc[4,:] >= beta) / len(df.columns)

        avg_obj = df_agg.loc[0,'mean']
        std_obj = df_agg.loc[0,'std']

        if prob_FF > 0:
            avg_obj_F = df_feas_agg.loc[0, 'mean']
            std_obj_F = df_feas_agg.loc[0, 'std']
            avg_gap_F = df_feas_agg.loc[1, 'mean']
            std_gap_F = df_feas_agg.loc[1, 'std']
        else:
            avg_obj_F = 0
            std_obj_F = 0
            avg_gap_F = 0
            std_gap_F = 0

        avg_lb_train = df_agg.loc[2,'mean']
        std_lb_train = df_agg.loc[2,'std']
        avg_lb_test = df_agg.loc[3,'mean']
        std_lb_test = df_agg.loc[3,'std']
        avg_true_prob = df_agg.loc[4,'mean']
        std_true_prob = df_agg.loc[4,'std']
        
        avg_num_test_feas_found = df_agg.loc[6,'mean']
        std_num_test_feas_found = df_agg.loc[6,'std']
        
        avg_time_spent = df_agg.loc[8,'mean']
        std_time_spent = df_agg.loc[8,'std']

        # table row: entries formatted as "mean (std)"
        li = []
        li.append(f'{prob_FF:.2f}')
        li.append(f'{true_prob_FF:.2f}')
        #li.append(f'{round(avg_obj,3):.3f}' + " ("+f'{round(std_obj,3):.3f}'+")")
        if prob_FF > 0:
            li.append(f'{round(avg_obj_F,3):.3f}' + " ("+f'{round(std_obj_F,3):.3f}'+")")
            li.append(f'{round(avg_gap_F,3):.3f}' + " ("+f'{round(std_gap_F,3):.3f}'+")")
        else:
            li.append("-")
            li.append("-")
        li.append(f'{round(avg_lb_train,3):.3f}' + " ("+f'{round(std_lb_train,3):.3f}'+")")
        li.append(f'{round(avg_lb_test,3):.3f}' + " ("+f'{round(std_lb_test,3):.3f}'+")")
        li.append(f'{round(avg_true_prob,3):.3f}' + " ("+f'{round(std_true_prob,3):.3f}'+")")
        li.append(f'{round(avg_num_test_feas_found,1):.1f}' + " ("+f'{round(std_num_test_feas_found,1):.1f}'+")")
        li.append(f'{round(avg_time_spent,1):.1f}' + " ("+f'{round(std_time_spent,1):.1f}'+")")

        output_data_agg[(N_train, N_test, 'Single Split Run')] = li
        
        
        # second row: handle each data seed separately
        count_FF = 0
        count_true_FF = 0
        best_obj = []
        best_gap = []
        lb_train = []
        lb_test = []
        true_prob = []
        num_test_feas_found = []
        time_spent = []
        
        for random_seed_data in random_seed_data_settings:
            # unlike above, this frame is not cast to float
            df = pd.DataFrame({key: pd.Series(val) for key, val in output_data.items() if (key[0] == N_train
                                                                                       and key[1] == N_test
                                                                                       and key[2] == random_seed_data)})
            
            if sum(df.iloc[3,:] >= beta) > 0:
                count_FF += 1
            if sum(df.iloc[4,:] >= beta) > 0:
                count_true_FF += 1
                
            df_feas = df.loc[:,df.iloc[3,:] >= beta]
            #df_feas_agg = df_feas.agg(["mean","std","max", "min"], axis="columns")
            if not df_feas.empty:
                # column label holding the maximum of the first row among the feasible columns
                best_i = df_feas.idxmax(axis=1)[0]
                best_obj.append(df_feas.loc[0, best_i])
                best_gap.append(df_feas.loc[1, best_i])
                lb_train.append(df_feas.loc[2, best_i])
                lb_test.append(df_feas.loc[3, best_i])
                true_prob.append(df_feas.loc[4, best_i])
            
            # rows 6 and 8 are summed over all columns of this data seed
            num_test_feas_found.append(sum(df.iloc[6,:]))
            time_spent.append(sum(df.iloc[8,:]))
            
        prob_FF = count_FF / len(random_seed_data_settings)
        true_prob_FF = count_true_FF / len(random_seed_data_settings)
        # the lists below stay empty when no data seed has a feasible column
        avg_obj_F = np.mean(best_obj)
        std_obj_F = np.std(best_obj)
        avg_gap_F = np.mean(best_gap)
        std_gap_F = np.std(best_gap)
        avg_lb_train = np.mean(lb_train)
        std_lb_train = np.std(lb_train)
        avg_lb_test = np.mean(lb_test)
        std_lb_test = np.std(lb_test)
        avg_true_prob = np.mean(true_prob)
        std_true_prob = np.std(true_prob)
        avg_num_test_feas_found = np.mean(num_test_feas_found)
        std_num_test_feas_found = np.std(num_test_feas_found)
        avg_time_spent = np.mean(time_spent)
        std_time_spent = np.std(time_spent)
        
        li = []
        li.append(f'{prob_FF:.2f}')
        li.append(f'{true_prob_FF:.2f}')
        #li.append(f'{round(avg_obj,3):.3f}' + " ("+f'{round(std_obj,3):.3f}'+")")
        if prob_FF > 0:
            li.append(f'{round(avg_obj_F,3):.3f}' + " ("+f'{round(std_obj_F,3):.3f}'+")")
            li.append(f'{round(avg_gap_F,3):.3f}' + " ("+f'{round(std_gap_F,3):.3f}'+")")
        else:
            li.append("-")
            li.append("-")
        li.append(f'{round(avg_lb_train,3):.3f}' + " ("+f'{round(std_lb_train,3):.3f}'+")")
        li.append(f'{round(avg_lb_test,3):.3f}' + " ("+f'{round(std_lb_test,3):.3f}'+")")
        li.append(f'{round(avg_true_prob,3):.3f}' + " ("+f'{round(std_true_prob,3):.3f}'+")")
        li.append(f'{round(avg_num_test_feas_found,1):.1f}' + " ("+f'{round(std_num_test_feas_found,1):.1f}'+")")
        li.append(f'{round(avg_time_spent,1):.1f}' + " ("+f'{round(std_time_spent,1):.1f}'+")")
        
        output_data_agg[(N_train, N_test, 'Best of 10 Split Runs')] = li

In [None]:
# Column headers (LaTeX) for the aggregated table.
# Raw strings keep the backslashes literal without relying on the invalid escape sequences
# '\%' and '\#'; the resulting string values are unchanged.
headers_agg = ['$N_{train}$', '$N_{test}$', 'Strategy',
               'Prob.~FF', 'True Prob.~FF', 'Obj.~(Feas)', r'Gap (\%)', '$LB_{train}$', '$LB_{test}$', 'True Prob.',
               r'\# FF (test)', 'Time']

# NOTE(review): `dataio` is not imported in the cells visible above.
dataio.write_output_to_latex(3, headers_agg, output_data_agg)

In [None]:
# display count_FF as left by the last (N_total, p_train) iteration of the aggregation cell
count_FF

In [None]:
# To plot histograms for random seed output
# Read from .txt file
# Only the last line of the file is kept and evaluated as a Python expression.
# NOTE(security): eval() executes arbitrary code contained in the file — only use it on files
# written by this notebook.
output_file_name = 'new_output'
file_path = 'output/'+output_file_name+'.txt'
dic = ''
with open(file_path,'r') as f:
         for i in f.readlines():
            dic=i #string
output_data_read = eval(dic)

# `df` is built but not used again in this cell
df = pd.DataFrame.from_dict(output_data_read, orient='index')
#li1 = [col for col in df.columns if 'add + improve' == col[1]]

# keep only the results whose key has 'add + improve + remove' as second element
# NOTE(review): the experiment cell in this notebook writes keys (k, seed, n_sol), whose second
# element is a seed — this filter presumably targets a results file with a different key layout.
df2 = pd.DataFrame({key: pd.Series(val) for key, val in output_data_read.items() if key[1] == 'add + improve + remove'})
# first result row of every selected column
obj2 = df2.iloc[0,:].astype(float)

# NOTE(review): the histogram's x-label is 'Gap (%)' while the title speaks of the objective —
# confirm which quantity row 0 holds. `dataio` is not imported in the visible cells.
title = 'Distribution of best found solution objective for random add + improve + remove'# for $\beta = 0.95$, $\alpha=10^{-6}$, $N_{1} = 1,000$, $N_{2} = 10,000$ and time limit $\mathcal{L} = 1$ minute'
dataio.plot_hist(obj2, 'Gap (%)', 'Frequency', title, 20, 0.75)