In [None]:
# Import packages
import time

import cvxpy as cp
import mosek
import numpy as np
import pandas as pd
import scipy.stats

import dataio
import phi_divergence as phi
import robust_sampling as rs
import util

The toy model we examine is the following chance-constrained problem, where $\mathbf{e}$ denotes the all-ones vector:

\begin{align}\label{toy_model_2}
    \begin{split}
        \max_{\mathbf{x} \geq \mathbf{0}}\{\mathbf{e}^T \mathbf{x}: \mathbb{P}^*(\boldsymbol{\xi}\in [-1,1]^{k}: \boldsymbol{\xi}^T \mathbf{x} \leq 1)\geq \beta\}.
    \end{split}
\end{align}

In [None]:
# Problem specific functions:
def generate_data(random_seed, k, N):
    """Draw N scenarios of dimension k, each entry uniform on [-1, 1).

    NumPy's global random state is (re)seeded with ``random_seed`` first, so
    identical arguments always produce the identical sample.

    Returns an array of shape (N, k), one scenario per row.
    """
    np.random.seed(random_seed)
    low, high = -1, 1
    return np.random.uniform(low, high, size=(N, k))

def generate_data_with_nominal(random_seed, k, N):
    """Return N scenarios of dimension k whose first row is the nominal (all-zero) case.

    The other N-1 rows are drawn uniformly from [-1, 1) after seeding NumPy's
    global random state with ``random_seed``.

    Returns an array of shape (N, k).
    """
    np.random.seed(random_seed)
    sampled = np.random.uniform(-1, 1, size=(N - 1, k))
    nominal_row = np.array([[0] * k])  # single all-zero scenario
    return np.concatenate((nominal_row, sampled))

def solve_SCP(S, time_limit):
    """Solve the sampled (scenario) version of the toy problem with MOSEK.

    Maximizes sum(x) over x >= 0 subject to
      * S @ x - 1 <= 0 for every scenario (row) of S,
      * x[0] + ... + x[k-2] <= x[k-1] - 1,
      * x <= 10 elementwise,
    where k is the number of columns of S.

    Parameters
    ----------
    S : 2-D array with one scenario per row.
    time_limit : value passed to MOSEK as ``optimizer_max_time``.

    Returns
    -------
    (x.value, prob.value) of the solved problem, or (None, None) when cvxpy
    raises a SolverError.
    """
    dim = S.shape[1]
    last = dim - 1
    x = cp.Variable(dim, nonneg = True)

    scenario_con = (S @ x) - 1 <= 0
    coupling_con = cp.sum(x[0:last]) <= x[last] - 1
    box_con = x <= 10
    problem = cp.Problem(cp.Maximize(cp.sum(x)),
                         [scenario_con, coupling_con, box_con])

    try:
        problem.solve(solver=cp.MOSEK,
                      mosek_params = {mosek.dparam.optimizer_max_time: time_limit})
    except cp.error.SolverError:
        return (None, None)
    return (x.value, problem.value)

def uncertain_constraint(S, x):
    """Evaluate the uncertain constraint function S x - 1 for solution x.

    Returns ``np.dot(S, x) - 1``; the constraint is written as ``<= 0``
    elsewhere in this notebook, so positive entries correspond to violated
    scenarios.
    """
    lhs = np.dot(S, x)
    return lhs - 1

def get_true_prob(x, k):
    """Return 1/2 + 1/(2*x[k-1]), the closed-form expression this notebook uses as the true probability for solution x."""
    last_component = x[k-1]
    return 0.5 + 1/(2*last_component)
    
def solve_toyproblem_true_prob(beta, k):
    """Solve the toy problem with the probability requirement written in closed form.

    Maximizes sum(x) over x >= 0 subject to
      * (1 - 2*beta) * x[k-1] + 1 >= 0, which is 1/2 + 1/(2*x[k-1]) >= beta
        (the expression in get_true_prob) rearranged for x[k-1] > 0,
      * x[0] + ... + x[k-2] <= x[k-1] - 1,
      * x <= 10 elementwise.

    Returns (x.value, prob.value) as produced by cvxpy with the MOSEK solver.
    """
    x = cp.Variable(k, nonneg = True)
    last = k - 1
    constraints = [
        (1-2*beta)*x[last] + 1 >= 0,
        cp.sum(x[0:last]) <= x[last]-1,
        x<=10,
    ]
    problem = cp.Problem(cp.Maximize(cp.sum(x)), constraints)
    problem.solve(solver=cp.MOSEK)
    return(x.value, problem.value)

In [None]:
# Set parameter values for the single illustrative run in the cells below
random_seed = 1  # seed for data generation; also passed to rs.search_alg
alpha = 10**-6  # 1 - alpha is the chi-square quantile level used in the lower-bound radius
beta = 0.75  # required probability level of the chance constraint
k = 2  # dimension of x and of each scenario
N_train = 20  # number of training scenarios (the nominal scenario included)
N_test = 1000  # number of test scenarios
par = 1  # forwarded together with phi_div to rs.compute_lb
phi_div = phi.mod_chi2_cut  # divergence function taken from the phi_divergence module
phi_dot = 2  # scales the radius r = phi_dot/(2*N) * chi2 quantile; NOTE(review): the batch experiment further below uses 1 -- confirm which is intended
numeric_precision = 1e-6 # To correct for floating-point math operations

In [None]:
# Get generated data
#N_campi = util.determine_campi_N_min(k, beta, alpha)
#N_train = N_campi
data_train = generate_data_with_nominal(random_seed, k, N_train)
# Draw the test set with a different seed than the training set. Both generators
# reseed NumPy's global RNG and then call np.random.uniform, so reusing
# random_seed would make the first N_train-1 test scenarios identical to the
# random training scenarios, and the test-based evaluation would no longer be
# out-of-sample.
data_test = generate_data(random_seed + 1, k, N_test)

In [None]:
# --- Search budget ---
time_limit_search = 5         # seconds given to the search algorithm (alternative: 0.25*60)
time_limit_solve = 5*60       # seconds allowed for each individual SCP solve
#time_limit_mosek = 10*60     # seconds, for larger MIP / LP solves (currently disabled)
max_nr_solutions = 1000       # extra cap, useful for easy problems with long time limits
add_remove_threshold = 0.00   # determines when randomness is introduced in the add/removal decision

# --- Scenario add / remove / clean strategies ---
# Alternatives for add_strategy: 'smallest_vio', 'N*(beta-lb)_smallest_vio', 'random_weighted_vio'
add_strategy = 'random_vio'
remove_strategy = 'random_active'
clean_strategy = (30, 'all_inactive')

# --- Run the search on the training data ---
runtime_search, num_iter, solutions = rs.search_alg(
    data_train, N_test, beta, alpha,
    time_limit_search, time_limit_solve, max_nr_solutions,
    add_strategy, remove_strategy, clean_strategy, add_remove_threshold,
    par, phi_div, phi_dot, numeric_precision,
    solve_SCP, uncertain_constraint, random_seed,
)

# --- Evaluate the solutions returned by the search on the test data ---
runtime_eval, best_sol, pareto_solutions = rs.evaluate_alg(
    solutions, data_test, beta, alpha, par, phi_div, phi_dot,
    uncertain_constraint, numeric_precision,
)

In [None]:
# Runtime of the search phase, as returned by rs.search_alg
runtime_search

In [None]:
# Iteration counters returned by rs.search_alg (indexed by 'add', 'remove' and 'clean' further below)
num_iter

In [None]:
# Runtime of the evaluation phase, as returned by rs.evaluate_alg
runtime_eval

In [None]:
# Print a summary of the best solution returned by rs.evaluate_alg
dataio.print_solution_info(best_sol)

In [None]:
# Relative gap (in %) between the optimum under the closed-form probability
# constraint and the objective of the best solution found by the algorithm.
x_true, obj_true = solve_toyproblem_true_prob(beta, k)
obj_alg = best_sol['obj']
obj_gap_true =  100*(obj_true - obj_alg)/obj_true
obj_gap_true

In [None]:
# Plot the first 8 solutions (i = 0..7) returned by the search
for i,sol in enumerate(solutions):
    if i > 7:
        break

    Z_arr = data_train[sol['scenario_set']]
    true_prob = get_true_prob(sol['sol'], k)

    # Training data together with the scenarios in the current scenario set.
    # The 11th positional argument is True only for the very first plot
    # (presumably show_legend, by analogy with the plot_solution calls -- confirm).
    dataio.plot_iter(i, data_train, Z_arr, sol['sol'], sol['obj'],
                     sol['p_train'], sol['lb_train'], true_prob,
                     True, "png", i == 0, N_train, alpha, beta)

    # Test data is plotted for every solution except the first one.
    # NOTE(review): confirm that skipping the test plot for i == 0 is intended.
    if i != 0:
        dataio.plot_iter(i, data_test, None, sol['sol'], sol['obj'],
                         sol['p_test'], sol['lb_test'], true_prob,
                         True, "png", False, N_test, alpha, beta)

In [None]:
# Plot the Pareto solutions returned by rs.evaluate_alg against the level beta
# (the four trailing arguments are left as None; their meaning is defined in dataio)
dataio.plot_pareto_curve(pareto_solutions, beta, None, None, None, None)

In [None]:
# Plot the objective of the solutions over the course of the search, with the best solution passed alongside
dataio.plot_obj_over_time(solutions, best_sol, None, None, None, None)

In [None]:
# Plot the scenario-set size over the course of the search, with the best solution passed alongside
dataio.plot_size_set_over_time(solutions, best_sol, None, None, None, None)

In [None]:
# Plot final solution found by algorithm
name = 'Strategy: '+ str(add_strategy)
save_plot = False
plot_type = "eps"
show_legend = True
Z_values = data_train[best_sol['scenario_set']]
# The sample-size argument was the name `N`, which is not defined in any visible
# cell (NameError). The plotted data set is data_train, so pass N_train, following
# the convention of the plot_iter calls (N_train with data_train).
# NOTE(review): the bound shown is best_sol['lb_test']; confirm N_train is the size wanted here.
dataio.plot_solution(name, data_train, Z_values, best_sol['sol'],
                     best_sol['obj'], best_sol['lb_test'], save_plot, plot_type, show_legend,
                     N_train, alpha, beta)

In [None]:
# Compute optimal solution with true probability constraint and its
# lower bound on the test data
prob_true = beta
[x_true, obj_true] = solve_toyproblem_true_prob(prob_true, k)
constr = uncertain_constraint(data_test, x_true)
# Scenarios count as violated only beyond the numerical tolerance
vio = constr[constr>(0+numeric_precision)]
# Violations are counted on data_test, so the frequency must be taken over
# N_test (not N_train); this also matches the sample size used in r below.
p_vio = len(vio)/N_test
p = np.array([1-p_vio, p_vio])
# Radius from the chi-square quantile with 1 degree of freedom
# (scipy.stats is imported in the notebook's import cell)
r = phi_dot/(2*N_test)*scipy.stats.chi2.ppf(1-alpha, 1)
lb = rs.compute_lb(p, r, par, phi_div)
print(p)
print(lb)
print(obj_true)

In [None]:
# Plot the solution of the true-probability problem on the test data
name = "TrueProb="+str(prob_true)
save_plot = False
plot_type = "eps"
show_legend = True
# The sample-size argument was the name `N`, which is not defined in any visible
# cell (NameError); the plotted data set is data_test, so pass N_test.
dataio.plot_solution(name, data_test, None, x_true, obj_true, lb, save_plot, plot_type, show_legend,
                     N_test, alpha, beta)

In [None]:
# Determine optimal solution given data_test
# time_limit_mosek only appears as a commented-out setting in the search
# configuration cell, so using it here raised a NameError. Define it with the
# value from that commented-out line.
time_limit_mosek = 10*60 # in seconds (for larger MIP / LP solves)
runtime, opt_x, opt_sum_y, opt_obj, opt_lb = util.compute_opt_given_data(alpha, beta, par, phi_div, data_test, time_limit_mosek)

In [None]:
# Runtime returned by util.compute_opt_given_data in the previous cell
runtime

In [None]:
# Plot optimal solution given data_test
name = 'Opt_given_test_data'
save_plot = False
plot_type = "eps"
show_legend = True
# The sample-size argument was the name `N`, which is not defined in any visible
# cell (NameError); the plotted data set is data_test, so pass N_test.
dataio.plot_solution(name, data_test, None, opt_x, opt_obj, opt_lb, save_plot, plot_type, show_legend,
                     N_test, alpha, beta)

In [None]:
# Compute solution via Campi method
# N_campi and time_limit_mosek only appear in commented-out lines earlier in the
# notebook, so define them here using those same expressions.
N_campi = util.determine_campi_N_min(k, beta, alpha)
time_limit_mosek = 10*60 # in seconds (for larger MIP / LP solves)
# generate_data takes (random_seed, k, N); the seed argument was missing
data = generate_data(random_seed, k, N_campi)
runtime, campi_x, campi_obj, campi_true_prob, Z_arr = util.solve_with_campi_N(alpha, beta, numeric_precision, data, time_limit_mosek)

In [None]:
# Plot Campi solution
name = 'Campi method'
save_plot = False
plot_type = "eps"
show_legend = True
# The sample-size argument was the name `N`, which is not defined in any visible
# cell (NameError); use the number of scenarios in the data set being plotted.
dataio.plot_solution(name, data, Z_arr, campi_x, campi_obj, 0, save_plot, plot_type, show_legend,
                     len(data), alpha, beta)

# The following cells are used to obtain output and write it to LaTeX tables

In [None]:
# Column headers for the per-run LaTeX table. Entries containing LaTeX
# backslashes are raw strings: '\%', '\#' and '\m' are invalid escape sequences
# in normal string literals (the values are unchanged).
headers = ['k', 'N_train', 'N_test', 'seed',
           'Obj.~(true prob.)', 'Obj.~(RS)', r'Gap (\%)', 'LB_{train}', 'LB_{test}', 'True Prob.',
           r'\#Iter.~(add)', r'\#Iter.~(remove)', r'\#Iter.~(clean)', r'$|\mathcal{X}|$',
           r'$|\mathcal{S}_{max}|$', 'Time']

# Results per run, keyed by (k, N_train, N_test, random_seed)
output_data = {}

# Variable parameter values
N_total_settings = [100, 500, 1000]
p_train_settings = [0.2, 0.4, 0.5, 0.6, 0.8]
random_seed_settings = list(range(1, 11))

# Fixed parameter values
k = 1000
alpha = 0.10
beta = 0.90

# LB-related parameters
par = 1
phi_div = phi.mod_chi2_cut
phi_dot = 1
numeric_precision = 1e-6 # To correct for floating-point math operations

# RS-related parameters
time_limit_search = 1*60
time_limit_solve = 1*time_limit_search # in seconds
max_nr_solutions = 10000 # for easy problems with long time limits, we may want extra restriction
add_strategy = 'random_vio'
remove_strategy = 'random_active'
clean_strategy = (20, 'all_inactive')
add_remove_threshold = 0.00 # should be fractional in relation to beta, set to 0 if not used

# Results are checkpointed to this file after every run (loop-invariant, so set once)
output_file_name = 'new_output_data_2'

# Compute true opt
x_true, obj_true = solve_toyproblem_true_prob(beta, k)

run_count = 0
for N_total in N_total_settings:
    for p_train in p_train_settings:

        N_train = round(p_train * N_total)
        N_test = N_total - N_train

        for random_seed in random_seed_settings:

            data = generate_data(random_seed, k, N_total)
            # NOTE(review): train_test_split is not imported in the visible cells
            # (presumably sklearn.model_selection.train_test_split) -- confirm it is in scope.
            # train_size is passed as an absolute count so the split sizes are exactly
            # N_train / N_test and do not depend on floating-point rounding of N_train/N_total.
            data_train, data_test = train_test_split(data, train_size=N_train, random_state=random_seed)

            runtime_search, num_iter, solutions = rs.search_alg(data_train, N_test, beta, alpha, time_limit_search, time_limit_solve,
                                                       max_nr_solutions, add_strategy, remove_strategy, clean_strategy,
                                                       add_remove_threshold,
                                                       par, phi_div, phi_dot, numeric_precision,
                                                       solve_SCP, uncertain_constraint, random_seed)

            runtime_eval, best_sol, pareto_solutions = rs.evaluate_alg(solutions, data_test, beta, alpha, par, phi_div, phi_dot,
                                                                       uncertain_constraint, numeric_precision)
            x_true_prob = get_true_prob(best_sol['sol'], k)

            # Compute gap with true prob opt
            obj_gap_true = 100*(obj_true - best_sol['obj'])/obj_true

            # Largest scenario set over all returned solutions (0 if there are none)
            max_S = max((len(sol['scenario_set']) for sol in solutions), default=0)

            output_data[(k, N_train, N_test, random_seed)] = [obj_true,
                                                              best_sol['obj'],
                                                              obj_gap_true,
                                                              best_sol['lb_train'],
                                                              best_sol['lb_test'],
                                                              x_true_prob,
                                                              num_iter['add'], num_iter['remove'], num_iter['clean'],
                                                              len(solutions), max_S, best_sol['time']]

            # Rewrite the full results dictionary after every run so that
            # partial results survive an interrupted experiment
            with open(r'output/'+output_file_name+'.txt','w+') as f:
                f.write(str(output_data))

            run_count += 1
            print("Completed run: " + str(run_count))

In [None]:
# Display the collected results, one entry per (k, N_train, N_test, seed) run
output_data

In [None]:
# Write the per-run results to a LaTeX table via dataio
# (the meaning of the first argument, 2, is defined in dataio.write_output_to_latex)
dataio.write_output_to_latex(2, headers, output_data)

In [None]:
# Base name (without extension) of the file under output/ used by the write and read cells below
output_file_name = 'new_output'

In [None]:
# Write output to .txt file: the string representation of the whole results
# dictionary is stored, overwriting any existing file of that name
with open(r'output/'+output_file_name+'.txt','w+') as f:
    f.write(str(output_data))

In [None]:
# Read the results dictionary back from the .txt file
file_path = 'output/'+output_file_name+'.txt'
dic = ''
with open(file_path,'r') as f:
    # Keep only the last line of the file (the dictionary is written without newline)
    for i in f:
        dic = i
# NOTE(review): eval executes whatever the file contains; only use this on
# files written by this notebook.
output_data_read_2 = eval(dic)

In [None]:
# Aggregate the per-seed results into "mean (std)" strings for every
# (k, N_train, N_test) setting. pandas is imported as pd in the notebook's
# import cell.
k = 1000
output_data_agg = {}
for N_total in N_total_settings:
    for p_train in p_train_settings:

        N_train = round(p_train * N_total)
        N_test = N_total - N_train

        # One column per run with this train/test split, one row per recorded value
        df = pd.DataFrame({key: pd.Series(val) for key, val in output_data.items()
                           if key[1] == N_train and key[2] == N_test})
        df = df.astype(float)
        # Mean and standard deviation across the runs (columns) for every row
        df_agg = df.agg(["mean","std"], axis="columns")

        li = []
        for i, (mu, std) in enumerate(df_agg.itertuples(index=False, name=None)):
            # Rows 0-4 are reported with 3 decimals, rows 5 and up with 1 decimal.
            # NOTE(review): row 5 is the 'True Prob.' entry of the per-run output;
            # confirm that 1 decimal is intended for it.
            decimals = 1 if i >= 5 else 3
            li.append(f'{round(mu, decimals):.{decimals}f} ({round(std, decimals):.{decimals}f})')

        output_data_agg[(k, N_train, N_test)] = li
output_data_agg

In [None]:
# Column headers for the aggregated LaTeX table. Entries containing LaTeX
# backslashes are raw strings: '\%', '\#' and '\m' are invalid escape sequences
# in normal string literals (the values are unchanged).
headers_agg = ['k', 'N_train', 'N_test',
           'Obj.~(true prob.)', 'Obj.~(RS)', r'Gap (\%)', 'LB_{train}', 'LB_{test}', 'True Prob.',
           r'\#Iter.~(add)', r'\#Iter.~(remove)', r'\#Iter.~(clean)', r'$|\mathcal{X}|$',
           r'$|\mathcal{S}_{max}|$', 'Time']

# Write the aggregated results to a LaTeX table via dataio
# (the meaning of the first argument, 3, is defined in dataio.write_output_to_latex)
dataio.write_output_to_latex(3, headers_agg, output_data_agg)

In [None]:
# To plot histograms for random seed output
# Read from .txt file
output_file_name = 'new_output'
file_path = 'output/'+output_file_name+'.txt'
dic = ''
with open(file_path,'r') as f:
         # Keeps only the last line of the file
         for i in f.readlines():
            dic=i #string
# NOTE(review): eval executes whatever the file contains; only use this on
# files written by this notebook.
output_data_read = eval(dic)

# NOTE(review): df is not used again in this cell
df = pd.DataFrame.from_dict(output_data_read, orient='index')
#li1 = [col for col in df.columns if 'add + improve' == col[1]]

# Select the entries whose second key component equals 'add + improve + remove'.
# NOTE(review): the experiment cell in this notebook stores keys of the form
# (k, N_train, N_test, seed), whose second component is an integer, so this filter
# presumably targets a file from a different experiment -- confirm. If nothing
# matches, df2 is empty and the iloc[0,:] below fails.
df2 = pd.DataFrame({key: pd.Series(val) for key, val in output_data_read.items() if key[1] == 'add + improve + remove'})
# First stored value of every selected entry
# NOTE(review): the title says "objective" while the axis label passed below is 'Gap (%)' -- confirm which quantity row 0 holds
obj2 = df2.iloc[0,:].astype(float)

title = 'Distribution of best found solution objective for random add + improve + remove'# for $\beta = 0.95$, $\alpha=10^{-6}$, $N_{1} = 1,000$, $N_{2} = 10,000$ and time limit $\mathcal{L} = 1$ minute'
dataio.plot_hist(obj2, 'Gap (%)', 'Frequency', title, 20, 0.75)