In [1]:
import numpy as np
import pandas as pd
import cvxpy as cp
import mosek
import matplotlib.pyplot as plt
import scipy.stats
import phi_divergence as phi
import time

In [None]:
# Matplotlib settings.
#
# Optional LaTeX/PGF rendering (disabled; kept for reference). It needs a LaTeX
# installation, which can be obtained with: conda install -c conda-forge miktex
# import matplotlib
# #import matplotlib.patches as mpatches
# from matplotlib.backends.backend_pgf import FigureCanvasPgf
# matplotlib.backend_bases.register_backend('pdf', FigureCanvasPgf)

# matplotlib.rcParams.update({
#     "pgf.texsystem": "pdflatex",
#     'font.family': 'serif',
#     'text.usetex': True,
#     'pgf.rcfonts': False,
# })

# Default figure size and resolution used by every plot in this notebook.
plt.rcParams.update({
    'figure.figsize': [9, 7],
    'figure.dpi': 100,
})

The toy model we examine is as follows:

\begin{align}\label{toy_model}
    \begin{split}
        \max_{x_1,x_2\geq 0}\{x_1+x_2: \mathbb{P}^*(\boldsymbol{\xi}\in [-1,1]^2: \xi_1x_1+\xi_2x_2\leq 1)\geq \beta, x_1-x_2\leq -1, x_1,x_2\leq 10\}.
    \end{split}
\end{align}

In [2]:
def toymodel(Z_arr):
    """Solve the two-variable toy model for a given set of scenarios.

    Maximizes x[0] + x[1] over x >= 0 subject to one linear constraint
    ``Z_arr[i] @ x <= 1`` per row of ``Z_arr``, ``x[0] <= x[1] - 1`` and
    ``x <= 10``.

    Parameters
    ----------
    Z_arr : array whose rows are the scenarios (two columns).

    Returns
    -------
    tuple
        ``(x.value, prob.value)``: the solution vector and the objective
        value reported by the solver.
    """
    x = cp.Variable(2, nonneg=True)

    scenario_con = Z_arr @ x <= 1
    ordering_con = x[0] <= x[1] - 1
    upper_bound_con = x <= 10

    problem = cp.Problem(cp.Maximize(cp.sum(x)),
                         [scenario_con, ordering_con, upper_bound_con])
    problem.solve(solver=cp.MOSEK)
    return (x.value, problem.value)

def toymodel_true(beta, k):
    """Solve the k-variable toy model with a closed-form constraint in beta.

    Maximizes sum(x) over x >= 0 subject to
    ``(1 - 2*beta) * x[k-1] + 1 >= 0``, ``sum(x[0:k-1]) <= x[k-1] - 1`` and
    ``x <= 10``.
    NOTE(review): the first constraint presumably encodes the true chance
    constraint of the toy model -- confirm against the accompanying text.

    Parameters
    ----------
    beta : coefficient used in the first constraint.
    k : number of decision variables.

    Returns
    -------
    tuple
        ``(x.value, prob.value)``.
    """
    x = cp.Variable(k, nonneg=True)
    last = x[k - 1]

    constraint_list = [
        (1 - 2 * beta) * last + 1 >= 0,
        cp.sum(x[:k - 1]) <= last - 1,
        x <= 10,
    ]
    problem = cp.Problem(cp.Maximize(cp.sum(x)), constraint_list)
    problem.solve(solver=cp.MOSEK)
    return (x.value, problem.value)

def toymodel_large(Z_arr, k, time_limit):
    """Solve the k-variable toy model for a given set of scenarios.

    Maximizes sum(x) over x >= 0 subject to ``Z_arr @ x <= 1`` (one
    constraint per scenario row), ``sum(x[0:k-1]) <= x[k-1] - 1`` and
    ``x <= 10``.

    Parameters
    ----------
    Z_arr : array whose rows are the scenarios (k columns).
    k : number of decision variables.
    time_limit : value passed to MOSEK as ``optimizer_max_time``.

    Returns
    -------
    tuple
        ``(x.value, prob.value)``.
    """
    x = cp.Variable(k, nonneg=True)

    constraint_list = [
        Z_arr @ x <= 1,
        cp.sum(x[:k - 1]) <= x[k - 1] - 1,
        x <= 10,
    ]
    problem = cp.Problem(cp.Maximize(cp.sum(x)), constraint_list)

    solver_options = {mosek.dparam.optimizer_max_time: time_limit}
    problem.solve(solver=cp.MOSEK, mosek_params=solver_options)
    return (x.value, problem.value)

def lowbound(p,r, par, phi_div):
    """Return the smallest q[0] over all admissible two-point vectors q.

    q is a nonnegative 2-vector summing to one; the remaining constraints are
    whatever ``phi_div(p, q, r, par, constraints)`` returns.
    NOTE(review): presumably phi_div appends a phi-divergence constraint of
    radius r around p -- confirm in the phi_divergence module.

    Parameters
    ----------
    p : 2-vector passed to ``phi_div`` (callers pass empirical frequencies).
    r : passed through to ``phi_div``.
    par : passed through to ``phi_div``.
    phi_div : callable returning the full constraint list.

    Returns
    -------
    The optimal objective value ``prob.value``.
    """
    q = cp.Variable(2, nonneg=True)
    simplex_con = [cp.sum(q) == 1]
    all_cons = phi_div(p, q, r, par, simplex_con)

    problem = cp.Problem(cp.Minimize(q[0]), all_cons)
    problem.solve(solver=cp.MOSEK)
    return problem.value

def opt_set(data, k, F, M, time_limit):
    """Solve the mixed-integer (big-M) version of the toy model.

    A boolean y[i] is attached to every row of ``data``; when y[i] = 1 the
    constraint ``data[i] @ x <= 1`` is enforced, when y[i] = 0 its right-hand
    side is relaxed by M. At least F of the y[i] must be one.

    Parameters
    ----------
    data : array whose rows are the scenarios (k columns).
    k : number of decision variables.
    F : lower bound on ``sum(y)``.
    M : big-M constant used to relax a constraint.
    time_limit : value passed to MOSEK as ``optimizer_max_time``.

    Returns
    -------
    tuple
        ``(x.value, y.value, prob.value)``.
    """
    num_scenarios = len(data)
    x = cp.Variable(k, nonneg=True)
    y = cp.Variable(num_scenarios, boolean=True)

    structural = [cp.sum(x[:k - 1]) <= x[k - 1] - 1, x <= 10]
    big_m = [data @ x <= 1 + (1 - y) * M]
    cardinality = [cp.sum(y) >= F]

    problem = cp.Problem(cp.Maximize(cp.sum(x)), structural + big_m + cardinality)
    solver_options = {mosek.dparam.optimizer_max_time: time_limit}
    problem.solve(solver=cp.MOSEK, mosek_params=solver_options)
    return (x.value, y.value, problem.value)

def check_conv_comb(Z_arr):
    """Return the indices of rows of Z_arr that are convex combinations of the other rows.

    For every row z_i a small feasibility LP is solved: find alpha >= 0 with
    sum(alpha) == 1 and alpha @ Z_rest == z_i, where Z_rest is Z_arr without
    row i (https://en.wikipedia.org/wiki/Convex_combination).

    Each row is tested against *all* other rows of Z_arr, including rows that
    may themselves end up in the returned list.

    Parameters
    ----------
    Z_arr : array whose rows are the points to test.

    Returns
    -------
    list of int
        Row indices for which the LP was solved successfully. The list is
        empty when Z_arr has fewer than 3 rows (no LP is solved then).
    """
    conv_comb_points = []
    if len(Z_arr) < 3:
        return conv_comb_points

    for i in range(len(Z_arr)):
        z_i = Z_arr[i]
        Z_rest = np.delete(Z_arr, i, axis=0)

        alpha = cp.Variable(len(Z_rest), nonneg=True)
        constraints = [alpha @ Z_rest == z_i, cp.sum(alpha) == 1]
        # No true objective function: only the existence of a feasible solution matters.
        obj = cp.Maximize(alpha[0])
        prob = cp.Problem(obj, constraints)
        prob.solve(solver=cp.MOSEK)

        # Only a successful solve proves feasibility. Statuses such as
        # 'infeasible_inaccurate' must not mark the point as redundant,
        # because the caller deletes every index returned here.
        if prob.status in ("optimal", "optimal_inaccurate"):
            conv_comb_points.append(i)

    return conv_comb_points

def compute_calafiore_N_min(dim_x, beta, alpha):
    """Return ceil(dim_x / ((1 - beta) * alpha)) converted to an integer type.

    NOTE(review): judging by the name this is a minimum sample size from a
    bound by Calafiore -- confirm which result is meant.

    Parameters
    ----------
    dim_x : numerator (callers presumably pass the number of decision variables).
    beta : enters the denominator as (1 - beta).
    alpha : enters the denominator as a factor.
    """
    denominator = (1 - beta) * alpha
    n_min = np.ceil(dim_x / denominator)
    return n_min.astype(int)

In [None]:
def plot_iter(num_iter, data, Z_arr, x, obj, lb, save_plot, plot_type, show_legend):
    """Plot all scenarios, the chosen scenarios and the constraint line for one iteration.

    Parameters
    ----------
    num_iter : iteration number, used in the title and in the file name.
    data : array of scenarios; columns 0 and 1 are plotted.
    Z_arr : array of chosen scenarios (columns 0 and 1 are plotted), or None
        to skip them.
    x : solution; x[0] and x[1] define the line x[0]*xi_1 + x[1]*xi_2 = 1.
    obj : objective value shown in the title.
    lb : lower bound shown in the title.
    save_plot : if true, the figure is saved under 'Figures/ToyModel/'.
    plot_type : file extension used when saving (e.g. 'eps').
    show_legend : if true, a legend is drawn to the right of the axes.

    Notes
    -----
    The file name reads the notebook-level variables N, alpha and beta.
    """
    plt.plot(data[:,0],data[:,1],'ok',markersize=1, label = 'All scenarios')

    # `Z_arr != None` is an elementwise comparison for numpy arrays and makes
    # the `if` raise ValueError; an identity test is what is intended.
    if Z_arr is not None:
        plt.plot(Z_arr[:,0],Z_arr[:,1], color='blue', marker='+', linestyle='',
                 markersize=10, label = 'Chosen scenarios')

    # Add constraint to plot, given solution x
    constraint_x = np.linspace(-1, 1, 1000)
    constraint_y = (1 - x[0]*constraint_x) / x[1]
    plt.plot(constraint_x, constraint_y, '--r', label = r'$\xi_{1}x_{1}^{*}+\xi_{2}x_{2}^{*}\leq 1$' ,alpha=1)

    plt.title('Iteration '+str(num_iter)+': Solution = (' + str(round(x[0],3)) + ', '
              + str(round(x[1],3)) + '), Objective value = ' + str(round(obj,3))
              + ', Lower bound = '+ str(round(lb,3)))
    plt.xlabel(r'$\xi_1$')
    plt.ylabel(r'$\xi_2$')

    if show_legend:
        plt.legend(bbox_to_anchor=(1.01, 0.6), loc='upper left')

    plt.tight_layout()

    if save_plot:
        plot_name = 'Figures/ToyModel/Scenarios_wConstraint_iter='+str(num_iter)+'_N=' + str(N) + '_alpha=' + str(alpha) + "_beta="+ str(beta)
        plt.savefig(plot_name + '.' + plot_type)

    plt.show()

In [None]:
# For plotting a singular solution
def plot_solution(name, data, Z_arr, x, obj, lb, save_plot, plot_type, show_legend):
    """Plot all scenarios, the chosen scenarios and the constraint line for one solution.

    Parameters
    ----------
    name : label used in the title and in the file name; converted with
        str(), so non-string labels such as an iteration count are accepted.
    data : array of scenarios; columns 0 and 1 are plotted.
    Z_arr : array of chosen scenarios (columns 0 and 1 are plotted), or None
        to skip them.
    x : solution; x[0] and x[1] define the line x[0]*xi_1 + x[1]*xi_2 = 1.
    obj : objective value shown in the title.
    lb : lower bound shown in the title.
    save_plot : if true, the figure is saved under 'Figures/ToyModel/'.
    plot_type : file extension used when saving (e.g. 'eps').
    show_legend : if true, a legend is drawn to the right of the axes.

    Notes
    -----
    The file name reads the notebook-level variables N, alpha and beta.
    """
    name = str(name)

    plt.plot(data[:,0],data[:,1],'ok',markersize=1, label = 'All scenarios')

    # `Z_arr != None` is an elementwise comparison for numpy arrays and makes
    # the `if` raise ValueError; an identity test is what is intended.
    if Z_arr is not None:
        plt.plot(Z_arr[:,0],Z_arr[:,1], color='blue', marker='+', linestyle='',
                 markersize=10, label = 'Chosen scenarios')

    # Add constraint to plot, given solution x
    constraint_x = np.linspace(-1, 1, 1000)
    constraint_y = (1 - x[0]*constraint_x) / x[1]
    plt.plot(constraint_x, constraint_y, '--r', label = r'$\xi_{1}x_{1}^{*}+\xi_{2}x_{2}^{*}\leq 1$' ,alpha=1)

    plt.title(name +': Solution = (' + str(round(x[0],3)) + ', '
              + str(round(x[1],3)) + '), Objective value = ' + str(round(obj,3))
              + ', Lower bound = '+ str(round(lb,3)))
    plt.xlabel(r'$\xi_1$')
    plt.ylabel(r'$\xi_2$')

    if show_legend:
        plt.legend(bbox_to_anchor=(1.01, 0.6), loc='upper left')

    plt.tight_layout()

    if save_plot:
        plot_name = 'Figures/ToyModel/Scenarios_wConstraint_'+name+'_N=' + str(N) + '_alpha=' + str(alpha) + "_beta="+ str(beta)
        plt.savefig(plot_name + '.' + plot_type)

    plt.show()

In [13]:
def write_output_to_latex(num_settings, headers, data):
    """Print a LaTeX ``tabular`` built from a dictionary of result rows.

    Parameters
    ----------
    num_settings : int
        Number of leading "setting" columns (left-aligned, placed before the
        vertical rule). Any value >= 1 is supported.
    headers : list of str
        Column headers, joined with ' & '. They are emitted verbatim, so
        LaTeX special characters must already be escaped by the caller.
    data : dict
        Maps a row label to an iterable of cell values. With
        ``num_settings == 1`` the key itself is the label; otherwise the key
        is indexed (``label[0]`` ... ``label[num_settings - 1]``).

    Notes
    -----
    If ``num_settings < 1`` an error message is printed once and the table is
    emitted without data rows.
    """
    textabular = f"{'l'*num_settings}|{'r'*(len(headers)-num_settings)}"
    texheader = " & ".join(headers) + "\\\\"
    texdata = "\\hline\n"

    if num_settings < 1:
        print("ERROR: num_settings must be at least 1")
    else:
        for label, values in data.items():
            if num_settings == 1:
                label_cells = [label]
            else:
                label_cells = [label[i] for i in range(num_settings)]
            label_part = " & ".join(map(str, label_cells))
            value_part = " & ".join(map(str, values))
            texdata += f"{label_part} & {value_part} \\\\\n"

    print("\\begin{tabular}{"+textabular+"}")
    print(texheader)
    print(texdata,end="")
    print("\\end{tabular}")

In [4]:
# Parameter values for the experiments with the small toy model
k = 2        # number of decision variables
N = 1000     # number of scenarios (including the nominal one)

alpha = 0.1  # enters r through the chi-square quantile at level 1 - alpha
beta = 0.9   # level the lower bound lb has to reach

# Inputs handed to lowbound / the phi-divergence constraint builder
par = 1
phi_div = phi.mod_chi2_cut
phi_dot = 1
r = phi_dot/(2*N)*scipy.stats.chi2.ppf(1-alpha, 1)

# Time limits in seconds: per MOSEK solve and for the whole algorithm
time_limit_mosek = 600
time_limit_alg = 600

# Tolerance used when classifying a scenario as violated (floating-point math)
numeric_precision = 1e-6

In [None]:
# Build the scenario set: N-1 uniform draws on [-1, 1]^k followed by the
# all-zero nominal scenario as the last row.
np.random.seed(1)
Z_nominal = np.zeros((1, k), dtype=int)
data = np.vstack((np.random.uniform(-1, 1, size=(N-1, k)), Z_nominal))

In [None]:
# Check if toymodel_large can be solved for nominal case
# (only the all-zero scenario Z_nominal is imposed). The returned
# (x.value, prob.value) tuple is shown as the cell output.
toymodel_large(Z_nominal, k, time_limit_mosek)

In [None]:
# Algorithm 1 applied to toymodel
# (strategy1: add N*(beta-lb)-th scenario)
Z_arr = Z_nominal
lb = -np.inf
num_iter = 0

remove_cc = True
count_removed = 0

start_time = time.time()

while lb < beta:
    # toymodel(Z_arr) is the equivalent k = 2 model without a solver time limit
    [x, obj] = toymodel_large(Z_arr, k, time_limit_mosek)
    constr = data.dot(x)
    vio = constr[constr>(1+numeric_precision)]   # left-hand sides of violated scenarios
    p = np.array([(N-len(vio))/N, len(vio)/N])   # empirical (satisfied, violated) frequencies
    lb = lowbound(p,r, par, phi_div)
    gap = np.ceil(N*(beta-lb)).astype(int)

    if gap > 0:
        if len(vio) == 0:
            # Nothing left to add: re-solving would only repeat the same problem.
            print("No violated scenarios left while lb < beta; stopping.")
            break

        vio_sort = np.sort(vio)    #the violations are ranked here
        # gap can exceed the number of violated scenarios; clamp to the largest violation
        rank = min(gap, len(vio_sort))
        vio_value = vio_sort[rank-1]
        ind = np.where(constr == vio_value)[0][0]   # the N*(beta-lb)-th scenario is added
        Z_arr = np.append(Z_arr, np.array([data[ind]]),axis = 0)

        # identify and then remove convex combination points
        if remove_cc:
            conv_comb_points = check_conv_comb(Z_arr)
            if len(conv_comb_points) > 0:
                print("Found " + str(len(conv_comb_points)) + " conv combs in set " + str(conv_comb_points))
                Z_arr = np.delete(Z_arr, conv_comb_points, axis=0)
                count_removed += len(conv_comb_points)

        num_iter += 1

    if (time.time() - start_time) > time_limit_alg:
        break

print("Runtime  :", time.time() - start_time)
print("Obj      :", obj)
print("LB       :", lb)
print('true prob:', 1/2+1/(2*x[k-1]))
print("#Iter    :", num_iter)
print("Size S*  :", len(Z_arr))
print("#Removed :", count_removed)

In [None]:
# Algorithm 1 applied to toymodel
# (strategy2: add N_{violations}*(beta-lb)-th scenario)
Z_arr = Z_nominal
lb = -np.inf
num_iter = 0

remove_cc = True
count_removed = 0

start_time = time.time()

while lb < beta:
    # toymodel(Z_arr) is the equivalent k = 2 model without a solver time limit
    [x, obj] = toymodel_large(Z_arr, k, time_limit_mosek)
    constr = data.dot(x)
    vio = constr[constr>(1+numeric_precision)]   # left-hand sides of violated scenarios
    p = np.array([(N-len(vio))/N, len(vio)/N])   # empirical (satisfied, violated) frequencies
    lb = lowbound(p,r, par, phi_div)
    gap = np.ceil(len(vio)*(beta-lb)).astype(int)

    if gap > 0:
        vio_sort = np.sort(vio)    #the violations are ranked here
        rank = min(gap, len(vio_sort))   # never index past the largest violation
        vio_value = vio_sort[rank-1]
        ind = np.where(constr == vio_value)[0][0]   # the N_{violations}*(beta-lb)-th scenario is added
        Z_arr = np.append(Z_arr, np.array([data[ind]]),axis = 0)

        # identify and then remove convex combination points
        if remove_cc:
            conv_combs = check_conv_comb(Z_arr)
            Z_arr = np.delete(Z_arr, conv_combs, axis=0)
            count_removed += len(conv_combs)

        num_iter += 1
    elif lb < beta:
        # gap == 0 with lb < beta means there is no violated scenario to add;
        # re-solving would only repeat the same problem until the time limit.
        print("No violated scenarios left while lb < beta; stopping.")
        break

    if (time.time() - start_time) > time_limit_alg:
        break

print("Runtime  :", time.time() - start_time)
print("Obj      :", obj)
print("LB       :", lb)
print('true prob:', 1/2+1/(2*x[k-1]))
print("#Iter    :", num_iter)
print("Size S*  :", len(Z_arr))

In [None]:
# Algorithm 1 applied to toymodel
# (strategy3: add smallest violation scenario)
Z_arr = Z_nominal
lb = -np.inf
num_iter = 0
start_time = time.time()

# Optional per-iteration plotting (disabled):
# num_iter_to_plot = -1
# special_iter_to_plot = [50,100,150,200]
# save_plot = True
# plot_type = 'eps'
# show_legend = False

remove_cc = True
count_removed = 0

while lb < beta:
    # toymodel(Z_arr) is the equivalent k = 2 model without a solver time limit
    [x, obj] = toymodel_large(Z_arr, k, time_limit_mosek)
    constr = data.dot(x)
    vio = constr[constr>(1+numeric_precision)]   # left-hand sides of violated scenarios
    p = np.array([(N-len(vio))/N, len(vio)/N])   # empirical (satisfied, violated) frequencies
    lb = lowbound(p,r, par, phi_div)

#     if num_iter_to_plot >= 0:
#         if num_iter <= num_iter_to_plot or num_iter in special_iter_to_plot:
#             plot_iter(num_iter, data, Z_arr, x, obj, lb, save_plot, plot_type, show_legend)

    if lb < beta and len(vio) != 0:
        vio_min = np.min(vio)        # the least violated scenario is added
        ind = np.where(constr == vio_min)[0][0]
        Z_arr = np.append(Z_arr, np.array([data[ind]]), axis = 0)

        # identify and then remove convex combination points
        if remove_cc:
            conv_combs = check_conv_comb(Z_arr)
            Z_arr = np.delete(Z_arr, conv_combs, axis=0)
            count_removed += len(conv_combs)

        num_iter += 1
    elif lb < beta:
        # No violated scenario is left to add; re-solving would only repeat
        # the same problem until the time limit is hit.
        print("No violated scenarios left while lb < beta; stopping.")
        break

    if (time.time() - start_time) > time_limit_alg:
        break

print("Runtime  :", time.time() - start_time)
print("Obj      :", obj)
print("LB       :", lb)
print('true prob:', 1/2+1/(2*x[k-1]))
print("#Iter    :", num_iter)
print("Size S*  :", len(Z_arr))
print("#Removed :", count_removed)

In [None]:
# Plot final solution found by algorithm
# The plot options are defined here explicitly; elsewhere in the notebook they
# only appear in commented-out code, so the call would raise NameError.
# The values mirror those commented-out settings.
save_plot = True
plot_type = 'eps'
show_legend = False
# The iteration count is passed as text because the name is concatenated into
# the plot title and the file name.
plot_solution(str(num_iter), data, Z_arr, x, obj, lb, save_plot, plot_type, show_legend)

In [None]:
# Plot optimal solution with true probability constraint
# toymodel_true requires the number of decision variables k as second argument.
[x_true, obj_true] = toymodel_true(beta, k)
constr = data.dot(x_true)
# Empirical (satisfied, violated) frequencies of the scenarios under x_true
p = np.array([len(constr[constr<=1])/N,len(constr[constr>1])/N])
lb = lowbound(p,r, par, phi_div)
print(p)
print(lb)
print(obj_true)

plot_solution("TrueProb", data, None, x_true, obj_true, lb, True, 'eps', True)

In [None]:
# Use bisection-esque search to determine minimum p_1 for which lowbound >= beta
# Starting from p = (beta, 1-beta), p_1 is increased in steps of delta until
# lowbound >= beta. The search then steps back to the last point below beta
# and continues with a ten times smaller delta, until delta < epsilon.
p_hat = np.array([beta, 1-beta])
delta = 0.1
epsilon = 0.0001
l_hat = lowbound(p_hat, r, par, phi_div)
p_hat_prev = p_hat
while True:
    # Shrink the step when it would push p_1 (almost) up to 1
    if p_hat[0] + delta > 1 - epsilon:
        delta = delta/10
    p_hat_prev = p_hat   # remember the last point tried before this step
    p_hat = p_hat + np.array([delta, -delta])
    l_hat = lowbound(p_hat, r, par, phi_div)
    #print('p :', p_hat, ' l :', l_hat)
    if l_hat < beta:
        continue
    else:
        delta = delta / 10
        if delta < epsilon:
            break
        else:
            # Resume from the last point below beta, not from the initial point
            p_hat = p_hat_prev
print('FINAL: p :', p_hat, ' l :', l_hat)
p_min = p_hat
l_min = l_hat

In [None]:
# Solve the mixed-integer model: at least F scenarios must be satisfied.
M = 1000   # big-M constant used by opt_set to relax a scenario constraint
# p_min is the 2-vector (p_1, p_2); only p_1, the required fraction of
# satisfied scenarios, determines the cardinality bound.
F = np.ceil(p_min[0] * N)
x_val, y_val, prob_val = opt_set(data, k, F, M, time_limit_mosek)
print(x_val, np.sum(y_val), prob_val)

In [None]:
# Plot the solution of the mixed-integer model (x_val, prob_val from opt_set)
# together with l_min; no chosen scenarios are drawn (Z_arr is None), the
# figure is saved as .eps and the legend is hidden.
plot_solution("BestPossible", data, None, x_val, prob_val, l_min, True, 'eps', False)

The following cells are used to generate the results and write them to LaTeX tables.

In [18]:
# Experiment: run Algorithm 1 (smallest-violation strategy) for each k and
# setting, then print the results as a LaTeX table.
# Raw strings keep the LaTeX backslashes literal (no invalid escape sequences).
headers = ['k', 'Setting', 'Run Time (s)', 'Obj.~Value', 'Lower Bound',
           r'\#Iterations', r'$|\mathcal{S}^{*}|$', r'\#Removed']
output_data = {}

# Variables parameter values
k_settings = [100000]
settings = ['Original']#, 'With Removal']

# Fixed parameter values
N = 100
par = 1
phi_div = phi.mod_chi2_cut
phi_dot = 1
alpha = 0.1
beta = 0.9
r = phi_dot/(2*N)*scipy.stats.chi2.ppf(1-alpha, 1)
time_limit_mosek = 10*60 #in seconds 
time_limit_alg = 10*60
numeric_precision = 1e-6 # To correct for floating-point math

for k in k_settings:
    
    # Generate data
    Z_nominal = np.array([[0] * k])
    np.random.seed(1)
    data = np.random.uniform(-1,1,size = (N-1,k)) # generate N-1 scenarios
    data = np.concatenate((data,Z_nominal)) # add nominal case
    
    for setting in settings:
        Z_arr = Z_nominal
        lb = -np.inf
        num_iter = 0
        # 'Original' keeps every added scenario; any other setting removes
        # convex-combination points after each addition.
        remove_cc = (setting != 'Original')
        count_removed = 0

        start_time = time.time()
        while lb < beta:
            [x, obj] = toymodel_large(Z_arr, k, time_limit_mosek)
            constr = data.dot(x)
            vio = constr[constr>(1+numeric_precision)]   # left-hand sides of violated scenarios
            p = np.array([(N-len(vio))/N, len(vio)/N])   # empirical (satisfied, violated) frequencies
            lb = lowbound(p,r, par, phi_div)

            if lb < beta and len(vio) != 0:
                vio_min = np.min(vio)        # the least violated scenario is added
                ind = np.where(constr == vio_min)[0][0]
                Z_arr = np.append(Z_arr, np.array([data[ind]]), axis = 0)

                # identify and then remove convex combination points
                if remove_cc:
                    conv_combs = check_conv_comb(Z_arr)
                    Z_arr = np.delete(Z_arr, conv_combs, axis=0)
                    count_removed += len(conv_combs)

                num_iter += 1
            elif lb < beta:
                # No violated scenario is left to add; re-solving would only
                # repeat the same problem until the time limit is hit.
                print("No violated scenarios left while lb < beta; stopping.")
                break

            if (time.time() - start_time) > time_limit_alg:
                break

        output_data[(k,setting)] = [f'{round(time.time()-start_time, 0):.0f}',
                                    f'{round(obj,3):.3f}',
                                    f'{round(lb,3):.3f}',
                                    num_iter, len(Z_arr), count_removed]
    
write_output_to_latex(2, headers, output_data)

\begin{tabular}{ll|rrrrrr}
k & Setting & Run Time (s) & Obj.~Value & Lower Bound & \#Iterations & $|\mathcal{S}^{*}|$ & \#Removed\\
\hline
100000 & Original & 306 & 2.469 & 0.900 & 52 & 53 & 0 \\
\end{tabular}


In [19]:
# Experiment: run Algorithm 1 (smallest-violation strategy) for each (k, N)
# and print the results as a LaTeX table. Columns that belong to the MIP
# benchmark are filled with "-" here.
# Raw strings keep the LaTeX backslashes literal; '%' is escaped because an
# unescaped '%' starts a LaTeX comment and would swallow the rest of the
# header row.
headers = ['k', 'N', 'Best Possible Obj.', 'Obj.~Alg.', r'Gap (\%)',
           'Lower Bound', r'\#Iterations', 'Run Time Alg.~(s)', 'Run Time MIP (s)']

output_data = {}

# Variables parameter values
k_settings = [10000]
N_settings = [100]

# Fixed parameter values
remove_cc = False
par = 1
phi_div = phi.mod_chi2_cut
phi_dot = 1
alpha = 0.1
beta = 0.9
time_limit_mosek = 10*60 #in seconds 
time_limit_alg = 10*60
numeric_precision = 1e-6 # To correct for floating-point math

for k in k_settings:
    for N in N_settings:
       
        # Generate data
        Z_nominal = np.array([[0] * k])
        np.random.seed(1)
        data = np.random.uniform(-1,1,size = (N-1,k)) # generate N-1 scenarios
        data = np.concatenate((data,Z_nominal)) # add nominal case
        
        r = phi_dot/(2*N)*scipy.stats.chi2.ppf(1-alpha, 1)
        
        Z_arr = Z_nominal
        lb = -np.inf
        num_iter = 0
        count_removed = 0

        start_time = time.time()
        while lb < beta:
            [x, obj] = toymodel_large(Z_arr, k, time_limit_mosek)
            constr = data.dot(x)
            vio = constr[constr>(1+numeric_precision)]   # left-hand sides of violated scenarios
            p = np.array([(N-len(vio))/N, len(vio)/N])   # empirical (satisfied, violated) frequencies
            lb = lowbound(p, r, par, phi_div)

            if lb < beta and len(vio) != 0:
                vio_min = np.min(vio)        # the least violated scenario is added
                ind = np.where(constr == vio_min)[0][0]
                Z_arr = np.append(Z_arr, np.array([data[ind]]), axis = 0)

                if remove_cc: # identify and then remove convex combination points
                    conv_combs = check_conv_comb(Z_arr)
                    Z_arr = np.delete(Z_arr, conv_combs, axis=0)
                    count_removed += len(conv_combs)

                num_iter += 1
            elif lb < beta:
                # No violated scenario is left to add; re-solving would only
                # repeat the same problem until the time limit is hit.
                print("No violated scenarios left while lb < beta; stopping.")
                break

            if (time.time() - start_time) > time_limit_alg:
                break

        output_data[(k, N)] = ["-", f'{round(obj,3):.3f}', "-",  
                               f'{round(lb,3):.3f}', num_iter, 
                               f'{round(time.time()-start_time, 0):.0f}',"-"]
    
write_output_to_latex(2, headers, output_data)

\begin{tabular}{ll|rrrrrrr}
k & N & Best Possible Obj. & Obj.~Alg. & Gap (%) & Lower Bound & \#Iterations & Run Time Alg.~(s) & Run Time MIP (s)\\
\hline
10000 & 100 & - & 2.430 & - & 0.900 & 45 & 27 & - \\
\end{tabular}


In [20]:
# Experiment: solve the mixed-integer benchmark (opt_set) for each (k, N)
# and print k, N, the objective value and the run time.
time_limit_mosek = 10*60 #in seconds 
k_settings = [10000]
N_settings = [100]

alpha = 0.1
beta = 0.9
par = 1
phi_div = phi.mod_chi2_cut
phi_dot = 1
epsilon = 0.0001


for N in N_settings:
        
    # Use bisection-esque search to determine minimum p_1 for which lowbound >= beta
    r = phi_dot/(2*N)*scipy.stats.chi2.ppf(1-alpha, 1)
    # The step size is reset for every N; otherwise a later N would start
    # with the already shrunken step left over from the previous search.
    delta = 0.1
    p_hat = np.array([beta, 1-beta])
    l_hat = lowbound(p_hat, r, par, phi_div)
    p_hat_prev = p_hat
    while True:
        # Shrink the step when it would push p_1 (almost) up to 1
        if p_hat[0] + delta > 1 - epsilon:
            delta = delta/10
        p_hat_prev = p_hat   # remember the last point tried before this step
        p_hat = p_hat + np.array([delta, -delta])
        l_hat = lowbound(p_hat, r, par, phi_div)
        if l_hat < beta:
            continue
        else:
            delta = delta / 10
            if delta < epsilon:
                break
            else:
                # Resume from the last point below beta, not from the initial point
                p_hat = p_hat_prev
    p_min = p_hat
    l_min = l_hat
    
    for k in k_settings:
        # Generate data
        Z_nominal = np.array([[0] * k])
        np.random.seed(1)
        data = np.random.uniform(-1,1,size = (N-1,k)) # generate N-1 scenarios
        data = np.concatenate((data,Z_nominal)) # add nominal case
        
        M = 1000   # big-M constant used by opt_set to relax a scenario constraint
        # p_min is the 2-vector (p_1, p_2); only p_1, the required fraction of
        # satisfied scenarios, determines the cardinality bound.
        F = np.ceil(p_min[0] * N)
        
        start_time = time.time()
        x_val, y_val, prob_val = opt_set(data, k, F, M, time_limit_mosek)
        
        print(k,N,round(prob_val,3), round(time.time() - start_time, 0))
        


10000 100 2.286 602.0
