## Imports and preliminaries

In [None]:
import sys, os
sys.path.append(os.path.join(os.getcwd(), '../src'))
import numpy as np
import matplotlib.pyplot as plt
import pdb

import recursiveLeastSquares
import splitConformalPrediction
from MultiValidPrediction import MultiValidPrediction
from calibrationScorers import residualCalibrationScorer, customResidualCalibrationScorer

In [None]:
def produce_group(feat_index, feat_value):
    """Build a group-membership predicate keyed on a single feature.

    Args:
        feat_index: position of the feature to inspect in an input point.
        feat_value: value that feature must equal for the point to belong.

    Returns:
        A one-argument function ``f`` such that ``f(x)`` is ``True`` when
        ``x[feat_index] == feat_value`` and ``False`` otherwise.
    """
    def belongs_to_group(x):
        # bool() collapses the comparison result to a plain True / False.
        return bool(x[feat_index] == feat_value)

    return belongs_to_group

# Catch-all group: every point is a member
def all_points(x):
    """Return True for any x, so that this group contains every point."""
    return True

# Group list that holds only the catch-all group
basic_group = [all_points]

# Build 20 overlapping sub-groups, one per (binary feature, value) pair.
# The group for feature i with value j ends up at index 2 * i + j.
twenty_groups = [
    produce_group(feature, value)
    for feature in range(10)
    for value in range(2)
]

num_groups = len(twenty_groups)

## Synthetic Experiment with groups - Split-Conformal vs. MVP: Single trial

##### Set all parameters for a single trial

In [None]:
# Parameters for our uncertainty quantifier
# (delta, n, eta and r are handed to MultiValidPrediction when it is constructed)
T = 20000  # number of rounds: T data points are generated and the main loop runs range(T)
n = 40  # NOTE(review): presumably the number of threshold buckets used by MVP - confirm in MultiValidPrediction
r = 80000000  # NOTE(review): meaning is defined inside MultiValidPrediction - confirm there
delta = 0.1  # miscoverage level: the plots mark 1 - delta as the desired coverage
K_e = 2.12  # constant that only enters the formula for eta below

# Learning-rate style parameter for MVP, derived from the number of groups and n
eta = np.sqrt(np.log(num_groups * n) / (2 * K_e * num_groups * n))

# Parameters for data generation
x_std = 0.1  # std. dev. of theta and of the non-binary features
y_std = 0.25  # baseline label-noise std. dev., added to the feature-dependent part
d = 300 # total feature dimension; the first 10 features are binary, so choose d > 10

##### Generating data according to ordinary least-squares model - with one "high-noise" group and one "low-noise" group

In [None]:
# Ground-truth linear model: one zero-mean Gaussian coefficient per feature
theta = np.random.normal(loc=np.zeros(d), scale=x_std)

# d-dimension features - first 10 features are binary
xs_binvars = np.random.randint(low = 0, high = 2, size = (T, 10))
xs_remvars = np.random.normal(loc=np.zeros(d - 10), scale=x_std, size=(T, d - 10))
xs = np.concatenate((xs_binvars, xs_remvars), axis = 1)
# Per-feature noise contribution: binary feature 0 adds 3.0 to the noise std. dev.
# when it equals 1, every other binary feature adds only 0.1
std_dev_list = np.array([3.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
# Noise std. dev. of each point = contributions of its active binary features + baseline y_std
std_dev = np.dot(xs_binvars, std_dev_list) + y_std
# Labels follow the linear model plus heteroscedastic Gaussian noise
ys = np.dot(xs, theta) + np.random.normal(loc=0, scale= std_dev, size=T)

##### Initializing uncertainty-quantifiers and small initial calibration set for split-conformal prediction

In [None]:
# MVP uncertainty quantifier over the 20 feature-defined groups
myUncertaintyQuantifier = MultiValidPrediction(delta, n, twenty_groups, eta, r, normalize_by_counts=True)
# Split-conformal baseline that is given the same 20 groups
myConformalPredictor = splitConformalPrediction.splitConformal(num_groups, twenty_groups, delta)
# Split-conformal baseline that only knows the single all-points group
myBasicConformalPredictor = splitConformalPrediction.splitConformal(1, basic_group, delta)
# Two independent regressors: one shared by the two conformal baselines, one for MVP
# NOTE(review): the meaning of the (1.0, 1) arguments is defined in recursiveLeastSquares.RLS - confirm there
myRLS_conformal = recursiveLeastSquares.RLS(d, 1.0, 1)
myRLS_ours = recursiveLeastSquares.RLS(d, 1.0, 1)

# Scaling factor handed to the scorer and to every get_prediction_set call below
mult_factor = 8
# A single scorer instance is shared by all three methods
myResidualCalibrationScorer = customResidualCalibrationScorer.customResidualCalibrationScorer(mult_factor)

# --- MVP: overall histories, then one history per group ---
q_array, y_pred_ours_array, covered_ours_array = [], [], []
q_array_groups = [[] for _ in range(num_groups)]
y_pred_ours_groups = [[] for _ in range(num_groups)]
covered_ours_groups = [[] for _ in range(num_groups)]

# --- Split-conformal with groups: overall histories, then one per group ---
y_pred_conformal_array, w_t_conformal_array, covered_conformal_array = [], [], []
y_pred_conformal_groups = [[] for _ in range(num_groups)]
w_t_conformal_groups = [[] for _ in range(num_groups)]
covered_conformal_groups = [[] for _ in range(num_groups)]

# --- Basic split-conformal (no groups): overall histories, then one per group ---
y_pred_basic_array, w_t_basic_array, covered_basic_array = [], [], []
y_pred_basic_groups = [[] for _ in range(num_groups)]
w_t_basic_groups = [[] for _ in range(num_groups)]
covered_basic_groups = [[] for _ in range(num_groups)]

# True labels seen by each group
ys_groups = [[] for _ in range(num_groups)]


# Warm-start calibration set for the split-conformal baselines, sampled from the
# same model as the main data (same theta, same feature and noise distributions).
init_size = 10
conformal_calibration_xs_binvars = np.random.randint(low=0, high=2, size=(init_size, 10))
conformal_calibration_xs_remvars = np.random.normal(
    loc=np.zeros(d - 10), scale=x_std, size=(init_size, d - 10)
)
xs_cc = np.concatenate(
    (conformal_calibration_xs_binvars, conformal_calibration_xs_remvars), axis=1
)
std_dev = np.dot(conformal_calibration_xs_binvars, std_dev_list) + y_std
ys_cc = np.dot(xs_cc, theta) + np.random.normal(loc=0, scale=std_dev, size=init_size)

# Both baselines receive exactly the same warm-start points.
for curr_x, curr_y in zip(xs_cc, ys_cc):
    myConformalPredictor.update_calibration_data(curr_x, curr_y)
    myBasicConformalPredictor.update_calibration_data(curr_x, curr_y)

##### Check if calibration data covers all groups. If it doesn't, run previous cell again (to generate new data) or try increasing init_size (size of warm-start calibration set)

In [None]:
# Report whether the warm-start calibration set contains a point from every group.
print('Does initial calibration data cover all groups: {0}'.format(myConformalPredictor.all_groups_covered()))

##### Running MVP and split-conformal prediction across generated data

In [None]:
# Online protocol: in every round each method commits to a prediction interval
# for x_t before y_t is used to score it or to update any model.
for t in range(T):
    x_t = xs[t]
    y_t = ys[t]

    # 1. SPLIT-CONFORMAL (WITH AND WITHOUT GROUPS)
    # Both baselines share one regressor (myRLS_conformal); the shared scorer
    # is pointed at that regressor before any width is selected.
    y_pred_conformal_t = myRLS_conformal.predict(x_t)
    myResidualCalibrationScorer.update(myRLS_conformal.predict)

    w_t_conformal = myConformalPredictor.select_best_width(myResidualCalibrationScorer, x_t)
    conformal_prediction_set = myResidualCalibrationScorer.get_prediction_set(x_t, w_t_conformal, mult_factor)
    covered_conformal_t = conformal_prediction_set.cover(y_t)

    w_t_basic = myBasicConformalPredictor.select_best_width(myResidualCalibrationScorer, x_t)
    basic_prediction_set = myResidualCalibrationScorer.get_prediction_set(x_t, w_t_basic, mult_factor)
    covered_basic_t = basic_prediction_set.cover(y_t)

    # Data splitting: even rounds extend the calibration sets, odd rounds
    # train the regressor, so no point is used for both purposes.
    if t % 2 == 0:
        myConformalPredictor.update_calibration_data(x_t, y_t)
        myBasicConformalPredictor.update_calibration_data(x_t, y_t)
    else:
        # update the linear regression model
        myRLS_conformal.add_obs(x_t, y_t)

    # Overall histories store the width already multiplied by mult_factor.
    y_pred_conformal_array.append(y_pred_conformal_t)
    w_t_conformal_array.append(mult_factor * w_t_conformal)
    covered_conformal_array.append(covered_conformal_t)

    # The basic baseline uses the same regressor, hence the same point prediction.
    y_pred_basic_array.append(y_pred_conformal_t)
    w_t_basic_array.append(mult_factor * w_t_basic)
    covered_basic_array.append(covered_basic_t)

    # 2. MVP
    # Re-point the shared scorer at MVP's own regressor before using it.
    y_pred_ours_t = myRLS_ours.predict(x_t)
    myResidualCalibrationScorer.update(myRLS_ours.predict)

    q_t = myUncertaintyQuantifier.predict(x_t)
    curr_prediction_set = myResidualCalibrationScorer.get_prediction_set(x_t, q_t, mult_factor)

    covered_ours_t = curr_prediction_set.cover(y_t)

    # The score must be computed before the regressor sees (x_t, y_t).
    s_t = myResidualCalibrationScorer.calc_score(x_t, y_t)

    # MVP updates its regressor and its quantifier on every round.
    # (x_t is one row of xs, so no transpose is needed.)
    myRLS_ours.add_obs(x_t, y_t)
    myUncertaintyQuantifier.update(x_t, q_t, s_t)

    y_pred_ours_array.append(y_pred_ours_t)
    q_array.append(mult_factor * q_t)
    covered_ours_array.append(covered_ours_t)

    # Per-group bookkeeping for the performance metrics. Iterating over
    # twenty_groups keeps the indices aligned with however the list was built.
    # Per-group width histories store the raw (unscaled) widths.
    for curr_index, curr_group in enumerate(twenty_groups):
        if curr_group(x_t):
            ys_groups[curr_index].append(y_t)
            y_pred_conformal_groups[curr_index].append(y_pred_conformal_t)
            w_t_conformal_groups[curr_index].append(w_t_conformal)
            covered_conformal_groups[curr_index].append(covered_conformal_t)
            q_array_groups[curr_index].append(q_t)
            y_pred_ours_groups[curr_index].append(y_pred_ours_t)
            covered_ours_groups[curr_index].append(covered_ours_t)
            y_pred_basic_groups[curr_index].append(y_pred_conformal_t)
            w_t_basic_groups[curr_index].append(w_t_basic)
            covered_basic_groups[curr_index].append(covered_basic_t)

print('Trial complete')

##### Overall statistics (disregarding groups)

In [None]:
# Convert the per-round histories to arrays so they can be subtracted from ys.
y_pred_conformal_array = np.array(y_pred_conformal_array)
w_t_conformal_array = np.array(w_t_conformal_array)

y_pred_basic_array = np.array(y_pred_basic_array)
w_t_basic_array = np.array(w_t_basic_array)

y_pred_ours_array = np.array(y_pred_ours_array)
q_array = np.array(q_array)

# Fraction of rounds in which the prediction set contained the true label.
print("*** COVERAGE ***")
print("Split-conformal (without groups): {0}".format(np.average(covered_basic_array)))
print("Split-conformal (with groups, conservative method): {0}".format(np.average(covered_conformal_array)))
print("MVP: {0}".format(np.average(covered_ours_array)))
print("")

# Average of the stored widths (each was multiplied by mult_factor when recorded).
print("*** WIDTH *** ")
print("Split-conformal (without groups): {0}".format(np.average(w_t_basic_array)))
print("Split-conformal (with groups, conservative method): {0}".format(np.average(w_t_conformal_array)))
print("MVP: {0}".format(np.average(q_array)))
print("")

# Cumulative squared error of the point predictions over all rounds.
# np.linalg.norm alone is the square root of that sum, so it is squared here
# to make the reported number an actual squared loss.
print("*** SQUARED LOSS ***")
squared_loss_basic = np.linalg.norm(ys - y_pred_basic_array) ** 2
squared_loss_conformal = np.linalg.norm(ys - y_pred_conformal_array) ** 2
squared_loss_ours = np.linalg.norm(ys - y_pred_ours_array) ** 2
print("Split-conformal (without groups): {0}".format(squared_loss_basic))
print("Split-conformal (with groups, conservative method): {0}".format(squared_loss_conformal))
print("MVP: {0}".format(squared_loss_ours))

##### Plots (results across all groups)

In [None]:
# Grouped bar chart of the empirical coverage per group, one bar per method.

barWidth = 0.25
br1 = np.arange(len(covered_ours_groups))
br2 = [pos + barWidth for pos in br1]
br3 = [pos + barWidth for pos in br2]

coverage_basic = [np.average(hits) for hits in covered_basic_groups]
coverage_conformal = [np.average(hits) for hits in covered_conformal_groups]
coverage_ours = [np.average(hits) for hits in covered_ours_groups]

# Draw the three series side by side, in the same order as the legend.
for positions, heights, bar_color, bar_label in (
    (br1, coverage_basic, 'b', 'Split-Conformal: Without groups'),
    (br2, coverage_conformal, 'm', 'Split-Conformal: With groups, conservative approach'),
    (br3, coverage_ours, 'c', 'Our Method'),
):
    plt.bar(positions, heights, color=bar_color, width=barWidth, edgecolor='gray', label=bar_label, linewidth=0.5)

# One tick per group, placed under the middle bar of each triple.
group_labels = [str(group_id) for group_id in range(num_groups)]
tick_positions = [group_id + barWidth for group_id in range(len(covered_ours_groups))]
plt.xticks(tick_positions, group_labels)

# Reference line at the target coverage, annotated to the right of the last group.
target_coverage = 1 - delta
plt.axhline(y=target_coverage, c='r', linewidth=0.5)
plt.text(19.55, target_coverage + 0.02, '  desired')
plt.text(19.55, target_coverage - 0.04, '  coverage')

plt.legend()
plt.ylim([0.0, 1.4])
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xlabel('Groups')
plt.ylabel('Average Coverage')
plt.title('Comparison of group-wise coverage: Split-Conformal vs. MVP \n')
plt.show()

In [None]:
# Grouped bar chart of the average interval width per group, one bar per method.

barWidth = 0.25
br1 = np.arange(len(covered_ours_groups))
br2 = [pos + barWidth for pos in br1]
br3 = [pos + barWidth for pos in br2]

# Per-group histories hold raw widths; they are rescaled by 2 * mult_factor for display.
width_basic = [2 * mult_factor * np.average(widths) for widths in w_t_basic_groups]
width_conformal = [2 * mult_factor * np.average(widths) for widths in w_t_conformal_groups]
width_ours = [2 * mult_factor * np.average(widths) for widths in q_array_groups]

# Draw the three series side by side, in the same order as the legend.
for positions, heights, bar_color, bar_label in (
    (br1, width_basic, 'b', 'Split-Conformal: Without groups'),
    (br2, width_conformal, 'm', 'Split-Conformal: With groups, conservative approach'),
    (br3, width_ours, 'c', 'MVP'),
):
    plt.bar(positions, heights, color=bar_color, width=barWidth, edgecolor='gray', label=bar_label, linewidth=0.5)

# One tick per group, placed under the middle bar of each triple.
group_labels = [str(group_id) for group_id in range(num_groups)]
tick_positions = [group_id + barWidth for group_id in range(len(covered_ours_groups))]
plt.xticks(tick_positions, group_labels)

plt.legend()
plt.ylim([0.0, 19.0])
plt.title('Comparison of group-wise interval-width: Split-Conformal vs. MVP \n')
plt.xlabel('Groups')
plt.ylabel('Average Interval Width')
# The figure is written to disk before it is shown.
plt.savefig('width-mean-img.png')
plt.show()

##### Choose any specific group (defined by feature number and value)

In [None]:
group_feat = 0 # index of a binary feature: any integer from 0 to 9 (the group index is 2 * group_feat + feat_val, so larger values fall outside the 20 groups)
feat_val = 1 # can be 0 or 1 (binary feature)

##### Statistics for specified group

In [None]:
# Groups are stored so that feature i with value j sits at index 2 * i + j.
index = (group_feat * 2) + feat_val
ys_group = ys_groups[index]

# Histories of the selected group, converted to arrays for arithmetic.
y_pred_conformal_array_group = np.array(y_pred_conformal_groups[index])
w_t_conformal_array_group = np.array(w_t_conformal_groups[index])

y_pred_basic_array_group = np.array(y_pred_basic_groups[index])
w_t_basic_array_group = np.array(w_t_basic_groups[index])

y_pred_ours_array_group = np.array(y_pred_ours_groups[index])
q_array_group = np.array(q_array_groups[index])

covered_basic_array_group = covered_basic_groups[index]
covered_conformal_array_group = covered_conformal_groups[index]
covered_ours_array_group = covered_ours_groups[index]

print("======= CURRENT GROUP: FEATURE " + str(group_feat) + " = " + str(feat_val) + " =======")
# Fraction of the group's rounds in which the prediction set contained the label.
print("*** COVERAGE ***")
print("Split-conformal (without): {0}".format(np.average(covered_basic_array_group)))
print("Split-conformal (with groups, conservative approach): {0}".format(np.average(covered_conformal_array_group)))
print("MVP: {0}".format(np.average(covered_ours_array_group)))
print("")

# Per-group histories hold the raw widths, whereas the overall histories were
# recorded as mult_factor * width. Apply the same factor here so the group
# numbers are directly comparable with the overall statistics.
print("*** WIDTH *** ")
print("Split-conformal (without): {0}".format(mult_factor * np.average(w_t_basic_array_group)))
print("Split-conformal (with groups, conservative approach): {0}".format(mult_factor * np.average(w_t_conformal_array_group)))
print("MVP: {0}".format(mult_factor * np.average(q_array_group)))
print("")

# Cumulative squared error of the point predictions within the group.
# np.linalg.norm alone is the square root of that sum, so it is squared here
# to make the reported number an actual squared loss.
print("*** SQUARED LOSS ***")
squared_loss_basic = np.linalg.norm(ys_group - y_pred_basic_array_group) ** 2
squared_loss_conformal = np.linalg.norm(ys_group - y_pred_conformal_array_group) ** 2
squared_loss_ours = np.linalg.norm(ys_group - y_pred_ours_array_group) ** 2
print("Split-conformal (without groups): {0}".format(squared_loss_basic))
print("Split-conformal (with groups, conservative approach): {0}".format(squared_loss_conformal))
print("MVP: {0}".format(squared_loss_ours))


##### Plots for specified group

In [None]:
# Plotting running average coverage (split-conformal without groups) over time - for selected group
# Running mean in a single pass: cumulative count of covered rounds divided by
# the number of rounds so far (re-averaging every prefix would be quadratic).
covered_basic_flags = np.asarray(covered_basic_array_group, dtype=float)
covered_basic_over_time_group = np.cumsum(covered_basic_flags) / np.arange(1, covered_basic_flags.size + 1)
plt.plot(range(len(covered_basic_array_group)), covered_basic_over_time_group)
plt.yticks(np.arange(0, 1.05, 0.05))
plt.xlabel("No. of rounds")
plt.ylabel("Marginal coverage")
# Reference line at the target coverage, labelled just past the last round.
plt.axhline(y = 1 - delta, color = 'red', linestyle = '-', linewidth = 0.9)
plt.text(len(covered_basic_array_group), 1 - delta + 0.01, '  desired coverage')
plt.title("Coverage over time: Split-conformal (without groups)")
plt.show()

In [None]:
# Plotting running average coverage (split-conformal with groups) over time - for selected group
# Running mean in a single pass: cumulative count of covered rounds divided by
# the number of rounds so far (re-averaging every prefix would be quadratic).
covered_conformal_flags = np.asarray(covered_conformal_array_group, dtype=float)
covered_conformal_over_time_group = np.cumsum(covered_conformal_flags) / np.arange(1, covered_conformal_flags.size + 1)
plt.plot(range(len(covered_conformal_array_group)), covered_conformal_over_time_group)
plt.yticks(np.arange(0, 1.05, 0.05))
plt.xlabel("No. of rounds")
plt.ylabel("Marginal coverage")
# Reference line at the target coverage, labelled just past the last round.
plt.axhline(y = 1 - delta, color = 'red', linestyle = '-', linewidth = 0.9)
plt.text(len(covered_conformal_array_group), 1 - delta + 0.01, '  desired coverage')
plt.title("Coverage over time: Split-conformal (With groups, conservative approach)")
plt.show()

In [None]:
# Plotting running average coverage (MVP) over time - for chosen group
# Running mean in a single pass: cumulative count of covered rounds divided by
# the number of rounds so far (re-averaging every prefix would be quadratic).
covered_ours_flags = np.asarray(covered_ours_array_group, dtype=float)
covered_our_over_time_group = np.cumsum(covered_ours_flags) / np.arange(1, covered_ours_flags.size + 1)
plt.plot(range(len(covered_ours_array_group)), covered_our_over_time_group)
plt.yticks(np.arange(0, 1.05, 0.05))
plt.xlabel("No. of rounds")
plt.ylabel("Marginal coverage")
# Reference line at the target coverage, labelled just past the last round.
plt.axhline(y = 1 - delta, color = 'red', linestyle = '-', linewidth = 0.9)
plt.text(len(covered_ours_array_group), 1 - delta + 0.01, '  desired coverage')
plt.title("Coverage over time: MVP")
plt.show()

## Running several trials

##### Set all parameters and number of trials to run

In [None]:
# Parameters for our uncertainty quantifier
# (delta, n, eta and r are handed to MultiValidPrediction in every trial)
T = 20000  # number of rounds per trial: T points are generated and the inner loop runs range(T)
n = 40  # NOTE(review): presumably the number of threshold buckets used by MVP - confirm in MultiValidPrediction
r = 80000000  # NOTE(review): meaning is defined inside MultiValidPrediction - confirm there
delta = 0.1  # miscoverage level: the plots mark 1 - delta as the desired coverage
K_e = 2.12  # constant that only enters the formula for eta below

# Learning-rate style parameter for MVP, derived from the number of groups and n
eta = np.sqrt(np.log(num_groups * n) / (2 * K_e * num_groups * n))

# data generation - constants
x_std = 0.1  # std. dev. of theta and of the non-binary features
y_std = 0.25  # baseline label-noise std. dev., added to the feature-dependent part
d = 300 # total feature dimension; the first 10 features are binary, so choose d > 10
mult_factor = 10  # scaling factor for the scorer; note the single-trial section above sets 8
# Per-feature noise contribution: binary feature 0 adds 3.0 to the noise std. dev., the others 0.1
std_dev_list = np.array([3.0, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])

# How many trials would you like to run?
num_rounds = 10  # number of independent trials (fresh theta and data in each)

##### Start trials

In [None]:
# Result tables: one row per trial, one column per group.
coverage_conformal_per_group_per_round = np.zeros((num_rounds, num_groups))
coverage_ours_per_group_per_round = np.zeros((num_rounds, num_groups))
coverage_basic_per_group_per_round = np.zeros((num_rounds, num_groups))
width_conformal_per_group_per_round = np.zeros((num_rounds, num_groups))
width_ours_per_group_per_round = np.zeros((num_rounds, num_groups))
width_basic_per_group_per_round = np.zeros((num_rounds, num_groups))

for k in range(num_rounds):
    print('Running trial: ' + str(k))

    # data generation - random for each run
    theta = np.random.normal(loc=np.zeros(d), scale=x_std)

    # d-dimension features - first 10 features are binary
    xs_binvars = np.random.randint(low = 0, high = 2, size = (T, 10))
    xs_remvars = np.random.normal(loc=np.zeros(d - 10), scale=x_std, size=(T, d - 10))
    xs = np.concatenate((xs_binvars, xs_remvars), axis = 1)
    # Noise std. dev. of each point = contributions of its active binary features + baseline
    std_dev = np.dot(xs_binvars, std_dev_list) + y_std
    ys = np.dot(xs, theta) + np.random.normal(loc=0, scale= std_dev, size=T)

    # Defining all uncertainty quantifiers and regressors (fresh state for every trial)

    myUncertaintyQuantifier = MultiValidPrediction(delta, n, twenty_groups, eta, r, normalize_by_counts=True)
    myConformalPredictor = splitConformalPrediction.splitConformal(num_groups, twenty_groups, delta)
    myBasicConformalPredictor = splitConformalPrediction.splitConformal(1, basic_group, delta)
    myRLS_conformal = recursiveLeastSquares.RLS(d, 1.0, 1)
    myRLS_ours = recursiveLeastSquares.RLS(d, 1.0, 1)

    myResidualCalibrationScorer = customResidualCalibrationScorer.customResidualCalibrationScorer(mult_factor)

    # arrays for our method
    q_array = []
    y_pred_ours_array = []
    covered_ours_array = []

    # arrays (per group) for our method
    q_array_groups = [list() for _ in range(num_groups)]
    y_pred_ours_groups = [list() for _ in range(num_groups)]
    covered_ours_groups = [list() for _ in range(num_groups)]

    # arrays for conformal prediction
    y_pred_conformal_array = []
    w_t_conformal_array = []
    covered_conformal_array = []

    # arrays (per group) for conformal prediction
    y_pred_conformal_groups = [list() for _ in range(num_groups)]
    w_t_conformal_groups = [list() for _ in range(num_groups)]
    covered_conformal_groups = [list() for _ in range(num_groups)]

    # arrays for basic method
    y_pred_basic_array = []
    w_t_basic_array = []
    covered_basic_array = []

    # array (per group) for basic prediction
    y_pred_basic_groups = [list() for _ in range(num_groups)]
    w_t_basic_groups = [list() for _ in range(num_groups)]
    covered_basic_groups = [list() for _ in range(num_groups)]

    # keep track of y values per group
    ys_groups = [list() for _ in range(num_groups)]

    # Conformal warm-start calibration set using the same distribution.
    init_size = 15
    conformal_calibration_xs_binvars = np.random.randint(low = 0, high = 2, size = (init_size, 10))
    conformal_calibration_xs_remvars = np.random.normal(loc=np.zeros(d - 10), scale=x_std, size=(init_size, d - 10))
    xs_cc = np.concatenate((conformal_calibration_xs_binvars, conformal_calibration_xs_remvars), axis = 1)
    std_dev = np.dot(conformal_calibration_xs_binvars, std_dev_list) + y_std
    ys_cc = np.dot(xs_cc, theta) + np.random.normal(loc=0, scale= std_dev, size=init_size)

    # Both baselines receive exactly the same warm-start points.
    for curr_x, curr_y in zip(xs_cc, ys_cc):
        myConformalPredictor.update_calibration_data(curr_x, curr_y)
        myBasicConformalPredictor.update_calibration_data(curr_x, curr_y)

    print('Does initial calibration data cover all groups: ' + str(myConformalPredictor.all_groups_covered()))

    # Run algorithm: in every round each method commits to a prediction interval
    # for x_t before y_t is used to score it or to update any model.
    for t in range(T):
        x_t = xs[t]
        y_t = ys[t]

        # 1. SPLIT-CONFORMAL WITH AND WITHOUT GROUPS
        # Both baselines share one regressor; the shared scorer is pointed at
        # that regressor before any width is selected.
        y_pred_conformal_t = myRLS_conformal.predict(x_t)
        myResidualCalibrationScorer.update(myRLS_conformal.predict)

        w_t_conformal = myConformalPredictor.select_best_width(myResidualCalibrationScorer, x_t)
        conformal_prediction_set = myResidualCalibrationScorer.get_prediction_set(x_t, w_t_conformal, mult_factor)

        covered_conformal_t = conformal_prediction_set.cover(y_t)

        w_t_basic = myBasicConformalPredictor.select_best_width(myResidualCalibrationScorer, x_t)
        basic_prediction_set = myResidualCalibrationScorer.get_prediction_set(x_t, w_t_basic, mult_factor)

        covered_basic_t = basic_prediction_set.cover(y_t)

        # Data splitting: even rounds extend the calibration sets, odd rounds
        # train the regressor, so no point is used for both purposes.
        if t % 2 == 0:
            myConformalPredictor.update_calibration_data(x_t, y_t)
            myBasicConformalPredictor.update_calibration_data(x_t, y_t)
        else:
            # update the linear regression model
            myRLS_conformal.add_obs(x_t, y_t)

        y_pred_conformal_array.append(y_pred_conformal_t)
        w_t_conformal_array.append(mult_factor * w_t_conformal)
        covered_conformal_array.append(covered_conformal_t)

        # The basic baseline uses the same regressor, hence the same point prediction.
        y_pred_basic_array.append(y_pred_conformal_t)
        w_t_basic_array.append(mult_factor * w_t_basic)
        covered_basic_array.append(covered_basic_t)

        # 2. MVP
        # Re-point the shared scorer at MVP's own regressor before using it.
        y_pred_ours_t = myRLS_ours.predict(x_t)
        myResidualCalibrationScorer.update(myRLS_ours.predict)

        q_t = myUncertaintyQuantifier.predict(x_t)
        curr_prediction_set = myResidualCalibrationScorer.get_prediction_set(x_t, q_t, mult_factor)

        covered_ours_t = curr_prediction_set.cover(y_t)

        # The score must be computed before the regressor sees (x_t, y_t).
        s_t = myResidualCalibrationScorer.calc_score(x_t, y_t)

        # x_t is one row of xs, so no transpose is needed.
        myRLS_ours.add_obs(x_t, y_t)
        myUncertaintyQuantifier.update(x_t, q_t, s_t)

        y_pred_ours_array.append(y_pred_ours_t)
        q_array.append(mult_factor * q_t)
        covered_ours_array.append(covered_ours_t)

        # Adding relevant values to specific groups. Iterating over
        # twenty_groups keeps the indices aligned with however the list was built.
        for curr_index, curr_group in enumerate(twenty_groups):
            if curr_group(x_t):
                ys_groups[curr_index].append(y_t)
                y_pred_conformal_groups[curr_index].append(y_pred_conformal_t)
                w_t_conformal_groups[curr_index].append(w_t_conformal)
                covered_conformal_groups[curr_index].append(covered_conformal_t)
                q_array_groups[curr_index].append(q_t)
                y_pred_ours_groups[curr_index].append(y_pred_ours_t)
                covered_ours_groups[curr_index].append(covered_ours_t)
                y_pred_basic_groups[curr_index].append(y_pred_conformal_t)
                w_t_basic_groups[curr_index].append(w_t_basic)
                covered_basic_groups[curr_index].append(covered_basic_t)

    # Array versions of the overall histories of this trial; the per-group
    # summaries below do not read them.
    y_pred_conformal_array = np.array(y_pred_conformal_array)
    w_t_conformal_array = np.array(w_t_conformal_array)
    w_t_basic_array = np.array(w_t_basic_array)

    y_pred_ours_array = np.array(y_pred_ours_array)
    q_array = np.array(q_array)

    # Per-group summaries of this trial.
    coverage_conformal = [np.average(group) for group in covered_conformal_groups]
    coverage_ours = [np.average(group) for group in covered_ours_groups]
    coverage_basic = [np.average(group) for group in covered_basic_groups]

    # Per-group histories hold raw widths; they are rescaled by 2 * mult_factor.
    width_conformal = [2 * mult_factor * np.average(group) for group in w_t_conformal_groups]
    width_ours = [2 * mult_factor * np.average(group) for group in q_array_groups]
    width_basic = [2 * mult_factor * np.average(group) for group in w_t_basic_groups]

    # Store this trial as row k of every result table.
    coverage_conformal_per_group_per_round[k] = coverage_conformal
    coverage_ours_per_group_per_round[k] = coverage_ours
    coverage_basic_per_group_per_round[k] = coverage_basic

    width_conformal_per_group_per_round[k] = width_conformal
    width_ours_per_group_per_round[k] = width_ours
    width_basic_per_group_per_round[k] = width_basic

print('All trials complete')

##### Plots for statistics across groups

In [None]:
# Grouped bar chart: per-group coverage averaged over all trials, one bar per method.

coverage_basic_across_rounds = np.mean(coverage_basic_per_group_per_round, axis=0)
coverage_conformal_across_rounds = np.mean(coverage_conformal_per_group_per_round, axis=0)
coverage_ours_across_rounds = np.mean(coverage_ours_per_group_per_round, axis=0)

barWidth = 0.25
br1 = np.arange(len(coverage_ours_across_rounds))
br2 = [pos + barWidth for pos in br1]
br3 = [pos + barWidth for pos in br2]

# Draw the three series side by side, in the same order as the legend.
for positions, heights, bar_color, bar_label in (
    (br1, coverage_basic_across_rounds, 'b', 'Split-Conformal: Without groups'),
    (br2, coverage_conformal_across_rounds, 'm', 'Split-Conformal: With groups, conservative approach'),
    (br3, coverage_ours_across_rounds, 'c', 'MVP'),
):
    plt.bar(positions, heights, color=bar_color, width=barWidth, edgecolor='gray', label=bar_label, linewidth=0.5)

# One tick per group, placed under the middle bar of each triple.
group_labels = [str(group_id) for group_id in range(num_groups)]
tick_positions = [group_id + barWidth for group_id in range(len(coverage_ours_across_rounds))]
plt.xticks(tick_positions, group_labels)

# Reference line at the target coverage, annotated to the right of the last group.
target_coverage = 1 - delta
plt.axhline(y=target_coverage, c='r', linewidth=0.5)
plt.text(19.55, target_coverage + 0.02, '  desired')
plt.text(19.55, target_coverage - 0.04, '  coverage')

plt.legend()
plt.ylim([0.0, 1.4])
plt.yticks(np.arange(0, 1.1, 0.1))
plt.title('Comparison of group-wise coverage: Split-Conformal vs. MVP \n')
plt.xlabel('Groups')
plt.ylabel('Average Coverage')
# plt.savefig('coverage-mean.pdf')
plt.show()

In [None]:
# Plotting average interval width across all rounds for all groups

width_conformal_across_rounds = np.mean(width_conformal_per_group_per_round, axis = 0)
width_ours_across_rounds = np.mean(width_ours_per_group_per_round, axis = 0)
width_basic_across_rounds = np.mean(width_basic_per_group_per_round, axis = 0)

# Bar positions are sized from this cell's own data, so the cell does not
# depend on the coverage plot cell having been run first.
barWidth = 0.25
br1 = np.arange(len(width_ours_across_rounds))
br2 = [x + barWidth for x in br1]
br3 = [x + barWidth for x in br2]

plt.bar(br1, width_basic_across_rounds, color = 'b', width = barWidth, edgecolor = 'gray', label = 'Split-Conformal: Without groups', linewidth = 0.5)
plt.bar(br2, width_conformal_across_rounds, color = 'm', width = barWidth, edgecolor = 'gray', label = 'Split-Conformal: With groups, conservative approach', linewidth = 0.5)
plt.bar(br3, width_ours_across_rounds, color = 'c', width = barWidth, edgecolor = 'gray', label = 'MVP', linewidth = 0.5)
# One tick per group, placed under the middle bar of each triple.
group_labels = [str(i) for i in range(num_groups)]
plt.xticks([pos + barWidth for pos in range(len(width_ours_across_rounds))], group_labels)
plt.legend()
plt.ylim([0.0,19.0])
plt.title('Comparison of group-wise interval-width: Split-Conformal vs. MVP \n')
plt.xlabel('Groups')
plt.ylabel('Average Interval Width')
# plt.savefig('width-mean.pdf')
plt.show()

In [None]:
# Grouped bar chart: per-group median coverage over all trials, with a vertical
# line from the 25th to the 75th percentile on every bar.

coverage_basic_across_rounds = np.median(coverage_basic_per_group_per_round, axis=0)
coverage_conformal_across_rounds = np.median(coverage_conformal_per_group_per_round, axis=0)
coverage_ours_across_rounds = np.median(coverage_ours_per_group_per_round, axis=0)

coverage_basic_across_rounds_mean = np.mean(coverage_basic_per_group_per_round, axis=0)
coverage_conformal_across_rounds_mean = np.mean(coverage_conformal_per_group_per_round, axis=0)
coverage_ours_across_rounds_mean = np.mean(coverage_ours_per_group_per_round, axis=0)

coverage_basic_25_quantile = np.quantile(coverage_basic_per_group_per_round, 0.25, axis=0)
coverage_basic_75_quantile = np.quantile(coverage_basic_per_group_per_round, 0.75, axis=0)
coverage_conformal_25_quantile = np.quantile(coverage_conformal_per_group_per_round, 0.25, axis=0)
coverage_conformal_75_quantile = np.quantile(coverage_conformal_per_group_per_round, 0.75, axis=0)
coverage_ours_25_quantile = np.quantile(coverage_ours_per_group_per_round, 0.25, axis=0)
coverage_ours_75_quantile = np.quantile(coverage_ours_per_group_per_round, 0.75, axis=0)

barWidth = 0.25
br1 = np.arange(len(coverage_ours_across_rounds))
br2 = [pos + barWidth for pos in br1]
br3 = [pos + barWidth for pos in br2]

# Interquartile lines first (basic, then conformal, then MVP), bars afterwards.
for positions, lower_quartile, upper_quartile in (
    (br1, coverage_basic_25_quantile, coverage_basic_75_quantile),
    (br2, coverage_conformal_25_quantile, coverage_conformal_75_quantile),
    (br3, coverage_ours_25_quantile, coverage_ours_75_quantile),
):
    for val, low, high in zip(positions, lower_quartile, upper_quartile):
        plt.vlines(x=val, ymin=low, ymax=high, color='black', linewidth=0.6)

for positions, heights, bar_color, bar_label in (
    (br1, coverage_basic_across_rounds, 'b', 'Split-Conformal: Without groups'),
    (br2, coverage_conformal_across_rounds, 'm', 'Split-Conformal: With groups, conservative approach'),
    (br3, coverage_ours_across_rounds, 'c', 'MVP'),
):
    plt.bar(positions, heights, color=bar_color, width=barWidth, edgecolor='gray', label=bar_label, linewidth=0.4, alpha=0.6)

# One tick per group, placed under the middle bar of each triple.
group_labels = [str(group_id) for group_id in range(num_groups)]
tick_positions = [group_id + barWidth for group_id in range(len(coverage_ours_across_rounds))]
plt.xticks(tick_positions, group_labels)

# Reference line at the target coverage, annotated to the right of the last group.
target_coverage = 1 - delta
plt.axhline(y=target_coverage, c='r', linewidth=0.5)
plt.text(19.55, target_coverage + 0.02, '  desired')
plt.text(19.55, target_coverage - 0.04, '  coverage')

plt.legend()
plt.ylim([0.0, 1.5])
plt.yticks(np.arange(0, 1.1, 0.1))
plt.xlabel('Groups')
plt.ylabel('Coverage (Median)')
plt.title('Comparison of group-wise coverage: Split-Conformal vs. MVP \n')
# plt.savefig('../../coverage-median.pdf')
plt.show()


In [None]:
# Plotting median interval width across all rounds for all groups, along with 25th to 75th quantile bar

width_conformal_across_rounds = np.median(width_conformal_per_group_per_round, axis = 0)
width_ours_across_rounds = np.median(width_ours_per_group_per_round, axis = 0)
width_basic_across_rounds = np.median(width_basic_per_group_per_round, axis = 0)

width_conformal_25_quantile = np.quantile(width_conformal_per_group_per_round, 0.25, axis = 0)
width_ours_25_quantile = np.quantile(width_ours_per_group_per_round, 0.25, axis = 0)
width_basic_25_quantile = np.quantile(width_basic_per_group_per_round, 0.25, axis = 0)
width_conformal_75_quantile = np.quantile(width_conformal_per_group_per_round, 0.75, axis = 0)
width_ours_75_quantile = np.quantile(width_ours_per_group_per_round, 0.75, axis = 0)
width_basic_75_quantile = np.quantile(width_basic_per_group_per_round, 0.75, axis = 0)

barWidth = 0.25
br1 = np.arange(len(width_conformal_across_rounds))
br2 = [x + barWidth for x in br1]
br3 = [x + barWidth for x in br2]

# Interquartile range of every bar, drawn as a vertical line at the bar centre.
for k, val in enumerate(br1):
    plt.vlines(x = val, ymin = width_basic_25_quantile[k], ymax = width_basic_75_quantile[k], color = 'black', linewidth = 0.6)

for i, val in enumerate(br2):
    plt.vlines(x = val, ymin = width_conformal_25_quantile[i], ymax = width_conformal_75_quantile[i], color = 'black', linewidth = 0.6)

for j, val in enumerate(br3):
    plt.vlines(x = val, ymin = width_ours_25_quantile[j], ymax = width_ours_75_quantile[j], color = 'black', linewidth = 0.6)

plt.bar(br1, width_basic_across_rounds, color = 'b', width = barWidth, edgecolor = 'gray', label = 'Split-Conformal: Without groups', linewidth = 0.4, alpha = 0.6)
plt.bar(br2, width_conformal_across_rounds, color = 'm', width = barWidth, edgecolor = 'gray', label = 'Split-Conformal: With groups, conservative approach', linewidth = 0.4, alpha = 0.6)
plt.bar(br3, width_ours_across_rounds, color = 'c', width = barWidth, edgecolor = 'gray', label = 'MVP', linewidth = 0.4, alpha = 0.6)
# Tick positions are sized from the plotted data rather than from the
# per-group lists left over from the last trial.
group_labels = [str(i) for i in range(num_groups)]
plt.xticks([pos + barWidth for pos in range(len(width_conformal_across_rounds))], group_labels)
plt.legend()
plt.ylim([0.0,20.0])
plt.title('Comparison of group-wise interval-width: Split-Conformal vs. MVP \n')
plt.xlabel('Groups')
plt.ylabel('Interval Width (Median)')
# plt.savefig('../../width-median.pdf')
plt.show()