In [None]:
import warnings
warnings.filterwarnings('ignore', category=DeprecationWarning)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys

## UCI Airfoil data

In [None]:
# Load the tab-separated airfoil self-noise table; the file has no header row,
# so columns are labelled with integers.
full_data = pd.read_csv("data/airfoil_self_noise.dat", sep='\t', header=None)

# Replace columns 0 and 4 by their natural logarithm.
for log_col in (0, 4):
    full_data[log_col] = np.log(full_data[log_col])

dataset_name = 'UCI Airfoil'

In [None]:
# Prepend an all-ones "constant" column, as needed for the LS implementation.
# The membership check keeps this cell idempotent: DataFrame.insert raises a
# ValueError if the column already exists, which would happen on a re-run.
if "constant" not in full_data.columns:
    full_data.insert(0, "constant", [1] * len(full_data))

In [None]:
def airfoil_split(data, random_state=None):
    """
    Split airfoil data into disjoint folds for least squares data, calibration data, and evaluation data.

    Parameters:
    data: DataFrame whose first 6 columns are the model inputs and whose remaining
        column(s) hold the target
    random_state: optional int seed or np.random.RandomState used for both random draws,
        so a split can be reproduced; None (the default) draws from numpy's global
        random state

    Returns:
    ls_x, ls_y: data to train least squares model, 25% of full dataset
    calibration_data: data to feed to calibration algorithm, 25% of full dataset
    evaluation_data: data to feed to calibration algorithm and evaluate performance on, 50% of full dataset
    xs_iid, ys_iid: inputs / targets of calibration_data followed by evaluation_data
    """
    # A single generator is shared by both draws so that the two samples differ
    # from each other while the overall split is still determined by the seed.
    if random_state is None or isinstance(random_state, np.random.RandomState):
        rng = random_state
    else:
        rng = np.random.RandomState(random_state)

    # first half: model training + calibration; second half: evaluation
    pre_train = data.sample(frac=.5, axis=0, random_state=rng)
    evaluation_data = data.drop(index=pre_train.index)

    # split the first half again into model-training and calibration folds
    train_model = pre_train.sample(frac=.5, axis=0, random_state=rng)
    calibration_data = pre_train.drop(index=train_model.index)

    ls_x = train_model.iloc[:, :6]
    ls_y = train_model.iloc[:, 6:]

    # calibration rows come first, so callers can treat the leading
    # len(calibration_data) rows as the calibration phase
    data_no_shift = pd.concat([calibration_data, evaluation_data])

    xs_iid = data_no_shift.iloc[:, :6]
    ys_iid = data_no_shift.iloc[:, 6:]

    return ls_x, ls_y, calibration_data, evaluation_data, xs_iid, ys_iid
    

In [None]:
# Bounds for residual conformal score normalization: a score s is mapped to
# (s - l) / (u - l), and a normalized threshold q is mapped back with q * (u - l) + l.
l, u = 0, 120

## Function definitions

In [None]:
def train_ls(x_model, y_model):
    """
    Fit a recursive least squares model and return it.

    x_model: DataFrame of inputs; each row is handed to the model as a column np.matrix
    y_model: DataFrame of targets; only the first entry of each row is used
    """
    model = recursiveLeastSquares.RLS(6, 1.0, 1)

    # one column vector per training row
    inputs = [np.matrix(row).T for row in x_model.to_numpy()]
    # one scalar target per training row
    targets = [row[0] for row in y_model.to_numpy()]

    model.fit(inputs, targets)
    return model

In [None]:
def all_points(x):
    """Group indicator that accepts every point: returns True regardless of x."""
    return True

def calibrate(x_input, y_input, bucket, r, delta, l, u, T, T_calibrate, myRLS):
    """
    Run the online calibration loop for T rounds and report results on the evaluation rounds.

    Every round predicts a normalized threshold, builds a prediction set from it,
    and then updates the uncertainty quantifier with the observed normalized score.
    Only rounds t >= T_calibrate contribute to the returned lists.

    Parameters:
    x_input: DataFrame of inputs, read row by row with .iloc[t]
    y_input: targets aligned with x_input (converted with np.asarray)
    bucket: number of buckets, passed to MultiValidPrediction as n and used in eta
    r: forwarded unchanged to MultiValidPrediction
    delta: forwarded unchanged to MultiValidPrediction (callers pass alpha)
    l, u: lower / upper bound used to normalize scores and rescale thresholds
    T: total number of rounds to run
    T_calibrate: number of leading rounds that are calibration data only
    myRLS: fitted model whose predict method is handed to the residual scorer

    Returns:
    covered_arr: per evaluation round, the result of the prediction set's cover() call
    width_arr: per evaluation round, the prediction set's interval_width
    """
    n = bucket

    groups = [all_points]

    eta = np.sqrt(np.log(2 * len(groups) * n) / T)

    myUncertaintyQuantifier = MultiValidPrediction(delta, n, groups, eta, r)

    myResidualCalibrationScorer = residualCalibrationScorer.ResidualCalibrationScorer()

    myResidualCalibrationScorer.update(myRLS.predict)

    y_input = np.asarray(y_input)

    covered_arr = []
    width_arr = []

    for t in range(T):
        # data arrival
        x_t = x_input.iloc[t]
        y_t = y_input[t]

        # calculate the new threshold
        norm_q_t = myUncertaintyQuantifier.predict(x_t)

        # rescale threshold
        q_t = norm_q_t * (u - l) + l

        # check if the prediction set covers the data
        curr_prediction_set = myResidualCalibrationScorer.get_prediction_set(x_t, q_t)
        covered_t = curr_prediction_set.cover(np.matrix(y_t))

        # Record each evaluation round exactly once. Rows 0 .. T_calibrate - 1 are
        # calibration data, so the first evaluation round is t == T_calibrate.
        if t >= T_calibrate:
            covered_arr.append(covered_t)
            width_arr.append(curr_prediction_set.interval_width)

        # update the calibrator multivalidator
        s_t = myResidualCalibrationScorer.calc_score(x_t, np.matrix(y_t))

        # normalize score
        norm_s_t = (s_t - l) / (u - l)

        myUncertaintyQuantifier.update(x_t, norm_q_t, norm_s_t)

        # update the calibration scorer
        myResidualCalibrationScorer.update(myRLS.predict)

    return covered_arr, width_arr
    

## Experiments

In [None]:
sys.path.append('../src')
from MultiValidPrediction import MultiValidPrediction
from calibrationScorers import residualCalibrationScorer
import recursiveLeastSquares

## Experimental parameters

In [None]:
## airfoil params
num_trials = 500   # independent random splits per bucket setting
buckets = [40]     # bucket counts to try; each entry gets its own set of trials
d = 6              # not referenced by the visible cells; presumably the input dimension -- TODO confirm

# coverage: passed to calibrate() as delta; the plots below label the target coverage as .9
alpha = .1

# label used in plot titles
model_name = 'Linear Regression'

## Data without covariate shift:

In [None]:
# One entry per bucket setting; each entry is a list holding num_trials values.
no_shift_coverage, no_shift_width = [], []

In [None]:
# Experiment without covariate shift: for every bucket setting, repeat
# split -> fit least squares -> calibrate for num_trials random splits.
for num_bucket in buckets:

    trial_coverage, trial_width = [], []

    for i in range(num_trials):

        # draw fresh random folds for this trial
        ls_x, ls_y, calibration, evaluation, xs_iid, ys_iid = airfoil_split(full_data)

        # refit the least squares model on this trial's training fold
        myRLS = train_ls(ls_x, ls_y)

        # run the calibration loop over calibration rows followed by evaluation rows
        coverage_res, width_res = calibrate(
            xs_iid, ys_iid, num_bucket, 1000, alpha, l, u,
            T=len(xs_iid), T_calibrate=len(calibration), myRLS=myRLS,
        )

        # summarize the trial: mean coverage and median interval width
        trial_coverage.append(np.mean(coverage_res))
        trial_width.append(np.median(width_res))

    no_shift_coverage.append(trial_coverage)
    no_shift_width.append(trial_width)

In [None]:
# Report, per bucket setting, the average over trials of the per-trial summaries.
print("without shifted data over {0} trials\n".format(num_trials))
for idx, n_buckets in enumerate(buckets):
    print("coverage with {0} buckets is {1}".format(n_buckets, np.mean(no_shift_coverage[idx])))
    print("width with {0} buckets is {1}\n".format(n_buckets, np.mean(no_shift_width[idx])))

## Data with covariate shift: 
### Evaluating calibration on shifted data for us and oracle weights for weighted split conformal

Here our comparison is a warm start of our uncertainty prediction algorithm:
we calibrate on data drawn from the shifted distribution we see at evaluation time,
to make a fair comparison to weighted split conformal, which uses likelihood ratios
of the shifted evaluation data.

In [None]:
# Weights for the exponential tilting shift: rows are resampled with probability
# proportional to exp(dot(row, beta)), so beta needs one entry per data column.
beta = [0, -1, 0, 0, 0, 1, 0]

In [None]:
# One entry per bucket setting; each entry is a list holding num_trials values.
shifted_coverage, shifted_width = [], []

In [None]:
# Experiment with covariate shift: both the calibration fold and the evaluation
# fold are resampled with exponential-tilting weights exp(dot(row, beta)).
# beta does not change between trials, so it is defined once here.
beta = [0, -1, 0, 0, 0, 1, 0] # weights for shift

for num_bucket in buckets:

    trial_coverage = []
    trial_width = []

    for i in range(num_trials):

        # split data
        ls_x, ls_y, calibration, evaluation, xs_iid, ys_iid = airfoil_split(full_data)

        # apply shift to evaluation data: weighted resampling with replacement,
        # drawing 25% as many rows as the evaluation fold has
        shift = evaluation.copy()
        weight = np.exp(np.dot(shift, beta))
        shift = shift.sample(frac=.25, axis=0, weights=weight, replace=True)

        # apply the same shift to the calibration data, keeping its size
        weight = np.exp(np.dot(calibration, beta))
        calibration = calibration.sample(frac=1, axis=0, weights=weight, replace=True)

        # calibration rows first, then the shifted evaluation rows
        data_with_shift = pd.concat([calibration, shift])
        xs_shift = data_with_shift.iloc[:, :6]
        ys_shift = data_with_shift.iloc[:, 6:]

        # retrain LS
        myRLS = train_ls(ls_x, ls_y)

        # calibrate
        coverage_res, width_res = calibrate(xs_shift, ys_shift, num_bucket, 1000, alpha, l, u, len(xs_shift), len(calibration), myRLS)

        # store average coverage and median width for this trial
        trial_coverage.append(np.mean(coverage_res))
        trial_width.append(np.median(width_res))

    shifted_coverage.append(trial_coverage)
    shifted_width.append(trial_width)

In [None]:
# Report, per bucket setting, the average over trials of the per-trial summaries.
print("with shifted data over {0} trials\n".format(num_trials))
for idx, n_buckets in enumerate(buckets):
    print("coverage with {0} buckets is {1}".format(n_buckets, np.mean(shifted_coverage[idx])))
    print("width with {0} buckets is {1}\n".format(n_buckets, np.mean(shifted_width[idx])))

## Plotting

In [None]:
# Load the stored MVP results (header-less CSV files) as plain numpy arrays.
shifted_coverage_res = np.array(
    pd.read_csv('airfoil-results/coverage-mvp.csv', header=None)
)
shifted_width_res = np.array(
    pd.read_csv('airfoil-results/width-mvp.csv', header=None)
)

In [None]:
# Load the stored weighted split conformal results (column 'x' of each CSV)
# and keep a random subsample of 500 values, drawn without replacement.
shifted_coverage_conf = np.random.choice(
    np.array(pd.read_csv('airfoil-results/coverage-tibs.csv')['x']),
    500,
    replace=False,
)
shifted_width_conf = np.random.choice(
    np.array(pd.read_csv('airfoil-results/width-tibs.csv')['x']),
    500,
    replace=False,
)

In [None]:
# Labels interpolated into the plot titles below.
dataset_name, model_name = 'UCI Airfoil', 'Linear Regression'

In [None]:
# Figure 1: per-trial mean coverage, both methods on shared bins,
# with a dashed red line at the .9 target.
bins = np.linspace(0.775, 0.975, 40)

fig, ax = plt.subplots()
ax.hist(shifted_coverage_res[0], bins=bins, label='MVP',
        color='blue', edgecolor='blue', alpha=0.4, linewidth=0.5)
ax.hist(shifted_coverage_conf, bins=bins, label='weighted split conformal',
        color='orange', edgecolor='orange', alpha=0.6, linewidth=0.5)
ax.legend()
ax.axvline(x=.9, color='red', linestyle='--', linewidth=0.9)
ax.set_title('Mean Coverage ({0} trials, target coverage .9) \n {1} \n {2} Data'.format(len(shifted_coverage_res[0]), model_name, dataset_name))
ax.set_xlabel('Mean Coverage')
ax.set_ylabel('No. of Rounds')
plt.show()

# Figure 2: per-trial median interval width, both methods, default bins.
fig, ax = plt.subplots()
ax.hist(shifted_width_res[0], label='MVP',
        color='blue', edgecolor='blue', alpha=0.4, linewidth=0.5)
ax.hist(shifted_width_conf, label='weighted split conformal',
        color='orange', edgecolor='orange', alpha=0.6, linewidth=0.5)
ax.legend()
ax.set_title('Median Width ({0} trials, target coverage .9) \n {1} \n {2} Data'.format(len(shifted_coverage_res[0]), model_name, dataset_name))
ax.set_xlabel('Median Interval Width (in decibels)')
ax.set_ylabel('No. of Rounds')
plt.show()