In [1]:
import pandas as pd
import numpy as np
import csv
import os
import time

In [2]:
from gp_pref_elicit_luisa import gaussian_process as GP 
from gp_pref_elicit_luisa import dataset as data 
from gp_pref_elicit_luisa import acquisition_function as acquisition_function
from gp_pref_elicit_luisa.gp_utilities import utils_ccs as gp_utils_ccs
from gp_pref_elicit_luisa.gp_utilities import utils_data as gp_utils_data
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users

In [3]:
# generating synthetic dataset which outputs value vectors
# to generate new dataset: uncomment the line below and comment the dataset above
# gp_utils_ccs.get_ccs(num_objectives=2, ccs_size=100) #feel free to modify the number of objectives
# note: after getting the synthetic data, store the data in a variable and comment the line above otherwise this will 
# not give proper insights for experiments as the data will be generated again.
# synthetic_pcs
# outputs a synthetic Pareto Coverage Set of value vectors with 2 objectives and 100 datapoints synthetic Pareto Coverage Set of value vectors with 2 objectives and 20 datapoints

In [4]:
synthetic_pcs = np.genfromtxt('synthetic_pcs_sets/obj7size100.csv', delimiter=',')
synthetic_pcs

array([[0.27245516, 0.18801804, 0.29548956, 0.36379381, 0.22100262,
        0.53015755, 0.58382206],
       [0.22777092, 0.36762785, 0.20545326, 0.47606599, 0.72272039,
        0.02840893, 0.144874  ],
       [0.13380531, 0.49233777, 0.19270044, 0.2166606 , 0.05813444,
        0.76557891, 0.25716475],
       [0.16187179, 0.07775722, 0.50906352, 0.17183022, 0.10232582,
        0.494842  , 0.65095381],
       [0.21706077, 0.1988527 , 0.4401911 , 0.57762827, 0.14617582,
        0.55104311, 0.24678678],
       [0.31908727, 0.56530095, 0.10985501, 0.20944572, 0.35891013,
        0.62374209, 0.06936744],
       [0.20722591, 0.05725423, 0.03897873, 0.61711273, 0.45573931,
        0.0793733 , 0.5978574 ],
       [0.34585068, 0.53958725, 0.66542164, 0.34543603, 0.03152709,
        0.08632052, 0.13665908],
       [0.60797899, 0.27959379, 0.0232642 , 0.21560566, 0.53603888,
        0.34943857, 0.30938137],
       [0.37249   , 0.28928013, 0.3423449 , 0.21115495, 0.5410539 ,
        0.56807894, 0.0

In [5]:
# initializing Gaussian Process, Dataset
GP = GP.GPPairwise(num_objectives=7)
utils_comparisons = data.DatasetPairwise(num_objectives=7)
acquisition_function_DA = acquisition_function.DiscreteAcquirer(input_domain=synthetic_pcs, query_type='pairwise', seed=None, acquisition_type='expected improvement')
acquisition_function_EI = acquisition_function.get_expected_improvement(datapoints=synthetic_pcs, gaussian_process=GP, datapoints_hist=acquisition_function_DA.history, xi=0.01)
# getting user preferences to generate a ground truth utility function
user_pref = gp_utils_users.UserPreference(num_objectives=7, std_noise=0.1)

In [6]:
# ground truth utility function for synthetic dataset
ground_truth_utility_function_dataset = user_pref.get_preference(synthetic_pcs, add_noise=False)

# getting the maximum utility which is the actual best utility
ground_truth_utility_function_dataset = np.max(ground_truth_utility_function_dataset)
ground_truth_utility_function_dataset

0.19918430252865177

In [7]:
# starting points in the dataset
start_points = acquisition_function_DA.get_start_points(gaussian_process=GP) 

# generating ground truth utility function for the starting points
ground_truth_utility_function_start = user_pref.get_preference(start_points, add_noise=False)

# getting the index of the highest utility
highest_utility_index_start = np.argmax(ground_truth_utility_function_start)

# getting the index of the lowest utility
lowest_utility_index_start = np.argmin(ground_truth_utility_function_start)

# mapping the indices of the highest utility to the actual datapoint in the dataset
highest_utility_point_dataset = start_points[highest_utility_index_start]

# mapping the indices of the lowest utility to the actual datapoint in the dataset
lowest_utility_point_dataset = start_points[lowest_utility_index_start]


# adding the highest utility point, which is the winner, to the dataset 
utils_comparisons.add_single_comparison(highest_utility_point_dataset, lowest_utility_point_dataset)

# adding the actual point to the array 
current_best_point = highest_utility_point_dataset

# updating the GP
GP.update(utils_comparisons)

# checking which points to exclude
exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)

print('Start points: ', start_points, '\n',
      'Point in the dataset having highest utility: ', highest_utility_point_dataset, '\n',
      'Point in the dataset having lowest utility: ', lowest_utility_point_dataset, '\n',
      'Current max: ', current_best_point, '\n', 
      'Excluded points: ', exclude_points)

Start points:  (array([0.07897996, 0.39947132, 0.24014406, 0.51782136, 0.12082687,
       0.17323687, 0.68100408]), array([0.56651191, 0.02643008, 0.72784371, 0.27499983, 0.22574684,
       0.05341316, 0.13845484])) 
 Point in the dataset having highest utility:  [0.56651191 0.02643008 0.72784371 0.27499983 0.22574684 0.05341316
 0.13845484] 
 Point in the dataset having lowest utility:  [0.07897996 0.39947132 0.24014406 0.51782136 0.12082687 0.17323687
 0.68100408] 
 Current max:  [0.56651191 0.02643008 0.72784371 0.27499983 0.22574684 0.05341316
 0.13845484] 
 Excluded points:  [[0.56651191 0.02643008 0.72784371 0.27499983 0.22574684 0.05341316
  0.13845484]
 [0.07897996 0.39947132 0.24014406 0.51782136 0.12082687 0.17323687
  0.68100408]]


We have 2 stopping conditions (similar to the GP with EI):
1. If the next point found according to Expected Improvement is already in the list of compared points (the points which we exclude for comparison - excluded points) then the loop breaks. This is because the loop has explored all the points available to it.
2. If we have a large dataset where the above condition might not always be true then we look at the probability of improvement between points. If the probability of improvement is below 5% (0.05) then the loop terminates because there is no further scope of improvement.

In [8]:
# setting the threshold to 5%
threshold = 0.05
stop = False
counter = 0 # counter variable to keep track of number of queries 
regret_hist = [] # to track and store the regret at each step 
last_counter = 0 # to store the total number of queries (which we get from the last iteration)
last_regret = 0 # to store the regret gained from the last iteration

first_run = not os.path.exists('experiments/output-TS_7-100.csv') # checking if the file exists already in the directory

regret_file_csv = 'experiments/regret-TS_7-100.csv' 
regret_file_check = os.path.exists(regret_file_csv) # checking if the file containing regret values at each step exists

# we output all values for regret gained at each iteration in a new csv file
with open(regret_file_csv, 'a', newline='') as csvfile:
  write_to_regret_csv = csv.writer(csvfile)

# checking if the file exists already, if not we include the header or else we don't
  if not regret_file_check:
    write_to_regret_csv.writerow(['Step', 'Regret'])

  starting_time = time.time() # starting the timer

  while not stop:
    # next points in the dataset according to Thompson Sampling 
    next_point_TS = acquisition_function_DA.get_next_point_thompson(gaussian_process=GP, exclude=exclude_points) 

    # applying first stopping condition
    if next_point_TS.tolist() in exclude_points.tolist():
      last_counter = counter
      last_regret = regret
      stop = True


    # generating ground truth utility function for the next points
    ground_truth_utility_function_next_TS = user_pref.get_preference(next_point_TS, add_noise=False)
    # generating ground truth utility function for the current best point
    ground_truth_utility_function_current_best = user_pref.get_preference(current_best_point, add_noise=False)

    # adding the highest utility point, which is the winner, to the dataset only if the current point being evaluated according to 
    # expected improvement has a greater utility than the utility of the current best point evaluated previously
    if ground_truth_utility_function_next_TS > ground_truth_utility_function_current_best:
      utils_comparisons.add_single_comparison(next_point_TS, current_best_point)
      current_best_point = next_point_TS # we update the current best point 
    else:
      utils_comparisons.add_single_comparison(current_best_point, next_point_TS)
    
    # computing the datapoints for this comparisons dataset
    utility_comparisons_datapoints_next_EI = utils_comparisons.datapoints

    # updating the GP
    GP.update(utils_comparisons)

    # checking which points to exclude
    exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)
    exclude_points = np.append(exclude_points, [current_best_point], axis=0)

    # difference between actual best utility and observed best utility
    regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

    # calculating regret for each step
    regret_step = regret
    regret_hist.append(regret_step) # appending the list to contain regret at every step 
    print('Regret at step {}: {}'.format(counter, regret_step))
    write_to_regret_csv.writerow([counter+1, regret_step]) # writing to csv
    print('Regret history saved')

    # calculating the probability of improvement
    prob_imprv = acquisition_function.get_probability_of_improvement(x=next_point_TS, gaussian_process=GP, x_previous=current_best_point)

    # incrementing the counter variable
    counter +=1

    # stop the timer
    ending_time = time.time()
    total_time = ending_time - starting_time

    print('Excluded points: ', exclude_points, '\n',
      'next points for the GP accordint to Thompson Sampling: ', next_point_TS, '\n',
      'Comparison dataset datapoints: ', utility_comparisons_datapoints_next_EI, '\n',
      'Current max: ', current_best_point, '\n', 
      'Ground truth utility of the current best: ', ground_truth_utility_function_current_best, '\n',
      'Excluded points: ', len(exclude_points), '\n',
      'Simple Regret: ', regret, '\n',
      'Probability of improvement: ', prob_imprv, '\n',
      'Number of queries: ', counter, '\n',
      'Computation Time: ', total_time)
    
    # applying second stopping condition
    if prob_imprv < threshold:
      last_counter = counter
      last_regret = regret
      stop = True

# we output the counter and regret of the last iteration to a csv for comparisons
with open('experiments/output-TS_7-100.csv', 'a', newline='') as csvfile:
  write_to_csv = csv.writer(csvfile)

  # write the header row only for the first run
  if first_run: # if the file does not exist then we output the header otherwise we output only the data
    write_to_csv.writerow(['Number of Queries', 'Regret'])
  write_to_csv.writerow([last_counter + 1, last_regret])
print('Successfully written to csv')


Regret at step 0: [0.02202108]
Regret history saved
Excluded points:  [[0.26210791 0.54890694 0.13459564 0.25947725 0.31354956 0.43361461
  0.50815473]
 [0.56651191 0.02643008 0.72784371 0.27499983 0.22574684 0.05341316
  0.13845484]
 [0.26210791 0.54890694 0.13459564 0.25947725 0.31354956 0.43361461
  0.50815473]] 
 next points for the GP accordint to Thompson Sampling:  [0.26210791 0.54890694 0.13459564 0.25947725 0.31354956 0.43361461
 0.50815473] 
 Comparison dataset datapoints:  [[0.56651191 0.02643008 0.72784371 0.27499983 0.22574684 0.05341316
  0.13845484]
 [0.07897996 0.39947132 0.24014406 0.51782136 0.12082687 0.17323687
  0.68100408]
 [0.26210791 0.54890694 0.13459564 0.25947725 0.31354956 0.43361461
  0.50815473]] 
 Current max:  [0.26210791 0.54890694 0.13459564 0.25947725 0.31354956 0.43361461
 0.50815473] 
 Ground truth utility of the current best:  [0.17716322] 
 Excluded points:  3 
 Simple Regret:  [0.02202108] 
 Probability of improvement:  [0.48358378] 
 Number of q

Regret at step 5: [0.00419344]
Regret history saved
Excluded points:  [[0.16820781 0.63424619 0.33678249 0.35257216 0.14881148 0.54696993
  0.10191846]
 [0.26210791 0.54890694 0.13459564 0.25947725 0.31354956 0.43361461
  0.50815473]
 [0.16815744 0.47463709 0.01972592 0.33292063 0.43534058 0.47979759
  0.46420933]
 [0.09608933 0.37077999 0.02896033 0.60575694 0.02659921 0.67394641
  0.17492165]
 [0.55643152 0.06675641 0.17959479 0.01008934 0.74388766 0.30578754
  0.08183267]
 [0.11560706 0.05304863 0.20685641 0.43438034 0.05811566 0.71810862
  0.48299853]
 [0.16820781 0.63424619 0.33678249 0.35257216 0.14881148 0.54696993
  0.10191846]] 
 next points for the GP accordint to Thompson Sampling:  [0.11560706 0.05304863 0.20685641 0.43438034 0.05811566 0.71810862
 0.48299853] 
 Comparison dataset datapoints:  [[0.56651191 0.02643008 0.72784371 0.27499983 0.22574684 0.05341316
  0.13845484]
 [0.07897996 0.39947132 0.24014406 0.51782136 0.12082687 0.17323687
  0.68100408]
 [0.26210791 0.5489