In [1]:
import pandas as pd
import numpy as np
import csv
import os
import time

In [2]:
from gp_pref_elicit_luisa import gaussian_process as GP 
from gp_pref_elicit_luisa import dataset as data 
from gp_pref_elicit_luisa import acquisition_function as acquisition_function
from gp_pref_elicit_luisa.gp_utilities import utils_ccs as gp_utils_ccs
from gp_pref_elicit_luisa.gp_utilities import utils_data as gp_utils_data
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users

In [3]:
# Experiment configuration for this run.
# number of objectives handed to the GP, the comparison dataset and the user model
objectives = 4

# result files: one summary row per run, and one regret row per query
output_csv = 'experiments/output-TS_4-100.csv'
output_regret_csv = 'experiments/regret-csvs/regret-TS_4-100.csv'

# candidate points, read from a comma-separated file
synthetic_pcs = np.genfromtxt(
    'synthetic_pcs_sets/obj4size100.csv',
    delimiter=',',
)

In [4]:
# Initialise the pairwise Gaussian Process, the comparison dataset, the discrete
# acquirer and the user model that supplies the ground-truth utility.
#
# The imports cell binds the gaussian_process *module* to the name `GP`, and this cell
# rebinds `GP` to the model *instance*. Calling `GP.GPPairwise(...)` here therefore only
# works the first time the cell runs; any re-run raised AttributeError because `GP` was
# already an instance. Importing the module under its own alias keeps the cell re-runnable
# while `GP` still ends up as the model instance the later cells expect.
from gp_pref_elicit_luisa import gaussian_process as gaussian_process_module

GP = gaussian_process_module.GPPairwise(num_objectives=objectives)
utils_comparisons = data.DatasetPairwise(num_objectives=objectives)
# seed=None: the acquirer is not seeded here, so runs are presumably not reproducible
# -- TODO confirm how DiscreteAcquirer treats a None seed
acquisition_function_DA = acquisition_function.DiscreteAcquirer(input_domain=synthetic_pcs, query_type='pairwise', seed=None, acquisition_type='expected improvement')
# NOTE(review): evaluated once, on the GP before any comparison has been added; the
# Thompson-sampling loop in this notebook does not read this value.
acquisition_function_EI = acquisition_function.get_expected_improvement(datapoints=synthetic_pcs, gaussian_process=GP, datapoints_hist=acquisition_function_DA.history, xi=0.01)
# getting user preferences to generate a ground truth utility function
user_pref = gp_utils_users.UserPreference(num_objectives=objectives, std_noise=0.1)

In [5]:
# Noise-free ground-truth utility of every point in the synthetic set, reduced to its
# maximum: the actual best utility, which the regret computation compares against.
ground_truth_utility_function_dataset = np.max(
    user_pref.get_preference(synthetic_pcs, add_noise=False)
)
ground_truth_utility_function_dataset

0.5814644336868326

In [6]:
# Seed the comparison dataset with one query between the acquirer's start points.
start_points = acquisition_function_DA.get_start_points(gaussian_process=GP)

# noise-free ground-truth utility of each start point
ground_truth_utility_function_start = user_pref.get_preference(start_points, add_noise=False)

# positions of the best and the worst start point
highest_utility_index_start, lowest_utility_index_start = (
    np.argmax(ground_truth_utility_function_start),
    np.argmin(ground_truth_utility_function_start),
)

# the corresponding datapoints
highest_utility_point_dataset, lowest_utility_point_dataset = (
    start_points[highest_utility_index_start],
    start_points[lowest_utility_index_start],
)

# the better start point is the incumbent the loop will try to beat
current_best_point = highest_utility_point_dataset

# record the comparison (winner first, loser second) and refit the GP on it
utils_comparisons.add_single_comparison(highest_utility_point_dataset, lowest_utility_point_dataset)
GP.update(utils_comparisons)

# points that already took part in a comparison
exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)

print('Start points: ', start_points, '\n',
      'Point in the dataset having highest utility: ', highest_utility_point_dataset, '\n',
      'Point in the dataset having lowest utility: ', lowest_utility_point_dataset, '\n',
      'Current max: ', current_best_point, '\n', 
      'Excluded points: ', exclude_points)

Start points:  (array([0.99965068, 0.02642936]), array([0.95953129, 0.28160204])) 
 Point in the dataset having highest utility:  [0.95953129 0.28160204] 
 Point in the dataset having lowest utility:  [0.99965068 0.02642936] 
 Current max:  [0.95953129 0.28160204] 
 Excluded points:  [[0.95953129 0.28160204]
 [0.99965068 0.02642936]]


We have 2 stopping conditions (similar to the GP with EI):
1. If the next point proposed by Thompson Sampling is already in the list of compared points (the points we exclude from further comparison - the excluded points), the loop breaks, because it has already explored all the points available to it.
2. For a large dataset the condition above may never be met, so we also look at the probability of improvement of the newly sampled point over the current best point. If this probability is below 5% (0.05), the loop terminates because there is little remaining scope for improvement.

In [7]:
# Thompson-sampling elicitation loop.
# Each pass asks the acquirer for a new point, compares it with the current best using the
# noise-free ground-truth utility, refits the GP and logs the regret. The loop ends when
# the proposed point was already compared, or when the probability of improvement falls
# below `threshold`.

# setting the threshold to 5%
threshold = 0.05
stop = False
counter = 0 # counter variable to keep track of number of queries 
regret_hist = [] # to track and store the regret at each step 
last_counter = 0 # to store the total number of queries (which we get from the last iteration)
last_regret = 0 # to store the regret gained from the last iteration

# open(..., 'a') creates a missing file but not a missing folder, so create the folders first
for csv_path in (output_csv, output_regret_csv):
  csv_dir = os.path.dirname(csv_path)
  if csv_dir:
    os.makedirs(csv_dir, exist_ok=True)

first_run = not os.path.exists(output_csv) # checking if the file exists already in the directory

regret_file_check = os.path.exists(output_regret_csv) # checking if the file containing regret values at each step exists

# regret of the starting best point; this is the value reported if the first stopping
# condition fires before any query was made (previously `regret` was undefined there)
ground_truth_utility_function_current_best = user_pref.get_preference(current_best_point, add_noise=False)
regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

# we output all values for regret gained at each iteration in a new csv file
with open(output_regret_csv, 'a', newline='') as csvfile:
  write_to_regret_csv = csv.writer(csvfile)

  # checking if the file exists already, if not we include the header or else we don't
  if not regret_file_check:
    write_to_regret_csv.writerow(['Step', 'Regret'])

  starting_time = time.time() # starting the timer

  while not stop:
    # next points in the dataset according to Thompson Sampling 
    next_point_TS = acquisition_function_DA.get_next_point_thompson(gaussian_process=GP, exclude=exclude_points) 

    # applying first stopping condition: the proposed point has already been compared.
    # Leave the loop immediately so the point is not compared, logged and counted again.
    if next_point_TS.tolist() in exclude_points.tolist():
      last_counter = counter
      last_regret = regret
      stop = True
      break

    # generating ground truth utility function for the next points
    ground_truth_utility_function_next_TS = user_pref.get_preference(next_point_TS, add_noise=False)
    # generating ground truth utility function for the current best point
    ground_truth_utility_function_current_best = user_pref.get_preference(current_best_point, add_noise=False)

    # the winner of the comparison goes first; the sampled point replaces the current best
    # only if its ground-truth utility is strictly greater
    if ground_truth_utility_function_next_TS > ground_truth_utility_function_current_best:
      utils_comparisons.add_single_comparison(next_point_TS, current_best_point)
      current_best_point = next_point_TS # we update the current best point 
      # keep the stored utility in step with the new best point, otherwise the regret
      # below would still describe the previous best and lag one step behind
      ground_truth_utility_function_current_best = ground_truth_utility_function_next_TS
    else:
      utils_comparisons.add_single_comparison(current_best_point, next_point_TS)
    
    # computing the datapoints for this comparisons dataset
    utility_comparisons_datapoints_next_EI = utils_comparisons.datapoints

    # updating the GP
    GP.update(utils_comparisons)

    # checking which points to exclude
    exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)
    exclude_points = np.append(exclude_points, [current_best_point], axis=0)

    # difference between actual best utility and observed best utility
    regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

    # calculating regret for each step
    regret_step = regret
    regret_hist.append(regret_step) # appending the list to contain regret at every step 
    print('Regret at step {}: {}'.format(counter, regret_step))
    write_to_regret_csv.writerow([counter+1, regret_step]) # writing to csv
    print('Regret history saved')

    # calculating the probability of improvement
    # NOTE(review): when the sampled point has just become the current best, x and
    # x_previous are the same point -- confirm this is the intended comparison
    prob_imprv = acquisition_function.get_probability_of_improvement(x=next_point_TS, gaussian_process=GP, x_previous=current_best_point)

    # incrementing the counter variable
    counter +=1

    # elapsed time since the loop started
    ending_time = time.time()
    total_time = ending_time - starting_time

    print('Excluded points: ', exclude_points, '\n',
      'next points for the GP accordint to Thompson Sampling: ', next_point_TS, '\n',
      'Comparison dataset datapoints: ', utility_comparisons_datapoints_next_EI, '\n',
      'Current max: ', current_best_point, '\n', 
      'Ground truth utility of the current best: ', ground_truth_utility_function_current_best, '\n',
      'Ground truth utility of the thompson sampled point: ',  ground_truth_utility_function_next_TS, '\n',
      'Excluded points: ', len(exclude_points), '\n',
      'Simple Regret: ', regret, '\n',
      'Probability of improvement: ', prob_imprv, '\n',
      'Number of queries: ', counter, '\n',
      'Computation Time: ', total_time)
    
    # applying second stopping condition
    if prob_imprv < threshold:
      last_counter = counter
      last_regret = regret
      stop = True

# we output the counter and regret of the last iteration to a csv for comparisons
with open(output_csv, 'a', newline='') as csvfile:
  write_to_csv = csv.writer(csvfile)

  # write the header row only for the first run
  if first_run: # if the file does not exist then we output the header otherwise we output only the data
    write_to_csv.writerow(['Number of Queries', 'Regret'])
  write_to_csv.writerow([last_counter + 1, last_regret])
print('Successfully written to csv')


Regret at step 0: [3.36008671e-05]
Regret history saved
Excluded points:  [[0.95953129 0.28160204]
 [0.99965068 0.02642936]
 [0.9913516  0.13123265]
 [0.95953129 0.28160204]] 
 next points for the GP accordint to Thompson Sampling:  [0.9913516  0.13123265] 
 Comparison dataset datapoints:  [[0.95953129 0.28160204]
 [0.99965068 0.02642936]
 [0.9913516  0.13123265]] 
 Current max:  [0.95953129 0.28160204] 
 Ground truth utility of the current best:  [0.58143083] 
 Ground truth utility of the thompson sampled point:  [0.57533395] 
 Excluded points:  4 
 Simple Regret:  [3.36008671e-05] 
 Probability of improvement:  [0.45357645] 
 Number of queries:  1 
 Computation Time:  0.03789472579956055
Regret at step 1: [3.36008671e-05]
Regret history saved
Excluded points:  [[0.95953129 0.28160204]
 [0.99965068 0.02642936]
 [0.9913516  0.13123265]
 [0.00145999 0.99999893]
 [0.95953129 0.28160204]] 
 next points for the GP accordint to Thompson Sampling:  [0.00145999 0.99999893] 
 Comparison datase

Regret at step 6: [3.36008671e-05]
Regret history saved
Excluded points:  [[0.95953129 0.28160204]
 [0.99965068 0.02642936]
 [0.9913516  0.13123265]
 [0.00145999 0.99999893]
 [0.73031602 0.68310944]
 [0.40787128 0.91303944]
 [0.20234593 0.97931411]
 [0.40905377 0.91251028]
 [0.4482646  0.89390092]
 [0.95953129 0.28160204]] 
 next points for the GP accordint to Thompson Sampling:  [0.4482646  0.89390092] 
 Comparison dataset datapoints:  [[0.95953129 0.28160204]
 [0.99965068 0.02642936]
 [0.9913516  0.13123265]
 [0.00145999 0.99999893]
 [0.73031602 0.68310944]
 [0.40787128 0.91303944]
 [0.20234593 0.97931411]
 [0.40905377 0.91251028]
 [0.4482646  0.89390092]] 
 Current max:  [0.95953129 0.28160204] 
 Ground truth utility of the current best:  [0.58143083] 
 Ground truth utility of the thompson sampled point:  [0.51206788] 
 Excluded points:  10 
 Simple Regret:  [3.36008671e-05] 
 Probability of improvement:  [0.40739336] 
 Number of queries:  7 
 Computation Time:  0.2848806381225586
R