In [1]:
import pandas as pd
import numpy as np
import csv
import os
import time

In [2]:
from gp_pref_elicit_luisa import gaussian_process as GP 
from gp_pref_elicit_luisa import dataset as data 
from gp_pref_elicit_luisa import acquisition_function as acquisition_function
from gp_pref_elicit_luisa.gp_utilities import utils_ccs as gp_utils_ccs
from gp_pref_elicit_luisa.gp_utilities import utils_data as gp_utils_data
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users

In [3]:
# Generating a synthetic dataset of value vectors.
# To generate a new dataset: uncomment the line below and comment out the hard-coded dataset in the next cell.
# gp_utils_ccs.get_ccs(num_objectives=2, ccs_size=100) # feel free to modify the number of objectives
# Note: after generating the synthetic data, store it in a variable and comment the line above out again; otherwise
# the data will be regenerated on every run and the experiments will not give proper insights.
# synthetic_pcs
# The call outputs a synthetic Pareto Coverage Set of value vectors (e.g. 2 objectives with 100 or 20 datapoints).

In [4]:
# Hard-coded synthetic dataset: 20 datapoints (rows) with 4 objectives (columns).
# It is kept as a literal so that every run of the notebook works on the same value vectors.
# NOTE(review): presumably produced once with gp_utils_ccs.get_ccs and pasted here - confirm.
synthetic_pcs = np.array([[0.66500185, 0.26659716, 0.67901091, 0.03109566],
       [0.20469148, 0.76427099, 0.36782735, 0.49741987],
       [0.33503033, 0.77705333, 0.51253625, 0.17377681],
       [0.83500893, 0.5046724 , 0.01472368, 0.19546026],
       [0.26821178, 0.01401519, 0.26253134, 0.94111005],
       [0.6437465 , 0.50606921, 0.56073838, 0.13618861],
       [0.25769534, 0.94840384, 0.19842136, 0.02113471],
       [0.69526968, 0.18128043, 0.66306254, 0.29841504],
       [0.60907163, 0.57855736, 0.30396907, 0.42269316],
       [0.9430651 , 0.31184776, 0.02707273, 0.08230146],
       [0.2116937 , 0.53391414, 0.16623161, 0.7888748 ],
       [0.85858347, 0.005654  , 0.05969642, 0.52920896],
       [0.05754097, 0.15067613, 0.36271233, 0.92030222],
       [0.00759711, 0.08798964, 0.93090403, 0.37347468],
       [0.11029485, 0.34372918, 0.4246108 , 0.82135914],
       [0.25767723, 0.66289864, 0.70792229, 0.0295834 ],
       [0.14777658, 0.77781636, 0.59761621, 0.0549196 ],
       [0.17786425, 0.95120839, 0.24415824, 0.08024447],
       [0.1117502 , 0.4898303 , 0.8301486 , 0.25883329],
       [0.74079664, 0.07523264, 0.22997674, 0.60926852]])


In [5]:
# Initialising the Gaussian Process, the comparison dataset, the acquisition helpers and the simulated user.
# The GP module is imported under its own alias here so that binding the name `GP` to the model
# instance below does not clobber the module: the cell can then be re-run without an AttributeError.
from gp_pref_elicit_luisa import gaussian_process as gaussian_process_module

# the number of objectives is taken from the dataset so that all components agree with it
NUM_OBJECTIVES = synthetic_pcs.shape[1]

GP = gaussian_process_module.GPPairwise(num_objectives=NUM_OBJECTIVES)
utils_comparisons = data.DatasetPairwise(num_objectives=NUM_OBJECTIVES)
acquisition_function_DA = acquisition_function.DiscreteAcquirer(input_domain=synthetic_pcs, query_type='pairwise', seed=None, acquisition_type='expected improvement')
acquisition_function_EI = acquisition_function.get_expected_improvement(datapoints=synthetic_pcs, gaussian_process=GP, datapoints_hist=acquisition_function_DA.history, xi=0.01)
# simulated user whose (noisy) preferences provide the ground truth utility function
user_pref = gp_utils_users.UserPreference(num_objectives=NUM_OBJECTIVES, std_noise=0.1)

In [6]:
# Best achievable utility on the synthetic dataset: evaluate the (noisy) ground truth
# utility of every datapoint and keep only the maximum, which serves as the reference
# value for the regret computed later on.
ground_truth_utility_function_dataset = np.max(
    user_pref.get_preference(synthetic_pcs, add_noise=True)
)
ground_truth_utility_function_dataset

0.5245782118600147

In [7]:
# Bootstrapping the elicitation: the acquirer proposes the start points and the simulated
# user compares them, which yields the first pairwise comparison for the GP.
start_points = acquisition_function_DA.get_start_points(gaussian_process=GP)

# (noisy) ground truth utilities of the start points
ground_truth_utility_function_start = user_pref.get_preference(start_points, add_noise=True)

# positions of the preferred and of the least preferred start point ...
highest_utility_index_start = np.argmax(ground_truth_utility_function_start)
lowest_utility_index_start = np.argmin(ground_truth_utility_function_start)

# ... and the corresponding datapoints
highest_utility_point_dataset = start_points[highest_utility_index_start]
lowest_utility_point_dataset = start_points[lowest_utility_index_start]

# the preferred start point is the current best point from here on
current_best_point = highest_utility_point_dataset

# recording the comparison (the winner is passed first) and refitting the GP on it
utils_comparisons.add_single_comparison(highest_utility_point_dataset, lowest_utility_point_dataset)
GP.update(utils_comparisons)

# points that already took part in a comparison and should not be queried again
exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)

print(
    'Start points: ', start_points, '\n',
    'Point in the dataset having highest utility: ', highest_utility_point_dataset, '\n',
    'Point in the dataset having lowest utility: ', lowest_utility_point_dataset, '\n',
    'Current max: ', current_best_point, '\n',
    'Excluded points: ', exclude_points
)

Start points:  (array([0.69526968, 0.18128043, 0.66306254, 0.29841504]), array([0.33503033, 0.77705333, 0.51253625, 0.17377681])) 
 Point in the dataset having highest utility:  [0.69526968 0.18128043 0.66306254 0.29841504] 
 Point in the dataset having lowest utility:  [0.33503033 0.77705333 0.51253625 0.17377681] 
 Current max:  [0.69526968 0.18128043 0.66306254 0.29841504] 
 Excluded points:  [[0.69526968 0.18128043 0.66306254 0.29841504]
 [0.33503033 0.77705333 0.51253625 0.17377681]]


We have 2 stopping conditions (similar to the GP with EI):
1. If the next point proposed by the acquisition function (Thompson Sampling in this notebook) is already in the list of compared points (the points which we exclude from comparison, i.e. the excluded points), then the loop breaks. This is because the loop has explored all the points available to it.
2. If we have a large dataset, where the above condition might not always be met, we look at the probability of improvement between points. If the probability of improvement is below 5% (0.05), the loop terminates because there is no further scope for improvement.

In [8]:
# Preference elicitation loop driven by Thompson Sampling.
# Each iteration asks the simulated user to compare the proposed point with the current best
# point, refits the GP on all comparisons and reports the simple regret. The loop ends when
#   1. the proposed point has already been compared (nothing new left to query), or
#   2. the probability of improvement drops below `threshold`.
# The number of queries, the last regret and the computation time are appended to a csv file.

threshold = 0.05 # second stopping condition: 5% probability of improvement
output_path = 'experiments/output-TS_4-20.csv' # results file shared by all runs of this experiment
stop = False
counter = 0 # counter variable to keep track of number of queries
last_counter = 0 # to store the total number of queries (which we get from the last iteration)
last_regret = np.nan # regret of the last completed iteration (stays NaN if no iteration completes)
total_time = 0.0 # defined up front so the csv write works even if the loop ends immediately

# make sure the output directory exists, otherwise opening the csv file below fails
os.makedirs(os.path.dirname(output_path), exist_ok=True)
first_run = not os.path.exists(output_path) # checking if the file exists already in the directory

starting_time = time.time() # starting the timer

while not stop:
  # next point in the dataset according to Thompson Sampling
  next_point_TS = acquisition_function_DA.get_next_point_thompson(gaussian_process=GP, exclude=exclude_points)

  # applying first stopping condition: leave the loop right away so that an already compared
  # point is not queried (and added to the comparisons) once more
  if next_point_TS.tolist() in exclude_points.tolist():
    last_counter = counter
    total_time = time.time() - starting_time
    stop = True
    break

  # generating ground truth utility function for the next point
  ground_truth_utility_function_next_TS = user_pref.get_preference(next_point_TS, add_noise=True)
  # generating ground truth utility function for the current best point
  ground_truth_utility_function_current_best = user_pref.get_preference(current_best_point, add_noise=True)

  # adding the comparison with the winner first: the proposed point only replaces the current
  # best point if its utility is greater than the utility of the current best point
  if ground_truth_utility_function_next_TS > ground_truth_utility_function_current_best:
    utils_comparisons.add_single_comparison(next_point_TS, current_best_point)
    current_best_point = next_point_TS # we update the current best point
    # keep the utility in sync with the new current best point, so that the regret and the
    # printed utility below describe the point reported as 'Current max'
    ground_truth_utility_function_current_best = ground_truth_utility_function_next_TS
  else:
    utils_comparisons.add_single_comparison(current_best_point, next_point_TS)

  # datapoints of the comparisons dataset (the name is kept for compatibility with later cells)
  utility_comparisons_datapoints_next_EI = utils_comparisons.datapoints

  # updating the GP
  GP.update(utils_comparisons)

  # checking which points to exclude
  exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)
  exclude_points = np.append(exclude_points, [current_best_point], axis=0)

  # difference between actual best utility and utility of the current best point
  regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)
  last_regret = regret

  # calculating the probability of improvement
  # NOTE(review): when the proposed point has just become the current best point, both
  # arguments are the same point - confirm that this is the intended comparison
  prob_imprv = acquisition_function.get_probability_of_improvement(x=next_point_TS, gaussian_process=GP, x_previous=current_best_point)

  # incrementing the counter variable
  counter +=1

  # elapsed time since the loop started
  ending_time = time.time()
  total_time = ending_time - starting_time

  print('Excluded points: ', exclude_points, '\n',
    'next points for the GP accordint to Thompson Sampling: ', next_point_TS, '\n',
    'Comparison dataset datapoints: ', utility_comparisons_datapoints_next_EI, '\n',
    'Current max: ', current_best_point, '\n', 
    'Ground truth utility of the current best: ', ground_truth_utility_function_current_best, '\n',
    'Excluded points: ', len(exclude_points), '\n',
    'Simple Regret: ', regret, '\n',
    'Probability of improvement: ', prob_imprv, '\n',
    'Number of queries: ', counter, '\n',
    'Computation Time: ', total_time)

  # applying second stopping condition
  if prob_imprv < threshold:
    last_counter = counter
    stop = True

# we output the counter and regret of the last iteration to a csv for comparisons
with open(output_path, 'a', newline='') as csvfile:
  write_to_csv = csv.writer(csvfile)

  # write the header row only for the first run
  if first_run: # if the file does not exist then we output the header otherwise we output only the data
    write_to_csv.writerow(['Number of Queries', 'Regret', 'Computation Time'])
  # NOTE(review): the +1 presumably accounts for the initial start point comparison - confirm
  write_to_csv.writerow([last_counter + 1, last_regret, total_time])
print('Successfully written to csv')


Excluded points:  [[0.00759711 0.08798964 0.93090403 0.37347468]
 [0.69526968 0.18128043 0.66306254 0.29841504]
 [0.00759711 0.08798964 0.93090403 0.37347468]] 
 next points for the GP accordint to Thompson Sampling:  [0.00759711 0.08798964 0.93090403 0.37347468] 
 Comparison dataset datapoints:  [[0.69526968 0.18128043 0.66306254 0.29841504]
 [0.33503033 0.77705333 0.51253625 0.17377681]
 [0.00759711 0.08798964 0.93090403 0.37347468]] 
 Current max:  [0.00759711 0.08798964 0.93090403 0.37347468] 
 Ground truth utility of the current best:  [0.30289075] 
 Excluded points:  3 
 Simple Regret:  [0.22168746] 
 Probability of improvement:  [0.48356242] 
 Number of queries:  1 
 Computation Time:  0.027338504791259766
Excluded points:  [[0.00759711 0.08798964 0.93090403 0.37347468]
 [0.69526968 0.18128043 0.66306254 0.29841504]
 [0.1117502  0.4898303  0.8301486  0.25883329]
 [0.00759711 0.08798964 0.93090403 0.37347468]] 
 next points for the GP accordint to Thompson Sampling:  [0.1117502  