In [1]:
import pandas as pd
import numpy as np

In [2]:
from gp_pref_elicit_luisa import gaussian_process as GP 
from gp_pref_elicit_luisa import dataset as data 
from gp_pref_elicit_luisa import acquisition_function as acquisition_function
from gp_pref_elicit_luisa.gp_utilities import utils_ccs as gp_utils_ccs
from gp_pref_elicit_luisa.gp_utilities import utils_data as gp_utils_data
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users

In [3]:
# generating synthetic dataset which outputs value vectors
# gp_utils_ccs.get_ccs(2, 20)
# outputs a synthetic Pareto Coverage Set of value vectors with 2 objectives and 20 datapoints

In [4]:
synthetic_pcs = np.array([[0.14370116, 0.99159928],
       [0.9797389 , 0.2242916 ],
       [0.        , 1.        ],
       [0.91055917, 0.45020785],
       [0.59678925, 0.81854996],
       [1.        , 0.        ],
       [0.94198057, 0.352479  ],
       [0.81501748, 0.65358114],
       [0.99814566, 0.05429028],
       [0.33305315, 0.94955291],
       [0.28860669, 0.96123215],
       [0.99999796, 0.02591092],
       [0.98910769, 0.14092867],
       [0.18584726, 0.98334638],
       [0.05210043, 0.99838732],
       [0.87761802, 0.52114756],
       [0.74002719, 0.7144284 ],
       [0.21487083, 0.97724899],
       [0.43622937, 0.90230767],
       [0.99525346, 0.08213535]])

# to generate new dataset: uncomment the line below and comment the dataset above
# synthetic_pcs = gp_utils_ccs.get_ccs(num_objectives=2, ccs_size=20) #feel free to modify the number of objectives



In [5]:
# initializing Gaussian Process, Dataset
GP = GP.GPPairwise(num_objectives=2)
utils_comparisons = data.DatasetPairwise(num_objectives=2)
acquisition_function_DA = acquisition_function.DiscreteAcquirer(input_domain=synthetic_pcs, query_type='pairwise', seed=None, acquisition_type='expected improvement')
acquisition_function_EI = acquisition_function.get_expected_improvement(datapoints=synthetic_pcs, gaussian_process=GP, datapoints_hist=acquisition_function_DA.history, xi=0.01)
# getting user preferences to generate a ground truth utility function
user_pref = gp_utils_users.UserPreference(num_objectives=2, std_noise=0.1)

In [6]:
# ground truth utility function for synthetic dataset
ground_truth_utility_function_dataset = user_pref.get_preference(synthetic_pcs, add_noise=True)
# getting the maximum utility which is the actual best utility
ground_truth_utility_function_dataset = np.max(ground_truth_utility_function_dataset)
ground_truth_utility_function_dataset

0.7357820718463928

In [7]:
# starting points in the dataset
start_points = acquisition_function_DA.get_start_points(gaussian_process=GP) 
# generating ground truth utility function for the starting points
ground_truth_utility_function_start = user_pref.get_preference(start_points, add_noise=True)
# getting the index of the highest utility
highest_utility_index_start = np.argmax(ground_truth_utility_function_start)
# getting the index of the lowest utility
lowest_utility_index_start = np.argmin(ground_truth_utility_function_start)
# mapping the indices of the highest utility to the actual datapoint in the dataset
highest_utility_point_dataset = start_points[highest_utility_index_start]
# mapping the indices of the lowest utility to the actual datapoint in the dataset
lowest_utility_point_dataset = start_points[lowest_utility_index_start]

# adding the highest utility point, which is the winner, to the dataset 
utils_comparisons.add_single_comparison(highest_utility_point_dataset, lowest_utility_point_dataset)

# adding the actual point to the array 
current_best_point = highest_utility_point_dataset

# updating the GP
GP.update(utils_comparisons)

# checking which points to exclude
exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)

print('Start points: ', start_points, '\n',
      'Point in the dataset having highest utility: ', highest_utility_point_dataset, '\n',
      'Point in the dataset having lowest utility: ', lowest_utility_point_dataset, '\n',
      'Current max: ', current_best_point, '\n', 
      'Excluded points: ', exclude_points)

Start points:  (array([0.33305315, 0.94955291]), array([0.21487083, 0.97724899])) 
 Point in the dataset having highest utility:  [0.21487083 0.97724899] 
 Point in the dataset having lowest utility:  [0.33305315 0.94955291] 
 Current max:  [0.21487083 0.97724899] 
 Excluded points:  [[0.21487083 0.97724899]
 [0.33305315 0.94955291]]


We have 2 stopping conditions (similar to the GP with EI):
1. If the next point found according to Expected Improvement is already in the list of compared points (the points which we exclude for comparison - excluded points) then the loop breaks. This is because the loop has explored all the points available to it.
2. If we have a large dataset where the above condition might not always be true then we look at the probability of improvement between points. If the probability of improvement is below 5% (0.05) then the loop terminates because there is no further scope of improvement.

In [8]:
# setting the threshold to 5%
threshold = 0.05
stop_condition = False

while not stop_condition:
  # next points in the dataset according to Thompson Sampling 
  next_point_TS = acquisition_function_DA.get_next_point_thompson(gaussian_process=GP, exclude=exclude_points) 

  # applying first stopping condition
  if next_point_TS.tolist() in exclude_points.tolist():
    break

  # generating ground truth utility function for the next points
  ground_truth_utility_function_next_TS = user_pref.get_preference(next_point_TS, add_noise=True)
  # generating ground truth utility function for the current best point
  ground_truth_utility_function_current_best = user_pref.get_preference(current_best_point, add_noise=True)

  # adding the highest utility point, which is the winner, to the dataset only if the current point being evaluated according to 
  # expected improvement has a greater utility than the utility of the current best point evaluated previously
  if ground_truth_utility_function_next_TS > ground_truth_utility_function_current_best:
    utils_comparisons.add_single_comparison(next_point_TS, current_best_point)
    current_best_point = next_point_TS # we update the current best point 
  else:
    utils_comparisons.add_single_comparison(current_best_point, next_point_TS)
  
  # computing the datapoints for this comparisons dataset
  utility_comparisons_datapoints_next_EI = utils_comparisons.datapoints

  # updating the GP
  GP.update(utils_comparisons)

  # checking which points to exclude
  exclude_points = acquisition_function.exclude_points_pairwise(dataset=utils_comparisons)
  exclude_points = np.append(exclude_points, [current_best_point], axis=0)

  # difference between actual best utility and observed best utility
  regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

  # calculating the probability of improvement
  prob_imprv = acquisition_function.get_probability_of_improvement(x=next_point_TS, gaussian_process=GP, x_previous=current_best_point)

  print('Excluded points: ', exclude_points, '\n',
    'next points for the GP accordint to Thompson Sampling: ', next_point_TS, '\n',
    'Comparison dataset datapoints: ', utility_comparisons_datapoints_next_EI, '\n',
    'Current max: ', current_best_point, '\n', 
    'Ground truth utility of the current best: ', ground_truth_utility_function_current_best, '\n',
    'Excluded points: ', len(exclude_points), '\n',
    'Simple Regret: ', regret, '\n',
    'Probability of improvement: ', prob_imprv)
  
  # applying second stopping condition
  if prob_imprv < threshold:
    stop_condition = True

Excluded points:  [[0.21487083 0.97724899]
 [0.33305315 0.94955291]
 [1.         0.        ]
 [0.21487083 0.97724899]] 
 next points for the GP accordint to Thompson Sampling:  [1. 0.] 
 Comparison dataset datapoints:  [[0.21487083 0.97724899]
 [0.33305315 0.94955291]
 [1.         0.        ]] 
 Current max:  [0.21487083 0.97724899] 
 Ground truth utility of the current best:  [0.5340687] 
 Excluded points:  4 
 Simple Regret:  [0.20171337] 
 Probability of improvement:  [0.42630467]
Excluded points:  [[0.21487083 0.97724899]
 [0.33305315 0.94955291]
 [1.         0.        ]
 [0.91055917 0.45020785]
 [0.21487083 0.97724899]] 
 next points for the GP accordint to Thompson Sampling:  [0.91055917 0.45020785] 
 Comparison dataset datapoints:  [[0.21487083 0.97724899]
 [0.33305315 0.94955291]
 [1.         0.        ]
 [0.91055917 0.45020785]] 
 Current max:  [0.21487083 0.97724899] 
 Ground truth utility of the current best:  [0.58414507] 
 Excluded points:  5 
 Simple Regret:  [0.15163701]