In [1]:
import pandas as pd
import numpy as np
from scipy.stats import multivariate_normal
from gp_pref_elicit_luisa import dataset as data
from gp_pref_elicit_luisa import gaussian_process as GP
from momabs_bayesian import bayes_logistic as bayes_logistic
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users
from logistic_user import LogisticDecisionMaker as logistic 

In [2]:
# initializing dataset class
utils_comparisons = data.DatasetPairwise(num_objectives=2)
GP = GP.GPPairwise(num_objectives=2)
# making an object of LogisticDecisionMaker class
user_simulation = logistic(no_obj=2, num_features=4)

In [3]:
# synthetic pareto coverage set
synthetic_pcs = np.array([[0.14370116, 0.99159928],
       [0.9797389 , 0.2242916 ],
       [0.        , 1.        ],
       [0.91055917, 0.45020785],
       [0.59678925, 0.81854996],
       [1.        , 0.        ],
       [0.94198057, 0.352479  ],
       [0.81501748, 0.65358114],
       [0.99814566, 0.05429028],
       [0.33305315, 0.94955291],
       [0.28860669, 0.96123215],
       [0.99999796, 0.02591092],
       [0.98910769, 0.14092867],
       [0.18584726, 0.98334638],
       [0.05210043, 0.99838732],
       [0.87761802, 0.52114756],
       [0.74002719, 0.7144284 ],
       [0.21487083, 0.97724899],
       [0.43622937, 0.90230767],
       [0.99525346, 0.08213535]])

In [4]:
ground_utility_dataset = user_simulation.ground_utility(synthetic_pcs)
ground_truth_utility_function_dataset = np.max(ground_utility_dataset)
ground_truth_utility_function_dataset

0.7668917939758261

In [5]:
# generating random points to start with from synthetic pcs 
start_points = synthetic_pcs[np.random.choice(synthetic_pcs.shape[0], size=2, replace=False)]
start_points

array([[0.43622937, 0.90230767],
       [0.18584726, 0.98334638]])

In [6]:

# comparing the initial points for getting the current best
current_best = user_simulation.exact_compare(start_points[0], start_points[1]) 
current_best

array([0.43622937, 0.90230767])

In [7]:
utility_current_best = user_simulation.ground_utility(current_best)
utility_current_best

array([0.39788135])

In [8]:
# getting points according to thompson sampling 
thompson_point = user_simulation.thompson_sampled_point(synthetic_pcs)
thompson_point

excluded points:  [array([0.99525346, 0.08213535])]


array([0.74002719, 0.7144284 ])

In [9]:
utility_thompson_point = user_simulation.ground_utility(thompson_point)
utility_thompson_point

array([0.50167928])

In [10]:
# comparing the current best and the thompson sampled point
comp = user_simulation.exact_compare(current_best, thompson_point)
comp

array([0.74002719, 0.7144284 ])

In [11]:
exclude_point = user_simulation.exclude_points([comp])
exclude_point

[array([0.74002719, 0.7144284 ])]

As seen above, the covariance matrix has independent variances.

In [12]:
# TODO: loop for BLR with stopping condition: if the current best point is already in the list of excluded points
# terminate the loop else continue
stop = False

while not stop:

    next_point_ts, exclude_point = user_simulation.thompson_sampled_point(synthetic_pcs, exclude_point)

    # for point_to_explore in current_best:
    #     if np.array_equal(next_point_ts, point_to_explore):
    #         stop = True
    #         break
    if next_point_ts.tolist() in current_best.tolist():
        stop = True
    
    # print('previous comparisons: ', user_simulation.previous_comparisons)
    # print('previous outcomes: ', user_simulation.previous_outcomes)
    # exclude_point_ts = user_simulation.exclude_points(synthetic_pcs)
    # print('exclude point: ', exclude_point_ts)
    current_best = user_simulation.exact_compare(next_point_ts, current_best)
    
    ground_truth_utility_function_next_point_ts = user_simulation.ground_utility(next_point_ts)
    ground_truth_utility_function_current_best = user_simulation.ground_utility(current_best)

    regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

    print('Next Point according to thompson sampling: ', next_point_ts, '\n',
          'Current best point: ', current_best, '\n',
          'Ground Truth Utility of Current best point: ', ground_truth_utility_function_current_best, '\n',
          'Simple Regret: ', regret)

excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [1. 0.] 
 Current best point:  [1. 0.] 
 Ground Truth Utility of Current best point:  [0.55635035] 
 Simple Regret:  [0.21054144]
excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [0.81501748 0.65358114] 
 Current best point:  [1. 0.] 
 Ground Truth Utility of Current best point:  [0.61307034] 
 Simple Regret:  [0.15382145]
excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [0. 1.] 
 Current best point:  [1. 0.] 
 Ground Truth Utility of Current best point:  [0.55080415] 
 Simple Regret:  [0.21608765]
excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [0.91055917 0.45020785] 
 Current best point:  [0.91055917 0.45020785] 
 Ground Truth Utility of Current best point:  [0.67523226] 
 Simple Regret:  [0.09165954]
excluded points:  [array([0.99525346, 0.08213535

Next Point according to thompson sampling:  [1. 0.] 
 Current best point:  [1. 0.] 
 Ground Truth Utility of Current best point:  [0.5525715] 
 Simple Regret:  [0.21432029]
excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [0. 1.] 
 Current best point:  [1. 0.] 
 Ground Truth Utility of Current best point:  [0.73152468] 
 Simple Regret:  [0.03536711]
excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [1. 0.] 
 Current best point:  [1. 0.] 
 Ground Truth Utility of Current best point:  [0.72829843] 
 Simple Regret:  [0.03859336]
excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [0. 1.] 
 Current best point:  [1. 0.] 
 Ground Truth Utility of Current best point:  [0.43885178] 
 Simple Regret:  [0.32804002]
excluded points:  [array([0.99525346, 0.08213535])]
Next Point according to thompson sampling:  [0. 1.] 
 Current best point:  [1. 0.] 
 Ground Truth

KeyboardInterrupt: 