In [14]:
import pandas as pd
import numpy as np
from scipy.stats import multivariate_normal
from gp_pref_elicit_luisa import dataset as data
from gp_pref_elicit_luisa import gaussian_process as GP
from momabs_bayesian import bayes_logistic as bayes_logistic
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users
from logistic_user import LogisticDecisionMaker as logistic 

In [15]:
# initializing dataset class
utils_comparisons = data.DatasetPairwise(num_objectives=2)
GP = GP.GPPairwise(num_objectives=2)
# making an object of LogisticDecisionMaker class
user_simulation = logistic(no_obj=2, num_features=4)

In [16]:
# synthetic pareto coverage set
synthetic_pcs = np.array([[0.14370116, 0.99159928],
       [0.9797389 , 0.2242916 ],
       [0.        , 1.        ],
       [0.91055917, 0.45020785],
       [0.59678925, 0.81854996],
       [1.        , 0.        ],
       [0.94198057, 0.352479  ],
       [0.81501748, 0.65358114],
       [0.99814566, 0.05429028],
       [0.33305315, 0.94955291],
       [0.28860669, 0.96123215],
       [0.99999796, 0.02591092],
       [0.98910769, 0.14092867],
       [0.18584726, 0.98334638],
       [0.05210043, 0.99838732],
       [0.87761802, 0.52114756],
       [0.74002719, 0.7144284 ],
       [0.21487083, 0.97724899],
       [0.43622937, 0.90230767],
       [0.99525346, 0.08213535]])

In [17]:
ground_utility_dataset = user_simulation.ground_utility(synthetic_pcs)
ground_truth_utility_function_dataset = np.max(ground_utility_dataset)
ground_truth_utility_function_dataset

0.8912907792277498

In [18]:
# generating random points to start with from synthetic pcs 
start_points = synthetic_pcs[np.random.choice(synthetic_pcs.shape[0], size=2, replace=False)]
start_points

array([[0.21487083, 0.97724899],
       [0.33305315, 0.94955291]])

In [19]:

# comparing the initial points for getting the current best
current_best = user_simulation.exact_compare(start_points[0], start_points[1]) 
current_best

array([0.33305315, 0.94955291])

In [20]:
utility_current_best = user_simulation.ground_utility(current_best)
utility_current_best

array([0.64720583])

In [21]:
# getting points according to thompson sampling 
thompson_point = user_simulation.thompson_sampled_point(synthetic_pcs)
thompson_point

array([1., 0.])

In [22]:
utility_thompson_point = user_simulation.ground_utility(thompson_point)
utility_thompson_point

array([0.43391549])

In [23]:
# comparing the current best and the thompson sampled point
comp = user_simulation.exact_compare(current_best, thompson_point)
comp

array([0.33305315, 0.94955291])

In [24]:
# passing the comparison to bayesian logistic regression to get the mean and the covariance matrix 
user_simulation.current_map()

(array([0.22376206, 0.29484237, 0.24606416, 0.27254027]),
 array([9.29622448, 9.40589501, 9.18355746, 9.21365676]))

In [25]:
exclude_point = user_simulation.exclude_points([comp])
exclude_point

[array([0.33305315, 0.94955291])]

As seen above, the covariance matrix has independent variances.

In [26]:
# TODO: loop for BLR with stopping condition: if the current best point is already in the list of excluded points
# terminate the loop else continue
stop = False

while not stop:

    next_point_ts = user_simulation.thompson_sampled_point(synthetic_pcs)

    if next_point_ts.tolist() in current_best.tolist():
        stop = True
    
    current_best = user_simulation.exact_compare(next_point_ts, current_best)
    
    ground_truth_utility_function_next_point_ts = user_simulation.ground_utility(next_point_ts)
    ground_truth_utility_function_current_best = user_simulation.ground_utility(current_best)

    regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

    print('Next Point according to thompson sampling: ', next_point_ts, '\n',
          'Current best point: ', current_best, '\n',
          'Ground Truth Utility of Current best point: ', ground_truth_utility_function_current_best, '\n',
          'Simple Regret: ', regret)

Next Point according to thompson sampling:  [0. 1.] 
 Current best point:  [0. 1.] 
 Ground Truth Utility of Current best point:  [0.48826345] 
 Simple Regret:  [0.40302733]
Next Point according to thompson sampling:  [1. 0.] 
 Current best point:  [0. 1.] 
 Ground Truth Utility of Current best point:  [0.67437309] 
 Simple Regret:  [0.21691769]
Next Point according to thompson sampling:  [0.33305315 0.94955291] 
 Current best point:  [0.33305315 0.94955291] 
 Ground Truth Utility of Current best point:  [0.76683307] 
 Simple Regret:  [0.12445771]
Next Point according to thompson sampling:  [0. 1.] 
 Current best point:  [0.33305315 0.94955291] 
 Ground Truth Utility of Current best point:  [0.81519719] 
 Simple Regret:  [0.07609359]
Next Point according to thompson sampling:  [0.74002719 0.7144284 ] 
 Current best point:  [0.74002719 0.7144284 ] 
 Ground Truth Utility of Current best point:  [0.89603314] 
 Simple Regret:  [-0.00474236]
Next Point according to thompson sampling:  [1. 0

Next Point according to thompson sampling:  [0.74002719 0.7144284 ] 
 Current best point:  [0.81501748 0.65358114] 
 Ground Truth Utility of Current best point:  [0.75129989] 
 Simple Regret:  [0.13999089]
Next Point according to thompson sampling:  [1. 0.] 
 Current best point:  [0.81501748 0.65358114] 
 Ground Truth Utility of Current best point:  [0.80277631] 
 Simple Regret:  [0.08851447]
Next Point according to thompson sampling:  [0.74002719 0.7144284 ] 
 Current best point:  [0.74002719 0.7144284 ] 
 Ground Truth Utility of Current best point:  [0.82560455] 
 Simple Regret:  [0.06568623]
Next Point according to thompson sampling:  [0.05210043 0.99838732] 
 Current best point:  [0.74002719 0.7144284 ] 
 Ground Truth Utility of Current best point:  [0.98939489] 
 Simple Regret:  [-0.09810411]
Next Point according to thompson sampling:  [0.14370116 0.99159928] 
 Current best point:  [0.74002719 0.7144284 ] 
 Ground Truth Utility of Current best point:  [0.93422342] 
 Simple Regret:

KeyboardInterrupt: 