In [1]:
import pandas as pd
import numpy as np
from scipy.stats import multivariate_normal
from gp_pref_elicit_luisa import dataset as data
from gp_pref_elicit_luisa import gaussian_process
from gp_pref_elicit_luisa import gaussian_process as GP
from momabs_bayesian import bayes_logistic as bayes_logistic
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users
from logistic_user import LogisticDecisionMaker as logistic

In [2]:
# Experiment configuration.
# Candidate points are parsed from a comma-separated file into a NumPy array.
synthetic_pcs = np.genfromtxt('synthetic_pcs_sets/obj2size100.csv', delimiter=',')
# Output paths; nothing is written to them in the cells shown here
# (presumably result files for this run -- confirm against later cells).
output_csv = 'experiments/output-BLR-TS_5-100.csv'
output_regret_csv = 'experiments/regret-BLR-TS_5-100.csv'
# Passed as num_objectives / no_obj to the objects built in the next cell.
objectives = 2
# Passed as num_features to LogisticDecisionMaker in the next cell.
features = 4

In [3]:
# Container for pairwise comparisons over `objectives` objectives.
utils_comparisons = data.DatasetPairwise(num_objectives=objectives)
# Build the GP through the module's own name rather than through the `GP` alias:
# `GP` is rebound to the instance on this very line, so an expression like
# `GP.GPPairwise(...)` only works the first time the cell runs and breaks on a
# re-run. Later cells still see `GP` as the GPPairwise instance.
GP = gaussian_process.GPPairwise(num_objectives=objectives)
# Simulated decision maker (LogisticDecisionMaker) that answers comparisons.
user_simulation = logistic(no_obj=objectives, num_features=features)

In [4]:
# ground_utility is applied to the whole candidate array
# (presumably one utility value per row -- confirm against LogisticDecisionMaker).
ground_utility_dataset = user_simulation.ground_utility(synthetic_pcs)
# Largest utility over the candidate set; the loop further down subtracts the
# incumbent's utility from this value to obtain the simple regret.
ground_truth_utility_function_dataset = np.max(ground_utility_dataset)
ground_truth_utility_function_dataset

0.7367688534135608

In [5]:
# Pick two distinct rows of the candidate set as the initial pair:
# first draw the row indices without replacement, then look the rows up.
n_candidates = synthetic_pcs.shape[0]
start_indices = np.random.choice(n_candidates, size=2, replace=False)
start_points = synthetic_pcs[start_indices]
start_points

array([[0.18479899, 0.98277634],
       [0.98431527, 0.1764184 ]])

In [6]:

# Compare the two start points with the simulated user; the result is kept as
# the incumbent ("current best"). In the recorded run it equals start_points[0].
# NOTE(review): exact_compare presumably returns the preferred of the two
# points -- confirm against LogisticDecisionMaker.
current_best = user_simulation.exact_compare(start_points[0], start_points[1]) 
current_best

array([0.18479899, 0.98277634])

In [7]:
# Ground-truth utility of the incumbent (a one-element array in the recorded run).
utility_current_best = user_simulation.ground_utility(current_best)
utility_current_best

array([0.7156667])

In [8]:
# getting points according to thompson sampling 
thompson_point = user_simulation.thompson_sampled_point(synthetic_pcs)
thompson_point

array([0.99997976, 0.00636164])

In [9]:
# Ground-truth utility of the Thompson-sampled point, for comparison with the
# incumbent's utility computed earlier.
utility_thompson_point = user_simulation.ground_utility(thompson_point)
utility_thompson_point

array([0.30401476])

In [10]:
# Compare the incumbent with the Thompson-sampled point; whatever exact_compare
# returns becomes the new incumbent. In the recorded run the incumbent is unchanged.
current_best = user_simulation.exact_compare(current_best, thompson_point)
current_best

array([0.18479899, 0.98277634])

In [11]:
# exclude_points is called with a one-element list holding the incumbent.
# In the recorded run the result is a 1-D array equal to current_best; the loop
# in the following code cell uses it for its stop check.
exclude_point = user_simulation.exclude_points([current_best])
exclude_point

array([0.18479899, 0.98277634])

As seen above, the covariance matrix has independent variances, i.e. it is diagonal: the off-diagonal covariance terms are zero.

In [12]:
# Thompson-sampling loop for BLR.
#
# Each iteration:
#   1. samples a candidate from the remaining pool,
#   2. flags the loop to stop if that candidate is one of the excluded points,
#   3. compares the candidate with the incumbent and keeps the result,
#   4. reports the simple regret of the incumbent against the best utility of
#      the full candidate set,
#   5. removes the sampled candidate from the pool.
#
# The iteration in which an excluded point is sampled is still completed
# (compared, removed, printed) before the loop exits. The loop also exits once
# the pool is exhausted, so the sampler is never called on an empty pool.

# One excluded point per row, whether exclude_point holds a single point or several.
excluded_rows = np.atleast_2d(exclude_point)
# Keep the pool an ndarray so earlier cells that use `.shape` still work on re-run.
synthetic_pcs = np.asarray(synthetic_pcs)
stop = False

while not stop and len(synthetic_pcs) > 0:

    next_point_ts = user_simulation.thompson_sampled_point(synthetic_pcs)

    # Whole-point membership test. `point in ndarray` evaluates
    # `(ndarray == point).any()`, which is already true when a single
    # coordinate coincides, so rows are compared explicitly instead.
    if any(np.array_equal(next_point_ts, row) for row in excluded_rows):
        stop = True

    current_best = user_simulation.exact_compare(next_point_ts, current_best)

    ground_truth_utility_function_next_point_ts = user_simulation.ground_utility(next_point_ts)
    ground_truth_utility_function_current_best = user_simulation.ground_utility(current_best)

    # Simple regret: best achievable utility minus the incumbent's utility.
    regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

    # Drop every row equal to the sampled point; boolean masking keeps the
    # pool a 2-D ndarray instead of degrading it to a Python list of rows.
    synthetic_pcs = synthetic_pcs[~(synthetic_pcs == next_point_ts).all(axis=1)]

    print('Next Point according to thompson sampling: ', next_point_ts, '\n',
          'Current best point: ', current_best, '\n',
          'Ground Truth Utility of Thompson Sampled Point', ground_truth_utility_function_next_point_ts, '\n',
          'Ground Truth Utility of Current best point: ', ground_truth_utility_function_current_best, '\n',
          'Simple Regret: ', regret)

Next Point according to thompson sampling:  [0.99997976 0.00636164] 
 Current best point:  [0.18479899 0.98277634] 
 Ground Truth Utility of Thompson Sampled Point [0.30401476] 
 Ground Truth Utility of Current best point:  [0.7156667] 
 Simple Regret:  [0.02110215]
Next Point according to thompson sampling:  [0.99965233 0.02636684] 
 Current best point:  [0.18479899 0.98277634] 
 Ground Truth Utility of Thompson Sampled Point [0.30906237] 
 Ground Truth Utility of Current best point:  [0.7156667] 
 Simple Regret:  [0.02110215]
Next Point according to thompson sampling:  [0.99965068 0.02642936] 
 Current best point:  [0.18479899 0.98277634] 
 Ground Truth Utility of Thompson Sampled Point [0.30907861] 
 Ground Truth Utility of Current best point:  [0.7156667] 
 Simple Regret:  [0.02110215]
Next Point according to thompson sampling:  [0.99958229 0.02890048] 
 Current best point:  [0.18479899 0.98277634] 
 Ground Truth Utility of Thompson Sampled Point [0.30972259] 
 Ground Truth Utility