In [2]:
import pandas as pd
import numpy as np
import csv
import os
import time

In [3]:
from scipy.stats import multivariate_normal
from gp_pref_elicit_luisa import dataset as data
from gp_pref_elicit_luisa import gaussian_process as GP
from momabs_bayesian import bayes_logistic as bayes_logistic
from gp_pref_elicit_luisa.gp_utilities import utils_user as gp_utils_users
from logistic_user import LogisticDecisionMaker as logistic 

In [4]:
synthetic_pcs = np.genfromtxt('synthetic_pcs_sets/obj5size100.csv', delimiter=',')
output_csv = 'experiments/output-BLR_5-100.csv'
output_regret_csv = 'experiments/regret-csvs/regret-BLR_5-100.csv'
objectives = 5

In [5]:
# initializing dataset class
utils_comparisons = data.DatasetPairwise(num_objectives=objectives)
GP = GP.GPPairwise(num_objectives=objectives)
# making an object of LogisticDecisionMaker class
user_simulation = logistic(no_obj=objectives)

In [6]:
dummy_feature_vector = user_simulation.features(np.zeros(objectives))
len(dummy_feature_vector)

25

In [7]:
ground_utility_dataset = user_simulation.ground_utility(synthetic_pcs)
ground_truth_utility_function_dataset = np.max(ground_utility_dataset)
ground_truth_utility_function_dataset

0.4394532693542663

In [8]:
# generating random points to start with from synthetic pcs 
start_points = synthetic_pcs[np.random.choice(synthetic_pcs.shape[0], size=2, replace=False)]

# comparing the initial points for getting the current best
current_best = user_simulation.exact_compare(start_points[0], start_points[1]) 

utility_current_best = user_simulation.ground_utility(current_best)

# getting points according to thompson sampling 
thompson_point = user_simulation.thompson_sampled_point(synthetic_pcs)

utility_thompson_point = user_simulation.ground_utility(thompson_point)

# comparing the current best and the thompson sampled point
current_best = user_simulation.exact_compare(current_best, thompson_point)

exclude_point = user_simulation.exclude_points([current_best])


print('Start points: ', start_points, '\n',
      'Current Best Point: ', current_best, '\n',
      'Utility of the Current Best Point: ', utility_current_best, '\n',
      'Thompson Sampled Point: ', thompson_point, '\n',
      'Utility of the Thompson Point: ', utility_thompson_point, '\n',
      'Exclude Point: ', exclude_point)

Start points:  [[0.18848239 0.47491632 0.83066665 0.17022018 0.14123343]
 [0.10695672 0.1115033  0.79034499 0.4135899  0.42476519]] 
 Current Best Point:  [0.55946852 0.54196124 0.3235303  0.02909763 0.53642751] 
 Utility of the Current Best Point:  [0.33026739] 
 Thompson Sampled Point:  [0.55946852 0.54196124 0.3235303  0.02909763 0.53642751] 
 Utility of the Thompson Point:  [0.37021248] 
 Exclude Point:  [0.55946852 0.54196124 0.3235303  0.02909763 0.53642751]


In [9]:
# loop for BLR with stopping condition: if the current best point is already in the list of excluded points
# terminate the loop else continue
stop = False
counter = 0 # counter variable to track the number of queries
regret_hist = [] # to track and store the regret at each step
last_counter = 0 # to store the total number of queries (which we get from the last iteration)
last_regret = 0 # to store the regret gained from the last iteration

first_run = not os.path.exists(output_csv) # checking if the file exists in the directory

regret_file_check = os.path.exists(output_regret_csv) # checking if the file containing regret values at each step exists

# we output all values for regret gained at each iteration in a new csv file
with open(output_regret_csv, 'a', newline='') as csvfile:
    write_to_regret_csv = csv.writer(csvfile)

    # checking if the file exists already, if not we include the header or else we don't
    if not regret_file_check:
        write_to_regret_csv.writerow(['Step', 'Regret'])

    starting_time = time.time() # starting the timer 

    while not stop:

        next_point_ts= user_simulation.thompson_sampled_point(synthetic_pcs)

        # for point_to_explore in current_best:
        #     if np.array_equal(next_point_ts, point_to_explore):
        #         stop = True
        #         break
        if next_point_ts.tolist() in exclude_point:
            last_counter = counter
            last_regret = regret
            stop = True
        
        # print('previous comparisons: ', user_simulation.previous_comparisons)
        # print('previous outcomes: ', user_simulation.previous_outcomes)
        # exclude_point_ts = user_simulation.exclude_points(synthetic_pcs)
        # print('exclude point: ', exclude_point_ts)
        current_best = user_simulation.exact_compare(next_point_ts, current_best)
        
        ground_truth_utility_function_next_point_ts = user_simulation.ground_utility(next_point_ts)
        ground_truth_utility_function_current_best = user_simulation.ground_utility(current_best)

        regret = np.subtract(ground_truth_utility_function_dataset, ground_truth_utility_function_current_best)

        # calculating regret for each step
        regret_step = regret
        regret_hist.append(regret) # appending the list to contain regret at every step
        print('Regret at step {}: {}'.format(counter, regret_step))
        write_to_regret_csv.writerow([counter + 1, regret_step]) # writing to csv
        print('Regret history saved')

        synthetic_pcs = [v for v in synthetic_pcs if not np.array_equal(v, next_point_ts)]

        # incrementing the counter variable
        counter += 1

        # stop the timer
        ending_time = time.time()
        total_time = ending_time - starting_time
        last_counter = counter
        last_regret = regret
        # next_point_ts_index = np.where((synthetic_pcs == next_point_ts).all(axis=1))[0]
        # synthetic_pcs = np.delete(next_point_ts, next_point_ts_index, axis=0) 
        # synthetic_pcs.remove(next_point_ts)

        print('Next Point according to thompson sampling: ', next_point_ts, '\n',
              'Current best point: ', current_best, '\n',
              'Ground Truth Utility of Thompson Sampled Point', ground_truth_utility_function_next_point_ts, '\n',
              'Ground Truth Utility of Current best point: ', ground_truth_utility_function_current_best, '\n',
              'Simple Regret: ', regret, '\n',
              'Number of Queries: ', counter, '\n',
              'Computation Time: ', total_time)
    
# we output the counter and regret of the last iteration to a csv for comparisons
with open(output_csv, 'a', newline='') as csvfile:
    write_to_csv = csv.writer(csvfile)

    # write the header row only for the first run
    if first_run: # if the file does not exist then we output the header otherwise we output only the data
        write_to_csv.writerow(['Number of Queries', 'Regret'])
    write_to_csv.writerow([last_counter, last_regret])
print('Successfully written to csv')

Regret at step 0: [0.06924079]
Regret history saved
Next Point according to thompson sampling:  [0.06268156 0.32592893 0.90573732 0.09428746 0.24615266] 
 Current best point:  [0.55946852 0.54196124 0.3235303  0.02909763 0.53642751] 
 Ground Truth Utility of Thompson Sampled Point [0.24953743] 
 Ground Truth Utility of Current best point:  [0.37021248] 
 Simple Regret:  [0.06924079] 
 Number of Queries:  1 
 Computation Time:  0.022475719451904297
Regret at step 1: [0.06924079]
Regret history saved
Next Point according to thompson sampling:  [0.04497888 0.2107041  0.21421156 0.3167206  0.89854446] 
 Current best point:  [0.55946852 0.54196124 0.3235303  0.02909763 0.53642751] 
 Ground Truth Utility of Thompson Sampled Point [0.1554504] 
 Ground Truth Utility of Current best point:  [0.37021248] 
 Simple Regret:  [0.06924079] 
 Number of Queries:  2 
 Computation Time:  0.042331695556640625
Regret at step 2: [0.06924079]
Regret history saved
Next Point according to thompson sampling:  [