-
Notifications
You must be signed in to change notification settings - Fork 4
/
utils.py
418 lines (347 loc) · 23 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
# -*- coding: utf-8 -*-
"""
-------------------------------------------------------------------------------
Authors: Parshan Pakiman | https://parshanpakiman.github.io/
Selva Nadarajah | https://selvan.people.uic.edu/
Licensing Information: The MIT License
-------------------------------------------------------------------------------
"""
from scipy.stats import sem,t
import numpy as np
import pandas as pd
import os
from datetime import datetime
from shutil import copyfile
from itertools import chain, combinations
def index_unique_sub_list(input_list):
    #--------------------------------------------------------------------------
    # Returns the positions of the first occurrence of each unique value
    # (row, for 2-D input) in input_list, ordered by sorted unique value.
    #--------------------------------------------------------------------------
    array_form = np.asarray(input_list)
    first_positions = np.unique(array_form, return_index=True, axis=0)[1]
    return first_positions
def mean_confidence_interval(data, confidence=0.95):
    #--------------------------------------------------------------------------
    # Computes a two-sided Student-t confidence interval around the sample
    # mean of data. Returns (mean, lower bound, upper bound, standard error).
    #--------------------------------------------------------------------------
    sample = np.asarray(data, dtype=float)
    num_obs = len(sample)
    sample_mean = sample.mean()
    std_err = sem(sample)
    # Half-width of the interval at the requested confidence level.
    half_width = std_err * t.ppf(0.5 * (1.0 + confidence), num_obs - 1)
    return sample_mean, sample_mean - half_width, sample_mean + half_width, std_err
def make_text_bold(string):
    #--------------------------------------------------------------------------
    # Wraps a value in ANSI escape codes so it renders bold in a terminal.
    #--------------------------------------------------------------------------
    return f'\033[1m{string}\033[0m'
class output_handler:
    #--------------------------------------------------------------------------
    # Collects and stores outputs of an ALP algorithm run (FALP/FGLP): a
    # per-iteration results table and a lower-bound trace, written as CSV
    # files under Output/<mdp_name>/instance_<instance_number>/.
    #--------------------------------------------------------------------------
    def __init__(self,instance_conf):
        #----------------------------------------------------------------------
        # Initialization: cache the configuration fields used for naming the
        # output files, then create/validate the output directory.
        #   instance_conf : nested dict with keys 'mdp_conf',
        #                   'basis_func_conf', and 'greedy_pol_conf'.
        #----------------------------------------------------------------------
        self.mdp_name = instance_conf['mdp_conf']['mdp_name']
        self.basis_func_type = instance_conf['basis_func_conf']['basis_func_type']
        self.batch_size = instance_conf['basis_func_conf']['batch_size']
        self.instance_number = instance_conf['mdp_conf']['instance_number']
        self.state_relevance_inner_itr = instance_conf['greedy_pol_conf']['state_relevance_inner_itr']
        self.update_constr_via_greedy_pol = instance_conf['greedy_pol_conf']['update_constr_via_greedy_pol' ]
        # Accumulates one row per append_to_outputs call.
        self.output_table = pd.DataFrame()
        self.path = None
        self.filename = None
        # Name of the lower-bound trace file (stored inside self.path).
        self.lb_filename = '/LowerBound_' + self.mdp_name +'.csv'
        self.setup_output_path()
    def setup_output_path(self):
        #----------------------------------------------------------------------
        # Set the path to store outputs. Requires Output/<mdp_name> to exist
        # already; creates the instance subfolder if missing and snapshots the
        # instance configuration file next to the results for reproducibility.
        # NOTE(review): instance_number is concatenated into path strings, so
        # it is presumably stored as a string in the config — confirm.
        #----------------------------------------------------------------------
        self.path = 'Output/' + self.mdp_name
        assert os.path.isdir(self.path)
        if not os.path.isdir(self.path + '/instance_'+self.instance_number):
            os.mkdir(self.path + '/instance_'+self.instance_number)
        self.path = self.path + '/instance_'+self.instance_number
        copyfile('MDP/'+ self.mdp_name+ '/Instances/instance_'+self.instance_number+'.py', self.path + '/instance_'+self.instance_number+'.py')
    def save_lower_bound(self,lower_bound_list):
        #----------------------------------------------------------------------
        # Save the lower-bound trace (list of rows, one per iteration) into
        # the CSV file self.path + self.lb_filename.
        #----------------------------------------------------------------------
        pd.DataFrame(lower_bound_list,columns=['# bases','# constrs','FALP Obj','ALP ConT', 'ALP SlvT','lb_mean', 'lb_lb','lb_ub', 'lb_se','LB RT','best_lower_bound','TOT RT']).to_csv(self.path + self.lb_filename)
    def load_lower_bound(self):
        #----------------------------------------------------------------------
        # Load the lower-bound trace from file and return the last row's
        # (lb_mean, lb_lb, lb_ub, lb_se, best_lower_bound) as a flat array.
        #----------------------------------------------------------------------
        df = pd.read_csv(self.path + self.lb_filename)
        df = df[['lb_mean', 'lb_lb','lb_ub', 'lb_se','best_lower_bound']]
        return np.asarray(df.iloc[[-1]]).flatten()
    def append_to_outputs( self,
                           algorithm_name: str, # FALP, FGLP
                           state_relevance_name: str, # uniform, (5,5,5), greedy_policy
                           basis_seed: int, # seed number for basis function
                           num_basis_func: int, # 10, 20, ...
                           num_constr: int, # num of constraints in ALP
                           FALP_obj: float, # value of ALP objective
                           ALP_con_runtime: float, # time to construct ALP to get VFA
                           ALP_slv_runtime: float, # time to solve ALP to get VFA
                           best_lower_bound: float, # best lower bound on the optimal cost until the current iteration
                           lower_bound_lb: float, # 95% lower bound on the optimal cost lower bound
                           lower_bound_mean: float, # mean lower bound on the optimal cost
                           lower_bound_se: float, # standard error of the lower bound on the optimal cost
                           lower_bound_ub: float, # 95% upper bound on the optimal cost lower bound
                           lower_bound_runtime: float, # runtime of computing lower bound on the optimal cost
                           best_policy_cost: float, # best upper bound (policy cost) on the optimal cost until the current iteration
                           policy_cost_lb: float, # 95% lower bound on the greedy policy cost
                           policy_cost_mean: float, # mean of the greedy policy cost
                           policy_cost_se: float, # standard error of greedy policy cost
                           policy_cost_ub: float, # 95% upper bound on the greedy policy cost
                           policy_cost_runtime: float, # runtime of computing greedy policy cost
                           total_runtime: float, # total runtime
                           SGFALP_obj: float = None, # optional self-guided FALP objective
                           SG_runtime: float = None, # optional self-guiding runtime
                           ):
        #----------------------------------------------------------------------
        # Having the algorithm's results up to the current iteration, append
        # a new row to the output table and rewrite the CSV file. The file
        # name encodes the MDP, basis type, algorithm, state relevance,
        # batch size, and seed, so different runs do not collide.
        #----------------------------------------------------------------------
        if self.update_constr_via_greedy_pol == True:
            self.filename = '/' + self.mdp_name + '_' + self.basis_func_type + '_' + algorithm_name + '_' +\
                state_relevance_name+'_inner_update_'+str(self.state_relevance_inner_itr)+\
                '_Batch_'+str(self.batch_size) + '_update_constr_via_pol_seed_' + str(basis_seed) +'.csv'
        else:
            self.filename = '/' + self.mdp_name + '_' + self.basis_func_type + '_' + algorithm_name + '_' +\
                state_relevance_name+'_inner_update_'+str(self.state_relevance_inner_itr)+\
                '_Batch_'+str(self.batch_size)+ '_seed_' + str(basis_seed) +'.csv'
        # Optional self-guided columns stay None (empty in CSV) when not given.
        SGFALP_ = None if SGFALP_obj is None else[round(SGFALP_obj,1)]
        SG_runtime_ = None if SG_runtime is None else[round(SG_runtime,4)]
        # Guard against division by zero/inf when the policy cost is degenerate.
        if not policy_cost_mean in [0.0,float('inf')]:
            opt_gap_ = 100*(policy_cost_mean - lower_bound_mean)/policy_cost_mean
        else:
            opt_gap_ = float('inf')
        info =\
            { 'update time' : datetime.now().strftime("%d-%m-%Y - %H : %M"),
              'mdp' : [self.mdp_name],
              'algorithm' : [algorithm_name],
              'basis_func_seed' : [basis_seed],
              'state relevance' : [state_relevance_name],
              '# bases' : [num_basis_func],
              '# constrs' : [num_constr],
              'FALP obj' : [round(FALP_obj,1)],
              'SGFALP' : SGFALP_,
              'ALP Constr time' : [round(ALP_con_runtime,4)],
              'ALP Solve time' : [round(ALP_slv_runtime,4)],
              'SG time' : SG_runtime_,
              'best_lower_bound' : [round(best_lower_bound,1)],
              'lower bound lb' : [round(lower_bound_lb,1)],
              'lower bound mean' : [round(lower_bound_mean,1)],
              'lower bound se' : [round(lower_bound_se,2)],
              'lower bound ub' : [round(lower_bound_ub,1)],
              'lower bound runtime' : [round(lower_bound_runtime,4)],
              'best_policy_cost' : [round(best_policy_cost,1)],
              'policy cost lb' : [round(policy_cost_lb,1)],
              'policy cost mean' : [round(policy_cost_mean,1)],
              'policy cost se' : [round(policy_cost_se,2)],
              'policy cost ub' : [round(policy_cost_ub,1)],
              'policy cost runtime' : [round(policy_cost_runtime,4)],
              'tot runtime' : [round(total_runtime,4)],
              'opt gap' : [round(opt_gap_,1)],
              # Relative movement of this iteration's bounds vs. the best so
              # far. NOTE(review): divides by best_lower_bound and
              # best_policy_cost — assumes both are nonzero; confirm.
              'lower bound fluctuation' : [round(100*(lower_bound_mean - best_lower_bound)/best_lower_bound,1)],
              'policy cost fluctuation' : [round(100*(best_policy_cost - policy_cost_mean)/best_policy_cost,1)],
            }
        self.output_table = pd.concat([self.output_table, pd.DataFrame(info)],ignore_index = True)
        self.output_table.to_csv(self.path + self.filename)
def is_PIC_config_valid(config):
    #--------------------------------------------------------------------------
    # Validation hook for PIC application instances, called inside each
    # instance file. Add assertions on `config` here as needed; the current
    # implementation accepts every configuration.
    #--------------------------------------------------------------------------
    return None
def prune_similar_columns(matrix,threshold):
    #--------------------------------------------------------------------------
    # Identify redundant (near-duplicate) columns of a matrix; not used in the
    # current code. Two columns are "similar" when the maximum absolute
    # entrywise difference between them is below threshold. The first column
    # of each similarity group is kept and the indices of all remaining
    # (prunable) columns are returned, in increasing order.
    #
    # BUG FIX: the original computed `remove` over range(len(similar_columns)),
    # i.e. over indices into the *group list* rather than the matrix's column
    # indices, so prunable columns beyond the first group were silently
    # missed. We now return every column index that is not kept.
    #--------------------------------------------------------------------------
    num_columns = len(matrix.T)
    already_considered = []
    keep = []
    for i in range(num_columns):
        if i in already_considered:
            continue
        # Column i as an (n,1) array so broadcasting compares it against
        # every column of the matrix at once.
        column = np.asarray([matrix.T[i]]).T
        norm = np.max(np.abs(column - matrix), axis=0)
        # Columns within threshold of column i (always includes i itself).
        index = [j for j in range(len(norm)) if norm[j] < threshold]
        already_considered += index
        keep.append(i)
    remove = [j for j in range(num_columns) if j not in keep]
    return remove
class output_handler_option_pricing:
    #--------------------------------------------------------------------------
    # Collects and stores outputs of an option-pricing algorithm (e.g. LSM)
    # as CSV files under Output/<mdp_name>/instance_<instance_number>/.
    #--------------------------------------------------------------------------
    def __init__(self,instance_conf):
        #----------------------------------------------------------------------
        # Initialization: cache the configuration fields used for naming the
        # output files, then create/validate the output directory.
        #   instance_conf : nested dict with keys 'mdp_conf' and
        #                   'basis_func_conf'.
        #----------------------------------------------------------------------
        self.mdp_name = instance_conf['mdp_conf']['mdp_name']
        self.state_relevance_type = instance_conf['mdp_conf']['state_relevance_type']
        self.basis_func_type = instance_conf['basis_func_conf']['basis_func_type']
        self.batch_size = instance_conf['basis_func_conf']['batch_size']
        self.instance_number = instance_conf['mdp_conf']['instance_number']
        # Accumulates one row per append_to_outputs call.
        self.output_table = pd.DataFrame()
        self.path = None
        self.filename = None
        self.setup_output_path()
    def setup_output_path(self):
        #----------------------------------------------------------------------
        # Set the path to store outputs. Requires Output/<mdp_name> to exist
        # already; creates the instance subfolder if missing and snapshots the
        # instance configuration file next to the results for reproducibility.
        #----------------------------------------------------------------------
        self.path = 'Output/' + self.mdp_name
        assert os.path.isdir(self.path)
        if not os.path.isdir(self.path + '/instance_'+self.instance_number):
            os.mkdir(self.path + '/instance_'+self.instance_number)
        self.path = self.path + '/instance_'+self.instance_number
        copyfile('MDP/'+ self.mdp_name+ '/Instances/instance_'+self.instance_number+'.py', self.path + '/instance_'+self.instance_number+'.py')
    def append_to_outputs( self,
                           algorithm_name: str, # LSM
                           state_relevance_type: str,
                           basis_func_type: str,
                           basis_seed: int, # seed number for basis function
                           basis_bandwidth_str: str,
                           abs_val_upp_bound: str,
                           max_basis_num: int, # max number of basis functions
                           num_basis_func: int, # number of basis functions, ...
                           num_train_samples: int, # num of sample paths to train C/VFA
                           num_test_samples: int, # num of sample paths to test policy performance via C/VFA
                           num_inner_samples: int, # num of inner samples while fitting C/VFA
                           train_LB_mean: float, # lower bound on the training sample paths
                           train_LB_SE: float, # its standard error
                           test_LB_mean: float, # lower bound on the testing sample paths
                           test_LB_SE: float, # its standard error
                           dual_bound_no_penalty_mean: float,
                           dual_bound_no_penalty_se: float,
                           dual_bound_with_penalty_mean: float,
                           dual_bound_with_penalty_se: float,
                           best_upper_bound: float,
                           opt_gap: float,
                           path_gen_runtime: float,
                           upp_bound_runtime: float,
                           lower_bound_runtime: float, # runtime of computing lower bounds
                           CVFA_fitting_runtime: float,
                           total_runtime: float,
                           ):
        #----------------------------------------------------------------------
        # Having the algorithm's results up to the current iteration, append
        # a new row to the output table and rewrite the per-run CSV file.
        #----------------------------------------------------------------------
        self.filename = '/' + self.mdp_name + '_' + self.basis_func_type + '_' + algorithm_name + '_instance_' + self.instance_number + '_seed_' +\
            str(basis_seed) + '_basisnum_' + str(max_basis_num) + '_batchsize_' + str(self.batch_size) + '.csv'
        info ={ 'update time' : datetime.now().strftime("%d-%m-%Y - %H : %M"),
                'state relevance type' : [str(state_relevance_type)],
                'mdp' : [self.mdp_name],
                'algorithm' : [algorithm_name],
                'basis_func_type' : [basis_func_type],
                'basis_func_seed' : [basis_seed],
                'basis_bandwidth' : [str(basis_bandwidth_str)],
                'abs_val_upp_bound' : [str(abs_val_upp_bound)],
                'max_basis_num' : [max_basis_num],
                '# bases' : [num_basis_func],
                '# batch size' : [self.batch_size],
                '# training samples' : [num_train_samples],
                '# testing samples' : [num_test_samples],
                '# num_inner_samples' : [num_inner_samples],
                'Train pol cost mean' : [round(train_LB_mean,4)],
                'Train pol cost SE' : [round(train_LB_SE,4)],
                'Test pol cost mean' : [round(test_LB_mean,4)],
                'Test pol cost SE' : [round(test_LB_SE,4)],
                'Dual bound (no penalty) mean' : [round(dual_bound_no_penalty_mean,4)],
                'Dual bound (no penalty) se' : [round(dual_bound_no_penalty_se,4)],
                'Dual bound (with penalty) mean' : [round(dual_bound_with_penalty_mean,4)],
                'Dual bound (with penalty) se' : [round(dual_bound_with_penalty_se,4)],
                'best_upper_bound' : [round(best_upper_bound,4)],
                'Opt_gap (%)' : [round(opt_gap,4)],
                'Runtime to simulate sample paths' : [round(path_gen_runtime,1)],
                'Runtime to fit C/VFA' : [round(CVFA_fitting_runtime,1)],
                'Runtime to get lower bounds' : [round(lower_bound_runtime,1)],
                'Runtime to get upper bound ' : [round(upp_bound_runtime,1)],
                'Total runtime of algorithm' : [round(total_runtime,1)],
              }
        self.output_table = pd.concat([self.output_table,pd.DataFrame(info)],ignore_index = True)
        self.output_table.to_csv(self.path + self.filename)
    def append_output_to_existingfile( self,
                                       algorithm_name: str, # LSM
                                       instance_num: int, # instance number
                                       basis_seed: int, # seed number for basis function
                                       max_basis_num: int, # max number of basis functions
                                       num_basis_func: int, # number of basis functions, ...
                                       batch_size: int, # batch size, ...
                                       num_CFA_samples: int, # num of outer samples
                                       num_pol_samples: int, # num of policy simulation samples
                                       train_LB_mean: float, # lower bound on the training sample paths
                                       train_LB_SE: float, # its standard error
                                       test_LB_mean: float, # lower bound on the testing sample paths
                                       test_LB_SE: float, # its standard error
                                       path_gen_runtime: float,
                                       LSM_runtime: float,
                                       test_LB_runtime: float, # runtime of computing greedy policy cost
                                       ):
        #----------------------------------------------------------------------
        # Append the final results of an instance run to a shared CSV file
        # that aggregates results across instances (header written once, one
        # row appended per call).
        #----------------------------------------------------------------------
        # BUG FIX: csv.writer was referenced but never imported anywhere in
        # this module; import it locally so this method is self-contained.
        from csv import writer
        self.filename = '/' + self.mdp_name + '_' + self.basis_func_type + '_' + algorithm_name + '_seed_' + str(basis_seed) + '.csv'
        # `info` doubles as the header row: iterating the dict yields its keys.
        info ={ 'update time' : datetime.now().strftime("%d-%m-%Y - %H : %M"),
                'mdp' : [self.mdp_name],
                'algorithm' : [algorithm_name],
                'instance_num' : [instance_num],
                'basis_func_seed' : [basis_seed],
                '# bases' : [num_basis_func],
                '# batch size' : [batch_size],
                '# CFA samples' : [num_CFA_samples],
                '# pol sim samples' : [num_pol_samples],
                'Train pol cost mean' : [round(train_LB_mean,4)],
                'Train pol cost SE' : [round(train_LB_SE,4)],
                'Test pol cost mean' : [round(test_LB_mean,4)],
                'Test pol cost SE' : [round(test_LB_SE,4)],
                'Sample path simul runtime' : [round(path_gen_runtime,1)],
                'LSM runtime' : [round(LSM_runtime,1)],
                'Test pol runtime' : [round(test_LB_runtime,1)],
              }
        newRow = [datetime.now().strftime("%d-%m-%Y - %H : %M"),
                  self.mdp_name,
                  algorithm_name,
                  instance_num,
                  basis_seed,
                  num_basis_func,
                  batch_size,
                  num_CFA_samples,
                  num_pol_samples,
                  round(train_LB_mean,4),
                  round(train_LB_SE,4),
                  round(test_LB_mean,4),
                  round(test_LB_SE,4),
                  round(path_gen_runtime,1),
                  round(LSM_runtime,1),
                  round(test_LB_runtime,1)]
        # BUG FIX: the original used self.global_path, which is never assigned
        # in this class and raised AttributeError. Fall back to the instance
        # output path while still honoring global_path if a caller sets it.
        base_path = getattr(self, 'global_path', self.path)
        addHeader = False
        if not os.path.exists(base_path + self.filename):
            addHeader = True
        with open(base_path + self.filename, 'a+', newline='') as write_obj:
            # Create a writer object from csv module
            csv_writer = writer(write_obj)
            if addHeader:
                csv_writer.writerow(info)
            # Add contents of list as last row in the csv file
            csv_writer.writerow(newRow)
def make_single_ALP_constriant(eval_basis,discount,expected_basis,get_batch_next_state,get_expected_cost,new_basis_param,state,action):
    #--------------------------------------------------------------------------
    # Assemble the three ingredients of a single ALP constraint for a given
    # state-action pair: the basis values at the state, the discounted
    # expected basis values at the sampled next states, and the expected
    # immediate cost.
    #--------------------------------------------------------------------------
    basis_at_state = eval_basis(state, new_basis_param)
    batch_next_states = get_batch_next_state(state, action)
    discounted_expected_basis = discount * expected_basis(batch_next_states, new_basis_param)
    expected_cost = get_expected_cost(state, action)
    return basis_at_state, discounted_expected_basis, expected_cost
def aggregate_all_algorithm_output_BerOpt(filename_list,path_to_save,model_name_list):
    #--------------------------------------------------------------------------
    # Aggregate per-algorithm result CSVs for the Bermudan option pricing
    # application into one summary table: row 0 holds "mean ± half-width" of
    # the test policy cost, row 1 the same for total runtime, one column per
    # model name. The table is written to path_to_save.
    # NOTE(review): assumes filename_list and model_name_list align 1:1.
    #--------------------------------------------------------------------------
    summary = pd.DataFrame(np.zeros((2, len(model_name_list))))
    summary.columns = model_name_list
    for position, result_file in enumerate(filename_list):
        table = pd.read_csv(result_file)
        cost_stats = mean_confidence_interval(table['Test pol cost mean'].to_list())
        time_stats = mean_confidence_interval(table['tot runtime'].to_list())
        cost_entry = str(round(cost_stats[0], 2)) + u"\u00B1" + str(round((cost_stats[2] - cost_stats[1]) / 2, 2))
        time_entry = str(round(time_stats[0], 2)) + u"\u00B1" + str(round((time_stats[2] - time_stats[1]) / 2, 2))
        summary[model_name_list[position]] = pd.DataFrame([cost_entry, time_entry])
    summary.to_csv(path_to_save)