In [1]:
import pickle
import os, sys
sys.path.append(os.path.abspath('../'))
import numpy as np
import pandas as pd
from datetime import datetime
from EMAWorkbench import Policy
from EMAWorkbench.em_framework import sample_uncertainties
from EMAWorkbench import SequentialEvaluator, ema_logging, save_results
from models.MOMDP import MOMDP
from library.basis.dam_basis_rbf import dam_basis_rbf
from library.basis.lake_basis_poly import lake_basis_poly
from library.basis.basis_poly import basis_poly
from library.basis.fishing3_basis_poly import fishing3_basis_poly
from library.policies.Gibbs import Gibbs
from library.policies.GaussianLinearChol import GaussianLinearChol


def evaluation(env, problem, random_seed, steps, N_EVAL, policy, policy_path, result_path):
    """Evaluate the pickled 'best' and 'high' policies and save each result as CSV.

    For each suffix in ('best', 'high') the object stored in
    ``{policy_path}_{suffix}.pickle`` is unpickled and passed to ``problem``
    together with ``policy``, ``N_EVAL`` and ``env``. Whatever ``problem``
    returns is wrapped in a ``pandas.DataFrame`` and written to
    ``{result_path}-DD-MM-YYYY-HH-MM_{suffix}.csv`` (no index column).

    Args:
        env: Forwarded unchanged to ``problem`` as ``env``.
        problem: Callable invoked as
            ``problem(policy=..., policy_high=..., N_EVAL=..., env=...)``.
            Its return value must be accepted by ``pd.DataFrame``.
        random_seed: Accepted for call-site compatibility; not used here.
        steps: Accepted for call-site compatibility; not used here.
        N_EVAL: Forwarded unchanged to ``problem``.
        policy: Forwarded unchanged to ``problem``.
        policy_path: Path prefix of the pickles, without the
            ``_best.pickle`` / ``_high.pickle`` suffix.
        result_path: Path prefix of the CSV files to write. Missing parent
            directories are created.

    Returns:
        The value returned by ``problem`` for the last suffix ('high') only.
    """
    # Take the timestamp once so both CSV files of a run carry the same stamp,
    # even when the evaluation crosses a minute boundary.
    run_stamp = datetime.now().strftime("-%d-%m-%Y-%H-%M")

    # to_csv does not create directories; make sure the target exists up front
    # so a long evaluation is not lost on the final write.
    out_dir = os.path.dirname(result_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for name in ('best', 'high'):
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # policy_path at files produced by trusted training runs.
        with open(f'{policy_path}_{name}.pickle', 'rb') as fh:
            policy_high = pickle.load(fh)

        results = problem(policy=policy, policy_high=policy_high, N_EVAL=N_EVAL, env=env)

        pd.DataFrame(results).to_csv(f'{result_path}{run_stamp}_{name}.csv', index=False)
    return results

In [2]:
def robustness_evaluation(env, problem, random_seed, steps, scenarios_num, scenario_path,
                          N_EVAL, policy, policy_path, result_path):
    """Evaluate the pickled 'best' and 'high' policies over a scenario table.

    The CSV at ``scenario_path`` is loaded once with ``pd.read_csv``. For each
    suffix in ('best', 'high') the object stored in
    ``{policy_path}_{suffix}.pickle`` is unpickled and passed to ``problem``
    together with the scenario table. Whatever ``problem`` returns is wrapped
    in a ``pandas.DataFrame`` and written to
    ``{result_path}-DD-MM-YYYY-HH-MM_{suffix}.csv`` (no index column).

    Args:
        env: Forwarded unchanged to ``problem`` as ``env``.
        problem: Callable invoked as ``problem(policy=..., policy_high=...,
            episodes=..., N_EVAL=..., env=..., scenarios=...)``. Its return
            value must be accepted by ``pd.DataFrame``.
        random_seed: Accepted for call-site compatibility; not used here.
        steps: Accepted for call-site compatibility; not used here.
        scenarios_num: Forwarded to ``problem`` as ``episodes``.
        scenario_path: Path of the scenario CSV file.
        N_EVAL: Forwarded unchanged to ``problem``.
        policy: Forwarded unchanged to ``problem``.
        policy_path: Path prefix of the pickles, without the
            ``_best.pickle`` / ``_high.pickle`` suffix.
        result_path: Path prefix of the CSV files to write. Missing parent
            directories are created.

    Returns:
        The value returned by ``problem`` for the last suffix ('high') only.
    """
    scenarios = pd.read_csv(scenario_path)

    # Take the timestamp once so both CSV files of a run carry the same stamp,
    # even when the evaluation crosses a minute boundary.
    run_stamp = datetime.now().strftime("-%d-%m-%Y-%H-%M")

    # to_csv does not create directories; make sure the target exists up front
    # so a long evaluation is not lost on the final write.
    out_dir = os.path.dirname(result_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    for name in ('best', 'high'):
        # NOTE(review): pickle.load can execute arbitrary code; only point
        # policy_path at files produced by trusted training runs.
        with open(f'{policy_path}_{name}.pickle', 'rb') as fh:
            policy_high = pickle.load(fh)

        results = problem(policy=policy, policy_high=policy_high, episodes=scenarios_num,
                          N_EVAL=N_EVAL, env=env, scenarios=scenarios)

        pd.DataFrame(results).to_csv(f'{result_path}{run_stamp}_{name}.csv', index=False)
    return results

# Discrete Dam Problem

In [None]:
import numpy as np
from library.basis.dam_basis_rbf import dam_basis_rbf
from library.policies.Gibbs import Gibbs
from models.DamDiscrete import DamDiscrete1
from models.MOMDP import MOMDP
from models.rl_dam import dam_problem_rl

# Evaluate the pickled 'repsep_discrete' policies for the 'dam_discrete' MOMDP.
random_seed = 1793476144
mdp = MOMDP('dam_discrete', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

# NOTE(review): env receives the DamDiscrete1 name itself rather than `mdp`,
# unlike the other evaluation cells -- confirm this is what dam_problem_rl expects.
evaluation(
    env=DamDiscrete1,
    problem=dam_problem_rl,
    random_seed=random_seed,
    steps=100,
    N_EVAL=500,
    policy=policy,
    policy_path='../results/rl_dam/repsep_discrete_12-06-2022-00-13',
    result_path='../results/dam_discrete_performance/repsep',
)

In [None]:
# Evaluate the pickled 'nes_discrete' policies for the 'dam_discrete' MOMDP.
# DamDiscrete1 and dam_problem_rl must already be imported by an earlier cell.
random_seed = 1793476144
mdp = MOMDP('dam_discrete', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

evaluation(
    env=DamDiscrete1,
    problem=dam_problem_rl,
    random_seed=random_seed,
    steps=100,
    N_EVAL=500,
    policy=policy,
    policy_path='../results/rl_dam/nes_discrete_12-06-2022-00-13',
    result_path='../results/dam_discrete_performance/nes',
)

# Robust Dam Problem (random initial state)

In [4]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_10th' policies on the 'dam_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_robust/repsep_10th_200_50_05_09-07-2022-01-21',
    result_path='../results/dam_robust_performance_5/repsep_10th',
)

{'utility': array([-3.31644467, -3.91642229, -1.65704593, -3.39727255, -0.55888922,
        -3.15716859, -3.16490465, -0.62058627, -1.7400947 , -1.34925588,
        -1.08701748, -0.56600875, -3.6307519 , -3.90349291, -1.03264541,
        -3.40541668, -3.9573127 , -3.90410564, -2.0876094 , -3.23296591,
        -2.5851386 , -3.98702674, -0.56574521, -3.37567069, -2.25216269,
        -1.14576168, -3.99424643, -1.15348001, -3.39506442, -2.69844985,
        -2.91781201, -1.86973682, -0.60370983, -3.3537202 , -2.90408107,
        -1.59449481, -1.33448048, -0.5784926 , -3.33697966, -3.94566022,
        -3.86164418, -0.55634004, -3.37314254, -0.56548935, -3.88450127,
        -3.26174706, -0.593634  , -0.76419098, -4.01027815, -1.27265301,
        -0.99447738, -0.57446458, -3.336098  , -1.89365242, -3.60532904,
        -1.48299565, -2.98805306, -1.92244404, -2.82930075, -0.64751575,
        -2.14121963, -0.88377229, -3.95007988, -3.41395784, -1.94244149,
        -2.04639141, -0.90335218, -0.573

In [4]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_10th' policies on the 'dam_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_robust/nes_10th_200_50_02_06-07-2022-21-07',
    result_path='../results/dam_robust_performance_5/nes_10th',
)

{'utility': array([-3.65974798, -0.5764827 , -0.60370983, -2.595674  , -1.72493098,
        -0.57089878, -0.59144055, -0.72649079, -0.61814419, -3.75598223,
        -2.33366063, -0.57833325, -0.59230924, -2.5799958 , -0.57979713,
        -2.38501206, -3.11252653, -3.80796064, -0.56136016, -3.00214754,
        -2.74773852, -2.3417827 , -0.80904537, -2.9595675 , -1.19839119,
        -3.77091662, -3.89019394, -0.63844882, -2.34897183, -3.61831633,
        -3.81133779, -2.36569877, -3.99519451, -2.53390899, -2.75708695,
        -2.86111656, -3.93888264, -1.44392487, -0.5956759 , -3.76260259,
        -2.67435076, -3.80479993, -3.79947695, -3.99520802, -2.24401592,
        -2.48666462, -0.58995256, -0.61666072, -2.64493705, -0.5597837 ,
        -2.28151943, -1.29033529, -2.54592457, -0.7446344 , -0.80391992,
        -4.01804694, -2.54713101, -0.58729325, -2.44634649, -3.99114714,
        -3.72002868, -2.55219525, -0.62419423, -0.582742  , -2.67177685,
        -2.99322715, -2.35306024, -3.779

In [5]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_mv' policies on the 'dam_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_robust/repsep_mv_200_50_05_06-07-2022-19-48',
    result_path='../results/dam_robust_performance_5/repsep_mv',
)

{'utility': array([-0.74374932, -0.82473533, -2.06251616, -1.16280118, -3.91966476,
        -1.27190102, -0.88575325, -0.81935449, -0.99288668, -0.7596511 ,
        -0.74374813, -4.02501674, -0.74442035, -2.2536608 ]),
 'reliability': array([-9.72470151, -9.63239384, -9.43750082, -9.49395751, -9.06399307,
        -9.44119837, -9.60994536, -9.63690343, -9.60801705, -9.6965974 ,
        -9.73115807, -8.95624802, -9.71920151, -9.25624802])}

In [6]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_mv' policies on the 'dam_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_robust/nes_mv_200_150_02_06-07-2022-19-52',
    result_path='../results/dam_robust_performance_5/nes_mv',
)

{'utility': array([-0.74134979, -1.03666851, -0.68765444, -0.67449906, -0.56190995,
        -0.67896611, -0.67515757, -0.70734926, -0.65064408, -0.68461947,
        -1.04288441, -0.70119042, -0.71576343, -0.56488996, -0.64244408,
        -0.96191506, -0.7230242 , -0.63684408, -0.65624408, -0.65664439,
        -0.7489839 , -0.63744408, -0.82187708, -0.98750384, -0.74421697,
        -0.89226285, -1.0410762 , -1.01840648, -0.72713842, -1.07585151,
        -0.62004408, -0.81253841, -0.55913423, -0.64084408, -0.58204408,
        -0.74277692, -0.7708949 , -0.7327207 , -1.04941756, -0.57703895,
        -0.56536623, -0.68996879, -0.87485461, -0.55624415, -0.68867896,
        -0.73238603, -0.72459073, -0.55665474, -0.7364567 , -0.5590668 ,
        -0.57354997, -0.82433125, -0.57388421]),
 'reliability': array([-9.68875733, -9.65974549, -9.7049524 , -9.71850254, -9.73810151,
        -9.7084524 , -9.7100524 , -9.6976524 , -9.7213968 , -9.7061524 ,
        -9.65873754, -9.6998524 , -9.6947524 , -9

In [7]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_avg' policies on the 'dam_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_robust/repsep_avg_200_40_05_07-07-2022-14-47',
    result_path='../results/dam_robust_performance_5/repsep_avg',
)

{'utility': array([-0.56814882, -0.89780733, -1.68372351, -1.14003629, -3.41073402,
        -3.93557143, -3.8940938 , -1.54988897, -0.8434375 , -0.79580624,
        -3.14434049, -2.31970579, -1.54837357, -2.40194672, -3.54035346,
        -3.48089409, -3.91082116, -2.07696926, -3.43553338, -0.90632404,
        -0.57285821, -3.99043696, -0.62809015, -3.93778688, -3.98532255,
        -3.95910641, -3.30777692, -1.95072021, -4.02457627, -0.72815757,
        -0.77499929, -4.01353426, -3.92392863, -3.35095323, -3.60597491,
        -0.56161978, -0.77801959, -3.50103285, -0.61607314, -2.98000154,
        -3.73569057, -1.06221696, -4.00687451, -3.00480154, -3.66511528,
        -3.47583685, -0.55907307, -3.93069008, -0.61177175, -3.95152269,
        -3.91206982, -3.20351118, -0.57678368, -3.96901198, -0.68964572,
        -4.02411781, -3.64152361, -3.71841319, -2.5374982 , -0.55736533,
        -3.98284764, -3.42925034, -3.16396623, -0.62186467, -3.69595972,
        -4.00933906, -2.80665275, -3.946

In [8]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_avg' policies on the 'dam_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_robust/nes_avg_200_50_02_07-07-2022-15-22',
    result_path='../results/dam_robust_performance_5/nes_avg',
)

{'utility': array([-3.87627476, -2.41959018, -2.2178107 , -1.12446231, -3.55954099,
        -1.03816769, -1.14871112, -3.72602935, -1.24880975, -1.97512228,
        -3.03545596, -1.4573948 , -3.56900448, -3.3481823 , -1.56741032,
        -1.00418697, -3.50352606, -3.9882925 , -1.93969405, -3.61274836,
        -3.37296452, -4.01071928, -1.24738594, -0.99429345, -3.01034555,
        -2.10699258, -0.88184884, -4.02384501, -1.08749813, -3.79440355,
        -4.02478422, -2.51157535, -2.18681133, -2.43477798, -1.36805836,
        -3.80794999, -4.01835866, -0.80753848, -2.13729192, -1.28544403,
        -3.05234168, -3.90957527, -3.99585997, -3.99837926, -1.88947515,
        -3.91575649, -2.31736235, -3.85587665, -3.47837667, -2.42211809,
        -3.71226171, -0.78469799, -3.81468271, -3.95354237, -2.2515037 ,
        -3.42399345, -3.80625031, -1.25742362, -1.47015518, -2.92315144,
        -1.38835776, -2.22740121, -1.93529387, -0.8984431 , -1.16289972,
        -1.62548147, -3.35648483, -3.525

# Robust Dam Problem (random inflow)

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_random_inflow_avg' policies on the
# 'dam_uncertain' MOMDP against evaluation_scenarios_random_inflow.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios_random_inflow.csv",
    policy_path='../results/rl_dam_robust/repsep_random_inflow_avg_200_52_05_13-06-2022-16-37',
    result_path='../results/dam_robust_performance/repsep_random_inflow_avg',
)

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_random_inflow_avg' policies on the
# 'dam_uncertain' MOMDP against evaluation_scenarios_random_inflow.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios_random_inflow.csv",
    policy_path='../results/rl_dam_robust/nes_random_inflow_avg_200_70_02_13-06-2022-17-30',
    result_path='../results/dam_robust_performance/nes_random_inflow_avg',
)

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_random_inflow' policies on the
# 'dam_uncertain' MOMDP against evaluation_scenarios_random_inflow.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios_random_inflow.csv",
    policy_path='../results/rl_dam_robust/repsep_random_inflow13-06-2022-14-46',
    result_path='../results/dam_robust_performance/repsep_random_inflow',
)

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_random_inflow' policies on the
# 'dam_uncertain' MOMDP against evaluation_scenarios_random_inflow.csv.
random_seed = 1793476144
mdp = MOMDP('dam_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios_random_inflow.csv",
    policy_path='../results/rl_dam_robust/nes_random_inflow13-06-2022-14-33',
    result_path='../results/dam_robust_performance/nes_random_inflow',
)

# Robust Dam Problem (deep)

In [9]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_10th' policies on the 'dam_deep_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_deep_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/nes_10th_200_200_02_07-07-2022-18-14',
    result_path='../results/dam_deep_performance_3/nes_10th',
)

{'utility': array([-1.7774706 , -1.61902942, -4.01634384, -1.89215792, -1.64448173,
        -2.00660098, -1.82863291, -3.3856714 , -4.93142551, -1.77859287,
        -2.82081642, -1.89772028, -2.58579515, -1.91144048, -1.62028296,
        -3.78014336, -1.79599911, -4.72284581, -4.83050933, -2.35040825,
        -4.54283605, -1.66945208, -1.66837627, -4.20435206, -1.57158093,
        -2.27283693, -2.09806813, -1.60408355, -3.51688899, -1.75053317,
        -2.33235075, -1.62872631, -2.08416834, -1.73100716, -1.95016163,
        -4.20978593, -4.15701322, -1.82565703, -1.61415377, -2.58660604,
        -1.81517323, -3.29460516, -1.9601052 , -3.92752719, -2.15763166,
        -2.17610925, -4.01480031, -2.45630972, -3.72928819, -5.07530694,
        -2.05077657, -4.48011008, -2.90815595, -3.66325951, -4.42503615,
        -1.59751637, -5.6486199 , -2.0485492 , -4.48519202, -1.57844777,
        -2.53843448, -3.83182795, -1.87404932, -1.72488168, -2.13594607,
        -2.16307361, -2.98026497, -2.281

In [10]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_10th' policies on the 'dam_deep_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_deep_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/repsep_10th_200_200_05_07-07-2022-17-16',
    result_path='../results/dam_deep_performance_3/repsep_10th',
)

{'utility': array([-4.85055767, -1.99005098, -1.87969863, -1.91877362, -1.57695563,
        -1.6625673 , -2.53904827, -2.13411059, -1.64209938, -3.4950031 ,
        -1.55536977, -3.1380921 , -2.72587285, -2.17907405, -3.68653938,
        -1.7797634 , -2.04878233, -1.56255987, -4.70704407, -2.03459773,
        -1.65237465, -3.70762111, -2.06705038, -3.63762898, -1.87757358,
        -1.74135872, -2.86554852, -3.05939412, -2.200675  , -3.33402379,
        -2.01323872, -3.78118401, -2.90521546, -2.12003742, -1.59606372,
        -3.73500461, -4.96418727, -1.64209746, -1.69121629, -1.59054426,
        -1.62526356, -1.57926943, -1.84384153, -2.04413216, -2.27707227,
        -3.0823587 , -5.5679504 , -3.694095  , -1.97979705, -1.78558343,
        -2.48861355, -3.67460564, -1.83910856, -1.89728071, -1.85808874,
        -5.10001358, -1.75754444, -1.71959   , -4.28272031, -1.96681713,
        -2.02581533, -1.90975264, -4.54945784, -2.02443575, -4.82987177,
        -1.9275852 , -1.86245064, -3.515

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_avg' policies on the 'dam_deep_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_deep_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/nes_avg_200_200_02_07-07-2022-17-48',
    result_path='../results/dam_deep_performance_3/nes_avg',
)

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_avg' policies on the 'dam_deep_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_deep_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/repsep_avg_200_200_05_07-07-2022-17-31',
    result_path='../results/dam_deep_performance_3/repsep_avg',
)

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'nes_mv' policies on the 'dam_deep_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_deep_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/nes_mv_200_200_02_07-07-2022-18-01',
    result_path='../results/dam_deep_performance_3/nes_mv',
)

In [None]:
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled 'repsep_mv' policies on the 'dam_deep_uncertain' MOMDP
# against the scenarios in evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('dam_deep_uncertain', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=dam_uncertain_problem_rl,
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/repsep_mv_200_200_05_07-07-2022-17-04',
    result_path='../results/dam_deep_performance_3/repsep_mv',
)

# Robust Dam3 Problem

In [3]:
import functools
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled '3_nes' policies on the 'dam3_deep' MOMDP against the
# scenarios in evaluation_scenarios.csv; the problem is called with n_obj=3.
random_seed = 1793476144
mdp = MOMDP('dam3_deep', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=functools.partial(dam_uncertain_problem_rl, n_obj=3),
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/3_nes_200_100_02_16-07-2022-13-45',
    result_path='../results/dam3_deep_performance/nes',
)

{'upstream_flooding': array([-33.89454337,  -6.77478747, -24.79630128, -23.54360562,
        -19.6153392 , -41.85224697, -25.45816454, -36.46187072,
        -15.15560925,  -1.76600242, -36.53114174, -51.94869367,
         -9.69817916, -33.5757115 , -23.32671   , -41.25924683,
        -26.28739096, -19.4116005 , -12.4209509 , -12.35783915,
        -29.24228226,  -3.1507729 , -15.395947  , -21.37599978,
        -27.7178615 , -61.75292444,  -6.76643556,  -1.56448585,
        -28.14722511, -20.07413761,  -8.25874418, -17.37072714,
        -37.88161072,  -9.14437713, -29.2826197 , -14.80427127,
         -3.13003092,  -4.58052412, -24.53638849, -31.98045361,
        -27.73011686, -29.39771268,  -1.5696148 , -19.6518947 ,
        -19.00951937, -31.49154971,  -5.53433969,  -1.65433483,
         -5.37598342,  -3.35340422, -42.33001628, -29.4864006 ,
        -26.81317407, -23.8969369 , -55.2233054 , -14.54945185,
        -28.25739366, -46.70978628, -13.91649009,  -5.49784339,
        -19.9035774

In [3]:
import functools
from models.rl_dam_uncertain import dam_uncertain_problem_rl

# Evaluate the pickled '3_repsep' policies on the 'dam3_deep' MOMDP against the
# scenarios in evaluation_scenarios.csv; the problem is called with n_obj=3.
random_seed = 1793476144
mdp = MOMDP('dam3_deep', seed=random_seed)
bfs = dam_basis_rbf

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=functools.partial(dam_uncertain_problem_rl, n_obj=3),
    random_seed=random_seed,
    steps=100,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/dam_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_dam_deep/3_repsep_200_200_05_16-07-2022-15-01',
    result_path='../results/dam3_deep_performance/repsep',
)

{'upstream_flooding': array([ -6.42996687,  -9.0377518 ,  -5.49114702,  -1.80679197,
        -18.74402775,  -5.73962905, -14.95014948,  -5.88370371,
        -10.79816393, -25.00609992,  -5.22054466,  -8.05958906,
         -3.61828742,  -6.40227277, -21.28188721,  -6.64612961,
         -6.11752858,  -6.33074341, -28.77015624,  -6.84052988,
        -30.91314118, -12.63818075, -15.58802014, -31.96332147,
         -6.21166852,  -6.7378727 ,  -2.10167802,  -6.12712107,
        -49.07071036, -30.07075331, -18.75882732, -19.05423704,
        -33.62221896,  -5.65983726,  -5.7718306 ,  -5.05740971,
        -11.22802319,  -8.35171075, -14.2347772 , -10.83468439,
        -25.86151848, -13.70766693,  -2.30636538,  -5.31524085,
         -7.24416637,  -6.25692627,  -7.63208427, -15.13356944,
        -41.31242033,  -3.71756365, -20.45524839,  -5.48605584,
        -14.14549213, -14.41205841,  -8.63883509,  -7.34820706,
         -2.66887452,  -8.6737789 , -12.48988031,  -4.69639874,
         -6.7322760

# Discrete Lake Problem

In [None]:
from models.rl_lake import lake_problem_rl

# Evaluate the pickled 'nes' policies for the 'lake_discrete' MOMDP.
random_seed = 1793476144
mdp = MOMDP('lake_discrete', seed=random_seed)
bfs = lake_basis_poly

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

evaluation(
    env=mdp,
    problem=lake_problem_rl,
    random_seed=random_seed,
    steps=99,
    N_EVAL=4000,
    policy=policy,
    policy_path='../results/rl_lake/nes_200_200_03_07-07-2022-18-34',
    result_path='../results/lake_discrete_performance_2/nes',
)

In [None]:
from models.rl_lake import lake_problem_rl

# Evaluate the pickled 'repsep' policies for the 'lake_discrete' MOMDP.
random_seed = 1793476144
mdp = MOMDP('lake_discrete', seed=random_seed)
bfs = lake_basis_poly

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

evaluation(
    env=mdp,
    problem=lake_problem_rl,
    random_seed=random_seed,
    steps=99,
    N_EVAL=4000,
    policy=policy,
    policy_path='../results/rl_lake/repsep_400_200_05_07-07-2022-18-40',
    result_path='../results/lake_discrete_performance_2/repsep',
)

# Robust Lake Problem

In [None]:
from models.rl_lake_uncertain import lake_uncertain_problem_rl

# Evaluate the pickled 'repsep' policies on the 'lake_deep_uncertain' MOMDP
# against the scenarios in lake_scenarios/evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('lake_deep_uncertain', seed=random_seed)
bfs = lake_basis_poly

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=lake_uncertain_problem_rl,
    random_seed=random_seed,
    steps=99,
    scenarios_num=2000,
    policy=policy,
    N_EVAL=4000,
    scenario_path="../results/lake_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_lake_robust/repsep_400_40_05_30-06-2022-08-58',
    result_path='../results/lake_robust_performance_2/repsep',
)

In [None]:
from models.rl_lake_uncertain import lake_uncertain_problem_rl

# Evaluate the pickled 'nes' policies on the 'lake_deep_uncertain' MOMDP
# against the scenarios in lake_scenarios/evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('lake_deep_uncertain', seed=random_seed)
bfs = lake_basis_poly

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=lake_uncertain_problem_rl,
    random_seed=random_seed,
    steps=99,
    scenarios_num=2000,
    policy=policy,
    N_EVAL=4000,
    scenario_path="../results/lake_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_lake_robust/nes_200_40_03_30-06-2022-09-27',
    result_path='../results/lake_robust_performance_2/nes',
)

In [4]:
from models.rl_lake_uncertain import lake_uncertain_problem_rl

# Evaluate the pickled 'nes_avg' policies on the 'lake_deep_uncertain' MOMDP
# against the scenarios in lake_scenarios/evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('lake_deep_uncertain', seed=random_seed)
bfs = lake_basis_poly

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=lake_uncertain_problem_rl,
    random_seed=random_seed,
    steps=99,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/lake_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_lake_robust/nes_avg_200_200_03_30-06-2022-09-33',
    result_path='../results/lake_robust_performance_2/nes_avg',
)

{'utility': array([1.55766136, 1.52158568, 1.55854115, 1.62961673, 1.59265159,
        0.92675192, 1.53949821, 1.29127272, 1.56610713, 0.90609407,
        1.63806801, 1.11346922, 0.92904689, 0.83528503, 1.39918145,
        1.58683752, 1.17671896, 1.5928736 , 1.55262677, 1.30724701,
        0.91347484, 0.89590001, 1.59233464, 1.42880108, 1.5280188 ,
        0.97121998, 1.33385676, 1.14052843, 1.01332367, 1.02143964,
        1.51310794, 1.14912014, 1.16856911, 1.59489236, 0.93893925,
        1.16439948, 1.50692595, 0.83273471, 0.92259813, 1.56182466,
        1.52757626, 1.39894486, 1.61945514, 1.55360392, 0.96762867,
        1.39554441, 1.64302643, 1.5935414 , 0.90444965, 1.28112776,
        0.91380459, 1.6169688 , 1.6012261 , 1.35567154, 0.90714546,
        1.6052261 , 1.52764468, 1.43522958, 1.28463728, 0.8370112 ,
        1.00321257, 0.93687979, 0.90140748, 1.62705769, 0.95694642,
        1.56686195, 0.91672393, 1.5973061 , 1.19769468, 1.56643754,
        0.95813483, 0.91675788, 1.492

In [5]:
from models.rl_lake_uncertain import lake_uncertain_problem_rl

# Evaluate the pickled 'repsep_avg' policies on the 'lake_deep_uncertain' MOMDP
# against the scenarios in lake_scenarios/evaluation_scenarios.csv.
random_seed = 1793476144
mdp = MOMDP('lake_deep_uncertain', seed=random_seed)
bfs = lake_basis_poly

# Gibbs policy: all-zero parameter column with (bfs() + 1) * mdp.actionUB rows
# and the action values np.arange(actionLB, actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
# NOTE(review): presumably turns off random action sampling -- confirm in Gibbs.
policy.makeDeterministic()

robustness_evaluation(
    env=mdp,
    problem=lake_uncertain_problem_rl,
    random_seed=random_seed,
    steps=99,
    scenarios_num=1000,
    policy=policy,
    N_EVAL=2000,
    scenario_path="../results/lake_scenarios/evaluation_scenarios.csv",
    policy_path='../results/rl_lake_robust/repsep_avg_400_200_05_10-07-2022-11-26',
    result_path='../results/lake_robust_performance_2/repsep_avg',
)

{'utility': array([1.446614  , 1.51907571, 1.01151959, 1.64153722, 1.51408538,
        0.97703794, 1.48881624, 1.62825894, 1.62544017, 1.63021627,
        1.47900691, 1.49443345, 1.50918956, 1.48239414, 1.14517029,
        1.45012606, 1.39983918, 1.41275237, 1.63045633, 1.49429272,
        1.46915852, 1.50951921, 1.47195713, 1.01694059, 1.61879614,
        1.13445484, 1.62224267, 1.45163514, 1.38879781, 1.46269812,
        1.47278398, 1.08328457, 1.47123524, 1.47700431, 0.95198991,
        1.03472813, 1.50880901, 1.55109426, 1.61697237, 0.85519478,
        1.44602839, 1.63192353, 1.49297093, 1.54963891, 1.51716763,
        1.60340042, 1.49701113, 0.99561607, 1.45616623, 1.63540755,
        1.4592273 , 1.57351125, 0.94507996, 1.4552136 , 1.63583756,
        1.0585717 , 1.52138525, 1.46535472, 1.64209265, 1.10004092,
        1.61104142, 1.49609449, 1.46521095, 1.44475824, 1.4898179 ,
        1.5172397 , 1.47720953, 1.44788775, 1.51149881, 1.64082645,
        1.44313645, 0.80948995, 1.446

In [7]:
from models.rl_lake_uncertain import lake_uncertain_problem_rl

# Robustness run for the policy files prefixed 'nes_mv_200_200_03_...'
# on the 'lake_deep_uncertain' model.
random_seed = 1793476144
mdp = MOMDP('lake_deep_uncertain', seed=random_seed)
bfs = lake_basis_poly

# The Gibbs policy starts from an all-zero parameter column whose length is
# (bfs() + 1) * mdp.actionUB; its action values come from
# np.arange(mdp.actionLB, mdp.actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
policy.makeDeterministic()

# Keyword arguments follow the order of robustness_evaluation's signature.
# The helper evaluates the '_best' and '_high' pickles found at policy_path
# and saves one timestamped CSV for each, prefixed with result_path; only the
# result of the final ('high') variant is returned and shown as cell output.
robustness_evaluation(
    env=mdp,
    problem=lake_uncertain_problem_rl,
    random_seed=random_seed,
    steps=99,
    scenarios_num=1000,
    scenario_path="../results/lake_scenarios/evaluation_scenarios.csv",
    N_EVAL=2000,
    policy=policy,
    policy_path='../results/rl_lake_robust/nes_mv_200_200_03_08-07-2022-17-21',
    result_path='../results/lake_robust_performance_2/nes_mv',
)

{'utility': array([0.88996284, 1.14408597, 0.81918576, 0.94771248, 1.02812248,
        1.03696284, 0.81472103, 0.84892118, 0.8498729 , 0.81404262,
        0.99386102, 0.91490257, 1.42686764, 0.9904729 , 0.86836283,
        0.82648161, 0.99646592, 0.94573416, 0.97399161, 1.19550575,
        0.87151196, 1.04695887, 1.04689323, 1.09395152, 1.04918757,
        0.80741777, 0.88768206, 0.80223746, 1.05403275, 0.88032319,
        0.91666288, 0.87522052, 0.94851435, 1.20976283, 0.89402709,
        0.90822098, 0.90475031, 0.9163111 , 1.04780093, 0.87951099,
        1.02862503, 0.92324911, 0.80963563, 0.94568833, 1.07249368,
        0.86740754, 0.92431912, 0.93050097, 0.86181421, 0.83039647,
        0.95353874, 0.91243654, 0.96416554, 1.14708005, 1.13737173,
        0.90085967, 0.79343625, 1.03008658, 0.87447754, 0.83532068,
        0.95073486, 0.86751957, 1.14934215, 1.22361934, 1.2543998 ,
        0.94829247, 0.9229301 , 1.47244279, 0.94333701, 1.45211517,
        1.12677274, 0.9620713 , 0.766

In [6]:
from models.rl_lake_uncertain import lake_uncertain_problem_rl

# Robustness run for the policy files prefixed 'repsep_mv_400_200_05_...'
# on the 'lake_deep_uncertain' model.
random_seed = 1793476144
mdp = MOMDP('lake_deep_uncertain', seed=random_seed)
bfs = lake_basis_poly

# Gibbs policy: zero-initialised parameters of shape
# ((bfs() + 1) * mdp.actionUB, 1) and the action values
# np.arange(mdp.actionLB, mdp.actionUB + 1).
policy = Gibbs(
    bfs,
    np.zeros(((bfs() + 1) * mdp.actionUB, 1)),
    np.arange(mdp.actionLB, mdp.actionUB + 1),
)
policy.makeDeterministic()

# Same scenario file and evaluation sizes as the other lake robustness cells;
# only policy_path and result_path differ. The helper writes one timestamped
# CSV per policy variant ('best', 'high') and returns the last variant's
# results, which is what the cell displays.
robustness_evaluation(
    env=mdp,
    problem=lake_uncertain_problem_rl,
    random_seed=random_seed,
    steps=99,
    scenarios_num=1000,
    scenario_path="../results/lake_scenarios/evaluation_scenarios.csv",
    N_EVAL=2000,
    policy=policy,
    policy_path='../results/rl_lake_robust/repsep_mv_400_200_05_08-07-2022-18-42',
    result_path='../results/lake_robust_performance_2/repsep_mv',
)

{'utility': array([1.54551941, 1.54825418, 1.09888147, 1.2736981 , 0.90983551,
        1.56516024, 0.87924986, 1.6012261 , 0.90419036, 1.54549771,
        0.86125232, 0.84698181, 0.91159163, 1.23361545, 0.92322224,
        1.43741056, 1.16506438, 1.06569074, 0.86891318, 0.974029  ,
        1.05906749, 1.28393351, 0.88928254, 1.28374192, 1.48641482,
        0.87687726, 0.86201401, 0.98763631, 1.54547381, 1.15578412,
        0.83417763, 0.8411956 , 1.30944812, 0.84112929, 0.94028029,
        0.87080396, 1.6052261 , 0.95205529, 0.88312822, 0.88634865,
        0.88427778, 0.96771986, 0.87674803, 0.82802273, 0.96069505]),
 'reliability': array([0.19250505, 0.19181818, 0.71026263, 0.48877778, 0.98220202,
        0.17254545, 0.99118182, 0.12474747, 0.98708081, 0.19281818,
        0.99844444, 0.99851515, 0.97959596, 0.5120101 , 0.97665657,
        0.31508081, 0.58582828, 0.71478788, 0.99712121, 0.86824242,
        0.85453535, 0.48307071, 0.98892929, 0.48354545, 0.24432323,
        0.99289899, 

# Fishing Problem

In [3]:
from models.rl_fishing import fishing_problem_rl

# Evaluation of the policy files prefixed 'nes_200_40_03_...' on the
# 'fishing' environment created with daction=2.
random_seed = 1793476144
mdp = MOMDP('fishing', seed=random_seed, daction=2)


def bfs(state=None):
    """Return basis_poly(1, mdp.dstate, 0, state).

    `mdp` is the notebook-level variable and is looked up each time the
    function is called.
    """
    return basis_poly(1, mdp.dstate, 0, state)


# Initial parameters: a zero matrix with mdp.daction rows and bfs() + 1
# columns, plus an identity matrix of size mdp.daction.
A0 = np.zeros((mdp.daction, bfs() + 1))
Sigma0 = np.eye(mdp.daction)
policy = GaussianLinearChol(bfs, mdp.daction, A0, Sigma0)
policy.makeDeterministic()

# evaluation (defined in the first cell of this notebook) loads
# '<policy_path>_best.pickle' and '<policy_path>_high.pickle' in turn, writes
# one timestamped CSV per variant prefixed with result_path, and returns the
# results of the last variant ('high'), which the cell then displays.
evaluation(
    env=mdp,
    problem=fishing_problem_rl,
    random_seed=random_seed,
    steps=100,
    N_EVAL=2000,
    policy=policy,
    policy_path='../results/rl_fishing/nes_200_40_03_09-07-2022-15-11',
    result_path='../results/fishing_performance/nes',
)

{'fish_population_1': array([8.70306244, 5.24585012, 7.06815601, 4.22703478, 8.56669115,
        8.4460812 , 7.48707237, 8.38411801, 7.1354223 , 7.26958813,
        8.62777123, 6.86173826, 8.78914374, 9.18052431, 8.52069884,
        5.35942203, 8.58975826, 8.82220458, 4.19910682, 8.41879729,
        8.49222617, 5.79922282, 8.27606081, 9.21985376, 6.01217922,
        6.9257023 , 7.11717553, 8.17887016, 4.68819415, 8.95039925,
        7.86249108, 5.81296212, 9.03339588, 9.16603292, 8.95586029,
        8.91440262, 4.69525961, 5.21795621, 3.29395739, 1.82569305,
        8.91872115, 8.89029394, 7.4819554 , 7.09425144, 7.85274204,
        9.07824827, 7.15567843, 6.16935631, 8.86521702, 9.07381441,
        8.13354547, 8.58003239, 8.96202518, 7.66788385, 7.56454813,
        7.70753114, 2.31170692, 9.16390899, 9.12934017, 5.28952504,
        8.29610924, 8.58926595, 7.38216125, 6.5844811 , 5.06271776,
        7.68379352, 7.61461366, 3.3601431 , 7.59194056, 8.28850442,
        9.10397147, 8.50165

In [3]:
from models.rl_fishing import fishing_problem_rl

# Evaluation of the policy files prefixed 'repsep_200_200_05_...' on the
# 'fishing' environment created with daction=2.
random_seed = 1793476144
mdp = MOMDP('fishing', seed=random_seed, daction=2)


def bfs(state=None):
    """Return basis_poly(1, mdp.dstate, 0, state); `mdp` is resolved at call time."""
    return basis_poly(1, mdp.dstate, 0, state)


# A0 is all zeros with shape (mdp.daction, bfs() + 1); Sigma0 is the
# mdp.daction x mdp.daction identity matrix.
A0 = np.zeros((mdp.daction, bfs() + 1))
Sigma0 = np.eye(mdp.daction)
policy = GaussianLinearChol(bfs, mdp.daction, A0, Sigma0)
policy.makeDeterministic()

# One timestamped CSV is written per policy variant ('best', 'high') with
# result_path as the file prefix; the displayed dict comes from the last one.
evaluation(
    env=mdp,
    problem=fishing_problem_rl,
    random_seed=random_seed,
    steps=100,
    N_EVAL=2000,
    policy=policy,
    policy_path='../results/rl_fishing/repsep_200_200_05_08-07-2022-23-50',
    result_path='../results/fishing_performance/repsep',
)

{'fish_population_1': array([9.27051904, 8.74792908, 7.26119629, 9.37038918, 7.73438421,
        1.85222526, 8.14600561, 7.73145375, 6.6787201 , 0.87600254,
        9.86812329, 2.15064155, 1.81688129, 3.83805229, 9.43007957,
        1.58706405, 4.25812543, 7.07680661, 5.45788219, 9.06425341,
        9.63765744, 1.23038011, 8.25738584, 5.53019734, 6.6893641 ,
        9.72076485, 2.21883334, 7.08622305, 9.01878426, 7.25961891,
        7.96540154, 8.74703701, 3.815447  , 4.71538154, 8.28600575,
        9.15953462, 1.06426716, 2.41985736, 8.96230473, 9.21327042,
        5.38438504, 8.13226169, 7.87966445, 1.9396722 , 7.34409863,
        8.56593461, 9.21893609, 4.21643404, 8.61553204, 9.71111894,
        7.28197404, 8.06442909, 9.24615989, 9.34955975, 7.89320043,
        9.31145592, 1.62211145, 2.73064165, 8.27506074, 8.46000647,
        7.27670833, 5.96127024, 5.72572266, 9.60787988, 0.5       ,
        9.39545592, 8.37857584, 1.8397151 , 9.39549077, 8.77167741,
        0.86783048, 5.24221

In [3]:
from models.rl_fishing import fishing_problem_rl

# NOTE(review): this cell uses the same policy_path and result_path as the
# earlier 'nes' fishing cell in this section. Running it again writes another
# timestamped pair of CSVs; confirm whether the repetition is intended.
random_seed = 1793476144
mdp = MOMDP('fishing', seed=random_seed, daction=2)


def bfs(state=None):
    """Return basis_poly(1, mdp.dstate, 0, state); `mdp` is resolved at call time."""
    return basis_poly(1, mdp.dstate, 0, state)


# Zero matrix of shape (mdp.daction, bfs() + 1) and an identity matrix of
# size mdp.daction are handed to the policy constructor.
A0 = np.zeros((mdp.daction, bfs() + 1))
Sigma0 = np.eye(mdp.daction)
policy = GaussianLinearChol(bfs, mdp.daction, A0, Sigma0)
policy.makeDeterministic()

# Evaluates the '_best' and '_high' pickles at policy_path; the cell output
# is the result returned for the last variant ('high').
evaluation(
    env=mdp,
    problem=fishing_problem_rl,
    random_seed=random_seed,
    steps=100,
    N_EVAL=2000,
    policy=policy,
    policy_path='../results/rl_fishing/nes_200_40_03_09-07-2022-15-11',
    result_path='../results/fishing_performance/nes',
)

{'fish_population_1': array([8.70306244, 5.24585012, 7.06815601, 4.22703478, 8.56669115,
        8.4460812 , 7.48707237, 8.38411801, 7.1354223 , 7.26958813,
        8.62777123, 6.86173826, 8.78914374, 9.18052431, 8.52069884,
        5.35942203, 8.58975826, 8.82220458, 4.19910682, 8.41879729,
        8.49222617, 5.79922282, 8.27606081, 9.21985376, 6.01217922,
        6.9257023 , 7.11717553, 8.17887016, 4.68819415, 8.95039925,
        7.86249108, 5.81296212, 9.03339588, 9.16603292, 8.95586029,
        8.91440262, 4.69525961, 5.21795621, 3.29395739, 1.82569305,
        8.91872115, 8.89029394, 7.4819554 , 7.09425144, 7.85274204,
        9.07824827, 7.15567843, 6.16935631, 8.86521702, 9.07381441,
        8.13354547, 8.58003239, 8.96202518, 7.66788385, 7.56454813,
        7.70753114, 2.31170692, 9.16390899, 9.12934017, 5.28952504,
        8.29610924, 8.58926595, 7.38216125, 6.5844811 , 5.06271776,
        7.68379352, 7.61461366, 3.3601431 , 7.59194056, 8.28850442,
        9.10397147, 8.50165

In [3]:
from models.rl_fishing import fishing_problem_rl

# Evaluation of the policy files prefixed '3_repsep_400_200_05_...' on the
# 'fishing3' environment created with daction=3.
random_seed = 1793476144
mdp = MOMDP('fishing3', seed=random_seed, daction=3)


def bfs(state=None):
    """Return basis_poly(1, mdp.dstate, 0, state); `mdp` is resolved at call time."""
    return basis_poly(1, mdp.dstate, 0, state)


# A0: zeros of shape (mdp.daction, bfs() + 1). Sigma0: identity of size
# mdp.daction.
A0 = np.zeros((mdp.daction, bfs() + 1))
Sigma0 = np.eye(mdp.daction)
policy = GaussianLinearChol(bfs, mdp.daction, A0, Sigma0)
policy.makeDeterministic()

# Results are saved with the prefix '../results/fishing3_performance/repsep',
# one timestamped CSV per policy variant ('best', 'high'); the dict shown
# below the cell is the return value for the last variant.
evaluation(
    env=mdp,
    problem=fishing_problem_rl,
    random_seed=random_seed,
    steps=100,
    N_EVAL=2000,
    policy=policy,
    policy_path='../results/rl_fishing/3_repsep_400_200_05_09-07-2022-15-18',
    result_path='../results/fishing3_performance/repsep',
)

{'fish_population_1': array([5.58249326, 5.93152855, 5.35704608, 0.70641193, 4.84627251,
        0.99831058, 0.85067679, 3.70556951, 1.43916316, 5.92242195,
        3.59551534, 2.34165164, 5.46614248, 0.82419301, 0.        ,
        5.79315645, 0.82864532, 3.58534135, 0.5       , 5.12524587,
        4.97873965, 6.07771023, 0.55      , 5.74521329, 5.61688785,
        5.87885002, 1.39416417, 5.08399752, 1.56578046, 5.01854268,
        5.24831657, 5.04681292, 1.92591227]),
 'fish_population_2': array([1.07634   , 0.5       , 1.9549383 , 6.20376524, 4.03225916,
        4.61272966, 4.78367554, 4.58039161, 4.27161212, 0.5525    ,
        1.97590998, 3.15265406, 0.80717742, 5.36191957, 4.32428358,
        0.79159967, 5.5855199 , 0.5       , 0.5       , 0.5       ,
        3.84008931, 0.5       , 0.5       , 0.5       , 0.95855112,
        0.63981149, 1.71440584, 0.5       , 0.67310749, 4.33378562,
        0.5       , 0.5       , 2.2662124 ]),
 'fish_population_3': array([0.73773561, 0.5757043

In [4]:
from models.rl_fishing import fishing_problem_rl

# Evaluation of the policy files prefixed '3_repsep_400_70_05_...' on the
# 'fishing3' environment (daction=3), using fishing3_basis_poly as the basis
# function and N_EVAL=10000.
random_seed = 1793476144
mdp = MOMDP('fishing3', seed=random_seed, daction=3)
bfs = fishing3_basis_poly

# A0: zeros of shape (mdp.daction, bfs() + 1). Sigma0: identity of size
# mdp.daction.
A0 = np.zeros((mdp.daction, bfs() + 1))
Sigma0 = np.eye(mdp.daction)
policy = GaussianLinearChol(bfs, mdp.daction, A0, Sigma0)
policy.makeDeterministic()

# NOTE(review): this is a 'fishing3' run, but result_path points at
# '../results/fishing_performance/', while the other fishing3 cell writes to
# '../results/fishing3_performance/'. Confirm the target directory is intended.
evaluation(
    env=mdp,
    problem=fishing_problem_rl,
    random_seed=random_seed,
    steps=100,
    N_EVAL=10000,
    policy=policy,
    policy_path='../results/rl_fishing/3_repsep_400_70_05_10-07-2022-06-17',
    result_path='../results/fishing_performance/repsep',
)

{'fish_population_1': array([3.21625644, 3.78211664, 6.17249876, 0.60418505, 5.16687867,
        0.53657118, 0.54027654, 6.11714459, 3.28306843, 5.3044997 ,
        4.04396297, 5.02271938, 4.61989544, 4.98880906, 2.12779965,
        0.50225739, 5.5215669 , 3.85419948, 4.00383703, 4.63695614,
        1.55807412, 3.49090767, 5.71984981, 5.04459996, 4.2317946 ,
        4.94387277, 5.25449286, 5.05032427, 3.46037153, 4.02019938,
        5.36340936, 5.85238202, 5.28804934, 3.96574288, 3.80745565,
        4.94001548, 5.52712553, 5.27279032, 6.18848282, 4.90222034,
        0.55566373, 4.42868162, 0.55509116, 4.90424374, 4.544506  ,
        5.09400475, 5.06563615, 4.1237378 , 4.83418561, 5.02766556,
        4.73966768, 4.96959711, 5.4020985 , 5.41105128, 4.5701211 ,
        5.04701337, 0.5       , 4.51744966, 5.17580537, 5.9730943 ,
        5.43322669, 4.77142768, 0.61312213, 5.63398194, 4.45347582,
        5.33168421, 5.01112476, 4.81453288, 5.63001123, 5.69284884,
        5.26390279, 5.11370