diff --git a/dev b/dev new file mode 100644 index 000000000..e69de29bb diff --git a/examples/training/dqn_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py b/examples/training/dqn_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py index 926a35864..410fcc52c 100644 --- a/examples/training/dqn_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py +++ b/examples/training/dqn_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py @@ -8,89 +8,159 @@ from csle_common.dao.training.tabular_policy import TabularPolicy from csle_common.metastore.metastore_facade import MetastoreFacade -if __name__ == '__main__': - emulation_name = "csle-level9-040" +if __name__ == "__main__": + emulation_name = "csle-level1-050" emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name) if emulation_env_config is None: - raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}") + raise ValueError( + f"Could not find an emulation environment with the name: {emulation_name}" + ) simulation_name = "csle-intrusion-response-game-local-pomdp-defender-001" simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name) if simulation_env_config is None: raise ValueError(f"Could not find a simulation with name: {simulation_name}") experiment_config = ExperimentConfig( output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}dqn_clean_test", - title="DQN_clean test", random_seeds=[399, 98912, 999], agent_type=AgentType.DQN_CLEAN, + title="DQN_clean test", + random_seeds=[399, 98912, 999], + agent_type=AgentType.DQN_CLEAN, log_every=1000, hparams={ constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam( - value=7, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER, - descr="neurons per hidden layer of the policy network"), + value=7, + name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER, + descr="neurons per hidden layer of the policy network", + ), constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam( - value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS, - descr="number of layers of the policy network"), + value=4, + name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS, + descr="number of layers of the policy network", + ), agents_constants.DQN_CLEAN.EXP_FRAC: HParam( - value=0.5, name=agents_constants.DQN_CLEAN.EXP_FRAC, - descr="the fraction of `total-timesteps it takes from start-e to go end-e"), + value=0.5, + name=agents_constants.DQN_CLEAN.EXP_FRAC, + descr="the fraction of total timesteps it takes to go from start-e to end-e", + ), agents_constants.DQN_CLEAN.TAU: HParam( - value=1.0, name=agents_constants.DQN_CLEAN.TAU, descr="target network update rate"), + value=1.0, + name=agents_constants.DQN_CLEAN.TAU, + descr="target network update rate", + ), agents_constants.COMMON.BATCH_SIZE: HParam( - value=64, name=agents_constants.COMMON.BATCH_SIZE, descr="batch size for updates"), + value=64, + name=agents_constants.COMMON.BATCH_SIZE, + descr="batch size for updates", + ), agents_constants.DQN_CLEAN.LEARNING_STARTS: HParam( - value=10000, name=agents_constants.DQN_CLEAN.LEARNING_STARTS, descr="timestep to start learning"), + value=10000, + name=agents_constants.DQN_CLEAN.LEARNING_STARTS, + descr="timestep to start learning", + ), agents_constants.DQN_CLEAN.TRAIN_FREQ: HParam( - value=10, name=agents_constants.DQN_CLEAN.TRAIN_FREQ, descr="the frequency of training"), + value=10, + name=agents_constants.DQN_CLEAN.TRAIN_FREQ, + descr="the frequency of training", + ), agents_constants.DQN_CLEAN.T_N_FREQ:
HParam( - value=500, name=agents_constants.DQN_CLEAN.T_N_FREQ, - descr="the batch size of sample from the reply memory"), + value=500, + name=agents_constants.DQN_CLEAN.T_N_FREQ, + descr="the frequency of target network updates", + ), agents_constants.DQN_CLEAN.BUFFER_SIZE: HParam( - value=1000, name=agents_constants.DQN_CLEAN.BUFFER_SIZE, descr="the replay memory buffer size"), + value=1000, + name=agents_constants.DQN_CLEAN.BUFFER_SIZE, + descr="the replay memory buffer size", + ), agents_constants.DQN_CLEAN.SAVE_MODEL: HParam( - value=False, name=agents_constants.DQN_CLEAN.SAVE_MODEL, descr="decision param for model saving"), + value=False, + name=agents_constants.DQN_CLEAN.SAVE_MODEL, + descr="boolean flag that decides whether to save the model", + ), agents_constants.COMMON.LEARNING_RATE: HParam( - value=2.4e-5, name=agents_constants.COMMON.LEARNING_RATE, - descr="learning rate for updating the policy"), + value=2.4e-5, + name=agents_constants.COMMON.LEARNING_RATE, + descr="learning rate for updating the policy", + ), agents_constants.DQN_CLEAN.NUM_STEPS: HParam( - value=164, name=agents_constants.DQN_CLEAN.NUM_STEPS, descr="number of steps in each time step"), + value=164, + name=agents_constants.DQN_CLEAN.NUM_STEPS, + descr="the number of steps in each rollout", + ), constants.NEURAL_NETWORKS.DEVICE: HParam( - value="cpu", name=constants.NEURAL_NETWORKS.DEVICE, descr="the device to train on (cpu or cuda:x)"), + value="cpu", + name=constants.NEURAL_NETWORKS.DEVICE, + descr="the device to train on (cpu or cuda:x)", + ), agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam( - value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS, - descr="the nunmber of parallel environments for training"), + value=1, + name=agents_constants.COMMON.NUM_PARALLEL_ENVS, + descr="the number of parallel environments for training", + ), agents_constants.COMMON.GAMMA: HParam( - value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"), + value=0.99, + name=agents_constants.COMMON.GAMMA, + descr="the discount factor", + ), agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam( - value=int(100000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS, - descr="number of timesteps to train"), - agents_constants.COMMON.EVAL_EVERY: HParam(value=1, name=agents_constants.COMMON.EVAL_EVERY, - descr="training iterations between evaluations"), - agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE, - descr="the batch size for evaluation"), - agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY, - descr="how frequently to save the model"), + value=int(100000), + name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS, + descr="number of timesteps to train", + ), + agents_constants.COMMON.EVAL_EVERY: HParam( + value=1, + name=agents_constants.COMMON.EVAL_EVERY, + descr="training iterations between evaluations", + ), + agents_constants.COMMON.EVAL_BATCH_SIZE: HParam( + value=100, + name=agents_constants.COMMON.EVAL_BATCH_SIZE, + descr="the batch size for evaluation", + ), + agents_constants.COMMON.SAVE_EVERY: HParam( + value=10000, + name=agents_constants.COMMON.SAVE_EVERY, + descr="how frequently to save the model", + ), agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( - value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, - descr="confidence interval"), + value=0.95, + name=agents_constants.COMMON.CONFIDENCE_INTERVAL, + descr="confidence interval", + ), agents_constants.COMMON.MAX_ENV_STEPS: HParam( - value=500,
name=agents_constants.COMMON.MAX_ENV_STEPS, - descr="maximum number of steps in the environment (for envs with infinite horizon generally)"), + value=500, + name=agents_constants.COMMON.MAX_ENV_STEPS, + descr="maximum number of steps in the environment (for envs with infinite horizon generally)", + ), agents_constants.COMMON.RUNNING_AVERAGE: HParam( - value=100, name=agents_constants.COMMON.RUNNING_AVERAGE, - descr="the number of samples to include when computing the running avg"), - agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L, - descr="the number of stop actions") + value=100, + name=agents_constants.COMMON.RUNNING_AVERAGE, + descr="the number of samples to include when computing the running avg", + ), + agents_constants.COMMON.L: HParam( + value=3, + name=agents_constants.COMMON.L, + descr="the number of stop actions", + ), }, - player_type=PlayerType.DEFENDER, player_idx=0 + player_type=PlayerType.DEFENDER, + player_idx=0, ) simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy( player_type=PlayerType.ATTACKER, - actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions, - simulation_name=simulation_env_config.name, value_function=None, q_table=None, - lookup_table=[ - [0.8, 0.2], - [1, 0], - [1, 0] - ], - agent_type=AgentType.RANDOM, avg_R=-1) - agent = DQNCleanAgent(simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config, - experiment_config=experiment_config, save_to_metastore=False) + actions=simulation_env_config.joint_action_space_config.action_spaces[ + 1 + ].actions, + simulation_name=simulation_env_config.name, + value_function=None, + q_table=None, + lookup_table=[[0.8, 0.2], [1, 0], [1, 0]], + agent_type=AgentType.RANDOM, + avg_R=-1, + ) + agent = DQNCleanAgent( + simulation_env_config=simulation_env_config, + emulation_env_config=emulation_env_config, + experiment_config=experiment_config, + save_to_metastore=False, + ) experiment_execution = agent.train() diff --git a/examples/training/mcs/stopping_pomdp_defender/run_vs_random_attacker_v_001.py b/examples/training/mcs/stopping_pomdp_defender/run_vs_random_attacker_v_001.py new file mode 100644 index 000000000..ea37338f7 --- /dev/null +++ b/examples/training/mcs/stopping_pomdp_defender/run_vs_random_attacker_v_001.py @@ -0,0 +1,84 @@ +import csle_agents.constants.constants as agents_constants +import csle_common.constants.constants as constants +from csle_agents.agents.mcs.mcs_agent import MCSAgent +from csle_agents.common.objective_type import ObjectiveType +from csle_common.dao.training.agent_type import AgentType +from csle_common.dao.training.experiment_config import ExperimentConfig +from csle_common.dao.training.hparam import HParam +from csle_common.dao.training.player_type import PlayerType +from csle_common.dao.training.policy_type import PolicyType +from csle_common.metastore.metastore_facade import MetastoreFacade + +if __name__ == "__main__": + emulation_name = "csle-level1-050" + emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name) + if emulation_env_config is None: + raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}") + simulation_name = "csle-stopping-pomdp-defender-002" + simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name) + if simulation_env_config is None: + raise ValueError(f"Could not find a simulation with name: {simulation_name}") + experiment_config = ExperimentConfig( + 
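# A sketch of how these settings interact, based on the MCSAgent code added below: MCS + # searches the box [u, v] in R^n, where n = len(u) should equal the number of stopping + # actions; with u=[-20, -20, -20] and v=[20, 20, 20], each learned threshold + # sigmoid(theta_i) can cover essentially all of (0, 1). train() derives the maximum + # level depth as smax = 5 * n + 10 and the function-call budget as nf = 50 * n ** 2. +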
output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}particle_swarm_test", + title="Multilevel Coordinate Search", + random_seeds=[399, 98912], + agent_type=AgentType.MCS, + log_every=1, + hparams={ + agents_constants.MCS.STEP: HParam(value=1000, name=agents_constants.MCS.STEP, descr="step"), + agents_constants.MCS.STEP1: HParam(value=10000, name=agents_constants.MCS.STEP1, descr="step1"), + agents_constants.MCS.U: HParam(value=[-20, -20, -20], name=agents_constants.MCS.U, + descr="initial lower corner"), + agents_constants.MCS.LOCAL: HParam(value=50, name=agents_constants.MCS.LOCAL, + descr="local value stating to which degree to perform local searches"), + agents_constants.MCS.V: HParam(value=[20, 20, 20], name=agents_constants.MCS.V, + descr="initial upper corner"), + agents_constants.MCS.STOPPING_ACTIONS: HParam( + value=3, name=agents_constants.MCS.L, descr="no. of stopping actions"), + agents_constants.COMMON.MAX_ENV_STEPS: HParam( + value=500, name=agents_constants.COMMON.MAX_ENV_STEPS, + descr="maximum number of steps in the environment (for envs with infinite horizon generally)"), + agents_constants.MCS.IINIT: HParam( + value=0, name=agents_constants.MCS.IINIT, descr="simple initialization list"), + agents_constants.MCS.GAMMA: HParam( + value=2.220446049250313e-16, name=agents_constants.MCS.GAMMA, descr="MCS gamma value"), + agents_constants.MCS.EPSILON: HParam( + value=2.220446049250313e-16, name=agents_constants.MCS.EPSILON, descr="MCS epsilon value"), + agents_constants.MCS.M: HParam( + value=1, name=agents_constants.MCS.M, descr="m value"), + agents_constants.MCS.PRT: HParam( + value=1, name=agents_constants.MCS.PRT, descr="print level"), + agents_constants.COMMON.EVAL_BATCH_SIZE: HParam( + value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE, descr="number of iterations to evaluate theta"), + agents_constants.COMMON.SAVE_EVERY: HParam( + value=1000, name=agents_constants.COMMON.SAVE_EVERY, descr="how frequently to save the model"), + agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( + value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, descr="confidence interval"), + agents_constants.COMMON.RUNNING_AVERAGE: HParam( + value=100, name=agents_constants.COMMON.RUNNING_AVERAGE, + descr="the number of samples to include when computing the running avg"), + agents_constants.COMMON.GAMMA: HParam( + value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"), + agents_constants.MCS.POLICY_TYPE: HParam( + value=PolicyType.MULTI_THRESHOLD, name=agents_constants.PARTICLE_SWARM.POLICY_TYPE, + descr="policy type for the execution"), + agents_constants.MCS.OBJECTIVE_TYPE: HParam( + value=ObjectiveType.MAX, name=agents_constants.PARTICLE_SWARM.OBJECTIVE_TYPE, descr="Objective type"), + }, + player_type=PlayerType.DEFENDER, player_idx=0, + ) + agent = MCSAgent( + simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config, + experiment_config=experiment_config, save_to_metastore=False) + experiment_execution = agent.train() + # MetastoreFacade.save_experiment_execution(experiment_execution) + # for policy in experiment_execution.result.policies.values(): + # if experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value == PolicyType.MULTI_THRESHOLD: + # MetastoreFacade.save_multi_threshold_stopping_policy(multi_threshold_stopping_policy=policy) + # elif experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value \ + # == PolicyType.LINEAR_THRESHOLD: + # 
MetastoreFacade.save_linear_threshold_stopping_policy(linear_threshold_stopping_policy=policy) + # else: + # raise ValueError("Policy type: " + # f"{experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value} " + # f"not recognized for MCS") diff --git a/examples/training/nelder_mead/stopping_pompd_defender/run_vs_random_attacker_v_001.py b/examples/training/nelder_mead/stopping_pompd_defender/run_vs_random_attacker_v_001.py index 2b8dd5363..1953b9105 100644 --- a/examples/training/nelder_mead/stopping_pompd_defender/run_vs_random_attacker_v_001.py +++ b/examples/training/nelder_mead/stopping_pompd_defender/run_vs_random_attacker_v_001.py @@ -11,7 +11,7 @@ from csle_agents.common.objective_type import ObjectiveType if __name__ == '__main__': - emulation_name = "csle-level9-030" + emulation_name = "csle-level1-050" emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name) if emulation_env_config is None: raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}") diff --git a/examples/training/particle_swarm/stopping_pomdp_defender/run_vs_particle_swarm_attacker_v_001.py b/examples/training/particle_swarm/stopping_pomdp_defender/run_vs_particle_swarm_attacker_v_001.py index dd4dacb44..3feb3c767 100644 --- a/examples/training/particle_swarm/stopping_pomdp_defender/run_vs_particle_swarm_attacker_v_001.py +++ b/examples/training/particle_swarm/stopping_pomdp_defender/run_vs_particle_swarm_attacker_v_001.py @@ -1,89 +1,141 @@ +import csle_agents.constants.constants as agents_constants import csle_common.constants.constants as constants -from csle_common.dao.training.experiment_config import ExperimentConfig -from csle_common.metastore.metastore_facade import MetastoreFacade +from csle_agents.agents.particle_swarm.particle_swarm_agent import ParticleSwarmAgent +from csle_agents.common.objective_type import ObjectiveType from csle_common.dao.training.agent_type import AgentType +from csle_common.dao.training.experiment_config import ExperimentConfig from csle_common.dao.training.hparam import HParam from csle_common.dao.training.player_type import PlayerType -from csle_agents.agents.particle_swarm.particle_swarm_agent import ParticleSwarmAgent -import csle_agents.constants.constants as agents_constants from csle_common.dao.training.policy_type import PolicyType -from csle_agents.common.objective_type import ObjectiveType +from csle_common.metastore.metastore_facade import MetastoreFacade -if __name__ == '__main__': - emulation_name = "csle-level9-030" +if __name__ == "__main__": + emulation_name = "csle-level1-050" emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name) if emulation_env_config is None: - raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}") + raise ValueError( + f"Could not find an emulation environment with the name: {emulation_name}" + ) simulation_name = "csle-stopping-pomdp-defender-002" simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name) if simulation_env_config is None: raise ValueError(f"Could not find a simulation with name: {simulation_name}") experiment_config = ExperimentConfig( - output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}particle_swarm_test", title="Particle Swarm test", + output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}particle_swarm_test", + title="Particle Swarm test", random_seeds=[399, 98912], agent_type=AgentType.PARTICLE_SWARM, log_every=1, hparams={ - agents_constants.PARTICLE_SWARM.N: 
HParam(value=5, name=constants.T_SPSA.N, - descr="the number of training iterations"), - agents_constants.PARTICLE_SWARM.S: HParam(value=10, name=agents_constants.PARTICLE_SWARM.S, - descr="The number of particles in the swarm"), + agents_constants.PARTICLE_SWARM.N: HParam( + value=5, + name=constants.T_SPSA.N, + descr="the number of training iterations", + ), + agents_constants.PARTICLE_SWARM.S: HParam( + value=10, + name=agents_constants.PARTICLE_SWARM.S, + descr="The number of particles in the swarm", + ), agents_constants.PARTICLE_SWARM.L: HParam( - value=2, name=agents_constants.PARTICLE_SWARM.L, - descr="the number of stop actions"), + value=2, + name=agents_constants.PARTICLE_SWARM.L, + descr="the number of stop actions", + ), agents_constants.PARTICLE_SWARM.B_LOW: HParam( - value=-3, name=agents_constants.PARTICLE_SWARM.B_LOW, - descr="lower boundary of random initialition"), + value=-3, + name=agents_constants.PARTICLE_SWARM.B_LOW, + descr="lower boundary of random initialition", + ), agents_constants.PARTICLE_SWARM.B_UP: HParam( - value=3, name=agents_constants.PARTICLE_SWARM.B_UP, - descr="upperboundary of random initialization"), + value=3, + name=agents_constants.PARTICLE_SWARM.B_UP, + descr="upperboundary of random initialization", + ), agents_constants.PARTICLE_SWARM.INERTIA_WEIGHT: HParam( - value=0.5, name=agents_constants.PARTICLE_SWARM.INERTIA_WEIGHT, - descr="intertia weight w"), + value=0.5, + name=agents_constants.PARTICLE_SWARM.INERTIA_WEIGHT, + descr="intertia weight w", + ), agents_constants.PARTICLE_SWARM.COGNITIVE_COEFFICIENT: HParam( - value=1, name=agents_constants.PARTICLE_SWARM.COGNITIVE_COEFFICIENT, - descr="cognitive coefficient Phi_p"), + value=1, + name=agents_constants.PARTICLE_SWARM.COGNITIVE_COEFFICIENT, + descr="cognitive coefficient Phi_p", + ), agents_constants.PARTICLE_SWARM.SOCIAL_COEFFICIENT: HParam( - value=1, name=agents_constants.PARTICLE_SWARM.SOCIAL_COEFFICIENT, - descr="social coefficient Phi_g"), - agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE, - descr="number of iterations to evaluate theta"), - agents_constants.COMMON.SAVE_EVERY: HParam(value=1000, name=agents_constants.COMMON.SAVE_EVERY, - descr="how frequently to save the model"), + value=1, + name=agents_constants.PARTICLE_SWARM.SOCIAL_COEFFICIENT, + descr="social coefficient Phi_g", + ), + agents_constants.COMMON.EVAL_BATCH_SIZE: HParam( + value=100, + name=agents_constants.COMMON.EVAL_BATCH_SIZE, + descr="number of iterations to evaluate theta", + ), + agents_constants.COMMON.SAVE_EVERY: HParam( + value=1000, + name=agents_constants.COMMON.SAVE_EVERY, + descr="how frequently to save the model", + ), agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( - value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, - descr="confidence interval"), + value=0.95, + name=agents_constants.COMMON.CONFIDENCE_INTERVAL, + descr="confidence interval", + ), agents_constants.COMMON.MAX_ENV_STEPS: HParam( - value=500, name=agents_constants.COMMON.MAX_ENV_STEPS, - descr="maximum number of steps in the environment (for envs with infinite horizon generally)"), + value=500, + name=agents_constants.COMMON.MAX_ENV_STEPS, + descr="maximum number of steps in the environment (for envs with infinite horizon generally)", + ), agents_constants.COMMON.RUNNING_AVERAGE: HParam( - value=100, name=agents_constants.COMMON.RUNNING_AVERAGE, - descr="the number of samples to include when computing the running avg"), + value=100, + 
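# Note: the inertia weight w, cognitive coefficient Phi_p and social coefficient Phi_g + # configured above enter the canonical PSO velocity update, assuming the agent follows + # the standard formulation: v = w*v + Phi_p*r_p*(p - x) + Phi_g*r_g*(g - x); x = x + v, + # where r_p, r_g ~ U(0, 1), p is the particle's best position and g the swarm's best. +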
name=agents_constants.COMMON.RUNNING_AVERAGE, + descr="the number of samples to include when computing the running avg", + ), agents_constants.COMMON.GAMMA: HParam( - value=0.99, name=agents_constants.COMMON.GAMMA, - descr="the discount factor"), + value=0.99, + name=agents_constants.COMMON.GAMMA, + descr="the discount factor", + ), agents_constants.PARTICLE_SWARM.POLICY_TYPE: HParam( - value=PolicyType.MULTI_THRESHOLD, name=agents_constants.PARTICLE_SWARM.POLICY_TYPE, - descr="policy type for the execution"), + value=PolicyType.MULTI_THRESHOLD, + name=agents_constants.PARTICLE_SWARM.POLICY_TYPE, + descr="policy type for the execution", + ), agents_constants.PARTICLE_SWARM.OBJECTIVE_TYPE: HParam( - value=ObjectiveType.MAX, name=agents_constants.PARTICLE_SWARM.OBJECTIVE_TYPE, - descr="Objective type") + value=ObjectiveType.MAX, + name=agents_constants.PARTICLE_SWARM.OBJECTIVE_TYPE, + descr="Objective type", + ), }, - player_type=PlayerType.DEFENDER, player_idx=0 + player_type=PlayerType.DEFENDER, + player_idx=0, + ) + agent = ParticleSwarmAgent( + simulation_env_config=simulation_env_config, + emulation_env_config=emulation_env_config, + experiment_config=experiment_config, ) - agent = ParticleSwarmAgent(simulation_env_config=simulation_env_config, - emulation_env_config=emulation_env_config, - experiment_config=experiment_config) experiment_execution = agent.train() MetastoreFacade.save_experiment_execution(experiment_execution) for policy in experiment_execution.result.policies.values(): - if experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value == \ - PolicyType.MULTI_THRESHOLD: - MetastoreFacade.save_multi_threshold_stopping_policy(multi_threshold_stopping_policy=policy) - elif experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value \ - == PolicyType.LINEAR_THRESHOLD: - MetastoreFacade.save_linear_threshold_stopping_policy(linear_threshold_stopping_policy=policy) + if ( + experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value + == PolicyType.MULTI_THRESHOLD + ): + MetastoreFacade.save_multi_threshold_stopping_policy( + multi_threshold_stopping_policy=policy + ) + elif ( + experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value + == PolicyType.LINEAR_THRESHOLD + ): + MetastoreFacade.save_linear_threshold_stopping_policy( + linear_threshold_stopping_policy=policy + ) else: - raise ValueError("Policy type: " - f"{experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value} " - f"not recognized for particle swarm") + raise ValueError( + "Policy type: " + f"{experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value} " + f"not recognized for particle swarm" + ) diff --git a/examples/training/ppg_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py b/examples/training/ppg_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py index 830a23fb3..94402b3fd 100644 --- a/examples/training/ppg_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py +++ b/examples/training/ppg_clean/stopping_pomdp_defender/run_vs_random_attacker_v_001.py @@ -41,7 +41,7 @@ descr="number of timesteps to train"), agents_constants.COMMON.EVAL_EVERY: HParam(value=10, name=agents_constants.COMMON.EVAL_EVERY, descr="training iterations between evaluations"), - agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE, + agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=1, name=agents_constants.COMMON.EVAL_BATCH_SIZE, descr="the batch 
size for evaluation"), agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY, descr="how frequently to save the model"), diff --git a/management-system/csle-mgmt-webapp/src/components/Common/getAgentTypeStr.js b/management-system/csle-mgmt-webapp/src/components/Common/getAgentTypeStr.js index 9a25fd0e6..6d66d7032 100644 --- a/management-system/csle-mgmt-webapp/src/components/Common/getAgentTypeStr.js +++ b/management-system/csle-mgmt-webapp/src/components/Common/getAgentTypeStr.js @@ -114,6 +114,9 @@ const getAgentTypeStr = (agentType) => { if(agentType === 35) { return "PPG CLEAN" } + if(agentType === 36) { + return "MCS" + } else { return "Unknown" } diff --git a/simulation-system/libs/csle-agents/.vscode/settings.json b/simulation-system/libs/csle-agents/.vscode/settings.json new file mode 100644 index 000000000..457f44d9b --- /dev/null +++ b/simulation-system/libs/csle-agents/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "python.analysis.typeCheckingMode": "basic" +} \ No newline at end of file diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/__init__.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_agent.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_agent.py new file mode 100644 index 000000000..c5009164c --- /dev/null +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_agent.py @@ -0,0 +1,2628 @@ +""" +MIT License + +Copyright (c) 2019 MCS developers https://github.com/vojha-code/Multilevel-Coordinate-Search +""" +from typing import Tuple +import copy +import sys +import os +import time +import math +from numpy.typing import NDArray +from typing import Union, List, Optional, Any, Dict +import gymnasium as gym +import numpy as np +import gym_csle_stopping_game.constants.constants as env_constants +from csle_common.dao.emulation_config.emulation_env_config import EmulationEnvConfig +from csle_common.dao.simulation_config.simulation_env_config import SimulationEnvConfig +from csle_common.dao.training.experiment_config import ExperimentConfig +from csle_common.dao.training.experiment_execution import ExperimentExecution +from csle_common.dao.training.experiment_result import ExperimentResult +from csle_common.dao.training.agent_type import AgentType +from csle_common.dao.training.player_type import PlayerType +from csle_common.util.experiment_util import ExperimentUtil +from csle_common.logging.log import Logger +from csle_common.dao.training.multi_threshold_stopping_policy import MultiThresholdStoppingPolicy +from csle_common.dao.training.linear_threshold_stopping_policy import LinearThresholdStoppingPolicy +from csle_common.metastore.metastore_facade import MetastoreFacade +from csle_common.dao.jobs.training_job_config import TrainingJobConfig +from csle_common.util.general_util import GeneralUtil +from csle_common.dao.simulation_config.base_env import BaseEnv +from csle_common.dao.training.policy_type import PolicyType +from csle_agents.agents.base.base_agent import BaseAgent +import csle_agents.constants.constants as agents_constants +from csle_agents.agents.mcs.mcs_utils.mcs_fun import MCSUtils +from csle_agents.agents.mcs.mcs_utils.gls_utils import GLSUtils +from csle_agents.agents.mcs.mcs_utils.ls_utils import LSUtils + + +class MCSAgent(BaseAgent): + """ + Multi-Level Coordinate Search Agent + """ + + def 
__init__(self, simulation_env_config: SimulationEnvConfig, + emulation_env_config: Union[None, EmulationEnvConfig], experiment_config: ExperimentConfig, + env: Optional[BaseEnv] = None, training_job: Optional[TrainingJobConfig] = None, + save_to_metastore: bool = True) -> None: + """ + Initializes the MCS Agent + + :param simulation_env_config: the simulation env config + :param emulation_env_config: the emulation env config + :param experiment_config: the experiment config + :param env: (optional) the gym environment to use for simulation + :param training_job: (optional) a training job configuration + :param save_to_metastore: boolean flag that can be set to avoid saving results and progress to the metastore + """ + super().__init__(simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config, + experiment_config=experiment_config) + assert experiment_config.agent_type == AgentType.MCS + self.env = env + self.training_job = training_job + self.save_to_metastore = save_to_metastore + + def eval_theta(self, policy: Union[MultiThresholdStoppingPolicy, LinearThresholdStoppingPolicy], + max_steps: int = 200) -> Dict[str, Union[float, int]]: + """ + Evaluates a given threshold policy by running Monte Carlo simulations + + :param policy: the policy to evaluate + :param max_steps: the maximum number of steps of each evaluation episode + :return: the average metrics of the evaluation + """ + if self.env is None: + raise ValueError("Need to specify an environment to run policy evaluation") + eval_batch_size = self.experiment_config.hparams[agents_constants.COMMON.EVAL_BATCH_SIZE].value + metrics: Dict[str, Any] = {} + for j in range(eval_batch_size): + done = False + o, _ = self.env.reset() + l = int(o[0]) + b1 = o[1] + t = 1 + r = 0 + a = 0 + info: Dict[str, Any] = {} + while not done and t <= max_steps: + Logger.__call__().get_logger().debug(f"t:{t}, a: {a}, b1:{b1}, r:{r}, l:{l}, info:{info}") + if self.experiment_config.player_type == PlayerType.ATTACKER: + policy.opponent_strategy = self.env.static_defender_strategy + a = policy.action(o=o) + else: + a = policy.action(o=o) + o, r, done, _, info = self.env.step(a) + l = int(o[0]) + b1 = o[1] + t += 1 + metrics = MCSAgent.update_metrics(metrics=metrics, info=info) + avg_metrics = MCSAgent.compute_avg_metrics(metrics=metrics) + avg_metrics[env_constants.ENV_METRICS.RETURN] = -avg_metrics[env_constants.ENV_METRICS.RETURN] + return avg_metrics + + @staticmethod + def update_metrics(metrics: Dict[str, List[Union[float, int]]], info: Dict[str, Union[float, int]]) \ + -> Dict[str, List[Union[float, int]]]: + """ + Update a dict with aggregated metrics using new information from the environment + + :param metrics: the dict with the aggregated metrics + :param info: the new information + :return: the updated dict of metrics + """ + for k, v in info.items(): + if k in metrics: + metrics[k].append(round(v, 3)) + else: + metrics[k] = [v] + return metrics + + @staticmethod + def compute_avg_metrics(metrics: Dict[str, List[Union[float, int]]]) -> Dict[str, Union[float, int]]: + """ + Computes the average metrics of a dict with aggregated metrics + + :param metrics: the dict with the aggregated metrics + :return: the average metrics + """ + avg_metrics = {} + for k, v in metrics.items(): + avg = round(sum(v) / len(v), 2) + avg_metrics[k] = avg + return avg_metrics + + def hparam_names(self) -> List[str]: + """ + Function that contains the hyperparameter names + + :return: a list with the hyperparameter names + """ + return [agents_constants.MCS.STEP, agents_constants.MCS.STEP1, agents_constants.MCS.U,
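+                # MCS.U and MCS.V are the lower/upper corners of the initial search box (see train())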
agents_constants.MCS.V, + agents_constants.MCS.LOCAL, agents_constants.MCS.STOPPING_ACTIONS, agents_constants.MCS.GAMMA, + agents_constants.MCS.EPSILON, agents_constants.COMMON.CONFIDENCE_INTERVAL, + agents_constants.COMMON.RUNNING_AVERAGE] + + def train(self) -> ExperimentExecution: + """ + Initializes the parameters of the MCS algorithm and runs the training, using external utility functions + + :return: the experiment execution + """ + pid = os.getpid() + u = self.experiment_config.hparams[agents_constants.MCS.U].value + v = self.experiment_config.hparams[agents_constants.MCS.V].value + iinit = self.experiment_config.hparams[agents_constants.MCS.IINIT].value + local = self.experiment_config.hparams[agents_constants.MCS.LOCAL].value + eps = self.experiment_config.hparams[agents_constants.MCS.EPSILON].value + gamma = self.experiment_config.hparams[agents_constants.MCS.GAMMA].value + # prt = self.experiment_config.hparams[agents_constants.MCS.PRT].value + # m = self.experiment_config.hparams[agents_constants.MCS.M].value + stopping_actions = self.experiment_config.hparams[agents_constants.MCS.STOPPING_ACTIONS].value + n = len(u) + smax = 5 * n + 10 + nf = 50 * pow(n, 2) + stop: List[Union[float, int]] = [3 * n] + hess = np.ones((n, n)) + stop.append(float("-inf")) + + exp_result = ExperimentResult() + exp_result.plot_metrics.append(agents_constants.COMMON.AVERAGE_RETURN) + exp_result.plot_metrics.append(agents_constants.COMMON.RUNNING_AVERAGE_RETURN) + exp_result.plot_metrics.append(env_constants.ENV_METRICS.INTRUSION_LENGTH) + exp_result.plot_metrics.append(agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_LENGTH) + exp_result.plot_metrics.append(env_constants.ENV_METRICS.INTRUSION_START) + exp_result.plot_metrics.append(agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_START) + exp_result.plot_metrics.append(env_constants.ENV_METRICS.TIME_HORIZON) + exp_result.plot_metrics.append(agents_constants.COMMON.RUNNING_AVERAGE_TIME_HORIZON) + exp_result.plot_metrics.append(env_constants.ENV_METRICS.AVERAGE_UPPER_BOUND_RETURN) + exp_result.plot_metrics.append(env_constants.ENV_METRICS.AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN) + + for l in range(1, self.experiment_config.hparams[agents_constants.MCS.STOPPING_ACTIONS].value + 1): + exp_result.plot_metrics.append(env_constants.ENV_METRICS.STOP + f"_{l}") + exp_result.plot_metrics.append(env_constants.ENV_METRICS.STOP + f"_running_average_{l}") + + descr = f"Training of policies with the Multilevel Coordinate Search (MCS) algorithm using " \ + f"simulation:{self.simulation_env_config.name}" + + for seed in self.experiment_config.random_seeds: + exp_result.all_metrics[seed] = {} + exp_result.all_metrics[seed][agents_constants.MCS.THETAS] = [] + exp_result.all_metrics[seed][agents_constants.COMMON.AVERAGE_RETURN] = [] + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_RETURN] = [] + exp_result.all_metrics[seed][agents_constants.MCS.THRESHOLDS] = [] + if self.experiment_config.player_type == PlayerType.DEFENDER: + for l in range(1, self.experiment_config.hparams[agents_constants.MCS.STOPPING_ACTIONS].value + 1): + exp_result.all_metrics[seed][ + agents_constants.NELDER_MEAD.STOP_DISTRIBUTION_DEFENDER + f"_l={l}"] = [] + else: + for s in self.simulation_env_config.state_space_config.states: + for l in range(1, self.experiment_config.hparams[agents_constants.MCS.STOPPING_ACTIONS].value + 1): + exp_result.all_metrics[seed][agents_constants.NELDER_MEAD.STOP_DISTRIBUTION_ATTACKER + + f"_l={l}_s={s.id}"] = [] +
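# Each per-seed list initialized here holds one time series of the corresponding metric; + # after all seeds have run, train() aggregates the series across seeds into avg_metrics + # and std_metrics using ExperimentUtil.mean_confidence_interval. +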
exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_START] = [] + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_TIME_HORIZON] = [] + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_LENGTH] = [] + exp_result.all_metrics[seed][env_constants.ENV_METRICS.INTRUSION_START] = [] + exp_result.all_metrics[seed][env_constants.ENV_METRICS.INTRUSION_LENGTH] = [] + exp_result.all_metrics[seed][env_constants.ENV_METRICS.TIME_HORIZON] = [] + exp_result.all_metrics[seed][env_constants.ENV_METRICS.AVERAGE_UPPER_BOUND_RETURN] = [] + exp_result.all_metrics[seed][ + env_constants.ENV_METRICS.AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN] = [] + for l in range(1, self.experiment_config.hparams[agents_constants.MCS.STOPPING_ACTIONS].value + 1): + exp_result.all_metrics[seed][env_constants.ENV_METRICS.STOP + f"_{l}"] = [] + exp_result.all_metrics[seed][env_constants.ENV_METRICS.STOP + f"_running_average_{l}"] = [] + + # Initialize training job + if self.training_job is None: + emulation_name = "" + if self.emulation_env_config is not None: + emulation_name = self.emulation_env_config.name + self.training_job = TrainingJobConfig( + simulation_env_name=self.simulation_env_config.name, experiment_config=self.experiment_config, + progress_percentage=0, pid=pid, experiment_result=exp_result, + emulation_env_name=emulation_name, simulation_traces=[], + num_cached_traces=agents_constants.COMMON.NUM_CACHED_SIMULATION_TRACES, + log_file_path=Logger.__call__().get_log_file_path(), descr=descr, + physical_host_ip=GeneralUtil.get_host_ip()) + if self.save_to_metastore: + training_job_id = MetastoreFacade.save_training_job(training_job=self.training_job) + self.training_job.id = training_job_id + else: + self.training_job.pid = pid + self.training_job.progress_percentage = 0 + self.training_job.experiment_result = exp_result + if self.save_to_metastore: + MetastoreFacade.update_training_job(training_job=self.training_job, id=self.training_job.id) + + # Initialize execution result + ts = time.time() + emulation_name = "" + if self.emulation_env_config is not None: + emulation_name = self.emulation_env_config.name + simulation_name = self.simulation_env_config.name + self.exp_execution = ExperimentExecution( + result=exp_result, config=self.experiment_config, timestamp=ts, emulation_name=emulation_name, + simulation_name=simulation_name, descr=descr, log_file_path=self.training_job.log_file_path) + if self.save_to_metastore: + exp_execution_id = MetastoreFacade.save_experiment_execution(self.exp_execution) + self.exp_execution.id = exp_execution_id + + config = self.simulation_env_config.simulation_env_input_config + if self.env is None: + self.env = gym.make(self.simulation_env_config.gym_env_name, config=config) + + for seed in self.experiment_config.random_seeds: + # ExperimentUtil.set_seed(seed) + exp_result = self.MCS(exp_result=exp_result, seed=seed, random_seeds=self.experiment_config.random_seeds, + training_job=self.training_job, u=u, v=v, smax=smax, nf=nf, stop=stop, iinit=iinit, + local=local, gamma=gamma, hess=hess, stopping_actions=stopping_actions, eps=eps, + n=n) + if self.save_to_metastore: + MetastoreFacade.save_simulation_trace(self.env.get_traces()[-1]) + self.env.reset_traces() + + # Calculate average and std metrics + exp_result.avg_metrics = {} + exp_result.std_metrics = {} + for metric in exp_result.all_metrics[self.experiment_config.random_seeds[0]].keys(): + value_vectors = [] + for seed in 
self.experiment_config.random_seeds: + value_vectors.append(exp_result.all_metrics[seed][metric]) + + max_num_measurements = max(list(map(lambda x: len(x), value_vectors))) + value_vectors = list(filter(lambda x: len(x) == max_num_measurements, value_vectors)) + + avg_metrics = [] + std_metrics = [] + for i in range(len(value_vectors[0])): + if type(value_vectors[0][0]) is int or type(value_vectors[0][0]) is float \ + or type(value_vectors[0][0]) is np.int64 or type(value_vectors[0][0]) is np.float64: + seed_values = [] + for seed_idx in range(len(value_vectors)): + seed_values.append(value_vectors[seed_idx][i]) + avg = ExperimentUtil.mean_confidence_interval( + data=seed_values, + confidence=self.experiment_config.hparams[agents_constants.COMMON.CONFIDENCE_INTERVAL].value)[0] + if not math.isnan(avg): + avg_metrics.append(avg) + ci = ExperimentUtil.mean_confidence_interval( + data=seed_values, + confidence=self.experiment_config.hparams[agents_constants.COMMON.CONFIDENCE_INTERVAL].value)[1] + if not math.isnan(ci): + std_metrics.append(ci) + else: + std_metrics.append(-1) + else: + avg_metrics.append(-1) + std_metrics.append(-1) + exp_result.avg_metrics[metric] = avg_metrics + exp_result.std_metrics[metric] = std_metrics + + traces = self.env.get_traces() + if len(traces) > 0 and self.save_to_metastore: + MetastoreFacade.save_simulation_trace(traces[-1]) + ts = time.time() + self.exp_execution.timestamp = ts + self.exp_execution.result = exp_result + if self.save_to_metastore: + MetastoreFacade.update_experiment_execution(experiment_execution=self.exp_execution, + id=self.exp_execution.id) + return self.exp_execution + + def get_policy(self, theta: Union[List[Union[float, int]], NDArray[np.float64]], L: int) \ + -> Union[MultiThresholdStoppingPolicy, LinearThresholdStoppingPolicy]: + """ + Gets the policy of a given parameter vector + + :param theta: the parameter vector + :param L: the number of stopping actions + :return: the policy + """ + if self.experiment_config.hparams[agents_constants.MCS.POLICY_TYPE].value \ + == PolicyType.MULTI_THRESHOLD.value: + policy = MultiThresholdStoppingPolicy( + theta=list(theta), simulation_name=self.simulation_env_config.name, + states=self.simulation_env_config.state_space_config.states, + player_type=self.experiment_config.player_type, L=L, + actions=self.simulation_env_config.joint_action_space_config.action_spaces[ + self.experiment_config.player_idx].actions, experiment_config=self.experiment_config, avg_R=-1, + agent_type=AgentType.MCS) + else: + policy = LinearThresholdStoppingPolicy( + theta=list(theta), simulation_name=self.simulation_env_config.name, + states=self.simulation_env_config.state_space_config.states, + player_type=self.experiment_config.player_type, L=L, + actions=self.simulation_env_config.joint_action_space_config.action_spaces[ + self.experiment_config.player_idx].actions, experiment_config=self.experiment_config, avg_R=-1, + agent_type=AgentType.MCS) + return policy + + def init_list(self, theta0: NDArray[np.int32], l: NDArray[np.int32], L: NDArray[np.int32], + stopping_actions: int, n: int, ncall: int = 0) \ + -> Tuple[NDArray[np.float32], NDArray[np.float32], int, + Union[MultiThresholdStoppingPolicy, LinearThresholdStoppingPolicy]]: + """ + Computes the function values corresponding to the initialization list + and the pointer istar to the final best point x^* of the init.
list + :param theta0: the matrix of initial parameter values (one row of candidate values per dimension) + :param l: Indication of the mid point + :param L: Indication of the end point (or total number of partitions of the value x in the i'th dimension) + :param stopping_actions: stopping actions for the eval_theta function + :param n: dimension (should equal the number of stopping actions) + :return: the initial function values J0, the pointer istar, the number of function calls, and the policy + """ + theta = np.zeros(n) + for i in range(n): + theta[i] = theta0[i, l[i]] + + policy = self.get_policy(theta, L=stopping_actions) + avg_metrics = self.eval_theta( + policy=policy, max_steps=self.experiment_config.hparams[agents_constants.COMMON.MAX_ENV_STEPS].value) + J1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall += 1 + J0 = np.zeros((L[0] + 1, n)) + J0[l[0], 0] = J1 + istar = np.zeros(n).astype(int) + for i in range(n): + istar[i] = l[i] + for j in range(L[i] + 1): + if j == l[i]: + if i != 0: + J0[j, i] = J0[istar[i - 1], i - 1] + else: + theta[i] = theta0[i, j] + policy = self.get_policy(theta, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + J0[j, i] = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + + ncall = ncall + 1 + + if J0[j, i] < J1: + J1 = J0[j, i] + istar[i] = j + + theta[i] = theta0[i, istar[i]] + return J0, istar, ncall, policy # type: ignore + + def MCS(self, exp_result: ExperimentResult, seed: int, random_seeds: List[int], training_job: TrainingJobConfig, + u: List[int], v: List[int], smax: int, nf: int, stop: List[Union[float, int]], iinit: int, local: int, + gamma: float, hess: NDArray[np.float64], stopping_actions: int, eps: float, n: int, prt: int = 1) \ + -> ExperimentResult: + """ + The Multilevel Coordinate Search algorithm + + :param exp_result: the experiment result + :param seed: the seed + :param random_seeds: the list of random seeds + :param training_job: the configuration of the training job + :param u: the initial lower bound ("lower corner" in 3D) + :param v: the initial upper bound ("upper corner" in 3D) + :param smax: maximum level depth + :param nf: maximum number of function calls + :param stop: stopping test + :param iinit: the initial list + :param local: flag that controls whether local search (lsearch) is performed + :param gamma: acceptable relative accuracy for local search + :param hess: the hessian of the multidimensional function + :param stopping_actions: number of stopping actions + :param eps: parameter value for the golden ratio + :param n: the dimension of the search space + :param prt: print option + :return: the experiment result + """ + progress = 0.0 + if MCSUtils().check_box_bound(u, v): + sys.exit("Error MCS main: out of bound") + n = len(u) + ncall: int = 0 + ncloc: int = 0 + + l = np.multiply(1, np.ones(n)).astype(int) + L = np.multiply(2, np.ones(n)).astype(int) + theta0 = MCSUtils().get_theta0(iinit, u, v, n) # type: ignore + if iinit != 3: + f0, istar, ncall1, policy = self.init_list(theta0, l, L, stopping_actions, n) # type: ignore + ncall = ncall + ncall1 + theta = np.zeros(n) + for i in range(n): + theta[i] = theta0[i, l[i]] + v1 = np.zeros(n) + for i in range(n): + if abs(theta[i] - u[i]) > abs(theta[i] - v[i]): + v1[i] = u[i] + else: + v1[i] = v[i] + + step = self.experiment_config.hparams[agents_constants.MCS.STEP].value + step1 = self.experiment_config.hparams[agents_constants.MCS.STEP1].value + dim = step1 + + isplit = np.zeros(step1).astype(int) + level = np.zeros(step1).astype(int) + ipar = np.zeros(step1).astype(int) + ichild =
np.zeros(step1).astype(int) + nogain = np.zeros(step1).astype(int) + + f = np.zeros((2, step1)) + z = np.zeros((2, step1)) + + record: NDArray[Union[np.int32, np.float64]] = np.zeros(smax) + nboxes: int = 0 + nbasket: int = -1 + nbasket0: int = -1 + nsweepbest: int = 0 + nsweep: int = 0 + m = n + record[0] = 1 + nloc = 0 + xloc: List[float] = [] + flag = 1 + ipar, level, ichild, f, isplit, p, xbest, fbest, nboxes = MCSUtils().initbox( # type: ignore + theta0, f0, l, L, istar, u, v, isplit, level, ipar, ichild, f, nboxes, prt) # type: ignore + f0min = fbest + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + + s, record = MCSUtils().strtsw(smax, level, f[0, :], nboxes, record) # type: ignore + nsweep = nsweep + 1 + xmin: List[Union[float, List[float], NDArray[np.float64]]] = [] + fmi: List[float] = [] + + exp_result.all_metrics[seed][agents_constants.COMMON.AVERAGE_RETURN].append(f0min) + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_RETURN].append(f0min) + + running_avg_J = ExperimentUtil.running_average( + exp_result.all_metrics[seed][agents_constants.COMMON.AVERAGE_RETURN], + self.experiment_config.hparams[agents_constants.COMMON.RUNNING_AVERAGE].value) + avg_metrics: Optional[Dict[str, Union[float, int]]] = None + while s < smax and ncall + 1 <= nf: + if s % self.experiment_config.log_every == 0 and s > 0: + # Update training job + total_iterations = len(random_seeds) * smax + iterations_done = (random_seeds.index(seed)) * smax + s + progress = round(iterations_done / total_iterations, 2) + training_job.progress_percentage = progress + training_job.experiment_result = exp_result + if self.env is not None and len(self.env.get_traces()) > 0: + training_job.simulation_traces.append(self.env.get_traces()[-1]) + if len(training_job.simulation_traces) > training_job.num_cached_traces: + training_job.simulation_traces = training_job.simulation_traces[1:] + if self.save_to_metastore: + MetastoreFacade.update_training_job(training_job=training_job, id=training_job.id) + + # Update execution + ts = time.time() + self.exp_execution.timestamp = ts + self.exp_execution.result = exp_result + if self.save_to_metastore: + MetastoreFacade.update_experiment_execution(experiment_execution=self.exp_execution, + id=self.exp_execution.id) + + Logger.__call__().get_logger().info( + f"[MCS] s: {s}, J:{-fbest}, " + f"J_avg_{-self.experiment_config.hparams[agents_constants.COMMON.RUNNING_AVERAGE].value}:" + f"{-running_avg_J}, " + f"sigmoid(theta):{policy.thresholds()}, progress: {round(progress * 100, 2)}%") + + par = record[s] + n0, x, y, x1, x2, f1, f2 = MCSUtils().vertex(par, n, u, v, v1, theta0, f0, ipar, isplit, # type: ignore + ichild, z, f, l, L) # type: ignore + + if s > 2 * n * (min(n0) + 1): + isplit[par], z[1, par] = MCSUtils().splrnk(n, n0, p, x, y) + splt = 1 + else: + if nogain[par]: + splt = 0 + else: + e, isplit[par], z[1, par] = MCSUtils().exgain(n, n0, l, L, x, y, x1, x2, f[0, par], f0, f1, f2) + fexp = f[0, par] + min(e) + if fexp < fbest: + splt = 1 + else: + splt = 0 + nogain[par] = (1) + + if splt == 1: + i = isplit[par] + level[par] = 0 + if z[1, par] == np.Inf: + m = m + 1 + z[1, par] = m + (xbest, fbest, policy, f01, xmin, fmi, ipar, level, + ichild, f, flag, ncall1, record, nboxes, + nbasket, nsweepbest, nsweep) = \ + self.splinit(i, s, smax, par, theta0, n0, u, v, x, y, x1, x2, L, l, xmin, # type: ignore + fmi, ipar, level, # type: ignore + ichild, f, xbest, fbest, stop, 
prt, record, nboxes, nbasket, # type: ignore + nsweepbest, nsweep, # type: ignore + stopping_actions) # type: ignore + f01 = f01.reshape(len(f01), 1) + f0 = np.concatenate((f0, f01), axis=1) + ncall = ncall + ncall1 + else: + z[0, par] = x[i] + (xbest, fbest, policy, xmin, fmi, + ipar, level, ichild, f, + flag, ncall1, record, + nboxes, nbasket, nsweepbest, + nsweep) = self.split(i, s, smax, par, n0, u, v, x, y, x1, x2, z[:, par], xmin, fmi, ipar, level, + ichild, f, xbest, fbest, stop, prt, record, nboxes, nbasket, nsweepbest, + nsweep, stopping_actions) + ncall = ncall + ncall1 + + if nboxes > dim: + isplit = np.concatenate((isplit, np.zeros(step))) + level = np.concatenate((level, np.zeros(step))) + ipar = np.concatenate((ipar, np.zeros(step))) + ichild = np.concatenate((ichild, np.zeros(step))) + nogain = np.concatenate((nogain, np.zeros(step))) + # J: NDArray[Union[np.float64, np.int32]] = np.concatenate((J, np.ones((2, step))), axis=1) + z = np.concatenate((z, np.ones((2, step))), axis=1) + dim = nboxes + step + if not flag: + break + else: + if s + 1 < smax: + level[par] = s + 1 + record = MCSUtils().updtrec(par, s + 1, f[0, :], record) # type: ignore + else: + level[par] = 0 + nbasket = nbasket + 1 + if len(xmin) == nbasket: + xmin.append(copy.deepcopy(x)) + fmi.append(f[0, par]) + else: + xmin[nbasket] = copy.deepcopy(x) + fmi[nbasket] = f[0, par] + s = s + 1 + while s < smax: + if record[s] == 0: + s = s + 1 + else: + break + + if s == smax: + if local: + fmiTemp = np.asarray(fmi[nbasket0 + 1: nbasket + 1]) + xminTemp = xmin[nbasket0 + 1: nbasket + 1] + j = np.argsort(fmiTemp) + fmiTemp = np.sort(fmiTemp) + xminTemp = [copy.deepcopy(xminTemp[jInd]) for jInd in j] + fmi[nbasket0 + 1: nbasket + 1] = fmiTemp + xmin[nbasket0 + 1: nbasket + 1] = xminTemp + + for j_iter in range(nbasket0 + 1, nbasket + 1): + x = copy.deepcopy(xmin[j_iter]) + f1 = copy.deepcopy(fmi[j_iter]) + loc = MCSUtils().chkloc(nloc, xloc, x) + if loc: + nloc, xloc = MCSUtils().addloc(nloc, xloc, x) + + if not nbasket0 or nbasket0 == -1: + (xbest, fbest, policy, avg_metrics, xmin, + fmi, x, f1, loc, flag, + ncall1, nsweep, + nsweepbest) = \ + self.basket( + x, f1, policy, avg_metrics, xmin, fmi, + xbest, fbest, stop, + nbasket0, nsweep, + nsweepbest, + stopping_actions) + else: + (xbest, fbest, policy, avg_metrics, + xmin, fmi, x, f1, loc, flag, + ncall1, nsweep, nsweepbest) = self.basket(x, f1, policy, avg_metrics, xmin, fmi, xbest, + fbest, stop, nbasket0, nsweep, nsweepbest, + stopping_actions) + ncall = ncall + ncall1 + if not flag: + break + if loc: + xmin1, fmi1, nc, flag, nsweep, nsweepbest = self.lsearch( + x, f1, f0min, u, v, nf - ncall, stop, local, gamma, hess, nsweep, nsweepbest, + stopping_actions, eps) + ncall = ncall + nc + ncloc = ncloc + nc + if fmi1 < fbest: + xbest = copy.deepcopy(xmin1) + fbest = copy.deepcopy(fmi1) + nsweepbest = nsweep + if not flag: + nbasket0 = nbasket0 + 1 + nbasket = copy.deepcopy(nbasket0) + if len(xmin) == nbasket: + xmin.append(copy.deepcopy(xmin1)) + fmi.append(copy.deepcopy(fmi1)) + else: + xmin[nbasket] = copy.deepcopy(xmin1) + fmi[nbasket] = copy.deepcopy(fmi1) + break + + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + if not flag: + return exp_result + if not nbasket0 or nbasket0 == -1: + (xbest, fbest, xmin, fmi, loc, flag, ncall1, nsweep, nsweepbest) = self.basket1( + np.array(xmin1), fmi1, xmin, fmi, xbest, fbest, stop, nbasket0, nsweep, + nsweepbest, stopping_actions) + 
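# basket1, defined further down in this file, evaluates points between the candidate + # minimizer xmin1 and the minimizers already stored in the 'shopping basket' to decide + # whether xmin1 lies in the basin of an already known minimum (mirroring the reference + # MCS implementation), which avoids starting redundant local searches. +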
else: + (xbest, fbest, policy, avg_metrics, xmin, fmi, loc, flag, ncall1, nsweep, + nsweepbest) = self.basket1(np.array(xmin1), fmi1, xmin, fmi, xbest, fbest, stop, + nbasket0, nsweep, nsweepbest, stopping_actions) + ncall = ncall + ncall1 + if not flag: + break + if loc: + nbasket0 = nbasket0 + 1 + if len(xmin) == nbasket0: + xmin.append(copy.deepcopy(xmin1)) + fmi.append(copy.deepcopy(fmi1)) + else: + xmin[nbasket0] = copy.deepcopy(xmin1) + fmi[nbasket0] = copy.deepcopy(fmi1) + fbest, xbest = MCSUtils().fbestloc(fmi, fbest, xmin, xbest, # type: ignore + nbasket0, stop) # type: ignore + if not flag: + break + nbasket = copy.deepcopy(nbasket0) + if not flag: + break + + s, record = MCSUtils().strtsw(smax, list(level), list(f[0, :]), nboxes, record) + + running_avg_J = ExperimentUtil.running_average( + exp_result.all_metrics[seed][agents_constants.COMMON.AVERAGE_RETURN], + self.experiment_config.hparams[agents_constants.COMMON.RUNNING_AVERAGE].value) + exp_result.all_metrics[seed][agents_constants.COMMON.AVERAGE_RETURN].append(fbest) + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_RETURN].append(running_avg_J) + + # Log thresholds + exp_result.all_metrics[seed][agents_constants.MCS.THETAS].append( + MCSAgent.round_vec(xbest)) + exp_result.all_metrics[seed][agents_constants.MCS.THRESHOLDS].append( + MCSAgent.round_vec(policy.thresholds())) + + # Log stop distribution + for k, v in policy.stop_distributions().items(): + exp_result.all_metrics[seed][k].append(v) + + if avg_metrics is not None: + # Log intrusion lengths + exp_result.all_metrics[seed][env_constants.ENV_METRICS.INTRUSION_LENGTH].append( + round(avg_metrics[env_constants.ENV_METRICS.INTRUSION_LENGTH], 3)) + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_LENGTH].append( + ExperimentUtil.running_average( + exp_result.all_metrics[seed][env_constants.ENV_METRICS.INTRUSION_LENGTH], + self.experiment_config.hparams[agents_constants.COMMON.RUNNING_AVERAGE].value)) + + # Log stopping times + exp_result.all_metrics[seed][env_constants.ENV_METRICS.INTRUSION_START].append( + round(avg_metrics[env_constants.ENV_METRICS.INTRUSION_START], 3)) + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_START].append( + ExperimentUtil.running_average( + exp_result.all_metrics[seed][env_constants.ENV_METRICS.INTRUSION_START], + self.experiment_config.hparams[agents_constants.COMMON.RUNNING_AVERAGE].value)) + exp_result.all_metrics[seed][env_constants.ENV_METRICS.TIME_HORIZON].append( + round(avg_metrics[env_constants.ENV_METRICS.TIME_HORIZON], 3)) + exp_result.all_metrics[seed][agents_constants.COMMON.RUNNING_AVERAGE_TIME_HORIZON].append( + ExperimentUtil.running_average( + exp_result.all_metrics[seed][env_constants.ENV_METRICS.TIME_HORIZON], + self.experiment_config.hparams[agents_constants.COMMON.RUNNING_AVERAGE].value)) + for k in range(1, self.experiment_config.hparams[agents_constants.MCS.STOPPING_ACTIONS].value + 1): + exp_result.plot_metrics.append(env_constants.ENV_METRICS.STOP + f"_{k}") + exp_result.all_metrics[seed][env_constants.ENV_METRICS.STOP + f"_{k}"].append( + round(avg_metrics[env_constants.ENV_METRICS.STOP + f"_{k}"], 3)) + exp_result.all_metrics[seed][env_constants.ENV_METRICS.STOP + f"_running_average_{k}"].append( + ExperimentUtil.running_average( + exp_result.all_metrics[seed][env_constants.ENV_METRICS.STOP + f"_{k}"], + self.experiment_config.hparams[agents_constants.COMMON.RUNNING_AVERAGE].value)) + + # Log baseline returns +
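# Two reference curves are recorded next to the learned policy's return: the average + # upper-bound return and the return of the baseline defender that stops on the first + # alert (metric names from env_constants); both serve as comparison baselines in plots. +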
exp_result.all_metrics[seed][env_constants.ENV_METRICS.AVERAGE_UPPER_BOUND_RETURN].append( + round(avg_metrics[env_constants.ENV_METRICS.AVERAGE_UPPER_BOUND_RETURN], 3)) + exp_result.all_metrics[seed][ + env_constants.ENV_METRICS.AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN].append( + round(avg_metrics[env_constants.ENV_METRICS.AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN], + 3)) + + policy = self.get_policy(theta=list(xbest), L=stopping_actions) + exp_result.policies[seed] = policy + # Save policy + if self.save_to_metastore: + MetastoreFacade.save_multi_threshold_stopping_policy(multi_threshold_stopping_policy=policy) + if prt: + Logger.__call__().get_logger().info( + f"[MCS-summary-log]: " + f"nsweep: {nsweep}, minlevel: {s}, ncall: {ncall}, J:{-fbest}, " + f"theta_best: {xbest}, " + f"sigmoid(theta):{policy.thresholds()}, progress: {round(progress * 100, 2)}%") + + if stop[0] > 1: + if nsweep - nsweepbest >= stop[0]: + return exp_result + + return exp_result + + @staticmethod + def round_vec(vec) -> List[float]: + """ + Rounds a vector to 3 decimals + + :param vec: the vector to round + :return: the rounded vector + """ + return list(map(lambda x: round(x, 3), vec)) + + def splinit(self, i: int, s: int, smax: int, par: int, x0: NDArray[np.int32], n0: int, u: List[int], v: List[int], + x: NDArray[np.float64], y: NDArray[np.float64], x1: NDArray[np.float64], x2: NDArray[np.float64], + L: NDArray[np.int32], l: NDArray[np.int32], + xmin: List[Union[float, List[float], NDArray[np.float64]]], + fmi: List[float], ipar: NDArray[np.int32], + level: NDArray[np.int32], ichild: NDArray[np.int32], + f: NDArray[np.float64], xbest: NDArray[np.float64], fbest: NDArray[np.float64], + stop: List[Union[float, int]], prt: int, record: NDArray[Union[np.int32, np.float64]], + nboxes: int, nbasket: int, nsweepbest: int, nsweep: int, stopping_actions: int, ncall: int = 0, + nchild: int = 0): + """ + Splitting box at specified level s according to an initialization list + :param i: specified index + :param s: current depth level + :param smax: maximum depth level + :param par: + :param x0: initial position + :param n0: + :param u: initial lower guess ("lower corner" in 3D) + :param v: initial upper guess ("upper corner" in 3D) + :param x: starting point + :param y: + :param x1: evaluation argument (position) + :param x2: evaluation argument (position) + :param L: + :param l: + :param xmin: evaluation argument (position) + :param fmi: + :param ipar: + :param level: + :param ichild: + :param f: function value + :param xbest: best evaluation argument (position) + :param fbest: best function value + :param stop: stopping test + :param prt: print flag - unused in this implementation so far + :param record: + :param nboxes: counter for boxes not in the 'shopping basket' + :param nbasket: counter for boxes in the 'shopping basket' + :param nsweepbest: number of the sweep in which fbest was updated last + :param nsweep: sweep counter + :param stopping_actions: number of stopping actions + :return: a collection of parameters and metrics from the initial split + """ + + f0 = np.zeros(max(L) + 1) + flag = 1 + + for j in range(L[i] + 1): + if j != l[i]: + x[i] = x0[i, j] + + policy = self.get_policy(x, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f0[j] = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall = ncall + 1 + if f0[j] < fbest: + fbest = f0[j] + xbest = copy.deepcopy(x) + nsweepbest =
copy.deepcopy(nsweep) + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(float(fbest), stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(float(fbest), stop[2]) + if not flag: + return xbest, fbest, f0, xmin, fmi, ipar, level, ichild, f, + else: + f0[j] = f[0, par] + if s + 1 < smax: + # nchild = 0 + if u[i] < x0[i, 0]: + nchild = nchild + 1 + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox(par, s + 1, -nchild, + f0[0]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + for j in range(L[i]): + nchild = nchild + 1 + if f0[j] <= f0[j + 1] or s + 2 < smax: + nboxes = nboxes + 1 + if f0[j] <= f0[j + 1]: + level0 = s + 1 + else: + level0 = s + 2 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox( + par, level0, -nchild, f0[j]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + else: + x[i] = x0[i, j] + nbasket = nbasket + 1 + if (len(xmin) == nbasket): + xmin.append(copy.deepcopy(x)) + fmi.append(f0[j]) + else: + xmin[nbasket] = copy.deepcopy(x) + fmi[nbasket] = f0[j] + nchild = nchild + 1 + if f0[j + 1] < f0[j] or s + 2 < smax: + nboxes = nboxes + 1 + if f0[j + 1] < f0[j]: + level0 = s + 1 + else: + level0 = s + 2 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox(par, level0, + -nchild, f0[j + 1]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + else: + x[i] = x0[i, j + 1] + nbasket = nbasket + 1 + if (len(xmin) == nbasket): + xmin.append(copy.deepcopy(x)) + fmi.append(f0[j + 1]) + else: + xmin[nbasket] = copy.deepcopy(x) + fmi[nbasket] = f0[j + 1] + if x0[i, L[i]] < v[i]: + nchild = nchild + 1 + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox( + par, s + 1, -nchild, f0[L[i]]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + else: + for j in range(L[i] + 1): + x[i] = x0[i, j] + nbasket = nbasket + 1 + if (len(xmin) == nbasket): + xmin.append(copy.deepcopy(x)) + fmi.append(f0[j]) + else: + xmin[nbasket] = copy.deepcopy(x) + fmi[nbasket] = f0[j] + return (xbest, fbest, policy, f0, xmin, fmi, ipar, level, ichild, f, flag, ncall, + record, nboxes, nbasket, nsweepbest, nsweep) + + def split(self, i: int, s: int, smax: int, par: int, n0: int, u: List[int], v: List[int], + x: NDArray[np.float64], y: NDArray[np.float64], + x1: NDArray[np.float64], x2: NDArray[np.float64], z: NDArray[np.float64], + xmin: List[Union[float, List[float], NDArray[np.float64]]], fmi: List[float], + ipar: NDArray[np.int32], level: NDArray[np.int32], ichild: NDArray[np.int32], + f: NDArray[np.float64], xbest: NDArray[np.float64], + fbest: NDArray[np.float64], stop: List[Union[float, int]], prt: int, + record: NDArray[Union[np.float64, np.int32]], nboxes: int, nbasket: int, + nsweepbest: int, nsweep: int, stopping_actions: int, ncall: int = 0, flag: int = 1): + """ + Function that performs a box split + :param i: + :param s: current depth level + :param smax: maximum depth level + :param par: + :param n0: + :param u: initial lower guess ("lower corner" in 3D) + :param v: initial upper guess ("upper corner" in 3D) + :param x: starting point + :param y: + :param x1: evaluation argument (position) + :param x2: evaluation argument (position) + :param param z: + :param xmin: minimum position + :param fmi: + :param ipar: + :param level: + :param ichild: + :param f: function value + 
:param xbest: currently best position + :param fbest: current best function value' + :param stop: stopping test + :param prt: print - unsued in this implementation so far + :param record: + :param nboxes: counter for boxes not in the 'shopping bas + :param nbasket: counter for boxes in the 'shopping basket' + :param nsweepbest: number of sweep in which fbest was updated for the last + :param nsweep: sweep counter + :param stopping_actions: the number of stopping actions + :return: a collection of parameters and metrics afdter the arbitrary split + """ + # ncall = 0 + # flag = 1 + x[i] = z[1] + policy = self.get_policy(x, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f[1, par] = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall = ncall + 1 + if f[1, par] < fbest: + fbest = copy.deepcopy(f[1, par]) + xbest = copy.deepcopy(x) + nsweepbest = copy.deepcopy(nsweep) + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(float(fbest), stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(float(fbest), stop[2]) + + if not flag: + return (xbest, fbest, xmin, fmi, ipar, level, ichild, f, + flag, ncall, record, nboxes, nbasket, nsweepbest, nsweep) + + if s + 1 < smax: + if f[0, par] <= f[1, par]: + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox(par, s + 1, + 1, f[0, par]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + if s + 2 < smax: + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox(par, s + 2, + 2, f[1, par]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + else: + x[i] = z[1] + nbasket = nbasket + 1 + if (len(xmin) == nbasket): + xmin.append(copy.deepcopy(x)) + fmi.append(f[1, par]) + else: + xmin[nbasket] = copy.deepcopy(x) + fmi[nbasket] = f[1, par] + else: + if s + 2 < smax: + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox(par, s + 2, + 1, f[0, par]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + else: + x[i] = z[0] + nbasket = nbasket + 1 + if (len(xmin) == nbasket): + xmin.append(copy.deepcopy(x)) + fmi.append(f[0, par]) + else: + xmin[nbasket] = copy.deepcopy(x) + fmi[nbasket] = f[0, par] + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox(par, s + 1, + 2, f[1, par]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + if z[1] != y[i]: + if abs(z[1] - y[i]) > abs(z[1] - z[0]) * (3 - np.sqrt(5)) * 0.5: + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox(par, s + 1, + 3, f[1, par]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + else: + if s + 2 < smax: + nboxes = nboxes + 1 + ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = MCSUtils().genbox( + par, s + 2, 3, f[1, par]) + record = np.array(MCSUtils().updtrec(nboxes, level[nboxes], list(f[0, :]), list(record))) + else: + x[i] = z[1] + nbasket = nbasket + 1 + if (len(xmin) == nbasket): + xmin.append(copy.deepcopy(x)) + fmi.append(copy.deepcopy(f[1, par])) + else: + xmin[nbasket] = copy.deepcopy(x) + fmi[nbasket] = f[1, par] + + else: + xi1 = copy.deepcopy(x) + xi2 = copy.deepcopy(x) + + xi1[i] = z[0] + nbasket = nbasket + 1 + 
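# Editor's note: the recurring idiom at this point -- increment nbasket, then
# either append to xmin/fmi or overwrite the slot in place -- keeps the two
# parallel 'shopping basket' lists exactly nbasket + 1 entries long. A minimal
# sketch of the same idiom factored into a helper (illustrative only, not part
# of the csle API):

import copy


def put_in_basket(xmin: list, fmi: list, nbasket: int, x, fx) -> None:
    """Store the candidate minimizer (x, fx) at basket slot nbasket,
    growing the parallel lists if the slot does not exist yet."""
    if len(xmin) == nbasket:
        xmin.append(copy.deepcopy(x))
        fmi.append(fx)
    else:
        xmin[nbasket] = copy.deepcopy(x)
        fmi[nbasket] = fx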
if (len(xmin) == nbasket): + xmin.append(xi1) + fmi.append(f[0, par]) + else: + xmin[nbasket] = xi1 + fmi[nbasket] = f[0, par] + + xi2[i] = z[1] + nbasket = nbasket + 1 + if (len(xmin) == nbasket): + xmin.append(xi2) + fmi.append(f[1, par]) + else: + xmin[nbasket] = xi2 + fmi[nbasket] = f[1, par] + return (xbest, fbest, policy, xmin, fmi, ipar, level, ichild, f, flag, + ncall, record, nboxes, nbasket, nsweepbest, nsweep) + + def basket(self, x: List[float], f: float, + policy: Union[MultiThresholdStoppingPolicy, LinearThresholdStoppingPolicy], + avg_metrics: Optional[Dict[str, Union[float, int]]], + xmin: List[Union[float, List[float], NDArray[np.float64]]], + fmi: List[float], xbest: List[float], fbest: float, stop: List[Union[int, float]], + nbasket: int, nsweep: int, nsweepbest: int, stopping_actions: int, loc: int = 1, + flag: int = 1, ncall: Union[float, int] = 0): + """ + Function representing the basket functional + :param x: starting point + :param f: function value + :param policy: current policy + :param avg_metrics: current average metrics + :param xmin: minum evaluation argumen (position) + :param fmi: + :param xbest: current best position + :param fbest: current best function value + :param stop: stopping test + :param nbasket: counter for boxes in the 'shopping basket' + :param nsweep: sweep counter + :param nsweepbest: number of sweep in which fbest was updated for the last + :param stopping_actions: number of stopping actions + :return: a collection of parameters and metrics after the basket functional + """ + if not nbasket: + return xbest, fbest, policy, avg_metrics, xmin, fmi, x, f, loc, flag, ncall, nsweep, nsweepbest + dist = np.zeros(nbasket + 1) + for k in range(len(dist)): + dist[k] = np.linalg.norm(np.subtract(x, xmin[k])) + + ind = np.argsort(dist) + if nbasket == -1: + return xbest, fbest, policy, avg_metrics, xmin, fmi, x, f, loc, flag, ncall, nsweep, nsweepbest + else: + for k in range(nbasket + 1): + i = ind[k] + if fmi[i] <= f: + p = xmin[i] - x + + y1 = x + 1 / 3 * p + policy = self.get_policy(y1, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + + ncall = ncall + 1 + if f1 <= f: + y2 = x + 2 / 3 * p + policy = self.get_policy(y2, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f2 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall = ncall + 1 + if f2 > max(f1, fmi[i]): + if f1 < f: + x = y1 + f = f1 + if f < fbest: + fbest = f + xbest = copy.deepcopy(x) + nsweepbest = nsweep + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + if not flag: + return ( + xbest, fbest, policy, + avg_metrics, xmin, fmi, x, + f, loc, flag, + ncall, nsweep, + nsweepbest, + ) + else: + if f1 < min(f2, fmi[i]): + f = f1 + x = copy.deepcopy(y1) + if f < fbest: + fbest = f + xbest = copy.deepcopy(x) + nsweepbest = nsweep + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + if not flag: + return ( + xbest, fbest, policy, + avg_metrics, xmin, fmi, x, + f, loc, flag, + ncall, nsweep, + nsweepbest, + ) + elif f2 < min(f1, fmi[i]): + f = f2 + x = copy.deepcopy(y2) + if f < fbest: + fbest = f + xbest = copy.deepcopy(x) + 
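# A new overall best was just found inside the basket test: nsweepbest records
# the sweep in which this happened, so the outer loop's stagnation test
# (nsweep - nsweepbest >= stop[0]) measures sweeps since the last improvement.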
nsweepbest = nsweep + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + if not flag: + return ( + xbest, fbest, policy, + avg_metrics, xmin, fmi, x, f, + loc, flag, ncall, + nsweep, nsweepbest, + ) + else: + loc = 0 + break + + return xbest, fbest, policy, avg_metrics, xmin, fmi, x, f, loc, flag, ncall, nsweep, nsweepbest + + def lsearch(self, x: List[Union[float, int]], f: float, f0: NDArray[np.float64], u: List[int], v: List[int], + nf: int, stop: List[Union[int, float]], maxstep: int, gamma: float, + hess: NDArray[np.float64], nsweep: int, + nsweepbest: int, stopping_actions: int, eps: float, ncall: Union[float, int] = 0, + flag: int = 1, eps0: float = 0.001, nloc: int = 1, small: float = 0.1, + smaxls: int = 15, diag: int = 0, nstep: int = 0): + """ + The local search algorithm + :param x: starting point + :param f: function value + :param f0: function value + :param u: lower initial guess ("lower corner" in 3D) + :param v: initial upper guess ("upper corner" in 3D) + :param nf: + :param stop: stopping test + :param maxstep: maximum steps in the local search (mainly determined by the local command) + :param gamma: acceptable relative accuracy for local search + :param hess: the function Hessian + :param nsweep: sweep counter + :param nsweepbest: number of sweep in which fbest was updated for the last + :param stopping_actions: number of stopping actions + :param eps: parameter value for the golden ratio + :return: a collection of parameters and metrics afdter the local search + """ + n = len(x) + x0 = np.asarray([min(max(u[i], 0), v[i]) for i in range(len(u))]) + + xmin, fmi, g, G, nfcsearch = self.csearch(x, f, u, v, hess, + stopping_actions, eps) + + xmin = [max(u[i], min(xmin[i], v[i])) for i in range(n)] + ncall = ncall + nfcsearch + xold = copy.deepcopy(xmin) + fold = copy.deepcopy(fmi) + + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fmi, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fmi, stop[1]) + if not flag: + return xmin, fmi, ncall, flag, nsweep, nsweepbest + + d = np.asarray([min(min(xmin[i] - u[i], v[i] - xmin[i]), 0.25 * (1 + abs(x[i] - x0[i]))) for i in range(n)]) + p, _, _ = LSUtils().minq(fmi, g, G, -d, d, 0, eps) + + x = [max(u[i], min(xmin[i] + p[i], v[i])) for i in range(n)] + p = np.subtract(x, xmin) + if np.linalg.norm(p): + policy = self.get_policy(np.array(x), L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall = ncall + 1 + alist: List[Union[float, int]] = [0, 1] + flist: List[Union[float, int]] = [fmi, f1] + fpred = fmi + np.dot(g.T, p) + np.dot(0.5, np.dot(p.T, np.dot(G, p))) + alist, flist, nfls = self.gls(u, v, xmin, p, alist, + flist, nloc, small, smaxls, stopping_actions) + ncall = ncall + nfls + + i: Union[int, np.int64] = np.argmin(flist) + fminew = min(flist) + if fminew == fmi: + i = [k for k in range(len(alist)) if not alist[k]][0] + else: + fmi = copy.deepcopy(fminew) + + xmin = xmin + np.dot(alist[i], p) + xmin = np.asarray([max(u[i], min(xmin[i], v[i])) for i in range(n)]) + gain = f - fmi + + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fmi, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fmi, stop[1]) + + if not flag: + return xmin, fmi, ncall, flag, nsweep, nsweepbest + + if fold == fmi: + r: Union[int, float] = 0 + elif fold == fpred: + 
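# fold == fpred means the quadratic model predicted no change, so the
# agreement ratio r = (fold - fmi) / (fold - fpred) would divide by zero;
# the neutral value 0.5 keeps the step-bound update below from firing either
# the shrink (r < 0.25) or the grow (r > 0.75) branch.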
r = 0.5 + else: + r = (fold - fmi) / (fold - fpred) + else: + gain = f - fmi + r = 0 + + ind = [i for i in range(n) if (u[i] < xmin[i] and xmin[i] < v[i])] + b = np.dot(np.abs(g).T, [max(abs(xmin[i]), abs(xold[i])) for i in range(len(xmin))]) + + while (ncall < nf) and (nstep < maxstep) and ((diag or len(ind) < n) or + (stop[0] == 0 and fmi - gain <= stop[1]) or + (b >= gamma * (f0 - f) and gain > 0)): + nstep = nstep + 1 + delta = [abs(xmin[i]) * eps ** (1 / 3) for i in range(len(xmin))] + j: Union[List[int]] = [inx for inx in range(len(delta)) if (not delta[inx])] + if len(j) != 0: + for inx in j: + delta[inx] = eps ** (1 / 3) * 1 + + x1, x2 = MCSUtils().neighbor(xmin, delta, list(u), list(v)) + f = copy.deepcopy(fmi) + + if len(ind) < n and (b < gamma * (f0 - f) or (not gain)): + ind1 = [i for i in range(len(u)) if (xmin[i] == u[i] or xmin[i] == v[i])] + for k in range(len(ind1)): + i = ind1[k] + x = copy.deepcopy(xmin) + if xmin[i] == u[i]: + x[i] = x2[i] + else: + x[i] = x1[i] + policy = self.get_policy(np.asarray(x), L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + + ncall = ncall + 1 + + if f1 < fmi: + alist = [0, x[i], -xmin[i]] + flist = [fmi, f1] + p = np.zeros(n) + p[i] = 1 + alist, flist, nfls = self.gls(u, v, xmin, p, alist, + flist, nloc, small, 6, stopping_actions) + ncall = ncall + nfls + l: Union[int, np.int32] = np.argmin(flist) + fminew = min(flist) + if fminew == fmi: + temp_list = [inx for inx in range(len(alist)) if (not alist[inx])] + # j = [inx for inx in range(len(alist)) if (not alist[inx])][0] + item = temp_list[0] + l = item + else: + fmi = fminew + xmin[i] = xmin[i] + alist[l] + else: + ind1[k] = -1 + + xmin = np.asarray([max(u[inx], min(xmin[inx], v[inx])) for inx in range(len(xmin))]) + if not sum(ind1): + break + + for inx in range(len(delta)): + delta[inx] = abs(xmin[inx]) * eps ** (1 / 3) + j = [inx for inx in range(len(delta)) if (not delta[inx])] + if len(j) != 0: + for inx in j: + delta[inx] = eps ** (1 / 3) * 1 + x1, x2 = MCSUtils().neighbor(xmin, delta, list(u), list(v)) + + if abs(r - 1) > 0.25 or (not gain) or (b < gamma * (f0 - f)): + xmin, fmi, g, G, x1, x2, nftriple = self.triple(xmin, fmi, x1, x2, u, v, hess, 0, + stopping_actions, setG=True) + ncall = ncall + nftriple + diag = 0 + else: + xmin, fmi, g, G, x1, x2, nftriple = self.triple(xmin, fmi, x1, x2, u, v, + hess, G, stopping_actions) + ncall = ncall + nftriple + diag = 1 + xold = copy.deepcopy(xmin) + fold = copy.deepcopy(fmi) + + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fmi, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fmi, stop[1]) + + if not flag: + return xmin, fmi, ncall, flag, nsweep, nsweepbest + if r < 0.25: + d = 0.5 * d + elif r > 0.75: + d = 2 * d + + minusd = np.asarray([max(-d[jnx], u[jnx] - xmin[jnx]) for jnx in range(len(xmin))]) + mind = np.asarray([min(d[jnx], v[jnx] - xmin[jnx]) for jnx in range(len(xmin))]) + p, _, _ = LSUtils().minq(fmi, g, G, minusd, mind, 0, eps) + + if not (np.linalg.norm(p)) and (not diag) and (len(ind) == n): + break + if np.linalg.norm(p): + fpred = fmi + np.dot(g.T, p) + np.dot(0.5, np.dot(p.T, np.dot(G, p))) + x = copy.deepcopy(xmin + p) + policy = self.get_policy(np.array(x), L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = 
round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall = ncall + 1 + alist = [0, 1] + flist = [fmi, f1] + alist, flist, nfls = self.gls(u, v, xmin, p, alist, + flist, nloc, small, smaxls, stopping_actions) + ncall = ncall + nfls + argmin = np.argmin(flist) + fmi = min(flist) + xmin = [xmin[jnx] + alist[argmin] * p[jnx] for jnx in range(len(xmin))] + xmin = np.asarray([max(u[jnx], min(xmin[jnx], v[jnx])) for jnx in range(len(xmin))]) + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fmi, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fmi, stop[1]) + if not flag: + return xmin, fmi, ncall, flag, nsweep, nsweepbest + + gain = f - fmi + if fold == fmi: + r = 0 + elif fold == fpred: + r = 0.5 + else: + r = (fold - fmi) / (fold - fpred) + if fmi < fold: + fac = abs(1 - 1 / r) + eps0 = max(eps, min(fac * eps0, 0.001)) + else: + eps0 = 0.001 + else: + gain = f - fmi + if (not gain): + eps0 = 0.001 + fac = np.Inf + r = 0 + ind = [inx for inx in range(len(u)) if (u[inx] < xmin[inx] and xmin[inx] < v[inx])] + b = np.dot(np.abs(g).T, [max(abs(xmin[inx]), abs(xold[inx])) for inx in range(len(xmin))]) + return xmin, fmi, ncall, flag, nsweep, nsweepbest + + def basket1(self, x: NDArray[np.float64], f: float, + xmin: List[Union[float, List[float], NDArray[np.float64]]], fmi: List[float], + xbest: List[float], fbest: float, stop: List[Union[float, int]], nbasket: int, + nsweep: int, nsweepbest: int, stopping_actions: int, loc: int = 1, + flag: int = 1, ncall: int = 0): + """ + Basket 1 + :param x: starting point + :param f: function value(s) + :param xmin: current minimum evaluation argument (position) + :param fmi: + :param xbest: current best evaluation argument (position) + :param fbest: current best function value + :param stop: stopping test + :param nbasket: counter for boxes in the 'shopping basket' + :param nsweep: sweep counter + :param nsweepbest: number of sweep in which fbest was updated for the last + :param stopping_actions: number of stopping actions + :return: the metrics and parameters from basket1 + """ + + if not nbasket: + return xbest, fbest, xmin, fmi, loc, flag, ncall, nsweep, nsweepbest + dist = np.zeros(nbasket + 1) + for k in range(len(dist)): + dist[k] = np.linalg.norm(np.subtract(x, xmin[k])) + ind = np.argsort(dist) + + if nbasket == -1: + return xbest, fbest, xmin, fmi, loc, flag, ncall, nsweep, nsweepbest + else: + for k in range(nbasket + 1): + i = ind[k] + p = xmin[i] - x + y1 = x + 1 / 3 * p + policy = self.get_policy(y1, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall = ncall + 1 + if f1 <= max(fmi[i], f): + y2 = x + 2 / 3 * p + policy = self.get_policy(y2, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f2 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + ncall = ncall + 1 + if f2 <= max(f1, fmi[i]): + if f < min(min(f1, f2), fmi[i]): + fmi[i] = f + xmin[i] = copy.deepcopy(x) + if fmi[i] < fbest: + fbest = copy.deepcopy(fmi[i]) + xbest = copy.deepcopy(xmin[i]) + nsweepbest = nsweep + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + if not flag: + return ( + xbest, fbest, + policy, avg_metrics, xmin, + fmi, loc, flag, ncall, + nsweep, nsweepbest, + ) + + 
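# Editor's note: chrelerr/chvtr are the two MCS stopping tests used throughout
# this file. A minimal sketch of their contract, inferred from the call sites
# here rather than copied from MCSUtils (treat the exact tolerance handling as
# an assumption):


def chrelerr_sketch(fbest: float, stop: list) -> int:
    """Clear the flag (return 0) once fbest is within relative accuracy
    stop[0] of the known global optimum stop[1]."""
    fglob = stop[1]
    return 0 if fbest - fglob <= max(abs(fglob), 1e-12) * stop[0] else 1


def chvtr_sketch(f: float, vtr: float) -> int:
    """Clear the flag (return 0) once the value-to-reach vtr is attained."""
    return 0 if f <= vtr else 1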
loc = 0 + break + elif f1 < min(min(f, f2), fmi[i]): # type: ignore[call-overload] + fmi[i] = f1 + xmin[i] = copy.deepcopy(y1) + if fmi[i] < fbest: + fbest = copy.deepcopy(fmi[i]) + xbest = copy.deepcopy(xmin[i]) + nsweepbest = copy.deepcopy(nsweep) + + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + if not flag: + return ( + xbest, fbest, + policy, avg_metrics, xmin, + fmi, loc, flag, ncall, + nsweep, nsweepbest, + ) + # end fmi[i] < fbest: elif + loc = 0 + break + elif f2 < min(min(f, f1), fmi[i]): # type: ignore[call-overload] + fmi[i] = f2 + xmin[i] = copy.deepcopy(y2) + if fmi[i] < fbest: + fbest = copy.deepcopy(fmi[i]) + xbest = copy.deepcopy(xmin[i]) + nsweepbest = nsweep + if stop[0] > 0 and stop[0] < 1: + flag = MCSUtils().chrelerr(fbest, stop) + elif stop[0] == 0: + flag = MCSUtils().chvtr(fbest, stop[1]) + if not flag: + return ( + xbest, fbest, + policy, avg_metrics, xmin, + fmi, loc, + flag, ncall, + nsweep, nsweepbest, + ) + loc = 0 + break + else: + loc = 0 + break + return xbest, fbest, policy, avg_metrics, xmin, fmi, loc, flag, ncall, nsweep, nsweepbest + + def csearch(self, x: List[Union[float, int]], f: float, u: List[int], v: List[int], hess: NDArray[np.float64], + stopping_actions: int, eps: float): + """ + Performs the csearch algorithm + :param x: starting point + :param f: function value + :param u: lower initial guess ("Lower corner" in 3D) + :param v: upper initial guess ("upper corner" in 3D) + :param hess: the function Hessian + :param stopping_actions: the number of stopping actions + :return: a collection of parameters and metrics after doing the csearch + """ + n = len(x) + x = [min(v[i], max(x[i], u[i])) for i in range(len(x))] + + nfcsearch = 0 + smaxls = 6 + small = 0.1 + nloc = 1 + hess = np.ones((n, n)) + xmin = copy.deepcopy(x) + fmi = copy.deepcopy(f) + xminnew = copy.deepcopy(xmin) + fminew = copy.deepcopy(fmi) + g = np.zeros(n) + x1 = np.zeros(n) + x2 = np.zeros(n) + G = np.zeros((n, n)) + + for i in range(n): + p = np.zeros(n) + p[i] = 1 + if xmin[i]: + delta = eps ** (1 / 3) * abs(xmin[i]) + else: + delta = eps ** (1 / 3) + linesearch = True + if xmin[i] <= u[i]: + policy = self.get_policy(xmin + delta * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + nfcsearch = nfcsearch + 1 + if f1 >= fmi: + policy = self.get_policy(xmin + 2 * delta * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f2 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + # fcsearch = nfcsearch + 1 + x1[i] = xmin[i] + delta + x2[i] = xmin[i] + 2 * delta + if f2 >= fmi: + xminnew[i] = xmin[i] + fminew = fmi + else: + xminnew[i] = x2[i] + fminew = copy.deepcopy(f2) + linesearch = False + else: + alist: List[Union[float, int]] = [0, delta] + flist: List[Union[float, int]] = [fmi, f1] + elif xmin[i] >= v[i]: + policy = self.get_policy(xmin - delta * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + nfcsearch = nfcsearch + 1 + if f1 >= fmi: + policy = self.get_policy(xmin - 2 * delta * p, L=stopping_actions) + avg_metrics = 
self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f2 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + nfcsearch = nfcsearch + 1 + x1[i] = xmin[i] - delta + x2[i] = xmin[i] - 2 * delta + if f2 >= fmi: + xminnew[i] = xmin[i] + fminew = fmi + else: + xminnew[i] = x2[i] + fminew = f2 + linesearch = False + else: + alist = [0, -delta] + flist = [fmi, f1] + else: + alist = [0] + flist = [fmi] + + if linesearch: + alist, flist, nfls = self.gls(u, v, xmin, p, alist, flist, nloc, + small, smaxls, stopping_actions) + nfcsearch = nfcsearch + nfls + j: Union[int, np.int32] = np.argmin(flist) + fminew = min(flist) + + if fminew == fmi: + j = [inx for inx in range(len(alist)) if not alist[inx]][0] + + ind = [inx for inx in range(len(alist)) if abs(alist[inx] - alist[j]) < delta] + ind1 = [inx for inx in range(len(ind)) if ind[inx] == j] + for inx in ind1: + del ind[inx] + + for inx in ind: + del alist[inx] + del flist[inx] + + j = np.argmin(flist) + fminew = min(flist) + xminnew[i] = xmin[i] + alist[j] + if i == 0 or not alist[j]: + if j == 0: + x1[i] = xmin[i] + alist[1] + f1 = flist[1] + x2[i] = xmin[i] + alist[2] + f2 = flist[2] + elif j == len(alist) - 1: + x1[i] = xmin[i] + alist[j - 1] + f1 = flist[j - 1] + x2[i] = xmin[i] + alist[j - 2] + f2 = flist[j - 2] + else: + x1[i] = xmin[i] + alist[j - 1] + f1 = flist[j - 1] + x2[i] = xmin[i] + alist[j + 1] + f2 = flist[j + 1] + xmin[i] = xminnew[i] + fmi = copy.deepcopy(fminew) + else: + x1[i] = xminnew[i] + f1 = copy.deepcopy(fminew) + if xmin[i] < x1[i] and j < len(alist) - 1: + x2[i] = xmin[i] + alist[j + 1] + f2 = flist[j + 1] + elif j == 0: + if alist[j + 1]: + x2[i] = xmin[i] + alist[j + 1] + f2 = flist[j + 1] + else: + x2[i] = xmin[i] + alist[j + 2] + f2 = flist[j + 2] + elif alist[j - 1]: + x2[i] = xmin[i] + alist[j - 1] + f2 = flist[j - 1] + else: + x2[i] = xmin[i] + alist[j - 2] + f2 = flist[j - 2] + g[i], G[i, i] = MCSUtils().polint1([xmin[i], x1[i], x2[i]], [fmi, f1, f2]) + x = copy.deepcopy(xmin) + k1 = -1 + if f1 <= f2: + x[i] = x1[i] + else: + x[i] = x2[i] + for k in range(i): + if hess[i, k]: + q1 = fmi + g[k] * (x1[k] - xmin[k]) + 0.5 * G[k, k] * (x1[k] - xmin[k]) ** 2 + q2 = fmi + g[k] * (x2[k] - xmin[k]) + 0.5 * G[k, k] * (x2[k] - xmin[k]) ** 2 + if q1 <= q2: + x[k] = x1[k] + else: + x[k] = x2[k] + policy = self.get_policy(np.array(x), L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f12 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + nfcsearch = nfcsearch + 1 + G[i, k] = MCSUtils().hessian(i, k, x, xmin, f12, fmi, g, G) + G[k, i] = G[i, k] + if f12 < fminew: + fminew = f12 + xminnew = copy.deepcopy(x) + k1 = k + x[k] = xmin[k] + else: + G[i, k] = 0 + G[k, i] = 0 + if fminew <= fmi: + if x1[i] == xminnew[i]: + x1[i] = xmin[i] + elif x2[i] == xminnew[i]: + x2[i] = xmin[i] + if k1 > -1: + if xminnew[k1] == x1[k1]: + x1[k1] = xmin[k1] + elif xminnew[k1] == x2[k1]: + x2[k1] = xmin[k1] + + for k in range(i + 1): + g[k] = g[k] + G[i, k] * (xminnew[i] - xmin[i]) + if k1 > -1: + g[k] = g[k] + G[k1, k] * (xminnew[k1] - xmin[k1]) + xmin = copy.deepcopy(xminnew) + fmi = copy.deepcopy(fminew) + return xmin, fmi, g, G, nfcsearch + + def gls(self, xl: List[int], xu: List[int], x: List[Union[float, int]], p: NDArray[Union[np.int32, np.float64]], + alist: List[Union[float, int]], flist: List[Union[int, float]], + nloc: int, small: Union[float, 
int], smax: int, stopping_actions: int, prt: int = 2, + short: float = 0.381966, bend: int = 0): + """ + Global line search main function + + :param func: funciton name which is subjected to optimization + :param xl: lower bound + :param xu: upper bound + :param x: starting point + :param p: search direction + :param alist: list of known steps + :param flist: function values of known steps + :param nloc: (for local ~= 0) counter of points that have been + :param small: tolerance values + :param smax: search list size + :param prt: print - unsued in this implementation so far + :param short: + :param bend: + :return: search list,function values,number of fucntion evaluation + """ + + sinit = len(alist) + + # bend = 0 + xl, xu, x, p, amin, amax, scale = GLSUtils().lsrange(xl, xu, x, p, prt, bend) + alist, flist, alp, alp1, alp2, falp = self.lsinit(x, p, alist, flist, amin, amax, scale, stopping_actions) + + alist, flist, abest, fbest, fmed, up, down, monotone, minima, nmin, unitlen, s = GLSUtils().lssort(alist, flist) + nf = s - sinit + + while s < min(5, smax): + if nloc == 1: + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s, alp, fac) = self.lspar(nloc, small, sinit, short, x, p, alist, flist, + amin, amax, alp, abest, fbest, fmed, up, down, + monotone, minima, nmin, unitlen, s, stopping_actions) + + if s > 3 and monotone and (abest == amin or abest == amax): + nf = s - sinit + return alist, flist, nf + else: + alist, flist, alp, fac = self.lsnew(nloc, small, sinit, short, x, p, s, alist, flist, + amin, amax, alp, abest, fmed, unitlen, stopping_actions) + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist) + saturated = 0 + if nmin == 1: + if monotone and (abest == amin or abest == amax): + nf = s - sinit + return alist, flist, nf + if s == 5: + (alist, flist, amin, amax, alp, abest, fbest, fmed, up, down, + monotone, minima, nmin, unitlen, s, good, saturated) = self.lsquart(nloc, small, sinit, + short, np.array(x), p, alist, + flist, amin, amax, alp, + abest, fbest, fmed, up, + down, monotone, minima, + nmin, unitlen, s, saturated, + stopping_actions) + (alist, flist, alp, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = self.lsdescent(x, p, alist, flist, alp, + abest, fbest, fmed, up, down, + monotone, minima, nmin, unitlen, + s, stopping_actions) + convex = GLSUtils().lsconvex(alist, flist, nmin, s) + if convex: + nf = s - sinit + return alist, flist, nf + sold = 0 + + while 1: + (alist, flist, alp, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = self.lsdescent(x, p, alist, flist, alp, abest, + fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s, stopping_actions) + alp, saturated = GLSUtils().lssat(small, alist, flist, alp, amin, amax, s, saturated) + if saturated or s == sold or s >= smax: + break + + sold = s + nminold = nmin + if not saturated and nloc > 1: + (alist, flist, amin, amax, alp, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = self.lssep(nloc, small, + sinit, short, x, + p, alist, flist, amin, + amax, alp, abest, fbest, + fmed, up, down, monotone, + minima, nmin, unitlen, s, + stopping_actions) + (alist, flist, alp, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s, saturated) = self.lslocal(nloc, small, sinit, short, + x, p, alist, flist, amin, amax, + alp, abest, fbest, fmed, up, + down, monotone, minima, nmin, + unitlen, s, saturated, stopping_actions) + if nmin > nminold: + 
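# nmin grew during the separation/refinement pass, i.e. new local minimizers
# appeared in the step list, so the search cannot be saturated yet and the
# flag is cleared before the loop re-checks its exit conditions.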
saturated = 0 + nf = s - sinit + + return alist, flist, nf + + def lsinit(self, x, p, alist, flist, amin, amax, scale, stopping_actions): + """ + Line search algorithm + + :param x: starting point + :param p: search direction + :param alist: list of known steps + :param flist: function values of known steps + :param amin: + :param amax: + :param scale: + :param stopping_actions: number of stopping actions + :return: set of parameters obtained from performing the line search + """ + alp: Union[int, float] = 0 + alp1: Union[int, float] = 0 + alp2: Union[int, float] = 0 + falp: Union[float, int] = 0 + + if len(alist) == 0: + # evaluate at absolutely smallest point + alp = 0 + if amin > 0: + alp = amin + if amax < 0: + alp = amax + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + elif len(alist) == 1: + # evaluate at absolutely smallest point + alp = 0 + if amin > 0: + alp = amin + if amax < 0: + alp = amax + if alist[0] != alp: + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + + aamin = min(alist) + aamax = max(alist) + # if amin > aamin or amax < aamax: + # sys.exit('GLS Error: non-admissible step in alist') # TODO: investigate this + if aamax - aamin <= scale: + alp1 = max(amin, min(- scale, amax)) + alp2 = max(amin, min(+ scale, amax)) + alp = np.Inf + + if aamin - alp1 >= alp2 - aamax: + alp = alp1 + if alp2 - aamax >= aamin - alp1: + alp = alp2 + if alp < aamin or alp > aamax: + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + if len(alist) == 1: + sys.exit('GLS Error: lsinit bug: no second point found') + + return alist, flist, alp, alp1, alp2, falp + + def triple(self, x: Union[List[Union[int, float]]], f: float, x1: Union[List[Union[int, float]]], + x2: Union[List[Union[int, float]]], u: List[int], v: List[int], + hess, G, stopping_actions, setG=False): + """ + The triple function + :param x: starting point + :param f: function value + :param x1: evaluation argument (position) + :param x2: evaluation argument (position) + :param u: lower initial guess ("Lower corner" in 3D) + :param v: lower initial guess ("upper corner" in 3D) + :param hess: the hessian of the function + :param G: + :param stopping_actions: number of stopping actions + :param setG: + :return: the set of parameters and metrics after performing the triple + """ + nf = 0 + n = len(x) + g = np.zeros(n) + nargin = 10 + if setG: + nargin = 9 + G = np.zeros((n, n)) + + ind = [i for i in range(n) if (u[i] < x[i] and x[i] < v[i])] + ind1 = [i for i in range(n) if (x[i] <= u[i] or x[i] >= v[i])] + + for j in range(len(ind1)): + g[ind1[j]] = 0 + for k in range(n): + G[ind1[j], k] = 0 + G[k, ind1[j]] = 0 + + if len(ind) <= 1: + xtrip = copy.deepcopy(x) + ftrip = copy.deepcopy(f) + if len(ind) != 0: + for i in ind: + g[i] = 1 + G[i, i] = 1 + return xtrip, ftrip, g, G, x1, x2, 
nf + + if setG: + G = np.zeros((n, n)) + xtrip = copy.deepcopy(x) + ftrip = copy.deepcopy(f) + xtripnew = copy.deepcopy(x) + ftripnew = copy.deepcopy(f) + for j in range(len(ind)): + i = ind[j] + x = copy.deepcopy(xtrip) + f = copy.deepcopy(ftrip) + + x[i] = x1[i] + + policy = self.get_policy(x, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f1 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + x[i] = x2[i] + policy = self.get_policy(x, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f2 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + nf = nf + 2 + g[i], G[i, i] = MCSUtils().polint1([xtrip[i], x1[i], x2[i]], [f, f1, f2]) + if f1 <= f2: + if f1 < ftrip: + ftripnew = copy.deepcopy(f1) + xtripnew[i] = x1[i] + else: + if f2 < ftrip: + ftripnew = copy.deepcopy(f2) + xtripnew[i] = x2[i] + + if nargin < 10: + k1 = -1 + if f1 <= f2: + x[i] = x1[i] + else: + x[i] = x2[i] + + for k in range(i): + if hess[i, k]: + if xtrip[k] > u[k] and xtrip[k] < v[k] and \ + (len([m for m in range(len(ind)) if ind[m] == k]) != 0): + q1 = ftrip + g[k] * (x1[k] - xtrip[k]) + 0.5 * G[k, k] * (x1[k] - xtrip[k]) ** 2 + q2 = ftrip + g[k] * (x2[k] - xtrip[k]) + 0.5 * G[k, k] * (x2[k] - xtrip[k]) ** 2 + if q1 <= q2: + x[k] = x1[k] + else: + x[k] = x2[k] + policy = self.get_policy(x, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + f12 = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + nf = nf + 1 + G[i, k] = MCSUtils().hessian(i, k, x, xtrip, f12, ftrip, g, G) + G[k, i] = G[i, k] + if f12 < ftripnew: + ftripnew = copy.deepcopy(f12) + xtripnew = copy.deepcopy(x) + k1 = k + x[k] = xtrip[k] + else: + G[i, k] = 0 + G[k, i] = 0 + + if ftripnew < ftrip: + if x1[i] == xtripnew[i]: + x1[i] = xtrip[i] + else: + x2[i] = xtrip[i] + if nargin < 10 and k1 > -1: + if xtripnew[k1] == x1[k1]: + x1[k1] = xtrip[k1] + else: + x2[k1] = xtrip[k1] + for k in range(i + 1): + if (len([m for m in range(len(ind)) if ind[m] == k]) != 0): + g[k] = g[k] + G[i, k] * (xtripnew[i] - xtrip[i]) + if nargin < 10 and k1 > -1: + g[k] = g[k] + G[k1, k] * (xtripnew[k1] - xtrip[k1]) + xtrip = copy.deepcopy(xtripnew) + ftrip = copy.deepcopy(ftripnew) + return xtrip, ftrip, g, G, x1, x2, nf + + def lspar(self, nloc: int, small: Union[float, int], sinit: int, short: float, + x: Union[List[Union[int, float]], NDArray[np.float64]], p: NDArray[Union[np.float64, np.int32]], + alist: List[Union[float, int]], flist: List[Union[float, int]], amin: float, amax: float, + alp: Union[int, float], abest: float, fbest: float, + fmed: float, up: List[float], down: List[float], monotone: int, + minima: List[int], nmin: int, unitlen: float, s: int, stopping_actions: int): + """ + The lspar function + :param nloc: (for local ~= 0) counter of points that have been + :param small: tolerance values + :param sinit: length of list of known steps + :param short: + :param x: starting point + :param p: search direction + :param alist: list of known steps + :param flist: function values of known steps + :param amin: + :param amax: + :param alp: + :param abest: best step + :param fbest: best function value so far + :param fmed: + :param up: + :param down: + :param monotone: + :param minima: + :param nmin: + :param unitlen: + 
:param s: + :param stopping_actions: number if stopping actions + :return: the set of parameters and metrics after performing lspar + """ + cont = 1 + fac = short + if s < 3: + alist, flist, alp, fac = self.lsnew(nloc, small, sinit, short, x, p, s, + alist, flist, amin, amax, alp, + abest, fmed, unitlen, stopping_actions) + cont = 0 + + if cont: + # fmin = min(flist) + i: Union[int, np.int32] = np.argmin(flist) + if i <= 1: + ind = [j for j in range(3)] + ii: Union[int, np.int32] = copy.deepcopy(i) + elif i >= s - 2: + ind = [j for j in range(s - 2 - 1, s)] + ii = i - (s - 1) + 2 + else: + ind = [j for j in range(i - 1, i + 1)] + ii = 2 - 1 + + aa = [alist[j] for j in ind] + ff = [flist[j] for j in ind] + + f12 = (ff[1] - ff[0]) / (aa[1] - aa[0]) + f23 = (ff[2] - ff[1]) / (aa[2] - aa[1]) + f123 = (f23 - f12) / (aa[2] - aa[0]) + if not (f123 > 0): + alist, flist, alp, fac = self.lsnew(nloc, small, sinit, short, x, p, s, alist, + flist, amin, amax, alp, abest, fmed, unitlen, + stopping_actions) + # alist,flist,abest,fbest,fmed,up,down,monotone,minima,nmin,unitlen,s = GLSUtils().lssort(alist,flist) + cont = 0 + if cont: + alp0 = 0.5 * (aa[1] + aa[2] - f23 / f123) + alp = LSUtils().lsguard(alp0, alist, amax, amin, small) + alptol = small * (aa[2] - aa[0]) + if f123 == np.Inf or min([abs(i - alp) for i in alist]) <= alptol: + if ii == 0 or (ii == 1 and (aa[1] >= 0.5 * (aa[0] + aa[2]))): + alp = 0.5 * (aa[0] + aa[1]) + else: + alp = 0.5 * (aa[1] + aa[2]) + # else: + # np_print = alp0 + + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist) + return alist, flist, abest, fbest, fmed, up, down, monotone, minima, nmin, unitlen, s, alp, fac + + def lsnew(self, nloc: int, small: Union[float, int], sinit: int, short: float, + x: Union[List[Union[int, float]], NDArray[np.float64]], p: NDArray[Union[np.float64, np.int32]], + s: int, alist: List[Union[float, int]], flist: List[Union[float, int]], + amin: float, amax: float, alp: Union[int, float], abest: float, fmed: float, + unitlen: float, stopping_actions: int): + """ + The lsnew function + :param nloc: (for local ~= 0) counter of points that have been + :param small: tolerance values + :param sinit: + :param short: + :param x: starting point + :param p: search direction + :param s: current depth level + :param alist: list of known steps + :param flist: function values of known steps + :param amin: + :param amax: + :param alp: + :param abest: best step + :param fmed: + :param unitlen: + :param stopping_actions + :return: set of parameters and metrics obtained after performing lsnew + """ + if alist[0] <= amin: + leftok = 0 + elif flist[0] >= max(fmed, flist[1]): + leftok = (sinit == 1 or nloc > 1) + else: + leftok = 1 + if alist[s - 1] >= amax: + rightok = 0 + elif flist[s - 1] >= max(fmed, flist[s - 2]): + rightok = (sinit == 1 or nloc > 1) + else: + rightok = 1 + if sinit == 1: + step = s - 1 + else: + step = 1 + fac = short + if leftok and (flist[0] < flist[s - 1] or (not rightok)): + # extra = 1 + al = alist[0] - (alist[0 + step] - alist[0]) / small + alp = max(amin, al) + elif rightok: + # extra = 1 + au = alist[s - 1] + (alist[s - 1] - alist[s - 1 - step]) / small + alp = min(au, 
amax) + else: + # extra = 0 + lenth = [i - j for i, j in zip(alist[1: s], alist[0: s - 1])] + dist = [max(i, j, k) for i, j, k in zip([i - abest for i in alist[1: s]], + [abest - i for i in alist[0: s - 1]], + (unitlen * np.ones(s - 1)).tolist())] + wid = [lenth[i] / dist[i] for i in range(len(lenth))] + i = np.argmax(wid) + alp, fac = LSUtils().lssplit(int(i), alist, flist, short) + + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + + return alist, flist, alp, fac + + def lsdescent(self, x: Union[List[Union[int, float]], NDArray[np.float64]], + p: NDArray[Union[np.float64, np.int32]], alist: List[Union[float, int]], + flist: List[Union[float, int]], alp: Union[int, float], + abest: float, fbest: float, fmed: float, up: List[float], + down: List[float], monotone: int, minima: List[int], + nmin: int, unitlen: float, s: int, stopping_actions: int): + """ + The lsdescent algorithm + :param x: starting point + :param p: search direction + :param alist: list of known steps + :param flist: function values of known steps + :param alp: + :param abest: best step + :param fbest: best function value so far + :param fmed: + :param up: + :param down: + :param monotone: + :param minima: + :param nmin: + :param unitlen: + :param s: the current depth level + :param stopping_actions: number of stopping actions + :return: the set pf parameters and metrics obtained from performing lsdescent + """ + cont: Union[bool, int] = max([i == 0 for i in alist]) + + if cont: + fbest = min(flist) + i = np.argmin(flist) + if alist[i] < 0: + if alist[i] >= 4 * alist[i + 1]: + cont = 0 + elif alist[i] > 0: + if alist[i] < 4 * alist[i - 1]: + cont = 0 + else: + if i == 0: + fbest = flist[1] + elif i == s - 1: + fbest = flist[s - 2] + else: + fbest = min(flist[i - 1], flist[i + 1]) + if cont: + if alist[i] != 0: + alp = alist[i] / 3 + elif i == s - 1: + alp = alist[s - 2] / 3 + elif i == 0: + alp = alist[1] / 3 + else: + if alist[i + 1] - alist[i] > alist[i] - alist[i - 1]: + alp = alist[i + 1] / 3 + else: + alp = alist[i - 1] / 3 + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist) + return (alist, flist, alp, abest, fbest, fmed, up, down, + monotone, minima, nmin, unitlen, s) + + def lsquart(self, nloc: int, small: Union[float, int], sinit: int, short: float, + x: NDArray[np.float64], p: NDArray[Union[np.float64, np.int32]], + alist: List[Union[float, int]], flist: List[float], amin: float, amax: float, + alp: float, abest: float, fbest: float, + fmed: float, up: List[float], down: List[float], + monotone: int, minima: List[Union[int, float, bool]], nmin: int, unitlen: float, s: int, + saturated: int, stopping_actions: int): + """ + The lsaquart function + :param nloc: (for local ~= 0) counter of points that have been + :param small: tolerance values + :param sinit: initial depth level + :param short: + :param x: starting point + :param p: search direction + :param alist: list of known steps + :param flist: 
function values of known steps + :param amin: + :param amax: + :param alp: + :param up: + :param down: + :param monotone: + :param minima: + :param nmin + :param unitlen: + :param s: the current depth level + :param saturated: + :param stopping_actions: the number of stopping actions + :return: the parameters and metrics obtained from performing lsquart + """ + if alist[0] == alist[1]: + f12: Union[int, float] = 0 + else: + f12 = (flist[1] - flist[0]) / (alist[1] - alist[0]) + + if alist[1] == alist[2]: + f23: Union[int, float] = 0 + else: + f23 = (flist[2] - flist[1]) / (alist[2] - alist[1]) + + if alist[2] == alist[3]: + f34: Union[int, float] = 0 + else: + f34 = (flist[3] - flist[2]) / (alist[3] - alist[2]) + + if alist[3] == alist[4]: + f45: Union[int, float] = 0 + else: + f45 = (flist[4] - flist[3]) / (alist[4] - alist[3]) + + f123 = (f23 - f12) / (alist[2] - alist[0]) + f234 = (f34 - f23) / (alist[3] - alist[1]) + f345 = (f45 - f34) / (alist[4] - alist[2]) + f1234 = (f234 - f123) / (alist[3] - alist[0]) + f2345 = (f345 - f234) / (alist[4] - alist[1]) + f12345 = (f2345 - f1234) / (alist[4] - alist[0]) + good = np.Inf + + if f12345 <= 0: + good = 0 + (alist, flist, alp, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s, saturated) = self.lslocal(nloc, small, sinit, + short, x, p, alist, + flist, amin, amax, alp, + abest, fbest, fmed, up, + down, monotone, minima, nmin, + unitlen, s, saturated, stopping_actions) + quart = 0 + else: + quart = 1 + + if quart: + c = np.zeros(len(alist)) + c[0] = f12345 + c[1] = f1234 + c[0] * (alist[2] - alist[0]) + c[2] = f234 + c[1] * (alist[2] - alist[3]) + c[1] = c[1] + c[0] * (alist[2] - alist[3]) + c[3] = f23 + c[2] * (alist[2] - alist[1]) + c[2] = c[2] + c[1] * (alist[2] - alist[1]) + c[1] = c[1] + c[0] * (alist[2] - alist[1]) + c[4] = flist[2] + cmax = max(c) + c = np.divide(c, cmax) + hk = 4 * c[0] + compmat = [[0, 0, - c[3]], [hk, 0, - 2 * c[2]], [0, hk, - 3 * c[1]]] + ev = np.divide(np.linalg.eig(compmat)[0], hk) + i = np.where(ev.imag == 0) + + if i[0].shape[0] == 1: + alp = alist[2] + ev[i[0][0]] + + else: + ev = np.sort(ev) + alp1 = LSUtils().lsguard(alist[2] + ev[0], alist, amax, amin, small) + alp2 = LSUtils().lsguard(alist[2] + ev[2], alist, amax, amin, small) + f1 = cmax * LSUtils().quartic(c, alp1 - alist[2]) + f2 = cmax * LSUtils().quartic(c, alp2 - alist[2]) + + if alp2 > alist[4] and f2 < max(flist): + alp = alp2 + elif alp1 < alist[0] and f1 < max(flist): + alp = alp1 + elif f2 <= f1: + alp = alp2 + else: + alp = alp1 + + if max([i == alp for i in alist]): + quart = 0 + if quart: + alp = LSUtils().lsguard(alp, alist, amax, amin, small) + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist) + + return (alist, flist, amin, amax, alp, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s, good, saturated) + + def lssep(self, nloc: int, small: float, sinit: int, short: float, + x: Union[List[Union[float, int]], NDArray[np.float64]], p: NDArray[Union[np.float64, np.int32]], + alist: List[Union[float, int]], flist: List[float], + amin: float, amax: float, alp: float, abest: float, fbest: float, + fmed: float, up: List[float], down: List[float], 
monotone: int, + minima: List[int], nmin: int, unitlen: float, + s: int, stopping_actions: int): + """ + The lssep function + :param nloc: (for local ~= 0) counter of points that have been + :param small: tolerance values + :param sinit: initial depth levekl + :param short: + :param x: starting point + :param p: search direction + :param alist: list of known steps + :param flist: function values of known steps + :param amin: + :param amax: + :param alp: + :param abest: best step + :param fbest: best function value so far + :param fmed: median function value + :param up: + :param down: + :param monotone: + :param minima: + :param nmin: + :param untilen: + :param s: current depth level + :param stopping_actions: the number of stopping actions + :return: the parameters and metrics obtained from performing lssep + """ + nsep = 0 + while nsep < nmin: + down = [i < j for i, j in zip(flist[1: s], flist[0: s - 1])] + sep = [i and j and k for i, j, k in zip([True, True] + down, [False] + up + [False], down + [True, True])] + temp_sep = [i and j and k for i, j, k in zip([True, True] + up, + [False] + down + [False], up + [True, True])] + sep = [i or j for i, j in zip(sep, temp_sep)] + + ind = [i for i in range(len(sep)) if sep[i]] + + if len(ind) == 0: + break + + aa = [0.5 * (alist[i] + alist[i - 1]) for i in ind] # interval midpoints + if len(aa) > nloc: + # ff: List[Union[int, float]] = [min(flist[i], flist[j]) for i, j in ind] + ff: List[Union[int, float]] = [min(flist[i], flist[j]) for + i, j in enumerate(ind)] # this must be the intent + ind = list(np.argsort(ff)) + ff.sort() + aa = [aa[ind[i]] for i in range(0, nloc)] + + for alp in aa: + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + nsep = nsep + 1 + if nsep >= nmin: + break + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist) + + for times in range(0, nmin - nsep): + print(times) + alist, flist, alp, fac = self.lsnew(nloc, small, sinit, short, x, p, s, alist, + flist, amin, amax, alp, abest, fmed, unitlen, stopping_actions) + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist) + + return (alist, flist, amin, amax, alp, abest, fbest, fmed, + up, down, monotone, minima, nmin, unitlen, s) + + def lslocal(self, nloc: int, small: float, sinit: int, short: float, + x: Union[List[Union[int, float]], NDArray[np.float64]], + p: NDArray[Union[np.float64, np.int32]], alist: List[Union[float, int]], + flist: List[float], amin: float, amax: float, alp: float, + abest: float, fbest: float, fmed: float, + up: List[float], down: List[float], monotone: int, + minima: List[Union[int, float, bool]], nmin: int, unitlen: float, s: int, + saturated: int, stopping_actions: int): + """ + The lslocal function + :param nloc: (for local ~= 0) counter of points that have been + :param small: tolerance values + :param sinit: the initial depth level: + :param short: + :param x: starting point + :param p: search direction + :param alist: list of known steps + :param flist: function values of known steps + :param amin: + :param amax: + :param alp: + :param abest: best step + :param fbest: best function value so far + :param fmed: median function value + :param up: + :param down: + :param 
monotone: if function is monotone or not + :param minima: + :param nmin: + :param unitlen: + :param s: current depth level + :return: the parameters and metrics obtained from lslocal + """ + up = [i < j for i, j in zip(flist[0: s - 1], flist[1: s])] + down = [i <= j for i, j in zip(flist[1: s], flist[0: s - 1])] + down[s - 2] = (flist[s - 1] < flist[s - 2]) + minima = [i and j for i, j in zip(up + [True], [True] + down)] + imin = [i for i in range(len(minima)) if minima[i]] + + ff = [flist[i] for i in imin] + perm = np.argsort(ff) + ff.sort() + + imin = [imin[i] for i in perm] + nind = min(nloc, len(imin)) + imin = imin[nind - 1:: - 1] + + nadd = 0 + nsat = 0 + + for i in imin: + if i <= 1: + ind = [j for j in range(5)] + ii = i + elif i >= s - 2: + ind = [j for j in range(s - 5, s)] + ii = i - (s - 1) + 4 + else: + ind = [j for j in range(i - 2, i + 3)] + ii = 2 + aa = [alist[i] for i in ind] + ff = [flist[i] for i in ind] + + f12 = (ff[1] - ff[0]) / (aa[1] - aa[0]) + f23 = (ff[2] - ff[1]) / (aa[2] - aa[1]) + f34 = (ff[3] - ff[2]) / (aa[3] - aa[2]) + f45 = (ff[4] - ff[3]) / (aa[4] - aa[3]) + f123 = (f23 - f12) / (aa[2] - aa[0]) + f234 = (f34 - f23) / (aa[3] - aa[1]) + f345 = (f45 - f34) / (aa[4] - aa[2]) + if ii == 0: + cas = 0 + if f123 > 0 and f123 < np.Inf: + alp = 0.5 * (aa[1] + aa[2] - f23 / f123) + if alp < amin: + cas = -1 + else: + alp = -np.Inf + if alist[0] == amin and flist[1] < flist[2]: + cas = -1 + alp = LSUtils().lsguard(alp, alist, amax, amin, small) + elif ii == 4: + cas = 0 + if f345 > 0 and f345 < np.Inf: + alp = 0.5 * (aa[2] + aa[3] - f34 / f345) + if alp > amax: + cas = -1 + else: + alp = np.Inf + if alist[s - 1] == amax and flist[s - 2] < flist[s - 3]: + cas = -1 + alp = LSUtils().lsguard(alp, alist, amax, amin, small) + elif not (f234 > 0 and f234 < np.Inf): + cas = 0 + if ii < 2: + alp = 0.5 * (aa[1] + aa[2] - f23 / f123) + else: + alp = 0.5 * (aa[2] + aa[3] - f34 / f345) + + elif not (f123 > 0 and f123 < np.Inf): + if f345 > 0 and f345 < np.Inf: + cas = 5 + else: + + cas = 0 + alp = 0.5 * (aa[2] + aa[3] - f34 / f234) + elif f345 > 0 and f345 < np.Inf and ff[1] > ff[3]: + cas = 5 + else: + cas = 1 + + if cas == 0: + alp = max(amin, min(alp, amax)) + elif cas == 1: + + if ff[1] < ff[2]: + f13 = (ff[2] - ff[0]) / (aa[2] - aa[0]) + f1x4 = (f34 - f13) / (aa[3] - aa[0]) + else: + f24 = (ff[3] - ff[1]) / (aa[3] - aa[1]) + f1x4 = (f24 - f12) / (aa[3] - aa[0]) + alp = 0.5 * (aa[1] + aa[2] - f23 / (f123 + f234 - f1x4)) + if alp <= min(aa) or alp >= max(aa): + cas = 0 + alp = 0.5 * (aa[1] + aa[2] - f23 / max(f123, f234)) + elif cas == 5: + if ff[2] < ff[3]: + f24 = (ff[3] - ff[1]) / (aa[3] - aa[1]) + f2x5 = (f45 - f24) / (aa[4] - aa[1]) + else: + f35 = (ff[4] - ff[2]) / (aa[4] - aa[2]) + f2x5 = (f35 - f23) / (aa[4] - aa[1]) + alp = 0.5 * (aa[2] + aa[3] - f34 / (f234 + f345 - f2x5)) + if alp <= min(aa) or alp >= max(aa): + cas = 0 + alp = 0.5 * (aa[2] + aa[3] - f34 / max(f234, f345)) + if cas < 0 or flist[i] > fmed: + alptol: Union[float, int] = 0 + elif cas >= 0: + if i == 0: + alptol = small * (alist[2] - alist[0]) + elif i == s - 1: + alptol = small * (alist[s - 1] - alist[s - 3]) + else: + alptol = small * (alist[i + 1] - alist[i - 1]) + close = (min([abs(i - alp) for i in alist]) <= alptol) + if cas < 0 or close: + nsat = nsat + 1 + + saturated = (nsat == nind) + final = saturated and not max([i == alp for i in alist]) + if cas >= 0 and (final or not close): + nadd = nadd + 1 + policy = self.get_policy(x + alp * p, L=stopping_actions) + avg_metrics = 
self.eval_theta(policy=policy, + max_steps=self.experiment_config.hparams[ + agents_constants.COMMON.MAX_ENV_STEPS].value) + falp = round(avg_metrics[env_constants.ENV_METRICS.RETURN], 3) + alist.append(alp) + flist.append(falp) + if nadd: + (alist, flist, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist) + return (alist, flist, alp, abest, fbest, fmed, up, down, monotone, + minima, nmin, unitlen, s, saturated) diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/__init__.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/gls_utils.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/gls_utils.py new file mode 100644 index 000000000..942495d25 --- /dev/null +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/gls_utils.py @@ -0,0 +1,189 @@ +from typing import Union, List +import numpy as np +import sys +from typing import Tuple + + +class GLSUtils: + """ + Class with util functions for MCS + """ + + def lsrange(self, xl, xu, x, p, prt, bend): + """ + Defining line search range + + :param xl: lower bound + :param xu: upper bound + :param x: starting point + :param p: Search direction + :param prt: print command - unused in this implementation sofar + :param bend: + """ + if np.max(np.abs(p)) == 0: + sys.exit('GLS Error: zero search direction in line search') + + # Find sensible step size scale + if type(p) is not np.ndarray: + if type(p) is not list: + p = [p] + p = np.asarray(p) + + if type(x) is not np.ndarray: + if type(x) is not list: + x = [x] + xl = [xl] + xu = [xu] + x = np.asarray(x) + xl = np.asarray(xl) + xu = np.asarray(xu) + + # this is test for python + if x.shape != p.shape: + sys.exit('GLS Error: dim of x and p does not match: program is going to fail') + + pp = np.abs(p[p != 0]) + u = np.divide(np.abs(x[p != 0]), pp) + scale = min(u) + + if scale == 0: + u[u == 0] = np.divide(1, pp[u == 0]) + scale = min(u) + + if not bend: + # find range of useful alp in truncated line search + amin = -np.Inf + amax = np.Inf + for i in range(len(x)): + if p[i] > 0: + amin = max(amin, (xl[i] - x[i]) / p[i]) + amax = min(amax, (xu[i] - x[i]) / p[i]) + elif p[i] < 0: + amin = max(amin, (xu[i] - x[i]) / p[i]) + amax = min(amax, (xl[i] - x[i]) / p[i]) + + if amin > amax: + sys.exit('GLS Error: no admissible step in line search') + + else: + amin = np.Inf + amax = -np.Inf + for i in range(len(x)): + if p[i] > 0: + amin = min(amin, (xl[i] - x[i]) / p[i]) + amax = max(amax, (xu[i] - x[i]) / p[i]) + elif p[i] < 0: + amin = min(amin, (xu[i] - x[i]) / p[i]) + amax = max(amax, (xl[i] - x[i]) / p[i]) + + return xl, xu, x, p, amin, amax, scale + + def lssort(self, alist: List[Union[float, int]], flist: List[Union[float, int]]): + """ + Performing the lssort + + :param alist: list of known steps + :param flist: function values of known steps + :return: metrics and parameters obtained from doing the lssort + """ + perm = np.argsort(alist).tolist() + alist.sort() + flist = [flist[i] for i in perm if i < len(flist)] + if len(flist) >= len(alist): + s = len(alist) + else: + s = len(flist) + + up = [i < j for i, j in zip(flist[0: s - 1], flist[1: s])] + down = [i <= j for i, j in zip(flist[1: s], flist[0: s - 1])] + if len(down) == 1: + down[0] = flist[s - 1] < flist[s - 2] + else: + down[s - 2] = flist[s - 1] < 
flist[s - 2]
+
+        monotone = (sum(up) == 0 or sum(down) == 0)
+        minima = [i and j for i, j in zip(up + [True], [True] + down)]
+        nmin = sum(minima)
+
+        fbest = min(flist)
+        i = np.argmin(flist)
+
+        abest = alist[i]
+        fmed = np.median(flist)
+
+        if nmin > 1:
+            al = [alist[i] for i in range(len(minima)) if minima[i]]
+            if abest in al:
+                al.remove(abest)
+            unitlen = min(np.abs(np.subtract(al, abest)))
+            # TODO: investigate edge case error
+        else:
+            unitlen = max(abest - alist[0], alist[s - 1] - abest)
+
+        return alist, flist, abest, fbest, fmed, up, down, monotone, minima, nmin, unitlen, s
+
+    def lsconvex(self, alist: List[Union[float, int]], flist: List[Union[float, int]],
+                 nmin: int, s: int) -> int:
+        """
+        Performing the lsconvex: checks whether the known steps and function values
+        are consistent with a convex function
+
+        :param alist: list of known steps
+        :param flist: function values of known steps
+        :param nmin: number of local minima found by lssort
+        :param s: number of known steps
+        :return: convex, 1 if the data is consistent with a convex function, 0 otherwise
+        """
+        if nmin > 1:
+            convex = 0
+        else:
+            convex = 1
+            for i in range(1, s - 1):
+                f12 = (flist[i] - flist[i - 1]) / (alist[i] - alist[i - 1])
+                f13 = (flist[i] - flist[i + 1]) / (alist[i] - alist[i + 1])
+                f123 = (f13 - f12) / (alist[i + 1] - alist[i - 1])
+                if f123 < 0:
+                    convex = 0
+                    break
+        return convex
+
+    def lssat(self, small: Union[float, int], alist: List[Union[float, int]],
+              flist: List[Union[float, int]], alp: float, amin: float, amax: float, s: int,
+              saturated: int) -> Tuple[float, int]:
+        """
+        Performing the lssat: checks whether the quadratic interpolation step around
+        the best point is saturated
+
+        :param small: relative tolerance used in the saturation test
+        :param alist: list of known steps
+        :param flist: function values of known steps
+        :param alp: current step
+        :param amin: minimum step
+        :param amax: maximum step
+        :param s: number of known steps
+        :param saturated: saturation flag
+        :return: alp, saturated
+        """
+        cont = saturated
+        if cont:
+            i = np.argmin(flist)
+            if i == 0 or i == s - 1:
+                cont = 0
+
+        if cont:
+            aa = [alist[j] for j in range(i - 1, i + 1 + 1)]
+            ff = [flist[j] for j in range(i - 1, i + 1 + 1)]
+
+            f12 = (ff[1] - ff[0]) / (aa[1] - aa[0])
+            f23 = (ff[2] - ff[1]) / (aa[2] - aa[1])
+            f123 = (f23 - f12) / (aa[2] - aa[0])
+
+            if f123 > 0:
+                alp = 0.5 * (aa[1] + aa[2] - f23 / f123)
+                alp = max(amin, min(alp, amax))
+                alptol = small * (aa[2] - aa[0])
+                saturated = (abs(alist[i] - alp) <= alptol)
+            else:
+                saturated = 0
+        return alp, saturated
diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/ls_utils.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/ls_utils.py
new file mode 100644
index 000000000..89d69a4ee
--- /dev/null
+++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/ls_utils.py
@@ -0,0 +1,596 @@
+import numpy as np
+from numpy.typing import NDArray
+from typing import Any, Tuple, Union, List
+import sys
+from scipy.sparse import spdiags
+from scipy import sparse
+
+
+class Minq:
+    """
+    The minQ class helper for MCS
+    """
+
+    def ldlrk1(self, L: NDArray[np.float64], d: NDArray[np.float64],
+               alp: float, u: NDArray[np.float64], eps: float = 2.2204e-16) \
+            -> Tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
+        """
+        ldlrk1 function; updates the LDL^T factorization for the rank-one
+        modification L*diag(d)*L^T + alp*u*u^T
+
+        :param L: unit lower triangular factor of the LDL^T factorization
+        :param d: diagonal of the LDL^T factorization
+        :param alp: scalar weight of the rank-one update
+        :param u: update vector
+        :param eps: machine-precision constant (approximately 2.2204e-16)
+        :return: the updated factors L and d, and a direction p of nonpositive
+                 curvature (empty if the updated matrix stays definite)
+        """
+        p = np.array([])
+        if alp == 0:
+            return L, d, p
+
+        n = u.shape[0]
+        neps = n * eps
+
+        L0 = L
+        d0 = d
+
+        for k in [i for i in range(n) if u[i] != 0]:
+            delta = d[k] + alp * pow(u[k], 2)
+            if alp < 0 and delta <= neps:
+                p = np.zeros(n)
+                p[k] = 1
+                p0Krange
= [i for i in range(0, k + 1)] + p0K = np.asarray([p[i] for i in p0Krange]) + L0K = np.asarray([[L[i, j] for j in p0Krange] for i in p0Krange]) + p0K = np.linalg.solve(L0K, p0K) + p = np.asarray([p0K[i] if (i in p0Krange) else p[i] for i in range(len(p))]) + L = L0 + d = d0 + return L, d, p + + q = d[k] / delta + d[k] = delta + ind = [i for i in range(k + 1, n)] + LindK = np.asarray([L[i, k] for i in ind]) + uk = u[k] + c = np.dot(LindK, uk) + for i in range(len(ind)): + L[ind[i], k] = LindK[i] * q + (alp * u[k] / delta) * u[ind[i]] + + for i in range(len(ind)): + u[ind[i]] = u[ind[i]] - c[i] + + alp = alp * q + if alp == 0: + break + return L, d, p + + +class UtilHelpers: + """ + A class with util functions for MCS + """ + + def ldldown(self, L: NDArray[np.float64], d: NDArray[np.float64], j: int) \ + -> Tuple[NDArray[np.float64], NDArray[np.float64]]: + """ + ldldown function + + :param L: indication of the end point (or total number of partition of the value x in the i'th dimenstion) + :param d: the value of the interpolating polynomial + :param j: label + :return: the updated end point and the updated value + """ + n = d.shape[0] + if j < n: + I = [i for i in range(0, j)] + K = [i for i in range(j + 1, n)] + + LKK = np.asarray([[L[i, j] for j in K] for i in K]) + dK = np.asarray([d[i] for i in K]) + LKj = np.asarray([L[i, j] for i in K]) + LKK, dK, _ = Minq().ldlrk1(LKK, dK, d[j], LKj) + d[K] = dK + r1 = L[I, :] + r2 = sparse.coo_matrix((1, n)).toarray() + if len(I) == 0: + r3 = np.concatenate((sparse.coo_matrix((n - j - 1, 1)).toarray(), LKK), axis=1) + L = np.concatenate((r2, r3), axis=0) + else: + LKI = np.asarray([[L[i, j] for j in I] for i in K]) + if len(K) != 0: + r3 = np.concatenate((LKI, sparse.coo_matrix((n - j - 1, 1)).toarray(), LKK), axis=1) + L = np.concatenate((r1, r2, r3), axis=0) + else: + L = np.concatenate((r1, r2), axis=0) + L[j, j] = 1 + else: + L[n - 1, 0: n - 1] = sparse.coo_matrix((1, n - 1)).toarray() + d[j] = 1 + return L, d + + def ldlup(self, L: NDArray[np.float64], d: NDArray[np.float64], j: int, g: NDArray[np.float64], + eps: float) -> Tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]: + """ + ldlup function + :param L: Indication of the end point (or total number of partition of the value x in the i'th dimenstion) + :param d: the value of the interpolating polynomial + :param j: label + :param g: + :param eps: parameter value for the golden ratio + :return: + """ + p = np.array([]) + n = d.shape[0] + I = [i for i in range(0, j)] + K = [i for i in range(j + 1, n)] + + if j == 0: + v = np.zeros(0) + delta = g[j] + if delta <= n * eps: + p = np.asarray([1] + np.zeros(n - 1).tolist()) + return L, d, p + w = np.asarray([g[i] / delta for i in K]) + L[j, I] = v.T + d[j] = delta + return L, d, p + LII = np.asarray([[L[i, j] for j in I] for i in I]) + gI = [g[i] for i in I] + u = np.linalg.solve(LII, gI) + dI = [d[i] for i in I] + v = np.divide(u, dI) + delta = g[j] - np.dot(u.T, v) + if delta <= n * eps: + p = np.asarray(np.linalg.solve(LII.T, v).tolist() + [-1] + np.zeros(n - j - 1).tolist()) + return L, d, p + + if len(K) != 0: + LKI = np.asarray([[L[i, j] for j in I] for i in K]) + gK = np.asarray([g[i] for i in K]) + w = np.divide(np.subtract(gK, np.dot(LKI, u)), delta) + LKK = np.asarray([[L[i, j] for j in K] for i in K]) + dK = np.asarray([d[i] for i in K]) + LKK, dK, q = Minq().ldlrk1(LKK, dK, -delta, w) + d[K] = dK + else: + q = np.array([]) + + if len(q) == 0: + r1 = L[I, :] + r2 = np.asarray(v.T.tolist() + [1] + L[j, K].tolist()) + r2 
= r2.reshape((1, len(r2))) + if len(K) != 0: + r3 = np.concatenate((LKI, w.reshape(len(w), 1), LKK), axis=1) + + L = np.concatenate((r1, r2, r3), axis=0) + else: + L = np.concatenate((r1, r2), axis=0) + d[j] = delta + else: + r1 = L[0: j + 1, :] + r2 = np.concatenate((LKI, L[K, j].reshape(len(L[K, j]), 1), LKK), axis=1) + L = np.concatenate((r1, r2), axis=0) + w = w.reshape((len(w), 1)) + q.reshape((len(q)), 1) + pi = np.dot(w.T, q) + piv = np.multiply(pi, v) + LKIq = np.dot(LKI.T, q) + pivLKIq = np.subtract(piv.flatten(), LKIq.flatten()) + piSolve = np.linalg.solve(LII.T, pivLKIq) + p = np.asarray(piSolve.flatten().tolist() + (-1 * pi).flatten().tolist() + q.tolist()) + return L, d, p + + def getalp(self, alpu: float, alpo: float, gTp: float, pTGp: float) -> \ + Tuple[float, bool, bool, int]: + """ + Gives minimizer alp in [alpu,alpo] for a univariate quadratic q(alp)=alp*gTp+0.5*alp^2*pTGp + + :param alpu: + :param alpo: + :param gTp: + :parm pTGp: + :return: + """ + lba = False + uba = False + + ier = 0 + if alpu == -np.Inf and (pTGp < 0 or (pTGp == 0 and gTp > 0)): + ier = 1 + lba = True + if alpo == np.Inf and (pTGp < 0 or (pTGp == 0 and gTp < 0)): + ier = 1 + uba = True + if ier: + alp = np.NAN + return alp, lba, uba, ier + + if pTGp == 0 and gTp == 0: + alp = 0 + elif pTGp <= 0: + if alpu == -np.Inf: + lba = False + elif alpo == np.Inf: + lba = True + else: + lba = (2 * gTp + (alpu + alpo) * pTGp > 0) + uba = not lba + else: + alp = -gTp / pTGp + lba = (alp <= alpu) + uba = (alp >= alpo) + + if lba: + alp = alpu + if uba: + alp = alpo + + if abs(alp) == np.Inf: + gTp, pTGp, alpu, alpo, alp, lba, uba, ier + return alp, lba, uba, ier + + def minqsub(self, nsub: int, free: NDArray[np.bool_], L: NDArray[np.float64], + dd: NDArray[np.float64], K: NDArray[np.bool_], G: NDArray[np.float64], + n: int, g: NDArray[np.float64], x: NDArray[np.float64], xo: NDArray[np.float64], + xu: NDArray[np.float64], convex: int, xx: NDArray[np.float64], fct: float, + nfree: int, unfix: int, alp: float, alpu: float, + alpo: float, lba: bool, uba: bool, ier: int, subdone: int, eps: float): + """ + Minqsub function + + :param nsub: + :param free: + :param L: Indication of the end point (or total number of partition of the value x in the i'th dimenstion) + :param dd: + :param n: + :param g: + :param x: + :param xo: + :param xu: + :param convex: + :param xx: + :param fct: + :param nfree: + :param unfix: + :param alp: + :param alpu: + :param alpo: + :param lba: + :param uba: + :param ier: + :param subdone: + :param eps: parameter value for the golden ratio + :return: + """ + nsub = nsub + 1 + freelK = [i for i in range(len(free)) if (free < K)[i] is True] + for j in freelK: + L, dd = self.ldldown(L, dd, j) + K[j] = False + + definite = 1 + freeuK = [i for i in range(len(free)) if (free > K)[i] is True] + for j in freeuK: + p = np.zeros(n) + if n > 1: + p = np.asarray([G[i, j] if K[i] is True else p[i] for i in range(len(K))]) + p[j] = G[j, j] + L, dd, p = self.ldlup(L, dd, j, p, eps) + definite = (len(p) == 0) + if not definite: + break + K[j] = True + + if definite: + p = np.zeros(n) + p = np.asarray([g[i] if K[i] is True else p[i] for i in range(len(K))]) + LPsolve = np.linalg.solve(L, p) + LPsolve = np.divide(LPsolve, dd) + p = np.multiply(-1, np.linalg.solve(L.T, LPsolve)) + + p = (x + p) - x + ind = [i for i in range(len(p)) if p[i] != 0] + if len(ind) == 0: + unfix = 1 + subdone = 0 + return (nsub, free, L, dd, K, G, n, g, x, xo, xu, convex, xx, fct, + nfree, alp, alpu, alpo, lba, uba, ier, unfix, subdone) 
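+            # Descriptive note: the divided differences below measure, per moving
+            # coordinate, the distance to the box bounds in units of the search
+            # direction p; combining them yields the admissible step interval
+            # [alpu, alpo] that is passed to getalp.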
+ pp = np.asarray([p[i] for i in ind]) + oo = np.subtract([xo[i] for i in ind], [x[i] for i in ind]) / pp + uu = np.subtract([xu[i] for i in ind], [x[i] for i in ind]) / pp + alpu = max([oo[i] for i in range(len(ind)) if pp[i] < 0] + [uu[i] for i in range( + len(ind)) if pp[i] > 0] + [-np.inf]) + alpo = min([oo[i] for i in range(len(ind)) if pp[i] > 0] + [uu[i] for i in range( + len(ind)) if pp[i] < 0] + [np.inf]) + if alpo <= 0 or alpu >= 0: + sys.exit('programming error: no alp') + + gTp = np.dot(g.T, p) + agTp = np.dot(np.abs(g).T, np.abs(p)) + if abs(gTp) < 100 * eps * agTp: + gTp = 0 + pTGp = np.dot(p.T, np.dot(G, p)) + if convex: + pTGp = max(0, pTGp) + if not definite and pTGp > 0: + pTGp = 0 + + alp, lba, uba, ier = self.getalp(alpu, alpo, gTp, pTGp) + if ier: + x = np.zeros(n) + if lba: + x = -p + else: + x = p + return (nsub, free, L, dd, K, G, n, g, x, xo, xu, convex, xx, + fct, nfree, alp, alpu, alpo, lba, uba, ier, unfix, subdone) + + unfix = not (lba or uba) + for k in range(0, len(ind)): + ik = ind[k] + if alp == uu[k]: + xx[ik] = xu[ik] + free[ik] = 0 + elif alp == oo[k]: + xx[ik] = xo[ik] + free[ik] = 0 + else: + xx[ik] = xx[ik] + alp * p[ik] + if abs(xx[ik]) == np.Inf: + ik, alp, p[ik] + sys.exit('infinite xx in minq') + + nfree = sum(free) + subdone = 1 + return (nsub, free, L, dd, K, G, n, g, x, xo, xu, convex, xx, + fct, nfree, alp, alpu, alpo, lba, uba, ier, unfix, subdone) + + +class LSUtils(UtilHelpers): + """ + Class with utility functions for MCS + """ + + def lsguard(self, alp: float, alist: List[Union[float, int]], amax: float, amin: float, small: Union[float, int]) \ + -> float: + """ + Local search guard function + + :param alp: + :param alist: list of known steps + :param amax: maximum step + :param amin: minimum step + :param small: + :return: + """ + asort = alist + asort.sort() + s = len(asort) + + al = asort[0] - (asort[s - 1] - asort[0]) / small + au = asort[s - 1] + (asort[s - 1] - asort[0]) / small + alp = max(al, min(alp, au)) + alp = max(amin, min(alp, amax)) + + if abs(alp - asort[0]) < small * (asort[1] - asort[0]): + alp = (2 * asort[0] + asort[1]) / 3 + + if abs(alp - asort[s - 1]) < small * (asort[s - 1] - asort[s - 1 - 1]): + alp = (2 * asort[s - 1] + asort[s - 1 - 1]) / 3 + + return alp + + def lssplit(self, i: int, alist: List[Union[float, int]], flist: List[Union[float, int]], short: float) \ + -> Tuple[float, float]: + """ + Local search split function + + :param i: label index + :param alist: list of known steps + :param flist: function values of known steps + :param short: + :return: + """ + if flist[i] < flist[i + 1]: + fac = short + elif flist[i] > flist[i + 1]: + fac = 1 - short + else: + fac = 0.5 + + alp = alist[i] + fac * (alist[i + 1] - alist[i]) + return alp, fac + + def minq(self, gam: float, c: NDArray[np.float64], G: NDArray[np.float64], xu: NDArray[np.float64], + xo: NDArray[np.float64], prt: int, eps: float, ier: int = 0, convex: int = 0) \ + -> Tuple[NDArray[np.float64], float, int]: + """ + Minq Function + Minimizes an affine quadratic form subject to simple bounds. + Using coordinate searches and reduced subspace minimizations, using LDL^T factorization updates + fct = gam + c^T x + 0.5 x^T G x s.t. 
x in [xu,xo] (xu <= xo is assumed), + where G is a symmetric n x n matrix, not necessarily definite + (if G is indefinite, only a local minimum is found) + if G is sparse, it is assumed that the ordering is such that + a sparse modified Cholesky factorization is feasible + + :param prt: print command + :param xx: initial guess (optional) + :param x: minimizer (but unbounded direction if ier = 1) + :param fct: optimal function value + :param ier: 0 (local minimizer found) + """ + + n = G.shape[0] + + if G.shape[1] != n: + ier = -1 + print('minq: Hessian has wrong dimension') + x = np.NAN + np.zeros(n) + fct = np.NAN + nsub = -1 + return x, fct, ier + + if c.shape[0] != n: + ier = -1 + print('minq: linear term has wrong dimension') + if xu.shape[0] != n: + ier = -1 + print('minq: lower bound has wrong dimension') + + if xo.shape[0] != n: + ier = -1 + print('minq: lower bound has wrong dimension') + + if 'xx' in locals(): + xx: NDArray[Any] = locals()["xx"] + if xx.shape[0] != n: + ier = -1 + print('minq: lower bound has wrong dimension') + else: + xx = np.zeros(n) + + if ier == -1: + x = np.NAN + np.zeros(n) + fct = np.NAN + nsub = -1 + return x, fct, ier + + maxit = 3 * n + + nitrefmax = 3 + xx = np.asarray([max(xu[i], min(xx[i], xo[i])) for i in range(len(xx))]) + + hpeps = 100 * eps + G = G + spdiags(hpeps * np.diag(G), 0, n, n).toarray() + + K = np.zeros(n, dtype=bool) + L = np.eye(n) + dd = np.ones(n) + + free = np.zeros(n, dtype=bool) + nfree = 0 + nfree_old = -1 + + fct = np.Inf + nsub = 0 + unfix = 1 + nitref = 0 + improvement = 1 + + while 1: + if np.linalg.norm(xx, np.inf) == np.inf: + sys.exit('infinite xx in minq.m') + + g = np.dot(G, xx) + c + fctnew = gam + np.dot(0.5 * xx.T, (c + g)) + if not improvement: + ier = 0 + break + elif nitref > nitrefmax: + + ier = 0 + break + elif nitref > 0 and nfree_old == nfree and fctnew >= fct: + ier = 0 + break + elif nitref == 0: + x = xx + fct = min(fct, fctnew) + else: + x = xx + fct = fctnew + if nitref == 0 and nsub >= maxit: + ier = 99 + break + count = 0 + k = -1 + while 1: + while count <= n: + count = count + 1 + if k == n - 1: + k = -1 + k = k + 1 + if free[k] or unfix: + break + if count > n: + break + q = G[:, k] + alpu = xu[k] - x[k] + alpo = xo[k] - x[k] + + alp, lba, uba, ier = self.getalp(alpu, alpo, g[k], q[k]) + + if ier: + x = np.zeros(n) + if lba: + x[k] = -1 + else: + x[k] = 1 + + return x, fct, ier + + xnew = x[k] + alp + if prt and nitref > 0: + xnew, alp + + if lba or xnew <= xu[k]: + if alpu != 0: + x[k] = xu[k] + g = g + alpu * q + count = 0 + free[k] = 0 + elif uba or xnew >= xo[k]: + if alpo != 0: + x[k] = xo[k] + g = g + alpo * q + count = 0 + free[k] = 0 + else: + if alp != 0.0: + if prt > 1 and not free[k]: + unfixstep = [x[k], alp] + x[k] = xnew + g = g + alp * q + free[k] = 1 + + nfree = sum(free) + if (unfix and nfree_old == nfree): + g = np.dot(G, x) + c + nitref = nitref + 1 + else: + nitref = 0 + nfree_old = nfree + gain_cs = fct - gam - np.dot(0.5 * x.T, (c + g)) + improvement = (gain_cs > 0 or not unfix) + xx = x + if not improvement or nitref > nitrefmax: + + nothing_to_do = 'done!' + elif nitref > nitrefmax: + nothing_to_do = 'done!' 
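+            # Descriptive note: no free variable is left (every coordinate sits
+            # at a bound), so allow variables to be freed again in the next
+            # coordinate sweep.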
+ elif nfree == 0: + unfix = 1 + else: + subdone = 0 + (nsub, free, L, dd, K, G, n, g, x, xo, xu, convex, xx, fct, + nfree, alp, alpu, alpo, lba, uba, ier, unfix, subdone) = self.minqsub(nsub, free, L, dd, K, G, n, g, x, + xo, xu, convex, xx, fct, nfree, + unfix, alp, alpu, alpo, lba, uba, + ier, subdone, eps) + if not subdone: + return x, fct, ier + if ier: + return x, fct, ier + return x, fct, ier + + def quartic(self, a: NDArray[np.float64], x: float): + """ + quartic function + + :param a: vector of function values, normalized by its maximum value + :param x: positional argument generated from alp + :return: scalar value adjused for quart bool in main code (mcs_agent) + """ + return (((a[0] * x + a[1]) * x + a[2]) * x + a[3]) * x + a[4] diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/mcs_fun.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/mcs_fun.py new file mode 100644 index 000000000..0affa4b05 --- /dev/null +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/mcs/mcs_utils/mcs_fun.py @@ -0,0 +1,822 @@ +import numpy as np +import copy +from typing import Union, List, Tuple, Any +import math +from numpy.typing import NDArray +import sys + + +class UtilHelpers: + """ + Class with utility functions for MCS + """ + + def polint(self, x: Union[List[float], NDArray[np.float64]], + f: Union[List[float], NDArray[np.float64], NDArray[np.float32]]) -> NDArray[np.float64]: + """ + Quadratic polynomial interpolation + + :param x: pairwise distinct support points + :param f: corresponding function values + :return d: the value of the interpolating polynomial + """ + d = np.zeros(3) + d[0] = f[0] + d[1] = (f[1] - f[0]) / (x[1] - x[0]) + f12 = (f[2] - f[1]) / (x[2] - x[1]) + d[2] = (f12 - d[1]) / (x[2] - x[0]) + return d + + def subint(self, x1: float, x2: float) -> Tuple[float, float]: + """ + Computes [min(x,y),max(x,y)] that are neither too close nor too far away from x + + :param x1: corresponding parameter/coordinate value + :param x2: corresponding parameter/coordinate value + """ + f: int = 1000 + if f * abs(x1) < 1: + if abs(x2) > f: + x2 = np.sign(x2) + else: + if abs(x2) > f: + x2 = 10 * np.sign(x2) * abs(x1) + x1 = x1 + (x2 - x1) / 10 + return x1, x2 + + def quadpol(self, x: float, d: NDArray[np.float64], x0: Union[List[float], NDArray[np.float64]]): + """ + Evaluates the quadratic polynomial + + :param x: starting point + :param d: the value of the interpolating polynomial + :param x0: initial position + """ + return d[0] + d[1] * (x - x0[0]) + d[2] * (x - x0[0]) * (x - x0[1]) + + def quadmin(self, a: float, b: float, d: NDArray[np.float64], x0: Union[List[float], NDArray[np.float64]]) -> float: + """ + The quadmin method + + :param a: + :param b: + :param d: + :param x0: + :return: + """ + if d[2] == 0: + if d[1] > 0: + x = a + else: + x = b + elif d[2] > 0: + x1 = 0.5 * (x0[0] + x0[1]) - 0.5 * d[1] / d[2] + if a <= x1 and x1 <= b: + x = x1 + elif self.quadpol(a, d, x0) < self.quadpol(b, d, x0): + x = a + else: + x = b + else: + if self.quadpol(a, d, x0) < self.quadpol(b, d, x0): + x = a + else: + x = b + return x + + def split1(self, x1: float, x2: float, f1: float, f2: float) -> float: + """ + The split1 method + + :param x1: + :param x2: + :param f1: + :param f2: + :return: + """ + if f1 <= f2: + return x1 + 0.5 * (-1 + math.sqrt(5)) * (x2 - x1) + else: + return x1 + 0.5 * (3 - math.sqrt(5)) * (x2 - x1) + + def split2(self, x: float, y: float) -> float: + """ + The split2 method. 
Determines a value x1 for splitting the interval [min(x,y),max(x,y)] + is modeled on the function subint with safeguards for infinite y + + :param x: + :param y: + :return: + """ + x2 = y + if x == 0 and abs(y) > 1000: + x2 = np.sign(y) + elif x != 0 and abs(y) > 100 * abs(x): + x2 = 10 * np.sign(y) * abs(x) + x1 = x + 2 * (x2 - x) / 3 + return x1 + + def vert1(self, j: int, z: NDArray[np.float64], f: NDArray[np.float64], x1: float, + x2: float, f1: float, f2: float) \ + -> Tuple[float, float, float, float, float]: + """ + The vert1 method + + :param j: label + :param z: + :param f: function value + :param x1: corresponding parameter/coordinate value + :param x2: corresponding parameter/coordinate value + :param f1: corresponding function value + :param f2: corresponding function value + :return: + """ + if j == 0: + j1 = 1 + else: + j1 = 0 + x = z[j1] + if x1 == np.Inf: + x1 = z[j] + f1 = f1 + f[j] + elif x2 == np.Inf and x1 != z[j]: + x2 = z[j] + f2 = f2 + f[j] + + return x, x1, x2, f1, f2 + + def vert2(self, j: int, x: float, z: NDArray[np.float64], + f: NDArray[np.float64], x1: float, x2: float, + f1: float, f2: float) -> Tuple[float, float, float, float]: + """ + The vert2 function + + :param j: label + :param x: + :param z: + :param f: function values + :param x1: corresponding parameter/coordinate value + :param x2: corresponding parameter/coordinate value + :param f1: corresponding function value + :param f2: corresponding function value + """ + if j == 0: + j1 = 1 + else: + j1 = 0 + + if x1 == np.Inf: + x1 = z[j] + f1 = f1 + f[j] + if x != z[j1]: + x2 = z[j1] + f2 = f2 + f[j1] + elif x2 == np.Inf and x1 != z[j]: + x2 = z[j] + f2 = f2 + f[j] + elif x2 == np.Inf: + x2 = z[j1] + f2 = f2 + f[j1] + + return x1, x2, f1, f2 + + def vert3(self, j: int, x0, f0, L: int, x1: float, x2: float, f1: float, f2: float) \ + -> Tuple[float, float, float, float]: + """ + Vert3 function + + :param j: label + :param x0: initial position + :param f0: inital function value + :param L: + :param x1: corresponding parameter/coordinate value + :param x2: corresponding parameter/coordinate value + :param f1: corresponding function value + :param f2: corresponding function value + """ + if j == 0: + k1 = 1 + k2 = 2 + elif j == L: + k1 = L - 2 + k2 = L - 1 + else: + k1 = j - 1 + k2 = j + 1 + x1 = x0[k1] + x2 = x0[k2] + f1 = f1 + f0[k1] + f2 = f2 + f0[k2] + return x1, x2, f1, f2 + + def updtf(self, n: int, i: int, x1: NDArray[np.float64], x2: NDArray[np.float64], f1: NDArray[np.float64], + f2: NDArray[np.float64], fold: Union[float, NDArray[np.float64]], f: NDArray[np.float64]) \ + -> Tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]: + """ + updtf function + + :param n: + :param i: + :param x1: corresponding parameter/coordinate value + :param x2: corresponding parameter/coordinate value + :param f1: corresponding function value + :param f2: corresponding function value + :param fold: former function value + :param f: function values + :return: + """ + for i1 in range(n): + if i1 != i: + if x1[i1] == np.Inf: + f1[i1] = f1[i1] + fold - f + if x2[i1] == np.Inf: + f2[i1] = f2[i1] + fold - f + fold = f + return f1, f2, fold + + +class MCSUtils(UtilHelpers): + """ + Class with utiltiy functions for MCS + """ + + def check_box_bound(self, u: List[int], v: List[int]): + """ + Function that checks the bounds of the box + :param u: lower bound + :param v: upper bound + :return: boolean indicating the bound + """ + if v < u: + print('incompatible box bounds') + return True + elif (u == v): + 
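# u == v in every coordinate: the box has collapsed to a single point,
+            # so there is nothing left to search over.
+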
print('degenerate box bound')
+            return True
+        else:
+            return False
+
+    def strtsw(self, smax: int, level: List[int], f: List[float], nboxes: int, record: NDArray[Any]) \
+            -> Tuple[int, NDArray[np.int32]]:
+        """
+        Function that does the strtsw: starts a new sweep by computing the lowest
+        level s containing a non-split box and the record list of the best boxes
+        at each level
+
+        :param smax: maximum level depth
+        :param level: levels of the boxes
+        :param f: base vertex function values of the boxes
+        :param nboxes: counter for boxes not in the 'shopping basket'
+        :param record: record list (pointer to the best non-split box at each level)
+        :return: the lowest level s and the updated record list
+        """
+        record = np.zeros(smax).astype(int)
+        s = smax
+        for j in range(nboxes + 1):
+            if level[j] > 0:
+                if level[j] < s:
+                    s = level[j]
+                if not record[level[j]]:
+                    record[level[j]] = j
+
+                elif f[j] < f[record[level[j]]]:
+                    record[level[j]] = j
+        return s, record
+
+    def exgain(self, n: int, n0: NDArray[np.float64], l: NDArray[np.int32], L: NDArray[np.int32],
+               x: NDArray[np.float64], y: NDArray[np.float32], x1: NDArray[np.float32],
+               x2: NDArray[np.float32], fx: float, f0: NDArray[np.float32],
+               f1: NDArray[np.float32], f2: NDArray[np.float32]) -> Tuple[NDArray[np.float64], int, float]:
+        """
+        Determines the splitting index, the splitting value and the expected
+        gain vector e for (potentially) splitting a box by expected gain
+
+        :param n: dimension of the problem
+        :param n0: the ith coordinate has been split n0(i) times in the history of the box
+        :param l: pointer to the initial point of the initialization list
+        :param L: lengths of the initialization list
+        :param x: base vertex of the box
+        :param y: opposite vertex of the box
+        :param x1: corresponding parameter/coordinate value
+        :param x2: corresponding parameter/coordinate value
+        :param fx: function value at the base vertex
+        :param f0: function values appertaining to the init. list
+        :param f1: corresponding function value
+        :param f2: corresponding function value
+        :return e: maximal expected gain in function value by changing coordinate i
+        :return isplit: splitting index
+        :return splval: Inf if n0(isplit) = 0, splitting value otherwise
+        """
+        e = np.zeros(n)
+        emin = np.Inf
+        for i in range(n):
+            if n0[i] == 0:
+                e[i] = min(f0[0: L[i] + 1, i]) - f0[l[i], i]
+                if e[i] < emin:
+                    emin = e[i]
+                    isplit = i
+                    splval = np.Inf
+            else:
+                z1 = [x[i], x1[i], x2[i]]
+                z2 = [0, f1[i] - fx, f2[i] - fx]
+                d = self.polint(z1, z2)
+                eta1, eta2 = self.subint(x[i], y[i])
+                xi1 = min(eta1, eta2)
+                xi2 = max(eta1, eta2)
+                z = self.quadmin(xi1, xi2, d, z1)
+                e[i] = self.quadpol(z, d, z1)
+                if e[i] < emin:
+                    emin = e[i]
+                    isplit = i
+                    splval = z
+        return e, isplit, splval
+
+    def updtrec(self, j: int, s: int, f: List[float], record: List[int]) -> List[int]:
+        """
+        Updates the pointer record(s) to the best non-split box at level s
+
+        :param j: label of a box
+        :param s: its level
+        :param f: vector containing the base vertex function values of the already defined boxes.
+        :param record: record list
+        """
+        if len(record) < s:
+            record[s] = j
+        elif record[s] == 0:
+            record[s] = j
+        elif f[j] < f[record[s]]:
+            record[s] = j
+
+        return record
+
+    def chkloc(self, nloc: int, xloc: List[float], x: float) -> int:
+        """
+        Checks whether the point x is already contained in the list of visited points
+
+        :param nloc: number of stored points
+        :param xloc: list of previously visited points
+        :param x: the point to check
+        :return: the location flag loc, 1 if x is not yet contained in xloc and 0 otherwise
+        """
+        loc = 1
+        for k in range(nloc):
+            if np.array_equal(x, xloc[k]):
+                loc = 0
+                break
+        return loc
+
+    def addloc(self, nloc: int, xloc: List[float], x: float) -> Tuple[int, List[float]]:
+        """
+        Adds the point x to the list of visited points
+
+        :param nloc: number of stored points
+        :param xloc: list of previously visited points
+        :param x: the point to add
+        :return: the updated count and the locations including the added one
+        """
+        nloc = nloc + 1
+        xloc.append(copy.deepcopy(x))
+        return nloc, xloc
+
+    def chrelerr(self, fbest: float, stop: List[Union[int, float]]) -> int:
+        """
+        Performing the chrelerr: checks whether fbest is close enough to the known
+        global optimum
+
+        :param fbest: best function value found so far
+        :param stop: stopping test parameters; stop[1] is the global optimum fglob,
+                     stop[0] a relative and stop[2] an absolute tolerance
+        :return: flag, 0 if the stopping test is satisfied and 1 otherwise
+        """
+        fglob = stop[1]
+        if fbest - fglob <= max(stop[0] * abs(fglob), stop[2]):
+            flag = 0
+        else:
+            flag = 1
+
+        return flag
+
+    def chvtr(self, f: float, vtr: float) -> int:
+        """
+        Performing the chvtr function: checks whether the value to reach has been attained
+
+        :param f: function value
+        :param vtr: value to reach
+        :return: flag, 0 if f <= vtr and 1 otherwise
+        """
+        if f <= vtr:
+            flag = 0
+        else:
+            flag = 1
+
+        return flag
+
+    def fbestloc(self, fmi: List[float], fbest: float, xmin: List[float],
+                 xbest: float, nbasket0: int, stop: List[Union[float, int]]) -> Tuple[float, float]:
+        """
+        The fbestloc function of MCS: updates the best point if the given basket
+        entry improves on it
+
+        :param fmi: function values of the basket points
+        :param fbest: best function value found so far
+        :param xmin: the basket points
+        :param xbest: best point found so far
+        :param nbasket0: index of the basket entry to compare against
+        :param stop: stopping test parameters (unused in this implementation so far)
+        :return: the (possibly updated) fbest and xbest
+        """
+        if fmi[nbasket0] < fbest:
+            fbest = copy.deepcopy(fmi[nbasket0])
+            xbest = copy.deepcopy(xmin[nbasket0])
+        return fbest, xbest
+
+    def splrnk(self, n: int, n0: NDArray[np.float64], p: NDArray[np.int32], x: NDArray[np.float64],
+               y: NDArray[np.float32]) -> Tuple[int, float]:
+        """
+        Determines the splitting index and splitting value for splitting a box by rank
+
+        :param n: dimension of the problem
+        :param n0: the number of times each coordinate has been split in the history of the box
+        :param p: ranking of estimated variability of the function in the different coordinates
+        :param x: base vertex of the box
+        :param y: opposite vertex of the box
+        :return: splitting index and value at splitting point
+        """
+
+        isplit = 0
+        n1 = n0[0]
+        p1 = p[0]
+        for i in range(1, n):
+            if n0[i] < n1 or (n0[i] == n1 and p[i] < p1):
+                isplit = i
+                n1 = n0[i]
+                p1 = p[i]
+        if n1 > 0:
+            splval = self.split2(x[isplit], y[isplit])
+        else:
+            splval = np.Inf
+        return isplit, splval
+
+    def genbox(self, par: int, level0: int, nchild: int, f0: float) -> Tuple[int, int, int, float]:
+        """
+        Function that generates a box
+
+        :param par: label of the parent box
+        :param level0: level of the new box
+        :param nchild: child number of the new box
+        :param f0: initial function value
+        :return: metrics and parameters from generating the box
+        """
+        ipar = par
+        level = level0
+        ichild = nchild
+        f = f0
+        return ipar, level, ichild, f
+
+    def vertex(self, j: int, n: int, u: List[Union[int, float]], v: List[Union[int, float]],
+               v1: NDArray[np.float64], x0: NDArray[np.float64], f0: NDArray[np.float64],
+               ipar: NDArray[np.int32], isplit: NDArray[np.int32], ichild: NDArray[np.int32],
+               z: NDArray[np.float64], f: NDArray[np.float64], l: NDArray[np.int32],
+               L: NDArray[np.int32]):
+        """
+        Vertex function
+
+        :param j: label
+        :param n: dimension of the problem
+        :param u: the initial lower bound ("lower corner" in 3D)
+        :param v: the initial upper bound ("upper corner" in 3D)
+        :param v1: default opposite vertex
+        :param x0: initial position
+        :param f0: initial function value
+        :param ipar: parent box labels
+        :param isplit: splitting indices of the boxes
+        :param ichild: child numbers of the boxes
+        :param z: splitting points of the boxes
+        :param f: function values of the boxes
+        :param l: Indication of the mid point
+        :param L: Indication of the end point (or total number
of partition of the value x in the i'th dimenstion) + """ + x = np.multiply(np.Inf, np.ones(n)) + y = np.multiply(np.Inf, np.ones(n)) + x1 = np.multiply(np.Inf, np.ones(n)) + x2 = np.multiply(np.Inf, np.ones(n)) + f1 = np.zeros(n) + f2 = np.zeros(n) + + n0 = np.zeros(n) + fold = f[0, j] + m = j + + while m > 0: + if isplit[ipar[m]] < 0: + i = int(abs(isplit[ipar[m]])) - 1 + else: + i = int(abs(isplit[ipar[m]])) + + n0[i] = n0[i] + 1 + + if ichild[m] == 1: + if x[i] == np.Inf or x[i] == z[0, ipar[m]]: + x[i], x1[i], x2[i], f1[i], f2[i] = self.vert1(1, z[:, ipar[m]], + f[:, ipar[m]], x1[i], x2[i], f1[i], f2[i]) + else: + f1, f2, fold = self.updtf(n, i, x1, x2, f1, f2, fold, f[0, ipar[m]]) + x1[i], x2[i], f1[i], f2[i] = self.vert2(0, x[i], z[:, ipar[m]], + f[:, ipar[m]], x1[i], x2[i], f1[i], f2[i]) + elif ichild[m] >= 2: + f1, f2, fold = self.updtf(n, i, x1, x2, f1, f2, fold, f[0, ipar[m]]) + if x[i] == np.Inf or x[i] == z[1, ipar[m]]: + x[i], x1[i], x2[i], f1[i], f2[i] = self.vert1(0, z[:, ipar[m]], + f[:, ipar[m]], x1[i], x2[i], f1[i], f2[i]) + else: + x1[i], x2[i], f1[i], f2[i] = self.vert2(1, x[i], z[:, ipar[m]], + f[:, ipar[m]], x1[i], x2[i], f1[i], f2[i]) + + if 1 <= ichild[m] and ichild[m] <= 2 and y[i] == np.Inf: + y[i] = self.split1(z[0, ipar[m]], z[1, ipar[m]], f[0, ipar[m]], f[1, ipar[m]]) + + if ichild[m] < 0: + if u[i] < x0[i, 0]: + j1 = math.ceil(abs(ichild[m]) / 2) + j2 = math.floor(abs(ichild[m]) / 2) + if (abs(ichild[m]) / 2 < j1 and j1 > 0) or j1 == L[i] + 1: + j3 = -1 + else: + j3 = 1 + else: + j1 = math.floor(abs(ichild[m]) / 2) + 1 + j2 = math.ceil(abs(ichild[m]) / 2) + if abs(ichild[m]) / 2 + 1 > j1 and j1 < L[i] + 1: + j3 = 1 + else: + j3 = -1 + j1 -= 1 + j2 -= 1 + + if int(isplit[ipar[m]]) < 0: + k = copy.deepcopy(i) + else: + k = int(z[0, ipar[m]]) + + if j1 != l[i] or (x[i] != np.Inf and x[i] != x0[i, l[i]]): + f1, f2, fold = self.updtf(n, i, x1, x2, f1, f2, fold, f0[l[i], k]) + if x[i] == np.Inf or x[i] == x0[i, j1]: + x[i] = x0[i, j1] + if x1[i] == np.Inf: + x1[i], x2[i], f1[i], f2[i] = self.vert3(j1, x0[i, :], f0[:, k], L[i], x1[i], x2[i], f1[i], + f2[i]) + elif x2[i] == np.Inf and x1[i] != x0[i, j1 + j3]: + x2[i] = x0[i, j1 + j3] + f2[i] = f2[i] + f0[j1 + j3, k] + elif x2[i] == np.Inf: + if j1 != 1 and j1 != L[i]: + x2[i] = x0[i, j1 - j3] + f2[i] = f2[i] + f0[j1 - j3, k] + else: + x2[i] = x0[i, j1 + 2 * j3] + f2[i] = f2[i] + f0[j1 + 2 * j3, k] + else: + if x1[i] == np.Inf: + x1[i] = x0[i, j1] + f1[i] = f1[i] + f0[j1, k] + if x[i] != x0[i, j1 + j3]: + x2[i] = x0[i, j1 + j3] + f2[i] = f2[i] + f0[j1 + j3, k] + elif x2[i] == np.Inf: + if x1[i] != x0[i, j1]: + x2[i] = x0[i, j1] + f2[i] = f2[i] + f0[j1, k] + elif x[i] != x0[i, j1 + j3]: + x2[i] = x0[i, j1 + j3] + f2[i] = f2[i] + f0[j1 + j3, k] + else: + if j1 != 1 and j1 != L[i]: + x2[i] = x0[i, j1 - j3] + f2[i] = f2[i] + f0[j1 - j3, k] + else: + x2[i] = x0[i, j1 + 2 * j3] + f2[i] = f2[i] + f0[j1 + 2 * j3, k] + if y[i] == np.Inf: + if j2 == -1: + y[i] = u[i] + elif j2 == L[i]: + y[i] = v[i] + else: + y[i] = self.split1(x0[i, j2], x0[i, j2 + 1], f0[j2, k], f0[j2 + 1, k]) + m = ipar[m] + for i in range(n): + if x[i] == np.Inf: + x[i] = x0[i, l[i]] + x1[i], x2[i], f1[i], f2[i] = self.vert3(l[i], x0[i, :], f0[:, i], L[i], x1[i], x2[i], f1[i], f2[i]) + if y[i] == np.Inf: + y[i] = v1[i] + + return n0, x, y, x1, x2, f1, f2 + + def initbox(self, theta0: NDArray[np.float64], f0: NDArray[np.float32], l: NDArray[np.int32], + L: NDArray[np.int32], istar: NDArray[Union[np.float32, np.float64]], u: List[Union[int, float]], + v: 
List[Union[int, float]], isplit: NDArray[np.int32], level: NDArray[np.int32],
+                ipar: NDArray[np.int32], ichild: NDArray[np.int32], f: NDArray[np.float32], nboxes: int, prt: int):
+        """
+        Generates the boxes in the initialization procedure
+
+        :param theta0: the initialization list (candidate coordinate values for each dimension)
+        :param f0: initial function values from the initialization list
+        :param l: Indication of the mid point
+        :param L: Indication of the end point (or total number of partition of the value x in the i'th dimension)
+        :param istar: indices of the best initialization-list points in each coordinate
+        :param u: the initial lower bound
+        :param v: the initial upper bound
+        :param isplit: splitting indices of the boxes
+        :param level: levels of the boxes
+        :param ipar: parent box labels
+        :param ichild: child numbers of the boxes
+        :param f: function values of the boxes' base vertices
+        :param nboxes: counter for boxes not in the 'shopping basket'
+        :param prt: print level - unused in this implementation so far
+        :return: the updated box data structures and the initial best point
+        """
+        n = len(u)
+
+        ipar[0] = -1
+        level[0] = 1
+        ichild[0] = 1
+
+        f[0, 0] = f0[l[0], 0]
+
+        par = 0
+
+        var = np.zeros(n)
+        for i in range(n):
+            isplit[par] = - i - 1
+            nchild = 0
+            if theta0[i, 0] > u[i]:
+                nboxes = nboxes + 1
+                nchild = nchild + 1
+                ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = \
+                    MCSUtils().genbox(par, level[par] + 1, - nchild, f0[0, i])
+            if L[i] == 2:
+                v1 = v[i]
+            else:
+                v1 = theta0[i, 2]
+            d = self.polint(theta0[i, 0: 3], f0[0: 3, i])
+            xl = self.quadmin(u[i], v1, d, theta0[i, 0: 3])
+            fl = self.quadpol(xl, d, theta0[i, 0: 3])
+            xu = self.quadmin(u[i], v1, - d, theta0[i, 0: 3])
+            fu = self.quadpol(xu, d, theta0[i, 0: 3])
+
+            if istar[i] == 0:
+                if xl < theta0[i, 0]:
+                    par1 = nboxes
+                else:
+                    par1 = nboxes + 1
+
+            for j in range(L[i]):
+                nboxes = nboxes + 1
+                nchild = nchild + 1
+                if f0[j, i] <= f0[j + 1, i]:
+                    s = 1
+                else:
+                    s = 2
+                ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = \
+                    MCSUtils().genbox(par, level[par] + s, - nchild, f0[j, i])
+
+                if j >= 1:
+                    if istar[i] == j:
+                        if xl <= theta0[i, j]:
+                            par1 = nboxes - 1
+                        else:
+                            par1 = nboxes
+                    if j <= L[i] - 2:
+                        d = self.polint(theta0[i, j: j + 1], f0[j: j + 1, i])
+                        if j < L[i] - 2:
+                            u1 = theta0[i, j + 1]
+                        else:
+                            u1 = v[i]
+                        xl = self.quadmin(theta0[i, j], u1, d, theta0[i, j: j + 1])
+                        fl = min(self.quadpol(xl, d, theta0[i, j: j + 1]), fl)
+                        xu = self.quadmin(theta0[i, j], u1, -d, theta0[i, j: j + 1])
+                        fu = max(self.quadpol(xu, d, theta0[i, j: j + 1]), fu)
+
+                nboxes = nboxes + 1
+                nchild = nchild + 1
+                ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = \
+                    MCSUtils().genbox(par, level[par] + 3 - s, -nchild, f0[j + 1, i])
+            if theta0[i, L[i]] < v[i]:
+                nboxes = nboxes + 1
+                nchild = nchild + 1
+                ipar[nboxes], level[nboxes], ichild[nboxes], f[0, nboxes] = \
+                    MCSUtils().genbox(par, level[par] + 1, -nchild, f0[L[i], i])
+
+            if istar[i] == L[i]:
+                if theta0[i, L[i]] < v[i]:
+                    if xl <= theta0[i, L[i]]:
+                        par1 = nboxes - 1
+                    else:
+                        par1 = nboxes
+                else:
+                    par1 = nboxes
+            var[i] = fu - fl
+
+            level[par] = 0
+            par = par1
+        fbest = f0[istar[n - 1], n - 1]
+        p = np.zeros(n).astype(int)
+        xbest = np.zeros(n)
+        for i in range(n):
+            p[i] = np.argmax(var)
+            var[p[i]] = -1
+            xbest[i] = theta0[i, istar[i]]
+        return ipar, level, ichild, f, isplit, p, xbest, fbest, nboxes
+
+    def neighbor(self, x: NDArray[np.float32], delta: List[float],
+                 u: List[Union[float, int]], v: List[Union[float, int]]) -> Tuple[List[Any], List[Any]]:
+        """
+        Computes 'neighbors' x1 and x2 of x needed for making triple search
+        and building a local quadratic model such that x(i), x1(i), x2(i) are
+        pairwise distinct for i = 1,...,n
+
+        :param x: current position
+        :param delta: radius of neighboring region
+        :param u: lower bound
+        :param v: upper bound
+        :return: the neighbors x1 and x2
+        """
+        i1 = [i for i in range(len(x)) if x[i] ==
u[i]] + i2 = [i for i in range(len(x)) if x[i] == v[i]] + x1 = [max(u[i], x[i] - delta[i]) for i in range(len(x))] + x2 = [min(x[i] + delta[i], v[i]) for i in range(len(x))] + for i in i1: + x1[i] = x[i] + 2 * delta[i] + for i in i2: + x2[i] = x[i] - 2 * delta[i] + return x1, x2 + + def polint1(self, x: List[float], f: List[float]) -> Tuple[float, float]: + """ + Quadratic polynomial interpolation + + :param x: positions + :param f: function values + :return: g, G + """ + f13 = (f[2] - f[0]) / (x[2] - x[0]) + f12 = (f[1] - f[0]) / (x[1] - x[0]) + f23 = (f[2] - f[1]) / (x[2] - x[1]) + g = f13 + f12 - f23 + G = 2 * (f13 - f12) / (x[2] - x[1]) + return g, G + + def hessian(self, i: int, k: int, x: List[Union[float, int]], x0: List[Union[float, int]], + f: float, f0: float, g: NDArray[np.float64], G: NDArray[np.float64]) -> Any: + """ + Computes the element G(i,k) of the Hessian of the local quadratic model + + :param i: + :param k: + :param x: position + :param x0: initial position + :param f: function values + :param f0: inital function value + :param g: + :param G: + """ + h = f - f0 - g[i] * (x[i] - x0[i]) - g[k] * (x[k] - x0[k]) - 0.5 * G[i, i] * (pow((x[i] - x0[i]), 2)) \ + - 0.5 * G[k, k] * pow((x[k] - x0[k]), 2) + h = h / (x[i] - x0[i]) / (x[k] - x0[k]) + return h + + def get_theta0(self, iinit: int, u: List[Union[float, int]], v: List[Union[float, int]], n: int) \ + -> NDArray[np.float32]: + """ + Function for obtaining initial position + + :param iinit: + :param u: + :param v: + :param n: + :return: the initial position theta0 + """ + if iinit == 0: + theta0 = np.array([]) + theta0 = np.append(theta0, u, axis=0) + theta0 = np.vstack([theta0, [(i + j) / 2 for i, j in zip(u, v)]]) + theta0 = np.vstack([theta0, v]) + theta0 = theta0.T + + elif iinit == 1: + theta0 = np.zeros((n, 3)) + for i in range(n): + if u[i] >= 0: + theta0[i, 0] = u[i] + theta0[i, 1], theta0[i, 2] = self.subint(u[i], v[i]) + theta0[i, 1] = 0.5 * (theta0[i, 0] + theta0[i, 2]) + elif v[i] <= 0: + theta0[i, 2] = v[i] + theta0[i, 1], theta0[i, 0] = self.subint(v[i], u[i]) + theta0[i, 1] = 0.5 * (theta0[i, 0] + theta0[i, 2]) + else: + theta0[i, 1] = 0 + _, theta0[i, 0] = self.subint(0, u[i]) # type: ignore[name-defined] + _, theta0[i, 2] = self.subint(0, v[i]) # type: ignore[name-defined] + elif iinit == 2: + theta0 = np.array([]) + theta0 = np.append(theta0, [(i * 5 + j) / 6 for i, j in zip(u, v)]) + theta0 = np.vstack([theta0, [0.5 * (i + j) for i, j in zip(u, v)]]) + theta0 = np.vstack([theta0, [(i + j * 5) / 6 for i, j in zip(u, v)]]) + theta0 = theta0.T + + if np.any(np.isinf(theta0)): + sys.exit("Error- MCS main: infinities in ititialization list") + return theta0 diff --git a/simulation-system/libs/csle-agents/src/csle_agents/agents/particle_swarm/particle_swarm_agent.py b/simulation-system/libs/csle-agents/src/csle_agents/agents/particle_swarm/particle_swarm_agent.py index 028606ac3..290c6628d 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/agents/particle_swarm/particle_swarm_agent.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/agents/particle_swarm/particle_swarm_agent.py @@ -236,6 +236,7 @@ def particle_swarm(self, exp_result: ExperimentResult, seed: int, random_seeds: objective_type_param = self.experiment_config.hparams[agents_constants.PARTICLE_SWARM.OBJECTIVE_TYPE].value if agents_constants.PARTICLE_SWARM.THETA1 in self.experiment_config.hparams: thetas = self.experiment_config.hparams[agents_constants.PARTICLE_SWARM.THETA1].value + print("thetas = ", thetas) else: if 
self.experiment_config.player_type == PlayerType.DEFENDER: P, thetas = ParticleSwarmAgent.initial_theta(L=L, S=S, b_lo=b_lo, b_up=b_up) diff --git a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py index cc39ee4e0..45589c5b3 100644 --- a/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py +++ b/simulation-system/libs/csle-agents/src/csle_agents/constants/constants.py @@ -404,6 +404,32 @@ class PARTICLE_SWARM: OBJECTIVE_TYPE = "objective_type" +class MCS: + """ + String constants related to Multilevel Coordinate Search + """ + STEP = "step" + STEP1 = "step1" + THETAS = "thetas" + U = "u" + V = "v" + L = "L" + STOPPING_ACTIONS = "stopping_actions" + STOP_DISTRIBUTION_DEFENDER = "stop_distribution_defender" + GAMMA = "gamma" + EPSILON = "epsilon" + LOCAL = "local" + IINIT = "iinit" + M = "m" + PRT = "prt" + SMAX = "smax" + NF = "nf" + STOP = "stop" + POLICY_TYPE = "policy_type" + OBJECTIVE_TYPE = "objective_type" + THRESHOLDS = "thresholds" + + class BAYESIAN_OPTIMIZATION_EMUKIT: """ String constants related to Bayesian Optimization Emukit diff --git a/simulation-system/libs/csle-agents/tests/test_mcs.py b/simulation-system/libs/csle-agents/tests/test_mcs.py new file mode 100644 index 000000000..32b38d9b6 --- /dev/null +++ b/simulation-system/libs/csle-agents/tests/test_mcs.py @@ -0,0 +1,204 @@ +import numpy as np +import pytest +import pytest_mock +import csle_common.constants.constants as constants +from csle_common.dao.training.experiment_config import ExperimentConfig +from csle_common.dao.training.agent_type import AgentType +from csle_common.dao.training.hparam import HParam +from csle_common.dao.training.player_type import PlayerType +from csle_common.dao.training.policy_type import PolicyType +from csle_agents.agents.mcs.mcs_agent import MCSAgent +import csle_agents.constants.constants as agents_constants +from gym_csle_stopping_game.dao.stopping_game_config import StoppingGameConfig +from gym_csle_stopping_game.dao.stopping_game_defender_pomdp_config import StoppingGameDefenderPomdpConfig +from gym_csle_stopping_game.util.stopping_game_util import StoppingGameUtil +from csle_common.dao.training.random_policy import RandomPolicy +import gym_csle_stopping_game.constants.constants as env_constants +from csle_agents.common.objective_type import ObjectiveType + + +class TestMCSSuite: + """ + Test suite for the MCSAgent + """ + + @pytest.fixture + def experiment_config(self) -> ExperimentConfig: + """ + Fixture, which is run before every test. 
It sets up an example experiment config + + :return: the example experiment config + """ + experiment_config = ExperimentConfig( + output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}MCS_test", + title="Multilevel Coordinate Search", + random_seeds=[399, 98912], + agent_type=AgentType.MCS, + log_every=1, + hparams={ + agents_constants.MCS.STEP: HParam(value=10, name=agents_constants.MCS.STEP, descr="step"), + agents_constants.MCS.STEP1: HParam(value=100, name=agents_constants.MCS.STEP1, descr="step1"), + agents_constants.MCS.U: HParam(value=[-20], name=agents_constants.MCS.U, + descr="initial lower corner"), + agents_constants.MCS.LOCAL: HParam( + value=50, name=agents_constants.MCS.LOCAL, + descr="local value stating to which degree to perform local searches"), + agents_constants.MCS.V: HParam( + value=[20], name=agents_constants.MCS.V, descr="initial upper corner"), + agents_constants.MCS.STOPPING_ACTIONS: HParam( + value=1, name=agents_constants.MCS.L, descr="no. of stopping actions"), + agents_constants.MCS.IINIT: HParam(value=0, name=agents_constants.MCS.IINIT, + descr="simple initialization list"), + agents_constants.MCS.GAMMA: HParam( + value=2.220446049250313e-16, name=agents_constants.MCS.GAMMA, descr="gamma value"), + agents_constants.MCS.EPSILON: HParam( + value=2.220446049250313e-16, name=agents_constants.MCS.EPSILON, descr="epsilon value"), + agents_constants.MCS.M: HParam( + value=1, name=agents_constants.MCS.M, descr="m value"), + agents_constants.MCS.PRT: HParam( + value=1, name=agents_constants.MCS.PRT, descr="print level"), + agents_constants.COMMON.EVAL_BATCH_SIZE: HParam( + value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE, + descr="number of iterations to evaluate theta"), + agents_constants.COMMON.SAVE_EVERY: HParam( + value=1000, name=agents_constants.COMMON.SAVE_EVERY, descr="how frequently to save the model"), + agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam( + value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, descr="confidence interval"), + agents_constants.COMMON.MAX_ENV_STEPS: HParam( + value=2, name=agents_constants.COMMON.MAX_ENV_STEPS, + descr="maximum number of steps in the environment (for envs with infinite horizon generally)"), + agents_constants.COMMON.RUNNING_AVERAGE: HParam( + value=100, name=agents_constants.COMMON.RUNNING_AVERAGE, + descr="the number of samples to include when computing the running avg"), + agents_constants.COMMON.GAMMA: HParam( + value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"), + agents_constants.MCS.POLICY_TYPE: HParam( + value=PolicyType.MULTI_THRESHOLD, name=agents_constants.MCS.POLICY_TYPE, + descr="policy type for the execution"), + agents_constants.MCS.OBJECTIVE_TYPE: HParam( + value=ObjectiveType.MAX, name=agents_constants.MCS.OBJECTIVE_TYPE, descr="Objective type"), + }, + player_type=PlayerType.DEFENDER, player_idx=0) + return experiment_config + + @pytest.fixture + def pomdp_config(self) -> StoppingGameDefenderPomdpConfig: + """ + Fixture, which is run before every test. 
It sets up an input POMDP config + + :return: The example config + """ + L = 1 + R_INT = -5 + R_COST = -5 + R_SLA = 1 + R_ST = 5 + p = 0.1 + n = 100 + + attacker_stage_strategy = np.zeros((3, 2)) + attacker_stage_strategy[0][0] = 0.9 + attacker_stage_strategy[0][1] = 0.1 + attacker_stage_strategy[1][0] = 0.9 + attacker_stage_strategy[1][1] = 0.1 + attacker_stage_strategy[2] = attacker_stage_strategy[1] + + stopping_game_config = StoppingGameConfig( + A1=StoppingGameUtil.attacker_actions(), A2=StoppingGameUtil.defender_actions(), L=L, R_INT=R_INT, + R_COST=R_COST, + R_SLA=R_SLA, R_ST=R_ST, b1=np.array(list(StoppingGameUtil.b1())), + save_dir="./results", + T=StoppingGameUtil.transition_tensor(L=L, p=p), + O=StoppingGameUtil.observation_space(n=n), + Z=StoppingGameUtil.observation_tensor(n=n), + R=StoppingGameUtil.reward_tensor(R_SLA=R_SLA, R_INT=R_INT, R_COST=R_COST, L=L, R_ST=R_ST), + S=StoppingGameUtil.state_space(), env_name="csle-stopping-game-v1", checkpoint_traces_freq=100000, + gamma=1) + pomdp_config = StoppingGameDefenderPomdpConfig( + stopping_game_config=stopping_game_config, stopping_game_name="csle-stopping-game-v1", + attacker_strategy=RandomPolicy(actions=list(stopping_game_config.A2), + player_type=PlayerType.ATTACKER, + stage_policy_tensor=list(attacker_stage_strategy)), + env_name="csle-stopping-game-pomdp-defender-v1") + return pomdp_config + + def test_create_agent(self, mocker: pytest_mock.MockFixture, experiment_config: ExperimentConfig) -> None: + """ + Tests creation of the MCSAgent + + :return: None + """ + emulation_env_config = mocker.MagicMock() + simulation_env_config = mocker.MagicMock() + MCSAgent(simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config, + experiment_config=experiment_config, save_to_metastore=False) + + def test_run_agent(self, mocker: pytest_mock.MockFixture, experiment_config: ExperimentConfig, + pomdp_config: StoppingGameDefenderPomdpConfig) -> None: + """ + Tests running the agent + + :param mocker: object for mocking API calls + :param experiment_config: the example experiment config + :param pomdp_config: the example POMDP config + + :return: None + """ + # Mock emulation and simulation configs + emulation_env_config = mocker.MagicMock() + simulation_env_config = mocker.MagicMock() + + # Set attributes of the mocks + simulation_env_config.configure_mock(**{ + "name": "simulation-test-env", "gym_env_name": "csle-stopping-game-pomdp-defender-v1", + "simulation_env_input_config": pomdp_config + }) + emulation_env_config.configure_mock(**{"name": "emulation-test-env"}) + + # Mock metastore facade + mocker.patch('csle_common.metastore.metastore_facade.MetastoreFacade.save_training_job', return_value=True) + mocker.patch('csle_common.metastore.metastore_facade.MetastoreFacade.save_experiment_execution', + return_value=True) + mocker.patch('csle_common.metastore.metastore_facade.MetastoreFacade.update_training_job', return_value=True) + mocker.patch('csle_common.metastore.metastore_facade.MetastoreFacade.update_experiment_execution', + return_value=True) + mocker.patch('csle_common.metastore.metastore_facade.MetastoreFacade.save_simulation_trace', return_value=True) + mocker.patch('csle_common.metastore.metastore_facade.MetastoreFacade.save_multi_threshold_stopping_policy', + return_value=True) + agent = MCSAgent(emulation_env_config=emulation_env_config, + simulation_env_config=simulation_env_config, + experiment_config=experiment_config) + experiment_execution = agent.train() + assert experiment_execution is not 
None + assert experiment_execution.descr != "" + assert experiment_execution.id is not None + assert experiment_execution.config == experiment_config + assert agents_constants.COMMON.AVERAGE_RETURN in experiment_execution.result.plot_metrics + assert agents_constants.COMMON.RUNNING_AVERAGE_RETURN in experiment_execution.result.plot_metrics + assert env_constants.ENV_METRICS.INTRUSION_LENGTH in experiment_execution.result.plot_metrics + assert agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_LENGTH in experiment_execution.result.plot_metrics + assert env_constants.ENV_METRICS.INTRUSION_START in experiment_execution.result.plot_metrics + assert agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_START in experiment_execution.result.plot_metrics + + assert env_constants.ENV_METRICS.TIME_HORIZON in experiment_execution.result.plot_metrics + assert agents_constants.COMMON.RUNNING_AVERAGE_TIME_HORIZON in experiment_execution.result.plot_metrics + assert env_constants.ENV_METRICS.AVERAGE_UPPER_BOUND_RETURN in experiment_execution.result.plot_metrics + assert env_constants.ENV_METRICS.AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN in \ + experiment_execution.result.plot_metrics + for seed in experiment_config.random_seeds: + assert seed in experiment_execution.result.all_metrics + assert agents_constants.MCS.THETAS in experiment_execution.result.all_metrics[seed] + assert agents_constants.COMMON.AVERAGE_RETURN in experiment_execution.result.all_metrics[seed] + assert agents_constants.COMMON.RUNNING_AVERAGE_RETURN in experiment_execution.result.all_metrics[seed] + assert agents_constants.MCS.THRESHOLDS in experiment_execution.result.all_metrics[seed] + assert (agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_START in + experiment_execution.result.all_metrics[seed]) + assert (agents_constants.COMMON.RUNNING_AVERAGE_TIME_HORIZON in + experiment_execution.result.all_metrics[seed]) + assert (agents_constants.COMMON.RUNNING_AVERAGE_INTRUSION_LENGTH in + experiment_execution.result.all_metrics[seed]) + assert env_constants.ENV_METRICS.INTRUSION_START in experiment_execution.result.all_metrics[seed] + assert env_constants.ENV_METRICS.TIME_HORIZON in experiment_execution.result.all_metrics[seed] + assert env_constants.ENV_METRICS.AVERAGE_UPPER_BOUND_RETURN in experiment_execution.result.all_metrics[seed] + assert (env_constants.ENV_METRICS.AVERAGE_DEFENDER_BASELINE_STOP_ON_FIRST_ALERT_RETURN in + experiment_execution.result.all_metrics[seed]) diff --git a/simulation-system/libs/csle-common/src/csle_common/dao/training/agent_type.py b/simulation-system/libs/csle-common/src/csle_common/dao/training/agent_type.py index 8baee2e9a..58013c577 100644 --- a/simulation-system/libs/csle-common/src/csle_common/dao/training/agent_type.py +++ b/simulation-system/libs/csle-common/src/csle_common/dao/training/agent_type.py @@ -41,3 +41,4 @@ class AgentType(IntEnum): DQN_CLEAN = 33 C51_CLEAN = 34 PPG_CLEAN = 35 + MCS = 36
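
A minimal, hand-checked usage sketch of the new GLSUtils.lssort helper introduced above (the input values are hypothetical and not part of the change set):

    from csle_agents.agents.mcs.mcs_utils.gls_utils import GLSUtils

    # three known steps and their function values; the step 1.0 has the lowest value
    alist, flist = [0.5, 0.0, 1.0], [2.0, 3.0, 1.0]
    (alist, flist, abest, fbest, fmed, up, down, monotone,
     minima, nmin, unitlen, s) = GLSUtils().lssort(alist, flist)
    # alist is sorted to [0.0, 0.5, 1.0] and flist is permuted to [3.0, 2.0, 1.0];
    # the values decrease monotonically, so monotone is True, abest == fbest == 1.0,
    # and nmin == 1 (a single local minimum at the last step)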