Merge pull request #353 from Limmen/dev
Multilevel Coordinate Search (MCS)
Limmen committed May 13, 2024
2 parents f858728 + f4a58e3 commit 45721bd
Showing 18 changed files with 4,787 additions and 108 deletions.
Empty file added: dev
@@ -8,89 +8,159 @@
 from csle_common.dao.training.tabular_policy import TabularPolicy
 from csle_common.metastore.metastore_facade import MetastoreFacade
 
-if __name__ == '__main__':
-    emulation_name = "csle-level9-040"
+if __name__ == "__main__":
+    emulation_name = "csle-level1-050"
     emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
     if emulation_env_config is None:
-        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
+        raise ValueError(
+            f"Could not find an emulation environment with the name: {emulation_name}"
+        )
     simulation_name = "csle-intrusion-response-game-local-pomdp-defender-001"
     simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
    if simulation_env_config is None:
         raise ValueError(f"Could not find a simulation with name: {simulation_name}")
     experiment_config = ExperimentConfig(
         output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}dqn_clean_test",
-        title="DQN_clean test", random_seeds=[399, 98912, 999], agent_type=AgentType.DQN_CLEAN,
+        title="DQN_clean test",
+        random_seeds=[399, 98912, 999],
+        agent_type=AgentType.DQN_CLEAN,
         log_every=1000,
         hparams={
             constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER: HParam(
-                value=7, name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
-                descr="neurons per hidden layer of the policy network"),
+                value=7,
+                name=constants.NEURAL_NETWORKS.NUM_NEURONS_PER_HIDDEN_LAYER,
+                descr="neurons per hidden layer of the policy network",
+            ),
             constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS: HParam(
-                value=4, name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
-                descr="number of layers of the policy network"),
+                value=4,
+                name=constants.NEURAL_NETWORKS.NUM_HIDDEN_LAYERS,
+                descr="number of layers of the policy network",
+            ),
             agents_constants.DQN_CLEAN.EXP_FRAC: HParam(
-                value=0.5, name=agents_constants.DQN_CLEAN.EXP_FRAC,
-                descr="the fraction of `total-timesteps it takes from start-e to go end-e"),
+                value=0.5,
+                name=agents_constants.DQN_CLEAN.EXP_FRAC,
+                descr="the fraction of `total-timesteps it takes from start-e to go end-e",
+            ),
             agents_constants.DQN_CLEAN.TAU: HParam(
-                value=1.0, name=agents_constants.DQN_CLEAN.TAU, descr="target network update rate"),
+                value=1.0,
+                name=agents_constants.DQN_CLEAN.TAU,
+                descr="target network update rate",
+            ),
             agents_constants.COMMON.BATCH_SIZE: HParam(
-                value=64, name=agents_constants.COMMON.BATCH_SIZE, descr="batch size for updates"),
+                value=64,
+                name=agents_constants.COMMON.BATCH_SIZE,
+                descr="batch size for updates",
+            ),
             agents_constants.DQN_CLEAN.LEARNING_STARTS: HParam(
-                value=10000, name=agents_constants.DQN_CLEAN.LEARNING_STARTS, descr="timestep to start learning"),
+                value=10000,
+                name=agents_constants.DQN_CLEAN.LEARNING_STARTS,
+                descr="timestep to start learning",
+            ),
             agents_constants.DQN_CLEAN.TRAIN_FREQ: HParam(
-                value=10, name=agents_constants.DQN_CLEAN.TRAIN_FREQ, descr="the frequency of training"),
+                value=10,
+                name=agents_constants.DQN_CLEAN.TRAIN_FREQ,
+                descr="the frequency of training",
+            ),
             agents_constants.DQN_CLEAN.T_N_FREQ: HParam(
-                value=500, name=agents_constants.DQN_CLEAN.T_N_FREQ,
-                descr="the batch size of sample from the reply memory"),
+                value=500,
+                name=agents_constants.DQN_CLEAN.T_N_FREQ,
+                descr="the batch size of sample from the reply memory",
+            ),
             agents_constants.DQN_CLEAN.BUFFER_SIZE: HParam(
-                value=1000, name=agents_constants.DQN_CLEAN.BUFFER_SIZE, descr="the replay memory buffer size"),
+                value=1000,
+                name=agents_constants.DQN_CLEAN.BUFFER_SIZE,
+                descr="the replay memory buffer size",
+            ),
             agents_constants.DQN_CLEAN.SAVE_MODEL: HParam(
-                value=False, name=agents_constants.DQN_CLEAN.SAVE_MODEL, descr="decision param for model saving"),
+                value=False,
+                name=agents_constants.DQN_CLEAN.SAVE_MODEL,
+                descr="decision param for model saving",
+            ),
             agents_constants.COMMON.LEARNING_RATE: HParam(
-                value=2.4e-5, name=agents_constants.COMMON.LEARNING_RATE,
-                descr="learning rate for updating the policy"),
+                value=2.4e-5,
+                name=agents_constants.COMMON.LEARNING_RATE,
+                descr="learning rate for updating the policy",
+            ),
             agents_constants.DQN_CLEAN.NUM_STEPS: HParam(
-                value=164, name=agents_constants.DQN_CLEAN.NUM_STEPS, descr="number of steps in each time step"),
+                value=164,
+                name=agents_constants.DQN_CLEAN.NUM_STEPS,
+                descr="number of steps in each time step",
+            ),
             constants.NEURAL_NETWORKS.DEVICE: HParam(
-                value="cpu", name=constants.NEURAL_NETWORKS.DEVICE, descr="the device to train on (cpu or cuda:x)"),
+                value="cpu",
+                name=constants.NEURAL_NETWORKS.DEVICE,
+                descr="the device to train on (cpu or cuda:x)",
+            ),
             agents_constants.COMMON.NUM_PARALLEL_ENVS: HParam(
-                value=1, name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
-                descr="the nunmber of parallel environments for training"),
+                value=1,
+                name=agents_constants.COMMON.NUM_PARALLEL_ENVS,
+                descr="the nunmber of parallel environments for training",
+            ),
             agents_constants.COMMON.GAMMA: HParam(
-                value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
+                value=0.99,
+                name=agents_constants.COMMON.GAMMA,
+                descr="the discount factor",
+            ),
             agents_constants.COMMON.NUM_TRAINING_TIMESTEPS: HParam(
-                value=int(100000), name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
-                descr="number of timesteps to train"),
-            agents_constants.COMMON.EVAL_EVERY: HParam(value=1, name=agents_constants.COMMON.EVAL_EVERY,
-                                                       descr="training iterations between evaluations"),
-            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(value=100, name=agents_constants.COMMON.EVAL_BATCH_SIZE,
-                                                            descr="the batch size for evaluation"),
-            agents_constants.COMMON.SAVE_EVERY: HParam(value=10000, name=agents_constants.COMMON.SAVE_EVERY,
-                                                       descr="how frequently to save the model"),
+                value=int(100000),
+                name=agents_constants.COMMON.NUM_TRAINING_TIMESTEPS,
+                descr="number of timesteps to train",
+            ),
+            agents_constants.COMMON.EVAL_EVERY: HParam(
+                value=1,
+                name=agents_constants.COMMON.EVAL_EVERY,
+                descr="training iterations between evaluations",
+            ),
+            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(
+                value=100,
+                name=agents_constants.COMMON.EVAL_BATCH_SIZE,
+                descr="the batch size for evaluation",
+            ),
+            agents_constants.COMMON.SAVE_EVERY: HParam(
+                value=10000,
+                name=agents_constants.COMMON.SAVE_EVERY,
+                descr="how frequently to save the model",
+            ),
             agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
-                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
-                descr="confidence interval"),
+                value=0.95,
+                name=agents_constants.COMMON.CONFIDENCE_INTERVAL,
+                descr="confidence interval",
+            ),
             agents_constants.COMMON.MAX_ENV_STEPS: HParam(
-                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
-                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
+                value=500,
+                name=agents_constants.COMMON.MAX_ENV_STEPS,
+                descr="maximum number of steps in the environment (for envs with infinite horizon generally)",
+            ),
             agents_constants.COMMON.RUNNING_AVERAGE: HParam(
-                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
-                descr="the number of samples to include when computing the running avg"),
-            agents_constants.COMMON.L: HParam(value=3, name=agents_constants.COMMON.L,
-                                              descr="the number of stop actions")
+                value=100,
+                name=agents_constants.COMMON.RUNNING_AVERAGE,
+                descr="the number of samples to include when computing the running avg",
+            ),
+            agents_constants.COMMON.L: HParam(
+                value=3,
+                name=agents_constants.COMMON.L,
+                descr="the number of stop actions",
+            ),
         },
-        player_type=PlayerType.DEFENDER, player_idx=0
+        player_type=PlayerType.DEFENDER,
+        player_idx=0,
     )
     simulation_env_config.simulation_env_input_config.attacker_strategy = TabularPolicy(
         player_type=PlayerType.ATTACKER,
-        actions=simulation_env_config.joint_action_space_config.action_spaces[1].actions,
-        simulation_name=simulation_env_config.name, value_function=None, q_table=None,
-        lookup_table=[
-            [0.8, 0.2],
-            [1, 0],
-            [1, 0]
-        ],
-        agent_type=AgentType.RANDOM, avg_R=-1)
-    agent = DQNCleanAgent(simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config,
-                          experiment_config=experiment_config, save_to_metastore=False)
+        actions=simulation_env_config.joint_action_space_config.action_spaces[
+            1
+        ].actions,
+        simulation_name=simulation_env_config.name,
+        value_function=None,
+        q_table=None,
+        lookup_table=[[0.8, 0.2], [1, 0], [1, 0]],
+        agent_type=AgentType.RANDOM,
+        avg_R=-1,
+    )
+    agent = DQNCleanAgent(
+        simulation_env_config=simulation_env_config,
+        emulation_env_config=emulation_env_config,
+        experiment_config=experiment_config,
+        save_to_metastore=False,
+    )
     experiment_execution = agent.train()
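A note on the exploration hyperparameter in the configuration above: per its description, EXP_FRAC is the fraction of the total training timesteps over which the exploration rate is annealed from its start value to its end value. The sketch below illustrates such a linear schedule; the start and end epsilon values (1.0 and 0.05) are illustrative assumptions and are not taken from this diff.

def linear_schedule(start_e: float, end_e: float, duration: int, t: int) -> float:
    # Linearly anneal the exploration rate from start_e to end_e over `duration`
    # timesteps, then hold it at end_e.
    slope = (end_e - start_e) / duration
    return max(slope * t + start_e, end_e)

# With EXP_FRAC = 0.5 and 100000 training timesteps, the anneal ends halfway through training.
duration = int(0.5 * 100000)
print(linear_schedule(1.0, 0.05, duration, 25000))  # ~0.525, halfway through the anneal
print(linear_schedule(1.0, 0.05, duration, 75000))  # 0.05, anneal finished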
@@ -0,0 +1,84 @@
+import csle_agents.constants.constants as agents_constants
+import csle_common.constants.constants as constants
+from csle_agents.agents.mcs.mcs_agent import MCSAgent
+from csle_agents.common.objective_type import ObjectiveType
+from csle_common.dao.training.agent_type import AgentType
+from csle_common.dao.training.experiment_config import ExperimentConfig
+from csle_common.dao.training.hparam import HParam
+from csle_common.dao.training.player_type import PlayerType
+from csle_common.dao.training.policy_type import PolicyType
+from csle_common.metastore.metastore_facade import MetastoreFacade
+
+if __name__ == "__main__":
+    emulation_name = "csle-level1-050"
+    emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
+    if emulation_env_config is None:
+        raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")
+    simulation_name = "csle-stopping-pomdp-defender-002"
+    simulation_env_config = MetastoreFacade.get_simulation_by_name(simulation_name)
+    if simulation_env_config is None:
+        raise ValueError(f"Could not find a simulation with name: {simulation_name}")
+    experiment_config = ExperimentConfig(
+        output_dir=f"{constants.LOGGING.DEFAULT_LOG_DIR}particle_swarm_test",
+        title="Multilevel Coordinate Search",
+        random_seeds=[399, 98912],
+        agent_type=AgentType.MCS,
+        log_every=1,
+        hparams={
+            agents_constants.MCS.STEP: HParam(value=1000, name=agents_constants.MCS.STEP, descr="step"),
+            agents_constants.MCS.STEP1: HParam(value=10000, name=agents_constants.MCS.STEP1, descr="step1"),
+            agents_constants.MCS.U: HParam(value=[-20, -20, -20], name=agents_constants.MCS.U,
+                                           descr="initial lower corner"),
+            agents_constants.MCS.LOCAL: HParam(value=50, name=agents_constants.MCS.LOCAL,
+                                               descr="local value stating to which degree to perform local searches"),
+            agents_constants.MCS.V: HParam(value=[20, 20, 20], name=agents_constants.MCS.V,
+                                           descr="initial upper corner"),
+            agents_constants.MCS.STOPPING_ACTIONS: HParam(
+                value=3, name=agents_constants.MCS.L, descr="no. of stopping actions"),
+            agents_constants.COMMON.MAX_ENV_STEPS: HParam(
+                value=500, name=agents_constants.COMMON.MAX_ENV_STEPS,
+                descr="maximum number of steps in the environment (for envs with infinite horizon generally)"),
+            agents_constants.MCS.IINIT: HParam(
+                value=0, name=agents_constants.MCS.IINIT, descr="simple initialization list"),
+            agents_constants.MCS.GAMMA: HParam(
+                value=2.220446049250313e-16, name=agents_constants.MCS.GAMMA, descr="MCS gamma value"),
+            agents_constants.MCS.EPSILON: HParam(
+                value=2.220446049250313e-16, name=agents_constants.MCS.EPSILON, descr="MCS epsilon value"),
+            agents_constants.MCS.M: HParam(
+                value=1, name=agents_constants.MCS.M, descr="m value"),
+            agents_constants.MCS.PRT: HParam(
+                value=1, name=agents_constants.MCS.PRT, descr="print level"),
+            agents_constants.COMMON.EVAL_BATCH_SIZE: HParam(
+                value=10, name=agents_constants.COMMON.EVAL_BATCH_SIZE, descr="number of iterations to evaluate theta"),
+            agents_constants.COMMON.SAVE_EVERY: HParam(
+                value=1000, name=agents_constants.COMMON.SAVE_EVERY, descr="how frequently to save the model"),
+            agents_constants.COMMON.CONFIDENCE_INTERVAL: HParam(
+                value=0.95, name=agents_constants.COMMON.CONFIDENCE_INTERVAL, descr="confidence interval"),
+            agents_constants.COMMON.RUNNING_AVERAGE: HParam(
+                value=100, name=agents_constants.COMMON.RUNNING_AVERAGE,
+                descr="the number of samples to include when computing the running avg"),
+            agents_constants.COMMON.GAMMA: HParam(
+                value=0.99, name=agents_constants.COMMON.GAMMA, descr="the discount factor"),
+            agents_constants.MCS.POLICY_TYPE: HParam(
+                value=PolicyType.MULTI_THRESHOLD, name=agents_constants.PARTICLE_SWARM.POLICY_TYPE,
+                descr="policy type for the execution"),
+            agents_constants.MCS.OBJECTIVE_TYPE: HParam(
+                value=ObjectiveType.MAX, name=agents_constants.PARTICLE_SWARM.OBJECTIVE_TYPE, descr="Objective type"),
+        },
+        player_type=PlayerType.DEFENDER, player_idx=0,
+    )
+    agent = MCSAgent(
+        simulation_env_config=simulation_env_config, emulation_env_config=emulation_env_config,
+        experiment_config=experiment_config, save_to_metastore=False)
+    experiment_execution = agent.train()
+    # MetastoreFacade.save_experiment_execution(experiment_execution)
+    # for policy in experiment_execution.result.policies.values():
+    #     if experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value == PolicyType.MULTI_THRESHOLD:
+    #         MetastoreFacade.save_multi_threshold_stopping_policy(multi_threshold_stopping_policy=policy)
+    #     elif experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value \
+    #             == PolicyType.LINEAR_THRESHOLD:
+    #         MetastoreFacade.save_linear_threshold_stopping_policy(linear_threshold_stopping_policy=policy)
+    #     else:
+    #         raise ValueError("Policy type: "
+    #                          f"{experiment_config.hparams[agents_constants.PARTICLE_SWARM.POLICY_TYPE].value} "
+    #                          f"not recognized for MCS")
@@ -11,7 +11,7 @@
 from csle_agents.common.objective_type import ObjectiveType
 
 if __name__ == '__main__':
-    emulation_name = "csle-level9-030"
+    emulation_name = "csle-level1-050"
     emulation_env_config = MetastoreFacade.get_emulation_by_name(emulation_name)
     if emulation_env_config is None:
         raise ValueError(f"Could not find an emulation environment with the name: {emulation_name}")