## DeepHive: A multi-agent reinforcement learning approach for automated discovery of swarm-based optimization policies

## How to use the DeepHive optimization algorithm

In [1]:
from commons.utils import get_config, get_environment, get_policy, get_obj_func
from src.deephive import DeepHive
from src.mappo import MAPPO
from src.environment import OptimizationEnv
import numpy as np
from registry import Registry

  from .autonotebook import tqdm as notebook_tqdm


### Load the config file

In [2]:
# Load the configuration from 'config.yml' and keep a handle on its
# 'environment_config' section, which later cells read their settings from.
config = get_config('config.yml')
environment_config = config['environment_config']

### Get Environment Parameters

In [3]:
# Pull the environment settings out of the config section in a single pass;
# the key order matches the order of the names being assigned.
(n_agents, n_dim, ep_length,
 freeze, opt_bound, reward_type) = (
    environment_config[key]
    for key in ('n_agents', 'n_dim', 'ep_length',
                'freeze', 'opt_bound', 'reward_type')
)

# Run settings passed to DeepHive (and, for `mode`, to get_policy).
title = 'tutorial02'
mode = 'test'

### We can choose to use an already registered optimization environment, or we can register a new one
#### First let us use an already registered environment

In [4]:
# Fetch the environment registered under this name.
# NOTE(review): the exact effect of reinit=True is defined in get_environment — confirm there.
env_name = "cos_function"
env = get_environment(config, env_name, reinit=True)
# Reset the environment and print the initial state it returns
# (presumably one row per agent — TODO confirm against OptimizationEnv.reset).
state = env.reset()
print(state)

[[0.51       0.25833333 0.        ]
 [0.44333333 0.1        0.48534057]
 [0.56       0.475      0.39053804]
 [0.38833332 0.37333333 0.3032356 ]
 [0.41166666 0.76       0.5547444 ]
 [0.915      0.5083333  0.31475282]
 [0.5516667  0.835      0.31438377]
 [0.95666665 0.15666667 0.53685486]
 [0.67       0.44833332 1.        ]
 [0.945      0.77       0.4532216 ]
 [0.79       0.33666667 0.15035988]
 [0.93666667 0.075      0.3412973 ]
 [0.58       0.41       0.5989853 ]
 [0.765      0.39       0.48835653]
 [0.50166667 0.735      0.6164073 ]
 [0.02333333 0.17       0.24716564]
 [0.45833334 0.16166666 0.55228686]
 [0.9533333  0.19166666 0.41279554]
 [0.7366667  0.06333333 0.5811763 ]
 [0.40166667 0.07833333 0.37029055]]


### Loading a trained policy
Now we are going to load a trained policy. The checkpoint path (for example `models/model.pth`) and the MAPPO parameters are both read from the config.

In [5]:
# Build the policy from the config for the selected mode ('test' in this notebook).
policy = get_policy(config, mode=mode)

Loading pretrained model from logs/cosmix_train/checkpoints/policy-900.pth...


### Initialize DeepHive

In [6]:
# Create the optimizer from the run title, environment, policy, mode and config.
deephive = DeepHive(title, env, policy, mode, config)

tutorial02 : TEST RUN SUMMARY
Environment: cos_function
Number of agents: 20
Number of dimensions: 2
Episode length: 20
Number of runs: 5


### Optimize the Cosine Mixture Function

In [7]:
# Run the optimization on the current environment.
# NOTE(review): debug=True appears to enable the per-run printout — confirm in DeepHive.optimize.
deephive.optimize(debug=True)

*************Run: 1/5***************
Episode best: 8 | Number of best changes: 2 | Total runtime: 0.589931
Global best values: [2.01363182 1.97887504] | Global best fitness: 5.993365657757597
*************Run: 2/5***************
Episode best: 19 | Number of best changes: 5 | Total runtime: 0.044102
Global best values: [1.99473083 1.95775247] | Global best fitness: 5.981003630544476
*************Run: 3/5***************
Episode best: 9 | Number of best changes: 4 | Total runtime: 0.087461
Global best values: [2.09557295 1.96208549] | Global best fitness: 5.889965444720485
*************Run: 4/5***************
Episode best: 13 | Number of best changes: 3 | Total runtime: 0.077795
Global best values: [2.01736045 2.0590266 ] | Global best fitness: 5.9603907412244
*************Run: 5/5***************
Episode best: 13 | Number of best changes: 5 | Total runtime: 0.093013
Global best values: [1.91119754 1.87063968] | Global best fitness: 5.745353480590932


### Registering a New Environment
To optimize a new function, we need to create an RL environment for it and then either add it to the registry or use it directly. The steps are as follows:
1. Create your optimization function and add it to `commons/objective_functions.py`.
2. Specify the bounds and, if known, the maximum value in the `get_obj_func` function (see the existing examples).

In [11]:
env_name = "sphere"

# Look up the objective function and its metadata by name. The fourth field is
# bound to `func_type` rather than `type` so the builtin `type` is not shadowed
# for the rest of the kernel session.
function_info = get_obj_func(env_name)
objfunc, bounds, opt_obj_value, func_type = function_info

# Create the environment for the new function, reusing the agent, dimension and
# episode settings read from the config earlier in the notebook.
env = OptimizationEnv(env_name=env_name, optFunc=objfunc, n_agents=n_agents,
                      n_dim=n_dim, ep_length=ep_length, bounds=bounds, opt_value=opt_obj_value,
                      reward_type=reward_type, freeze=freeze, opt_bound=opt_bound)


### Registry: 
The registry is the class that helps organize the environments. We can add, get, and delete environments from the registry.

In [12]:
# Location of the persisted registry, taken from the environment config.
envs_cache_path = environment_config['envs_cache_path']

# Start from the environments that were saved previously.
registry = Registry()
registry._load_envs(envs_cache_path)

# Show what is registered before the new environment is added.
env_names = registry._get_all_envs()
print(env_names)

# Register the new environment under its name, then persist the registry.
registry.add_env(env_name, env)
registry._save_envs(envs_cache_path)

{'cosine_mixture': <src.environment.OptimizationEnv object at 0x000002EB9206BEE0>, 'minmax': <src.environment.OptimizationEnv object at 0x000002EB92009490>, 'cos_function': <src.environment.OptimizationEnv object at 0x000002EB92009610>}


In [13]:
# Fetch the environment back from the registry by the name it was added under.
env = registry.get_env(env_name)

# Build a new DeepHive around that environment, reusing the same title, policy,
# mode and config, and run the optimization.
# NOTE(review): debug=True appears to enable the per-run printout — confirm in DeepHive.optimize.
deephive = DeepHive(title, env, policy, mode, config)
deephive.optimize(debug=True)

tutorial02 : TEST RUN SUMMARY
Environment: sphere
Number of agents: 20
Number of dimensions: 2
Episode length: 20
Number of runs: 5
*************Run: 1/5***************
Episode best: 8 | Number of best changes: 5 | Total runtime: 0.113657
Global best values: [-0.02515161  0.00181425] | Global best fitness: -0.0006358961440915708
*************Run: 2/5***************
Episode best: 18 | Number of best changes: 2 | Total runtime: 0.081012
Global best values: [-0.01804125 -0.02377224] | Global best fitness: -0.000890599966581101
*************Run: 3/5***************
Episode best: 8 | Number of best changes: 5 | Total runtime: 0.090838
Global best values: [0.00902081 0.02332377] | Global best fitness: -0.0006253633230600064
*************Run: 4/5***************
Episode best: 9 | Number of best changes: 1 | Total runtime: 0.093654
Global best values: [-0.02507079  0.03118658] | Global best fitness: -0.0016011571061642035
*************Run: 5/5***************
Episode best: 1 | Number of best chan