In [3]:
import sys 
sys.path.append('../')
import os
from environment.deephive_utils import *
from environment.utils import *
import numpy as np
from dotenv import load_dotenv
load_dotenv()
import matplotlib.pyplot as plt

In [4]:
def run_experiment(env, agent_policy, timesteps, iters, save_gif=False, result_path="experiment/", save_interval=10,
                   split_agents=True):
    """Run repeated episodes on `env` and record `env.gbest[-1]` at every timestep.

    Args:
        env: Environment exposing `reset()`, `step(actions)`, `render(...)`, `gbest`,
            `n_agents` and `n_dim`.
        agent_policy: Policy handed to `get_action` to produce the exploiting actions.
        timesteps: Number of steps taken in every episode.
        iters: Number of episodes to run.
        save_gif: When True, create `result_path` and render selected episodes into it.
        result_path: Directory that receives the rendered GIFs. A trailing separator
            is optional.
        save_interval: An episode is rendered when its index is a multiple of this value.
        split_agents: When True, the first half of the agents use the policy action and
            the second half use the action from `get_informed_action`; otherwise every
            agent uses the policy action.

    Returns:
        A list with one entry per episode; each entry is the list of `env.gbest[-1]`
        values sampled immediately before each step of that episode.
    """
    gbest_values = []
    if save_gif:
        os.makedirs(result_path, exist_ok=True)
    for iteration in range(iters):
        observation_info = env.reset()
        episode_gbest = []
        for _ in range(timesteps):
            # Sampled before stepping, so the value produced by the final step of an
            # episode is not part of the record.
            episode_gbest.append(env.gbest[-1])
            exploiters_action = get_action(observation_info, agent_policy, env)
            # Called on every step even when split_agents is False, exactly as before.
            # NOTE(review): it may update state on env - confirm before making it conditional.
            explorer_action = get_informed_action(env)
            if split_agents:
                # First half of the agents exploit, second half explore.
                half = env.n_agents // 2
                actions = np.zeros((env.n_agents, env.n_dim))
                actions[:half] = exploiters_action[:half]
                actions[half:] = explorer_action[half:]
            else:
                actions = exploiters_action
            # reward, done and info are not used: every episode runs for the full
            # `timesteps` regardless of `done`.
            observation_info, reward, done, info = env.step(actions)
        gbest_values.append(episode_gbest)
        if save_gif and iteration % save_interval == 0:
            # os.path.join keeps the GIF inside result_path whether or not the caller
            # supplied a trailing slash.
            gif_path = os.path.join(result_path, "iter_" + str(iteration) + ".gif")
            _ = env.render(type="history", file_path=gif_path)
    return gbest_values

In [5]:
# Both experiments load the same configuration file and run in test mode.
config_path = '../config/config.json'
mode = "test"

# Experiment 1: variance-driven explorers and exploiters.
# The set_action_std call is left disabled for this policy.
model_path = "../models/exploiting_model.pth"
env1, agent_policy1 = initialize(config_path, model_path=model_path, mode=mode)
config = parse_config(config_path)

# Experiment 2: no division of the agents.
# This policy gets its action std from the "test_action_std" config entry.
model_path = "../models/policy-50000.pth"
env2, agent_policy2 = initialize(config_path, model_path=model_path, mode=mode)
config = parse_config(config_path)
agent_policy2.set_action_std(config["test_action_std"])

# Run parameters shared by both experiments.
exp_num = 1
timesteps = 20
iters = 10

# Each entry is unpacked downstream as:
# env, agent_policy, name, timesteps, iters, save_gif, result_path, split_agent
variance_driven_run = [
    env1, agent_policy1, f"variance_driven_exploration_{exp_num}",
    timesteps, iters, True, "experiment/", True,
]
no_division_run = [
    env2, agent_policy2, f"no_division_{exp_num}",
    timesteps, iters, True, "experiment/", False,
]
experiments = [variance_driven_run, no_division_run]

Loaded policy from:  ../models/exploiting_model.pth
Loaded policy from:  ../models/policy-50000.pth


In [None]:
# Line style and colour for each experiment's curve, picked by the experiment's
# position in `experiments` (four entries each, so at most four experiments).
all_symbols = ["-", "--", "-.", ":"]
all_colors = ["r", "g", "b", "k"]

symbol_list = []
color_list = []
label_list = []
gbest_values = []
for i, experiment in enumerate(experiments):
    env, agent_policy, name, timesteps, iters, save_gif, result_path, split_agent = experiment
    experiment_dir = f"{result_path}{name}/"
    # run_experiment only creates the directory when save_gif is True, but the plots
    # and the .npy file below are written regardless, so create it here.
    os.makedirs(experiment_dir, exist_ok=True)
    gbest_value = run_experiment(env, agent_policy, timesteps, iters, save_gif, experiment_dir, split_agents=split_agent, save_interval=1)
    env.surrogate.plot_surrogate(save_dir=f"{experiment_dir}surrogate.png")
    env.surrogate.plot_variance(save_dir=f"{experiment_dir}variance.png")
    # Save this experiment's own results. Saving the `gbest_values` accumulator here
    # wrote an empty list for the first experiment and the previous experiment's
    # data for every later one.
    np.save(result_path + name + "_gbest_values.npy", gbest_value)
    gbest_values.append(gbest_value)
    symbol_list.append(all_symbols[i])
    color_list.append(all_colors[i])
    label_list.append(name)

In [21]:
# Plot every experiment's recorded gbest values on one figure.
# `env` is whatever the experiment loop bound last; it supplies the agent count
# and the objective's optimal value for the environment's dimensionality.
comparison_kwargs = dict(
    fopt=np.array(gbest_values),
    label_list=label_list,
    symbol_list=symbol_list,
    color_list=color_list,
    save_dir=f"experiment/comparison_{exp_num}.png",
    n_agents=env.n_agents,
    opt_value=env.objective_function.optimal_value(env.n_dim),
)
plot_num_function_evaluation(**comparison_kwargs)

Number of function evaluations: 10
Number of algorithms: 2


In [8]:
# Scratch check of the third-party `str2bool` package; nothing else in the visible
# cells uses it.
import str2bool

# The recorded output of this call is `False`.
str2bool.str2bool("False")

False