In [None]:
import os
import pandas as pd
from policies.mappo import MAPPO
from environment.optimization_environment import OptimizationEnv
from environment.utils import parse_config
from datetime import datetime
import numpy as np
import seaborn as sns
import os
import neptune
from neptune.types import File
import argparse 
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as stats
import scipy
from other_algorithms.pso import ParticleSwarmOptimizer
import time
from dotenv import load_dotenv
load_dotenv()

In [None]:
def initialize(config_path, mode="train", **kwargs):
    """
    Build the optimization environment and the MAPPO policy from one config file.

    Parameters:
    - config_path: Path passed to both ``OptimizationEnv`` and ``MAPPO``.
    - mode: Run mode. For ``"test"`` and ``"benchmark"`` the policy additionally
      calls ``load`` on the given model path.
    - **kwargs: ``model_path`` is read (and required) in test/benchmark mode.

    Returns:
    - The tuple ``(env, agent_policy)``.

    Raises:
    - ValueError: if mode is "test"/"benchmark" and no ``model_path`` was given.
    """
    env = OptimizationEnv(config_path)
    agent_policy = MAPPO(config_path)

    # Any other mode needs no checkpoint: hand back the freshly built pair.
    if mode not in ("test", "benchmark"):
        return env, agent_policy

    checkpoint = kwargs.get("model_path")
    if checkpoint is None:
        raise ValueError("Model path must be provided for testing")
    agent_policy.load(checkpoint)
    return env, agent_policy

def print_items(**kwargs):
    """Print each keyword argument as ``name value``, one pair per line."""
    for name in kwargs:
        print(name, kwargs[name])
        
def get_action(observation_info, agent_policy, env):
    """
    Query the policy once per search dimension and assemble the joint action.

    Parameters:
    - observation_info: Pair ``(observation, observation_std)``; each element is
      indexed by dimension and every entry must support ``.astype``.
    - agent_policy: Object exposing ``select_action(observation, observation_std)``.
    - env: Object exposing ``n_agents`` and ``n_dim``.

    Returns:
    - Array of shape ``(env.n_agents, env.n_dim)`` whose column ``d`` holds the
      policy's output for dimension ``d``.

    Note: the per-dimension entries of both containers are replaced in place by
    their float32 versions, so the caller's objects are modified.
    """
    obs_by_dim, std_by_dim = observation_info
    joint_actions = np.zeros((env.n_agents, env.n_dim))
    for d in range(env.n_dim):
        # Cast first (writing back into the caller's containers), then query.
        obs_by_dim[d] = obs_by_dim[d].astype(np.float32)
        std_by_dim[d] = std_by_dim[d].astype(np.float32)
        joint_actions[:, d] = agent_policy.select_action(obs_by_dim[d], std_by_dim[d])
    return joint_actions

In [None]:
# Session setup: build the environment and policy in "test" mode, which makes
# initialize() call agent_policy.load(model_path).
config_path = 'config/config.json'
model_path = "training_runs/2023-11-03_08-25-56/policy-30000.pth"
mode = "test"
env, agent_policy = initialize(config_path, mode=mode, model_path=model_path)
config = parse_config(config_path)
# Apply the "test_action_std" entry of the parsed config to the policy.
agent_policy.set_action_std(config["test_action_std"])

In [None]:
# Reset the environment; the result is later unpacked by get_action() as
# (observation, observation_std).
observation_info = env.reset()
env.render()

In [None]:
# Ask the loaded policy for an (n_agents, n_dim) action array.
actions = get_action(observation_info, agent_policy, env)

In [None]:
# Apply the actions for one environment step and render the new state.
observation_info, reward, done, info = env.step(actions)
env.render()

In [None]:
import numpy as np
from sklearn.mixture import GaussianMixture
from scipy.stats import multivariate_normal

class ExplorationModule:
    """
    Gaussian Mixture Model over a buffer of sample points.

    The model is refitted from scratch whenever the buffer changes and is used
    to draw candidate points, score how novel given points are, and look up the
    variance of the component nearest to a point.
    """

    def __init__(self, initial_samples, n_components=1, max_samples=None, **kwargs):
        """
        Initialize the exploration module with a Gaussian Mixture Model.

        Parameters:
        - initial_samples: An array of initial sample points (one row per point).
        - n_components: Number of components (Gaussians) in the GMM.
        - max_samples: Maximum number of samples kept in the buffer; ``None``
          (or 0) keeps every sample.
        - **kwargs:
            - upper_bound / lower_bound: plotting range used on both axes by
              ``plot_distribution`` (defaults 1 and -1).
            - random_state: optional seed forwarded to ``GaussianMixture`` so
              fits are reproducible (default ``None``, i.e. unseeded).
        """
        self.n_components = n_components
        self.max_samples = max_samples
        self.random_state = kwargs.get("random_state", None)
        self.samples = initial_samples
        self._fit_gmm()
        self.upper_bound = kwargs.get("upper_bound", 1)
        self.lower_bound = kwargs.get("lower_bound", -1)

    def _fit_gmm(self):
        """Fit a fresh GMM to the current sample buffer and store it in ``self.gmm``."""
        self.gmm = GaussianMixture(n_components=self.n_components,
                                   random_state=self.random_state)
        self.gmm.fit(self.samples)

    def update_distribution(self, new_samples):
        """
        Append new samples to the buffer and refit the GMM.

        Parameters:
        - new_samples: An array of new sample points.

        When ``max_samples`` is set, only the most recent ``max_samples`` rows
        are kept. The trim happens after appending, so the buffer never exceeds
        the cap.
        """
        combined = np.vstack([self.samples, new_samples])
        if self.max_samples and len(combined) > self.max_samples:
            combined = combined[-self.max_samples:]
        self.samples = combined
        self._fit_gmm()

    def sample_candidate_points(self, n_samples):
        """
        Generate new candidate points based on the current GMM.

        Parameters:
        - n_samples: Number of candidate points to generate.

        Returns:
        - The sampled points (first element of ``GaussianMixture.sample``; the
          component labels are discarded).
        """
        return self.gmm.sample(n_samples)[0]

    def assess_novelty(self, points):
        """
        Assess the novelty of given points based on the current GMM.

        Parameters:
        - points: An array of points to assess.

        Returns:
        - For each point, the inverse of its largest per-component density
          (mixture weights are not applied). Densities that underflow to zero
          are clamped to the smallest positive normal float, so the score is
          very large but finite rather than ``inf``.
        """
        # Probability density of each point under each individual component.
        component_densities = np.array([
            multivariate_normal(mean=mean, cov=cov, allow_singular=True).pdf(points)
            for mean, cov in zip(self.gmm.means_, self.gmm.covariances_)
        ])
        peak_density = np.max(component_densities, axis=0)

        # Guard the division: far-away points have density exactly 0.0.
        density_floor = np.finfo(float).tiny
        novelty_scores = 1 / np.maximum(peak_density, density_floor)
        return novelty_scores

    def get_variance(self, point):
        """
        Estimate the variance of a given point based on the GMM.

        Parameters:
        - point: The point to estimate variance for.

        Returns:
        - The diagonal of the covariance matrix of the component whose mean is
          closest (Euclidean distance) to ``point``. Relies on the default
          ``covariance_type='full'`` layout of ``covariances_``.
        """
        nearest_component = np.argmin(np.linalg.norm(self.gmm.means_ - point, axis=1))
        return np.diag(self.gmm.covariances_[nearest_component])

    def plot_distribution(self):
        """
        Plot the current GMM density as a filled contour.

        The grid spans ``[lower_bound, upper_bound]`` on both axes and has two
        coordinates per point, so this only works for a GMM fitted on
        two-feature samples.
        """
        # Create a mesh grid on which to evaluate the GMM
        x = np.linspace(self.lower_bound, self.upper_bound, 100)
        y = np.linspace(self.lower_bound, self.upper_bound, 100)
        X, Y = np.meshgrid(x, y)
        XY = np.array([X.ravel(), Y.ravel()]).T

        # score_samples returns log-density; exponentiate to get the PDF.
        Z = np.exp(self.gmm.score_samples(XY))
        Z = Z.reshape(X.shape)
        # Plot the contour
        plt.contourf(X, Y, Z, levels=50, cmap='viridis')
        plt.colorbar()
        plt.title('GMM Contour Plot')
        plt.xlabel('X-axis')
        plt.ylabel('Y-axis')
        plt.show()


In [None]:
# Start a fresh episode and fit a single-component exploration model on the
# current particle state.
obs = env.reset()
# Every state column except the last is used as a sample coordinate.
# NOTE(review): presumably the last column is the objective value — confirm.
particle_data = env._get_actual_state()[:, :-1]
print(particle_data)
# NOTE(review): the state is queried a second time here instead of reusing
# particle_data; `gmm` is an ExplorationModule wrapper, not a raw GaussianMixture.
gmm = ExplorationModule(initial_samples=env._get_actual_state()[:, :-1], n_components=1, max_samples=1000)

In [None]:
# Filled contour of the fitted density over [lower_bound, upper_bound] on both axes
# (the defaults -1 and 1 apply because no bounds were passed to the constructor).
gmm.plot_distribution()

In [None]:
# Draw 10 candidate points from the fitted GMM ...
candidate_point = gmm.sample_candidate_points(10)
print(candidate_point)
# ... and score each one as the inverse of its largest per-component density.
novelty_scores = gmm.assess_novelty(candidate_point)
print(novelty_scores)

In [None]:
# Plot the particles and the candidate points in 2D using different colours;
# the marker size of each candidate point encodes its novelty score.
# Matplotlib defaults applied to the figures created below.
plt.rcParams['figure.figsize'] = [7, 7]
plt.rcParams['figure.dpi'] = 80
plt.rcParams['font.size'] = 12
# def plot_2d_distribution(particles, candidate_points, novelty_scores):
#     fig, ax = plt.subplots()
#     x = np.linspace(env.bounds[0], env.bounds[1], 1000)
#     y = np.linspace(env.bounds[0], env.bounds[1], 1000)
#     X, Y = np.meshgrid(x, y)
#     Z = env.objective_function.evaluate(np.array([X.flatten(), Y.flatten()]).T).reshape(X.shape)
#     ax.contour(X, Y, Z, 50)
#     ax.set_xlim(env.bounds[0][0], env.bounds[1][0])
#     ax.set_ylim(env.bounds[0][1], env.bounds[1][1])
#     ax.scatter(particles[:, 0], particles[:, 1], c='blue', label='Particles')
#     # scale the novelty scores to be between 1 and 10
#     novelty_scores = (novelty_scores - np.min(novelty_scores)) / (np.max(novelty_scores) - np.min(novelty_scores)) * 9 + 1
#     ax.scatter(candidate_points[:, 0], candidate_points[:, 1], c='red', label='Candidate points', s=novelty_scores*100)
#     ax.legend()
#     plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture

def plot_2d_distribution(particles, candidate_points, novelty_scores, gmm, env, resolution=1000):
    """
    Overlay the objective contours, the GMM density, the particles and the
    candidate points on one 2-D figure.

    Parameters:
    - particles: Array whose first two columns are plotted as blue points.
    - candidate_points: Array whose first two columns are plotted as red points.
    - novelty_scores: One score per candidate point; min-max scaled to [1, 10]
      and multiplied by 100 to give the marker area.
    - gmm: Object whose ``gmm`` attribute exposes ``score_samples`` (an
      ``ExplorationModule``).
    - env: Environment providing ``bounds`` (indexed as ``bounds[0][d]`` for the
      lower and ``bounds[1][d]`` for the upper limit of axis ``d``) and
      ``objective_function.evaluate``.
    - resolution: Number of grid points per axis (default 1000, as before).
    """
    fig, ax = plt.subplots()

    # Build each axis from its own scalar limits. Passing the whole bounds[0] /
    # bounds[1] vectors to linspace yields an (resolution, n_dim) array, which
    # meshgrid then flattens into a mixed-axis grid.
    x_min, x_max = env.bounds[0][0], env.bounds[1][0]
    y_min, y_max = env.bounds[0][1], env.bounds[1][1]
    x = np.linspace(x_min, x_max, resolution)
    y = np.linspace(y_min, y_max, resolution)
    X, Y = np.meshgrid(x, y)
    XY = np.vstack([X.ravel(), Y.ravel()]).T

    # Objective values and GMM density (score_samples is log-density) on the grid.
    Z = env.objective_function.evaluate(XY).reshape(X.shape)
    Z_gmm = np.exp(gmm.gmm.score_samples(XY)).reshape(X.shape)

    # Plot the objective function contour
    ax.contour(X, Y, Z, 50)

    # Plot the GMM contour with some transparency
    ax.contourf(X, Y, Z_gmm, 50, cmap='viridis', alpha=0.5)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)

    # Plot particles and candidate points with scaled novelty scores
    ax.scatter(particles[:, 0], particles[:, 1], c='blue', label='Particles')
    scores = np.asarray(novelty_scores, dtype=float)
    lowest = np.min(scores)
    span = np.max(scores) - lowest
    if np.isfinite(span) and span > 0:
        marker_scale = (scores - lowest) / span * 9 + 1
    else:
        # All scores equal (or non-finite): avoid 0/0 and use the smallest size.
        marker_scale = np.ones_like(scores)
    ax.scatter(candidate_points[:, 0], candidate_points[:, 1], c='red', label='Candidate points', s=marker_scale*100)

    ax.legend()
    plt.show()

# Visualise the initial particles against the 10 sampled candidates.
plot_2d_distribution(particle_data, candidate_point, novelty_scores, gmm, env)

    

In [None]:
# Rebuild the environment and policy with the same settings as the setup cell
# near the top of the notebook, so the rollouts below start from a fresh pair.
config_path = 'config/config.json'
model_path = "training_runs/2023-11-03_08-25-56/policy-30000.pth"
mode = "test"
env, agent_policy = initialize(config_path, mode=mode, model_path=model_path)
config = parse_config(config_path)
# Apply the "test_action_std" entry of the parsed config to the policy.
agent_policy.set_action_std(config["test_action_std"])

In [None]:
# Roll the policy out for a few steps while tracking visited positions in a
# two-component exploration model.
iters  = 10
obs = env.reset()
particle_data = env._get_actual_state()[:, :-1]
# print(particle_data)
gmm = ExplorationModule(initial_samples=particle_data, n_components=2, max_samples=1000)
for i in range(iters):
    # Positions before the step: these are what gets added to the GMM buffer.
    particle_data = env._get_actual_state()[:, :-1]
    actions = get_action(obs, agent_policy, env)
    obs, reward, done, info = env.step(actions)
    gmm.update_distribution(particle_data)
    # Positions after the step are scored against the just-updated model.
    candidate_point = env._get_actual_state()[:, :-1]
    novelty_scores = gmm.assess_novelty(candidate_point)
    print(novelty_scores)
    plot_2d_distribution(particle_data, candidate_point, novelty_scores, gmm, env)
    gmm.plot_distribution()



In [None]:
# Same rollout as the previous cell, run for 20 steps.
iters  = 20
obs = env.reset()
particle_data = env._get_actual_state()[:, :-1]
# print(particle_data)
gmm = ExplorationModule(initial_samples=particle_data, n_components=2, max_samples=1000)
for i in range(iters):
    # Positions before the step: these are what gets added to the GMM buffer.
    particle_data = env._get_actual_state()[:, :-1]
    actions = get_action(obs, agent_policy, env)
    obs, reward, done, info = env.step(actions)
    gmm.update_distribution(particle_data)
    # Positions after the step are scored against the just-updated model.
    candidate_point = env._get_actual_state()[:, :-1]
    # Fixed method name: the previous spelling (`assess_n0ovelty`) raised AttributeError.
    novelty_scores = gmm.assess_novelty(candidate_point)
    print(novelty_scores)
    plot_2d_distribution(particle_data, candidate_point, novelty_scores, gmm, env)
    gmm.plot_distribution()


In [None]:
def mean_confidence_interval(data, confidence=0.95):
    """
    Mean and two-sided Student-t confidence interval along the first axis.

    Parameters:
    - data: Array-like of observations; statistics are taken over axis 0.
    - confidence: Confidence level of the interval (default 0.95).

    Returns:
    - ``(mean, lower, upper)`` where ``lower``/``upper`` are the mean minus/plus
      the standard error times the t critical value with ``len(data) - 1``
      degrees of freedom.
    """
    observations = 1.0 * np.array(data)
    count = len(observations)
    center = np.mean(observations, axis=0)
    std_error = scipy.stats.sem(observations)
    t_critical = scipy.stats.t.ppf((1 + confidence) / 2., count - 1)
    half_width = std_error * t_critical
    return center, center - half_width, center + half_width

def num_function_evaluation(fopt, n_agents, save_dir, opt_value, label="TEST OPT"):
    """
    Plot the mean best-fitness curve of one algorithm with its 95% confidence
    band against the number of function evaluations, and save it to disk.

    Parameters:
    - fopt: Array-like of runs; statistics are taken over the first axis.
    - n_agents: Multiplier turning the 1-based step index into the x value.
    - save_dir: Path handed to ``savefig`` (a file path, despite the name).
    - opt_value: If not ``None``, drawn as a horizontal "True OPT" line.
    - label: Legend label of the mean curve.

    The figure is always closed before returning, including when plotting or
    saving raises.
    """
    # convert fopt to numpy array if it is not already
    fopt = np.array(fopt)
    mean_curve, ci_low, ci_high = mean_confidence_interval(fopt, 0.95)
    # x axis: cumulative evaluations after each step (n_agents per step).
    evaluations = (np.arange(len(mean_curve)) + 1) * n_agents

    fig = plt.figure(figsize=(6, 4), dpi=200)
    try:
        plt.rcParams["figure.figsize"] = [6, 4]
        plt.rcParams["figure.autolayout"] = True
        plt.fill_between(evaluations, ci_low, ci_high, alpha=0.1, edgecolor='#3F7F4C', facecolor='#7EFF99')
        plt.plot(evaluations, mean_curve, linewidth=2.0, label=label, color='#3F7F4C')
        if opt_value is not None:
            plt.plot(evaluations, np.ones(len(mean_curve)) * opt_value, linewidth=1.0, label='True OPT', color='#CC4F1B')

        plt.xlabel('number of function evaluations', fontsize=14)
        plt.ylabel('best fitness value', fontsize=14)

        plt.legend(fontsize=14, frameon=False)
        plt.xscale('log')
        plt.yticks(fontsize=14)
        plt.savefig(save_dir)
    finally:
        # close the figure even if plotting/saving failed, so figures do not pile up
        plt.close(fig)

def plot_num_function_evaluation(fopt, n_agents, save_dir, opt_value, show_std=False, symbol_list=None, color_list=None, label_list=None, show=True, title=None):
    """
    Plot mean best-fitness curves of one or more algorithms against the number
    of function evaluations and save the figure.

    Parameters:
    - fopt: Sequence with one entry per algorithm; each entry is an array-like
      of runs accepted by ``mean_confidence_interval``.
    - n_agents: Multiplier turning the 1-based step index into the x value.
    - save_dir: Path handed to ``savefig`` (a file path, despite the name).
    - opt_value: If not ``None``, drawn as a horizontal "True OPT" line.
    - show_std: If True, add error bars of the 95% confidence half-width.
    - symbol_list / color_list / label_list: Per-algorithm line style, colour
      and legend label. When omitted, defaults are generated for every
      algorithm (solid lines, the matplotlib colour cycle, "algorithm N"
      labels; a single algorithm keeps '#3F7F4C' and "DeepHive").
    - show: If True, display the figure after it has been saved.
    - title: Optional figure title.

    With exactly one algorithm the work is delegated to
    ``num_function_evaluation`` (which saves and closes its own figure) and the
    function returns; ``show_std``, ``show`` and ``title`` are not applied on
    that path.
    """
    n_algorithms = len(fopt)

    # Build defaults long enough for every algorithm so indexing cannot fail.
    if symbol_list is None:
        symbol_list = ['-'] * n_algorithms
    if color_list is None:
        if n_algorithms == 1:
            color_list = ['#3F7F4C']
        else:
            cycle = plt.rcParams['axes.prop_cycle'].by_key().get('color', ['#3F7F4C'])
            color_list = [cycle[i % len(cycle)] for i in range(n_algorithms)]
    if label_list is None:
        if n_algorithms == 1:
            label_list = ['DeepHive']
        else:
            label_list = [f"algorithm {i + 1}" for i in range(n_algorithms)]

    print(f"Number of function evaluations: {len(fopt[0])}")
    print(f"Number of algorithms: {len(fopt)}")

    if n_algorithms == 1:
        print("Single algorithm")
        num_function_evaluation(fopt[0], n_agents, save_dir, opt_value, label=label_list[0])
        # The helper already saved the plot. Continuing would hit an undefined
        # `mf1` and overwrite the saved file with an empty figure.
        return

    fig = plt.figure(figsize=(6, 4), dpi=200)
    plt.rcParams["figure.figsize"] = [6, 4]
    plt.rcParams["figure.autolayout"] = True

    for i in range(n_algorithms):
        mf1, ml1, mh1 = mean_confidence_interval(fopt[i], 0.95)
        evaluations = (np.arange(len(mf1)) + 1) * n_agents
        if show_std:
            # errorbar draws +/- yerr, so pass the half-width of the interval;
            # no label here, otherwise the legend lists each algorithm twice.
            plt.errorbar(evaluations, mf1, yerr=mh1 - mf1, linewidth=2.0,
                         color=color_list[i])
        plt.plot(evaluations, mf1, symbol_list[i], linewidth=1, label=label_list[i],
                 color=color_list[i])

    if opt_value is not None:
        # Uses the x range of the last algorithm plotted.
        plt.plot(evaluations, np.ones(len(mf1)) * opt_value, linewidth=0.5, label='True OPT', color='#CC4F1B')

    plt.xlabel('number of function evaluations', fontsize=14)
    plt.ylabel('best fitness value', fontsize=14)
    plt.legend(fontsize=8, frameon=False, loc="lower right")
    plt.xscale('log')
    plt.yticks(fontsize=14)
    if title is not None:
        plt.title(title)

    # Save before showing: after plt.show() the figure may already be released,
    # in which case savefig would write an empty image.
    fig.savefig(save_dir)
    if show:
        plt.show()

In [None]:
# Benchmark run directories to compare; the trailing comment on each line
# records the action-std setting used for that run.
opt1_dir = "benchmarking_runs/2023-11-13_10-31-46"  # std = 0.05
opt2_dir = "benchmarking_runs/2023-11-13_10-34-03" # std = 0.02
opt3_dir = "benchmarking_runs/2023-11-13_10-35-39" # std = 0.035
opt4_dir = "benchmarking_runs/2023-11-13_10-37-39" # learn_std - 0.3 - 0.03
opt5_dir = "benchmarking_runs/2023-11-13_10-39-21" # learn_std - 0.5 - 0.005
opt6_dir = "benchmarking_runs/2023-11-13_10-40-59" # unfreeze learn_std - 0.4 - 0.02

# Parallel lists: entry i of labels/colors/symbols styles the run in opt_dirs[i].
opt_dirs = [opt1_dir, opt2_dir, opt3_dir, opt4_dir, opt5_dir, opt6_dir]
labels = ["std-0.05", "std-0.02", "std-0.035", "ls-0.3-0.03", "ls-0.5-0.005", "unfreeze-ls-0.4-0.02"]
colors = ["red", "green", "blue", "orange", "purple", "brown"]
symbols = ["-", "-", "-", "-", "-", "-"]

def prepare_opt_values(dir):
    """
    Collect the last column of every ``.npy`` array stored in a directory.

    Parameters:
    - dir: Directory to scan (non-recursive); other file types are ignored.

    Returns:
    - A list with one entry per ``.npy`` file, each the file's last column as a
      plain Python list. Entries follow ``os.listdir`` order.
    """
    npy_names = filter(lambda name: name.endswith(".npy"), os.listdir(dir))
    # Keep only the final column of each saved 2-D array.
    return [np.load(os.path.join(dir, name))[:, -1].tolist() for name in npy_names]

# Gather the per-run curves of every benchmark directory (one list per algorithm).
ploting_values = []
# NOTE(review): the loop variable `dir` shadows the builtin for the rest of the notebook.
for dir in opt_dirs:
    opt_values = prepare_opt_values(dir)
    ploting_values.append(opt_values)

# n_agents=10 scales the x axis; 0.2 is drawn as the "True OPT" reference line.
plot_num_function_evaluation(ploting_values, 10, "benchmarking_runs/plot.png", 0.2, label_list=labels, show_std=False, color_list=colors, symbol_list=symbols, show=False, title="2D cosine mixture function benchmarking")

In [None]:
# Benchmark run directories for the second comparison; the trailing comment on
# each line records the action-std setting used for that run.
opt1_dir = "benchmarking_runs/2023-11-13_10-57-38"  # std = 0.05
opt2_dir = "benchmarking_runs/2023-11-13_10-56-41" # std = 0.02
opt3_dir = "benchmarking_runs/2023-11-13_10-55-41" # std = 0.035
opt4_dir = "benchmarking_runs/2023-11-13_10-54-53" # learn_std - 0.3 - 0.03
opt5_dir = "benchmarking_runs/2023-11-13_10-53-53" # learn_std - 0.5 - 0.005
opt6_dir = "benchmarking_runs/2023-11-13_10-52-32" # unfreeze learn_std - 0.5 - 0.05

# Parallel lists: entry i of labels/colors/symbols styles the run in opt_dirs[i].
opt_dirs = [opt1_dir, opt2_dir, opt3_dir, opt4_dir, opt5_dir, opt6_dir]
# NOTE(review): the last label says "0.4-0.02" while the opt6_dir comment above
# says "0.5 - 0.05" — confirm which is correct before publishing the plot.
labels = ["std-0.05", "std-0.02", "std-0.035", "ls-0.3-0.03", "ls-0.5-0.005", "unfreeze-ls-0.4-0.02"]
colors = ["red", "green", "blue", "orange", "purple", "brown"]
symbols = ["-", "-", "-", "-", "-", "-"]

def prepare_opt_values(dir):
    """
    Collect the last column of every ``.npy`` array stored in a directory.

    Parameters:
    - dir: Directory to scan (non-recursive); other file types are ignored.

    Returns:
    - A list with one entry per ``.npy`` file, each the file's last column as a
      plain Python list. Entries follow ``os.listdir`` order.
    """
    npy_names = filter(lambda name: name.endswith(".npy"), os.listdir(dir))
    # Keep only the final column of each saved 2-D array.
    return [np.load(os.path.join(dir, name))[:, -1].tolist() for name in npy_names]

# Gather the per-run curves of every benchmark directory (one list per algorithm).
ploting_values = []
# NOTE(review): the loop variable `dir` shadows the builtin for the rest of the notebook.
for dir in opt_dirs:
    opt_values = prepare_opt_values(dir)
    ploting_values.append(opt_values)

# n_agents=10 scales the x axis; 0.2 is drawn as the "True OPT" reference line.
plot_num_function_evaluation(ploting_values, 10, "benchmarking_runs/plot1.png", 0.2, label_list=labels, show_std=False, color_list=colors, symbol_list=symbols, show=False, title="3D cosine mixture function benchmarking")

In [None]:
# Compare two result sets (labelled DeepHive and PSO) on one plot.
# NOTE(review): opt_values_1 and opt_values_2 are not defined anywhere in the
# visible notebook, so this cell raises NameError on a fresh kernel — confirm
# where they are supposed to come from.
plot_num_function_evaluation([opt_values_1, opt_values_2], 1, "test.png", 0.2, show_std=False, symbol_list=['-', '--'], color_list=['#3F7F4C', '#CC4F1B'], label_list=['DeepHive', 'PSO'])