In [None]:
import numpy as np
import gymnasium as gym
from gymnasium import spaces
import numpy as np
from typing import Any, Dict, List, Optional, Tuple
from deephive.environment.optimization_functions.benchmark_functions import *
from matplotlib import pyplot as plt
from matplotlib import animation
from typing import Optional

In [None]:
# Selector used further down to look up a benchmark problem by name via
# ``get_function``.  NOTE(review): ``FunctionSelector`` is presumably provided
# by the star import from ``benchmark_functions`` -- confirm.
function_selector = FunctionSelector()

In [None]:

class Render:
    """Plotting helper for an optimisation environment.

    Draws the swarm on top of a contour plot of the objective function and can
    export the recorded trajectory as an animation.  Environments with more
    than two dimensions are rejected with ``ValueError``.

    NOTE(review): the drawing code always reads two coordinates per particle,
    so a one-dimensional environment is not actually supported even though it
    passes the dimension check.

    The environment must expose ``dimension``, ``bounds``, ``log_scale``,
    ``fitness_function``, ``state_history`` and ``current_step``.
    """

    def __init__(self, env):
        self.env = env

    def render_state(self, file_path: Optional[str] = None, **kwargs):
        """Plot the particle positions recorded for the current step.

        Args:
            file_path: Where to save the figure; when ``None`` the figure is
                shown instead.
            **kwargs: ``optimal_positions`` may hold the (x, y) coordinates of
                a known optimum to mark on the plot.

        Raises:
            ValueError: If the environment has more than two dimensions.
        """
        self.optimal_position = kwargs.get("optimal_positions", None)
        if self.env.dimension > 2:
            raise ValueError("Cannot render state for n_dim > 2")

        self._render_state_2d(file_path)

    def render_state_history(self, file_path: str, fps: int = 10, **kwargs):
        """Save the whole recorded trajectory as an animation.

        Args:
            file_path: Destination of the animation file.
            fps: Frames per second of the animation.
            **kwargs: ``optimal_positions`` may hold the (x, y) coordinates of
                a known optimum to mark on the plot.

        Raises:
            ValueError: If the environment has more than two dimensions.
        """
        self.optimal_position = kwargs.get("optimal_positions", None)
        if self.env.dimension > 2:
            raise ValueError("Cannot render state for n_dim > 2")

        self._render_state_history_2d(file_path, fps)

    def _draw_landscape(self):
        """Create a figure holding the objective contour and axis decoration."""
        fig, ax = plt.subplots()
        low, high = self.env.bounds[0], self.env.bounds[1]
        x = np.linspace(low, high, 1000)
        y = np.linspace(low, high, 1000)
        X, Y = np.meshgrid(x, y)
        grid_points = np.array([X.flatten(), Y.flatten()]).T
        Z = self.env.fitness_function(grid_points).reshape(X.shape)
        if self.env.log_scale:
            # The sign flip is kept from the original implementation; log10 is
            # only defined where the objective value is negative.
            Z = np.log10(-Z)
        ax.contour(X, Y, Z, 50)
        ax.set_xlim(low, high)
        ax.set_ylim(low, high)
        ax.set_xlabel("x")
        ax.set_ylabel("y")
        ax.set_title("Particle positions")
        return fig, ax

    @staticmethod
    def _add_legend_if_labelled(ax):
        """Add a legend only when at least one artist carries a label."""
        handles, _ = ax.get_legend_handles_labels()
        if handles:
            ax.legend()

    def _render_state_2d(self, file_path: Optional[str] = None):
        """Draw the swarm at ``env.current_step`` and save or show the figure."""
        fig, ax = self._draw_landscape()
        # Last column of the history is the fitness; the rest are coordinates.
        state = self.env.state_history[:, self.env.current_step, :-1]
        ax.scatter(state[:, 0], state[:, 1], c="red", s=100, marker="^", edgecolors="black", label="Particle's points", alpha=1)
        if self.optimal_position is not None:
            ax.scatter(self.optimal_position[0], self.optimal_position[1], c="blue", s=300, marker="o", edgecolors="black", label="Optimal Position")

        self._add_legend_if_labelled(ax)

        if file_path is not None:
            try:
                fig.savefig(file_path)
            finally:
                # Release the figure so repeated calls do not accumulate.
                plt.close(fig)
        else:
            plt.show()

    def _render_state_history_2d(self, file_path: str, fps: int):
        """Animate every recorded step and write the result to ``file_path``."""
        fig, ax = self._draw_landscape()
        if self.optimal_position is not None:
            ax.scatter(self.optimal_position[0], self.optimal_position[1], c="green", s=300, marker="o", edgecolors="black", label="Optimal Position")
        self._add_legend_if_labelled(ax)
        scat = ax.scatter([], [], c="red", s=100, marker="^", edgecolors="black")
        # Text box showing the iteration number.
        text = ax.text(0.05, 0.95, "", transform=ax.transAxes)
        history = self.env.state_history
        self.previous_state_history = history[:, 0, :-1]

        def animate(i):
            current = history[:, i, :-1]
            scat.set_offsets(current)
            text.set_text(f"Iteration: {i}")
            # Leave a faint trail from each particle's previous position.
            for before, after in zip(self.previous_state_history, current):
                ax.plot([before[0], after[0]], [before[1], after[1]], c="black", alpha=0.1)
            self.previous_state_history = current
            return scat, text

        print("Creating animation")
        anim = animation.FuncAnimation(fig, animate, frames=history.shape[1], interval=1000/fps, blit=True)
        print("Saving animation to: ", file_path)
        try:
            # matplotlib registers the Pillow writer under the lowercase name.
            anim.save(file_path, writer="pillow")
        finally:
            plt.close(fig)

In [None]:

class ScalerHelper:
    """Min-max scaling between a ``[low, high]`` range and the unit interval."""

    @staticmethod
    def scale_observation(observation: np.ndarray, low: float, high: float) -> np.ndarray:
        """Map values from ``[low, high]`` onto ``[0, 1]``."""
        span = high - low
        return (observation - low) / span

    @staticmethod
    def unscale_observation(observation: np.ndarray, low: float, high: float) -> np.ndarray:
        """Map values from ``[0, 1]`` back onto ``[low, high]``."""
        span = high - low
        return observation * span + low


class OptimizationEnv(gym.Env):
    """Swarm environment in which every action moves all particles at once.

    The state is a ``(swarm_size, dimension)`` array of particle positions.
    ``step`` adds the action to the positions, clips them to the search space,
    evaluates the objective and optionally applies a mutation/crossover stage.
    Lower objective values are treated as better throughout.

    ``reset`` returns only the observation and ``step`` returns the 4-tuple
    ``(observation, rewards, done, info)``.

    NOTE(review): ``generate_observation`` returns an array of shape
    ``(swarm_size, dimension, 6)`` while ``observation_space`` is declared as
    ``(swarm_size, dimension)`` -- confirm which one consumers rely on.
    """

    def __init__(self, optimization_function, dimension, swarm_size, lower_bound, upper_bound, mix_rate=0.5,
                     scale=False, ep_length=100, neighborhood_size=5, mutation_probability=0.2, log_scale=False,
                     apply_mutation=True):
        """Store the configuration, build the spaces and reset the swarm.

        Args:
            optimization_function: Callable evaluated on a 2-D array of
                positions (one row per point).
            dimension: Number of coordinates per particle.
            swarm_size: Number of particles.
            lower_bound: Lower limit of the search space.
            upper_bound: Upper limit of the search space.
            mix_rate: Per-coordinate probability of taking the trial value
                during crossover.
            scale: When true, positions are kept in ``[0, 1]`` and unscaled
                before the objective is evaluated.
            ep_length: Number of steps after which ``done`` becomes true.
            neighborhood_size: Number of ring neighbours on each side.
            mutation_probability: Probability of mutating towards the current
                local best instead of a randomly chosen neighbour.
            log_scale: Read by the render helper to plot on a log scale.
            apply_mutation: Whether ``step`` runs mutation and crossover.
        """
        super().__init__()
        self.opt_function = optimization_function
        self.swarm_size = swarm_size
        self.dimension = dimension
        self.lower_bound = lower_bound
        self.upper_bound = upper_bound
        self.bounds = (lower_bound, upper_bound)
        self.ep_length = ep_length
        self.neighborhood_size = neighborhood_size
        self.mix_rate = mix_rate
        self.mutation_probability = mutation_probability
        self.scaler = ScalerHelper()
        self.scale_option = scale
        self.log_scale = log_scale
        self.render_helper = Render(self)
        self.apply_mutation = apply_mutation
        self.fitness_cache = {}  # maps a hashable copy of the input to its fitness
        if self.scale_option:
            self.action_space = spaces.Box(low=-1, high=1, shape=(swarm_size, dimension), dtype=np.float32) # Continuous action space
            self.observation_space = spaces.Box(low=0, high=1, shape=(swarm_size, dimension), dtype=np.float32)
        else:
            self.action_space = spaces.Box(low=-np.inf, high=np.inf, shape=(swarm_size, dimension), dtype=np.float32) # Continuous action space
            self.observation_space = spaces.Box(low=lower_bound, high=upper_bound, shape=(swarm_size,dimension), dtype=np.float32)
        self.reset()

    def fitness_function(self, x, *args, **kwargs):
        """Evaluate the objective on ``x`` (one row per point), with caching.

        Results are cached under the exact input values.  Cached entries are
        copied on the way in and out because callers modify the returned
        array in place.  ``evaluation_counter`` only grows on cache misses.
        """
        # Convert x to a hashable and unique representation
        x_hashable = tuple(map(tuple, x))
        if x_hashable in self.fitness_cache:
            return np.copy(self.fitness_cache[x_hashable])

        if self.scale_option:
            x = self.scaler.unscale_observation(x, self.lower_bound, self.upper_bound)
        fitness_value = self.opt_function(x)
        self.evaluation_counter += len(x)
        self.fitness_cache[x_hashable] = np.copy(fitness_value)
        return fitness_value

    def reset(self):
        """Draw a fresh random swarm and return the first observation."""
        self.done = False
        self.current_step = 0
        self.evaluation_counter = 0
        # Last entry of the final axis holds the fitness, the rest the position.
        self.state_history = np.zeros((self.swarm_size, self.ep_length+1, self.dimension+1))
        self.gbest_history = np.zeros((self.ep_length+1, self.dimension))
        self.state = np.random.uniform(low=self.lower_bound, high=self.upper_bound, size=(self.swarm_size, self.dimension))
        if self.scale_option:
            self.state = self.scaler.scale_observation(self.state, self.lower_bound, self.upper_bound)
        self.old_state = np.copy(self.state)
        self.velocities = np.zeros((self.swarm_size, self.dimension))
        # Defined up front so it exists whether or not mutation is enabled.
        self.modified_actions = np.zeros((self.swarm_size, self.dimension))
        self.fitness_values = self.fitness_function(self.state)
        self.state_history[:, self.current_step, :-1] = self.state
        self.state_history[:, self.current_step, -1] = self.fitness_values
        self.pbest_positions = np.copy(self.state)
        best_idx = np.argmin(self.fitness_values)
        # Copy: a plain row index would alias self.state and drift with it.
        self.gbest_position = self.state[best_idx].copy()
        self.gbest_value = self.fitness_values[best_idx]
        self.initialize_neighborhoods()
        self.compute_lbest()
        observation = self.generate_observation()
        return observation

    def reward_function(self, state):
        """Return the negated objective value of ``state``."""
        return -self.fitness_function(state)

    def step(self, action):
        """Move the swarm by ``action`` and return the transition.

        Args:
            action: Displacement added to the positions; also stored as the
                particles' velocities.

        Returns:
            ``(observation, rewards, done, info)`` where ``info`` is an empty
            dict and ``done`` turns true once ``ep_length`` steps have run.
        """
        self.compute_lbest()  # Compute Lbest positions
        self.current_step += 1
        self.velocities = action
        self.old_state = np.copy(self.state)
        self.state = self.state + action
        # Keep the positions inside the search space.
        if self.scale_option:
            self.state = np.clip(self.state, 0, 1)
        else:
            self.state = np.clip(self.state, self.lower_bound, self.upper_bound)

        self.fitness_values = self.fitness_function(self.state)
        self.state_history[:, self.current_step, :-1] = self.state
        self.state_history[:, self.current_step, -1] = self.fitness_values

        if self.apply_mutation:
            # Mutation and crossover; also refreshes the personal/global bests.
            self.state = self.mutate_and_crossover(self.state, self.lbest_positions)
        else:
            # Update the personal best and global best.
            better_mask = self.fitness_values < self.fitness_function(self.pbest_positions)
            self.pbest_positions[better_mask] = self.state[better_mask]
            self._update_gbest(self.state)

        # Displacement actually realised after clipping (and mutation, if any).
        self.modified_actions = self.state - self.old_state

        rewards = self.calculate_reward()

        if self.current_step == self.ep_length:
            self.done = True

        observation = self.generate_observation()
        return observation, rewards, self.done, {}

    def _update_gbest(self, positions):
        """Adopt the best row of ``positions`` if it beats the global best."""
        best_idx = np.argmin(self.fitness_values)
        if self.fitness_values[best_idx] < self.gbest_value:
            self.gbest_value = self.fitness_values[best_idx]
            self.gbest_position = np.copy(positions[best_idx])

    def initialize_neighborhoods(self):
        """Build ring neighbourhoods: ``neighborhood_size`` indices per side.

        Indices wrap around modulo ``swarm_size``.
        """
        self.neighborhoods = []
        for i in range(self.swarm_size):
            neighborhood_indices = list(range(i - self.neighborhood_size, i)) + list(range(i + 1, i + 1 + self.neighborhood_size))
            neighborhood_indices = [index % self.swarm_size for index in neighborhood_indices]  # Ensure indices are within bounds
            self.neighborhoods.append(neighborhood_indices)

    def compute_lbest(self):
        """Set each particle's local best to its fittest neighbour's position."""
        self.lbest_positions = np.zeros((self.swarm_size, self.dimension))
        for i in range(self.swarm_size):
            neighborhood_fitnesses = self.fitness_values[self.neighborhoods[i]]
            best_neighbor_idx = self.neighborhoods[i][np.argmin(neighborhood_fitnesses)]
            self.lbest_positions[i] = self.state[best_neighbor_idx]

    def calculate_fitness(self):
        """Re-evaluate ``fitness_values`` for the current state."""
        self.fitness_values = self.fitness_function(self.state)

    def mutate(self, positions, lbest_positions):
        """Return trial positions ``X + A * (phi * Lbest - X)``.

        ``A`` is uniform in ``[0, 2)`` and ``phi`` uniform in ``[0, 1)``, both
        drawn independently per coordinate.
        """
        A = 2 * np.random.rand(positions.shape[0], positions.shape[1])
        phi = np.random.rand(positions.shape[0], positions.shape[1])
        return positions + A * (phi * lbest_positions - positions)

    def crossover(self, positions, trial_positions):
        """Mix coordinates, taking the trial value with probability ``mix_rate``."""
        crossover_mask = np.random.rand(positions.shape[0], positions.shape[1]) < self.mix_rate
        new_positions = np.where(crossover_mask, trial_positions, positions)
        return new_positions

    def permute_lbest(self):
        """Replace every local best with the position of a random neighbour."""
        for i, neighbor_indices in enumerate(self.neighborhoods):
            # Pick without shuffling so the stored neighbourhoods stay intact.
            chosen = np.random.choice(neighbor_indices)
            self.lbest_positions[i] = self.state[chosen]

    def mutate_and_crossover(self, positions, lbest_positions):
        """Run mutation, crossover and greedy selection on ``positions``.

        Rows whose trial is better are overwritten in ``positions`` in place.
        Personal bests, fitness values and the global best are updated to
        match.  Returns ``positions``.
        """
        if np.random.rand() >= self.mutation_probability:
            # Mutate towards a random neighbour instead of the local best.
            self.permute_lbest()
            lbest_positions = self.lbest_positions
        trial_positions = self.mutate(positions, lbest_positions)

        new_positions = self.crossover(positions, trial_positions)
        new_fitness = self.fitness_function(new_positions)

        # Selection: keep a trial only where it beats the current position.
        # NOTE(review): pbest is overwritten without comparing against the
        # fitness of the stored personal best -- confirm this is intended.
        better_mask = new_fitness < self.fitness_values
        positions[better_mask] = new_positions[better_mask]
        self.pbest_positions[better_mask] = new_positions[better_mask]
        self.fitness_values[better_mask] = new_fitness[better_mask]

        self._update_gbest(positions)

        return positions

    def calculate_reward(self):
        """Return one reward per particle for the latest step.

        Fitness is min-max scaled within each step (1 for the step's best
        particle, 0 for its worst).  The reward combines the positive part of
        the change in scaled fitness with that change divided by the distance
        to the step's best, and is capped at 20.
        """
        previous_step = max(self.current_step - 1, 0)
        fitness = self.state_history[:, [previous_step, self.current_step], -1]
        best_values = fitness.min(axis=0)
        worst_values = fitness.max(axis=0)
        spread = best_values - worst_values
        # A step where all particles are equally fit has zero spread; score
        # it as 0 instead of dividing 0 by 0.
        scaled_fitness_values = np.divide(
            fitness - worst_values, spread,
            out=np.zeros_like(fitness), where=spread != 0,
        )

        previous_fitness = scaled_fitness_values[:, 0]
        current_fitness = scaled_fitness_values[:, 1]

        improvement = current_fitness - previous_fitness
        # Reward for improvement
        R_pbest = np.where(improvement > 0, improvement, 0)

        R_gbest = improvement / (1 - current_fitness + 1e-3)

        rewards = R_pbest + R_gbest
        rewards = np.where(rewards < 20, rewards, 20)
        return rewards

    def renders(self, type: str = "state",fps=1, file_path: Optional[str] = None, **kwargs):
        """Render the environment.

        Args:
            type: Type of rendering: "state" or "history".
            fps: Frames per second, used for "history" only.
            file_path: Output file passed to the render helper.
            **kwargs: ``optimal_positions`` is forwarded to the render helper.

        Raises:
            ValueError: If ``type`` is neither "state" nor "history".
        """
        optimal_positions = kwargs.get("optimal_positions", None)
        if type == "state":
            self.render_helper.render_state(file_path, optimal_positions=optimal_positions)
        elif type == "history":
            self.render_helper.render_state_history(file_path=file_path, fps=fps, optimal_positions=optimal_positions)
        else:
            raise ValueError("type should be either 'state' or 'history'")

    def get_observation(self, particle_index, dimension_index, current_step, total_steps):
        """Return the six features of one particle along one coordinate.

        The features are, in order: current velocity, offset to the personal
        best, offset to the local best, mean and variance of the neighbours'
        velocities, and the fraction of the episode that is left.
        """
        position = self.state[particle_index, dimension_index]
        neighbor_velocities = [self.velocities[n_idx, dimension_index] for n_idx in self.neighborhoods[particle_index]]
        time_left = (total_steps - current_step) / total_steps  # Normalized time left

        observation = [
            self.velocities[particle_index, dimension_index],
            self.pbest_positions[particle_index, dimension_index] - position,
            self.lbest_positions[particle_index, dimension_index] - position,
            np.mean(neighbor_velocities),
            np.var(neighbor_velocities),
            time_left,
        ]
        return np.array(observation)

    def generate_observation(self):
        """Return the ``(swarm_size, dimension, 6)`` observation array."""
        observations = [
            [
                self.get_observation(particle_index, dimension_index, self.current_step, self.ep_length)
                for dimension_index in range(self.dimension)
            ]
            for particle_index in range(self.swarm_size)
        ]
        return np.array(observations)


# Benchmark problem: look up 'f01' and pull out the pieces the environment needs.
function = function_selector.get_function('f01')
fitness_function = function['func']
global_min = function['global_min']
lower_bound, upper_bound = function['domain'][0], function['domain'][1]
dimension = 2  # fixed here instead of reading function['dimension']
scale = False

# Coefficients read by get_action: inertia weight and the
# (cognitive, social) acceleration pair.
inertia_weight = 0.7
acc_coefficients = (1.4, 1.4)

def get_action(env):
    """Return a velocity for every particle of ``env``.

    The result is the inertia-weighted previous velocity plus randomly scaled
    pulls towards each particle's personal best and towards the global best.
    It reads the module-level ``acc_coefficients`` and ``inertia_weight``.
    """
    swarm_shape = (env.swarm_size, env.dimension)
    r1 = np.random.rand(*swarm_shape)
    r2 = np.random.rand(*swarm_shape)

    pull_to_pbest = acc_coefficients[0] * r1 * (env.pbest_positions - env.state)
    pull_to_gbest = acc_coefficients[1] * r2 * (env.gbest_position - env.state)
    return inertia_weight * env.velocities + pull_to_pbest + pull_to_gbest


# Environment used by the cells below: 10 particles, 100 steps per episode,
# mutation stage enabled.
env = OptimizationEnv(
    fitness_function,
    dimension=dimension,
    lower_bound=lower_bound,
    upper_bound=upper_bound,
    swarm_size=10,
    scale=scale,
    ep_length=100,
    neighborhood_size=2,
    mutation_probability=0.2,
    log_scale=True,
    apply_mutation=True,
)


In [None]:
import numpy as np

# Roll out a few episodes and record one row per (particle, coordinate) per
# step: the 6 observation features, the action, and the displacement the
# environment actually applied (``env.modified_actions``).
logs = []

n_episodes = 2
for i in range(n_episodes):
    obs = env.reset()

    # Collect only the steps that really ran, so an early ``done`` cannot
    # leave uninitialised rows in the episode log.
    step_rows = []
    for step in range(env.ep_length):
        actions = get_action(env)
        obs, reward, done, info = env.step(actions)
        # Flatten everything to one row per (particle, coordinate).
        obs_flat = obs.reshape(-1, 6)
        actions_flat = actions.reshape(-1, 1)
        modified_actions_flat = env.modified_actions.reshape(-1, 1)

        # Concatenate observation, actions, and modified actions
        step_data = np.concatenate((obs_flat, actions_flat, modified_actions_flat), axis=1)
        step_rows.append(step_data)

        # Stop as soon as the environment reports the episode is over.
        if done:
            break
    episode_log = np.concatenate(step_rows, axis=0) if step_rows else np.empty((0, 8))
    # Append the episode log to the main logs
    logs.append(episode_log)

# Stack all episodes row-wise; this also works when episodes differ in length.
logs = np.concatenate(logs, axis=0) if logs else np.empty((0, 8))



In [None]:
# save the log to file
import pandas as pd
# One column per logged value: six observation features followed by the
# requested action and the action the environment actually applied.
log_columns = [
    "current_velocity",
    "personal_best_improvement",
    "local_best_improvement",
    "avg_velocity_neighborhood",
    "var_velocity_neighborhood",
    "time_left",
    "action",
    "modified_action",
]
df = pd.DataFrame(logs, columns=log_columns)
df.to_csv("logs.csv", index=False)


In [None]:
# Inspect the size of the collected log (rows x 8 columns after the reshape above).
logs.shape

In [None]:
# Inspect a single logged row, counted 100 rows back from the end.
logs[-100]

In [None]:
# ``logs`` is a plain 2-D float array at this point, so it cannot be indexed
# by a string key.  The observation features are the first six columns.
logs[0][:6]

In [None]:
# Run 100 episodes without logging; ``obs`` from the final step is kept and
# inspected by the cells below.
n_episodes = 100
for i in range(n_episodes):
    obs = env.reset()
    # NOTE(review): episode_return is initialised but never updated in this loop.
    episode_return = np.zeros(env.swarm_size)
    for step in range(env.ep_length):
        # Compute the action from the environment's current state and apply it.
        actions = get_action(env)
        obs, reward, done, info = env.step(actions)
        # Optional progress report, disabled:
        # if step == env.ep_length - 1:
        #    print(f"Episode {i} completed - Gbest value: {env.fitness_values.min()}")

In [None]:
# Flatten the last observation to rows of 6 features each.
obs_all_dim = obs.reshape(-1, 6)

In [None]:
# Show the first row of the flattened observation.
obs_all_dim[0]

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim

class DecisionTransformer(nn.Module):
    """Encoder-decoder transformer with a linear input and output projection.

    ``src`` and ``tgt`` go through the same input embedding, so both must have
    ``input_dim`` features in their last dimension.

    NOTE(review): ``nn.Transformer`` is built with its default layout, so
    inputs are presumably ``(sequence, batch, feature)`` -- confirm against
    the callers.  No attention masks are passed to the transformer.
    """

    def __init__(self, input_dim, model_dim, num_layers, output_dim, nhead=4):
        """Build the model.

        Args:
            input_dim: Number of features of ``src`` and ``tgt``.
            model_dim: Width of the transformer; must be divisible by ``nhead``.
            num_layers: Number of encoder layers and of decoder layers.
            output_dim: Number of features produced per target position.
            nhead: Number of attention heads (defaults to the previously
                hard-coded value of 4).
        """
        super().__init__()
        self.transformer = nn.Transformer(d_model=model_dim, nhead=nhead, num_encoder_layers=num_layers, num_decoder_layers=num_layers)
        self.input_embedding = nn.Linear(input_dim, model_dim)
        self.pos_encoder = PositionalEncoding(model_dim)
        self.output_layer = nn.Linear(model_dim, output_dim)

    def forward(self, src, tgt):
        """Embed both sequences, run the transformer and project its output."""
        src = self.pos_encoder(self.input_embedding(src))
        tgt = self.pos_encoder(self.input_embedding(tgt))
        output = self.transformer(src, tgt)
        return self.output_layer(output)

class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal position encodings to a sequence.

    The table has shape ``(max_len, 1, d_model)`` and is indexed by the first
    dimension of the input, so inputs are expected as
    ``(sequence, batch, d_model)`` with at most ``max_len`` positions.
    """

    def __init__(self, d_model, max_len=5000):
        """Precompute the encoding table.

        Args:
            d_model: Feature width of the inputs; odd values are supported.
            max_len: Longest sequence the table covers.
        """
        # Local import: ``math`` is not imported in the visible part of the file.
        import math

        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * -(math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        # For odd d_model there is one fewer odd slot than even slots.
        pe[:, 0, 1::2] = torch.cos(position * div_term[: d_model // 2])
        # Buffer: moves with the module across devices but is not trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        """Return ``x`` plus the encodings of its first ``x.size(0)`` positions."""
        x = x + self.pe[:x.size(0)]
        return x
