## Imports

In [4]:
import numpy as np
import random
import time
import gymnasium as gym
import torch
from evogym.envs import *
from evogym import EvoViewer, get_full_connectivity
from neural_controller import NeuralController, set_weights
import matplotlib.pyplot as plt

## Utils

In [5]:
def _as_generation_history(values, num_generations, label):
    """Return `values` as a 1-D float array, checking it has one entry per generation."""
    history = np.asarray(values, dtype=float)
    if history.shape != (num_generations,):
        raise ValueError(
            f"{label} history has shape {history.shape}, expected ({num_generations},); "
            "num_generations must match the number of generations the algorithm actually runs"
        )
    return history


def run_experiment(algorithm, num_runs=5, num_generations=100, scenario='DownStepper-v0', steps=500):
    """
    Run an evolutionary algorithm several times, average its statistics and plot them.

    `algorithm` is called with no arguments, so this function cannot change how
    many generations it runs. `num_generations` must therefore equal the length
    of the per-generation histories the algorithm returns.

    Parameters:
        algorithm (function): Zero-argument callable returning the 6-tuple
            (best_weights, best_fitness, best_fitness_scores,
            mean_fitness_scores, execution_time, std_fitness_scores),
            e.g. `evolution_strategy`.
        num_runs (int): Number of times to run the algorithm (at least 1).
        num_generations (int): Expected length of every per-generation history.
        scenario (str): Scenario name; only used in the plot title.
        steps (int): Currently unused; kept for backward compatibility.

    Returns:
        best_overall_weights: Weights of the run with the highest best fitness.
        best_overall_fitness (float): Highest best fitness over all runs.
        best_fitnesses_overall (np.array): Per-generation best fitness, averaged over runs.
        mean_fitnesses (np.array): Per-generation mean fitness, averaged over runs.
        mean_execution_time (float): Execution time averaged over runs.
        std_fitnesses (np.array): Per-generation fitness std reported by the
            algorithm, averaged over runs.

    Raises:
        ValueError: If `num_runs` is smaller than 1, or a returned history does
            not have exactly `num_generations` entries.
    """
    if num_runs < 1:
        raise ValueError(f"num_runs must be at least 1, got {num_runs}")

    # Running sums over runs; divided by num_runs after the loop.
    best_fitnesses_overall = np.zeros(num_generations)
    mean_fitnesses = np.zeros(num_generations)
    std_fitnesses = np.zeros(num_generations)

    total_execution_time = 0
    best_overall_fitness = float('-inf')
    best_overall_weights = None

    for i in range(num_runs):
        print(f"\nRunning {algorithm.__name__} {i + 1}/{num_runs}...")

        # Run the algorithm and extract results
        (best_weights, best_fitness, best_fitness_scores,
         mean_fitnesses_run, execution_time, std_fitness_scores_run) = algorithm()

        print(f"Best fitness score of run {i + 1}: {best_fitness:.3f}")

        # Validate before accumulating: a wrong length would otherwise surface
        # as a broadcasting error, or be silently broadcast when it is 1.
        best_fitnesses_overall += _as_generation_history(best_fitness_scores, num_generations, "best fitness")
        mean_fitnesses += _as_generation_history(mean_fitnesses_run, num_generations, "mean fitness")
        std_fitnesses += _as_generation_history(std_fitness_scores_run, num_generations, "std fitness")

        # Keep track of the best neural network controller across all runs
        if best_fitness > best_overall_fitness:
            best_overall_fitness = best_fitness
            best_overall_weights = best_weights

        total_execution_time += execution_time

    # Turn the running sums into means across runs
    best_fitnesses_overall /= num_runs
    mean_fitnesses /= num_runs
    std_fitnesses /= num_runs
    mean_execution_time = total_execution_time / num_runs

    print("\nFinal Results After Multiple Runs:")
    print(f"Mean execution time: {mean_execution_time:.2f} seconds")
    print(f"Best fitness found: {best_overall_fitness:.3f}")

    # Plot averaged fitness evolution with a band of +/- the averaged std
    generations = range(num_generations)
    plt.figure(figsize=(10, 5))
    plt.plot(generations, best_fitnesses_overall, label="Best Fitness per Generation", color='blue')
    plt.fill_between(
        generations,
        best_fitnesses_overall - std_fitnesses,
        best_fitnesses_overall + std_fitnesses,
        color='blue',
        alpha=0.2,
        label="Std Dev Range",
    )
    plt.xlabel("Generation")
    plt.ylabel("Fitness Score")
    plt.title(f"{algorithm.__name__}: Fitness Evolution in {scenario}")
    plt.legend()
    plt.grid()
    plt.show()

    return best_overall_weights, best_overall_fitness, best_fitnesses_overall, mean_fitnesses, mean_execution_time, std_fitnesses


## Random

In [None]:
# ---- CONFIGURATION ----
SCENARIO = 'DownStepper-v0'
STEPS = 500            # Step cap, passed to gym.make as max_episode_steps
NUM_GENERATIONS = 100  # Number of random candidates drawn by the search loop

# Seeding is currently disabled (commented out), so no RNG is seeded in this cell.
# SEED = random.randint(0, 10000)
# np.random.seed(SEED)
# random.seed(SEED)

# 5x5 grid describing the robot body handed to the environment
robot_structure = np.array((
    (1, 3, 1, 0, 0),
    (4, 1, 3, 2, 2),
    (3, 4, 4, 4, 4),
    (3, 0, 0, 3, 2),
    (0, 0, 0, 0, 2),
))

connectivity = get_full_connectivity(robot_structure)

# Probe environment: only used here to read the observation/action sizes
env = gym.make(
    SCENARIO,
    body=robot_structure,
    connections=connectivity,
    max_episode_steps=STEPS,
)
sim = env.sim
output_size = env.action_space.shape[0]      # Action size
input_size = env.observation_space.shape[0]  # Observation size

# Controller whose weights are overwritten for every evaluated candidate
brain = NeuralController(input_size, output_size)

# ---- FITNESS FUNCTION ----
def evaluate_fitness(weights, view=False):
    """Run one episode with the given controller weights and return the summed reward.

    Parameters:
        weights: Weights loaded into the global `brain` through `set_weights`.
        view (bool): When True, render every step to the screen.

    Returns:
        Sum of the rewards collected until the episode terminates, is
        truncated, or STEPS steps have been taken.
    """
    set_weights(brain, weights)  # Load weights into the network
    env = gym.make(SCENARIO, max_episode_steps=STEPS, body=robot_structure, connections=connectivity)
    try:
        # The viewer must wrap the simulation object (env.sim), not the env itself.
        viewer = EvoViewer(env.sim)
        try:
            viewer.track_objects('robot')
            state = env.reset()[0]  # Get initial state
            t_reward = 0
            for _ in range(STEPS):
                # Compute the action from the current observation before stepping
                state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
                action = brain(state_tensor).detach().numpy().flatten()
                if view:
                    viewer.render('screen')
                state, reward, terminated, truncated, info = env.step(action)
                t_reward += reward
                if terminated or truncated:
                    # No reset needed: the environment is closed right after.
                    break
        finally:
            # Release the viewer even if the episode is interrupted
            viewer.close()
    finally:
        env.close()
    return t_reward


# ---- RANDOM SEARCH ALGORITHM ----
# Pure random search: every iteration draws a fresh, independent candidate
# and the best-scoring one seen so far is remembered.
best_weights = None
best_fitness = -np.inf

for generation in range(NUM_GENERATIONS):
    # One standard-normal array per network parameter, matching its shape
    parameter_shapes = (p.shape for p in brain.parameters())
    random_weights = [np.random.randn(*shape) for shape in parameter_shapes]

    # Score the candidate with a full episode
    fitness = evaluate_fitness(random_weights)

    # Strict improvement only, so ties keep the earlier candidate
    if fitness > best_fitness:
        best_fitness, best_weights = fitness, random_weights

    print(f"Generation {generation + 1}/{NUM_GENERATIONS}, Fitness: {fitness}")

# Leave the network loaded with the best candidate found
set_weights(brain, best_weights)
print(f"Best Fitness: {best_fitness}")


# ---- VISUALIZATION ----
def visualize_policy(weights):
    """Render one on-screen episode of the controller using the given weights.

    Parameters:
        weights: Weights loaded into the global `brain` through `set_weights`.

    The episode stops when the environment reports termination or truncation,
    or after STEPS steps. The viewer and the environment are closed even if
    rendering is interrupted (for example by a KeyboardInterrupt).
    """
    set_weights(brain, weights)  # Load weights into the network
    env = gym.make(SCENARIO, max_episode_steps=STEPS, body=robot_structure, connections=connectivity)
    try:
        viewer = EvoViewer(env.sim)
        try:
            viewer.track_objects('robot')
            state = env.reset()[0]  # Get initial state
            for _ in range(STEPS):
                # Compute the action from the current observation before stepping
                state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
                action = brain(state_tensor).detach().numpy().flatten()
                viewer.render('screen')
                state, reward, terminated, truncated, info = env.step(action)
                if terminated or truncated:
                    # No reset needed: the environment is closed right after.
                    break
        finally:
            viewer.close()
    finally:
        env.close()
# Replay the best controller found by the search ten times in a row
for _ in range(10):
    visualize_policy(best_weights)


Generation 1/100, Fitness: -0.06929301204140781
Generation 2/100, Fitness: -0.06772797986342216
Generation 3/100, Fitness: 0.010921709899752996
Generation 4/100, Fitness: -0.07362099703432512


KeyboardInterrupt: 

## Evolutionary Strategy

In [None]:

# ---- HYPERPARAMETERS ----
SCENARIO = 'DownStepper-v0'
STEPS = 500
NUM_GENERATIONS = 100  # Number of generations
MU = 20                # Number of parents kept after each selection
LAMBDA = 20            # Number of offspring created per generation
MUTATION_STD = 0.1     # Standard deviation of the Gaussian mutation noise
SEED = 42              # Open question from the authors: aren't we supposed to avoid a fixed seed?

# Seed every random source used in this section (torch, stdlib random, NumPy)
torch.manual_seed(SEED)
random.seed(SEED)
np.random.seed(SEED)

# ---- ENVIRONMENT SETUP ----
# 5x5 grid describing the robot body handed to the environment
robot_structure = np.array((
    (1, 3, 1, 0, 0),
    (4, 1, 3, 2, 2),
    (3, 4, 4, 4, 4),
    (3, 0, 0, 3, 2),
    (0, 0, 0, 0, 2),
))

connectivity = get_full_connectivity(robot_structure)

# Probe environment: only used here to read the observation/action sizes
env = gym.make(
    SCENARIO,
    body=robot_structure,
    connections=connectivity,
    max_episode_steps=STEPS,
)
sim = env.sim
output_size = env.action_space.shape[0]      # Action size
input_size = env.observation_space.shape[0]  # Observation size

# Controller whose weights are overwritten for every evaluated individual
brain = NeuralController(input_size, output_size)

# ---- FITNESS FUNCTION ----
def evaluate_fitness(weights, view=False):
    """Evaluate the neural network controller's fitness over one episode.

    Parameters:
        weights: Weights loaded into the global `brain` through `set_weights`.
        view (bool): When True, render every step to the screen.

    Returns:
        Sum of the rewards collected until the episode terminates, is
        truncated, or STEPS steps have been taken.
    """
    set_weights(brain, weights)  # Load weights into the network
    env = gym.make(SCENARIO, max_episode_steps=STEPS, body=robot_structure, connections=connectivity)
    try:
        viewer = EvoViewer(env.sim)
        try:
            viewer.track_objects('robot')

            state = env.reset()[0]
            total_reward = 0

            for _ in range(STEPS):
                # Compute the action from the current observation before stepping
                state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
                action = brain(state_tensor).detach().numpy().flatten()

                if view:
                    viewer.render('screen')

                state, reward, terminated, truncated, _ = env.step(action)
                total_reward += reward

                if terminated or truncated:
                    break
        finally:
            # Release the viewer even if the episode is interrupted
            viewer.close()
    finally:
        env.close()
    return total_reward

# ---- INITIALIZATION ----
def initialize_population():
    """Build MU individuals, each a list of standard-normal arrays shaped like the parameters of `brain`."""
    population = []
    for _ in range(MU):
        # One array per network parameter, in the order brain.parameters() yields them
        individual = [np.random.randn(*param.shape) for param in brain.parameters()]
        population.append(individual)
    return population

def mutate(parent):
    """Return a new individual: each weight array of `parent` plus Gaussian noise scaled by MUTATION_STD.

    The parent's arrays are not modified; every entry of the result is a new array.
    """
    child = []
    for weights in parent:
        noise = np.random.randn(*weights.shape)
        child.append(weights + MUTATION_STD * noise)
    return child

# ---- (μ + λ) EVOLUTION STRATEGY ----
def evolution_strategy():
    """Run the (μ + λ) Evolution Strategy to optimize the neural controller.

    Each generation evaluates the current population, records its best, mean
    and std fitness, creates LAMBDA offspring by mutating randomly chosen
    parents, and keeps the MU fittest individuals of parents plus offspring.

    Returns:
        best_weights: Fittest individual of the final (post-selection) population.
        best_fitness: Fitness of `best_weights` from its most recent evaluation.
        best_fitness_scores (list): Best population fitness at the start of each generation.
        mean_fitness_scores (list): Mean population fitness at the start of each generation.
        execution_time (float): Wall-clock duration of the run in seconds.
        std_fitness_scores (list): Population fitness std at the start of each generation.
    """
    population = initialize_population()
    best_fitness_scores = []
    mean_fitness_scores = []
    std_fitness_scores = []

    # Fitness values aligned index-by-index with `population` after the latest
    # selection; stays None if no generation is run.
    survivor_fitnesses = None

    start_time = time.time()

    for generation in range(NUM_GENERATIONS):
        # Evaluate fitness of current population
        fitnesses = np.array([evaluate_fitness(ind) for ind in population])

        # Store statistics
        best_fitness_scores.append(np.max(fitnesses))
        mean_fitness_scores.append(np.mean(fitnesses))
        std_fitness_scores.append(np.std(fitnesses))

        # Generate λ offspring by mutating random parents
        offspring = [mutate(random.choice(population)) for _ in range(LAMBDA)]
        offspring_fitnesses = np.array([evaluate_fitness(ind) for ind in offspring])

        # Combine parents and offspring
        combined_population = population + offspring
        combined_fitnesses = np.concatenate((fitnesses, offspring_fitnesses))

        # Select the top μ individuals for the next generation
        top_indices = np.argsort(combined_fitnesses)[-MU:]
        population = [combined_population[i] for i in top_indices]
        # Keep the fitness values in the same order as the new population.
        # `fitnesses` still describes the old population and must not be used
        # to index the survivors.
        survivor_fitnesses = combined_fitnesses[top_indices]

        print(f"Generation {generation + 1}: Best Fitness = {best_fitness_scores[-1]:.3f}, Mean Fitness = {mean_fitness_scores[-1]:.3f}")

    if survivor_fitnesses is None:
        # No generation was run: score the initial population so a best
        # individual can still be reported.
        survivor_fitnesses = np.array([evaluate_fitness(ind) for ind in population])

    end_time = time.time()
    execution_time = end_time - start_time

    # Get the best individual of the final population
    best_idx = int(np.argmax(survivor_fitnesses))
    best_weights = population[best_idx]
    best_fitness = survivor_fitnesses[best_idx]

    return best_weights, best_fitness, best_fitness_scores, mean_fitness_scores, execution_time, std_fitness_scores


## Differential Evolution