In [1]:
import numpy as np
import random
import gymnasium as gym
from evogym.envs import *
from evogym import EvoViewer, get_full_connectivity
from neural_controller import *


# ---- RUN CONFIGURATION ----
NUM_GENERATIONS = 100  # Number of random-search iterations (one candidate weight set per iteration)
STEPS = 500  # Step cap per episode: passed as max_episode_steps and used as the rollout loop bound
SCENARIO = 'DownStepper-v0'  # Environment id handed to gym.make
SEED = 42
# Seed both NumPy's and the stdlib's global RNGs so the sampled weights are repeatable.
np.random.seed(SEED)
random.seed(SEED)


# Fixed 5x5 body grid passed to the environment as `body`.
# NOTE(review): the integers are presumably evogym voxel type codes
# (0 = empty, 1 = rigid, 2 = soft, 3 = horizontal actuator, 4 = vertical actuator)
# — confirm against the evogym documentation.
robot_structure = np.array([ 
[1,3,1,0,0],
[4,1,3,2,2],
[3,4,4,4,4],
[3,0,0,3,2],
[0,0,0,0,2]
])



# Connection data for the body, computed by evogym's get_full_connectivity.
connectivity = get_full_connectivity(robot_structure)
# Build one environment up front so the controller can be sized from its spaces.
# NOTE(review): this environment is never closed in the visible code.
env = gym.make(SCENARIO, max_episode_steps=STEPS, body=robot_structure, connections=connectivity)
sim = env.sim
input_size = env.observation_space.shape[0]  # First dimension of the observation space
output_size = env.action_space.shape[0]  # First dimension of the action space

# Single controller instance shared by every rollout; candidate weights are loaded via set_weights.
brain = NeuralController(input_size, output_size)

# ---- FITNESS FUNCTION ----
def evaluate_fitness(weights, view=False):
    """Run one episode with ``weights`` loaded into ``brain`` and return the total reward.

    Args:
        weights: Weight arrays handed to ``set_weights`` for the shared ``brain``.
        view: When true, render every step to the screen through an ``EvoViewer``.

    Returns:
        The sum of the per-step rewards, accumulated until the environment
        reports termination/truncation or ``STEPS`` steps have been taken.
    """
    set_weights(brain, weights)  # Load weights into the network
    env = gym.make(SCENARIO, max_episode_steps=STEPS, body=robot_structure, connections=connectivity)
    viewer = None
    try:
        if view:
            # The viewer is built from the simulator (env.sim), matching
            # visualize_policy; it is only needed when rendering is requested.
            viewer = EvoViewer(env.sim)
            viewer.track_objects('robot')

        state = env.reset()[0]  # Get initial state
        t_reward = 0
        for _ in range(STEPS):
            # Compute the action from the current observation before stepping.
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            action = brain(state_tensor).detach().numpy().flatten()
            if viewer is not None:
                viewer.render('screen')
            state, reward, terminated, truncated, info = env.step(action)
            t_reward += reward
            if terminated or truncated:
                # No reset here: the environment is closed right after the loop.
                break
        return t_reward
    finally:
        # Release the viewer and environment even if a step or render call raised.
        if viewer is not None:
            viewer.close()
        env.close()


# ---- RANDOM SEARCH ALGORITHM ----
# Track the best-scoring candidate seen so far; every candidate is sampled
# independently of the previous ones (pure random search).
best_weights = None
best_fitness = -np.inf

for generation in range(NUM_GENERATIONS):
    # One standard-normal array per network parameter, matching its shape.
    random_weights = [
        np.random.randn(*param.shape)
        for param in brain.parameters()
    ]

    # Score this candidate with a single rollout.
    fitness = evaluate_fitness(random_weights)

    # Strict comparison: on a tie the earlier candidate is kept.
    if fitness > best_fitness:
        best_fitness, best_weights = fitness, random_weights

    print(f"Generation {generation + 1}/{NUM_GENERATIONS}, Fitness: {fitness}")

# Leave the shared controller loaded with the best weights found.
set_weights(brain, best_weights)
print(f"Best Fitness: {best_fitness}")


# ---- VISUALIZATION ----
def visualize_policy(weights):
    """Replay one episode on screen with ``weights`` loaded into ``brain``.

    Args:
        weights: Weight arrays handed to ``set_weights`` for the shared ``brain``.

    Returns:
        None. The episode is rendered step by step and stops on
        termination/truncation or after ``STEPS`` steps.
    """
    set_weights(brain, weights)  # Load weights into the network
    env = gym.make(SCENARIO, max_episode_steps=STEPS, body=robot_structure, connections=connectivity)
    viewer = EvoViewer(env.sim)
    try:
        viewer.track_objects('robot')
        state = env.reset()[0]  # Get initial state
        for _ in range(STEPS):
            # Compute the action from the current observation before stepping.
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0)
            action = brain(state_tensor).detach().numpy().flatten()
            viewer.render('screen')
            state, _reward, terminated, truncated, _info = env.step(action)
            if terminated or truncated:
                # No reset here: the environment is closed right after the loop.
                break
    finally:
        # Always release the viewer and environment, even if render/step raised.
        viewer.close()
        env.close()
# Replay the best policy several times so its behaviour can be inspected on screen.
NUM_REPLAYS = 10
for _ in range(NUM_REPLAYS):
    visualize_policy(best_weights)

Generation 1/100, Fitness: -0.06929301204140781
Generation 2/100, Fitness: -0.06772797986342216
Generation 3/100, Fitness: 0.010921709899752996
Generation 4/100, Fitness: -0.07362099703432512
Generation 5/100, Fitness: 0.07389050288225579
Generation 6/100, Fitness: -0.01573390005174341
Generation 7/100, Fitness: -3.4496195477541978
Generation 8/100, Fitness: -0.06170167128908266
Generation 9/100, Fitness: -0.05231940084656739
Generation 10/100, Fitness: -0.06226246049128381
Generation 11/100, Fitness: -0.08650517857618512
Generation 12/100, Fitness: -0.017321719050655215
Generation 13/100, Fitness: -0.15485086085975563
Generation 14/100, Fitness: 0.034963853371737574
Generation 15/100, Fitness: -3.485200471625875
Generation 16/100, Fitness: -0.053365455737026335
Generation 17/100, Fitness: -0.04515946193625725
Generation 18/100, Fitness: -0.003643064587288636
Generation 19/100, Fitness: 0.2699062084668015
Generation 20/100, Fitness: -0.03436621811774365
Generation 21/100, Fitness: -3.7