In [1]:
import numpy as np
import matplotlib.pyplot as plt

class MLP:
    '''
    Multilayer perceptron with a sigmoid activation on every layer.
    Declare instance with MLP(layers).

    Weights and biases are drawn uniformly from [-0.5, 0.5) and can be
    exported to / imported from a flat list ("chromosome") so that a
    genetic algorithm can evolve them.
    '''
    def __init__(self, layers):
        '''
        layers: a sequence with (ninputs, nhidden1, nhidden2, ... noutput)
        '''
        self.layers = layers
        self.trace = False
        self.threshold = 5.0
        self.labels = None # text for the labels [input-list, output-list]

        self.size = 0 # total number of parameters (all weights + all biases)
        self.W = [] # list of numpy matrices, one per layer transition
        self.b = [] # list of numpy vectors, one per layer transition
        for i in range(len(layers)-1):
            w = np.random.rand(layers[i],layers[i+1])-0.5
            b = np.random.rand(layers[i+1])-0.5
            self.W.append(w)
            self.b.append(b)
            self.size += layers[i] * layers[i+1] + layers[i+1]

        self.lRMS = [] # hold all traced RMSs to draw graph
        self.laccuracy = [] # hold all traced accuracies to draw graph

    def sigm (self, neta):
        '''
        Logistic sigmoid, applied element-wise when neta is a numpy array.
        '''
        return 1.0 / (1.0 + np.exp(-neta))

    def forward (self, x): # fast forward (optimized in time, but not use to train!)
        '''
        Propagate x through every layer and return the activations of the
        last one. No intermediate values are kept.
        '''
        for i in range(len(self.b)):
            net = np.dot(x,self.W[i]) + self.b[i]
            x = self.sigm(net)
        return x

    def to_chromosome (self):
        '''
        Convert weights and biases to a flat list to use in a GA.

        Layout: for each layer transition, the weight matrix flattened in
        row-major order followed by its bias vector.
        '''
        ch = []
        for w,b in zip(self.W,self.b):
            ch += w.flatten().tolist()
            ch += b.flatten().tolist()
        return ch

    def from_chromosome (self, ch):
        '''
        Convert a flat list (chromosome from a GA) to internal weights and biases.

        ch must use the layout produced by to_chromosome().
        Raises ValueError when len(ch) differs from self.size.
        '''
        if len(ch) != self.size:
            raise ValueError(
                "Chromosome length (%d) doesn't match architecture (expected %d)"
                % (len(ch), self.size)
            )
        self.W = []
        self.b = []
        pos = 0
        for i in range(len(self.layers)-1): # for each layer transition
            n_weights = self.layers[i]*self.layers[i+1]
            w = np.array(ch[pos:pos+n_weights]).reshape(self.layers[i],self.layers[i+1])
            pos += n_weights
            n_bias = self.layers[i+1]
            b = np.array(ch[pos:pos+n_bias]).reshape(self.layers[i+1])
            pos += n_bias

            self.W.append(w)
            self.b.append(b)


In [2]:
import random
import copy
from IPython.display import clear_output

rang = (-10, 10) # since this is not written with classes, the gene range must be a global variable (mutate() clips genes to it)


def create (alphabet=None, N=50, layers=(8, 6, 4)):
    '''
    Create and return an initial population of N chromosomes.

    Each chromosome is the flattened weights and biases of a freshly
    initialised MLP with the given architecture.

    alphabet: unused; kept so the signature stays compatible with callers.
    N: number of individuals to create.
    layers: MLP architecture (ninputs, nhidden..., noutput); defaults to the
            8-6-4 network that was previously hard-coded here.
    '''
    return [MLP(list(layers)).to_chromosome() for _ in range(N)]

def crossover (ind1, ind2, pcross):
    '''
    Arithmetic crossover of two individuals.

    With probability pcross returns two new lists that are complementary
    weighted averages of the parents (one random weight shared by all
    genes); otherwise returns the parents themselves, unchanged.
    The result is always a 2-tuple.
    '''
    # First draw decides whether the pair is crossed at all.
    if random.random() > pcross:
        return (ind1, ind2)

    # Second draw is the mixing coefficient.
    beta = random.random()
    hijo1 = []
    hijo2 = []
    for i in range(len(ind1)):
        gen_a = ind1[i]
        gen_b = ind2[i]
        hijo1.append(beta*gen_a + (1-beta)*gen_b)
        hijo2.append(beta*gen_b + (1-beta)*gen_a)

    return (hijo1, hijo2)

def sort_pop (pop, fit):
    '''
    Rank a population by fitness, lowest fitness first.

    pop: list of individuals.
    fit: callable mapping an individual to a comparable fitness value;
         it is called exactly once per individual.

    Returns a tuple (sorted_pop, sorted_fit) of two new lists with matching
    order. Individuals with equal fitness are all kept, in their original
    relative order.
    '''
    fit_values = [fit(x) for x in pop]

    # Sort positions instead of building a fitness -> individual dict: a dict
    # keeps only one individual per distinct fitness value, which silently
    # replaced tied individuals with aliases of a single one.
    order = sorted(range(len(pop)), key=fit_values.__getitem__)

    sorted_pop = [pop[i] for i in order]
    sorted_fit = [fit_values[i] for i in order]

    return sorted_pop, sorted_fit

def select (pop, T):
    '''
    Tournament selection over a population ordered best-first.

    Draws T random positions and picks the lowest one, so the caller is
    expected to pass a population whose index 0 is the best individual.
    Returns a deep copy of the winner to avoid side effects on the population.
    '''
    winner = min(random.randrange(len(pop)) for _ in range(T))
    return copy.deepcopy(pop[winner])

def mutate (ind, pmut):
    '''
    Mutate an individual in place and return it.

    With probability pmut every gene is shifted by a uniform random amount in
    [-1, 1] and then clipped to the global range `rang`; otherwise the
    individual is returned untouched.
    '''
    # A single draw decides whether this individual is mutated at all.
    if random.random() > pmut:
        return ind

    for pos, gene in enumerate(ind):
        shifted = gene + random.uniform(-1, 1)
        # clip to range
        ind[pos] = max(min(shifted, rang[1]), rang[0])

    return ind

def evolve (pop, fit, pmut, pcross=0.7, ngen=100, T=2, trace=0, elitism=False, maximize=False):
    '''
    Run the genetic algorithm and return the evolved population.

    pop: initial population (list of individuals); the caller's list is not modified.
    fit: fitness function, individual -> number.
    pmut: mutation probability, forwarded to mutate().
    pcross: crossover probability, forwarded to crossover().
    ngen: number of generations.
    T: tournament size, forwarded to select().
    trace: if > 0, print the best individual and plot the best fitness
           every `trace` generations.
    elitism: if True, the best individual of the previous generation replaces
             a random individual of the new one.
    maximize: if True, search for the maximum fitness instead of the minimum.

    Returns the last population ordered best-first.
    '''
    track_fitness = []
    best = None
    best_fit = None
    pop_size = len(pop)

    # select() treats a lower index as a better individual, so the initial
    # population has to be ranked before the first tournament; otherwise
    # generation 0 is selected by position, not by fitness. This costs one
    # extra fitness evaluation per initial individual.
    pop, _ = sort_pop(pop, fit)
    if maximize:
        pop.reverse()

    for gen in range(ngen):
        newpop = []
        for i in range(pop_size//2):
            # pick two parents
            ind1 = select(pop, T)
            ind2 = select(pop, T)
            # cross them
            hijo1, hijo2 = crossover(ind1, ind2, pcross)
            # mutate the offspring
            hijo1 = mutate(hijo1, pmut)
            hijo2 = mutate(hijo2, pmut)
            # add them to the new population
            newpop.append(hijo1)
            newpop.append(hijo2)
        # elitism (best is None only before the first generation is evaluated)
        if elitism and best is not None:
            # drop a random individual
            newpop.pop(random.randrange(len(newpop)))
            # add the best of the previous generation
            newpop.append(best)

        # evaluate the new population (sort_pop orders by ascending fitness)
        pop, fit_pop = sort_pop(newpop, fit)
        if maximize:
            pop.reverse()
            fit_pop.reverse()
        # keep the best
        best = pop[0]
        best_fit = fit_pop[0]

        # show progress
        if trace > 0 and gen % trace == 0:
            clear_output()
            print("Generación: ", gen)
            print("Mejor: ", best)
            print("Fitness: ", best_fit)

            if maximize:
                track_fitness.append(np.max(fit_pop))
            else:
                track_fitness.append(np.min(fit_pop))

            plt.plot(track_fitness)
            plt.show()

    return pop

In [3]:
def policy (observation, individual):
    '''
    Policy: choose an action for an observation using the given chromosome.

    Loads `individual` into the global `model` (which must exist before the
    first call), runs a forward pass on `observation` and returns the index
    of the largest output.
    '''
    model.from_chromosome(individual)
    return np.argmax(model.forward(observation))

In [11]:

import gymnasium as gym
from gym.wrappers.monitoring.video_recorder import VideoRecorder

def run_info_2(individual, env):
    '''
    Run one episode in `env`, choosing actions with policy(observation, individual).

    Prints the accumulated reward (tagged " (success)" when it exceeds 200)
    and returns it.
    '''
    observation, info = env.reset()
    racum = 0
    finished = False

    # Step until the environment reports termination or truncation.
    while not finished:
        action = policy(observation, individual)
        observation, reward, terminated, truncated, info = env.step(action)
        racum += reward
        finished = terminated or truncated

    str_print = "Reward: " + str(racum)
    if racum > 200:
        str_print += " (success)"
    print(str_print)
    return racum

def get_ref_medio(individuo, draw=False, n_episodes=10):
    '''
    Run several LunarLander episodes with `individuo` and print the mean
    reward and its standard deviation (both rounded to 2 decimals).

    individuo: chromosome passed on to run_info_2().
    draw: if True the environment is created with render_mode="human".
    n_episodes: number of episodes to average over (default 10, the value
                that was previously hard-coded).

    Returns None; results are only printed.
    '''
    if draw:
        env = gym.make("LunarLander-v2", render_mode="human")
    else:
        env = gym.make("LunarLander-v2")

    # Close the environment even when an episode raises.
    try:
        r = [run_info_2(individuo, env) for _ in range(n_episodes)]

        r_mean = round(np.mean(r),2)
        r_std = round(np.std(r),2)

        print(f"Refuerzo medio: {r_mean}\u00B1{r_std}")
    finally:
        env.close()

def run_video(individual, env, video):
    '''
    Run one episode in `env` while recording it with `video`.

    Before every step the environment is rendered and a frame is captured;
    actions come from policy(observation, individual). Prints the accumulated
    reward (tagged " (success)" when it exceeds 200) and returns it.
    '''
    observation, info = env.reset()
    racum = 0
    terminated = truncated = False

    while not (terminated or truncated):
        env.render()
        video.capture_frame()
        action = policy(observation, individual)
        observation, reward, terminated, truncated, info = env.step(action)
        racum += reward

    suffix = " (success)" if racum > 200 else ""
    print("Reward: " + str(racum) + suffix)
    return racum

def create_video(individuo, path="./video/neuroevolutivo.mp4", n_episodes=10):
    '''
    Record LunarLander episodes played by `individuo` into a video file and
    print the mean reward and its standard deviation (rounded to 2 decimals).

    individuo: chromosome passed on to run_video().
    path: output video file (defaults to the previously hard-coded location).
    n_episodes: number of episodes to record (default 10, as before).

    Returns None; results are only printed.
    '''
    import os  # local import: only needed to prepare the output directory

    # Make sure the target directory exists before the recorder writes to it.
    out_dir = os.path.dirname(path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    env = gym.make("LunarLander-v2", render_mode="rgb_array")
    # Release the environment and the recorder even if an episode raises.
    try:
        video = VideoRecorder(env, path)
        try:
            r = [run_video(individuo, env, video) for _ in range(n_episodes)]

            r_mean = round(np.mean(r),2)
            r_std = round(np.std(r),2)

            print(f"Refuerzo medio: {r_mean}\u00B1{r_std}")
        finally:
            video.close()
    finally:
        env.close()

In [7]:
import pickle
# Load a previously saved individual from disk.
# NOTE(review): presumably this is the best chromosome of generation 863 - confirm.
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
with open('gen_863.pkl', 'rb') as f:
    ind = pickle.load(f)

# Show the loaded object.
print(ind)

[1.8226959935595612, -1.2379526017844542, 1.3879502710096223, -1.0768258351435207, 1.2378128055945234, -0.41509036297208035, 0.10635652921017373, 0.9023272761116615, -1.265736388732924, -1.1019480051594508, 0.14047197654862115, -0.3201015009176916, -1.9783737500707028, -0.05704845870978751, 0.18471107094115993, -0.022932434387907576, 2.0958925432193602, 0.641653723859404, -0.5108425773979623, 2.2822487466868004, -1.7583836259164864, -1.4752309879556664, -1.9717054138296877, -1.9729800318589121, 2.2051147685987864, -1.1506250239019855, 0.8608695314025976, 2.229552533297108, 1.9261964721508735, 0.09750524214142338, 1.2401404075076377, 2.0621418088317265, -2.4852411404898946, -1.0923634802521156, -1.6644109949618, -0.3124387434012907, 0.005039623221969815, -1.3550093708071387, 0.04917383351645008, -0.5881671803423153, -0.08178318384998379, 1.7919925982948008, 0.09334687905125523, 3.491786943475871, -1.748672292036944, 0.9338507520022021, 0.1390317351875843, 2.338653233130711, 0.1295296214

In [8]:
# build the model (this global `model` is the one policy() uses)
layer = [8,6,4]
model = MLP(layer)
# round-trip the freshly initialised weights through the chromosome format
ch = model.to_chromosome()
model.from_chromosome(ch)

# NOTE(review): the original comment here said "pass the weights of the best
# chromosome obtained with neuroevolution to the model", but this line only
# creates a new population of 50 chromosomes with create(); the loaded
# individual is applied to `model` inside policy() via from_chromosome().
pop = create(N=50)

In [13]:
# Record episodes played by the loaded individual `ind`; prints each episode's reward and the mean.
create_video(ind)

Reward: 255.94259945238633 (success)
Reward: 237.4916862326451 (success)
Reward: 269.81844942897214 (success)
Reward: 265.2110739436627 (success)
Reward: 265.6250913511924 (success)
Reward: 277.5924207768712 (success)
Reward: 273.37466590203655 (success)
Reward: 244.98667387324883 (success)
Reward: 258.7592945878241 (success)
Reward: 260.5412471520483 (success)
Refuerzo medio: 260.93±11.77
Moviepy - Building video ./video/neuroevolutivo.mp4.
Moviepy - Writing video ./video/neuroevolutivo.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready ./video/neuroevolutivo.mp4


