In [1]:
""" To run this notebook clone the repository found at https://github.com/OhGreat/evolutionary_algorithms
    and modify the imports relative to where you cloned the repository.
"""

import sys
sys.path.append('evolutionary_algorithms/')

import gym
import numpy as np
from evolutionary_algorithms.classes.Population import *
from evolutionary_algorithms.classes.EA import *
from evolutionary_algorithms.classes.Recombination import *
from evolutionary_algorithms.classes.Mutation import *
from evolutionary_algorithms.classes.Selection import *


<h4>Reward Maximization evaluation function</h4>
This class is used to evaluate the individuals and get their fitness.

In [2]:
class RewardMaximization():
    def __init__(self, env, reps=3):
        self.env = env
        self.n_observations = np.sum([dim for dim in env.observation_space.shape]) 
        self.n_actions = env.action_space.n
        self.reps = reps

    def __call__(self, population):
        fitnesses = []
        for individual in population.individuals:
            ind_rews = []
            for i in range(self.reps):
                state = self.env.reset()
                rep_rews = 0
                done = False
                while not done:
                    # sample action
                    a = np.argmax(np.dot(individual.reshape(self.n_actions, 
                                                        self.n_observations), 
                                                        state))                    
                    # query environment
                    state, rew, done, _ = self.env.step(a)
                    rep_rews += rew
                ind_rews.append(rep_rews)
            fitnesses.append(np.mean(ind_rews))
        population.fitnesses = np.array(fitnesses)

<h4>Creating gym environment</h4>

In [3]:
# define population size 
env = gym.make('CartPole-v1')
n_observations = np.sum([dim for dim in env.observation_space.shape]) 
n_actions = env.action_space.n

pop_size = 4
ind_size = n_observations * n_actions 

<h4>Define Evolutionary Algorithm parameters</h4>

In [4]:
minimize = False
budget = 200
patience = 5
parents_size = 2
offspring_size = 4
individual_size = ind_size
recomb = Discrete()
mutation = IndividualSigma()
selection = PlusSelection()
evaluation = RewardMaximization(env, reps=10)
repetitions = 10
v = 1


<h4>Run the EA for a predefined number of repetitions and collect results</h4>

In [5]:
best_results = []
for i in range(repetitions):
    print(f"Curr iteration: {i}")
    ea = EA(minimize=minimize, budget=budget, patience=patience, 
        parents_size=parents_size, offspring_size=offspring_size,
        individual_size=individual_size, recombination=recomb,
        mutation=mutation, selection=selection, evaluation=evaluation,
        verbose=v)
    best_ind, best_eval, _ = ea.run()
    best_results.append([best_ind, best_eval])

Curr iteration: 0
Best eval: 500.0 on budget: 14
Curr iteration: 1
Best eval: 500.0 on budget: 2
Curr iteration: 2
Best eval: 500.0 on budget: 26
Curr iteration: 3
Best eval: 500.0 on budget: 14
Curr iteration: 4
Best eval: 10.2 on budget: 58
Curr iteration: 5
Best eval: 495.6 on budget: 178
Curr iteration: 6
Best eval: 500.0 on budget: 42
Curr iteration: 7
Best eval: 500.0 on budget: 82
Curr iteration: 8
Best eval: 500.0 on budget: 62
Curr iteration: 9
Best eval: 500.0 on budget: 102


<h4>Evaluate the trained agent.</h4>

In [6]:
def eval(individual, env, render=False):
    """ Simulates a playthrough of our agent with the environment 
    """
    n_observations = np.sum([dim for dim in env.observation_space.shape]) 
    n_actions = env.action_space.n

    done = False
    state = env.reset()
    if render:
        env.render()
        
    tot_rew = 0
    done = False
    while not done:
        # Sample action
        a = np.argmax(np.dot(individual.reshape(n_actions, 
                                                n_observations), 
                                                state))
        # query environment
        state, rew, done, _ = env.step(a)
        tot_rew += rew
        if render:
            env.render()
    return tot_rew

In [7]:
# Main evaluation loop
final_evals = []
for ind in best_results:
    evals = []
    for i in range(100):
        evals.append(eval(best_ind, env, render=False))
    final_evals.append(np.mean(evals))

print("Average results of 100 runs for each agent:",final_evals)

Average results of 100 runs for each agent: [500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0]


In [8]:
# Save best individual to file
best_ind_idx = np.argmax(final_evals)
print(f"chosen individual: {best_ind_idx}")
best_ind = best_results[best_ind_idx][0]
np.save('es_naive.npy', best_ind)
print(best_ind)

chosen individual: 0
[0.50603473 0.3420444  0.05768099 0.55968364 0.56565455 0.45848474
 0.51473977 0.91867617]


<h4>Do a final evaluation on the best chosen individual</h4>

In [9]:
# load trained weights
a = np.load('es_naive.npy')

# simulate 100 runs to average results for the individual
final_evals = []
for i in range(100):
    evals.append(eval(a, env, render=False))
    final_evals.append(np.mean(evals))
print(np.mean(final_evals))

500.0
