In [1]:
import sys
sys.path.append('evolutionary_algorithms/')

import gym
import numpy as np
from evolutionary_algorithms.classes.Population import *
from evolutionary_algorithms.classes.EA import *
from evolutionary_algorithms.classes.Recombination import *
from evolutionary_algorithms.classes.Mutation import *
from evolutionary_algorithms.classes.Selection import *
from evolutionary_algorithms.classes.Evaluation import *

In [2]:
# Create the environment and derive the size of one individual from its spaces.
env = gym.make('CartPole-v1')
n_actions = env.action_space.n
# Sum of the observation-space dimensions (equals the vector length for a 1-D space).
n_observations = np.sum(list(env.observation_space.shape))

# One entry per (action, observation) pair.
ind_size = n_actions * n_observations
pop_size = 4

In [3]:
# --- Scalar settings forwarded to the EA constructor in the next cell ---
minimize = False
budget, patience = 200, 5
parents_size, offspring_size = 2, 4
individual_size = ind_size
v = 1  # forwarded as EA(verbose=...)

# --- Operator objects (built once, in this order) ---
recomb = Discrete()
mutation = IndividualSigma()
selection = PlusSelection()
# Evaluation object bound to the environment created earlier; reps=10 is passed as-is.
evaluation = RewardMaximization(env, reps=10)


In [4]:
# Constructor arguments are identical for every run, so gather them once.
# NOTE: the operator/evaluation objects are shared by all runs, not re-created.
ea_kwargs = dict(
    minimize=minimize,
    budget=budget,
    patience=patience,
    parents_size=parents_size,
    offspring_size=offspring_size,
    individual_size=individual_size,
    recombination=recomb,
    mutation=mutation,
    selection=selection,
    evaluation=evaluation,
    verbose=v,
)

# Ten independent runs; each entry stored is [best individual, its evaluation].
best_results = []
for i in range(10):
    print(f"Curr iteration: {i}")
    ea = EA(**ea_kwargs)
    best_ind, best_eval = ea.run()
    best_results.append([best_ind, best_eval])

Curr iteration: 0
Best eval: 500.0 on budget: 30
Curr iteration: 1
Best eval: 500.0 on budget: 30
Curr iteration: 2
Best eval: 500.0 on budget: 154
Curr iteration: 3
Best eval: 500.0 on budget: 6
Curr iteration: 4
Best eval: 500.0 on budget: 10
Curr iteration: 5
Best eval: 500.0 on budget: 14
Curr iteration: 6
Best eval: 500.0 on budget: 6
Curr iteration: 7
Best eval: 500.0 on budget: 62
Curr iteration: 8
Best eval: 500.0 on budget: 198
Curr iteration: 9
Best eval: 500.0 on budget: 78


In [5]:
def eval(individual, env, render=False):
    """Play one episode with a linear argmax policy and return the total reward.

    The individual is reshaped into an ``(n_actions, n_observations)`` matrix;
    at every step the action is the argmax of ``matrix @ state``.

    Works with both gym step/reset conventions:
    * classic gym (< 0.26): ``reset() -> obs`` and
      ``step(a) -> (obs, reward, done, info)``
    * gym >= 0.26 / gymnasium: ``reset() -> (obs, info)`` and
      ``step(a) -> (obs, reward, terminated, truncated, info)``

    Args:
        individual: array-like exposing ``reshape`` with
            ``n_actions * n_observations`` entries.
        env: environment exposing ``observation_space.shape``,
            ``action_space.n``, ``reset()``, ``step()`` and ``render()``.
        render: when True, ``env.render()`` is called after the reset and
            after every step.

    Returns:
        The sum of the rewards collected until the episode ends.

    Note:
        The name shadows the builtin ``eval``; it is kept for compatibility
        with existing callers.
    """
    n_observations = np.sum([dim for dim in env.observation_space.shape])
    n_actions = env.action_space.n

    # The policy matrix does not change during the episode: build it once.
    weights = individual.reshape(n_actions, n_observations)

    reset_out = env.reset()
    # Newer APIs return (observation, info_dict); older ones the bare observation.
    if (isinstance(reset_out, tuple) and len(reset_out) == 2
            and isinstance(reset_out[1], dict)):
        state = reset_out[0]
    else:
        state = reset_out
    if render:
        env.render()

    tot_rew = 0
    done = False
    while not done:
        # Greedy action: highest linear score for the current state
        a = np.argmax(np.dot(weights, state))
        # Query the environment
        step_out = env.step(a)
        if len(step_out) == 5:
            # Newer API: the episode ends on termination OR truncation.
            state, rew, terminated, truncated, _ = step_out
            done = terminated or truncated
        else:
            state, rew, done, _ = step_out
        tot_rew += rew
        if render:
            env.render()
    return tot_rew

In [6]:
# Re-evaluate the winner of every EA run over 10 fresh episodes.
# Each entry of best_results is [individual, evaluation]; the individual of
# THAT entry must be evaluated (previously the leaked `best_ind` of the last
# run was evaluated ten times over, so every reported mean described the
# same individual).
final_evals = []
for ind, _run_eval in best_results:
    evals = [eval(ind, env, render=False) for _ in range(10)]
    final_evals.append(np.mean(evals))

print(final_evals)

[500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0, 500.0]
