In [1]:
import gym
import time
from src.Entity import Entity
from src.EvolutionCtrl import Population_Manager

# 1) Environment
env = gym.make("BipedalWalker-v3") 
print(f"Dim. action space: {env.action_space.shape}")
print(f"Dim. observation space: {env.observation_space.shape}")

# 2) Training Params
POP_SIZE = 100
MAX_SEQUENCE_LEN = 400
N_GENERATIONS = 1500    
MUTATION_RATE = .1
#MUTATE_BIAS_GEN = 20
PROB_NODE_COPY = .1
WEIGHT_INIT_INTERVAL = [-1, 1]

n_layer_nodes = [env.observation_space.shape[0], 48, 48, 32, env.action_space.shape[0]]

init_population = [Entity(n_layer_nodes, WEIGHT_INIT_INTERVAL) for i in range(POP_SIZE)]  

pop_manager = Population_Manager(init_population, MUTATION_RATE, PROB_NODE_COPY)

Dim. action space: (4,)
Dim. observation space: (24,)


In [2]:
def agent_env_loop(entity):
        entity.survived = True
        entity.fitness = 0
        env.seed(10) 
        observation = env.reset()                                            # s_0
        for i in range(MAX_SEQUENCE_LEN):  
            action = entity.controller.feed_forward(observation)             # a_t, s_t            
            observation, reward, done, _ = env.step(action)                  # s_{t+1}, r_{t+1}, a_t       
            entity.fitness += reward
            if done:
                break   

# Optimize 
for generation_act in range(N_GENERATIONS):
    t_start = time.time() 
    for entity in pop_manager.population:
        if not entity.survived:
            agent_env_loop(entity)
    
    pop_manager.breed_new_population(generation_act)
    print(f"Generation {generation_act}: {time.time() - t_start}")


38905968989648
Generation 18: 3.492997169494629
52 survived walker,max fitness: -20.938905968989648
Generation 19: 3.935004234313965
56 survived walker,max fitness: -20.938905968989648
Generation 20: 3.7749955654144287
55 survived walker,max fitness: -20.938905968989648
Generation 21: 3.025003671646118
55 survived walker,max fitness: -20.938905968989648
Generation 22: 3.8479974269866943
48 survived walker,max fitness: -20.938905968989648
Generation 23: 2.7140309810638428
52 survived walker,max fitness: -20.938905968989648
Generation 24: 4.259968519210815
49 survived walker,max fitness: -20.938905968989648
Generation 25: 4.206002473831177
54 survived walker,max fitness: -20.938905968989648
Generation 26: 4.077025413513184
48 survived walker,max fitness: -20.938905968989648
Generation 27: 4.534978628158569
41 survived walker,max fitness: -20.938905968989648
Generation 28: 4.065992832183838
53 survived walker,max fitness: -20.938905968989648
Generation 29: 4.387002944946289
52 survived wa

KeyboardInterrupt: 

In [4]:
pop_manager.population.sort(key=lambda x: x.fitness, reverse=True) 
best_agent = pop_manager.population[-50]

env.seed(10) 
observation = env.reset()
for _ in range(MAX_SEQUENCE_LEN):
    env.render()
    action = best_agent.controller.feed_forward(observation)            # a_t, s_t            
    observation, reward, done, _ = env.step(action)                     # s_{t+1}, r_{t+1}, a_t       
env.close()

In [13]:
child = Entity(n_layer_nodes, weight_interval)
print(f"{len(child.controller.bias[0])}")
print(f"{len(child.controller.weights[0][0])}")

48
48


In [21]:
child.controller.bias[0]

array([ 0.7584839 ,  0.10789981, -0.42991881, -0.15561191,  0.49077721,
        0.84670393,  0.57381795,  0.08863811,  0.76584448,  0.71707239,
       -0.01921783,  0.79219211, -0.67317444,  0.09585796, -0.54096392,
        0.61198527,  0.24809397, -0.90281628, -0.4444071 ,  0.90020591,
       -0.33117147, -0.60336651, -0.20123493,  0.70344756,  0.70935507,
       -0.52090854,  0.0978931 ,  0.76381188, -0.28623422, -0.00845665,
        0.58594024, -0.14965437,  0.43026903, -0.8864281 , -0.90564161,
        0.93240325, -0.73510206, -0.22643748,  0.31478809, -0.65302702,
       -0.54730051,  0.40117432,  0.04468406, -0.02153551,  0.67961832,
        0.78448254, -0.91231042, -0.62626673])

In [3]:
import random
import copy
child = Entity(n_layer_nodes, weight_interval)
parent_1 = Entity(n_layer_nodes, weight_interval)
parent_2 = Entity(n_layer_nodes, weight_interval)
prob_node_copy = .5
print(child.controller.n_hidden)

for i in range(len(child.controller.weights)):   
    # loop over nodes
    print(f"i: {i}, len_bias {len(child.controller.bias)}")
    for j in range(len(child.controller.weights[i])): 
        test = child.controller.bias[i]
        #print(f"{test}")
        # inherit complete node 
        if random.random() > prob_node_copy:
            #    random_parent = random.choice([parent_1, parent_2])
            child.controller.weights[i][j] = copy.deepcopy(parent_1.controller.weights[i][j])
            if i == 1 or i > child.controller.n_hidden - 1::
                try:
                    child.controller.bias[i][j] = copy.deepcopy(parent_1.controller.bias[i][j])
                except:
                    print(f"j: {j}, len_bias {len(child.controller.bias[0])}, , len_weights {len(child.controller.weights[0])}")


3
i: 0, len_bias 4
i: 1, len_bias 4
i: 2, len_bias 4
i: 3, len_bias 4
j: 7, len_bias 48, , len_weights 24
j: 9, len_bias 48, , len_weights 24
j: 10, len_bias 48, , len_weights 24
j: 11, len_bias 48, , len_weights 24
j: 12, len_bias 48, , len_weights 24
j: 13, len_bias 48, , len_weights 24
j: 14, len_bias 48, , len_weights 24
j: 15, len_bias 48, , len_weights 24
j: 16, len_bias 48, , len_weights 24
j: 18, len_bias 48, , len_weights 24
j: 22, len_bias 48, , len_weights 24
j: 24, len_bias 48, , len_weights 24
j: 29, len_bias 48, , len_weights 24


In [28]:
len(child.controller.bias[3])

4

In [35]:
class testclass(object):
    def __init__(self):
        self.weights = [[1,2,3], [4,5,6]]

x = testclass()

In [36]:
y = x.weights[0]
y

[1, 2, 3]

In [37]:
y[0] = 7

In [38]:
x.weights[0]

[7, 2, 3]

In [39]:
y

[7, 2, 3]

In [34]:
a = [1,2,3]
b = a
b[0] = 4
a

[4, 2, 3]

In [9]:
import numpy as np
np.random.uniform(-1.0, 1.0, (4, 3))

array([[ 0.08500486,  0.69602858,  0.54515709],
       [ 0.74302638,  0.37548427,  0.48805228],
       [ 0.48482226, -0.2124669 ,  0.71335525],
       [ 0.4457444 ,  0.56424148,  0.87824   ]])

In [10]:
np.random.uniform(low = -1.0, high = 1.0, size = 3)

array([0.96191887, 0.5971491 , 0.52781256])