In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Input
from tensorflow.keras.models import Sequential, Model
from tqdm import tqdm
import time
import gym

# Marker printed once every import above has completed without raising.
print('importing done')

importing done


## Environment

In [16]:
# Notebook-wide configuration constants.
NUM_AGENTS = 20 # Number of agents (models) to create for one population
MUTATION_RATE = 0.4 # Probability that an individual weight/bias is perturbed by add_random_noise
SIMULATION_STEPS = 100 # Number of environment steps used when simulating an agent

# Kernel initializer shared by every Dense layer built in this notebook:
# truncated normal centred on 0 with a small spread and a fixed seed.
# NOTE(review): the same seeded initializer instance is reused for all layers
# of all agents; depending on the TensorFlow version this may make every agent
# start from identical weights -- TODO confirm.
initializer = tf.keras.initializers.TruncatedNormal(
    mean=0.0, stddev=0.05, seed=7
)

In [13]:
class EvolutionnaryNeuralNetwork:
    """A small dense Keras policy network paired with its own Gym environment.

    The network maps an observation vector to a 4-unit ``tanh`` output. It is
    not trained by gradient descent: its weights are perturbed with random
    noise (``mutate``) and the agent is scored by the reward it collects in
    ``simulate``.

    Args:
        input_shape: Length of the flat observation vector.
        hidden_layers: Iterable with the number of units of each hidden Dense
            layer, in order. May be empty, in which case the input feeds the
            output layer directly.
        env_name: Gym environment id handed to ``gym.make``.
        weights: Optional dict mapping layer name to the list of arrays
            accepted by that layer's ``set_weights`` (the format produced by
            ``get_all_weights``). Layers missing from the dict keep their
            initial weights.
    """

    def __init__(self, input_shape=24, hidden_layers=(), env_name="BipedalWalker-v3", weights=None):
        self.model = self.__init_model(input_shape, hidden_layers, weights)
        self.__env = self.__init_env(env_name)
        self.weights = weights  # Should be a dict
        self.__reward = 0

    def __init_model(self, input_shape, hidden_layers, weights):
        """Build the Keras model and optionally load per-layer weights into it.

        Hidden layers are named "1", "2", ... and the last layer "output", so
        these are the keys expected in ``weights``.
        """
        inputs = Input(shape=(input_shape,), name='input')

        # Start the chain at the input tensor so that an empty `hidden_layers`
        # still yields a valid input -> output model.
        X = inputs
        for idx, units in enumerate(hidden_layers):
            X = Dense(units, activation='tanh', name=str(idx + 1), kernel_initializer=initializer)(X)
        output = Dense(4, activation='tanh', name='output', kernel_initializer=initializer)(X)

        model = Model(inputs=inputs, outputs=output)

        if weights is not None:
            # Iterate over the freshly built model: `self.model` is not
            # assigned yet while this method is running.
            for layer in model.layers:
                saved = weights.get(layer.name)
                # Skip absent entries and weightless layers (e.g. the input).
                if saved is not None and len(saved) > 0:
                    layer.set_weights(saved)
        return model

    def __init_env(self, env_name):
        """Create the Gym environment this agent is evaluated in."""
        return gym.make(env_name)

    def get_layer_weights(self, layer_name):
        """Return ``get_weights()`` of the layer called ``layer_name``, or None if absent."""
        for layer in self.model.layers:
            if layer.name == layer_name:
                return layer.get_weights()
        return None

    def get_all_weights(self):
        """Return a dict mapping every layer name to its ``get_weights()`` list."""
        return {layer.name: layer.get_weights() for layer in self.model.layers}

    def add_random_noise(self, layer_weights, mutation_rate=None, sigma=0.1, clip=4.0):
        """Return a mutated copy of a layer's weight arrays.

        Each individual value is, with probability ``mutation_rate``, shifted
        by a sample from a normal distribution N(0, sigma). Every value,
        mutated or not, is then clipped to ``[-clip, clip]``. The input arrays
        are not modified.

        Args:
            layer_weights: List of arrays as returned by ``layer.get_weights()``
                (for a Dense layer: ``[kernel, bias]``). May be empty.
            mutation_rate: Per-value mutation probability. Defaults to the
                notebook-level ``MUTATION_RATE`` constant.
            sigma: Standard deviation of the added noise.
            clip: Absolute bound applied to the resulting values.

        Returns:
            A list with one new array per input array, in the same order and
            with the same shapes.
        """
        if mutation_rate is None:
            mutation_rate = MUTATION_RATE

        mutated = []
        for values in layer_weights:
            values = np.asarray(values)
            # Draw the mutation decision and the noise for the whole array at once.
            mask = np.random.random(values.shape) < mutation_rate
            noise = np.random.normal(0.0, sigma, size=values.shape)
            mutated.append(np.clip(np.where(mask, values + noise, values), -clip, clip))
        return mutated

    def _mutate_keras_layer(self, layer, mutation_rate=None):
        """Mutate one Keras layer in place; layers without weights are left untouched."""
        current = layer.get_weights()
        if len(current) == 0:
            # e.g. the input layer: nothing to perturb.
            return
        layer.set_weights(self.add_random_noise(current, mutation_rate=mutation_rate))

    def mutate_layer(self, layer_name, mutation_rate=None):
        """Mutate only the layer called ``layer_name`` (no-op if it does not exist)."""
        for layer in self.model.layers:
            if layer.name == layer_name:
                self._mutate_keras_layer(layer, mutation_rate)
                break

    def mutate(self, mutation_rate=None):
        """Mutate every layer of the model that owns weights."""
        for layer in self.model.layers:
            self._mutate_keras_layer(layer, mutation_rate)

    def predict(self, state):
        """Run the model on a batch of observations and return its output."""
        return self.model.predict(state)

    def simulate(self, steps=300, render=False):
        """Run the agent in its environment and return the reward it collected.

        The reward counter is reset at the start of every call, so the value
        returned (and later reported by ``get_reward``) always refers to this
        run only. When an episode finishes before ``steps`` is reached the
        environment is reset and the run continues, still accumulating reward.

        Expects the classic Gym API: ``reset()`` returns the observation and
        ``step()`` returns ``(state, reward, done, info)``.

        Args:
            steps: Total number of environment steps to execute.
            render: When True, render each step and close the environment
                afterwards.

        Returns:
            The sum of the rewards received over the ``steps`` steps.
        """
        self.__reward = 0
        state = self.__env.reset()
        try:
            for _ in range(steps):
                # The model expects a batch dimension; take the single action back out.
                action = self.predict(state.reshape(1, -1))[0]
                state, reward, done, _info = self.__env.step(action)
                self.__reward += reward
                if render:
                    self.__env.render()
                if done:
                    state = self.__env.reset()
        finally:
            # Release the viewer even if a step raised.
            if render:
                self.__env.close()

        return self.__reward

    def get_reward(self):
        """Return the reward accumulated by the most recent ``simulate`` call (0 before any)."""
        return self.__reward


In [14]:
# Build one agent with two hidden layers (64 and 32 units).
enn = EvolutionnaryNeuralNetwork(hidden_layers=[64, 32], env_name="BipedalWalker-v3")
# Smoke-test add_random_noise on hand-made all-zero arrays laid out as
# [2-D weight matrix, 1-D bias vector]; the mutated copy is kept in `r`.
temp_w = np.zeros((4, 64))
temp_b = np.zeros(64)
r = enn.add_random_noise([temp_w, temp_b])

In [17]:
# Roll the agent out for SIMULATION_STEPS steps with rendering enabled; the
# value displayed by the cell is the accumulated reward returned by simulate().
enn.simulate(steps=SIMULATION_STEPS, render=True)

-92.92700834687453

In [None]:
class GA ():
    """Genetic-algorithm helper that keeps a population and its elite subset.

    On construction the population is ranked by reward (highest first) and the
    top ``elite_ratio`` fraction is stored as the elites.

    Args:
        models_list: Sequence of models/agents, one per individual.
        rewards_list: Sequence of numeric rewards, aligned with ``models_list``.
        elite_ratio: Fraction of the population kept as elites.

    Raises:
        ValueError: If ``models_list`` and ``rewards_list`` differ in length.
    """

    def __init__(self, models_list, rewards_list, elite_ratio=0.2):
        models_list = list(models_list)
        rewards_list = list(rewards_list)
        if len(models_list) != len(rewards_list):
            raise ValueError(
                "models_list and rewards_list must have the same length "
                "(got %d and %d)" % (len(models_list), len(rewards_list))
            )

        # Fill an object array element by element so NumPy never tries to
        # interpret the models themselves as (possibly ragged) sequences.
        self.__models = np.empty(len(models_list), dtype=object)
        for position, model in enumerate(models_list):
            self.__models[position] = model

        self.__rewards = np.asarray(rewards_list, dtype=float)
        self.__elite_ratio = elite_ratio
        self.__number_population = len(rewards_list)
        self.__elites_models = None
        self.__elites_rewards = None

        self.__select_elite()

    def __select_elite(self):
        """Store the best-rewarded individuals, ordered from best to worst."""
        population = self.__number_population
        elite_count = int(self.__elite_ratio * population)
        if population > 0:
            # Truncation must not leave a non-empty population without elites
            # (e.g. 4 individuals * 0.2 -> 0), nor exceed the population size.
            elite_count = min(population, max(1, elite_count))
        else:
            elite_count = 0

        # Rank once and reuse the same order for rewards and models.
        best_first = self.__rewards.argsort()[::-1][:elite_count]
        self.__elites_rewards = self.__rewards[best_first]
        self.__elites_models = self.__models[best_first]

    def create_new_population(self):
        """Placeholder for breeding the next generation; not implemented yet (returns None)."""
        pass


## Train Models with GA

In [None]:
# Build the initial population: create NUM_AGENTS agents and score each one by
# the reward it collects over SIMULATION_STEPS environment steps.
models = []
rewards = []

for _ in tqdm(range(NUM_AGENTS)):
    # Same architecture as the single agent built earlier in the notebook.
    agent = EvolutionnaryNeuralNetwork(hidden_layers=[64, 32])
    models.append(agent)

    # Agents are evaluated by simulation rather than fitted with gradients.
    agent.simulate(steps=SIMULATION_STEPS)
    rewards.append(agent.get_reward())


100%|██████████| 20/20 [02:26<00:00,  7.31s/it]


In [4]:
# Visual sanity check of the environment: drive it with random actions.
import gym  # also imported in the first cell; kept so this cell runs on its own

env = gym.make('BipedalWalker-v3')
env.reset()

try:
    for _ in range(1000):
        env.render()
        step_result = env.step(env.action_space.sample())
        # The end-of-episode flag(s) sit between the reward and the info dict:
        # `done` in the 4-tuple API, `terminated`/`truncated` in the 5-tuple one.
        if any(step_result[2:-1]):
            # Stepping a finished episode is not valid; start a new one.
            env.reset()
finally:
    # Always release the render window, even if a step raised.
    env.close()
