First, we import the libraries we use:

In [1]:
import gym, itertools, numpy as np, pandas as pd, random

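Each rule is represented by a class that stores the weights of a small neural network (its genotype) together with its fitness: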

In [2]:
class rules:
    """One individual of the evolving population.

    The genotype ``geno`` is a list of weight matrices
    ``[hidden, output]``; ``fitness`` is the score assigned to the
    individual by the caller. Every instance receives a unique,
    monotonically increasing ``id``.
    """

    # Class-wide counter used to hand out unique ids.
    _id = 0

    def __init__(self, geno=None, fitness=0):
        """Create a rule.

        Args:
            geno: List of weight matrices, or ``None`` for an empty
                genotype (to be filled by ``random_initiation``).
            fitness: Initial fitness value.
        """
        rules._id += 1
        self.id = rules._id
        # A fresh list per instance: a mutable default argument would be
        # shared between all rules created without an explicit genotype.
        self.geno = [] if geno is None else geno
        self.fitness = fitness

    def update_geno(self, geno):
        """Replace the genotype with ``geno``."""
        self.geno = geno

    def random_initiation(self, n_inputs=4, n_hidden=10, n_outputs=2):
        """Fill an empty genotype with standard-normal random weights.

        Does nothing when the rule already has a genotype.

        Args:
            n_inputs: Number of rows of the hidden weight matrix.
            n_hidden: Number of hidden units.
            n_outputs: Number of columns of the output weight matrix.
        """
        if len(self.geno) == 0:
            hidden = np.random.normal(size=(n_inputs, n_hidden))
            output = np.random.normal(size=(n_hidden, n_outputs))
            self.geno = [hidden, output]

    def update_fitness(self, new_fitness):
        """Set the fitness to ``new_fitness``."""
        self.fitness = new_fitness

    def reproduce(self, other, method='uniform'):
        """Return a new rule created by crossover of ``self`` and ``other``.

        With ``method='uniform'`` every weight of the child is taken from
        ``self`` or from ``other`` with equal probability.

        Args:
            other: The second parent; its genotype must have the same
                layer shapes as this rule's genotype.
            method: Crossover scheme. Only ``'uniform'`` is implemented.

        Returns:
            A new ``rules`` instance with fitness 0.

        Raises:
            ValueError: If ``method`` is not a supported scheme.
        """
        if method != 'uniform':
            # Previously an unknown method silently produced an all-zero child.
            raise ValueError(f"unsupported crossover method: {method!r}")

        child_geno = []
        for own, theirs in zip(self.geno, other.geno):
            own = np.asarray(own)
            theirs = np.asarray(theirs)
            take_own = np.random.random(own.shape) > 0.5
            child_geno.append(np.where(take_own, own, theirs))

        return rules(child_geno)

    def mutate(self, mutation_probability):
        """Return a mutated copy of this rule.

        Each weight is independently perturbed, with probability
        ``mutation_probability``, by Gaussian noise with standard
        deviation 0.1. The parent is left unchanged.

        Args:
            mutation_probability: Per-weight probability of mutation.

        Returns:
            A new ``rules`` instance with fitness 0.
        """
        mutated_geno = []
        for layer in self.geno:
            # Work in floating point so the noise is never truncated.
            layer = np.asarray(layer, dtype=float)
            hit = np.random.random(layer.shape) < mutation_probability
            noise = np.random.normal(scale=0.1, size=layer.shape)
            mutated_geno.append(np.where(hit, layer + noise, layer))

        return rules(mutated_geno, fitness=0)

Create a list containing `size` rules, each initialised with random network weights:

In [3]:
def generate_ruleset(size):
    """Build the initial population.

    Args:
        size: Number of rules to create.

    Returns:
        A list of ``size`` newly created rules, each given a random
        genotype via ``random_initiation``.
    """
    def _fresh_rule():
        # Create one rule and give it random weights.
        candidate = rules()
        candidate.random_initiation()
        return candidate

    return [_fresh_rule() for _ in range(0, size)]

We define the function `action`, which chooses an action for an observation according to a given rule:

In [4]:

def action(observation, rule):
    """Choose an action by feeding ``observation`` through the rule's network.

    The observation is multiplied by the hidden weights ``rule.geno[0]``,
    negative values are clipped to zero (ReLU), and the result is
    multiplied by the output weights ``rule.geno[1]``.

    Args:
        observation: Input vector for the network.
        rule: Object whose ``geno`` holds ``[hidden, output]`` weights.

    Returns:
        Index of the largest output value.
    """
    hidden_weights = rule.geno[0]
    output_weights = rule.geno[1]

    pre_activation = np.matmul(observation, hidden_weights)
    rectified = np.maximum(pre_activation, 0)
    scores = np.matmul(rectified, output_weights)

    return np.argmax(scores)

Select candidates for reproduction or mutation using fitness-proportionate selection:

In [6]:
def evolve_ruleset(ruleset, offspring_proportion, crossover_proportion, mutation_probability):
    """Produce the next generation from ``ruleset``.

    The first ``len(ruleset) - n`` rules are carried over unchanged, where
    ``n = round(offspring_proportion * len(ruleset))``; the caller is
    expected to pass the rules ordered best-first so that these are the
    fittest ones. The remaining ``n`` slots are filled with
    ``round(n * crossover_proportion)`` crossover children and, for the
    rest, mutated copies of selected rules.

    Args:
        ruleset: Current population; each rule needs ``fitness``,
            ``reproduce(other)`` and ``mutate(probability)``.
        offspring_proportion: Fraction of the population to replace.
        crossover_proportion: Fraction of the offspring made by crossover.
        mutation_probability: Per-weight mutation probability handed to
            ``mutate``.

    Returns:
        A new list: survivors, then crossover children, then mutants.
    """
    population_size = len(ruleset)
    num_offspring = round(offspring_proportion * population_size)

    num_children_needed = round(num_offspring * crossover_proportion)
    num_mutation_needed = num_offspring - num_children_needed

    survivors = ruleset[:population_size - num_offspring]

    total_fitness = sum(rule.fitness for rule in ruleset)
    if total_fitness > 0:
        selection_probs = [rule.fitness / total_fitness for rule in ruleset]
    else:
        # No fitness signal to weight by (and nothing to divide by):
        # fall back to uniform selection.
        selection_probs = None

    children = []
    for _ in range(num_children_needed):
        first_parent, second_parent = select_parents(ruleset, selection_probs)
        children.append(first_parent.reproduce(second_parent))

    mutants = []
    for _ in range(num_mutation_needed):
        # select_one takes the probabilities too; calling it with the
        # population alone raised a TypeError.
        candidate = select_one(ruleset, selection_probs)
        mutants.append(candidate.mutate(mutation_probability))

    return survivors + children + mutants


def select_one(ruleset, selection_probs=None):
    """Draw one rule, weighted by ``selection_probs``.

    Args:
        ruleset: Population to draw from.
        selection_probs: Per-rule probabilities summing to 1, or ``None``
            for a uniform draw.

    Returns:
        The selected rule.
    """
    return np.random.choice(ruleset, p=selection_probs)


def select_parents(ruleset, selection_probs):
    """Draw two distinct rules, weighted by ``selection_probs``.

    Args:
        ruleset: Population to draw from.
        selection_probs: Per-rule probabilities summing to 1, or ``None``
            for a uniform draw. At least two entries must be non-zero,
            because the draw is made without replacement.

    Returns:
        An array holding the two selected rules.
    """
    return np.random.choice(ruleset, size=2, replace=False, p=selection_probs)

Set the hyperparameters and generate the initial population of rules:

In [7]:
# Hyperparameters of the evolutionary run.

# Number of evolve/evaluate cycles performed by the simulation loop.
generations = 20
# Population size handed to generate_ruleset.
number_of_rules = 50
# NOTE: despite the name, the simulation loop performs one env.step per
# iteration of this count, so it is the number of steps each rule is
# evaluated for, not the number of episodes.
number_of_episodes = 1000
# Passed to the TimeLimit wrapper; presumably the maximum number of steps
# (and thus the maximum score) of a single episode.
available_maxscore = 200
# Fraction of the population replaced by offspring in each generation.
offspring_proportion = 0.9
# Fraction of the offspring created by crossover; the rest are mutants.
crossover_proportion = 0.8
# Probability with which each individual weight is perturbed in a mutation.
mutation_probability = 0.01

# Initial population of randomly initialised rules.
ruleset = generate_ruleset(number_of_rules)

Running the simulation for a given number of generations:

In [8]:
# Evaluate and evolve the population for `generations` generations,
# writing a ranked CSV per generation and a combined scoreboard at the end.
env = gym.make("CartPole-v1", render_mode="human")
env = gym.wrappers.TimeLimit(env, available_maxscore)
list_to_return = []
scoreboard = pd.DataFrame()

try:
    for i in range(1, generations + 1):
        for rule in ruleset:
            # Start every rule from a fresh episode. Without this reset a
            # rule would begin in whatever state the previous rule left
            # the environment in, which makes fitness values incomparable.
            observation, info = env.reset()
            score = 0
            # Counts the episode in progress plus every finished one.
            epi = 1
            for _ in range(number_of_episodes):
                observation, reward, terminated, truncated, info = env.step(action(observation, rule))
                score += reward

                if terminated or truncated:
                    observation, info = env.reset()
                    epi += 1

            # Fitness is the average reward per episode.
            rule.update_fitness(score / epi)

        ranked_ruleset = sorted(ruleset, key=lambda x: x.fitness, reverse=True)

        score_list = [r.fitness for r in ranked_ruleset]
        id_list = [r.id for r in ranked_ruleset]

        df = pd.DataFrame({'ID': id_list, 'Fitness': score_list})
        df.to_csv(f'results_generation_{i}.csv', index=False, header=False, sep=",")

        scoreboard[f'Generation_{i}'] = score_list

        # evolve_ruleset keeps the leading rules, so pass them best-first.
        ruleset = evolve_ruleset(ranked_ruleset, offspring_proportion, crossover_proportion, mutation_probability)

    # Use `generations` rather than the leaked loop variable, which is
    # undefined when the loop body never runs.
    scoreboard.to_csv(f'scores_after_{generations}_generations.csv', index=False, header=False)
finally:
    # Release the environment (and its render window) even on failure.
    env.close()
