First, import the libraries used in this notebook:

In [None]:
import gym, itertools, numpy as np, pandas as pd, random

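Define a rule (one individual of the population) as a class whose genotype holds the weight matrices of a small two-layer neural network: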

In [None]:
class rules:
    """One individual of the evolving population.

    The genotype ``geno`` is a two-element list ``[hidden, output]`` of
    weight matrices; ``fitness`` is the score assigned to the individual by
    the caller. Every instance receives a unique, increasing ``id``.
    """

    # Class-wide counter used to hand out a unique id to each new instance.
    _id = 0

    def __init__(self, geno=None, fitness=0):
        """Create an individual.

        Args:
            geno: List of weight matrices, or ``None`` for an empty genotype.
            fitness: Initial fitness value.
        """
        self.id = rules._id
        rules._id += 1
        # Build a fresh list per instance; a literal ``[]`` default would be
        # one object shared by every instance created without a genotype.
        self.geno = [] if geno is None else geno
        self.fitness = fitness

    def update_geno(self, geno):
        """Replace the genotype with ``geno``."""
        self.geno = geno

    def random_initiation(self, n_inputs=4, n_hidden=10, n_outputs=2):
        """Fill an empty genotype with normally distributed random weights.

        Does nothing when the genotype is already populated.

        Args:
            n_inputs: Number of rows of the hidden-layer matrix.
            n_hidden: Number of hidden units.
            n_outputs: Number of columns of the output-layer matrix.
        """
        if len(self.geno) == 0:
            hidden = np.random.normal(size=(n_inputs, n_hidden))
            output = np.random.normal(size=(n_hidden, n_outputs))
            self.geno = [hidden, output]

    def update_fitness(self, new_fitness):
        """Set the fitness to ``new_fitness``."""
        self.fitness = new_fitness

    @staticmethod
    def _uniform_crossover(own, theirs):
        """Return a matrix taking each entry from ``own`` or ``theirs`` with equal chance."""
        own = np.asarray(own)
        theirs = np.asarray(theirs)
        take_own = np.random.random(own.shape) > 0.5
        return np.where(take_own, own, theirs)

    @staticmethod
    def _perturb(layer, p):
        """Return a copy of ``layer`` with N(0, 0.1) noise added to each entry with probability ``p``."""
        layer = np.asarray(layer)
        noise = np.random.normal(scale=0.1, size=layer.shape)
        hit = np.random.random(layer.shape) < p
        return np.where(hit, layer + noise, layer)

    def reproduce(self, other, method='uniform'):
        """Create a child by crossing this individual with ``other``.

        Args:
            other: The second parent; its genotype must have the same layer
                shapes as this one.
            method: Crossover scheme. Only ``'uniform'`` is implemented.

        Returns:
            A new ``rules`` instance with fitness 0.

        Raises:
            ValueError: If ``method`` is not a supported scheme.
        """
        if method != 'uniform':
            # Fail loudly instead of handing back an all-zero network.
            raise ValueError(f"unsupported crossover method: {method!r}")

        hidden = self._uniform_crossover(self.geno[0], other.geno[0])
        output = self._uniform_crossover(self.geno[1], other.geno[1])

        child = rules()
        child.update_geno([hidden, output])
        return child

    def mutate(self, p=0.5):
        """Create a mutated copy of this individual.

        Args:
            p: Per-weight probability of adding Gaussian noise (scale 0.1).

        Returns:
            A new ``rules`` instance with fitness 0; this individual is left
            unchanged.
        """
        hidden = self._perturb(self.geno[0], p)
        output = self._perturb(self.geno[1], p)

        child = rules()
        child.update_geno([hidden, output])
        child.update_fitness(0)
        return child

Create the initial population: a list of rules, each initialised with random network weights:

In [None]:
def generate_ruleset(size):
    """Build a population of ``size`` randomly initialised rules.

    Args:
        size: Number of individuals to create.

    Returns:
        A list of ``rules`` instances, in creation order.
    """
    def _fresh_rule():
        # Create one individual and give it random weights before moving on.
        individual = rules()
        individual.random_initiation()
        return individual

    return [_fresh_rule() for _ in range(size)]

Define the function `action`, which chooses an action for an observation by passing it through the network encoded in a rule's genotype:

In [None]:

def action(observation, rule):
    """Pick an action by running ``observation`` through the rule's network.

    The observation is multiplied by the first genotype matrix, passed
    through a ReLU, multiplied by the second genotype matrix, and the index
    of the largest output is returned.

    Args:
        observation: Input vector for the network.
        rule: Object whose ``geno`` holds the two weight matrices.

    Returns:
        Index of the largest network output.
    """
    hidden_weights = rule.geno[0]
    output_weights = rule.geno[1]

    # ReLU: negative pre-activations are clipped to zero.
    relu = np.maximum(np.matmul(observation, hidden_weights), 0)
    scores = np.matmul(relu, output_weights)
    return np.argmax(scores)

Select candidates for reproduction or mutation using fitness-proportionate selection:

In [None]:
def _selection_probs(ruleset):
    """Return fitness-proportionate selection probabilities for ``ruleset``.

    Returns ``None`` when the total fitness is zero, which makes
    ``np.random.choice`` fall back to a uniform distribution instead of
    dividing by zero.
    """
    total_fitness = sum(rule.fitness for rule in ruleset)
    if total_fitness == 0:
        return None
    return [rule.fitness / total_fitness for rule in ruleset]


def select_one(ruleset):
    """Draw one rule with probability proportional to its fitness.

    Args:
        ruleset: Non-empty sequence of objects with a ``fitness`` attribute.

    Returns:
        The selected rule. Selection is uniform when every fitness is zero.
    """
    return np.random.choice(ruleset, p=_selection_probs(ruleset))


def select_parents(ruleset):
    """Draw two distinct rules with probability proportional to fitness.

    Args:
        ruleset: Sequence of at least two objects with a ``fitness``
            attribute.

    Returns:
        An array holding the two selected rules. Selection is uniform when
        every fitness is zero.

    Raises:
        ValueError: Raised by ``np.random.choice`` when fewer than two rules
            have a non-zero selection probability.
    """
    return np.random.choice(
        ruleset, size=2, replace=False, p=_selection_probs(ruleset)
    )

In [None]:
def evovle(ruleset, p_n, p_crossover):
    """Produce the next generation from ``ruleset``.

    The population is ranked by fitness (best first). The top individuals
    are carried over unchanged, and the remaining slots are filled first
    with crossover children and then with mutated copies of selected rules.

    Args:
        ruleset: Current population; each member needs ``fitness``,
            ``reproduce`` and ``mutate``.
        p_n: Fraction of the population to replace with new individuals.
        p_crossover: Fraction of the new individuals created by crossover;
            the rest are created by mutation.

    Returns:
        A new list: survivors, then crossover children, then mutants.
    """
    population_size = len(ruleset)
    replaced = round(p_n * population_size)
    crossover_count = round(replaced * p_crossover)
    mutation_count = replaced - crossover_count

    # Best-first ranking; the leading slice survives untouched.
    ranked = list(ruleset)
    ranked.sort(key=lambda x: x.fitness, reverse=True)
    survivors = ranked[:population_size - replaced]

    children = []
    for _ in range(crossover_count):
        first_parent, second_parent = select_parents(ranked)
        children.append(first_parent.reproduce(second_parent))

    mutants = [select_one(ranked).mutate() for _ in range(mutation_count)]

    return survivors + children + mutants
    



Prepare the environment and generate the initial population of rules:

In [None]:
# Create the CartPole-v1 environment; render_mode="human" asks gym to render
# the simulation while it runs.
env = gym.make("CartPole-v1", render_mode="human")
# Wrap the environment in a TimeLimit with 200 as its step limit.
env = gym.wrappers.TimeLimit(env,200)
# Start the first episode; the evaluation loop consumes this observation.
observation, info= env.reset()
# NOTE(review): list_to_return is not used anywhere in the visible code.
list_to_return = []

# Initial population: 100 rules with randomly initialised weights.
ruleset = generate_ruleset(100)

Run the evolution for 10 generations:

In [None]:
# NOTE(review): pygame is not referenced by name below; presumably it is
# imported so the "human" render window is available — confirm.
import pygame

# Number of generations to run.
gen = 10

# One column of ranked fitness values per generation.
scoreboard = pd.DataFrame()
for i in range(gen):

    for rule in ruleset:
        # Start every evaluation from a fresh episode so a rule is never
        # scored on a state left behind by the previously evaluated rule.
        observation, info = env.reset()
        score = 0
        maxscore = 0

        # Budget of 1000 environment steps per rule, spread over as many
        # episodes as fit; fitness is the best single-episode score.
        for _step in range(1000):
            observation, reward, terminated, truncated, info = env.step(action(observation, rule))
            score += reward

            if terminated or truncated:
                observation, info = env.reset()
                maxscore = max(maxscore, score)
                score = 0

        # The episode still running when the budget ends counts too: its
        # partial score is a lower bound on what that episode would reach.
        maxscore = max(maxscore, score)
        rule.update_fitness(int(maxscore))

    ranked_ruleset = sorted(ruleset, key=lambda x: x.fitness, reverse = True)

    score_list = [r.fitness for r in ranked_ruleset]
    id_list = [r.id for r in ranked_ruleset]

    # Per-generation dump of (id, fitness) pairs, best first.
    df = pd.DataFrame({'ID': id_list,'Fitness': score_list})
    df.to_csv(f'results_generation_{i}.csv', index=False, header=False, sep=",")

    scoreboard[f'Generation_{i}'] = score_list

    # Replace 80% of the population; 80% of the newcomers come from crossover.
    ruleset = evovle(ranked_ruleset, 0.8, 0.8)

# Name the file after the number of generations actually run; the loop index
# is one less than that and is undefined when gen is 0.
scoreboard.to_csv(f'scores_after_{gen}_generations.csv', index=False, header=False)

env.close()
