Importing necessary libraries

In [None]:
import gym, itertools, numpy as np, pandas as pd, random

Implementing rule as a class

In [None]:
class rules:
    """A candidate controller encoded as the weight matrices of a small network.

    Attributes:
        id: Unique integer taken from the class-level counter ``_id``.
        geno: Either an empty list (weights not initialised yet) or a list of
            weight matrices ``[hidden, output]``.
        fitness: Score assigned to this rule; 0 until it has been evaluated.
    """
    #use a class variable as counter for rule instances
    _id = 0

    def __init__(self, geno=None, fitness=0):
        """Create a rule, optionally with an existing genotype and fitness.

        Args:
            geno: List of weight matrices, or ``None``/``[]`` for "not yet
                initialised".
            fitness: Initial fitness value.
        """
        self.id = rules._id
        rules._id += 1
        # A ``None`` sentinel gives every instance its own list; a literal
        # ``[]`` default would be one list object shared by all instances.
        self.geno = [] if geno is None else geno
        self.fitness = fitness

    def update_geno(self, geno):
        """Replace the genotype with ``geno``."""
        self.geno = geno

    def random_initiation(self):
        """Fill an empty genotype with standard-normal weights.

        Creates a (4, 10) hidden-layer matrix and a (10, 2) output-layer
        matrix. Does nothing when the rule already has a genotype.
        """
        if len(self.geno) == 0:
            #the weights of the hidden layer
            hidden = np.random.normal(size=(4, 10))
            #the weights of the output layer
            output = np.random.normal(size=(10, 2))
            self.geno = [hidden, output]

    def update_fitness(self, new_fitness):
        """Overwrite the stored fitness with ``new_fitness``."""
        self.fitness = new_fitness

    def reproduce(self, other, method='uniform'):
        """Create a child by crossing this rule with ``other``.

        With uniform crossover every weight is copied from ``self`` or from
        ``other`` with equal probability, independently per element.

        Args:
            other: The second parent; its layers must have the same shapes.
            method: Crossover scheme; only ``'uniform'`` is implemented.

        Returns:
            A new ``rules`` instance with a fresh id and fitness 0.

        Raises:
            ValueError: If ``method`` is unknown, a parent has no genotype, or
                the parents' layers do not line up.
        """
        if method != 'uniform':
            # Fail loudly instead of handing back an all-zero network.
            raise ValueError(f"unsupported crossover method: {method!r}")
        if len(self.geno) == 0 or len(self.geno) != len(other.geno):
            raise ValueError("both parents need initialised genotypes with the same number of layers")

        child_geno = []
        for own, theirs in zip(self.geno, other.geno):
            own, theirs = np.asarray(own), np.asarray(theirs)
            if own.shape != theirs.shape:
                raise ValueError("parent layers must have matching shapes")
            # One coin flip per weight decides which parent it comes from.
            take_own = np.random.random(own.shape) > 0.5
            child_geno.append(np.where(take_own, own, theirs))

        #creating a new instance for the offspring
        return rules(geno=child_geno)

    def mutate(self, p=0.1, scale=0.1):
        """Create a mutated copy of this rule.

        Each weight independently receives, with probability ``p``, a shift
        drawn from a normal distribution with mean 0 and standard deviation
        ``scale``. The original rule is left unchanged.

        Args:
            p: Per-weight mutation probability.
            scale: Standard deviation of the shift.

        Returns:
            A new ``rules`` instance with a fresh id and fitness 0.

        Raises:
            ValueError: If this rule has no genotype yet.
        """
        if len(self.geno) == 0:
            raise ValueError("cannot mutate a rule without an initialised genotype")

        mutated_geno = []
        for layer in self.geno:
            layer = np.asarray(layer, dtype=float)
            hit = np.random.random(layer.shape) < p
            shift = np.random.normal(scale=scale, size=layer.shape)
            # np.where builds a new array, so the parent's weights are untouched.
            mutated_geno.append(np.where(hit, layer + shift, layer))

        #creating a new instance for the offspring
        return rules(geno=mutated_geno, fitness=0)

Generating a set of randomised rules of a given size

In [None]:
def generate_ruleset(size):
    """Build a list of ``size`` rules, each given random initial weights.

    Rules are created and initialised one after another, so ids and random
    draws follow list order.
    """
    def _fresh_rule():
        # instantiate first, then let the rule draw its own weights
        candidate = rules()
        candidate.random_initiation()
        return candidate

    return [_fresh_rule() for _ in range(size)]

Define function action to determine the output of the ANN

In [None]:

def action(observation, rule):
    """Return the index of the strongest network output for ``observation``.

    The observation is multiplied by the first matrix in ``rule.geno``, passed
    through ReLU, multiplied by the second matrix, and the position of the
    largest resulting value is returned.
    """
    hidden_weights, output_weights = rule.geno[0], rule.geno[1]
    # ReLU: negative hidden pre-activations are clamped to zero
    relu = np.maximum(np.matmul(observation, hidden_weights), 0)
    return np.argmax(np.matmul(relu, output_weights))

Selecting candidates for reproduction or mutation using fitness proportionate selection

In [None]:
def select_one(ruleset):
    """Draw one rule by fitness-proportionate (roulette-wheel) selection.

    Args:
        ruleset: Non-empty sequence of objects with a numeric ``fitness``.

    Returns:
        One element of ``ruleset``, chosen with probability
        ``fitness / total_fitness``. If the total fitness is not positive,
        every rule is equally likely.

    Raises:
        ValueError: If ``ruleset`` is empty, or (from numpy) if some fitness
            values are negative while the total is positive.
    """
    if len(ruleset) == 0:
        raise ValueError("cannot select from an empty ruleset")

    fitness = np.array([rule.fitness for rule in ruleset], dtype=float)
    total_fitness = fitness.sum()
    if total_fitness > 0:
        selection_probs = fitness / total_fitness
    else:
        # No fitness signal at all: fall back to a uniform draw rather than
        # dividing by zero.
        selection_probs = np.full(len(ruleset), 1.0 / len(ruleset))

    # Sample an index rather than the objects themselves, so numpy never has
    # to coerce rule instances into an array.
    chosen = np.random.choice(len(ruleset), p=selection_probs)
    return ruleset[chosen]

def select_parents(ruleset):
    """Draw two distinct rules by fitness-proportionate selection.

    Args:
        ruleset: Sequence of at least two objects with a numeric ``fitness``.

    Returns:
        A length-2 object array holding two different elements of ``ruleset``.
        When at least two rules have positive fitness, they are sampled without
        replacement with probability proportional to fitness. When exactly one
        rule has positive fitness, it is paired with a uniformly chosen partner.
        When the total fitness is not positive, both are drawn uniformly.

    Raises:
        ValueError: If ``ruleset`` has fewer than two elements, or (from numpy)
            if some fitness values are negative while the total is positive.
    """
    count = len(ruleset)
    if count < 2:
        raise ValueError("need at least two rules to select parents")

    fitness = np.array([rule.fitness for rule in ruleset], dtype=float)
    total_fitness = fitness.sum()
    positive = np.count_nonzero(fitness > 0)

    if total_fitness > 0 and positive >= 2:
        #select two rules with probability proportional to fitness
        selection_probs = fitness / total_fitness
        picked = np.random.choice(count, size=2, replace=False, p=selection_probs)
    elif total_fitness > 0 and positive == 1:
        # Weighted sampling without replacement needs two non-zero weights;
        # keep the only fit rule and give it a random partner instead.
        first = int(np.argmax(fitness))
        partners = [index for index in range(count) if index != first]
        picked = [first, np.random.choice(partners)]
    else:
        # No fitness signal: avoid dividing by zero and draw uniformly.
        picked = np.random.choice(count, size=2, replace=False)

    # Fill an object array element by element so the return type matches what
    # np.random.choice would give for a sequence of rule objects.
    parents = np.empty(2, dtype=object)
    parents[0] = ruleset[picked[0]]
    parents[1] = ruleset[picked[1]]
    return parents

Evolving the set of ANNs

In [None]:
def evovle(ruleset, portion_of_offsprings, portion_of_offsprings_from_crossover, mutation_probability):
    """Produce the next generation from an evaluated ``ruleset``.

    ``round(portion_of_offsprings * len(ruleset))`` members are replaced by
    new rules; of those, the share ``portion_of_offsprings_from_crossover``
    (rounded) comes from ``select_parents`` + ``reproduce`` and the rest from
    ``select_one`` + ``mutate(p=mutation_probability)``. The remaining slots
    are filled with the fittest existing rules, carried over unchanged.

    Returns:
        A list: carried-over rules first (best first), then the crossover
        children, then the mutated children.
    """
    population_size = len(ruleset)
    replaced = round(portion_of_offsprings * population_size)
    from_crossover = round(replaced * portion_of_offsprings_from_crossover)
    from_mutation = replaced - from_crossover

    # best-first ordering; rules with equal fitness keep their incoming order
    ranked = list(ruleset)
    ranked.sort(key=lambda candidate: candidate.fitness, reverse=True)

    # the top rules survive into the next generation as they are
    survivors = ranked[:population_size - replaced]

    # children bred from two selected parents
    crossed = [
        first_parent.reproduce(second_parent)
        for first_parent, second_parent in (
            select_parents(ranked) for _ in range(from_crossover)
        )
    ]

    # children obtained by mutating a single selected rule
    mutated = [
        select_one(ranked).mutate(p=mutation_probability)
        for _ in range(from_mutation)
    ]

    #the whole new generation
    return survivors + crossed + mutated
    



Collection of the hyperparameters

In [None]:
# how many generations the evolutionary loop runs
# (the name is spelled "generatons"; the simulation cell reads it under that name)
number_of_generatons = 20
# environment steps each rule is evaluated for per generation (may span several episodes)
number_of_total_timesteps = 1000
# population size passed to generate_ruleset
number_of_rules = 50
# fraction of the population replaced by new rules each generation
portion_of_offsprings = 0.9
# fraction of those new rules produced by crossover; the rest come from mutation
portion_of_offsprings_from_crossover = 0.5
# per-weight probability of a shift when a rule is mutated
mutation_probability = 0.1

Prepare the environment and generate a ruleset of the given size, with seeds for reproducibility

In [None]:
# Seed the global RNGs too: the initial weights (and later crossover, mutation
# and selection) draw from numpy's global generator, so seeding only the
# environment would leave runs irreproducible.
np.random.seed(42)
random.seed(42)

# NOTE(review): render_mode="human" draws every step in a window; consider
# dropping it for faster training runs.
env = gym.make("CartPole-v1", render_mode="human")
# limit every episode to 200 steps
env = gym.wrappers.TimeLimit(env,200)
observation, info= env.reset(seed = 42)

ruleset = generate_ruleset(number_of_rules)

Simulating the environment

In [None]:

# one column per generation, holding that generation's fitness values best-first
scoreboard = pd.DataFrame()
try:
    for i in range(number_of_generatons):

        for rule in ruleset:
            # Start every rule from a fresh episode; otherwise it would inherit
            # the pole state and elapsed step count left by the previous rule.
            observation, info = env.reset(seed=42)
            score = 0
            maxscore = 0

            for timestep in range(number_of_total_timesteps):
                observation, reward, terminated, truncated, info = env.step(action(observation, rule))
                score += reward

                if terminated or truncated:
                    observation, info = env.reset(seed=42)
                    # fitness is the best completed episode within the step budget
                    if score > maxscore:
                        maxscore = score
                    score = 0

            rule.update_fitness(int(maxscore))

        ranked_ruleset = sorted(ruleset, key=lambda x: x.fitness, reverse = True)

        score_list = [r.fitness for r in ranked_ruleset]
        id_list = [r.id for r in ranked_ruleset]

        # per-generation snapshot: rule id and fitness, best first
        df = pd.DataFrame({'ID': id_list,'Fitness': score_list})
        df.to_csv(f'results_generation_{i}.csv', index=False, header=False, sep=",")

        scoreboard[f'Generation_{i}'] = score_list

        # evovle takes four arguments; the mutation probability was missing here
        ruleset = evovle(ranked_ruleset, portion_of_offsprings, portion_of_offsprings_from_crossover, mutation_probability)

    # Name the file after the number of generations run, not the last loop
    # index (which is one less, and undefined when no generation runs).
    scoreboard.to_csv(f'scores_after_{number_of_generatons}_generations.csv', index=False, header=False)
finally:
    # release the environment (and its render window) even if a step fails
    env.close()
