First, we import the libraries used in this notebook:

In [10]:
import copy
import itertools
import random

import gym
import numpy as np
import pandas as pd

Next, we divide each observation value into a fixed number of ranges. The cart position and pole angle ranges are given by the environment; the cart velocity and pole angular velocity ranges are meant to be determined by the GA, but for now they use a fixed placeholder value:

In [11]:
def mapping_observation_to_1D(observation, bits):
    """Encode a 4-value observation as a flat list of 0/1 cells.

    Each of the four observation values is mapped onto ``bits`` cells: the
    cell whose interval contains the value is set to 1, all others are 0.
    Every group of ``bits`` cells is followed by 5 separator cells set to 0,
    so the result has ``4 * (bits + 5)`` entries.

    A value lying outside its range (or exactly matching no interval)
    produces an all-zero group.

    Parameters
    ----------
    observation : indexable
        Values read at indices 0..3, in the order cart position, cart
        velocity, pole angle, pole angular velocity.
    bits : int
        Number of intervals (and cells) per observation value.

    Returns
    -------
    list of int
        The encoded row of 0s and 1s.
    """
    # Placeholder half-width for both velocity ranges (fixed value for now).
    velocity_limit = 5
    separator = [0] * 5

    # Half-widths of the symmetric ranges, in observation order:
    # cart position (-2.4, 2.4), cart velocity (TODO),
    # pole angle (-0.2095 rad, 0.2095 rad), pole angular velocity (TODO).
    half_widths = (2.4, velocity_limit, 0.2095, velocity_limit)

    encoded = []
    for position, half_width in enumerate(half_widths):
        edges = np.linspace(-half_width, half_width, num=bits + 1)
        cells = [0] * bits

        # Bounds are inclusive on both sides; the first matching interval wins.
        for upper in range(1, len(edges)):
            if edges[upper - 1] <= observation[position] <= edges[upper]:
                cells[upper - 1] = 1
                break

        encoded.extend(cells)
        encoded.extend(separator)

    return encoded


Rule as a class

In [12]:
class rules:
    """One individual of the genetic algorithm: a lookup table for a 1D automaton.

    ``geno`` maps every bit-string of length ``num_neighbours`` (e.g.
    ``"01101"``) to an output bit (0 or 1). ``fitness`` stores the score
    assigned to this individual by the caller.

    Parameters
    ----------
    geno : dict, optional
        Existing lookup table. When omitted, the instance starts with its own
        empty table.
    fitness : number
        Initial fitness value.
    num_neighbours : int
        Length of the bit-strings used as keys of ``geno``.
    """

    def __init__(self, geno=None, fitness=0, num_neighbours=5):
        # A literal ``{}`` default would be one dict shared by every instance
        # created without a genotype, so offspring built via ``rules()`` would
        # all write into the same table. Create a fresh dict per instance.
        self.geno = {} if geno is None else geno
        self.fitness = fitness
        self.num_neighbours = num_neighbours

        # All 2**num_neighbours bit-strings, in itertools.product order.
        self.configurations = [
            "".join(map(str, bits))
            for bits in itertools.product([0, 1], repeat=num_neighbours)
        ]

    def update_geno(self, gene):
        """Overwrite the lookup table from a sequence of output bits.

        ``gene[i]`` becomes the output for ``self.configurations[i]``.
        """
        for i in range(len(self.configurations)):
            self.geno[self.configurations[i]] = gene[i]

    def random_initiation(self):
        """Fill an empty lookup table with random output bits.

        Does nothing when the table already has entries.
        """
        if self.geno == {}:
            self.geno = {
                configuration: random.choice([0, 1])
                for configuration in self.configurations
            }

    def update_fitness(self, new_fitness):
        """Store ``new_fitness`` as this individual's fitness."""
        self.fitness = new_fitness

    def reproduce(self, other, method='random_one_point'):
        """Create a child by crossing this individual with ``other``.

        With ``'random_one_point'`` a random split index is drawn; the child
        takes this individual's output bits before the split and ``other``'s
        output bits from the split onwards.

        Raises
        ------
        ValueError
            If ``method`` is not a supported crossover method.
        """
        if method != 'random_one_point':
            raise ValueError(f"unknown crossover method: {method!r}")

        own_bits = list(self.geno.values())
        partner_bits = list(other.geno.values())
        split_point = random.randrange(len(own_bits))

        # The child keeps the parent's neighbourhood size so its key set
        # matches the genes being copied.
        child = rules(num_neighbours=self.num_neighbours)
        child.update_geno(own_bits[:split_point] + partner_bits[split_point:])
        return child

    def mutate(self, p=0.01):
        """Flip each output bit independently with probability ``p``.

        The fitness is reset to 0 because the stored score no longer
        describes the mutated table.
        """
        current = list(self.geno.values())
        flipped = [digit if random.random() > p else 1 - digit for digit in current]

        self.update_geno(flipped)
        self.update_fitness(0)

Create a list of rules, each of which maps every combination of 5 neighbouring bits to a randomly chosen 0 or 1:

In [13]:
def generate_ruleset(size):
    """Build a population of ``size`` randomly initialised ``rules`` objects.

    Each individual is created with the default constructor and then filled
    via ``random_initiation()``.
    """
    def spawn():
        individual = rules()
        individual.random_initiation()
        return individual

    return [spawn() for _ in range(size)]

We define the function `action`, which chooses the action to take according to a given rule:

In [14]:
def action(mapped_observation, rule):
    """Run the rule as a circular 1D automaton and vote on an action.

    The encoded observation is treated as a ring of cells. The automaton is
    advanced ``len(mapped_observation)`` steps; at each step the 5 cells
    starting at position ``bit`` form the lookup key and the result is
    written to the middle cell of that window (``bit + 2``).

    Parameters
    ----------
    mapped_observation : list of int
        Row of 0/1 cells. It is not modified.
    rule : object
        Must expose ``geno``, a dict mapping 5-character bit-strings to 0/1.

    Returns
    -------
    int
        1 if more than half of the final cells are 1, otherwise 0.
    """
    size = len(mapped_observation)
    window = 5

    current = list(mapped_observation)
    for _ in range(size):
        # A fresh buffer per step: reusing the previous one would make the
        # automaton read cells that were already overwritten in this step.
        following = [0] * size
        for bit in range(size):
            # Wrap around using the real row length instead of a fixed 60,
            # so rows of any length are handled.
            combination = "".join(
                str(current[(bit + offset) % size]) for offset in range(window)
            )
            following[(bit + 2) % size] = rule.geno[combination]
        current = following

    return 1 if current.count(1) > (size / 2) else 0

Selecting candidates for reproduction or mutation using fitness proportionate selection

In [15]:
def select_one(ruleset):
    """Pick one individual with probability proportional to its fitness.

    When the total fitness is 0 there is nothing to weight by (and the
    proportional formula would divide by zero), so the choice is uniform.

    Parameters
    ----------
    ruleset : sequence
        Individuals exposing a numeric ``fitness`` attribute.

    Returns
    -------
    The selected element of ``ruleset``.
    """
    total_fitness = sum(rule.fitness for rule in ruleset)
    if total_fitness == 0:
        return np.random.choice(ruleset)

    selection_probs = [rule.fitness / total_fitness for rule in ruleset]
    return np.random.choice(ruleset, p=selection_probs)

def select_parents(ruleset):
    """Pick two distinct individuals, weighted by fitness.

    Sampling is done without replacement with probability proportional to
    fitness. If the total fitness is 0, or fewer than two individuals have a
    positive fitness, weighted sampling of two distinct individuals is not
    possible, so the pair is drawn uniformly instead.

    Parameters
    ----------
    ruleset : sequence
        Individuals exposing a numeric ``fitness`` attribute.

    Returns
    -------
    numpy.ndarray
        Array holding the two selected elements of ``ruleset``.
    """
    fitnesses = [rule.fitness for rule in ruleset]
    total_fitness = sum(fitnesses)
    viable = sum(1 for fitness in fitnesses if fitness > 0)

    if total_fitness == 0 or viable < 2:
        return np.random.choice(ruleset, size=2, replace=False)

    selection_probs = [fitness / total_fitness for fitness in fitnesses]
    return np.random.choice(ruleset, size=2, replace=False, p=selection_probs)

In [16]:
def evovle(ruleset, p_n, p_crossover):
    """Produce the next generation from an evaluated population.

    The population is ranked by fitness. A fraction ``p_n`` of it is
    replaced: ``p_crossover`` of the replacements are children produced by
    crossover, the remainder are mutated copies of selected individuals.
    The best ``len(ruleset) - n`` individuals are carried over unchanged.

    Parameters
    ----------
    ruleset : list
        Current population; each element exposes ``fitness``,
        ``reproduce(other)`` and ``mutate()``.
    p_n : float
        Fraction of the population to replace.
    p_crossover : float
        Fraction of the replacements created by crossover.

    Returns
    -------
    list
        Survivors, then crossover offspring, then mutants.
    """
    population_size = len(ruleset)
    replaced = round(p_n * population_size)
    num_children_needed = round(replaced * p_crossover)
    num_mutation_needed = replaced - num_children_needed

    ranked = sorted(ruleset, key=lambda rule: rule.fitness, reverse=True)
    survivors = ranked[:population_size - replaced]

    offspring = []
    for _ in range(num_children_needed):
        p1, p2 = select_parents(ranked)
        offspring.append(p1.reproduce(p2))

    mutants = []
    for _ in range(num_mutation_needed):
        # Mutate a copy: the selected individual may also be one of the
        # survivors (or be selected again), and mutating it in place would
        # alter that individual and put the same object in the result twice.
        mutant = copy.deepcopy(select_one(ranked))
        mutant.mutate()
        mutants.append(mutant)

    return survivors + offspring + mutants
    



Prepare the environment and generate a given number of rulesets:

In [17]:
# CartPole environment rendered on screen, wrapped in a TimeLimit of 200.
env = gym.wrappers.TimeLimit(
    gym.make("CartPole-v1", render_mode="human"),
    200,
)
observation, info = env.reset()
list_to_return = []

# Initial population of 100 randomly initialised rules.
ruleset = generate_ruleset(100)

Running for 8 generations

In [18]:
# NOTE(review): pygame is not referenced directly in this cell; presumably it
# is imported for the environment's "human" rendering -- confirm before removing.
import pygame


gen = 8
scoreboard = pd.DataFrame()
for i in range(gen):
    rule_list = []
    score_list = []

    for rule in ruleset:
        # Start every evaluation from a fresh episode; otherwise the first
        # episode of this rule would continue from whatever state the
        # previously evaluated rule left the environment in.
        observation, info = env.reset()
        score = 0
        maxscore = 0

        # Each iteration is a single environment step (1000 steps per rule),
        # possibly spanning several episodes.
        for episode in range(1000):
            mapped_observation = mapping_observation_to_1D(observation, 10)
            observation, reward, terminated, truncated, info = env.step(action(mapped_observation, rule))
            score += reward

            if terminated or truncated:
                observation, info = env.reset()
                # Fitness is the best completed episode within the step budget.
                if score > maxscore:
                    maxscore = score
                score = 0

        rule_list.append(rule.geno)
        score_list.append(int(maxscore))
        rule.update_fitness(int(maxscore))

    # One CSV per generation: each row holds a rule table and its fitness.
    df = pd.DataFrame({'Rules':rule_list, 'Fitness': score_list})
    # df = df.astype(str)
    df.to_csv(f'results_generation_{i}.csv', index=False, header=False, sep=";")

    scoreboard[f'Generation_{i}'] = score_list

    ruleset = evovle(ruleset,0.5, 0.8)

# Name the file after the number of generations actually run (the last loop
# index is one less than that).
scoreboard.to_csv(f'scores_after_{gen}_generations.csv', index=False, header=False)

env.close()
