In [96]:
pip install gymnasium



In [97]:
import numpy as np
import gymnasium as gym
import matplotlib.pyplot as plt

Global parameters

In [98]:
# Global experiment settings read by the cells below.
pop_size = 100                 # number of individuals (rows of the population)
max_moves = 500                # number of genes, i.e. actions, per individual
half_pop_size = pop_size // 2  # half the population size

Initial population creation

In [99]:
# Random starting population: one row per individual, one binary action per move.
population = np.random.choice([0, 1], size=(pop_size, max_moves))

### GA parameters

In [100]:
# Probability that any single gene is flipped by `mutation`.
mutation_rate = 0.05

### GA functions

The fitness is read directly from the environment: it is the number of moves an individual performs before its episode ends.

In [101]:
def fitness_selection(fitnesses):
  """Draw parent pairs with probability proportional to fitness.

  Args:
    fitnesses: 1-D array-like of non-negative fitness values, one per
      individual.

  Returns:
    Integer array of shape (len(fitnesses) // 2, 2). Each row holds the
    population indices of two parents. Indices are drawn with replacement,
    so an individual can be paired with itself.
  """
  fitnesses = np.asarray(fitnesses, dtype=float)
  n_individuals = len(fitnesses)
  half_pop_size = n_individuals // 2

  total = fitnesses.sum()
  # With an all-zero fitness vector the normalisation would divide by zero;
  # p=None makes np.random.choice fall back to a uniform draw.
  probabilities = fitnesses / total if total > 0 else None

  # The candidate indices come from the fitness vector itself, so the draw
  # stays consistent even if a module-level pop_size holds another value.
  return np.random.choice(n_individuals, size=(half_pop_size, 2), p=probabilities)

def rank_selection(fitnesses):
  """Draw parent pairs with probability proportional to fitness rank.

  The worst individual has rank 0 (and is therefore never selected) and the
  best has rank n - 1; selection probability is rank / sum(ranks).

  Args:
    fitnesses: 1-D array-like of fitness values, one per individual.

  Returns:
    Integer array of shape (len(fitnesses) // 2, 2). Each row holds the
    population indices of two parents, drawn with replacement.
  """
  fitnesses = np.asarray(fitnesses)
  n_individuals = len(fitnesses)
  half_pop_size = n_individuals // 2

  # argsort gives the permutation that sorts the array; applying argsort a
  # second time inverts that permutation and yields each individual's rank.
  ranks = np.argsort(np.argsort(fitnesses))

  total = ranks.sum()
  # A population of size 0 or 1 has no positive rank to normalise by;
  # p=None makes np.random.choice draw uniformly instead.
  probabilities = ranks / total if total > 0 else None

  return np.random.choice(n_individuals, size=(half_pop_size, 2), p=probabilities)

In [111]:
def crossover(pairs, population):
  """Build the next generation by uniform crossover of the selected pairs.

  For every pair, each gene of the first child is taken from one parent
  chosen at random, and the same gene of the second child is taken from the
  other parent. Both children therefore only ever carry genes that exist in
  one of their parents.

  Args:
    pairs: array-like of shape (n_pairs, 2) holding parent indices into
      `population`; at least population.shape[0] // 2 pairs are required and
      extra pairs are ignored.
    population: 2-D array, one row per individual and one column per gene.

  Returns:
    A new array with the same shape and dtype as `population`. Rows 2k and
    2k + 1 are the children of pair k. If the population size is odd, the
    last row is an unchanged copy of the last individual.

  Raises:
    ValueError: if fewer pairs are supplied than children need to be built.
  """
  pop_size = population.shape[0]
  half_pop_size = pop_size // 2

  pairs = np.asarray(pairs, dtype=int).reshape(-1, 2)
  if len(pairs) < half_pop_size:
    raise ValueError(
      f"crossover needs {half_pop_size} pairs for a population of "
      f"{pop_size}, got {len(pairs)}"
    )
  pairs = pairs[:half_pop_size]

  # Start from a copy so a leftover row (odd population size) holds a valid
  # individual rather than uninitialised memory.
  new_gen = population.copy()

  dads = population[pairs[:, 0]]
  moms = population[pairs[:, 1]]
  # One coin flip per gene decides which parent feeds the first child;
  # the second child receives the gene of the other parent.
  take_dad = np.random.random(dads.shape) < 0.5

  new_gen[0:2 * half_pop_size:2] = np.where(take_dad, dads, moms)
  new_gen[1:2 * half_pop_size:2] = np.where(take_dad, moms, dads)

  return new_gen

In [103]:
def mutation(population, rate=None):
  """Flip each gene of a binary population independently with a given probability.

  The array is modified in place and also returned.

  Args:
    population: 2-D array of 0/1 genes, one row per individual.
    rate: per-gene flip probability. When omitted, the module-level
      `mutation_rate` is used.

  Returns:
    The same `population` object, after mutation.
  """
  if rate is None:
    rate = mutation_rate

  # One uniform draw per gene; genes whose draw falls below the rate flip.
  flip = np.random.random(population.shape) < rate
  population[flip] = 1 - population[flip]
  return population

In [104]:
def pop_diversity(population):
  """Return the average per-gene standard deviation across the population.

  The standard deviation is computed for every gene (column) over all
  individuals (rows) and then averaged, so a population of identical
  individuals scores 0.

  Args:
    population: 2-D array, one row per individual and one column per gene.

  Returns:
    The mean of the per-gene standard deviations, as a float.
  """
  return np.std(population, axis=0).mean()

### Environment functions

In [105]:
def play_gen(population, env):
  """Play one episode per individual and return every individual's fitness.

  Each individual is a fixed sequence of actions that is replayed step by
  step until the episode ends or the sequence is exhausted.

  Args:
    population: 2-D array, one row per individual and one action per column.
    env: environment exposing `reset()` and `step(action)`, where `step`
      returns a 5-tuple whose third and fourth items are the `terminated`
      and `truncated` flags (the Gymnasium step API).

  Returns:
    1-D float array with one entry per individual: the 0-based index of the
    last step that individual executed (0 if it has no actions).
  """
  n_individuals, n_moves = population.shape
  fitnesses = np.empty(n_individuals)

  for i in range(n_individuals):
    env.reset()

    last_step = 0
    for t in range(n_moves):
      _, _, terminated, truncated, _ = env.step(int(population[i, t]))
      last_step = t

      # Either flag ends the episode; stepping further would require a reset.
      if terminated or truncated:
        break

    fitnesses[i] = last_step

  # Returned only after every individual has been evaluated.
  return fitnesses

### Environment creation

In [106]:
# CartPole-v1 environment shared by every evaluation below.
env = gym.make("CartPole-v1")

In [134]:
def evolve(selection, nb_gen, pop_size, mutation_rate, elitism=0):
  """Run the genetic algorithm on the global `env` and plot its progress.

  A fresh random population is created, then for each generation it is
  evaluated, parents are selected, children are produced by crossover and
  mutation, and the best individual of the generation is copied over a
  randomly chosen child.

  Args:
    selection: "rank" or "fitness"; chooses the parent-selection function.
    nb_gen: number of generations to run.
    pop_size: number of individuals in the population created here.
      NOTE(review): helpers that read a module-level `pop_size` must agree
      with this value; confirm before passing a different size.
    mutation_rate: per-gene probability of a bit flip after crossover.
    elitism: accepted for interface compatibility but not used; exactly one
      elite individual is always carried over.

  Returns:
    None. Two matplotlib figures are shown: best/average fitness per
    generation, and population diversity per generation.

  Raises:
    ValueError: if `selection` is not a known method name.
  """
  selectors = {"rank": rank_selection, "fitness": fitness_selection}
  if selection not in selectors:
    # Fail loudly: a returned string is easy to overlook in a notebook.
    raise ValueError("Unknown selection method")
  select_parents = selectors[selection]

  population = np.random.choice([0,1], (pop_size, max_moves))

  best_scores = np.empty(nb_gen)
  avg_scores = np.empty(nb_gen)
  diversity = np.empty(nb_gen)

  for gen in range(nb_gen):
    # Actions must be integers for the environment.
    population = population.astype(int)

    fit = play_gen(population, env)

    # Copy the best individual before the population is replaced.
    elite = population[np.argmax(fit)].copy()

    best_scores[gen] = np.max(fit)
    avg_scores[gen] = np.mean(fit)

    pairs = select_parents(fit)
    population = crossover(pairs, population)

    # Mutate with the rate passed to this function; the bit flip is applied
    # here so the argument, not a same-named module-level value, sets the rate.
    flip = np.random.random(population.shape) < mutation_rate
    population[flip] = 1 - population[flip]

    # Re-insert the elite over a random child so the best score cannot be lost.
    population[np.random.randint(pop_size)] = elite

    diversity[gen] = pop_diversity(population)

  plt.plot(best_scores, label="Best score")
  plt.plot(avg_scores, label="Average score")
  plt.legend()
  plt.xlabel("Generation")
  plt.ylabel("Fitness")
  plt.title("Fitness over time")
  plt.show()

  plt.plot(diversity, color="orange")
  plt.xlabel("Generation")
  plt.ylabel("Average gene STD")
  plt.title("Population diversity over time")
  plt.show()

In [138]:
# Settings for the runs below. `pop_size` and `mutation_rate` rebind the
# module-level values defined earlier in the notebook.
nb_gen = 10000        # number of generations per run
pop_size = 30
mutation_rate = 0.01

In [None]:
# Rank-selection run: evolves a fresh population and shows the fitness and diversity plots.
evolve("rank", nb_gen, pop_size, mutation_rate)

#### Fitness selection

In [None]:
# Start from a fresh random population sized to the current settings, so this
# run does not depend on whatever `population` held before this cell.
population = np.random.choice([0, 1], (pop_size, max_moves))

# Best fitness observed in each generation.
best_scores_fit = np.empty(nb_gen)

for i in range(nb_gen):
  # Actions must be integers for the environment.
  population = population.astype(int)
  fit = play_gen(population, env)

  best_scores_fit[i] = max(fit)

  # Fitness-proportional parent selection, then crossover and mutation.
  pairs = fitness_selection(fit)
  population = crossover(pairs, population)
  population = mutation(population)

In [None]:
# Best score per generation for the fitness-selection run.
plt.plot(best_scores_fit)
plt.xlabel("Generation")
plt.ylabel("Best score")
plt.title("Evolution of best score with fitness selection")
# Render the figure explicitly so the cell does not echo the Text object
# returned by plt.title.
plt.show()

#### Rank selection

In [None]:
# Start from a fresh random population so this run is not seeded with
# individuals already evolved by an earlier cell.
population = np.random.choice([0, 1], (pop_size, max_moves))

# Best fitness observed in each generation.
best_scores_rank = np.empty(nb_gen)

for i in range(nb_gen):
  # Actions must be integers for the environment.
  population = population.astype(int)
  fit = play_gen(population, env)

  best_scores_rank[i] = max(fit)

  # Rank-based parent selection, then crossover and mutation.
  pairs = rank_selection(fit)
  population = crossover(pairs, population)
  population = mutation(population)

In [None]:
# Best score per generation for the rank-selection run.
plt.plot(best_scores_rank)
plt.xlabel("Generation")
plt.ylabel("Best score")
plt.title("Evolution of best score with rank selection")
# Render the figure explicitly so the cell does not echo the Text object
# returned by plt.title.
plt.show()

In [None]:
# Diversity of the population left by the most recent run above.
pop_diversity(population)