In [25]:
#@title Imports
import logging
import random
from copy import copy

import random
import platform
from collections import Counter

#from gx_utils import *

In [26]:
#@title Problem instances generator

def problem(N, seed=None):
    """Creates an instance of the problem"""

    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [27]:
from traitlets.traitlets import Callable
import itertools

def remove_duplicates(list_):
  list_ = list(k for k,_ in itertools.groupby(list_))

  return list_

def sort_by(list_: list, key: Callable = None):
  list_.sort(key=key)
  return list_
  
def preproc(problem, rem_dup=False, sort=False, f:Callable = None):
  problem= sort_by(problem, key=f)
  problem= remove_duplicates(problem)
  return problem

In [28]:
#@title Problem init & PreProcessing
N = 10

prob = problem(N, seed=42)
prob = preproc(prob)
prob_dim = len(prob)



In [29]:
check_table = list( True for _ in range(0,21))
sum(check_table)

21

In [124]:
from IPython.utils.text import num_ini_spaces
#@title Class & Function Definitions for GA

#TODO: try a multi dimension fitness_score
#You can change this to try different fitness function (FF)
def fitness_function(genome):
    """
    This function compute roughly two metrics: the number of used lists and the percentage of integers covered
    it also uses two parameters (Alpha and Beta) to weigth differently the aforementioned metrics
    """

    #Parameters for weigthing the two metrics used
    alpha = 1
    beta = 0

    #First metric
    num_of_lists_taken = sum(genome)

    #Third metric
    num_of_total_numbers = 0

    check_table = list( False for _ in range(0,N))
    #Read all genes in the genome, using an index
    for index in range(0, len(genome)):

      #if the genome at this index is 1, the individual has the index-th list of the problem
      if genome[index] == 1:
        
        #read the number of elements in that list, and add it to the total
        num_of_total_numbers += len(prob[index])

        #then we check for each integer in that list, and we update the checkTable
        for target_integer in range(0,N):
          if target_integer in prob[index]:
            check_table[target_integer] = True
          if all(check_table):
            break

    #Second Metric
    percentage_of_value_covered = ( sum(check_table)/N ) * 100



    #Fitness Score equation  
    #fitness_score = alpha*percentage_of_value_covered - beta*num_of_lists_taken  #we want to minimize the second addendum

    #let's try a double fitness function
    return percentage_of_value_covered, -num_of_total_numbers , -num_of_lists_taken


class Individual():

  def __init__(self, genome=None, fitness_score=None):
    self.genome= genome
    self.fitness_score= fitness_score

  def __str__(self):
    return f"""GENOME: {self.genome}\nFIT_SCORE: {self.fitness_score}"""

  def evaluate(self):
    """
    This function is mainly used as a wrap function, to let us change only the FF and leave the rest
    of the code untouched, using 'evaluate'. It also helps for readability
    """

    self.fitness_score = fitness_function(self.genome)

  def mutate(self):
    """
    This function simulates mutations over the entire genome. 
    For each gene it tries to mutate with a give probability p.
    The mutation is random between 0 or 1 
    """

    #TODO: check if mutate a single gene with 100% probability is better than this approach

    p = 0.05 #probability of mutation on the single gene

    #for each gene, consider mutate it with a certain probability p
    for gene_index in range(0, len(self.genome)):
      if random.random() < p:
        #set its value randomly
        self.genome[gene_index] = random.choice([0,1])
    self.fitness_score = self.evaluate()

  def cross_over(genome1, genome2):
    #TODO: consider setting a Constant for the length of the genome
    cut_index = random.randint(0,len(genome1))
    temp = []
    temp = genome1[0:cut_index]
    genome1[0:cut_index] = genome2[0:cut_index]
    genome2[0:cut_index] = temp

    return genome1, genome2, cut_index
  
  def recombine_with(self, other_individual, recomb_function: Callable=cross_over):
    g1 = self.genome
    g2 = other_individual.genome
    
    self.genome, other_individual.genome, cut_index = recomb_function(g1,g2)
    self.evaluate()
    other_individual.evaluate()
    return cut_index

def random_new_individual():
  """
  This function create an Individual object
  Genome defined as a bunch of lists from problem
  Fitness score defined using a FF implemented in the 'evaluate' function
  we take a list of value 1 (list taken) or 0 (list NOT taken), each corresponding to a specific list of the problem
  """
  indi = Individual()
  genome = list(random.choice([0,1]) for _ in range(0, prob_dim))   #something like  [0 1 1 0 ... 0 1 1]
  indi.genome = genome
  indi.evaluate()
  return indi

#TODO: UNSTABLE GENERATION OF OFFSPRING TOO SENSIBLE TO p_best_copied AND tournament_dim
#EASILY GO TO 0 FOR LOW VALUE OF P, OR EXPLODE FOR HIGH VALUES OF P. 
#IF STABLE, IT WLL BE STABILIZE OVERA SINGLE SOLUTION AND THE ENTIRE OFFSPRING WILL BE A BIG BUNCH OF CLONES :O
def generate_offspring_from(population: list, reproduction_scheme=None, p_best_copied=10, tournament_dim= 5):

  index = int( p_best_copied/100 * len(population) )
  #save the p best individuals

  top_p_best = population[0:index]

  #recombine the top p best between theyself
  for indi in population[0:index]:
    random_picked_indi = population[random.randint(0, index-1)]  # -1 cause randint include both endpoints
    indi.recombine_with(random_picked_indi)
    
    #after recomb append their offspring to the top_p_best (future offspring)
    top_p_best.append(indi)
    top_p_best.append(random_picked_indi)

  #we cycle over the lower part of the population. At each cycle we take a bunch of indis from this pool, and save the best.
  #It will then mutate randomly
  lower_pop = population[index:]
  #print(len(lower_pop))
  while len(lower_pop) >= tournament_dim:
    fight_list = []
    for i in range(0, tournament_dim):
      j = random.randint(0, len(lower_pop)-1)
      # append the randomly extract indi to the fight list
      fight_list.append(lower_pop.pop(j))

    #select the winner taken the max over the ordered values of the fitness function
    winner = max(fight_list, key= lambda x: (x.fitness_score[0], x.fitness_score[1], x.fitness_score[2],) )

    #mutate it
    winner.mutate()
    winner.evaluate()
    #save it
    top_p_best.append(winner)

  return top_p_best


#TODO: implement Generation tracking to better analyze the evolution


In [None]:

#@title Test Functions
i1 = random_new_individual()
i2 = random_new_individual()

i1.evaluate
i2.evaluate

#indi.mutate()
#print(f"MUTATED\nGENOME: {indi.genome}")


print(f"BEFORE REC:\n1) {i1}\n\n2) {i2}")
ci=i1.recombine_with(i2)

print(f"\n\nAFTER REC at {ci}:\n1) {i1}\n\n2) {i2}")

In [None]:
for i in range(0,10):
  print(random_new_individual())

#TODO: Understand why the func generates only a single random genome for 10 times and not an entire list of 10 random genomes

In [123]:
#@title Population Creation

def print_pop(pop):
  print("\n\n")
  for i in pop:
    print(i)
  print("\n\n")

DIM_POP = 2000
#Create an array of Individual istances, each already evaluate
population = [random_new_individual() for _ in range(0, DIM_POP)]
population.sort(key= lambda x: (x.fitness_score[0], x.fitness_score[1], x.fitness_score[2]), reverse=True)

P = 21 #%
TDIM = 2
NUM_GEN = 310
for gen in range(0, NUM_GEN):

  offspring = generate_offspring_from(population, p_best_copied=P, tournament_dim=TDIM)
  offspring.sort(key= lambda x: (x.fitness_score[0], x.fitness_score[1], x.fitness_score[2]), reverse=True)
  population = offspring
  print(population[0]) 

print_pop(population) 

GENOME: [0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1]
FIT_SCORE: (100.0, -27, -11)
GENOME: [0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1]
FIT_SCORE: (100.0, -19, -7)
GENOME: [0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
FIT_SCORE: (100.0, -23, -8)
GENOME: [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]
FIT_SCORE: (100.0, -21, -9)
GENOME: [0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
FIT_SCORE: (100.0, -18, -7)
GENOME: [0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0]
FIT_SCORE: (100.0, -31, -11)
GENOME: [1, 0, 0, 0,

KeyboardInterrupt: ignored