In [44]:
#@title Imports
import logging
import random
from copy import copy

import random
import platform
from collections import Counter
from collections import namedtuple
#from gx_utils import *

In [45]:
#@title Problem instances generator

def problem(N, seed=None):
    """Creates an instance of the problem"""

    random.seed(seed)
    return [
        list(set(random.randint(0, N - 1) for n in range(random.randint(N // 5, N // 2))))
        for n in range(random.randint(N, N * 5))
    ]

In [46]:
#@title Some Functions for Preprocessing
from traitlets.traitlets import Callable
import itertools

def remove_duplicates(list_):
  list_ = list(k for k,_ in itertools.groupby(list_))

  return list_

def sort_by(list_: list, key: Callable = None):
  list_.sort(key=key)
  return list_
  
def preproc(problem, rem_dup=False, sort=False, f:Callable = None):
  problem= sort_by(problem, key=f)
  problem= remove_duplicates(problem)

  #Make problem a list of sets, each inner list is now a set
  problem= list( set(problem[i]) for i in range(0,len(problem)))
  return problem



In [47]:
#@title Problem init & PreProcessing

N = 100

prob_raw = problem(N, seed=42)
prob = preproc(prob_raw, sort=True, f=lambda x: -len(x))


#For each index of the main list problme, we link a set composed by the numbers the set covers
coverage_table = {}  #Something like {2: {1,3,4,8}}

for i, set_ith in enumerate(prob):
    coverage_table[i] = set_ith

prob_dim = len(prob)


from math import ceil, floor

num_bytes = ceil(prob_dim / 8)
diff = num_bytes*8 -prob_dim +5

for i in range(len(coverage_table), len(coverage_table) + diff):
    coverage_table[i] = set()



In [48]:
CHECK_MASK = 0b1000_0000
TEST_MASK =  0b1101_0100

print( bin(CHECK_MASK & TEST_MASK))
cm = CHECK_MASK << 2
print(bin(cm), cm)
print(bin(cm & TEST_MASK), cm & TEST_MASK)

0b10000000
0b1000000000 512
0b0 0


In [49]:
goal = set(range(0,N))
def covered_by(genome) -> bool:
    "compute the coverage for a given genome"
    cnt = Counter()
    cvg_set = set()
    cvg = False
    for b, byte in enumerate(genome, 1):

        cm = CHECK_MASK
        for i in range(0,8):

            if cm & byte != 0: #check if the bit is 1

                index = b*8+i  #then compute the index in the prob
                
                _set = coverage_table[index]
                cnt.update(_set)
                if not cvg and cvg_set != goal:
                    cvg_set |= _set
                else:
                    cvg = True

                
            cm = cm >> i+1
    if cvg:
        return -cnt.total()
    return -1000000


def fitness_function_2(genome):
    cnt = Counter()
    cnt.update(sum((prob[e] for e in genome), start=()))

    # Counting (Number of useful elements, Numbers of redundant elements)
    return tuple([sum(cnt[c] - 1 for c in cnt if cnt[c] > 1), 
                 -sum(cnt[c] == 1 for c in cnt)])

In [50]:
i = random_new_individual()
i

Individual(genome=bytearray(b'\x19\x10\x078\'\x1c:\x08\x87-"y\x05\xd0\xd4\xee\xb2-\xc9Z\xcc\xab\xac?\xbd*\xcc\xcf\xa21\xb2\xad\xbb[\x15\xa3\xf7\xad\xe0b\x96\x1b-l\x00O,fa4\x87\xf7w\xe7'), fitness=-2554)

In [59]:
#@title Individual Dataclass Def

from dataclasses import dataclass, replace

@dataclass
class Individual:
    """Class for keeping track of an ind."""
    genome: bytearray
    fitness: int = None

    def evaluate(self):
        """
        This function is mainly used as a wrap function, to let us change only the FF and leave the rest
        of the code untouched, using 'evaluate'. It also helps for readability
        """

        self.fitness = covered_by(self.genome)

    def mutate(self, PROB_MUT=0.3):
        """
        With a given probability 'prob' it mutates the given ind
        """
        j = random.randint(0,num_bytes-1)

        if random.random() < 0.3:
            self.genome[j] = random.randint(0, 255)


      

In [52]:
b = random.randbytes(num_bytes)
b[38]

145

In [53]:
#@title Class & Function Definitions for GA

from math import log 


#can be use to define GENOME_LENGTH as a function of N

DEAFULT_GENOME_LENGHT = 11
#A CRITICAL PARAMETER
#used to define a maximum of lists taken as a solution
#Small values can lead to unacceptable solutions, while Big 
#values lead to a huge amount of possibilities for the individuals 
#and then to a really slow converge towards a minimum
GENOME_LENGTH = DEAFULT_GENOME_LENGHT

#function used to sort the inds, and selecting the "best" part of them
sorting_key= lambda x: (x.fitness)

def random_new_individual():
  """
  This function create a random Individual object.
  """

  genome = bytearray( random.randbytes(num_bytes))

  ind = Individual(genome, 0)
  ind.evaluate()
  return ind

def mate(i1: Individual, i2: Individual, PROB_MUT=0.3) -> Individual:
  """
  Take two inds, randomly cut their genome and then create a new ind with 
  the genome create by the union of the two parts, properly
  """

  #cut_index = random.randint(0,len_genome)

  cut_index = random.randint( int( GENOME_LENGTH/3), int(2*GENOME_LENGTH/3) )
  new_genome = bytearray()
  new_genome[0:cut_index] = i1.genome[0:cut_index]
  new_genome[cut_index:] = i2.genome[cut_index:]
  ind = Individual(new_genome,0)
  ind.mutate(PROB_MUT)
  ind.evaluate()
  return ind


In [67]:
#@title Offspring Generation Function
def generate_offspring_from(population: list,
                            PERCENTAGE_SELECTED=10, 
                            DIM_MAX_OFFSPRING=10_000, 
                            PROB_MUT= 0.1):
  """
  Generates a new list of inds (INDIS), starting from given 'population'.

  @TODO: Try re-inserting the parents and have N random mating session at couple of even triplet or more 
        (I know it's kinda cringe...)

  @param: population Is the starting point for this function, the list of INDIS
  used to generate the new offspring (OFF).

  @param: PERCENTAGE_SELECTED Used to select the percentage of the population considered 
  "the best".    
  DEFAULT: 10

  @param: DIM_MAX_OFFSPRING Used to set a Maximum for the dimension of the offspring and avoid
  divergence/explosion of the general population.   
  DEAFULT: 10_000

  @param: PROB_MUT Probability to mutate. It is used to give the 'mutate()' method the probability
  to mutate a gene or not. It must be given as a number in the range [0,1).  
  DEFAULT: 0.1  (10%)

  """

  #Sort by: Coverage, Number of used numbers, Number of used lists
  #Crucial to cut the top p%
  population.sort(key=sorting_key, reverse=True)

  #get the index corresponding to the p%
  index = int( PERCENTAGE_SELECTED/100* len(population))

  #divide the population
  top_pop = population[0:index].copy()

  #May be useful for future implementations
  #bot_pop = population[index:]

  offspring = []

  #Mating Session loop
  while len(top_pop) >= 2:
    #Randomly select 2 indis and extract them
    i = random.randint(0, len(top_pop)-1)
    i1 = top_pop.pop(i)
    j = random.randint(0, len(top_pop)-1)
    i2 = top_pop.pop(j)

    #Mate them and obtain the child, mutate it and evaluate
    child = mate(i1, i2)
    child.mutate(PROB_MUT)
    child.evaluate()

    #Add everyone to the offspring
    offspring.append(i1)
    offspring.append(i2)
    offspring.append(child)

 

  #Sort crucial for potential Dim Max limit
  offspring.sort(key=sorting_key, reverse=True)

  if len(offspring) > DIM_MAX_OFFSPRING:
    return offspring[0:DIM_MAX_OFFSPRING]
  return offspring


In [68]:
#@title Population Creation

from time import time
from tqdm import tqdm
#SETTINGs
GENOME_LENGTH = 14
DIM_POP = 4000
DIM_MAX_OFFSPRING= 100
PERCENTAGE_SELECTED =80 #%
NUM_GEN = 1000
PROB_MUT = 0.9 #0.87 can be too high

#Create an array of INDs istances, each already evaluated
population = [random_new_individual() for _ in range(0, DIM_POP)]
population.sort(key=sorting_key, reverse=True)

#Plot variables init
best_for_gen = [-100000]
best_score = -100000
gens = range(0,NUM_GEN)
extinted = False

st  = time()
gen_iterator  = tqdm(range(0, NUM_GEN))
for gen in gen_iterator:
  gen_iterator.set_description("Current Best Score %d" % best_score)
  offspring=generate_offspring_from(population,
                                      PERCENTAGE_SELECTED=PERCENTAGE_SELECTED, 
                                      PROB_MUT = PROB_MUT, 
                                      DIM_MAX_OFFSPRING=DIM_MAX_OFFSPRING)
  
  if len(offspring) == 0:
      print(f"Population extinted at gen: {gen-1} with best: {population[0].fitness}")
      extinted = True
      break

  population = offspring

  best_score = population[0].fitness

if not extinted:

    et  = time()

    elapsed_time = et - st
    best =  population[0]
    cost = -best.fitness
    bloat = cost/N *100
    print("Winner: \n", best)
    print("Cost: ", cost)
    print("Bloat= ", bloat, "%")
    print(f"Elapsed time: {elapsed_time}s")

    print("N: ", N)
    print("GL: ", GENOME_LENGTH)
    print("NG: ", NUM_GEN)



Current Best Score -1492: 100%|██████████| 1000/1000 [00:38<00:00, 25.90it/s]

Winner: 
 Individual(genome=bytearray(b'i\t\xf1s!(\xb55\xd7X\x11\x97=Tv\x08\xf2\xeb\xbc\xdf~d\x8a\xe8x\x1a\xa6,5\xa4F\x80\xf6GB\xaf\xcbv\xb0\xcd\xfe"_\xb6\xbb\x9a\\\xc6}\x83\x92~\xf2:'), fitness=-1492)
Cost:  1492
Bloat=  1492.0 %
Elapsed time: 38.6175000667572s
N:  100
GL:  14
NG:  1000



