Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# LAB9

Write a local-search algorithm (e.g., an EA) able to solve the *Problem* instances 1, 2, 5, and 10 on 1000-loci genomes, using a minimum number of fitness calls. That's all.

### Deadlines:

* Submission: Sunday, December 3 ([CET](https://www.timeanddate.com/time/zones/cet))
* Reviews: Sunday, December 10 ([CET](https://www.timeanddate.com/time/zones/cet))

Notes:

* Reviews will be assigned on Monday, December 4
* You need to commit in order to be selected as a reviewer (i.e., it is better to commit an empty work than not to commit at all)

In [1]:
from random import choices

import lab9_lib

In [3]:
# Sanity check of the provided library: score ten random 50-bit genomes on
# problem instance 10, then report how many fitness evaluations were spent.
fitness = lab9_lib.make_problem(10)
for n in range(10):
    ind = choices([0, 1], k=50)
    bits = ''.join(map(str, ind))
    score = fitness(ind)
    print(f"{bits}: {score:.2%}")

print(fitness.calls)

01101011010101110000010011101110110100101011110001: 15.33%
10000001101101010101101110110101001010011001000011: 15.34%
01100011101110100110010101001011011111010100011110: 9.11%
11001001111011000101000011101000101101100100010101: 7.33%
11010001111101010000010101000000111000011001110011: 9.33%
00001011011000110011110111000101110001010101110101: 9.13%
11001000010111100011110011011100000010110001111000: 15.34%
10100101001011100000011010010100000010000000111000: 11.56%
10110001100010100111001011000110110000011110110000: 9.33%
01000010110010101001100111010100001011001111110111: 15.34%
10


In [831]:
import numpy as np
from random import randint, choices, random

import lab9_lib

## solution

In [826]:
# Number of random individuals each population is initialised with.
STARTING_POPULATION = 100
# Number of individuals requested from every tournament call.
N_SURVIVAL = 5 # less shrinks down procreation to N_survival relatives and then procreation stops
# Problem instance handed to lab9_lib.make_problem().
A = 10
# Genome length produced by new_gene(); also the stride used by Individual.reproduce_with_3().
K = 30

In [827]:
class Tabu:
    """Memory of the genomes seen so far, used to reject duplicates.

    ``tabu_list`` holds every accepted genome (as a tuple) in insertion
    order. A companion set mirrors it so that membership tests take constant
    time instead of scanning the whole list on every check.
    """

    def __init__(self):
        self.tabu_list = []
        self._seen = set()  # same tuples as tabu_list, for O(1) lookups

    def check(self, id):
        """Register a genome and tell whether it is new.

        Args:
            id: the genome, any iterable of hashable values (list, tuple,
                1-D NumPy array, ...).

        Returns:
            True if the genome had not been seen before (it is now
            remembered), False if it is already in the tabu memory.
        """
        t_id = tuple(id)
        if t_id in self._seen:
            return False
        self._seen.add(t_id)
        self.tabu_list.append(t_id)
        return True

In [865]:
def new_gene():
    """Return a random genome: a list of K values drawn from {0, 1}."""
    return choices([0, 1], k=K)

class Individual:
    """A candidate solution: a 0/1 genome stored as a NumPy array.

    Every variation operator returns a new Individual and leaves the
    receiver (and the other parent, if any) untouched.
    """

    def __init__(self, genome=None):
        """Create an individual.

        Args:
            genome: sequence or array of 0/1 values; it is copied. When
                omitted, a random genome is obtained from new_gene().
        """
        if genome is None:
            genome = new_gene()
        # np.array copies its input, so the genome is always a private
        # ndarray regardless of whether a list or an array was passed.
        self.genome = np.array(genome)

    def get_genome(self):
        """Return a copy of the genome."""
        return np.copy(self.genome)

    def mutate(self):
        """Return a copy with one randomly chosen gene flipped."""
        idx = randint(0, len(self.genome) - 1)
        new_genome = self.get_genome()
        new_genome[idx] = 1 - new_genome[idx]
        return Individual(new_genome)

    def heavy_mutate(self, rand_length=False, invert=False):
        """Return a copy with a contiguous segment rewritten.

        Args:
            rand_length: when False the segment spans half of the genome;
                when True its length is drawn at random between 2 and half
                of the genome.
            invert: when True the segment is bit-flipped; when False it is
                replaced by random bits.
        """
        len_g = len(self.genome)
        span = len_g // 2  # length of the segment to change
        if rand_length:
            # Genomes shorter than 4 have span < 2; clamp the lower bound so
            # that randint never receives an empty range.
            span = randint(min(2, span), span)
        idx = randint(0, len_g - span)
        new_genome = self.get_genome()
        if invert:
            new_genome[idx:idx + span] = 1 - new_genome[idx:idx + span]
        else:
            new_genome[idx:idx + span] = choices([0, 1], k=span)
        return Individual(new_genome)

    def inverse(self):
        """Return the bitwise complement of this individual."""
        return Individual(1 - self.get_genome())

    def reproduce_with(self, id_2):
        """Segment crossover.

        The child is a copy of ``id_2`` in which a randomly placed segment,
        half a genome long, is taken from ``self``.
        """
        len_g = len(self.genome)
        span = len_g // 2
        idx = randint(0, len_g - span)
        new_genome = id_2.get_genome()
        new_genome[idx:idx + span] = self.genome[idx:idx + span]
        return Individual(new_genome)

    def reproduce_with_2(self, id_2):
        """Uniform crossover: each gene comes from either parent with equal odds."""
        new_genome = self.get_genome()
        genome_2 = id_2.get_genome()
        # One coin toss per locus; True means "take this gene from id_2".
        take_other = np.array(
            [randint(0, 1) == 0 for _ in range(len(new_genome))], dtype=bool
        )
        new_genome[take_other] = genome_2[take_other]
        return Individual(new_genome)

    def reproduce_with_3(self, id_2):
        """Strided crossover.

        The child is a copy of ``self`` in which every K-th gene, starting
        from a random offset in [0, K - 1], is taken from ``id_2``. K is the
        module-level constant; for a genome of length K this copies exactly
        one gene.
        """
        new_genome = self.get_genome()
        genome_2 = id_2.get_genome()
        start = randint(0, K - 1)
        new_genome[start::K] = genome_2[start::K]
        return Individual(new_genome)

def tournament(population, fitness, n_survival):
    """Keep the ``n_survival`` fittest individuals of ``population``.

    Every individual is evaluated once with ``fitness``. ``population`` is
    indexed with an index array, so it has to be a NumPy array.

    Returns a tuple ``(goal_reached, survivors, scores)``: whether the best
    score is at least 1, the selected individuals ordered from best to
    worst, and their scores in the same order.
    """
    scores = np.array([fitness(individual.get_genome()) for individual in population])

    ranking = np.argsort(scores)[::-1]  # indices from highest to lowest score
    survivors = ranking[:n_survival]
    top_scores = scores[survivors]
    goal_reached = top_scores[0] >= 1

    return goal_reached, population[survivors], top_scores

def tournament_3(population, fitness, n_survival):
    """Group tournament: shuffle, split into groups, keep each group's best.

    Every individual is evaluated once. The shuffled population is split
    into ``n_survival`` groups whose sizes differ by at most one, so nobody
    is left out of the competition and the overall best individual always
    survives. When the population holds fewer than ``n_survival``
    individuals, each one forms its own group and all of them are returned.

    Args:
        population: NumPy array of individuals exposing ``get_genome()``.
        fitness: callable mapping a genome to a score.
        n_survival: number of groups, i.e. maximum number of survivors.

    Returns:
        ``(goal_reached, survivors, scores)``: whether the best score is at
        least 1, the group winners ordered from best to worst, and their
        scores in the same order.

    Raises:
        ValueError: if the population is empty or ``n_survival`` is < 1.
    """
    len_p = len(population)
    if len_p == 0 or n_survival < 1:
        raise ValueError("tournament_3 needs a non-empty population and n_survival >= 1")

    scores = np.array([fitness(individual.get_genome()) for individual in population])

    # Random permutation of the indices, obtained by ranking random keys.
    rand_idx = np.argsort([random() for _ in range(len_p)])
    # array_split spreads the remainder over the first groups instead of
    # discarding it, and never yields an empty group here.
    groups = np.array_split(rand_idx, min(n_survival, len_p))

    winners = np.array([group[np.argmax(scores[group])] for group in groups])
    best_scores = scores[winners]

    sort_idx = np.argsort(best_scores)[::-1]
    best_population = population[winners[sort_idx]]
    best_scores = best_scores[sort_idx]

    return best_scores[0] >= 1, best_population, best_scores

def tournament_2(population, fitness, n_survival):
    """Round-robin tournament: rank individuals by pairwise wins.

    Each individual is evaluated exactly once; the cached values are then
    compared for every pair ``(i, j)`` with ``i < j``. The first of the pair
    scores a win only when strictly fitter, otherwise the second one does.
    Caching assumes ``fitness`` returns the same value for the same genome.

    Args:
        population: NumPy array of individuals exposing ``get_genome()``.
        fitness: callable mapping a genome to a score.
        n_survival: number of individuals to keep.

    Returns:
        ``(goal_reached, survivors, wins)``: whether any individual scored
        at least 1, the ``n_survival`` individuals with most wins (best
        first), and their win counts.
    """
    len_p = len(population)

    # One fitness call per individual instead of one per comparison.
    fits = [fitness(individual.get_genome()) for individual in population]
    goal_reached = any(fit >= 1 for fit in fits)

    scores = np.zeros(len_p)
    for i in range(len_p):
        for j in range(i + 1, len_p):
            if fits[i] > fits[j]:
                scores[i] += 1
            else:
                scores[j] += 1

    best_idx = np.argsort(scores)[::-1][:n_survival]

    return goal_reached, population[best_idx], scores[best_idx]

def procreate(population, selector, tabu= None):
    """Build the next generation from ``population``.

    One single-gene mutant of every individual is always produced. The
    boolean flags in ``selector`` switch the other operators on or off:

    * 0-3: heavy mutation (plain / random length / inverting / both);
    * 4-6: crossover of every pair ``i < j`` with ``reproduce_with``,
      ``reproduce_with_2`` and ``reproduce_with_3`` respectively;
    * 7: drop offspring rejected by ``tabu.check`` (only if a tabu is given);
    * 8: carry over all parents instead of only ``population[0]``.

    Returns a NumPy array holding the kept parents followed by the offspring.
    """
    size = len(population)
    pairs = [(a, b) for a in range(size) for b in range(a + 1, size)]

    # exploitation: one small step from every parent
    offspring = [parent.mutate() for parent in population]

    # exploration: heavy mutations, in selector order
    heavy_variants = (
        (0, {}),
        (1, {"rand_length": True}),
        (2, {"invert": True}),
        (3, {"rand_length": True, "invert": True}),
    )
    for flag, options in heavy_variants:
        if selector[flag]:
            offspring.extend(parent.heavy_mutate(**options) for parent in population)

    # exploration: crossovers over every unordered pair of parents
    crossovers = (
        (4, "reproduce_with"),
        (5, "reproduce_with_2"),
        (6, "reproduce_with_3"),
    )
    for flag, operator in crossovers:
        if selector[flag]:
            offspring.extend(
                getattr(population[a], operator)(population[b]) for a, b in pairs
            )

    if selector[7] and tabu is not None:
        offspring = [child for child in offspring if tabu.check(child.get_genome())]

    if selector[8]:
        kept = list(population)   # keep all previous individuals
    else:
        kept = [population[0]]    # keep only champion

    return np.array(kept + offspring)

In [818]:
# Sweep over every combination of the algorithm switches and record, for each
# one, the total number of fitness calls and epochs spent over n_tries runs.
#
# Meaning of the flags collected in `selector`:
#   i0-i3  heavy-mutation variants used by procreate()
#   i4-i6  crossover operators used by procreate()
#   i7     filter offspring through the tabu memory
#   i8     keep all parents (otherwise only the champion)
#   i9     single population (otherwise 5 populations)
#   i10    migrate individuals between populations
#
# TODO: decide the starting population(s) before the sweep so that every
# configuration is evaluated on the same individuals.
from itertools import product

n_tries = 100
max_epochs = 10000

selectors = []
solutions = []

tournament_f = tournament # tournament_2 uses too many fitness calls

# product() visits the combinations in the same order as ten nested
# `for ... in [True, False]` loops (last flag varies fastest).
for flags in product([True, False], repeat=10):
    i9 = flags[9]
    # NOTE(review): migration is only tried when i9 selects a single
    # population, where the migration loop below has no i != j pair and does
    # nothing. The condition may be inverted -- confirm the intent.
    for i10 in ([True, False] if i9 else [False]):
        selector = [*flags, i10]
        selectors.append(tuple(selector))
        print(selector)

        avg_calls = 0
        avg_epochs = 0
        n_success = 0

        for _ in range(n_tries):

            fitness = lab9_lib.make_problem(A)

            tabu = Tabu()

            n_populations = 1 if i9 else 5
            populations = [np.array([Individual() for _ in range(STARTING_POPULATION)]) for _ in range(n_populations)]
            goal_reached_all = np.array([False for _ in range(n_populations)])
            scores_all = []

            # initial selection
            for i in range(n_populations):
                goal_reached, population, scores = tournament_f(populations[i], fitness, N_SURVIVAL)
                goal_reached_all[i] = goal_reached
                populations[i] = population
                scores_all.append(scores)

            epoch = 0
            while (not np.any(goal_reached_all)) and (epoch < max_epochs):

                # procreate() takes (population, selector, tabu): the selector
                # must be passed explicitly, the tabu memory comes third.
                for i in range(n_populations):
                    populations[i] = procreate(populations[i], selector, tabu)

                if i10:
                    for i in range(n_populations):               # migration
                        for j in range(n_populations):
                            if i != j:
                                new_population = [p for p in populations[i]]
                                idx = randint(0, len(populations[j]) - 1)
                                new_population.append(populations[j][idx])
                                populations[i] = np.array(new_population)

                scores_all = []
                for i in range(n_populations):
                    goal_reached, population, scores = tournament_f(populations[i], fitness, N_SURVIVAL)
                    goal_reached_all[i] = goal_reached
                    populations[i] = population
                    scores_all.append(scores)

                epoch += 1

            # Merge the survivors of every population and rank them together.
            population_complete = []
            for i in range(n_populations):
                for j in range(N_SURVIVAL):
                    population_complete.append(populations[i][j])
            population_complete = np.array(population_complete)

            goal_reached, population, scores = tournament_f(population_complete, fitness, N_SURVIVAL)

            # Read after the final ranking, so its evaluations are counted too.
            n_fit_calls = fitness.calls

            avg_calls += n_fit_calls
            avg_epochs += epoch
            if goal_reached: n_success += 1

        # Totals over n_tries; they are divided by n_tries when reported.
        solutions.append((avg_calls, avg_epochs))

[True, True, True, True, True, True, True, True, True, True, True]
[True, True, True, True, True, True, True, True, True, True, False]
[True, True, True, True, True, True, True, True, True, False, False]
[True, True, True, True, True, True, True, True, False, True, True]
[True, True, True, True, True, True, True, True, False, True, False]
[True, True, True, True, True, True, True, True, False, False, False]
[True, True, True, True, True, True, True, False, True, True, True]
[True, True, True, True, True, True, True, False, True, True, False]
[True, True, True, True, True, True, True, False, True, False, False]
[True, True, True, True, True, True, True, False, False, True, True]
[True, True, True, True, True, True, True, False, False, True, False]
[True, True, True, True, True, True, True, False, False, False, False]
[True, True, True, True, True, True, False, True, True, True, True]
[True, True, True, True, True, True, False, True, True, True, False]
[True, True, True, True, True, True

In [822]:
# Rank the evaluated selector configurations: fewest total fitness calls
# first, total epochs as tie-break.
solutions = np.array(solutions)

# Prefix every row with its original position: columns are (index, calls, epochs).
sort_idx = np.column_stack((np.arange(len(solutions)), solutions))

# lexsort is stable and treats its LAST key as the primary one.
order = np.lexsort((sort_idx[:, 2], sort_idx[:, 1]))
best_idx = sort_idx[order, 0]

selectors = np.array(selectors)
for rank in range(len(selectors)):
    which = best_idx[rank]
    calls, epochs = solutions[which]
    print(f'{selectors[which]} -> {(calls // n_tries, epochs // n_tries)}')

# List the switches that are enabled in the best configuration.
for position, enabled in enumerate(selectors[best_idx[0]]):
    if enabled:
        print(f'i{position}')


[False False False False  True False  True  True  True  True False] -> (298, 13)
[ True  True False  True  True False False False  True  True  True] -> (304, 13)
[False  True  True False  True False False False  True  True  True] -> (307, 13)
[ True  True False  True False  True False False  True  True False] -> (307, 13)
[ True False False  True False False False  True  True  True False] -> (308, 13)
[ True False  True  True  True False  True  True False  True False] -> (309, 13)
[ True  True False  True  True  True  True  True  True  True  True] -> (309, 13)
[ True False  True False False False False  True False  True False] -> (310, 13)
[False  True  True False  True  True False False False  True  True] -> (311, 13)
[False False  True  True  True  True  True  True False  True  True] -> (313, 14)
[ True False False  True  True False False False  True  True  True] -> (313, 14)
[False False False False False False False False  True  True False] -> (313, 14)
[False False False  True Fal

4, 6, 7, 8, 9

In [866]:
# Single run on problem instance A using the best selector from the sweep.
# Selection inside the loop uses tournament_3; the final ranking of the
# survivors uses tournament.
fitness = lab9_lib.make_problem(A)
best_selector = selectors[best_idx[0]]

tabu = Tabu()

tournament_f = tournament_3

n_populations = 1
populations = [
    np.array([Individual() for _ in range(STARTING_POPULATION)])
    for _ in range(n_populations)
]
goal_reached_all = np.array([False] * n_populations)
scores_all = []

# initial selection
for i, candidates in enumerate(populations):
    goal_reached, population, scores = tournament_f(candidates, fitness, N_SURVIVAL)
    goal_reached_all[i] = goal_reached
    populations[i] = population
    scores_all.append(scores)

epoch = 0
max_epochs = 100000
while epoch < max_epochs and not np.any(goal_reached_all):

    # variation (no migration step in this run)
    for i, parents in enumerate(populations):
        populations[i] = procreate(parents, best_selector, tabu)

    # selection
    scores_all = []
    for i, candidates in enumerate(populations):
        goal_reached, population, scores = tournament_f(candidates, fitness, N_SURVIVAL)
        goal_reached_all[i] = goal_reached
        populations[i] = population
        scores_all.append(scores)

    epoch += 1

# Gather the first N_SURVIVAL individuals of every population and rank them.
population_complete = np.array(
    [survivors[j] for survivors in populations for j in range(N_SURVIVAL)]
)

goal_reached, population, scores = tournament(population_complete, fitness, N_SURVIVAL)

# Read the counter before the print below evaluates the winner once more.
n_fit_calls = fitness.calls
print(f'best score is {fitness(population[0].get_genome())} - obtained with {n_fit_calls} calls to fitness in {epoch} epochs')

best score is 1.0 - obtained with 53168 calls to fitness in 4357 epochs


In [550]:
# Starting point of the local search: the first individual of the merged
# survivors. Bound directly to best_new_id so that the builtin `id` is not
# shadowed by a notebook-level variable.
best_new_id = population_complete[0]

In [558]:
# Local search around the best individual found so far: draw 100000
# random-length inverting mutations of the same starting point and keep the
# fittest one. The starting point is called `current` rather than `id` so
# that the builtin is not shadowed.
current = best_new_id

fit = fitness(current.get_genome())
print(fit)
found = False
best_new_fit = fit
for _ in range(100000):
    new_id = current.heavy_mutate(rand_length=True, invert=True)
    new_fit = fitness(new_id.get_genome())
    if new_fit > best_new_fit:
        best_new_fit = new_fit
        best_new_id = new_id
        found = True
if found:
    print(best_new_fit)
else:
    print('nope')

0.56
nope
