Copyright **`(c)`** 2023 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

In [54]:
from itertools import product
from random import random, randint, shuffle, seed, choice, choices
import numpy as np
from scipy import sparse
import logging
import platform
from scipy.sparse import linalg, vstack, hstack, identity, eye
from functools import reduce
import copy

In [3]:
def make_set_covering_problem(num_points, num_sets, density):
    """Returns a sparse array where rows are sets and columns are the covered items"""
    seed(num_points*2654435761+num_sets+density)
    sets = sparse.lil_array((num_sets, num_points), dtype=bool)
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    for p in range(num_points):
        sets[randint(0, num_sets-1), p] = True
    return sets

# Halloween Challenge

Find the best solution with the fewest calls to the fitness functions for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

In [4]:
x = make_set_covering_problem(1000, 1000, .3)
print("Element at row=42 and column=42:", x[42, 42])

Element at row=42 and column=42: False


In [11]:
num_points = [100, 1_000, 5_000]
num_sets = num_points
density = [0.3, 0.7]

## Hill Climbing

In [6]:
def fitness(sets, state):
    cost = np.sum(state)
    if np.array(state).any():
        valid = sets[np.array(state), :].max(axis=0).sum()
    else:
        valid = 0
    return valid, -cost


In [7]:
def tweak(state, size):
    new_state = state.copy()
    index = randint(0, size - 1)
    new_state[index] = not new_state[index]
    return new_state

In [38]:
def hill_climbing(problem_size, num_sets, density, nb_steps = 100000):
    sets = make_set_covering_problem(num_sets, num_sets, .3).toarray()
    initial_state = [False for _ in range(num_sets)]
    current_state = initial_state
    fit = fitness(sets, initial_state)
    print(" The fitness of the initial state is : ", fit)
    visited_states = dict()
    visited_states[tuple(current_state)] = fit
    counter  = 0

    
    for step in range(nb_steps):
        new_state = tweak(current_state, problem_size)
        # if step % 50 == 0 : print(new_state)


        if tuple(new_state) in visited_states:
            new_fit = visited_states[tuple(new_state)]
        else:
            new_fit = fitness(sets, new_state)
            counter += 1
            visited_states[tuple(new_state)] = new_fit
    

        if fit <= new_fit:
            current_state = new_state
            fit = new_fit
        
   
    return fit, counter

In [39]:
res_list = []
for nb_pts in num_points:
    for dst in density : 
        res, count = hill_climbing(nb_pts, nb_pts, dst)
        print(" The fitness at the end of hill climbing for {} points and a  {} density is {} and they were {} call to the fitness function.".format(nb_pts, dst, res[1], count))
        res_list.append([nb_pts, dst, res[1], count])

res_list


 The fitness of the initial state is :  (0, 0)
 The fitness at the end of hill climbing for 100 points and a  0.3 density is -8 and they were 220 call to the fitness function.
 The fitness of the initial state is :  (0, 0)
 The fitness at the end of hill climbing for 100 points and a  0.7 density is -8 and they were 220 call to the fitness function.
 The fitness of the initial state is :  (0, 0)
 The fitness at the end of hill climbing for 1000 points and a  0.3 density is -15 and they were 1613 call to the fitness function.
 The fitness of the initial state is :  (0, 0)
 The fitness at the end of hill climbing for 1000 points and a  0.7 density is -15 and they were 1613 call to the fitness function.
 The fitness of the initial state is :  (0, 0)
 The fitness at the end of hill climbing for 5000 points and a  0.3 density is -21 and they were 6980 call to the fitness function.
 The fitness of the initial state is :  (0, 0)
 The fitness at the end of hill climbing for 5000 points and a  

[[100, 0.3, -8, 220],
 [100, 0.7, -8, 220],
 [1000, 0.3, -15, 1613],
 [1000, 0.7, -15, 1613],
 [5000, 0.3, -21, 6980],
 [5000, 0.7, -21, 6980]]

## Simulated annealing hill climbing

In [64]:
def simulated_annealing(problem_size, num_sets, density, T = 1, Tmin = 0.001, alpha = 0.95, nb_steps = 1000):

    sets = make_set_covering_problem(num_sets, num_sets, .3).toarray()
    initial_state = [False for _ in range(num_sets)]
    current_state = initial_state
    fit = fitness(sets, initial_state)
    visited_states = {}
    visited_states[tuple(current_state)] = fit
    counter  = 0
    global_min = ()
    min_fit = (0, 0)
    
    while T >= Tmin:
        for step in range(nb_steps):
            new_state = tweak(current_state, problem_size)

            if tuple(new_state) in visited_states:
                new_fit = visited_states[tuple(new_state)]
            else:
                new_fit = fitness(sets, new_state)
                counter += 1
                visited_states[tuple(new_state)] = new_fit
    
            if fit <= new_fit:
                current_state = new_state
                fit = new_fit
                if min_fit < new_fit:
                    global_min = new_state
                    min_fit = new_fit
                    
            else:
                p = np.exp(-(sum(fit) - sum(new_fit)) / T)
                current_state = choices([current_state, new_state], weights=(1 - p, p), k=1)[0] 
        
        T *= alpha  

    return min_fit, counter

In [66]:
res_list = []
for nb_pts in num_points:
    for dst in density : 
        res, count = simulated_annealing(nb_pts, nb_pts, dst)
        print(" The fitness at the end of hill climbing for {} points and a  {} density is {} and they were {} call to the fitness function.".format(nb_pts, dst, res[1], count))
        res_list.append([nb_pts, dst, res[1], count])

res_list

 The fitness at the end of hill climbing for 100 points and a  0.3 density is -10 and they were 2248 call to the fitness function.
 The fitness at the end of hill climbing for 100 points and a  0.7 density is -10 and they were 2248 call to the fitness function.
 The fitness at the end of hill climbing for 1000 points and a  0.3 density is -19 and they were 2645 call to the fitness function.
 The fitness at the end of hill climbing for 1000 points and a  0.7 density is -19 and they were 2645 call to the fitness function.
 The fitness at the end of hill climbing for 5000 points and a  0.3 density is -23 and they were 7012 call to the fitness function.
 The fitness at the end of hill climbing for 5000 points and a  0.7 density is -23 and they were 7012 call to the fitness function.


[[100, 0.3, -10, 2248],
 [100, 0.7, -10, 2248],
 [1000, 0.3, -19, 2645],
 [1000, 0.7, -19, 2645],
 [5000, 0.3, -23, 7012],
 [5000, 0.7, -23, 7012]]

Now let's optimize alpha and nb_steps to have the better result with the least iterations.


Warning : The next cell is long to run

In [68]:
res_list = []
for alpha in [0.95, 0.9, 0.8, 0.7]:
    for nb_steps in [500,750,1000,1250,1500]:
        for nb_pts in num_points:
            for dst in density : 
                res, count = simulated_annealing(nb_pts, nb_pts, dst, alpha = alpha, nb_steps = nb_steps)
                print(" The fitness at the end of hill climbing for {} points and a  {} density is {} and they were {} call to the fitness function (alpha = {}, nb_steps = {}.".format(nb_pts, dst, res[1], count, alpha, nb_steps))
                res_list.append([alpha, nb_pts, dst, res[1], count])

res_list

 The fitness at the end of hill climbing for 100 points and a  0.3 density is -11 and they were 2193 call to the fitness function (alpha = 0.95, nb_steps = 500.
 The fitness at the end of hill climbing for 100 points and a  0.7 density is -11 and they were 2193 call to the fitness function (alpha = 0.95, nb_steps = 500.
 The fitness at the end of hill climbing for 1000 points and a  0.3 density is -19 and they were 1675 call to the fitness function (alpha = 0.95, nb_steps = 500.
 The fitness at the end of hill climbing for 1000 points and a  0.7 density is -19 and they were 1675 call to the fitness function (alpha = 0.95, nb_steps = 500.
 The fitness at the end of hill climbing for 5000 points and a  0.3 density is -23 and they were 5429 call to the fitness function (alpha = 0.95, nb_steps = 500.
 The fitness at the end of hill climbing for 5000 points and a  0.7 density is -23 and they were 5429 call to the fitness function (alpha = 0.95, nb_steps = 500.
 The fitness at the end of hil

[[0.95, 100, 0.3, -11, 2193],
 [0.95, 100, 0.7, -11, 2193],
 [0.95, 1000, 0.3, -19, 1675],
 [0.95, 1000, 0.7, -19, 1675],
 [0.95, 5000, 0.3, -23, 5429],
 [0.95, 5000, 0.7, -23, 5429],
 [0.95, 100, 0.3, -7, 5624],
 [0.95, 100, 0.7, -7, 5624],
 [0.95, 1000, 0.3, -19, 2645],
 [0.95, 1000, 0.7, -19, 2645],
 [0.95, 5000, 0.3, -23, 5429],
 [0.95, 5000, 0.7, -23, 5429],
 [0.95, 100, 0.3, -10, 2248],
 [0.95, 100, 0.7, -10, 2248],
 [0.95, 1000, 0.3, -19, 2645],
 [0.95, 1000, 0.7, -19, 2645],
 [0.95, 5000, 0.3, -23, 7012],
 [0.95, 5000, 0.7, -23, 7012],
 [0.95, 100, 0.3, -9, 2380],
 [0.95, 100, 0.7, -9, 2380],
 [0.95, 1000, 0.3, -19, 2645],
 [0.95, 1000, 0.7, -19, 2645],
 [0.95, 5000, 0.3, -23, 7012],
 [0.95, 5000, 0.7, -23, 7012],
 [0.95, 100, 0.3, -11, 2092],
 [0.95, 100, 0.7, -11, 2092],
 [0.95, 1000, 0.3, -19, 2645],
 [0.95, 1000, 0.7, -19, 2645],
 [0.95, 5000, 0.3, -23, 7012],
 [0.95, 5000, 0.7, -23, 7012],
 [0.9, 100, 0.3, -9, 1830],
 [0.9, 100, 0.7, -9, 1830],
 [0.9, 1000, 0.3, -19, 1199]

we can see that the minimum is yield when $\alpha$ = 0.7 and number of steps = 500. And that the density in the problem is not really making the results change when we choose between 0.3 and 0.7. 

It took 4714 to the fitness function with simulated annealing hill climbing wich is really bette than what we had with standard hill climbing.