In [170]:
from itertools import product
import random
import numpy as np
from scipy import sparse
from copy import copy, deepcopy

In [171]:
def make_set_covering_problem(num_points, num_sets, density):
    """Build a reproducible random set-covering instance.

    The random generator is seeded from the three arguments, so calling the
    function twice with the same arguments yields the same matrix.

    Args:
        num_points: number of items to cover (columns of the result).
        num_sets: number of candidate sets (rows of the result).
        density: probability that a given set contains a given point.

    Returns:
        A dense boolean ``numpy`` array of shape ``(num_sets, num_points)``
        where entry ``[s, p]`` is True when set ``s`` covers point ``p``.
        Every column contains at least one True.
    """
    random.seed(num_points * 2654435761 + num_sets + density)
    membership = sparse.lil_array((num_sets, num_points), dtype=bool)

    # Sweep sets (outer) and points (inner); each cell is switched on
    # independently with probability `density`.
    for set_idx in range(num_sets):
        for point_idx in range(num_points):
            if random.random() < density:
                membership[set_idx, point_idx] = True

    # Make the instance solvable: every point is placed in one random set.
    for point_idx in range(num_points):
        chosen_set = random.randint(0, num_sets - 1)
        membership[chosen_set, point_idx] = True

    # Dense ndarray return (thanks Beatrice Occhiena).
    return np.array(membership.toarray())

# Problem sizes (number of points) used in the experiments.
num_points = [100, 1000, 5000]
# Number of sets: bound to the very same list object as num_points.
num_sets = num_points
# Probabilities that a given set covers a given point.
density = [0.3, 0.7]

# Halloween Challenge

Find the best solution with the fewest calls to the fitness functions for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

In [172]:
# Small 5x5 instance, used only to inspect a single entry of the generated matrix.
x = make_set_covering_problem(5, 5, .3)
print("Element at row=0 and column=1:", x[0, 1])

Element at row=0 and column=1: False


The matrix generated by the `make_set_covering_problem()` function has `num_sets` rows and `num_points` columns.
Each row is randomly generated and consists of `True`/`False` elements: `True` in a column means that the row (set) covers that position.
The objective is to pick the smallest number of rows that, together, cover as many positions as possible (ideally all of them).
For example:
* row 1 = True, False, False, True, False, True
* row 2 = False, True, True, False, False, False
* row 3 = True, False, False, False, False, False
* row 4 = False, False, False, False, True, True
* row 5 = False, False, True, True, False, True

If we pick rows 1, 2 and 4, together they cover every position with `True`.

The **state** is a boolean array of length `num_sets` that indicates which rows we took from the problem.
For instance, in the aforementioned example:
* state = [True, True, False, True, False]

This is because we solved the problem by taking the rows at indices 0, 1 and 3 of the problem matrix.

First, we need a fitness function and a tweak function.

In [174]:
def fitness_old(state):
    """Return the sum of the entries of ``state`` (the number of selected sets
    when ``state`` is a boolean selection mask)."""
    selected_count = np.sum(state)
    return selected_count

def fitness(state, sets):
    """Count how many points are covered by the sets selected in ``state``.

    Args:
        state: array with one entry per row of ``sets``; rows whose entry
            equals 1 (True) are considered selected.
        sets: 2-D array where rows are sets and columns are points.

    Returns:
        The number of columns that contain at least one True among the
        selected rows.
    """
    chosen_rows = sets[state == 1]
    # Number of selected sets that contain each point.
    hits_per_point = np.sum(chosen_rows, axis=0)
    is_covered = hits_per_point > 0
    return np.sum(is_covered)

def fitness_beatrice(state, sets):
    """Multi-criteria fitness, meant to be compared as a tuple.

    Objective: maximise the number of covered points, then minimise the
    number of selected sets (cost), then minimise the number of points
    covered more than once (overlap).

    Args:
        state: array with one entry per row of ``sets``; rows whose entry
            equals 1 (True) are considered selected.
        sets: 2-D array where rows are sets and columns are points.

    Returns:
        A tuple ``(coverage, -cost, -overlap)``.
    """
    # Number of selected sets that contain each point, computed once.
    hits_per_point = np.sum(sets[state == 1], axis=0)

    coverage = np.sum(hits_per_point > 0)
    n_selected = np.sum(state)
    n_overlapping = np.sum(hits_per_point > 1)

    return coverage, -n_selected, -n_overlapping

def tweak(state):
    """Return a copy of ``state`` with one randomly chosen entry negated.

    The input is left untouched; the position to flip is drawn uniformly
    with the ``random`` module.
    """
    mutated = copy(state)
    flip_at = random.randint(0, len(state) - 1)
    mutated[flip_at] = not mutated[flip_at]
    return mutated

In [175]:
def random_mutation_hc(initial_solution, problem, max_iterations):
    """Random-mutation hill climbing for the set-covering problem.

    Starting from ``initial_solution``, repeatedly applies ``tweak`` and keeps
    the mutated state only when its ``fitness`` is strictly higher. The search
    stops as soon as every point is covered or after ``max_iterations`` steps.

    Args:
        initial_solution: selection mask with one entry per row of ``problem``.
        problem: 2-D array where rows are sets and columns are points.
        max_iterations: maximum number of mutations to try.

    Returns:
        A tuple ``(state, iterations)``: the best state found and the number
        of mutations that were evaluated (0 when the initial solution already
        covers every point).
    """
    # Fitness counts covered points, i.e. columns. The target is therefore the
    # number of columns, not len(problem) (the number of rows), which is only
    # equal for square instances.
    num_points = problem.shape[1]

    actual_state = initial_solution
    # Cache the fitness of the current state so that each iteration costs a
    # single fitness evaluation.
    actual_fitness = fitness(actual_state, problem)
    number_iterations = 0

    # Nothing to improve if the starting point is already a full cover.
    if actual_fitness == num_points:
        return actual_state, number_iterations

    # Loop until we reach the maximum number of iterations
    for _ in range(max_iterations):
        number_iterations += 1
        next_state = tweak(actual_state)
        next_fitness = fitness(next_state, problem)

        # Accept the mutation only when it is strictly better
        if next_fitness > actual_fitness:
            actual_state, actual_fitness = next_state, next_fitness

            # If we have reached a satisfactory solution, stop
            if actual_fitness == num_points:
                break

    return actual_state, number_iterations

In [187]:
# Upper bound on the number of hill-climbing iterations for each instance.
max_iterations = 1000

# Run RMHC on every (size, density) combination, using square instances
# (n sets, n points).
for n, d in product(num_points, density):
    print("n={}, d={}".format(n, d))
    problem = make_set_covering_problem(n, n, d)
    # Start from the empty selection: no set is taken.
    initial_solution = np.zeros(n, dtype=bool)
    rmhc, iterations = random_mutation_hc(initial_solution, problem, max_iterations)
    print("RMHC: {} iterations".format(iterations))
    # The fitness of the returned state is the number of covered points.
    print("RMHC: {} fitness".format(fitness(rmhc, problem)))
    print("-------------------------")
    

n=100, d=0.3
RMHC: 11 iterations
RMHC: 100 fitness
-------------------------
n=100, d=0.7
RMHC: 4 iterations
RMHC: 100 fitness
-------------------------
n=1000, d=0.3
RMHC: 22 iterations
RMHC: 1000 fitness
-------------------------
n=1000, d=0.7
RMHC: 6 iterations
RMHC: 1000 fitness
-------------------------
n=5000, d=0.3
RMHC: 24 iterations
RMHC: 5000 fitness
-------------------------
n=5000, d=0.7
RMHC: 9 iterations
RMHC: 5000 fitness
-------------------------
