Copyright **`(c)`** 2024 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# Set Cover problem

See: https://en.wikipedia.org/wiki/Set_cover_problem

In [519]:
import numpy as np
from numpy.typing import NDArray
from icecream import ic


## Reproducible Initialization

If you want to get reproducible results, use `rng` (and restart the kernel); for non-reproducible ones, use `np.random`.

In [520]:
UNIVERSE_SIZE = 10_000
NUM_SETS = 1_000

DENSITY = 0.3

rng = np.random.Generator(np.random.PCG64([UNIVERSE_SIZE, NUM_SETS, int(NUM_SETS * DENSITY)]))

In [521]:
# DON'T EDIT THESE LINES!

SETS = np.random.random((NUM_SETS, UNIVERSE_SIZE)) < DENSITY
for s in range(UNIVERSE_SIZE):
    if not np.any(SETS[:, s]):
        SETS[np.random.randint(NUM_SETS), s] = True
COSTS = np.pow(SETS.sum(axis=1), 1.1)

## Helper Functions

In [522]:
def valid(solution) -> np.bool:
    """Checks wether solution is valid (ie. covers all universe)"""
    return np.all(np.logical_or.reduce(SETS[solution]))


def cost(solution)-> np.float64:
    """Returns the cost of a solution (to be minimized)"""
    return COSTS[solution].sum()

## Have Fun!

In [None]:
def find_median_index(bool_array):
    # Find the indexes where the array is True
    true_indexes = np.where(bool_array)[0]
    
    # Find the median of the true indexes
    median_index = int(np.median(true_indexes))
    
    return median_index

In [524]:
def fitness(solution: NDArray[np.bool]) -> np.float64:
    return -cost(solution) if valid(solution) else np.float64(-1e12)

def mutate(solution: NDArray[np.bool], bound_den: int) -> NDArray[np.bool]:
    new_sol: NDArray[np.bool] = solution.copy()
    while not valid(new_sol) or np.array_equal(new_sol, solution):
        m_index = find_median_index(new_sol)
        bound_range = NUM_SETS//bound_den
        low_bound = max(0, m_index - bound_range)
        high_bound = min(NUM_SETS, m_index + bound_range)
        i: int = rng.integers(low_bound, high_bound)
        i: int = rng.integers(0, NUM_SETS)
        new_sol[i] = not new_sol[i]
    return new_sol

In [525]:
def solve(solution: NDArray[np.bool], num_steps: int = 50_000, bound_range_den: int = 5) -> NDArray[np.bool]:
    new_solution: NDArray[np.bool] = solution.copy()
    for _ in range(num_steps):
        new_solution = mutate(new_solution, bound_range_den)
        if fitness(new_solution) > fitness(solution):
            solution = new_solution

    return solution

In [526]:
# Starting with a random solution with random 50% of the sets
random_solution: NDArray[np.bool] = rng.random(NUM_SETS) < .5
ic(fitness(random_solution), cost(random_solution), valid(random_solution))

for bound_range_den in range(1, 10):
    solution: NDArray[np.bool] = solve(random_solution, 500, bound_range_den)
    ic(fitness(solution), cost(solution), valid(solution), bound_range_den)




ic| fitness(random_solution): np.float64(-3401340.9106805306)
    cost(random_solution): np.float64(3401340.9106805306)
    valid(random_solution): np.True_
ic| fitness(solution): np.float64(-3119557.268680146)
    cost(solution): np.float64(3119557.268680146)
    valid(solution): np.True_
    bound_range_den: 1
ic| fitness(solution): np.float64(-3256367.067815709)
    cost(solution): np.float64(3256367.067815709)
    valid(solution): np.True_
    bound_range_den: 2
ic| fitness(solution): np.float64(-3130086.799782158)
    cost(solution): np.float64(3130086.799782158)
    valid(solution): np.True_
    bound_range_den: 3
ic| fitness(solution): np.float64(-3236004.4533163733)
    cost(solution): np.float64(3236004.4533163733)
    valid(solution): np.True_
    bound_range_den: 4
ic| fitness(solution): np.float64(-3248420.1454414725)
    cost(solution): np.float64(3248420.1454414725)
    valid(solution): np.True_
    bound_range_den: 5
ic| fitness(solution): np.float64(-3242363.229341618)



ic| fitness(random_solution): np.float64(-3401340.9106805306)
    cost(random_solution): np.float64(3401340.9106805306)
    valid(random_solution): np.True_
    
ic| fitness(solution): np.float64(-3119557.268680146)
    cost(solution): np.float64(3119557.268680146)
    valid(solution): np.True_
    bound_range_den: 1


ic| fitness(solution): np.float64(-3256367.067815709)
    cost(solution): np.float64(3256367.067815709)
    valid(solution): np.True_
    bound_range_den: 2


ic| fitness(solution): np.float64(-3130086.799782158)
    cost(solution): np.float64(3130086.799782158)
    valid(solution): np.True_
    bound_range_den: 3


ic| fitness(solution): np.float64(-3236004.4533163733)
    cost(solution): np.float64(3236004.4533163733)
    valid(solution): np.True_
    bound_range_den: 4


ic| fitness(solution): np.float64(-3248420.1454414725)
    cost(solution): np.float64(3248420.1454414725)
    valid(solution): np.True_
    bound_range_den: 5

    
ic| fitness(solution): np.float64(-3242363.229341618)
    cost(solution): np.float64(3242363.229341618)
    valid(solution): np.True_
    bound_range_den: 6


ic| fitness(solution): np.float64(-3187933.834220215)
    cost(solution): np.float64(3187933.834220215)
    valid(solution): np.True_
    bound_range_den: 7


ic| fitness(solution): np.float64(-3279612.999792894)
    cost(solution): np.float64(3279612.999792894)
    valid(solution): np.True_
    bound_range_den: 8
    
    
ic| fitness(solution): np.float64(-3301443.935719664)
    cost(solution): np.float64(3301443.935719664)
    valid(solution): np.True_
    bound_range_den: 9
