Copyright **`(c)`** 2024 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# Set Cover problem

See: https://en.wikipedia.org/wiki/Set_cover_problem

In [386]:
import numpy as np
from numpy.typing import NDArray
from icecream import ic


## Reproducible Initialization

If you want to get reproducible results, use `rng` (and restart the kernel); for non-reproducible ones, use `np.random`.

In [387]:
UNIVERSE_SIZE = 1_000
NUM_SETS = 200

DENSITY = 0.3

rng = np.random.Generator(np.random.PCG64([UNIVERSE_SIZE, NUM_SETS, int(NUM_SETS * DENSITY)]))

In [388]:
# DON'T EDIT THESE LINES!

SETS = np.random.random((NUM_SETS, UNIVERSE_SIZE)) < DENSITY
for s in range(UNIVERSE_SIZE):
    if not np.any(SETS[:, s]):
        SETS[np.random.randint(NUM_SETS), s] = True
COSTS = np.pow(SETS.sum(axis=1), 1.1)

## Helper Functions

In [389]:
def valid(solution):
    """Checks wether solution is valid (ie. covers all universe)"""
    return np.all(np.logical_or.reduce(SETS[solution]))


def cost(solution):
    """Returns the cost of a solution (to be minimized)"""
    return COSTS[solution].sum()

## Have Fun!

In [390]:
def fitness(solution: NDArray[np.bool]) -> float:
    return -cost(solution) if valid(solution) else -np.inf

def mutate(solution: NDArray[np.bool], tried_indices: set) -> NDArray[np.bool]:
    new_sol: NDArray[np.bool] = solution.copy()
    while not valid(new_sol) or np.array_equal(new_sol, solution):
        i: int = rng.integers(0, NUM_SETS)
        new_sol[i] = not new_sol[i]
    return new_sol

In [396]:
def solve(solution: NDArray[np.bool]) -> NDArray[np.bool]:
    new_solution: NDArray[np.bool] = solution.copy()
    tried_indices = set()
    for _ in range(50_000):
        new_solution = mutate(new_solution, tried_indices)
        if fitness(new_solution) > fitness(solution):
            solution = new_solution

    return solution

In [397]:
# Starting with a random solution with random 50% of the sets
random_solution: NDArray[np.bool] = rng.random(NUM_SETS) < .5
ic(fitness(random_solution), cost(random_solution), valid(random_solution))

solution: NDArray[np.bool] = solve(random_solution)
ic(fitness(solution), cost(solution), valid(solution))


ic| fitness(random_solution): np.float64(-54678.078973476026)
    cost(random_solution): np.float64(54678.078973476026)
    valid(random_solution): np.True_
ic| fitness(solution): np.float64(-39178.88541156995)
    cost(solution): np.float64(39178.88541156995)
    valid(solution): np.True_
ic| fitness(solution): np.float64(-37389.91944993916)
    cost(solution): np.float64(37389.91944993916)
    valid(solution): np.True_
