Copyright **`(c)`** 2024 Giovanni Squillero `<giovanni.squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  

# Set Cover problem

See: https://en.wikipedia.org/wiki/Set_cover_problem

In [10]:
from random import random, seed
from itertools import product
import numpy as np
import numpy.typing as npt
from icecream import ic

## Reproducible Initialization

If you want to get reproducible results, use `rng` (and restart the kernel); for non-reproducible ones, use `np.random`.

In [2]:
# The original values were too large for my machine to handle, so the
# instance is scaled down by a factor of 10 in both dimensions:
# UNIVERSE_SIZE = 100_000
# NUM_SETS = 10_000
UNIVERSE_SIZE = 10_000  # number of elements that have to be covered
NUM_SETS = 1_000  # number of candidate sets
DENSITY = 0.3  # probability that a given element belongs to a given set

# Generator seeded from the instance parameters, so the same configuration
# always produces the same random stream.
rng = np.random.Generator(np.random.PCG64([UNIVERSE_SIZE, NUM_SETS, int(10_000 * DENSITY)]))

In [3]:
# DON'T EDIT THESE LINES!
# Builds the problem instance:
#   SETS[i, j] is True when set i contains element j,
#   COSTS[i] is the size of set i raised to the power 1.1.
# NOTE(review): the draws below use the global `np.random` state, not the
# seeded `rng`, so the instance differs between runs unless the global state
# is seeded elsewhere -- confirm whether `rng` was intended here.

SETS = np.random.random((NUM_SETS, UNIVERSE_SIZE)) < DENSITY
# Make the instance solvable: every element must belong to at least one set.
for s in range(UNIVERSE_SIZE):
    if not np.any(SETS[:, s]):
        SETS[np.random.randint(NUM_SETS), s] = True
COSTS = np.pow(SETS.sum(axis=1), 1.1)

## Helper Functions

In [4]:
def valid(solution):
    """Return whether the selected sets together cover the whole universe."""
    # An element is covered as soon as any selected row contains it.
    covered_elements = SETS[solution].any(axis=0)
    return covered_elements.all()


def cost(solution):
    """Return the total cost of the selected sets (lower is better)."""
    selected_costs = COSTS[solution]
    return selected_costs.sum()

## Have Fun!

In [5]:
# Trivial baseline: select every set at once
solution = np.ones(NUM_SETS, dtype=bool)
valid(solution), cost(solution)

(np.True_, np.float64(6682783.894103612))

In [6]:
# Random baseline: each set is selected independently with probability 0.5
solution: npt.NDArray[np.bool] = rng.random(NUM_SETS) < 0.5
valid(solution), cost(solution)

(np.True_, np.float64(3322440.238779854))

### Note
The two solutions below share some repeated code, but I preferred to keep them as two separate, completely independent functions,
mainly for readability.

In [7]:
def greedy_set_cover() -> npt.NDArray[np.bool]:
    """ Solves the set cover problem using a greedy algorithm 
    that selects the set that maximize the number of uncovered elements.
    
    Note: This is a naive solution that does not consider the added cost of each set.
    
    Returns:
        NDArray[np.bool]: Solution to the set cover problem
    """
    covered: set = set()

    # NUM_SETS.size bool array, initialized with False
    solution: npt.NDArray[np.bool] = np.zeros(NUM_SETS, dtype=np.bool)

    # Create a copy of SETS to avoid modifying the original
    sets_copy: npt.NDArray[np.bool] = SETS.copy()

    while len(covered) < UNIVERSE_SIZE:
        # Select the set that covers the most uncovered elements
        best_set_index: int = max(range(NUM_SETS), key=lambda i: len(set(np.where(sets_copy[i])[0]) - covered))
        solution[best_set_index] = True
        covered.update(np.where(sets_copy[best_set_index])[0])
        sets_copy[best_set_index] = np.zeros(UNIVERSE_SIZE, dtype=np.bool_)  # Mark the set as used by emptying it

    return solution


def greedy_set_cover_with_cost() -> npt.NDArray[np.bool]:
    """ Solves the set cover problem using a greedy algorithm
    that selects the set that minimizes the cost per uncovered element.
    
    Note: The logic is similar to the algorithm used in the "greedy_set_cover" function , but the set selection
    is based on the cost per uncovered element.
    
    Returns:
        NDArray[np.bool]: Solution to the set cover problem
    """
    covered: set = set()

    # NUM_SETS.size bool array, initialized with False
    solution: npt.NDArray[np.bool] = np.zeros(NUM_SETS, dtype=np.bool)

    # Create a copy of SETS to avoid modifying the original
    sets_copy: npt.NDArray[np.bool] = SETS.copy()

    while len(covered) < UNIVERSE_SIZE:
        # Select the set that minimizes the cost per uncovered element
        best_set_index: int = min(
            range(NUM_SETS), 
            key=lambda i: COSTS[i] / len(set(np.where(sets_copy[i])[0]) - covered) if len(set(np.where(sets_copy[i])[0]) - covered) > 0 else float('inf')
        )
        
        solution[best_set_index] = True
        covered.update(np.where(sets_copy[best_set_index])[0])
        sets_copy[best_set_index] = np.zeros(UNIVERSE_SIZE, dtype=np.bool_)  # Mark the set as used by emptying it

    return solution


# Util function to solve set cover problem with different methods
def solve_set_cover(method: str, print_metrics: bool | None = None) -> npt.NDArray[np.bool] | None:
    """Run one of the greedy set cover solvers, selected by name.

    Args:
        method: Solver name, matched case-insensitively and ignoring
            surrounding whitespace. "greedy" selects the coverage-only
            solver; "greedy_with_cost", "with_cost" and "cost" select the
            cost-aware one.
        print_metrics: When truthy, print the method name, validity and cost
            of the solution through ``ic`` instead of returning the solution.

    Returns:
        The solution mask, or None when the metrics were printed or the
        method name is not recognised.
    """
    # Every accepted alias maps onto the solver it triggers.
    solvers = {
        "greedy": greedy_set_cover,
        "greedy_with_cost": greedy_set_cover_with_cost,
        "with_cost": greedy_set_cover_with_cost,
        "cost": greedy_set_cover_with_cost,
    }
    valid_methods: list[str] = list(solvers)

    method = method.strip().lower()
    solver = solvers.get(method)

    # The names used inside the ic(...) calls are kept on purpose: ic echoes
    # the argument expressions, so they are part of the printed output.
    if solver is None:
        ic("Invalid method, please use one of the following: ", valid_methods)
        return None

    solution: npt.NDArray[np.bool] = solver()
    if not print_metrics:
        return solution

    ic(method, valid(solution), cost(solution))
    return None


In [8]:
# Print validity and cost for both greedy variants
solve_set_cover("greedy", print_metrics=True)
solve_set_cover("with_cost", print_metrics=True)

ic| method: 'greedy'
    valid(solution): np.True_
    cost(solution): np.float64(101583.5177978479)
ic| method: 'with_cost'
    valid(solution): np.True_
    cost(solution): np.float64(105978.24275451187)
