# Set Covering - 2023-10-10
Copyright(c) 2023 Alex Buffa


In [30]:
import numpy as np
from random import random
from typing import Tuple, Set
from functools import reduce
from operator import or_
from pprint import pprint
from queue import PriorityQueue, LifoQueue, SimpleQueue, Queue
from collections import namedtuple
from typing import Callable
Result = namedtuple("Result", ["queue_name", "iterations", "state", "visualization", "cost"])
State = Tuple[Set[int], Set[int]]

Define our problem data

In [31]:
PROBLEM_SIZE = 8
NUM_SETS = 10
THRESHOLD = 0.3
SETS = tuple(np.array([random() < THRESHOLD for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS))
while not all(reduce(or_, [SETS[i] for i in range(NUM_SETS)])):
    SETS = tuple(np.array([random() < THRESHOLD for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS))
results: dict[str, Result] = dict()


In [32]:
# Utility function just to see our current taken array
def visualize_state(state: State) -> list[int]:
    return sum([SETS[i] for i in state[0]])

In [33]:
def goal_check(state: State):
    return all(reduce(or_, [SETS[i] for i in state[0]], np.array([False for _ in range(PROBLEM_SIZE)])))

In [34]:
def search(name: str, initial_state: State =None,*, frontier = None, priority: Callable[[State],int] = None) -> Result:
    if initial_state is None:
        initial_state = (set(), set(range(NUM_SETS)))
    assert len(initial_state) == 2
    if frontier is None:
        frontier = PriorityQueue()
    if priority is None:
        priority = lambda x: None
    name = f"{name} - {frontier.__class__.__qualname__}"
    WrappedState = namedtuple("WrappedState", ["priority", "state"])
    frontier.put(WrappedState(priority(initial_state), initial_state))
    _, state = frontier.get()
    counter = 0
    while not goal_check(state):
        counter += 1
        for a in state[1]:
            new_state = (state[0] ^ {a}, state[1] ^ {a})
            frontier.put(WrappedState(priority(new_state), new_state))
        _, state = frontier.get()
    res = Result(name, counter, state, visualize_state(state), priority(state))
    results[name] = res
    return res


Depth First Search

In [35]:
search("Depth First", frontier=LifoQueue()).state

({3, 4, 5, 6, 7, 8, 9}, {0, 1, 2})

Breadth First Search

In [36]:
# Using SimpleQueue, which does it internally
search("Breadth-First", frontier=SimpleQueue()).state

({3, 6, 8}, {0, 1, 2, 4, 5, 7, 9})

In [37]:
# Manually doing it with PriorityQueue and defining the priority as the number of taken sets
search("Breadth-First", frontier=PriorityQueue(), priority=lambda x: len(x[0])).state

({3, 6, 8}, {0, 1, 2, 4, 5, 7, 9})

I now define a function to measure a cost of a given state, based only on the action done in the past.  
Doing so we are approaching the problem with an *uninformed* approach.

In [38]:
def uninformed_cost(state: State) -> int:
    """Number of tiles summed up"""
    if(len(state[0])==0):
        return 0
    return sum(sum([SETS[i] for i in state[0]]))


In [39]:
search("Djikstra", priority=uninformed_cost).state

({3, 5, 6, 9}, {0, 1, 2, 4, 7, 8})

We try now the *informed* approach by defining a cost function that takes into account the distance from the goal.  
For example, we define the distance function as the number of nodes that are not yet covered.

In [40]:
def distance(state: State) -> int:
    if(len(state[0]) == 0 ):
        return PROBLEM_SIZE
    return (sum([SETS[i] for i in state[0]]) == 0).sum()

A* requires a heuristic function that is admissible, i.e. it never overestimates the cost to reach the goal.  
With the above distance function we have an admissible heuristic function.
The priority for A* is given by the sum of the uninformed cost function and the heuristic function.

In [41]:
search("A*", priority=lambda x: uninformed_cost(x) + distance(x)).state

({3, 5, 6, 9}, {0, 1, 2, 4, 7, 8})

In [44]:
print("All the results obtained above, sorted by number of iterations")
for result in sorted(results.values(), key=lambda x: x.iterations):
    print(result)

All the results obtained above, sorted by number of iterations
Result(queue_name='Depth First - LifoQueue', iterations=7, state=({3, 4, 5, 6, 7, 8, 9}, {0, 1, 2}), visualization=array([2, 2, 3, 1, 1, 4, 5, 3]), cost=None)
Result(queue_name='Breadth-First - PriorityQueue', iterations=132, state=({8, 3, 6}, {0, 1, 2, 4, 5, 7, 9}), visualization=array([1, 1, 1, 1, 1, 3, 2, 1]), cost=3)
Result(queue_name='Breadth-First - SimpleQueue', iterations=363, state=({8, 3, 6}, {0, 1, 2, 4, 5, 7, 9}), visualization=array([1, 1, 1, 1, 1, 3, 2, 1]), cost=None)
Result(queue_name='A* - PriorityQueue', iterations=680, state=({9, 3, 5, 6}, {0, 1, 2, 4, 7, 8}), visualization=array([1, 2, 1, 1, 1, 2, 2, 1]), cost=11)
Result(queue_name='Djikstra - PriorityQueue', iterations=2784, state=({9, 3, 5, 6}, {0, 1, 2, 4, 7, 8}), visualization=array([1, 2, 1, 1, 1, 2, 2, 1]), cost=11)
