In [252]:
import numpy as np
from random import random
from functools import reduce
from queue import PriorityQueue

# Lab 1 -- Set Covering

### What is set covering
The goal of set covering is to find all the sets that covers all the values. We have `NUM_SETS` that is the number of sets and `PROBLEM_SIZE` that is the lenght of the values of each set. The value is choosen between `True` and `False`.

In [253]:
PROBLEM_SIZE = 8
NUM_SETS = 10

In [254]:
sets = [np.array([random() < .3 for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS)]
assert np.all(reduce(np.logical_or,[sets[i] for i in range(NUM_SETS)])), "Not solvable"

### State
The state , for exampe `state = ({1,3,5}, {0,2,4,6,7})` is composed of 2 parts:
- The first part is the taken sets
- The second part is the not taken sets

The goal is to cover all the sets, but with the minimum number of taken sets

In [255]:
state = ({1,3,5}, {0,2,4,6,7})
#We took 1,3,5 and not taken 0,2,4,6,7, we'll rapresent the states in this way, 2 sets.

### Functions
- `goal_check(state)`: returns _True_ only if the first set of the state contains all True, meaning that the sets are all covered.
- `distance(state)`: returns the number of sets that have to be covered.
- `h(state)`: this is the heristic function. 
    - `largest_set_size` is the set with the max number of _True_.
    - `missing_size` is the number of uncovered values to get to the goal (everything covered).
    At the end the heuristic tells us how many sets we need at least to get the job done.
    For example if `larget_set_size` is 4 and `missing_size` is 5 we'll need at least 2 sets for compleating the covering. (we have that the max set is 4, to get to 5 we'll need at least 2 sets)
- `actual_cost(state)`: return the actual cost, that is the lenght of the taken sets.
- `f(n)`: this is the A* function. The goal of A* is to look at the actual cost (the past) but also have an heuristic about the future, given the actual node.

In [256]:
def goal_check(state):
    return np.all(reduce(np.logical_or,[sets[i] for i in state[0]],np.array([False for _ in range(PROBLEM_SIZE)])))
#the reduce does the OR operation on the sets[i] for i in state[0], so the elements taken!!. Then return the AND between the elementes returned
# by the reduce (by the np.all()).

def distance(state):
    return PROBLEM_SIZE - sum(reduce(np.logical_or,[sets[i] for i in state[0]],np.array([False for _ in range(PROBLEM_SIZE)])))
#This function  return the distance of the state from the goal. That's the number of false we still have and
# that has to be covered. The sum returs us the number of true value. 

def covered(state):
    return reduce(
        np.logical_or,
        [sets[i] for i in state[0]],
        np.array([False for _ in range(PROBLEM_SIZE)]),
    )

def h(state):
    largest_set_size = max(sum(s) for s in sets)
    missing_size = PROBLEM_SIZE - sum(covered(state))
    optimistic_estimate = np.ceil(missing_size / largest_set_size)
    return optimistic_estimate

def actual_cost(state):
    return len(state[0]) #The actual cost is the number of elements I have in the first set

def f(n):
    return actual_cost(n) + h(n) #This is the A* function

### Code 
We use a `PriorityQueue` where the "key" is the f(n) !! So when we are doing `_,state = frontier.get()` we'll get the state with the lowest `f(state)`

In [263]:
frontier = PriorityQueue()
initial_state = (set(),set(range(NUM_SETS))) #everithing not taken !
frontier.put((f(initial_state),initial_state))

_,state = frontier.get()
counter =0 
while not goal_check(state):
    for a in state[1]: #in state[1] I have all the elements that I didn't take
        counter+=1
        new_state = (state[0] | {a}, state[1] - {a}) #The | is UNION, - is DIFFERENCE
        frontier.put((f(new_state),new_state))
    _,state = frontier.get()

In [261]:
print(counter)
state

112


({0, 2, 6}, {1, 3, 4, 5, 7, 8, 9})