# Set covering implementation

In [81]:
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue, LifoQueue
import numpy as np

In [82]:
PROBLEM_SIZE = 5
NUM_SETS = 10
SET_PROBABILITY = 0.3
SETS = tuple(
    np.array([random() <SET_PROBABILITY for _ in range(PROBLEM_SIZE)])
    for _ in range(NUM_SETS)
)
State = namedtuple('State', ['taken', 'not_taken'])

print(SETS)

(array([False, False, False, False,  True]), array([ True,  True, False, False,  True]), array([False,  True, False,  True,  True]), array([False, False,  True, False, False]), array([False, False, False, False, False]), array([False, False, False,  True, False]), array([ True, False,  True, False, False]), array([False, False, False, False, False]), array([False,  True, False, False, False]), array([False, False,  True, False, False]))


## Utility functions
These are utility functions:
* goal_check: check if the current state covers all the positions
* distance: compute how many positions are covered by the current set
* count_taken_sets: count the actual number of positions covered by the set

In [83]:
def goal_check(state):
    return np.all(reduce(
        np.logical_or,
        [SETS[i] for i in state.taken],
        np.array([False for _ in range(PROBLEM_SIZE)]),
    ))

def distance(state):
    return PROBLEM_SIZE - sum(
        reduce(
            np.logical_or,
            [SETS[i] for i in state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)]),
        ))

def count_taken_sets(state):
    return len(state.taken)

assert goal_check(
    State(set(range(NUM_SETS)), set())
), "Probelm not solvable"


## Search function
This is a function to implement a search for set covering 
It allows to specify both the data structure on which to memorize the frontier and the priority function to use on that.

In [84]:
def set_covering_search(state=None, frontier=None, priority_func= None):
    
    if state is None:
        state = State(set(), set(range(NUM_SETS)))
    if frontier is None:
        frontier = PriorityQueue()
    if priority_func is None:
        priority_func = lambda _: None
        
    frontier.put((priority_func(state), state))
    counter = 0
    _, current_state = frontier.get()
    while not goal_check(current_state):
        counter += 1
        for action in current_state[1]:
            new_state = State(
                current_state.taken ^ {action},
                current_state.not_taken ^ {action},
            )
            frontier.put((priority_func(new_state), new_state))
        _, current_state = frontier.get()
    
    print(
        f"Solved in {counter:,} steps ({len(current_state.taken)} tiles)"
    )
    print(f"Solution: {current_state}")

In [85]:
print("Breadth-first search")
set_covering_search(frontier=SimpleQueue())
print("Depth-first search")
set_covering_search(frontier=LifoQueue())
print("Greedy best-first search")
set_covering_search(frontier=PriorityQueue(), priority_func=distance)

Breadth-first search
Solved in 34 steps (2 tiles)
Solution: State(taken={2, 6}, not_taken={0, 1, 3, 4, 5, 7, 8, 9})
Depth-first search
Solved in 8 steps (8 tiles)
Solution: State(taken={2, 3, 4, 5, 6, 7, 8, 9}, not_taken={0, 1})
Greedy best-first search
Solved in 3 steps (3 tiles)
Solution: State(taken={1, 2, 3}, not_taken={0, 4, 5, 6, 7, 8, 9})


## Lab 1
To address the lab1 request to make an A* search algorithm, the 2 functions to define the estimated cost for each node are:
* h(n): heuristic function that compute the cost to get from the actual node n to the goal state; the 'distance' function is used for that
* g(n): actual cost function that compute the cost to reach the actual node n; the 'count_taken_sets' is used for that

In [86]:
print("A* search")
set_covering_search(frontier=PriorityQueue(), priority_func=lambda state: count_taken_sets(state) + distance(state))

A* search
Solved in 3 steps (2 tiles)
Solution: State(taken={2, 6}, not_taken={0, 1, 3, 4, 5, 7, 8, 9})
