### Set cover problem 

Given a set of elements {1, 2, …, n} (called the universe) and a collection S of m subsets whose union equals the universe, the set cover problem is to identify the smallest sub-collection of S whose union equals the universe.

In [406]:
from random import random, choice, randint
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue, LifoQueue
from copy import  copy
from math import ceil 
import numpy as np
from tqdm.auto import tqdm

### Setting problem parameters

In [424]:
PROBLEM_SIZE = 8  # number of elements that have to be covered
NUM_SETS = 5  # number of candidate sets to choose from

# Each candidate set is a boolean mask of length PROBLEM_SIZE: True at
# position j means that the set contains element j. Every element is put
# into every set independently, with probability 0.3.
SETS = tuple(
    np.array([random() < 0.3 for _ in range(PROBLEM_SIZE)])
    for _ in range(NUM_SETS)
)

# print('Problem size:', SETS)

# A search state is the pair (indices of the sets taken so far,
# indices of the sets not taken yet).
State = namedtuple('State', 'taken not_taken')

In [425]:
#Function to check all the elements are covered

def goal_check(state):
    """Tell whether the sets taken in `state` cover every element.

    Returns a truthy value only when all entries of the coverage mask
    computed by `covered(state)` are True.
    """
    coverage = covered(state)
    return coverage.all()

def covered(state):
    """Build the coverage mask of the sets taken in `state`.

    Returns a boolean array of length PROBLEM_SIZE whose j-th entry is True
    when at least one of the sets indexed by `state.taken` contains element j.
    With no set taken the mask is all False.
    """
    mask = np.zeros(PROBLEM_SIZE, dtype=bool)
    for set_index in state.taken:
        # OR in the elements contributed by this set
        mask = np.logical_or(mask, SETS[set_index])
    return mask

# Sanity check: if even taking every available set does not cover all the
# elements, this random instance has no solution at all.
assert goal_check(
    State(taken={*range(NUM_SETS)}, not_taken=set())
), "Probelm not solvable"

## A* algorithm
For A* to return an optimal solution, the heuristic function must be admissible: it has to be optimistic, i.e. it must never overestimate the number of sets still needed to reach the goal.

In [409]:
#using the distance from the goal as a heuristic
def heuristic1(state):
    """Naive estimate: PROBLEM_SIZE minus the number of sets already taken.

    The value does not look at which elements are actually covered, so it can
    be larger than the number of sets really needed; it is kept only for
    comparison with the other heuristic.
    """
    taken_count = len(state.taken)
    return PROBLEM_SIZE - taken_count

#This heuristic does not always provide the best solution: it is not admissible.
#I compared many runs against breadth-first search, which finds the minimum number of sets needed to reach the goal,
#and this heuristic does not always return a solution of that size.

In [442]:
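#Find an optimistic estimate of the number of sets still needed.
#At every state, look for the not-yet-taken set that covers the most currently uncovered elements.
#How many sets of that (maximum) size would be necessary to cover all the currently uncovered elements?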

def heuristic2(state : State):
    """Optimistic estimate of how many more sets are needed to cover everything.

    Let `remaining` be the number of elements not covered by `state.taken`
    and `max_val` the largest number of those elements that a single set in
    `state.not_taken` would cover. Every additional set covers at most
    `max_val` new elements, so at least ceil(remaining / max_val) more sets
    are required.

    Returns:
        0 when everything is already covered;
        ceil(remaining / max_val) when some remaining set is useful;
        PROBLEM_SIZE when no remaining set covers any uncovered element
        (including the case where `state.not_taken` is empty).
    """
    already_covered = covered(state)
    if np.all(already_covered):
        return 0
    not_already_covered = np.logical_not(already_covered)
    remaining = np.count_nonzero(not_already_covered)
    # max_val is the maximum number of uncovered elements covered by a single
    # set of the not_taken group; default=0 keeps max() from raising
    # ValueError when there is no set left to take.
    max_val = max(
        (
            np.count_nonzero(np.logical_and(SETS[i], not_already_covered))
            for i in state.not_taken
        ),
        default=0,
    )
    return ceil(remaining / max_val) if max_val != 0 else PROBLEM_SIZE
    

In [439]:
# Heuristic plugged into a_star(). Assign heuristic1 here instead to compare:
# that one is not admissible, so the cover it leads to may not be minimal.
heuristic = heuristic2


def actual_cost(state):
    """Cost paid so far: the number of sets already taken in `state`."""
    chosen = state.taken
    return len(chosen)

def a_star(state):
    """A* priority of `state`: cost paid so far plus the heuristic estimate."""
    cost_so_far = actual_cost(state)
    estimate_to_goal = heuristic(state)
    return cost_so_far + estimate_to_goal

In [440]:
# A* search: repeatedly expand the frontier state with the lowest a_star()
# value until a state covering every element is extracted.
frontier = PriorityQueue()
state = State(set(), set(range(NUM_SETS)))

frontier.put((a_star(state), state))

# States already pushed on the frontier, stored as frozensets of taken
# indices. The same state can be reached by taking the same sets in a
# different order; since a_star() depends only on the state itself, pushing
# it again would only add redundant work.
seen = {frozenset(state.taken)}

steps = 0
weight, current_state = frontier.get()

with tqdm(total=None) as pbar:
    while not goal_check(current_state):
        steps += 1
        for action in current_state.not_taken:
            # action is in not_taken and not in taken, so moving it across is
            # a plain union on one side and a plain difference on the other
            new_taken = current_state.taken | {action}
            key = frozenset(new_taken)
            if key in seen:
                continue
            seen.add(key)
            new_state = State(new_taken, current_state.not_taken - {action})
            frontier.put((a_star(new_state), new_state))
        if frontier.empty():
            # PriorityQueue.get() would block forever on an empty queue
            raise RuntimeError("Frontier exhausted before reaching a goal state")
        weight, current_state = frontier.get()
        pbar.update(1)

print(f'Solution found in {steps} steps and {len(current_state.taken)} tiles')
print(f'Final state: {current_state.taken}')


3it [00:00, 3000.93it/s]

current_state taken :  {0} weight :  2
current_state taken :  {1} weight :  3
current_state taken :  {2} weight :  2
current_state taken :  {3} weight :  3
current_state taken :  {4} weight :  3
Next step
current_state taken :  {0} weight :  2
current_state taken :  {0, 1} weight :  3
current_state taken :  {0, 2} weight :  2
current_state taken :  {0, 3} weight :  3
current_state taken :  {0, 4} weight :  3
Next step
current_state taken :  {2} weight :  2
current_state taken :  {0, 2} weight :  2
current_state taken :  {1, 2} weight :  3
current_state taken :  {2, 3} weight :  3
current_state taken :  {2, 4} weight :  3
Next step
current_state taken :  {0, 2} weight :  2
Solution found in 3 steps and 2 tiles
Final state: {0, 2}



