In [15]:
import numpy as np
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue

from tqdm import tqdm


In [16]:
# constants
PROBLEM_SIZE = 5  # dimension of the finite set U
NUMBER_SET = 20  # number of subsets in the collection S
SETS = tuple(
    np.array([random() < 0.3 for i in range(PROBLEM_SIZE)]) for j in range(NUMBER_SET)
)  # generate sets in S

# Define State as a named tuple
State = namedtuple("State", ["taken", "cost", "heuristic"])


In [17]:
def goal_check(state, sets):
    """
    check if the logical OR all the elements yeald a line of all true ie the
    condition for a state to be covering the whole set U
    """
    return np.all(
        reduce(np.logical_or, [sets[i] for i in state.taken], np.zeros(PROBLEM_SIZE))
    )


# assert generated problem is solvable, ie the goal check of a stete with all
# sets taken is true
assert goal_check(State(range(NUMBER_SET), 0, 0), SETS)


In [18]:
def cost(state):
    """The cost function calculates the cost of reaching a particular state"""
    return len(state.taken)

#### Commonly used Heuristic for set covering:

- **Minimum Remaining Elements (MRE)**: This heuristic estimates the number of elements that are not yet covered by any selected subset. It encourages selecting subsets that cover the fewest remaining uncovered elements, which is consistent with minimizing the actual cost. Therefore, it is admissible.

- **Maximum Coverage (MC)**:  This heuristic estimates the number of currently uncovered elements and prioritizes selecting subsets that contribute to covering the largest number of these elements. It also encourages minimizing the actual cost, making it admissible. *The Greedy Heuristic is essentially the same as the MC. Both heuristics prioritize selecting subsets that cover the largest number of currently uncovered elements.*

- **Combination of MRE and MC**:  A combination of admissible heuristics remains admissible. If both MRE and MC individually are admissible, their combination (e.g., taking the average) is still admissible.

- **Randomized Heuristics**: A random heuristic does not guarantee admissibility. It may overestimate or underestimate the true cost. *It is not guaranteed to be admissible*, but its fun.

- **All or None Heuristic**: This heuristic checks first for notable sets (ones that cover all or none of the remaining elements) and assigns them corresponding priority. If no such sets exist, it uses the MRE heuristic. This heuristic is admissible.
 

In [19]:
#
# state = namedtuple("State", ["taken", "cost", "heuristic"])
#
# SETS example =
#   Set 0 [False  True  True  True False] Coverage: 3
#   Set 1 [False  True False False False] Coverage: 1
#   Set 2 [ True  True False False False] Coverage: 2
#   Set 3 [ True False False False  True] Coverage: 2
#   Set 4 [False  True  True  True False] Coverage: 3
#   Set 5 [ True False False False False] Coverage: 1
#   Set 6 [False False False False False] Coverage: 0
#   Set 7 [False False False False  True] Coverage: 1
#   Set 8 [False False False False False] Coverage: 0
#   Set 9 [False  True False False False] Coverage: 1
#


def MRE_heuristic(state, sets):
    # Calculate the uncovered elements by taking the logical OR of selected subsets
    uncovered = reduce(
        np.logical_or, [sets[i] for i in state.taken], np.zeros(len(sets[0]))
    )

    # Estimate the remaining cost by counting the number of uncovered elements
    heuristic = -np.sum(np.logical_not(uncovered))

    return heuristic


def MC_heuristic(state, sets):
    # Calculate the uncovered elements by taking the logical OR of selected subsets
    uncovered = reduce(
        np.logical_or, [sets[i] for i in state.taken], np.zeros(len(sets[0]))
    )

    # Sort the subsets by how many uncovered elements they cover (in descending order)
    subsets_by_coverage = sorted(
        range(len(sets)), key=lambda i: -np.sum(sets[i] & np.logical_not(uncovered))
    )

    # Initialize the heuristic value
    heuristic = 0
    covered_elements = np.copy(uncovered)

    # Iterate through subsets sorted by coverage and count their contribution
    for subset_index in subsets_by_coverage:
        subset = sets[subset_index]
        contribution = subset & np.logical_not(covered_elements)
        heuristic += 1  # Increment the cost
        covered_elements = np.logical_or(covered_elements, contribution)

        if np.all(covered_elements):
            break  # All elements are covered

    return heuristic


def MRE_MC_heuristic(state, sets):
    return (MRE_heuristic(state, sets) + MC_heuristic(state, sets)) / 2


def RANDOM_heuristic(state, sets):
    return random()


def ALLorNONE_heuristic(state, sets):
    # Calculate the uncovered elements by taking the logical OR of selected subsets
    uncovered = reduce(np.logical_or, [sets[i] for i in state.taken], np.zeros(len(sets[0])))

    # Check if there exist a set with all true 
    all_true_covered = np.any(np.all(np.logical_and(sets, np.logical_not(uncovered)), axis=1))

    # Check if there exist a set with all false
    all_false_covered = np.any(np.all(np.logical_and(np.logical_not(sets), np.logical_not(uncovered)), axis=1))


    # Determine the heuristic value based on the conditions
    if all_true_covered:
        heuristic = 0  # Highest priority (goal state already reached)
    elif all_false_covered:
        heuristic = float('inf')  # Lowest priority (impossible to reach the goal state)
    else:
        # Calculate the remaining cost as in the MRE heuristic
        heuristic = -np.sum(np.logical_not(uncovered))

    return heuristic



In [20]:
def astar(sets, heuristic):
    # Initialize the priority queue with the initial state
    initial_state = State(
        taken=[],
        cost=0,
        heuristic=heuristic(State(taken=[], cost=0, heuristic=0), sets),
    )
    open_set = PriorityQueue()
    open_set.put((initial_state.cost + initial_state.heuristic, initial_state))

    # Initialize the closed set as an empty set
    closed_set = set()

    checked_states = 0

    progress_bar = tqdm(total=None)

    while not open_set.empty():
        # Get the state with the lowest f score from the priority queue
        _, current_state = open_set.get()

        checked_states += 1

        progress_bar.update(1)

        # If the current state is a goal state, return the solution
        if goal_check(current_state, sets):
            progress_bar.close()
            return current_state.taken, checked_states

        # Add the current state to the closed set
        closed_set.add(tuple(current_state.taken))

        # Generate successor states by adding one more subset
        for subset in range(NUMBER_SET):
            if subset not in current_state.taken:
                # Create a new state by adding the subset
                new_taken = current_state.taken + [subset]
                new_cost = cost(State(new_taken, 0, 0))
                new_heuristic = heuristic(State(new_taken, 0, 0), sets)
                new_state = State(new_taken, new_cost, new_heuristic)

                # If the state is not in the closed set, add it to the open set
                if tuple(new_taken) not in closed_set:
                    open_set.put((new_state.cost + new_state.heuristic, new_state))

    # If the open set is empty and no solution is found, return None
    progress_bar.close()
    return None


In [21]:
if NUMBER_SET <= 10:
    for i in range(NUMBER_SET):
        print("Set", i, SETS[i], "Coverage:", np.sum(SETS[i]))

print()
print("MRE")
taken, states = astar(SETS, MRE_heuristic)
print(
    " Solution:",
    taken,
    "\n Cost:",
    len(taken),
)
print()
print("MC")
taken, states = astar(SETS, MC_heuristic)
print(
    " Solution:",
    taken,
    "\n Cost:",
    len(taken),
)
print()
print("MRE+MC")
taken, states = astar(SETS, MRE_MC_heuristic)
print(
    " Solution:",
    taken,
    "\n Cost:",
    len(taken),
)
print()
print("RANDOM")
taken, states = astar(SETS, RANDOM_heuristic)
print(
    " Solution:",
    taken,
    "\n Cost:",
    len(taken),
)
print()
print("ALLorNONE")
taken, states = astar(SETS, ALLorNONE_heuristic)
print(
    " Solution:",
    taken,
    "\n Cost:",
    len(taken),
)



MRE


244948it [02:38, 1544.18it/s]


 Solution: [1, 2, 6] 
 Cost: 3

MC


48it [00:00, 184.69it/s]


 Solution: [1, 2, 6] 
 Cost: 3

MRE+MC


7482it [00:34, 214.43it/s]


 Solution: [1, 2, 6] 
 Cost: 3

RANDOM


437it [00:00, 5257.56it/s]


 Solution: [12, 4, 1] 
 Cost: 3

ALLorNONE


176420it [04:06, 714.96it/s]

 Solution: [1, 2, 6] 
 Cost: 3



