In [17]:
import numpy as np
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue

from tqdm import tqdm


# Problem Definition:

In [18]:
# constants
PROBLEM_SIZE = 20  # dimension of the finite set U to cover
NUMBER_SET = 40  # number of randomized subsets to chose from
SETS = tuple(
    np.array([random() < 0.3 for i in range(PROBLEM_SIZE)]) for j in range(NUMBER_SET)
)  # generate radnomized sets

# Define State as a named tuple
State = namedtuple("State", ["taken", "cost", "heuristic"])


In [19]:
def goal_check(state, sets):
    """
    check if the logical OR all the elements yeald a line of all true ie the
    condition for a state to be covering the whole set U
    """
    return np.all(
        reduce(np.logical_or, [sets[i] for i in state.taken], np.zeros(PROBLEM_SIZE))
    )


# assert generated problem is solvable, ie the goal check of a stete with all
# sets taken is true
assert goal_check(State(range(NUMBER_SET), 0, 0), SETS)


# Cost and Heuristic Functions:

In [20]:
def cost(state):
    """the cost is the number of sets taken by the solution"""
    return len(state.taken)

In [21]:
#
# state = namedtuple("State", ["taken", "cost", "heuristic"])
#
# SETS example =
#   Set 0 [False  True  True  True False] Coverage: 3
#   Set 1 [False  True False False False] Coverage: 1
#   Set 2 [ True  True False False False] Coverage: 2
#   Set 3 [ True False False False  True] Coverage: 2
#   Set 4 [False  True  True  True False] Coverage: 3
#   Set 5 [ True False False False False] Coverage: 1
#   Set 6 [False False False False False] Coverage: 0
#   Set 7 [False False False False  True] Coverage: 1
#   Set 8 [False False False False False] Coverage: 0
#   Set 9 [False  True False False False] Coverage: 1
#


def TRIVIAL_heuristic(state, sets):
    return 0


def MRSC_heuristic(state, sets):
    """
    Minimum Remaining Set Coverage

    This heuristic estimates the cost based on how many elements in "U" are still
    uncovered and divides it by the number of subsets not taken. This heuristic
    assumes that the subsets have an equal chance of covering remaining uncovered
    elements.

    h(state) = (number of uncovered elements in "U") / (number of subsets not taken)
    """

    uncovered = reduce(
        np.logical_or, [sets[i] for i in state.taken], np.zeros(len(sets[0]))
    )

    not_taken_subsets = NUMBER_SET - len(state.taken)

    return -np.sum(uncovered) / not_taken_subsets


def MSC_heuristic(state, sets):
    """
    Maximum Subset Coverage

    This heuristic estimates the cost by assuming that each additional subset chosen
    will cover as many uncovered elements as possible. It divides the number of
    uncovered elements in "U" by the number of subsets already taken.

    h(state) = (number of uncovered elements in "U") / (number of subsets already taken)
    """

    uncovered = reduce(
        np.logical_or, [sets[i] for i in state.taken], np.zeros(len(sets[0]))
    )

    return (-np.sum(uncovered) / len(state.taken)) if len(state.taken) > 0 else 0


def MRSC_MSC_heuristic(state, sets):
    return (MRSC_heuristic(state, sets) + MSC_heuristic(state, sets)) / 2


def ASC_heuristic(state, sets):
    """
    Average Subset Coverage

    This heuristic estimates the cost based on the average size of the remaining
    subsets and assumes that each chosen subset will, on average, cover this many
    elements.

    h(state) = (number of uncovered elements in "U") / (average size of remaining subsets)
    """

    uncovered = reduce(
        np.logical_or, [sets[i] for i in state.taken], np.zeros(len(sets[0]))
    )

    remaining_sets = [sets[i] for i in range(NUMBER_SET) if i not in state.taken]

    average_size = np.sum([np.sum(s) for s in remaining_sets]) / len(remaining_sets)

    return -np.sum(uncovered) / average_size


def RANDOM_heuristic(state, sets):
    """
    !! not admissible but funny !!
    """
    return random()


def DENSITY_heuristic(state, sets):
    """
    Density Heuristic

    This heuristic estimates the cost based on the density of uncovered elements in
    "U". It assumes that the subsets have an equal chance of covering remaining
    uncovered elements.

    h(state) = (density of uncovered elements in "U") * (number of subsets)
    """

    uncovered = reduce(
        np.logical_or, [sets[i] for i in state.taken], np.zeros(len(sets[0]))
    )

    # Calculate the density of uncovered elements in "U"
    uncovered_density = np.sum(uncovered) / len(uncovered)

    # Estimate the remaining cost based on the uncovered density
    return -uncovered_density * NUMBER_SET



# A*

In [22]:
def astar(sets, heuristic):
    # Initialize the priority queue with the initial state
    initial_state = State(
        taken=[],
        cost=0,
        heuristic=heuristic(State(taken=[], cost=0, heuristic=0), sets),
    )
    open_set = PriorityQueue()
    open_set.put((initial_state.cost + initial_state.heuristic, initial_state))

    # Initialize the closed set as an empty set
    closed_set = set()

    progress_bar = tqdm(total=None)

    while not open_set.empty():
        # Get the state with the lowest f score from the priority queue
        _, current_state = open_set.get()

        progress_bar.update(1)

        # If the current state is a goal state, return the solution
        if goal_check(current_state, sets):
            progress_bar.close()
            return current_state

        # Add the current state to the closed set
        closed_set.add(tuple(current_state.taken))

        # Generate successor states by adding one more subset
        for subset in range(NUMBER_SET):
            if subset not in current_state.taken:
                # Create a new state by adding the subset
                new_taken = current_state.taken + [subset]
                new_cost = cost(State(new_taken, 0, 0))
                new_heuristic = heuristic(State(new_taken, 0, 0), sets)
                new_state = State(new_taken, new_cost, new_heuristic)

                # If the state is not in the closed set, add it to the open set
                if tuple(new_taken) not in closed_set:
                    open_set.put((new_state.cost + new_state.heuristic, new_state))

    # If the open set is empty and no solution is found, return None
    progress_bar.close()
    return None


# Results:

In [23]:
heuristics = [
    TRIVIAL_heuristic,
    MRSC_heuristic,
    MSC_heuristic,
    MRSC_MSC_heuristic,
    ASC_heuristic,
    DENSITY_heuristic,
    RANDOM_heuristic,
]
print("Problem size:", PROBLEM_SIZE, "Number of sets:", NUMBER_SET, "\n")
for i in heuristics:
    print(i.__name__)
    solution = astar(SETS, i)
    print(" Solution:", solution.taken)
    if PROBLEM_SIZE <= 10:
        for i in solution.taken:
            print(" Set", i, SETS[i], "Coverage:", np.sum(SETS[i]))
    print(" Solution check:", goal_check(solution, SETS))
    print()


Problem size: 20 Number of sets: 40 

TRIVIAL_heuristic


63980it [00:21, 2985.56it/s]


 Solution: [0, 3, 9, 30]
 Solution check: True

MRSC_heuristic


60882it [00:56, 1079.29it/s]


 Solution: [0, 3, 9, 30]
 Solution check: True

MSC_heuristic


31255it [00:28, 1096.85it/s]


 Solution: [0, 3, 9, 30]
 Solution check: True

MRSC_MSC_heuristic


55890it [01:21, 685.21it/s]


 Solution: [0, 3, 9, 30]
 Solution check: True

ASC_heuristic


3852it [00:37, 102.87it/s]


 Solution: [9, 13, 20, 28]
 Solution check: True

DENSITY_heuristic


5it [00:00, 926.06it/s]


 Solution: [13, 20, 9, 0]
 Solution check: True

RANDOM_heuristic


62341it [00:24, 2549.58it/s]


 Solution: [1, 16, 11, 30]
 Solution check: True

