In [501]:
import numpy as np
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue

# Problem Definition

PROBLEM_SIZE: This constant represents the dimension of the finite set U. It specifies the size of the universal set that needs to be covered.

NUMBER_SET: This constant represents the number of subsets in the collection S. It indicates how many sets are available to choose from for covering the universal set.

SETS: This is a tuple containing arrays of binary values, where each array represents a subset. The binary values indicate whether an element is present in the respective subset. The subsets are generated randomly, with a 30% probability of an element being included in a subset.

In [502]:
# constants
PROBLEM_SIZE = 5  # dimension of the finite set U
NUMBER_SET = 10  # number of subsets in the collection S
SETS = tuple(
    np.array([random() < 0.3 for i in range(PROBLEM_SIZE)]) for j in range(NUMBER_SET)
)  # generate sets in S

# Define State as a named tuple
State = namedtuple("State", ["taken", "cost", "heuristic"])



# Solution Check
This function is used to check whether a given state is a valid solution to the set covering problem. In this context, a "state" is typically a set of selected subsets, and the goal is to check if the logical OR operation (union) of all the selected subsets covers the entire universal set U.

The function takes two arguments:

- state: A named tuple that presumably contains information about which subsets have been selected.
- sets: The collection of subsets (S).

The function returns True if the union of the selected subsets covers the entire universal set U, which is represented by a line of all True values (i.e., all elements are covered). It uses NumPy's logical OR and reduce functions to perform this operation efficiently.

In [503]:
def goal_check(state, sets):
    """
    check if the logical OR all the elements yeald a line of all true ie the
    condition for a state to be covering the whole set U
    """
    return np.all(
        reduce(np.logical_or, [sets[i] for i in state.taken], np.zeros(PROBLEM_SIZE))
    )


# assert generated problem is solvable, ie the goal check of a stete with all
# sets taken is true
assert goal_check(State(range(NUMBER_SET), 0, 0), SETS)


# Cost and Heuristic Functions

#### Cost Function:
The cost function calculates the actual cost incurred to reach a given state.
In the context of the Set Covering problem, you can define the cost as the number of selected subsets in the current state. The goal is to minimize this cost.
The cost can be calculated as follows:

#### Heuristic Function:
The heuristic function estimates the cost from the current state to the goal state. It should be admissible, meaning it should never overestimate the true cost.
A common heuristic for the Set Covering problem is to estimate the remaining number of elements in the universal set U that are not yet covered. This can be done using the following heuristic function:

In [504]:
def cost(state):
    """The cost function calculates the cost of reaching a particular state"""
    return len(state.taken)


def heuristic(state, sets):
    """Calculate the number of uncovered elements in U"""
    uncovered = np.logical_not(
        reduce(np.logical_or, [sets[i] for i in state.taken], np.zeros(PROBLEM_SIZE))
    )
    remaining_elements = np.sum(uncovered)
    return remaining_elements



# A* Search

A* is a popular graph search algorithm used to find the shortest path from a start state to a goal state in a graph. 

The A* algorithm is guided by the combination of the actual cost and the heuristic. It explores states in a way that minimizes the cost while using the heuristic to prioritize promising paths. When used correctly with an admissible heuristic, A* guarantees that it finds the optimal solution in terms of the cost.

In this case, the graph represents different possible states of subset selections, and the goal state is to find a set of subsets that collectively cover the universal set U.

**Initialization:**

Start with an initial state. In the Set Covering problem, this state is typically an empty set of selected subsets.
Initialize two sets: the open set and the closed set.
The open set is a priority queue that stores states to be explored, sorted by their estimated cost from the start state.
The closed set stores states that have already been explored.

**Cost Function:**

Define a cost function that calculates the actual cost (in your case, the number of selected subsets) to reach a state.

**Heuristic Function:**

Define a heuristic function that estimates the cost from the current state to the goal state. The heuristic should be admissible, meaning it never overestimates the true cost.
In the Set Covering problem, a common heuristic estimates the remaining number of elements in the universal set U that are not yet covered by the selected subsets.

**Main Loop:**

While the open set is not empty, do the following:
Pop the state with the lowest estimated cost (a combination of the actual cost and the heuristic) from the open set.
If this state is the goal state (i.e., it covers all elements in U), you have found a solution, and you can extract the selected subsets.
If not, mark this state as explored by adding it to the closed set.
Generate successor states by considering additional subsets. For each subset that hasn't been added to the current state, create a new state by adding it. Calculate the cost and heuristic for each successor state.
Add the successor states to the open set if they are not already in the closed set.

**Termination:**

If the open set becomes empty and no goal state has been found, the algorithm terminates without a solution.

**Output:**

If a solution is found, it consists of the selected subsets that cover the universal set U.



In [505]:
def astar(sets):
    # Initialize the priority queue with the initial state
    initial_state = State(
        taken=[],
        cost=0,
        heuristic=heuristic(State(taken=[], cost=0, heuristic=0), sets),
    )
    open_set = PriorityQueue()
    open_set.put((initial_state.cost + initial_state.heuristic, initial_state))

    # Initialize the closed set as an empty set
    closed_set = set()

    while not open_set.empty():
        # Get the state with the lowest f score from the priority queue
        _, current_state = open_set.get()

        print(current_state)

        # If the current state is a goal state, return the solution
        if goal_check(current_state, sets):
            return current_state.taken

        # Add the current state to the closed set
        closed_set.add(tuple(current_state.taken))

        # Generate successor states by adding one more subset
        for subset in range(NUMBER_SET):
            if subset not in current_state.taken:
                # Create a new state by adding the subset
                new_taken = current_state.taken + [subset]
                new_cost = cost(State(new_taken, 0, 0))
                new_heuristic = heuristic(State(new_taken, 0, 0), sets)
                new_state = State(new_taken, new_cost, new_heuristic)

                # If the state is not in the closed set, add it to the open set
                if tuple(new_taken) not in closed_set:
                    open_set.put((new_state.cost + new_state.heuristic, new_state))

    # If the open set is empty and no solution is found, return None
    return None


In [506]:
SETS

(array([False,  True, False, False, False]),
 array([False, False,  True,  True,  True]),
 array([ True, False, False, False,  True]),
 array([False, False, False,  True, False]),
 array([False, False, False, False,  True]),
 array([False,  True, False, False, False]),
 array([False, False, False, False, False]),
 array([ True,  True, False, False, False]),
 array([False, False, False, False,  True]),
 array([False,  True,  True, False, False]))

In [507]:
# Call the A* function and print the result
result = astar(SETS)
if result:
    print("\nSolution:", result)
    for i in result:
        print(SETS[i])
else:
    print("\nNo solution found.")

State(taken=[], cost=0, heuristic=5)
State(taken=[1], cost=1, heuristic=2)
State(taken=[1, 7], cost=2, heuristic=0)

Solution: [1, 7]
[False False  True  True  True]
[ True  True False False False]
