In [6]:
"""
SET-COVERING PROBLEM
The set covering problem is a significant NP-hard problem in combinatorial optimization. Given a collection of elements, the set covering problem aims 
to find the minimum number of sets that incorporate (cover) all of these elements. In the set covering problem, two sets are given: a set {U} of elements 
and a set {S} of subsets of the set {U}. Each subset in {S} is associated with a predetermined cost, and the union of all the subsets covers the set 
{U}. This combinatorial problem then concerns finding the optimal number of subsets whose union covers the universal set while minimizing the total cost.

SOLUTION SPACE
What is a state? In this problem a state should be composed of 2 groups: one contains all the taken sets the other one the not taken. Each set contains a
specific number of elements. 
We will need a function that checks for the goal state.
We will use a path searching algorithms.
"""

'\nSET-COVERING PROBLEM\nThe set covering problem is a significant NP-hard problem in combinatorial optimization. Given a collection of elements, the set covering problem aims \nto find the minimum number of sets that incorporate (cover) all of these elements. In the set covering problem, two sets are given: a set {U} of elements \nand a set {S} of subsets of the set {U}. Each subset in {S} is associated with a predetermined cost, and the union of all the subsets covers the set \n{U}. This combinatorial problem then concerns finding the optimal number of subsets whose union covers the universal set while minimizing the total cost.\n\nSOLUTION SPACE\nWhat is a state? In this problem a state should be composed of 2 groups: one contains all the taken sets the other one the not taken. Each set contains a\nspecific number of elements. \nWe will need a function that checks for the goal state.\nWe will use a path searching algorithms.\n'

In [7]:
import numpy as np
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue, LifoQueue

In [8]:
# Since we don't care about the order we decide to use sets instead of arrays to represent the tiles (group of elements)


PROBLEM_SIZE = 50 # Number of elements (so the solution MUST contain them all)
NUM_SETS = 20 # Number of sets/tiles

# random() generates a random float number between 0 and 1
# random() < .x gives me a boolean
# If True it means that the element in that position is covered by the set otherwise it isn't
# EXAMPLE: True, True, False -> Set contains element 0 and 1
# Since I do not want to modify them I can wrap everything in a tuple (that is faster and more memory efficient than an array)
SETS = tuple(np.array([random() < .3 for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS))
State = namedtuple("State", "taken not_taken") # From now on when I create a State() the first element will be labeled as taken, the second one as not_taken

In [17]:
# The function that checks if the state is a goal state has to understand if with all the taken sets I'm covering all the elements (PROBLEM_SIZE)
def goal_check(state):
    # The second argument of the reduce function is the initial value (a nparray of all False)
    return np.all(reduce(np.logical_or, [SETS[i] for i in state.taken], np.array([False for _ in range(PROBLEM_SIZE)])))

# Before going on I check if the problem is solvable in general by putting all sets as taken and see if they cover all the elements
assert goal_check(State(set(range(NUM_SETS)), set())), "Problem not solvable"

def distance(state):
    return PROBLEM_SIZE - sum(
        reduce(
            np.logical_or,
            [SETS[i] for i in state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)]),
        ))

In [10]:
# PATH SEARCH ALGORITHM

# Define the frontier
# Breadth-first search -> FIFO queue (SimpleQueue)
# Depth-first search -> LIFO queue
# Uniform-cost search -> Priority queue
frontier = SimpleQueue()
# Define the initial state
frontier.put(State(set(), set(range(NUM_SETS))))
current_state = frontier.get()

# I want to count the steps it takes to find a solution
counter = 0

while not goal_check(current_state):
    counter += 1
    for action in current_state.not_taken:
        # Apply the action and create a new state
        # In this case I add a set to the takens and remove it from the not_takens
        new_state = State(current_state.taken | {action}, current_state.not_taken - {action})
        frontier.put(new_state)
    current_state = frontier.get()
print(f"solved in {counter:,} steps")
print(current_state)
print(sum([SETS[i] for i in current_state.taken]))


solved in 1,985,093 steps
State(taken={0, 1, 2, 17, 7, 12}, not_taken={3, 4, 5, 6, 8, 9, 10, 11, 13, 14, 15, 16, 18, 19})
[1 1 3 2 2 2 3 3 3 3 1 1 3 2 2 3 2 2 2 4 4 2 1 3 4 1 2 1 1 3 1 2 2 1 2 1 2
 3 1 1 4 1 1 1 1 3 3 3 1 2]


In [15]:
def get_priority(state):
    return sum(filter(lambda x: x>0, sum([SETS[i] for i in state.taken])-1)) + distance(state)

In [19]:
# Optimization request: the solution MUST have the less repetiotion of elements (the less overlap of elements)
# IDEA: My frontier is a priority queue with the priority imposed by the following equation:
# Kp = n. of repetition (how many times an element is in overlap with the other sets)
forntier = PriorityQueue()
state = State(set(), set(range(NUM_SETS)))
frontier.put(0, state)

counter = 0
current_state = frontier.get()
while not goal_check(current_state):
    counter += 1
    for action in current_state.not_taken:
        # Apply the action and create a new state
        # In this case I add a set to the takens and remove it from the not_takens
        new_state = State(current_state.taken | {action}, current_state.not_taken - {action})
        priority = get_priority(new_state)
        frontier.put(priority, new_state)
    current_state = frontier.get()
print(f"solved in {counter:,} steps")
print(current_state.taken)
print(sum([SETS[i] for i in current_state.taken]))


solved in 1,068 steps
{0, 1, 2, 17, 7, 12}
[1 1 3 2 2 2 3 3 3 3 1 1 3 2 2 3 2 2 2 4 4 2 1 3 4 1 2 1 1 3 1 2 2 1 2 1 2
 3 1 1 4 1 1 1 1 3 3 3 1 2]
