In [2]:
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue, LifoQueue
import numpy as np
import math

SET COVERING

We have a set to cover with some subsets, we cover the set when taking subsets that complete the set even if some tiles are taken multiple times.

In [3]:
PROBLEM_SIZE = 10
NUM_SETS = 20
SETS = tuple(
    np.array([random() < 0.3 for _ in range(PROBLEM_SIZE)])
    for _ in range(NUM_SETS)
)
State = namedtuple('State', ['taken', 'not_taken'])

## A*


I try to improve the Eucledian heuristic but Euclidean distance can be problematic as a heuristic in set problems because it may not satisfy the admissibility condition required for the A* algorithm. The heuristic in A* must be admissible, i.e. it can never overestimate the cost of reaching the destination. In the context of problem sets, Euclidean distance might violate this condition. It doesn't work.

The best heuristics for now are based on larger sets and the missing sets...
Moreover, every set is generated with a probability < 0.3, so we need not taken*0.3 (heuristic_3)

In [3]:
def goal_check(state):
    return np.all(reduce(
        np.logical_or,
        [SETS[i] for i in state.taken],
        np.array([False for _ in range(PROBLEM_SIZE)]),
    ))

def set_covered(state):
    return reduce(
        np.logical_or,
        [SETS[i] for i in state.taken],
        np.array([False for _ in range(PROBLEM_SIZE)]),
    )

def count_missing_sets(state):
    free_space= PROBLEM_SIZE- sum(set_covered(state))
    return free_space

def distance(state):
    return PROBLEM_SIZE - sum(
        reduce(
            np.logical_or,
            [SETS[i] for i in state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)]),
        ))

def calculate_centroid(state):
    if not state.taken:
        return None
    dimension = len(state.taken[0])  
    centroid = [0] * dimension

    for element in state.taken:
        for i in range(dimension):
            centroid[i] += element[i]

    centroid = [coord / len(state.taken) for coord in centroid]
    return centroid

def euclidean_distance(set1, set2):
    if not set1 or not set2:
        return float('inf')  

    centroid1 = calculate_centroid(set1)
    centroid2 = calculate_centroid(set2)
    if centroid1 is None or centroid2 is None:
        return float('inf')  
    squared_distance = sum((c1 - c2) ** 2 for c1, c2 in zip(centroid1, centroid2))
    distance = math.sqrt(squared_distance)
    return distance

def heuristic_max(state):
    largest_set= max(sum(s) for s in SETS)
    opt_estimate= math.ceil(count_missing_sets(state)/largest_set)
    return opt_estimate

def heuristic_missing(state):
    missing_sets = count_missing_sets(state)
    return missing_sets

def heuristic_euclidean_distance(state):
    total_distance = sum(euclidean_distance(set1, set2) for set1 in state for set2 in state)
    return total_distance


def heuristic_3(state):
    return int(len(state.not_taken)/(len(state.not_taken)*0.3)) + 1 


def f(state):
    return heuristic_max(state)+ len(state.taken)


In [4]:
assert goal_check(
    State(set(range(NUM_SETS)), set())
), "Problem not solvable"

In [5]:
frontier = PriorityQueue()
#frontier = SimpleQueue()
state = State(set(), set(range(NUM_SETS)))
frontier.put((distance(state), state))


counter = 0
_, current_state = frontier.get()
while not goal_check(current_state):
    counter += 1
    for action in current_state[1]:
        new_state = State(
            current_state.taken ^ {action},
            current_state.not_taken ^ {action},
        )
        frontier.put((f(new_state), new_state))
    _, current_state = frontier.get()

print(
    f"Solved in {counter:,} steps ({len(current_state.taken)} tiles)"
)

Solved in 77 steps (3 tiles)


In [6]:
current_state

State(taken={19, 15, 7}, not_taken={0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18})

In [7]:
SETS[1]

array([False,  True, False, False, False, False,  True, False,  True,
        True])