In [None]:
from random import random
from math import ceil
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue

import numpy as np

In [None]:
# Number of positions in the universe to cover, and number of random subsets.
PROBLEM_SIZE = 20
NUM_SETS = 40

# Each subset is a boolean membership mask of length PROBLEM_SIZE; every
# position is drawn independently and is True when random() < 0.2.
SETS = tuple(
    np.fromiter(
        (random() < 0.2 for _ in range(PROBLEM_SIZE)),
        dtype=bool,
        count=PROBLEM_SIZE,
    )
    for _ in range(NUM_SETS)
)

# A search state: the indices of the sets already taken and of those not taken.
State = namedtuple("State", "taken not_taken")

In [None]:
def covered(state):
    """Return the boolean mask of positions covered by the taken sets.

    Starts from an all-False mask of length PROBLEM_SIZE and ORs in
    ``SETS[i]`` for every index ``i`` in ``state.taken``.
    """
    mask = np.zeros(PROBLEM_SIZE, dtype=bool)
    for index in state.taken:
        mask = np.logical_or(mask, SETS[index])
    return mask

def goal_check(state):
    """Return a truthy value when the taken sets cover every position."""
    coverage = covered(state)
    return coverage.all()

In [None]:
# Sanity check: the state that takes every set must satisfy goal_check.
assert goal_check(
    State(taken=set(range(NUM_SETS)), not_taken=set())
), "Probelm not solvable"

## Professor's solution

In [None]:
def h1(state):
    """Estimate the sets still needed using the largest set overall.

    Divides the number of uncovered positions by the size of the largest
    set in SETS (taken or not) and rounds up.
    """
    biggest = max(subset.sum() for subset in SETS)
    still_missing = PROBLEM_SIZE - covered(state).sum()
    return ceil(still_missing / biggest)

def h2(state):
    """Estimate the sets still needed using the best single-set gain.

    Like ``h1``, but each set is measured only by the positions it would
    newly cover given what is already covered. Returns 0 when everything
    is already covered.
    """
    mask = covered(state)
    if mask.all():
        return 0
    uncovered = np.logical_not(mask)
    best_gain = max(np.logical_and(subset, uncovered).sum() for subset in SETS)
    still_missing = PROBLEM_SIZE - mask.sum()
    return ceil(still_missing / best_gain)

def h3(state):
    """Estimate the sets still needed by stacking the largest gains.

    Sorts every set in SETS by the number of uncovered positions it would
    add and returns the smallest count of top-ranked sets whose gains sum
    to at least the number of uncovered positions. Returns 0 when
    everything is already covered.
    """
    mask = covered(state)
    if mask.all():
        return 0
    uncovered = np.logical_not(mask)
    still_missing = PROBLEM_SIZE - mask.sum()

    gains = [np.logical_and(subset, uncovered).sum() for subset in SETS]
    gains.sort(reverse=True)

    # NOTE: the loop ends only once the summed gains reach still_missing.
    picked = 1
    while sum(gains[:picked]) < still_missing:
        picked += 1
    return picked

def f(state):
    """Priority of a state: sets taken so far plus the ``h3`` estimate."""
    path_cost = len(state.taken)
    return path_cost + h3(state)

## My solution

I tried to improve on the professor's heuristic `h3`. My heuristic first tries to cover the missing positions with the *unique* sets: the not-yet-taken sets that are the only ones holding the value `True` at some still-uncovered position, and which therefore must be part of the solution. If the missing size cannot be completely covered by the unique sets, the function sorts the remaining candidate sets by the number of uncovered positions each can cover and finds the minimum number of candidates needed to cover the rest. The value returned by the heuristic is the number of unique sets plus the number of candidate sets taken.
I tested my solution in different configurations. Reducing the probability of generating a `True` value when creating the random initial sets lowers the total number of steps needed, because there are then more unique sets; in the other cases the number of steps stays the same. In all cases the execution time is slightly worse than the professor's solution, because the unique sets have to be searched for at every state that is evaluated.

In [None]:
def find_candidates(state):
    """Split the untaken sets into "unique" sets and ordinary candidates.

    A set is *unique* when it is the only untaken set that would cover at
    least one still-uncovered position.

    Instead of rebuilding and AND-reducing a dictionary of masks for every
    untaken set, count once how many untaken sets would newly cover each
    position; a position with exactly one supplier identifies its unique set.

    Returns:
        A pair ``(unique_sets, candidates)``. ``unique_sets`` lists the
        indices of the unique sets in the iteration order of
        ``state.not_taken``. ``candidates`` lists, in descending order, the
        number of uncovered positions each remaining untaken set would add.
    """
    uncovered = np.logical_not(covered(state))

    # Positions each untaken set would newly cover.
    gains = {i: np.logical_and(SETS[i], uncovered) for i in state.not_taken}

    # For every position, how many untaken sets would newly cover it.
    suppliers = sum(
        (gain.astype(int) for gain in gains.values()),
        np.zeros(PROBLEM_SIZE, dtype=int),
    )
    single_supplier = suppliers == 1

    unique_sets = [
        i
        for i, gain in gains.items()
        if np.any(np.logical_and(gain, single_supplier))
    ]

    # Set lookup avoids scanning the list for every untaken set.
    unique_lookup = set(unique_sets)
    candidates = sorted(
        (gain.sum() for i, gain in gains.items() if i not in unique_lookup),
        reverse=True,
    )
    return unique_sets, candidates

def h4(state):
    """Estimate the sets still needed, counting unique sets first.

    Every unique set reported by ``find_candidates`` is counted. If their
    combined gain does not reach the number of uncovered positions, the
    largest remaining candidate gains are added until it does. Returns 0
    when everything is already covered.
    """
    mask = covered(state)
    if mask.all():
        return 0
    uncovered = np.logical_not(mask)
    still_missing = PROBLEM_SIZE - mask.sum()

    forced, optional_gains = find_candidates(state)
    forced_gain = sum(np.logical_and(SETS[i], uncovered).sum() for i in forced)
    forced_count = len(forced)

    if forced_gain >= still_missing:
        return forced_count

    # NOTE: the loop ends only once the summed gains reach still_missing.
    picked = 1
    while sum(optional_gains[:picked]) + forced_gain < still_missing:
        picked += 1
    return forced_count + picked

def f(state):
    """Priority of a state: sets taken so far plus the ``h4`` estimate."""
    path_cost = len(state.taken)
    return path_cost + h4(state)

In [None]:
# Best-first search: the frontier holds (priority, state) pairs ordered by f.
frontier = PriorityQueue()
state = State(set(), set(range(NUM_SETS)))
frontier.put((f(state), state))

# Number of states expanded before a goal state is popped.
counter = 0
while True:
    _, current_state = frontier.get()
    if goal_check(current_state):
        break
    counter += 1
    # Expand: move each untaken set, one at a time, into the taken group.
    for action in current_state.not_taken:
        new_state = State(
            taken=current_state.taken ^ {action},
            not_taken=current_state.not_taken ^ {action},
        )
        frontier.put((f(new_state), new_state))

print(f"Solved in {counter:,} steps ({len(current_state.taken)} tiles)")