In [6]:
import random

def problem(N, seed=None):
    """Generate a random set-covering instance over the integers 0 .. N-1.

    The module-level ``random`` generator is re-seeded with *seed*, so a
    given non-None seed always reproduces the same instance for a given N.

    Returns a list of between N and 5 * N blocks. Each block is a
    duplicate-free list obtained from N // 5 to N // 2 random draws in
    the range [0, N - 1].
    """
    random.seed(seed)

    blocks = []
    # The number of blocks is drawn first; every block then draws its own
    # size followed by its elements.
    how_many_blocks = random.randint(N, N * 5)
    for _ in range(how_many_blocks):
        draws = random.randint(N // 5, N // 2)
        unique_values = set(random.randint(0, N - 1) for _ in range(draws))
        blocks.append(list(unique_values))
    return blocks

In [7]:
from queue import PriorityQueue

def compute_cost(I, Si):
    '''
        Return the cost of adding the candidate subset Si to the current solution.

        I represents the set of elements already included in the solution.  Initially I = {}
        Si represents the i-th subset taken into consideration in order to be added to the solution
        |Si - I| is the number of distinct elements of Si that are not yet in I
        the cost is 1 / (|Si - I| + |I|), i.e. it decreases as the resulting coverage grows
        return -1 if Si does not add any element (|Si - I| = 0), the cost otherwise
    '''
    # A set difference counts every new element exactly once, even when Si
    # contains repeated values, and avoids scanning Si once per element of I.
    n_new_el = len(set(Si).difference(I))

    if n_new_el == 0:
        # sentinel value: this candidate contributes nothing new
        return -1
    return 1 / (n_new_el + len(I))


def tree_search(blocks, goal):
    """Best-first search for a selection of blocks that covers every element of goal.

    A state is a pair (bag, remaining): the blocks picked so far and the
    blocks still available. States are expanded in increasing order of the
    compute_cost value of the block that was added last. On equal cost the
    queue compares the states themselves, so blocks must be mutually
    comparable (lists of integers are).

    Parameters:
        blocks: sequence of candidate subsets, each an iterable of hashable elements.
        goal: collection of the elements that must all be covered.

    Returns:
        A tuple holding the chosen blocks, or None when no combination of
        blocks covers goal. An empty goal is satisfied by the empty tuple.
    """
    goal_set = set(goal)

    frontier = PriorityQueue()
    frontier.put((1.0, ((), tuple(blocks))))

    # A PriorityQueue object is always truthy and get() blocks on an empty
    # queue, so emptiness is tested explicitly; otherwise an unreachable goal
    # would hang the search forever.
    while not frontier.empty():
        _, (current_bag, available_blocks) = frontier.get()

        # Flatten the chosen blocks into the set of covered elements. It is
        # built once per state and reused for every candidate below.
        covered = {x for block in current_bag for x in block}

        # Compare contents rather than sizes, so elements outside goal cannot
        # make an incomplete cover look like a solution.
        if goal_set <= covered:
            return current_bag

        for i, block in enumerate(available_blocks):
            new_cost = compute_cost(covered, block)
            if new_cost == -1:
                # the block adds nothing new: prune this branch
                continue

            new_state = (
                (*current_bag, block),  # bag extended with the candidate block
                available_blocks[:i] + available_blocks[i + 1:],
            )
            frontier.put((new_cost, new_state))

    return None

In [14]:
import logging

logging.getLogger().setLevel(logging.INFO)

# Solve one seeded instance per problem size and report the weight of the
# solution (total number of elements in the chosen blocks) together with its
# bloat, i.e. the excess of that weight over N expressed as a percentage.
for N in [5, 10, 20, 100, 500, 1000]:
    blocks = problem(N, seed=42)
    goal = list(range(N))
    sol = tree_search(blocks, goal)
    if sol is None:
        # tree_search starts from a None result; guard before summing over it
        logging.warning(f"No solution found for N={N}")
        continue
    weight = sum(len(block) for block in sol)
    logging.info(f"Found solution for N={N}: w={weight} (bloat={(weight-N)/N*100:.0f}%)")

INFO:root:Found solution for N=5: w=6 (bloat=20%)
INFO:root:Found solution for N=10: w=13 (bloat=30%)
INFO:root:Found solution for N=20: w=29 (bloat=45%)
INFO:root:Found solution for N=100: w=188 (bloat=88%)
INFO:root:Found solution for N=500: w=1295 (bloat=159%)
INFO:root:Found solution for N=1000: w=3088 (bloat=209%)
