In [1]:
import logging
import random
from collections import Counter
from itertools import chain

from gx_utils import *


# Log bare messages (no level or logger-name prefix) for records at INFO level and above.
logging.basicConfig(format="%(message)s", level=logging.INFO)

In [2]:
def problem(N, seed=None):
    """Generate a random collection of integer lists drawn from ``range(N)``.

    The module-level ``random`` generator is re-seeded with ``seed`` on every
    call, so a fixed seed always yields the same instance (``seed=None`` lets
    ``random`` pick its own seed).

    Returns a list of between ``N`` and ``5 * N`` lists. Each inner list holds
    the distinct values obtained from between ``N // 5`` and ``N // 2`` draws
    in ``[0, N - 1]``.
    """
    random.seed(seed)

    # The number of lists is drawn first, then each list draws its own size
    # followed by its members; this order determines the generated instance.
    how_many = random.randint(N, N * 5)

    generated = []
    for _ in range(how_many):
        draws = random.randint(N // 5, N // 2)
        members = {random.randint(0, N - 1) for _ in range(draws)}
        generated.append(list(members))
    return generated

In [13]:
from queue import PriorityQueue

def tree_search2(lists, goal_test, slzCost, priority_function,states):
    """Best-first tree search for a selection of ``lists`` that satisfies ``goal_test``.

    A search state is a triple ``(selected, solution, available)``:
    the set of elements covered so far, the tuple of lists chosen so far, and
    the lists that may still be appended (always those located after the last
    chosen one, so every combination is generated at most once).

    Args:
        lists: Sliceable sequence of candidate lists (e.g. a tuple of tuples).
        goal_test: Callable taking the covered set and returning a truthy
            value when the state is a solution.
        slzCost: Unused by this function; kept so existing calls keep working.
        priority_function: Callable ``(selected, candidate_solution)`` where
            ``selected`` is the coverage *before* the newest list is added and
            ``candidate_solution`` already ends with that list. Its return
            value orders the frontier (smallest first).
        states: Unused by this function; kept so existing calls keep working.

    Returns:
        The tuple of chosen lists for the first goal state dequeued, or
        ``None`` when the frontier is exhausted without reaching a goal.
    """
    frontier = PriorityQueue()

    state = (set(), (), lists)  # initial state: nothing covered, nothing chosen

    n = 0
    while state is not None:
        selected, solution, available = state

        if goal_test(selected):
            logging.info(f"Found a solution in {n:,} steps: {solution}")
            return solution
        n += 1

        for i, newlist in enumerate(available):
            members = set(newlist)
            # "<=" (not "<"): a list that is empty or exactly equal to the
            # current coverage contributes nothing either, so skip it too.
            if not members <= selected:
                extended = solution + (newlist,)
                newState = (selected | members, extended, available[i + 1:])
                frontier.put((priority_function(selected, extended), newState))

        # A PriorityQueue object is always truthy, so emptiness has to be
        # checked explicitly; get() on an empty queue would block forever.
        state = None if frontier.empty() else frontier.get()[1]

    logging.info(f"No solution found after {n:,} steps")
    return None

In [17]:
def goal_test_gen(N):
    """Build a goal predicate for covering every integer in ``range(N)``.

    The target set is built once here rather than on every call of the
    returned predicate, which the search invokes once per expanded state.

    Returns:
        A function ``goal_test(selected)`` that is true exactly when
        ``selected`` equals ``{0, 1, ..., N - 1}``.
    """
    target = frozenset(range(N))

    def goal_test(selected):
        return selected == target

    return goal_test

def priority_function(selected,solution):
    """Rank a candidate by how its newest list relates to the current coverage.

    Only the last list in ``solution`` is examined. The result is a pair
    ``(overlap, -union_size)``: the number of its elements already present in
    ``selected``, and the negated size of the coverage after adding it.
    """
    latest = set(solution[-1])
    overlap = len(latest & selected)
    union_size = len(latest | selected)
    return overlap, -union_size

def priority_dijkstra(_,solution):
    """Rank a candidate solution by the duplication across all its lists.

    The first argument is ignored. Every element of every list in
    ``solution`` is counted; the result is a pair
    ``(repeats, -singles)`` where ``repeats`` is the total number of
    occurrences beyond the first for each element and ``singles`` is the
    number of elements that occur exactly once.

    The lists may be any iterables (tuples, lists, ...); they are chained
    lazily instead of being concatenated into one large tuple.
    """
    cnt = Counter(chain.from_iterable(solution))
    repeats = sum(times - 1 for times in cnt.values() if times > 1)
    singles = sum(times == 1 for times in cnt.values())
    return repeats, -singles

for N in [5, 10, 20]:
    # Generated lists, shortest first.
    lists = sorted(problem(N, seed=42), key=len)
    # Remove exact duplicates by going through hashable tuples, then order by length again.
    filteredLists = sorted([list(unique) for unique in {tuple(l) for l in lists}], key=len)

    states = {}

    # The search needs hashable, comparable candidates, hence tuples.
    tuples = tuple(tuple(sublist) for sublist in filteredLists)

    # Search ordered by the overlap of the newest list with the current coverage.
    solution = tree_search2(tuples, goal_test_gen(N), slzCost, priority_function, states)
    print(f"Solution for N={N}: w={sum(len(_) for _ in solution)} (bloat={(sum(len(_) for _ in solution)-N)/N*100:.0f}%)")

    # Search ordered by the total duplication over the whole candidate solution.
    solution2 = tree_search2(tuples, goal_test_gen(N), slzCost, priority_dijkstra, states)
    print(f"Dijkstra Solution for N={N}: w={sum(len(_) for _ in solution2)} (bloat={(sum(len(_) for _ in solution2)-N)/N*100:.0f}%)")
    

Found a solution in 12 steps: ((3,), (0, 1), (2, 4))
Found a solution in 12 steps: ((3,), (0, 1), (2, 4))
Found a solution in 39 steps: ((0, 9, 3), (8, 2, 7), (1, 4, 5, 6))
Found a solution in 39 steps: ((0, 9, 3), (8, 2, 7), (1, 4, 5, 6))
Found a solution in 1,167 steps: ((8, 4, 7), (16, 9, 19, 6), (1, 3, 13, 14), (0, 5, 11, 16, 17), (2, 6, 8, 10, 12, 15, 18))


Solution for N=5: w=5 (bloat=0%)
Dijkstra Solution for N=5: w=5 (bloat=0%)
Solution for N=10: w=10 (bloat=0%)
Dijkstra Solution for N=10: w=10 (bloat=0%)
Solution for N=20: w=23 (bloat=15%)


Found a solution in 1,604 steps: ((8, 4, 7), (16, 9, 19, 6), (1, 3, 13, 14), (0, 5, 11, 16, 17), (2, 6, 8, 10, 12, 15, 18))


Dijkstra Solution for N=20: w=23 (bloat=15%)


# Greedy
Basic greedy algorithm by Professor Squillero.

In [21]:
def greedy(N, all_lists):
    """Vanilla greedy algorithm: take lists shortest-first until ``range(N)`` is covered.

    The candidates are visited in order of increasing length (ties keep their
    original relative order). A candidate is added to the solution only when
    it contributes at least one element that is not covered yet, so empty
    lists and lists duplicating the current coverage are skipped.

    Args:
        N: Size of the universe; the goal is to cover ``{0, ..., N - 1}``.
        all_lists: Iterable of candidate lists. It is not modified.

    Returns:
        The chosen lists, in the order they were selected.

    Raises:
        ValueError: If the candidates run out before the covered set equals
            ``set(range(N))``.
    """
    goal = set(range(N))
    covered = set()
    solution = []

    # Iterate instead of repeatedly calling pop(0), which shifts the whole list each time.
    for candidate in sorted(all_lists, key=len):
        if covered == goal:
            break
        members = set(candidate)
        # "<=" (not "<"): a candidate equal to the current coverage adds nothing.
        if not members <= covered:
            solution.append(candidate)
            covered |= members

    if covered != goal:
        raise ValueError(
            f"the given lists do not cover range({N}) exactly; covered {len(covered)} element(s)"
        )

    logging.debug(f"{solution}")
    return solution

In [25]:
    
# Run the greedy baseline on increasingly large instances and log weight and bloat.
for N in (5, 10, 20, 100, 500, 1000):
    solution = greedy(N, problem(N, seed=42))
    logging.info(
        f" Greedy solution for N={N:,}: "
        f"w={sum(len(_) for _ in solution):,} "
        f"(bloat={(sum(len(_) for _ in solution)-N)/N*100:.0f}%)"
    )

 Greedy solution for N=5: w=5 (bloat=0%)
 Greedy solution for N=10: w=13 (bloat=30%)
 Greedy solution for N=20: w=46 (bloat=130%)
 Greedy solution for N=100: w=332 (bloat=232%)
 Greedy solution for N=500: w=2,162 (bloat=332%)
 Greedy solution for N=1,000: w=4,652 (bloat=365%)
