In [2]:
import numpy as np
from random import random
from queue import PriorityQueue, SimpleQueue
from functools import reduce
from tqdm import tqdm
import timeit

In [3]:
# Number of elements to cover; every boolean mask built in random_sets has this length.
PROBLEM_SIZE = 20
# Number of candidate masks generated for each random instance.
NUM_SETS = 40

In [4]:
def random_sets():
    """Return a random tuple of boolean masks whose union covers every element.

    Each instance holds ``NUM_SETS`` numpy arrays of ``PROBLEM_SIZE`` booleans,
    every entry being True with probability 0.3, ordered from the mask with
    the most True entries to the one with the fewest. Instances are redrawn
    until ``goal_check`` accepts the selection made of all the masks.
    """
    def draw():
        # One fresh instance, sorted by decreasing number of covered elements.
        masks = [
            np.array([random() < .3 for _ in range(PROBLEM_SIZE)])
            for _ in range(NUM_SETS)
        ]
        return tuple(sorted(masks, key=lambda mask: -sum(mask)))

    while True:
        instance = draw()
        # Taking every mask at once tells whether the instance is solvable at all.
        if goal_check((set(range(NUM_SETS)), set()), instance):
            return instance

In [5]:
def goal_check(state, sets, problem_size=None):
    """Tell whether the masks selected in ``state`` cover every element.

    Args:
        state: pair whose first item is the collection of indices (into
            ``sets``) that have been taken; only ``state[0]`` is read.
        sets: indexable collection of boolean masks, one per candidate set.
        problem_size: number of elements to cover. When omitted, the
            module-level ``PROBLEM_SIZE`` is used.

    Returns:
        A truthy value when the element-wise OR of the taken masks is True
        everywhere, a falsy one otherwise.
    """
    if problem_size is None:
        problem_size = PROBLEM_SIZE
    # Start from "nothing covered" so an empty selection is handled too.
    covered = reduce(
        np.logical_or,
        (sets[i] for i in state[0]),
        np.zeros(problem_size, dtype=bool),
    )
    return np.all(covered)


def h_not_admissible(state, sets, problem_size=None):
    """Count the elements not yet covered by the masks taken in ``state``.

    As the name says, this is not an admissible estimate of how many more
    sets are needed: a single set may cover several missing elements, so the
    count can exceed the true number of sets still required.

    Args:
        state: pair whose first item is the collection of taken indices
            (into ``sets``); only ``state[0]`` is read.
        sets: indexable collection of boolean masks, one per candidate set.
        problem_size: number of elements to cover. When omitted, the
            module-level ``PROBLEM_SIZE`` is used.

    Returns:
        The number of elements left uncovered, as an ``int``.
    """
    if problem_size is None:
        problem_size = PROBLEM_SIZE
    covered = reduce(
        np.logical_or,
        (sets[i] for i in state[0]),
        np.zeros(problem_size, dtype=bool),
    )
    # count_nonzero counts the True entries without a Python-level loop.
    return problem_size - int(np.count_nonzero(covered))

def h(state, sets, problem_size=None):
    """Optimistic estimate of how many more sets are needed to finish the cover.

    The remaining sets (``state[1]``) are ranked by how many still-missing
    elements each would add. They are then taken greedily, counting their
    gains as if they never overlapped, until the gains add up to the number
    of missing elements. The number of sets taken this way is returned.

    Args:
        state: pair ``(taken, remaining)`` of collections of indices into
            ``sets``.
        sets: indexable collection of boolean masks, one per candidate set.
        problem_size: number of elements to cover. When omitted, the
            module-level ``PROBLEM_SIZE`` is used.

    Returns:
        ``0`` when everything is already covered, otherwise the number of
        remaining sets the optimistic count needs. ``float("inf")`` is
        returned when even all remaining sets together cannot supply enough
        new elements (including when no set remains), i.e. the state is a
        dead end.
    """
    if problem_size is None:
        problem_size = PROBLEM_SIZE
    already_covered = reduce(
        np.logical_or,
        (sets[i] for i in state[0]),
        np.zeros(problem_size, dtype=bool),
    )
    if np.all(already_covered):
        return 0

    uncovered = np.logical_not(already_covered)
    missing = int(np.count_nonzero(uncovered))
    # Number of new elements each remaining set would contribute, best first.
    gains = sorted(
        (int(np.count_nonzero(np.logical_and(sets[set_id], uncovered)))
         for set_id in state[1]),
        reverse=True,
    )

    gained = 0
    for taken, gain in enumerate(gains, start=1):
        gained += gain
        if gained >= missing:
            return taken
    # The remaining sets ran out before the missing elements were matched.
    return float("inf")

def professorh(state, sets, problem_size=None):
    """Optimistic estimate of how many more sets are needed, ranking all sets.

    Every mask in ``sets`` (not only the remaining ones) is scored by how
    many still-missing elements it would add. The scores are sorted in
    decreasing order and accumulated, as if the sets never overlapped, until
    they reach the number of missing elements.

    Args:
        state: pair whose first item is the collection of taken indices
            (into ``sets``); only ``state[0]`` is read.
        sets: iterable, indexable collection of boolean masks.
        problem_size: number of elements to cover. When omitted, the
            module-level ``PROBLEM_SIZE`` is used.

    Returns:
        ``0`` when everything is already covered, otherwise the number of
        top-scoring sets needed by the optimistic count. ``float("inf")`` is
        returned when the scores of all sets together stay below the number
        of missing elements, instead of looping forever.
    """
    if problem_size is None:
        problem_size = PROBLEM_SIZE
    already_covered = reduce(
        np.logical_or,
        (sets[i] for i in state[0]),
        np.zeros(problem_size, dtype=bool),
    )
    if np.all(already_covered):
        return 0

    uncovered = np.logical_not(already_covered)
    missing_size = int(np.count_nonzero(uncovered))
    candidates = sorted(
        (int(np.count_nonzero(np.logical_and(s, uncovered))) for s in sets),
        reverse=True,
    )

    # Running total replaces re-summing the prefix on every iteration.
    gained = 0
    for taken, gain in enumerate(candidates, start=1):
        gained += gain
        if gained >= missing_size:
            return taken
    return float("inf")


def professorf(state, sets):
    """Priority of ``state``: sets taken so far plus the ``professorh`` estimate."""
    taken_count = len(state[0])
    remaining_estimate = professorh(state, sets)
    return taken_count + remaining_estimate

def g(state):
    """Cost paid so far: the number of indices in the taken part of ``state``."""
    taken = state[0]
    return len(taken)

def f(state, sets):
    """Priority of ``state``: cost so far (``g``) plus the ``h`` estimate."""
    cost_so_far = g(state)
    estimate_to_goal = h(state, sets)
    return cost_so_far + estimate_to_goal


In [6]:
def find_sol(sets):
    """Best-first search for a cover, ordering the frontier by ``f``.

    A state is a pair ``(taken, remaining)`` of sets of indices into
    ``sets``. Starting with nothing taken, the current state is expanded by
    moving each remaining index into ``taken``; every successor is queued
    with priority ``f(successor, sets)`` and the search continues from the
    lowest-priority queued state until ``goal_check`` accepts it.

    Queue entries are ``(priority, state)`` tuples, so entries with equal
    priority are ordered by comparing the states themselves.

    Args:
        sets: sequence of boolean masks, one per candidate set.

    Returns:
        ``(counter, size)``: the number of states expanded and the number of
        sets in the cover that was found.

    Raises:
        ValueError: if the frontier runs empty before a cover is found
            (previously the blocking ``PriorityQueue.get`` would hang).
    """
    # Masks with no True entry can never help, so they are not offered as actions.
    actions = {i for i in range(len(sets)) if sum(sets[i]) != 0}
    frontier = PriorityQueue()
    current_state = (set(), actions)
    counter = 0

    while not goal_check(current_state, sets):
        counter += 1
        taken, remaining = current_state
        for action in remaining:
            new_state = (taken | {action}, remaining - {action})
            priority = f(new_state, sets)
            # A non-finite estimate (should the heuristic ever yield one)
            # marks a dead end; queuing it would only bloat the frontier.
            if priority == float("inf"):
                continue
            frontier.put((priority, new_state))
        if frontier.empty():
            raise ValueError("no selection of the given sets covers every element")
        current_state = frontier.get()[1]
    return counter, len(current_state[0])

def find_sol_professor(sets):
    """Best-first search for a cover, ordering the frontier by ``professorf``.

    A state is a pair ``(taken, remaining)`` of sets of indices into
    ``sets``. Starting with nothing taken, the current state is expanded by
    moving each remaining index into ``taken``; every successor is queued
    with priority ``professorf(successor, sets)`` and the search continues
    from the lowest-priority queued state until ``goal_check`` accepts it.

    Queue entries are ``(priority, state)`` tuples, so entries with equal
    priority are ordered by comparing the states themselves.

    Args:
        sets: sequence of boolean masks, one per candidate set.

    Returns:
        ``(counter, size)``: the number of states expanded and the number of
        sets in the cover that was found.

    Raises:
        ValueError: if the frontier runs empty before a cover is found
            (previously the blocking ``PriorityQueue.get`` would hang).
    """
    # Masks with no True entry can never help, so they are not offered as actions.
    actions = {i for i in range(len(sets)) if sum(sets[i]) != 0}
    frontier = PriorityQueue()
    current_state = (set(), actions)
    counter = 0

    while not goal_check(current_state, sets):
        counter += 1
        taken, remaining = current_state
        for action in remaining:
            new_state = (taken | {action}, remaining - {action})
            priority = professorf(new_state, sets)
            # A non-finite estimate (should the heuristic ever yield one)
            # marks a dead end; queuing it would only bloat the frontier.
            if priority == float("inf"):
                continue
            frontier.put((priority, new_state))
        if frontier.empty():
            raise ValueError("no selection of the given sets covers every element")
        current_state = frontier.get()[1]
    return counter, len(current_state[0])

In [None]:
# Run both searches on the same random instances and accumulate how many
# expansions each of them needed.
steps = 10
sum_counter_sol = 0
sum_counter_professor_sol = 0
for _ in tqdm(range(steps)):
    curr_sets = random_sets()

    c, len_sol = find_sol(curr_sets)
    sum_counter_sol += c

    c, len_professor_sol = find_sol_professor(curr_sets)
    sum_counter_professor_sol += c

    # Both searches are expected to agree on the cover size.
    if len_sol == len_professor_sol:
        continue
    print("Solution is not optimal")
    print(f'sol: {len_sol}, professor_sol: {len_professor_sol}')

# Mean number of expansions per instance for each search.
avg_sol = sum_counter_sol / steps
avg_professor_sol = sum_counter_professor_sol / steps
print(f'Average my solution steps: {avg_sol}')
print(f'Average professor solution steps: {avg_professor_sol}')

In [8]:
# Time both searches on the same ten random instances; each figure is the
# mean wall-clock time of ten runs.
sets_batch = [random_sets() for _ in range(10)]
for i, sets in enumerate(sets_batch):
    my_time = timeit.timeit(lambda: find_sol(sets), number=10) / 10
    print(f'My solution - set n°{i} time: {my_time}')
    professor_time = timeit.timeit(lambda: find_sol_professor(sets), number=10) / 10
    print(f'Professor solution - set n°{i} time: {professor_time}')


My solution - set n°0 time: 0.786881191700013
Professor solution - set n°0 time: 0.8096247249999579
My solution - set n°1 time: 5.86022115420019
Professor solution - set n°1 time: 6.1391498583001525
My solution - set n°2 time: 5.617038737499752
Professor solution - set n°2 time: 5.812892770799953
My solution - set n°3 time: 1.2400040542001078
Professor solution - set n°3 time: 1.2740204124998855
My solution - set n°4 time: 6.035132554200027
Professor solution - set n°4 time: 6.318131079100203
My solution - set n°5 time: 0.7692482457998266
Professor solution - set n°5 time: 0.7859249665998504
My solution - set n°6 time: 0.4888968958999612
Professor solution - set n°6 time: 0.49718988749991694
My solution - set n°7 time: 5.751743604200238
Professor solution - set n°7 time: 6.067144595900027
My solution - set n°8 time: 0.5179373041999498
Professor solution - set n°8 time: 0.5319015291999676
My solution - set n°9 time: 0.9086001416999352
Professor solution - set n°9 time: 0.941891491699789