In [39]:
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue

import numpy as np

from tqdm import tqdm

In [40]:
PROBLEM_SIZE = 50
NUM_SETS = 100
# seed(10)
SETS = tuple(
    np.array([random() < 0.3 for _ in range(PROBLEM_SIZE)]) for _ in range(NUM_SETS)
)
State = namedtuple("State", ["taken", "not_taken"])

In [41]:
def goal_check(state):
    return np.all(
        reduce(
            np.logical_or,
            [SETS[i] for i in state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)]),
        )
    )
    

In [42]:
assert goal_check(State(set(range(NUM_SETS)), set())), "Problem not solvable"

First attempt to the solution. After further analysis it resulted to not find an optimal solution.

In [43]:
def distance(state):
    return PROBLEM_SIZE - sum(
        reduce(
            np.logical_or,
            [SETS[i] for i in state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)])
        )
    )
    
def cost_function1(state):
    dist = distance(state)
    n_taken = len(state.taken)
    return dist + n_taken

Second attempt: this time the solution seems to be always optimal (further test are always needed). 

It slows down the iteration time but is very fast to reach an optimal solution.

In [44]:
def covered(state):
    return reduce(
            np.logical_or,
            [SETS[i] for i in state.taken],
            [False for _ in range(PROBLEM_SIZE)],
        )
    
    
def max_left(state):    
    _, res = max(
        (sum(
            np.logical_and(SETS[i], np.logical_not(covered(state)))
        ), i) for i in state.not_taken
    )
    return res

def h2(state):
    n_step = 0
    while not goal_check(state):
        max = max_left(state)
        state = State(state.taken ^ {max},
                       state.not_taken ^ {max})
        n_step += 1
    
    return n_step

def cost_function2(state):
    return len(state.taken) + h2(state)

Code tested during the lectures provided from the professor: confronted with h2 it is much more faster in iteration computation but it needs more time to obtain the result (sometimes is the same set of tiles)

In [45]:
def h3(state):
    already_covered = covered(state)
    if np.all(already_covered):
        return 0
    missing_size = PROBLEM_SIZE - sum(already_covered)
    candidates = sorted((sum(np.logical_and(s, np.logical_not(already_covered))) for s in SETS), reverse=True)
    taken = 1
    while sum(candidates[:taken]) < missing_size:
        taken += 1
    return taken

def cost_function3(state):
    return len(state.taken) + h3(state)

In [46]:
def a_star(cost_function):
    frontier = PriorityQueue()
    state = State(set(), set(range(NUM_SETS)))
    frontier.put((cost_function(state), state))

    counter = 0
    _, current_state = frontier.get()
    with tqdm(total=None) as pbar:
        while not goal_check(current_state):
            counter += 1
            for action in current_state.not_taken:
                new_state = State(
                    current_state.taken ^ {action}, current_state.not_taken ^ {action}
                )
                frontier.put((cost_function(new_state), new_state))
            _, current_state = frontier.get()
            pbar.update(1)

    print(f"Solved in {counter:,} steps ({len(current_state.taken)} tiles)")
    return current_state

In [47]:
cost_function = cost_function1
final_state = a_star(cost_function)
print(final_state.taken)

join = reduce(
            np.logical_or,
            [SETS[i] for i in final_state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)]),
        )
print(f"Coverage is {sum(join)} with {len(final_state.taken)} tiles")

5it [00:00, 312.56it/s]

Solved in 5 steps (5 tiles)
{2, 34, 35, 10, 89}
Coverage is 50 with 5 tiles





In [48]:
cost_function = cost_function2
final_state = a_star(cost_function)
print(final_state.taken)

join = reduce(
            np.logical_or,
            [SETS[i] for i in final_state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)]),
        )
print(f"Coverage is {sum(join)} with {len(final_state.taken)} tiles")

0it [00:00, ?it/s]

86it [00:47,  1.81it/s]

Solved in 86 steps (5 tiles)
{96, 68, 11, 13, 61}
Coverage is 50 with 5 tiles





In [49]:
cost_function = cost_function3
final_state = a_star(cost_function)
print(final_state.taken)

join = reduce(
            np.logical_or,
            [SETS[i] for i in final_state.taken],
            np.array([False for _ in range(PROBLEM_SIZE)]),
        )
print(f"Coverage is {sum(join)} with {len(final_state.taken)} tiles")

10836it [21:21,  8.45it/s]

Solved in 10,836 steps (5 tiles)
{6, 72, 46, 84, 61}
Coverage is 50 with 5 tiles





# Test
I've tested the several times on the google colab platform and collected some of the results in order to compare the performances.
In each of them the outputs are highlighted in order to better show them because the format of the print where reordered in between the tests.

In this test a strange behaviour occured for the third algorithm. In fact he didn't reached a solution but unfortunately I lost the SETS before having the chance to look at them and better understand the result.

![Image of the first test](../Images/L01_first_result.png "First test")

The second test allows to better analyze the differences at execution time of the last two algorithms. It can be seen what previously said: we have the third algorithm requiring more time to compute a similar solution to the second one. In both cases the solution covers correctly all the space. 

![Image of the second test](../Images/L01_second_result.png "Second test")

In this test can be seen that the first algorithm doesn't always provide a optimal solution implying that the cost function was wrongly imposed. Meanwhile the second and the third one have provided the exactly same solution but with the second algorithm requiring much less time to obtain it.  

![Image of the third test](../Images/L01_third_result.png "Third test")