LAB. 1 - A*
===
An implementation of the A* search algorithm designed and programmed by Luca Catalano (s308658). I designed the algorithm around a heuristic function that estimates the number of sets still needed to complete the set covering problem, computed as the ratio between the number of elements not yet covered and the maximum size of the available sets when counting only the elements not yet covered.
I also compared it with two heuristic functions proposed by the professor (one of which is the same as the one I implemented in a previous revision).

In [2]:
from random import random
from functools import reduce
from collections import namedtuple
from queue import PriorityQueue, SimpleQueue, LifoQueue
from math import ceil

import numpy as np

In [3]:
# Problem instance: NUM_SETS random subsets of a PROBLEM_SIZE-element universe.
PROBLEM_SIZE = 27
NUM_SETS = 100

# Each candidate set is a boolean membership mask of length PROBLEM_SIZE;
# every position is switched on independently with probability 0.4.
SETS = tuple(
    np.array([random() < 0.4 for _ in range(PROBLEM_SIZE)])
    for _ in range(NUM_SETS)
)

# Search state: indices of the sets chosen so far and of those still available.
State = namedtuple("State", ("taken", "not_taken"))

In [4]:
def goal_check(state):
    """Return whether the sets listed in ``state.taken`` cover every position.

    The membership masks of the taken sets are OR-ed together, starting from
    an all-False mask of length PROBLEM_SIZE; the state is a goal when every
    entry of the combined mask is True.
    """
    coverage = np.array([False] * PROBLEM_SIZE)
    for index in state.taken:
        coverage = np.logical_or(coverage, SETS[index])
    return np.all(coverage)

# Sanity check: if even the union of all NUM_SETS sets is not a goal state,
# this randomly drawn instance has no solution and the assertion fails.
assert goal_check(State(set(range(NUM_SETS)), set())), "Probelm not solvable" # can fail because SETS is generated randomly

In [5]:
def g_cost(state):
    """Return the cost paid so far: the number of sets in ``state.taken``."""
    chosen = state.taken
    return len(chosen)

# Proposed by professor
def covered(state):
    """Return the boolean mask of positions covered by the taken sets.

    The result is the element-wise OR of ``SETS[i]`` for every index ``i`` in
    ``state.taken``, starting from an all-False mask of length PROBLEM_SIZE.
    """
    mask = np.array([False] * PROBLEM_SIZE)
    for index in state.taken:
        mask = np.logical_or(mask, SETS[index])
    return mask

def h(state):
    """Estimate the number of additional sets needed to reach a goal state.

    The estimate is ``ceil(uncovered / largest_set_size)``, where
    ``uncovered`` is the number of positions not covered by the sets in
    ``state.taken`` and ``largest_set_size`` is the size of the largest set
    in SETS (counting all of its positions, covered or not).

    Returns:
        0 when every position is already covered, otherwise the ceiling of
        the ratio described above.

    Raises:
        ZeroDivisionError: if positions remain uncovered but every set in
            SETS is empty.
    """
    already_covered = reduce(
        np.logical_or,
        [SETS[i] for i in state.taken],
        np.array([False for _ in range(PROBLEM_SIZE)]),
    )
    uncovered = PROBLEM_SIZE - int(np.count_nonzero(already_covered))
    if uncovered == 0:
        # Nothing left to cover, so no further set is required.
        return 0
    largest_set_size = max(int(np.count_nonzero(s)) for s in SETS)
    # No debug printing here: the heuristic is meant to be evaluated once per
    # generated state, so any output would flood the notebook.
    return ceil(uncovered / largest_set_size)

def h2(state):
    """Estimate the remaining sets using the best gain on uncovered positions.

    Returns 0 when ``covered(state)`` is all True. Otherwise the number of
    still-uncovered positions is divided by the largest number of uncovered
    positions that any single set in SETS would cover, and the ceiling of
    that ratio is returned.
    """
    coverage = covered(state)
    if np.all(coverage):
        return 0
    still_missing = np.logical_not(coverage)
    # Largest number of new positions a single set could contribute.
    best_gain = max(sum(np.logical_and(s, still_missing)) for s in SETS)
    remaining = PROBLEM_SIZE - sum(coverage)
    return ceil(remaining / best_gain)

# What about considering only the size of the largest set, counting only the
# positions that coincide with the ones not yet covered?
def custom_logic(a, b):
    """Return False only when ``a`` equals True and ``b`` equals False.

    Every other combination of inputs yields True.
    """
    return not (a == True and b == False)

def custom_logic_vector(a, b):
    """Count the positions where the pair ``(a[i], b[i])`` is a match.

    A position is a mismatch only when ``a[i]`` is True and ``b[i]`` is
    False; every other combination counts as a match. Positions where
    ``a[i]`` is False therefore always count.

    The previous implementation returned the length of the element-wise
    result, which is always ``len(a)`` regardless of the values; the count of
    True entries is returned instead.

    Args:
        a: sequence of booleans (values are coerced to bool).
        b: sequence of booleans of the same length as ``a``.

    Returns:
        The number of matching positions as a Python ``int`` (0 for empty
        inputs).

    Raises:
        ValueError: if ``a`` and ``b`` have different lengths.
    """
    if len(a) != len(b):
        raise ValueError("Vectors a and b must be of the same length.")

    a_np = np.asarray(a, dtype=bool)
    b_np = np.asarray(b, dtype=bool)

    # "not (a and not b)" is equivalent to "(not a) or b"; evaluate it on the
    # whole arrays at once instead of calling a Python function per element.
    matches = np.logical_or(np.logical_not(a_np), b_np)
    return int(np.count_nonzero(matches))

def h3(state):
    """Estimate the remaining sets from the best ``custom_logic_vector`` score.

    The mask of positions covered by ``state.taken`` is built first. The
    number of uncovered positions is then divided by the largest value that
    ``custom_logic_vector(mask, s)`` yields over all sets ``s`` in SETS, and
    the ceiling of that ratio is returned.
    """
    already_covered = np.array([False] * PROBLEM_SIZE)
    for index in state.taken:
        already_covered = np.logical_or(already_covered, SETS[index])
    remaining = PROBLEM_SIZE - sum(already_covered)
    best_match = max(custom_logic_vector(already_covered, s) for s in SETS)
    return ceil(remaining / best_match)


In [19]:
# A* approach -> professor h's best
def astar_search(heuristic=None):
    """Run A* over selections of sets, ordered by ``g_cost + heuristic``.

    Starting from the state with no set taken, the state with the lowest
    priority is repeatedly removed from the frontier and, unless it is a
    goal, expanded by moving each not-yet-taken set into ``taken``.

    Args:
        heuristic: callable mapping a State to an estimate of the remaining
            cost. Defaults to ``h2`` when omitted, which matches the
            previous hard-coded behaviour.

    Returns:
        A ``(counter, state)`` tuple: the number of states expanded before
        a goal was removed from the frontier, and that goal state.

    Raises:
        RuntimeError: if the frontier is exhausted without reaching a goal.
            Without this check ``PriorityQueue.get()`` would block forever.
    """
    if heuristic is None:
        heuristic = h2

    frontier = PriorityQueue()
    start_state = State(set(), set(range(NUM_SETS)))
    frontier.put((0 + heuristic(start_state), start_state))

    counter = 0
    _, current_state = frontier.get()
    while not goal_check(current_state):
        for action in current_state.not_taken:
            # ``action`` is in not_taken and not in taken, so the symmetric
            # difference moves it from one set to the other.
            new_taken = set(current_state.taken ^ {action})
            new_not_taken = set(current_state.not_taken ^ {action})
            new_state = State(new_taken, new_not_taken)
            frontier.put((g_cost(new_state) + heuristic(new_state), new_state))
        counter += 1
        if frontier.empty():
            raise RuntimeError(
                "Problem not solvable: frontier exhausted before reaching a goal state"
            )
        _, current_state = frontier.get()
    return counter, current_state
# Run the A* search defined above and report the number of expanded states
# together with the size of the solution found.
counter, current_state = astar_search()
print(f"Solved using A* in {counter:,} steps with {len(current_state.taken)} sets")

Solved using A* in 18 steps with 3 sets


In [24]:
# A* approach --> Luca's
# slower than the professor's
def astar_search_l(heuristic=None):
    """Run A* over selections of sets using the ``h3`` heuristic by default.

    Starting from the state with no set taken, the state with the lowest
    ``g_cost + heuristic`` value is repeatedly removed from the frontier
    and, unless it is a goal, expanded by moving each not-yet-taken set
    into ``taken``.

    Args:
        heuristic: callable mapping a State to an estimate of the remaining
            cost. Defaults to ``h3`` when omitted, which matches the
            previous hard-coded behaviour.

    Returns:
        A ``(counter, state)`` tuple: the number of states expanded before
        a goal was removed from the frontier, and that goal state.

    Raises:
        RuntimeError: if the frontier is exhausted without reaching a goal.
            Without this check ``PriorityQueue.get()`` would block forever.
    """
    if heuristic is None:
        heuristic = h3

    frontier = PriorityQueue()
    start_state = State(set(), set(range(NUM_SETS)))
    frontier.put((0 + heuristic(start_state), start_state))

    counter = 0
    _, current_state = frontier.get()
    while not goal_check(current_state):
        for action in current_state.not_taken:
            # ``action`` is in not_taken and not in taken, so the symmetric
            # difference moves it from one set to the other.
            new_taken = set(current_state.taken ^ {action})
            new_not_taken = set(current_state.not_taken ^ {action})
            new_state = State(new_taken, new_not_taken)
            frontier.put((g_cost(new_state) + heuristic(new_state), new_state))
        counter += 1
        if frontier.empty():
            raise RuntimeError(
                "Problem not solvable: frontier exhausted before reaching a goal state"
            )
        _, current_state = frontier.get()
    return counter, current_state

# Run the A* variant defined above and report the number of expanded states
# together with the size of the solution found.
counter, current_state = astar_search_l()
print(f"Solved using A* in {counter:,} steps with {len(current_state.taken)} sets")

Solved using A* in 659 steps with 3 sets


Additional
===
Here I also analyze other search algorithms in order to compare their performance.

In [25]:
def search(frontier):
    """Run an uninformed search whose order is dictated by ``frontier``.

    The first state is taken from ``frontier``; until a goal state is
    retrieved, each retrieved state is expanded by moving every
    not-yet-taken set into ``taken`` and putting the resulting states back
    into ``frontier``. The retrieval order of the queue passed in (FIFO,
    LIFO, ...) determines the search strategy.

    Args:
        frontier: queue-like object offering ``put``, ``get`` and ``empty``,
            already holding the initial State.

    Returns:
        The first goal state retrieved from the frontier. The number of
        expanded states is printed before returning.

    Raises:
        RuntimeError: if the frontier is empty when a state is needed.
            Without this check ``get()`` would block forever.
    """
    if frontier.empty():
        raise RuntimeError("Frontier is empty: no initial state to search from")
    current_state = frontier.get()
    counter = 0
    while not goal_check(current_state):
        counter += 1
        for action in current_state.not_taken:
            new_state = State(
                current_state.taken ^ {action},
                current_state.not_taken ^ {action},
            )
            frontier.put(new_state)
        if frontier.empty():
            raise RuntimeError(
                "Problem not solvable: frontier exhausted before reaching a goal state"
            )
        current_state = frontier.get()
    print(f"Solved in {counter} steps")
    return current_state

# Breadth first search: a FIFO queue makes `search` expand states in the
# order in which they were generated.
fifo = SimpleQueue()
fifo.put(State(set(), set(range(NUM_SETS))))
breadth_solution_state = search(fifo)
print("Breadth first solution space:", breadth_solution_state)

# Depth first search: a LIFO queue makes `search` expand the most recently
# generated state first.
lifo = LifoQueue()
lifo.put(State(set(), set(range(NUM_SETS))))
depth_solution_state = search(lifo)
print("Depth first solution space:", depth_solution_state)

Solved in 10422 steps
Breadth first solution space: State(taken={0, 5, 31}, not_taken={1, 2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99})
Solved in 12 steps
Breadth first solution space: State(taken={96, 97, 98, 99, 88, 89, 90, 91, 92, 93, 94, 95}, not_taken={0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87})


In [6]:
# Greedy best search --> no optimal solution is guaranteed

# NB: this function is not optimistic (it may overestimate the remaining cost)
#     ==> it is not admissible for the A* algorithm
#     ==> the optimality of A* is not guaranteed

def distance(state):
    """Return the number of positions not covered by the taken sets.

    The masks ``SETS[i]`` for every ``i`` in ``state.taken`` are OR-ed
    together, and the number of True entries is subtracted from PROBLEM_SIZE.
    """
    mask = np.array([False] * PROBLEM_SIZE)
    for index in state.taken:
        mask = np.logical_or(mask, SETS[index])
    return PROBLEM_SIZE - sum(mask)

def greedy_best_searh(frontier):
    """Run a greedy best-first search ordered by ``distance`` alone.

    The initial state is read from ``frontier`` as a bare State. Every
    state generated afterwards is stored as a ``(distance, state)`` tuple,
    so the frontier always yields the state with the fewest uncovered
    positions. The cost already paid is ignored, so the solution found is
    not guaranteed to use the fewest sets.

    Args:
        frontier: priority queue offering ``put``, ``get`` and ``empty``,
            already holding the initial State (not wrapped in a tuple).

    Returns:
        The first goal state retrieved from the frontier. The number of
        expanded states and the solution size are printed before returning.

    Raises:
        RuntimeError: if the frontier is empty when a state is needed.
            Without this check ``get()`` would block forever.
    """
    if frontier.empty():
        raise RuntimeError("Frontier is empty: no initial state to search from")
    current_state = frontier.get()

    steps = 0
    while not goal_check(current_state):
        steps += 1
        for action in current_state.not_taken:
            new_state = State(
                current_state.taken ^ {action},
                current_state.not_taken ^ {action},
            )
            frontier.put((distance(new_state), new_state))
        if frontier.empty():
            raise RuntimeError(
                "Problem not solvable: frontier exhausted before reaching a goal state"
            )
        _, current_state = frontier.get()
    print(f"Solved in {steps} steps and with {len(current_state.taken)} tiles")
    return current_state

# Seed the priority queue with the bare initial State (no priority attached);
# greedy_best_searh reads this first entry without unpacking a tuple.
frontier = PriorityQueue()
frontier.put(State(set(), set(range(NUM_SETS))))
greedy_solution_search = greedy_best_searh(frontier)

Solved in 3 steps and with 3 tiles
