Copyright **`(c)`** 2022 Giovanni Squillero `<squillero@polito.it>`  
[`https://github.com/squillero/computational-intelligence`](https://github.com/squillero/computational-intelligence)  
Free for personal or classroom use; see [`LICENSE.md`](https://github.com/squillero/computational-intelligence/blob/master/LICENSE.md) for details.  


# Lab 1: Set Covering

First lab + peer review. List this activity in your final report, it will be part of your exam.

## Task

Given a number $N$ and some lists of integers $P = (L_0, L_1, L_2, ..., L_n)$, 
determine, if possible, $S = (L_{s_0}, L_{s_1}, L_{s_2}, ..., L_{s_n})$
such that each number between $0$ and $N-1$ appears in at least one list

$$\forall n \in [0, N-1] \ \exists i : n \in L_{s_i}$$

and that the total number of elements in all $L_{s_i}$ is minimum.

## Instructions

* Create the directory `lab1` inside the course repo (the one you registered with Andrea)
* Put a `README.md` and your solution (all the files, code and auxiliary data if needed) in it
* Use `problem` to generate the problems with different $N$
* In the `README.md`, report the total number of elements in $L_{s_i}$ for problems with $N \in [5, 10, 20, 100, 500, 1000]$ and the total number of nodes visited during the search. Use `seed=42`.
* Use `GitHub Issues` to peer review others' lab

## Notes

* Working in groups is not only allowed, but recommended (see: [Ubuntu](https://en.wikipedia.org/wiki/Ubuntu_philosophy) and [Cooperative Learning](https://files.eric.ed.gov/fulltext/EJ1096789.pdf)). Collaborations must be explicitly declared in the `README.md`.
* [Yanking](https://www.emacswiki.org/emacs/KillingAndYanking) from the internet is allowed, but sources must be explicitly declared in the `README.md`.

**Deadline**

* Sunday, October 16th 23:59:59 for the working solution
* Sunday, October 23rd 23:59:59 for the peer reviews

In [23]:
import random

In [24]:
def problem(N, seed=None):
    """Generate a random set-covering instance over the integers ``0..N-1``.

    The global ``random`` generator is re-seeded with ``seed`` first, so the
    same ``(N, seed)`` pair always yields the same instance.

    Returns a list holding between ``N`` and ``5 * N`` lists; each inner list
    is built from ``N // 5`` to ``N // 2`` random draws with duplicates removed.
    """
    random.seed(seed)
    how_many_lists = random.randint(N, N * 5)

    instance = []
    for _ in range(how_many_lists):
        draws = random.randint(N // 5, N // 2)
        distinct = {random.randint(0, N - 1) for _ in range(draws)}
        instance.append(list(distinct))
    return instance

<h1>Generative Solution</h1>

In [25]:
class State:
    """Hashable wrapper around a set, usable as a dict key or set member.

    Two states are equal when their underlying sets are equal, and their hash
    is derived from the set's contents.  The set operators ``|``, ``&`` and
    ``-`` return new ``State`` objects; ``<`` is the proper-subset test.
    Comparing or combining a ``State`` with any other type yields
    ``NotImplemented`` so Python falls back to its default handling instead of
    failing on a missing ``_data`` attribute.
    """

    def __init__(self, data: set):
        # set() copies the input, so the state does not alias the caller's
        # collection; it also means any iterable (e.g. a list) is accepted.
        self._data = set(data)

    def __hash__(self):
        return hash(frozenset(self._data))

    def __eq__(self, other):
        if not isinstance(other, State):
            return NotImplemented
        return self._data == other._data

    def __lt__(self, other):
        # Proper-subset comparison, mirroring set.__lt__.
        if not isinstance(other, State):
            return NotImplemented
        return self._data < other._data

    def __or__(self, other):
        if not isinstance(other, State):
            return NotImplemented
        return State(self._data | other._data)

    def __and__(self, other):
        if not isinstance(other, State):
            return NotImplemented
        return State(self._data & other._data)

    def __sub__(self, other):
        if not isinstance(other, State):
            return NotImplemented
        return State(self._data - other._data)

    def __str__(self):
        return str(self._data)

    def __repr__(self):
        return repr(self._data)

    def __len__(self):
        return len(self._data)

    @property
    def data(self):
        """The underlying set itself (not a copy)."""
        return self._data

    def copy_data(self):
        """Return a shallow copy of the underlying set."""
        return self._data.copy()

In [26]:

from gx_utils import *
import logging
from random import seed, choice
from typing import Callable

# Log bare messages (no level or logger-name prefix), INFO and above.
logging.basicConfig(format="%(message)s", level=logging.INFO)

In [27]:
def goal_test(state):
    """Tell whether ``state`` equals the module-level ``GOAL``."""
    reached = state == GOAL
    return reached

def possible_actions():
    """Lazily wrap each list of the module-level ``P`` in a ``State``."""
    wrapped = (State(subset) for subset in P)
    return wrapped

def result(state, action):
    """Return the successor obtained by combining ``state`` and ``action`` with ``|``."""
    successor = state | action
    return successor
    
def search(
    initial_state: State,
    goal_test: Callable,
    parent_state: dict,
    state_cost: dict,
    priority_function: Callable,
    unit_cost: Callable,
):
    """Priority-driven graph search from ``initial_state`` to a goal state.

    Args:
        initial_state: state the search starts from.
        goal_test: ``goal_test(state)`` is truthy when ``state`` is a goal.
        parent_state: cleared, then filled in place with
            ``state -> (parent, action)`` (``None`` for the initial state).
        state_cost: cleared, then filled in place with ``state -> cost``
            accumulated along the recorded path.
        priority_function: ``priority_function(state)`` gives the priority a
            newly discovered state is pushed to the frontier with; it is
            called after ``state_cost`` has been updated for that state.
        unit_cost: ``unit_cost(state, action)`` is the cost of taking
            ``action`` from ``state``.

    Returns:
        The list of actions leading from ``initial_state`` to the goal found,
        in order, or ``None`` when the frontier runs out before any goal is
        reached.
    """
    parent_state.clear()
    state_cost.clear()
    frontier = PriorityQueue()
    state = initial_state
    parent_state[state] = None
    state_cost[state] = 0
    generated_states = 0

    # The available actions do not depend on the current state, so wrap them
    # once instead of rebuilding every State on every expansion.
    actions = tuple(possible_actions())

    while state is not None and not goal_test(state):
        for a in actions:
            new_state = result(state, a)
            new_cost = state_cost[state] + unit_cost(state, a)
            generated_states += 1

            if new_state not in state_cost and new_state not in frontier:
                parent_state[new_state] = (state, a)
                state_cost[new_state] = new_cost
                # Must come after the state_cost update: the priority
                # function may read state_cost[new_state].
                priority = priority_function(new_state)
                frontier.push(new_state, p=priority)
                # Lazy %-formatting: states are only rendered when DEBUG is on.
                logging.debug(
                    "Added new node %s to frontier (cost=%s, h = %s)",
                    new_state,
                    new_cost,
                    priority,
                )
            elif new_state in frontier and state_cost[new_state] > new_cost:
                # NOTE(review): only the recorded cost/parent are refreshed;
                # the frontier entry keeps the priority it was pushed with.
                # Whether PriorityQueue can re-prioritise is not visible in
                # this file - confirm before relying on cost-ordered pops.
                old_cost = state_cost[new_state]
                parent_state[new_state] = (state, a)
                state_cost[new_state] = new_cost
                logging.debug(
                    "Updated node %s cost in frontier: %s -> %s",
                    new_state,
                    old_cost,
                    new_cost,
                )
        state = frontier.pop() if frontier else None

    if state is None:
        # Frontier exhausted: there is no path to rebuild (parent_state has
        # no entry for None), so report the failure instead of crashing.
        logging.info(
            f"No solution found; visited {len(state_cost):,} states over {generated_states} generated states"
        )
        return None

    # Walk the parent links back from the goal, then flip into forward order.
    path = []
    s = state
    while parent_state[s]:
        s, a = parent_state[s]
        path.append(a)

    logging.info(f"Found a solution with {sum(len(_.data) for _ in path)} elements; visited {len(state_cost):,} states over {generated_states} generated states")
    return list(reversed(path))

<h2>Exact Solution</h2>

In [34]:
# Cost-ordered runs: the frontier priority is the cost accumulated so far.
logging.getLogger().setLevel(logging.INFO)
for N in (5, 10, 20):
    # search() clears and fills these two maps in place; the priority lambda
    # below reads state_cost while the search is running.
    parent_state = {}
    state_cost = {}
    P = problem(N, seed=42)
    GOAL = State(range(N))
    INITIAL_STATE = State(set())

    logging.info(f"N = {N}")
    final = search(
        INITIAL_STATE,
        goal_test=goal_test,
        parent_state=parent_state,
        state_cost=state_cost,
        priority_function=lambda s: state_cost[s],
        # An action costs as many elements as the state already contains.
        unit_cost=lambda state, action: len(state & action),
    )
    logging.debug(final)


N = 5
Found a solution with 5 elements; visited 32 states over 775 generated states
N = 10
Found a solution with 10 elements; visited 706 states over 29100 generated states
N = 20
Found a solution with 23 elements; visited 13,044 states over 97342 generated states


<h2>Heuristic Solution</h2>

In [33]:
# Heuristic runs: the frontier priority is h(state) alone.
logging.getLogger().setLevel(logging.INFO)
for N in (5, 10, 20, 100, 500, 1000):
    # search() clears and fills these two maps in place.
    parent_state = {}
    state_cost = {}
    P = problem(N, seed=42)
    GOAL = State(range(N))

    def h(state):
        """Return ``N`` minus the number of elements in ``state``."""
        remaining = N - len(state)
        return remaining

    INITIAL_STATE = State(set())
    logging.info(f"N = {N}")
    final = search(
        INITIAL_STATE,
        goal_test=goal_test,
        parent_state=parent_state,
        state_cost=state_cost,
        priority_function=h,
        # An action costs as many elements as the state already contains.
        unit_cost=lambda state, action: len(state & action),
    )
    logging.debug(final)

N = 5
Found a solution with 5 elements; visited 17 states over 75 generated states
N = 10
Found a solution with 10 elements; visited 63 states over 150 generated states
N = 20
Found a solution with 28 elements; visited 74 states over 136 generated states
N = 100
Found a solution with 192 elements; visited 1,706 states over 2135 generated states
N = 500
Found a solution with 1304 elements; visited 10,778 states over 12663 generated states
N = 1000
Found a solution with 2893 elements; visited 24,238 states over 28952 generated states


<h2>Enhanced Greedy Solution</h2>

In [31]:
def solve(N, seed=42):
    """Greedy cover of ``range(N)`` using the lists from ``problem(N, seed)``.

    Each round picks the list with the highest ratio of not-yet-covered
    elements to list length (ties go to the earliest list) and adds it to the
    solution, until every value in ``range(N)`` is covered.

    Args:
        N: size of the universe ``0..N-1``.
        seed: seed forwarded to ``problem``; defaults to 42.

    Returns:
        The chosen lists in selection order, or ``None`` when the generated
        lists cannot cover ``range(N)``.
    """
    goal = set(range(N))
    covered = set()
    solution = []

    # Build each list's set once rather than on every round.  Empty lists are
    # dropped: they cover nothing and would make the ratio divide by zero.
    candidates = [(lst, set(lst)) for lst in problem(N, seed=seed) if lst]

    def fresh_ratio(candidate):
        lst, members = candidate
        return len(members - covered) / len(lst)

    while covered != goal:
        best = max(candidates, key=fresh_ratio, default=None)
        # If even the best candidate adds nothing, no list can make progress:
        # stop instead of looping forever.
        if best is None or best[1] <= covered:
            logging.warning(
                "Greedy solution for N=%s: the generated lists cannot cover every value", N
            )
            return None
        chosen, members = best
        solution.append(chosen)
        covered |= members

    # One node is visited per selected list.
    visited_nodes = len(solution)
    logging.info(
        f"Greedy solution for N={N}: w={sum(len(_) for _ in solution)}, visited {visited_nodes} states"
    )
    logging.debug("%s", solution)
    return solution

# Run the greedy solver on every problem size.
for N in (5, 10, 20, 100, 500, 1000):
    solve(N)

Greedy solution for N=5: w=5, visited 5 states
Greedy solution for N=10: w=12, visited 5 states
Greedy solution for N=20: w=30, visited 6 states
Greedy solution for N=100: w=171, visited 8 states
Greedy solution for N=500: w=1256, visited 12 states
Greedy solution for N=1000: w=2913, visited 13 states
