In [1]:
from itertools import product
from random import random, randint, shuffle, seed
import numpy as np
from scipy import sparse
from copy import copy
from tqdm import tqdm
from functools import reduce

In [2]:
def make_set_covering_problem(num_points, num_sets, density, prob):
    """Build a reproducible random set-covering instance.

    Both the `random` module and NumPy's global RNG are re-seeded from the
    arguments, so the same arguments always yield the same instance.

    Args:
        num_points: number of items to cover (columns of the matrix).
        num_sets: number of candidate sets (rows of the matrix).
        density: probability that a given set covers a given point.
        prob: probability that a set is selected in the initial state.

    Returns:
        A tuple ``(sets, initial_state)`` where ``sets`` is a dense boolean
        ndarray of shape ``(num_sets, num_points)`` with ``sets[s, p]`` True
        when set ``s`` covers point ``p``, and ``initial_state`` is a 1-D
        boolean ndarray of length ``num_sets``.
    """
    seed(num_points * 2654435761 + num_sets + density)
    np.random.seed(int(num_points * 435761 + num_sets + density))
    # The result is dense, so allocate it directly instead of going through
    # a sparse container that is immediately converted back.
    sets = np.zeros((num_sets, num_points), dtype=bool)
    # One random() draw per cell, in row-major order; keeping this order keeps
    # the generated instances identical for a given seed.
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    # Force at least one covering set per point so the instance is solvable.
    for p in range(num_points):
        sets[randint(0, num_sets - 1), p] = True
    initial_state = np.random.choice([True, False], size=(num_sets,), p=[prob, 1 - prob])
    return sets, initial_state

# Halloween Challenge

Find the best solution with the fewest calls to the fitness function for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

In [3]:
# Instance configuration: a square problem (as many sets as points).
NUM_POINTS = 1000
NUM_SETS = 1000
DENSITY = 0.3
PROB = 0.02

# Generate the coverage matrix and the random starting selection.
problem, initial_state = make_set_covering_problem(NUM_POINTS, NUM_SETS, DENSITY, PROB)

print(f'Problem shape: {problem.shape}')
print(f'Initial state shape: {initial_state.shape}, Taken sets: {np.sum(initial_state)}')

Problem shape: (1000, 1000)
Initial state shape: (1000,), Taken sets: 10


In [4]:
def check_goal(problem, state):
    """Tell whether the sets selected by `state` cover every point.

    Args:
        problem: 2-D boolean array-like; rows are sets, columns are points.
        state: 1-D boolean array-like with one entry per row of `problem`;
            a truthy entry means the corresponding set is selected.

    Returns:
        NumPy boolean: True when every column of `problem` has at least one
        True among the selected rows, False otherwise (including when no set
        is selected and there is at least one point).
    """
    coverage = np.asarray(problem, dtype=bool)
    selected = np.asarray(state, dtype=bool)
    # OR the selected rows column-wise. The width comes from `problem` itself,
    # so the check works for any instance size, not only the notebook's globals.
    covered_points = np.any(coverage[selected], axis=0)
    return np.all(covered_points)

In [5]:
# Selecting every set must cover all points, otherwise the instance is infeasible.
assert check_goal(problem, np.ones(NUM_SETS, dtype=bool)), "Problem not solvable"

In [6]:
def fitness(problem, state):
    """Score a state as a ``(covered, signed_cost)`` pair.

    The first element is the result of ``check_goal``; the second is the
    number of selected sets, negated once the goal is reached. Compared as
    tuples, any covering state beats any non-covering one, covering states
    rank higher with fewer sets, and non-covering states rank higher with
    more sets.
    """
    covered = check_goal(problem, state)
    n_selected = np.sum(state)
    if covered:
        return covered, -n_selected
    return covered, n_selected

In [7]:
# Baseline: fitness of the random starting selection, shown as the cell output.
fitness(problem, initial_state)

(False, 10)

In [8]:
def RMHC(problem, state, fitness, max_it, max_give_up):
    """
    Random-Mutation Hill Climber implementation.

    At each iteration one randomly chosen entry of the state is flipped and
    the candidate is kept only if its fitness is strictly greater than the
    current one. The fitness of the current state is cached, so `fitness` is
    called once per candidate (plus once for the starting state); this
    assumes `fitness` is deterministic.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int);
        max_give_up: maximum number of consecutive non-improving
            evaluations before giving up (int).

    Returns:
        Possible state solution to the problem.
    """

    def tweak(state):
        """
        Tweak a state randomly.

        Args:
            state: 1-D boolean ndarray.

        Returns:
            New state with a changed boolean value.
        """
        new_state = copy(state)
        # Size the draw from the state itself rather than a notebook global.
        index = randint(0, len(state) - 1)
        new_state[index] = not new_state[index]
        return new_state

    changes = 0
    evals = 0
    evals_giveup = 0
    current_fitness = fitness(problem, state)
    for _ in tqdm(range(max_it)):
        new_state = tweak(state)
        new_fitness = fitness(problem, new_state)
        evals += 1
        if new_fitness > current_fitness:
            state, current_fitness = new_state, new_fitness
            changes += 1
            evals_giveup = 0
        else:
            evals_giveup += 1
            if evals_giveup == max_give_up:
                print('Maximum number of evaluations without improvement reached.')
                break
    else:
        # Only reached when the loop ran out of iterations without giving up.
        print('Maximum number of iterations reached.')

    print(f'Terminated after {changes} changes.', f'Number of evaluations: {evals}.', sep='\n')

    # The cached value is the fitness of the returned state.
    goal, cost = current_fitness

    print(f'Goal reached? {"Yes" if goal else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [9]:
# Run RMHC for at most 10_000 iterations, giving up after 2_000 consecutive
# non-improving candidates; the returned state is discarded (only the printed
# summary is of interest here).
_ = RMHC(problem, initial_state, fitness, 10_000, 2_000)

 28%|██▊       | 2822/10000 [00:00<00:01, 4412.84it/s]

Maximum number of evaluations without improvement reached.
Terminated after 13 changes.
Number of evaluations: 2823.
Goal reached? Yes
State cost: 15





In [10]:
def SAHC(problem, state, fitness, max_it):
    """
    Steepest-Ascent Hill Climber implementation.

    Each iteration sweeps over every position of the state, flipping one
    position at a time. NOTE: an improving flip is accepted immediately, so
    later candidates in the same sweep are built from the already-updated
    state (first-improvement per position rather than picking the single best
    neighbour). The search stops after a full sweep with no accepted flip or
    after `max_it` sweeps. The fitness of the current state is cached, so
    `fitness` is called once per candidate (plus once for the starting
    state); this assumes `fitness` is deterministic.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """

    def tweak(state, index):
        """
        Tweak a state changing the value in position index.

        Args:
            state: 1-D boolean ndarray;
            index: int value indicating the boolean value to change.

        Returns:
            New state with a changed boolean value according
            to the value of index.
        """
        new_state = copy(state)
        new_state[index] = not new_state[index]
        return new_state

    evals = 0
    changes = 0
    iterations = 0
    current_fitness = fitness(problem, state)
    for _ in tqdm(range(max_it)):
        iterations += 1
        changed = False
        # Sweep over the state's own length rather than a notebook global.
        for index in range(len(state)):
            new_state = tweak(state, index)
            new_fitness = fitness(problem, new_state)
            evals += 1
            if new_fitness > current_fitness:
                state, current_fitness = new_state, new_fitness
                changed = True
                changes += 1

        if not changed:
            break
    else:
        # Only reached when the iteration budget ran out before a sweep with
        # no improvement (the old check compared evaluations to max_it).
        print('Maximum number of iterations reached.')

    print(
        f'Terminated after {iterations} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    # The cached value is the fitness of the returned state.
    goal, cost = current_fitness

    print(f'Goal reached? {"Yes" if goal else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [15]:
# Run SAHC for at most 1_000 sweeps over all sets; the returned state is
# discarded (only the printed summary is of interest here).
_ = SAHC(problem, initial_state, fitness, 1_000)

  0%|          | 2/1000 [00:00<05:38,  2.95it/s]

Terminated after 3 iterations.
Terminated after 13 changes.
Number of evaluations: 3000.
Goal reached? Yes
State cost: 15



