In [1]:
from itertools import product
from random import random, randint, seed, uniform, sample
import numpy as np
import math
from scipy import sparse
from copy import copy
from tqdm import tqdm
from functools import reduce

In [2]:
def make_set_covering_problem(num_points, num_sets, density, prob):
    """Returns a sparse array where rows are sets and columns are the covered items"""
    seed(num_points * 2654435761 + num_sets + density)
    np.random.seed(int(num_points * 435761 + num_sets + density))
    sets = sparse.lil_array((num_sets, num_points), dtype=bool).toarray()
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    for p in range(num_points):
        sets[randint(0, num_sets - 1), p] = True
    initial_state = np.random.choice([True, False], size=(num_sets,), p=[prob, 1 - prob])
    return sets, initial_state

# Halloween Challenge

Find the best solution with the fewest calls to the fitness functions for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

In [3]:
NUM_SETS = 1000
NUM_POINTS = 1000
DENSITY = 0.3
PROB = 0.02

problem, initial_state = make_set_covering_problem(NUM_POINTS, NUM_SETS, DENSITY, PROB)
print(
    f'Problem shape: {problem.shape}',
    f'Initial state shape: {initial_state.shape}, Taken sets: {np.sum(initial_state)}',
    sep='\n',
)

Problem shape: (1000, 1000)
Initial state shape: (1000,), Taken sets: 10


In [4]:
def check_goal(problem, state):
    return np.all(
        reduce(
            np.logical_or,
            [problem[i] for i, t in enumerate(state) if t],
            np.array([False for _ in range(NUM_POINTS)]),
        )
    )

In [5]:
assert check_goal(problem, np.full((NUM_SETS,), True)), "Problem not solvable"

In [6]:
def fitness1(problem, state):
    goal = check_goal(problem, state)
    cost = np.sum(state)
    return goal, cost if not goal else -cost


def fitness2(problem, state):
    def covered(state):
        return reduce(
            np.logical_or,
            [problem[i] for i, t in enumerate(state) if t],
            np.array([False for _ in range(NUM_POINTS)]),
        )

    cost = sum(state)
    occupied_cells = np.sum(covered(state))
    return occupied_cells, -cost


fitness = fitness2

In [7]:
fitness(problem, initial_state)

(974, -10)

In [8]:
def random_tweak(state):
    """
    Tweak a state randomly.

    Args:
        state: 1-D boolean ndarray.

    Returns:
        New state with a changed boolean value.
    """
    new_state = copy(state)
    index = randint(0, NUM_SETS - 1)
    new_state[index] = not new_state[index]
    return new_state


def tweak_by_index(state, index):
    """
    Tweak a state changing the value in position index.

    Args:
        state: 1-D boolean ndarray;
        index: int value indicating the boolean value to change.

    Returns:
        New state with a changed boolean value according
        to the value of index.
    """
    new_state = copy(state)
    new_state[index] = not new_state[index]
    return new_state

In [9]:
def RMHC(problem, state, fitness, max_it, max_give_up):
    """
    Random-Mutation Hill Climber implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int);
        max_give_up: maximum number of evaluations before giving up (int).

    Returns:
        Possible state solution to the problem.
    """
    changes = 0
    evals_giveup = 0
    for evals in tqdm(range(max_it)):
        new_state = random_tweak(state)
        if fitness(problem, new_state) > fitness(problem, state):
            state = new_state
            changes += 1
            evals_giveup = 0
        else:
            evals_giveup += 1
            if evals_giveup == max_give_up:
                print('Maximum number of evaluations without improvement reached.')
                break

    evals += 1
    if evals == max_it:
        print('Maximum number of iterations reached.')
    print(f'Terminated after {changes} changes.', f'Number of evaluations: {evals}.', sep='\n')

    goal, cost = fitness(problem, state)

    print(f'Goal reached? {"Yes" if goal else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [10]:
_ = RMHC(problem, initial_state, fitness, 10_000, 2_000)

 26%|██▌       | 2565/10000 [00:00<00:02, 2860.60it/s]

Maximum number of evaluations without improvement reached.
Terminated after 9 changes.
Number of evaluations: 2566.
Goal reached? Yes
State cost: 15





In [11]:
def SAHC(problem, state, fitness, max_it):
    """
    Steepest-Ascent Hill Climber implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """
    evals = 0
    changes = 0
    for it in tqdm(range(max_it)):
        succ = tweak_by_index(state, 0)
        for index in range(1, NUM_SETS):
            new_state = tweak_by_index(state, index)
            evals += 1
            if fitness(problem, new_state) > fitness(problem, succ):
                succ = new_state
        evals += 1
        if fitness(problem, succ) > fitness(problem, state):
            state = succ
            changes += 1
        else:
            break

    it += 1
    if it == max_it:
        print('Maximum number of iterations reached.')
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = fitness(problem, state)

    print(f'Goal reached? {"Yes" if goal else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [12]:
_ = SAHC(problem, initial_state, fitness, 1_000)

  0%|          | 3/1000 [00:01<07:34,  2.19it/s]

Terminated after 4 iterations.
Terminated after 3 changes.
Number of evaluations: 4000.
Goal reached? Yes
State cost: 13





In [13]:
def SAHCwReplacement(problem, state, fitness, n_neighbors, max_it):
    """
    Steepest-Ascent Hill Climber implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        n_neighbors: number of desired tweaks to try;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """
    evals = 0
    changes = 0
    for it in tqdm(range(max_it)):
        index = randint(0, NUM_SETS)
        succ = tweak_by_index(state, index)
        for index in set(sample(range(NUM_SETS), n_neighbors)) - {index}:
            new_state = tweak_by_index(state, index)
            evals += 1
            if fitness(problem, new_state) > fitness(problem, succ):
                succ = new_state
        evals += 1
        if fitness(problem, succ) > fitness(problem, state):
            state = succ
            changes += 1
        else:
            break

    it += 1
    if it == max_it:
        print('Maximum number of iterations reached.')
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = fitness(problem, state)

    print(f'Goal reached? {"Yes" if goal else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [14]:
_ = SAHCwReplacement(problem, initial_state, fitness, 100, 1_000)

  0%|          | 3/1000 [00:00<00:47, 21.20it/s]

Terminated after 4 iterations.
Terminated after 3 changes.
Number of evaluations: 404.
Goal reached? Yes
State cost: 13





In [15]:
def simulated_annealing(problem, state, fitness, max_it):
    """
    Simulated Annealing implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """
    evals = 0
    changes = 0
    for it in tqdm(range(max_it)):
        T = 1 - ((it + 1) / max_it)
        if T == 0:
            break
        new_state = random_tweak(state)
        evals += 1
        new_value, old_value = fitness(problem, new_state), fitness(problem, state)
        if new_value > old_value:
            state = new_state
            changes += 1
        else:
            if uniform(0, 1) < math.exp((new_value[1] - old_value[1]) / T):
                state = new_state
                changes += 1

    it += 1
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = fitness(problem, state)

    print(f'Goal reached? {"Yes" if goal else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [16]:
_ = simulated_annealing(problem, initial_state, fitness, 5_900)

100%|█████████▉| 5899/5900 [00:02<00:00, 2118.63it/s]

Terminated after 5900 iterations.
Terminated after 1476 changes.
Number of evaluations: 5899.
Goal reached? Yes
State cost: 18





## Results

In [23]:
for NUM_POINTS, DENSITY, PROB in product((100, 1_000, 5_000), (0.3, 0.7), (0.02,)):
    NUM_SETS = NUM_POINTS

    print(f'** Combination: NUM_POINTS={NUM_POINTS}, NUM_SETS={NUM_SETS}, DENSITY={DENSITY} **', end='\n\n')

    problem, initial_state = make_set_covering_problem(NUM_POINTS, NUM_SETS, DENSITY, PROB)
    print(
        f'Problem shape: {problem.shape}',
        f'Initial state shape: {initial_state.shape}, Taken sets: {np.sum(initial_state)}',
        sep='\n',
    )

    print('\n-- RMHC')
    _ = RMHC(problem, initial_state, fitness, 10_000, 2_000)
    print('\n-- SAHC')
    _ = SAHC(problem, initial_state, fitness, 1_000)
    print('\n-- SAHCwReplacement')
    _ = SAHCwReplacement(problem, initial_state, fitness, 100, 1_000)
    print('\n-- simulated_annealing')
    _ = simulated_annealing(problem, initial_state, fitness, 5_900)

    print()

** Combination: NUM_POINTS=100, NUM_SETS=100, DENSITY=0.3 **

Problem shape: (100, 100)
Initial state shape: (100,), Taken sets: 4

-- RMHC


 20%|██        | 2048/10000 [00:00<00:00, 18245.79it/s]


Maximum number of evaluations without improvement reached.
Terminated after 12 changes.
Number of evaluations: 2049.
Goal reached? Yes
State cost: 10

-- SAHC


  0%|          | 5/1000 [00:00<00:06, 162.27it/s]


Terminated after 6 iterations.
Terminated after 5 changes.
Number of evaluations: 600.
Goal reached? Yes
State cost: 7

-- SAHCwReplacement


  0%|          | 5/1000 [00:00<00:06, 159.74it/s]


Terminated after 6 iterations.
Terminated after 5 changes.
Number of evaluations: 600.
Goal reached? Yes
State cost: 7

-- simulated_annealing


100%|█████████▉| 5899/5900 [00:00<00:00, 16109.29it/s]


Terminated after 5900 iterations.
Terminated after 2020 changes.
Number of evaluations: 5899.
Goal reached? Yes
State cost: 8

** Combination: NUM_POINTS=100, NUM_SETS=100, DENSITY=0.7 **

Problem shape: (100, 100)
Initial state shape: (100,), Taken sets: 4

-- RMHC


 20%|█▉        | 1999/10000 [00:00<00:00, 21318.38it/s]


Maximum number of evaluations without improvement reached.
Terminated after 0 changes.
Number of evaluations: 2000.
Goal reached? Yes
State cost: 4

-- SAHC


  0%|          | 0/1000 [00:00<?, ?it/s]


Terminated after 1 iterations.
Terminated after 0 changes.
Number of evaluations: 100.
Goal reached? Yes
State cost: 4

-- SAHCwReplacement


  0%|          | 0/1000 [00:00<?, ?it/s]


Terminated after 1 iterations.
Terminated after 0 changes.
Number of evaluations: 100.
Goal reached? Yes
State cost: 4

-- simulated_annealing


100%|█████████▉| 5899/5900 [00:00<00:00, 16926.83it/s]


Terminated after 5900 iterations.
Terminated after 1624 changes.
Number of evaluations: 5899.
Goal reached? Yes
State cost: 4

** Combination: NUM_POINTS=1000, NUM_SETS=1000, DENSITY=0.3 **

Problem shape: (1000, 1000)
Initial state shape: (1000,), Taken sets: 10

-- RMHC


 26%|██▌       | 2565/10000 [00:00<00:02, 2766.46it/s]


Maximum number of evaluations without improvement reached.
Terminated after 9 changes.
Number of evaluations: 2566.
Goal reached? Yes
State cost: 15

-- SAHC


  0%|          | 3/1000 [00:01<07:36,  2.18it/s]


Terminated after 4 iterations.
Terminated after 3 changes.
Number of evaluations: 4000.
Goal reached? Yes
State cost: 13

-- SAHCwReplacement


  0%|          | 3/1000 [00:00<00:48, 20.69it/s]


Terminated after 4 iterations.
Terminated after 3 changes.
Number of evaluations: 404.
Goal reached? Yes
State cost: 13

-- simulated_annealing


100%|█████████▉| 5899/5900 [00:02<00:00, 2107.40it/s]


Terminated after 5900 iterations.
Terminated after 1476 changes.
Number of evaluations: 5899.
Goal reached? Yes
State cost: 18

** Combination: NUM_POINTS=1000, NUM_SETS=1000, DENSITY=0.7 **

Problem shape: (1000, 1000)
Initial state shape: (1000,), Taken sets: 10

-- RMHC


 52%|█████▏    | 5246/10000 [00:01<00:01, 2902.15it/s]


Maximum number of evaluations without improvement reached.
Terminated after 5 changes.
Number of evaluations: 5247.
Goal reached? Yes
State cost: 5

-- SAHC


  0%|          | 5/1000 [00:02<06:51,  2.42it/s]


Terminated after 6 iterations.
Terminated after 5 changes.
Number of evaluations: 6000.
Goal reached? Yes
State cost: 5

-- SAHCwReplacement


  0%|          | 0/1000 [00:00<?, ?it/s]


Terminated after 1 iterations.
Terminated after 0 changes.
Number of evaluations: 101.
Goal reached? Yes
State cost: 10

-- simulated_annealing


100%|█████████▉| 5899/5900 [00:02<00:00, 2069.39it/s]


Terminated after 5900 iterations.
Terminated after 1482 changes.
Number of evaluations: 5899.
Goal reached? Yes
State cost: 22

** Combination: NUM_POINTS=5000, NUM_SETS=5000, DENSITY=0.3 **

Problem shape: (5000, 5000)
Initial state shape: (5000,), Taken sets: 106

-- RMHC


 88%|████████▊ | 8824/10000 [00:15<00:02, 586.47it/s]


Maximum number of evaluations without improvement reached.
Terminated after 83 changes.
Number of evaluations: 8825.
Goal reached? Yes
State cost: 23

-- SAHC


  9%|▉         | 88/1000 [12:34<2:10:22,  8.58s/it]


Terminated after 89 iterations.
Terminated after 88 changes.
Number of evaluations: 445000.
Goal reached? Yes
State cost: 18

-- SAHCwReplacement


  0%|          | 2/1000 [00:00<04:30,  3.68it/s]


Terminated after 3 iterations.
Terminated after 2 changes.
Number of evaluations: 303.
Goal reached? Yes
State cost: 104

-- simulated_annealing


100%|█████████▉| 5899/5900 [00:13<00:00, 429.82it/s]


Terminated after 5900 iterations.
Terminated after 1333 changes.
Number of evaluations: 5899.
Goal reached? Yes
State cost: 393

** Combination: NUM_POINTS=5000, NUM_SETS=5000, DENSITY=0.7 **

Problem shape: (5000, 5000)
Initial state shape: (5000,), Taken sets: 106

-- RMHC


100%|██████████| 10000/10000 [00:17<00:00, 581.16it/s]


Maximum number of iterations reached.
Terminated after 93 changes.
Number of evaluations: 10000.
Goal reached? Yes
State cost: 13

-- SAHC


 10%|█         | 100/1000 [19:05<2:51:47, 11.45s/it]


Terminated after 101 iterations.
Terminated after 100 changes.
Number of evaluations: 505000.
Goal reached? Yes
State cost: 6

-- SAHCwReplacement


  0%|          | 2/1000 [00:00<04:29,  3.71it/s]


Terminated after 3 iterations.
Terminated after 2 changes.
Number of evaluations: 303.
Goal reached? Yes
State cost: 104

-- simulated_annealing


100%|█████████▉| 5899/5900 [00:13<00:00, 434.61it/s]

Terminated after 5900 iterations.
Terminated after 1275 changes.
Number of evaluations: 5899.
Goal reached? Yes
State cost: 389




