In [1]:
from itertools import product
from random import random, randint, seed, uniform, sample
import numpy as np
import math
from scipy import sparse
from copy import copy
from tqdm import tqdm
from functools import reduce
from collections import deque

In [2]:
def make_set_covering_problem(num_points, num_sets, density, prob=None):
    """Returns a sparse array where rows are sets and columns are the covered items"""
    seed(num_points * 2654435761 + num_sets + density)
    sets = sparse.lil_array((num_sets, num_points), dtype=bool).toarray()
    for s, p in product(range(num_sets), range(num_points)):
        if random() < density:
            sets[s, p] = True
    for p in range(num_points):
        sets[randint(0, num_sets - 1), p] = True
    if prob is not None:
        np.random.seed(int(num_points * 435761 + num_sets + density))
        initial_state = np.random.choice([True, False], size=(num_sets,), p=[prob, 1 - prob])
    else:
        initial_state = np.full((num_sets,), False, dtype=np.bool_)
    return sets, initial_state

# Halloween Challenge

Find the best solution with the fewest calls to the fitness functions for:

* `num_points = [100, 1_000, 5_000]`
* `num_sets = num_points`
* `density = [.3, .7]` 

## Problem definition

In [3]:
NUM_SETS = 1000
NUM_POINTS = 1000
DENSITY = 0.3
# PROB = 0.02

# problem, initial_state = make_set_covering_problem(NUM_POINTS, NUM_SETS, DENSITY, PROB)
problem, initial_state = make_set_covering_problem(NUM_POINTS, NUM_SETS, DENSITY)
print(
    f'Problem shape: {problem.shape}',
    f'Initial state shape: {initial_state.shape}, Taken sets: {np.sum(initial_state)}',
    sep='\n',
)

Problem shape: (1000, 1000)
Initial state shape: (1000,), Taken sets: 0


In [4]:
def check_goal(problem, state):
    return np.all(
        reduce(
            np.logical_or,
            [problem[i] for i, t in enumerate(state) if t],
            np.array([False for _ in range(NUM_POINTS)]),
        )
    )

In [5]:
assert check_goal(problem, np.full((NUM_SETS,), True, dtype=np.bool_)), "Problem not solvable"

In [6]:
def fitness1(problem, state):
    goal = check_goal(problem, state)
    cost = np.sum(state)
    return goal, cost if not goal else -cost


def fitness2(problem, state):
    def covered(state):
        return reduce(
            np.logical_or,
            [problem[i] for i, t in enumerate(state) if t],
            np.array([False for _ in range(NUM_POINTS)]),
        )

    cost = sum(state)
    occupied_cells = np.sum(covered(state))
    return occupied_cells, -cost


fitness = fitness2

In [7]:
fitness(problem, initial_state)

(0, 0)

## Implemented methods - Single-state methods
The Halloween challenge has been accepted. To solve it, I'd reply with the following methods (with some modifications described for each algorithm, if any):
- **Random-Mutation Hill Climber**;
- **Steepest-Step Hill Climber**;
- **Steepest-Step Hill Climber with Replacement**;
- **Simulated Annealing**.

In [8]:
def random_tweak(state):
    """
    Tweak a state randomly.

    Args:
        state: 1-D boolean ndarray.

    Returns:
        New state with a changed boolean value.
    """
    new_state = copy(state)
    index = randint(0, NUM_SETS - 1)
    new_state[index] = not new_state[index]
    return new_state


def tweak_by_index(state, index):
    """
    Tweak a state changing the value in position index.

    Args:
        state: 1-D boolean ndarray;
        index: int value indicating the boolean value to change.

    Returns:
        New state with a changed boolean value according
        to the value of index.
    """
    new_state = copy(state)
    new_state[index] = not new_state[index]
    return new_state

To make the Random-Mutation Hill Climber run faster, I decided to give up the search when for `max_give_up` times I do not improve my best current solution.

In [9]:
def RMHC(problem, state, fitness, max_it, max_give_up):
    """
    Random-Mutation Hill Climber implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int);
        max_give_up: maximum number of evaluations before giving up (int).

    Returns:
        Possible state solution to the problem.
    """
    changes = 0
    evals_giveup = 0
    evals = 1
    best_fitness = fitness(problem, state)
    for it in tqdm(range(max_it)):
        new_state = random_tweak(state)
        evals += 1
        new_state_fitness = fitness(problem, new_state)
        if new_state_fitness > best_fitness:
            state = new_state
            best_fitness = new_state_fitness
            changes += 1
            evals_giveup = 0
        else:
            evals_giveup += 1
            if evals_giveup == max_give_up:
                print('Maximum number of evaluations without improvement reached.')
                break

    it += 1
    if it == max_it:
        print('Maximum number of iterations reached.')
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = best_fitness
    cond = goal if isinstance(goal, bool) else goal == problem.shape[1]

    print(f'Goal reached? {"Yes" if cond else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [10]:
_ = RMHC(problem, initial_state, fitness, 10_000, 2_000)

 28%|██▊       | 2822/10000 [00:00<00:01, 5570.53it/s]

Maximum number of evaluations without improvement reached.
Terminated after 2823 iterations.
Terminated after 23 changes.
Number of evaluations: 2824.
Goal reached? Yes
State cost: 15





In [11]:
def SAHC(problem, state, fitness, max_it):
    """
    Steepest-Ascent Hill Climber implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """
    changes = 0
    evals = 1
    best_fitness = fitness(problem, state)
    for it in tqdm(range(max_it)):
        succ = tweak_by_index(state, 0)
        evals += 1
        succ_fitness = fitness(problem, succ)
        for index in range(1, NUM_SETS):
            new_state = tweak_by_index(state, index)
            evals += 1
            new_state_fitness = fitness(problem, new_state)
            if new_state_fitness > succ_fitness:
                succ = new_state
                succ_fitness = new_state_fitness
        if succ_fitness > best_fitness:
            state = succ
            best_fitness = succ_fitness
            changes += 1
        else:
            break

    it += 1
    if it == max_it:
        print('Maximum number of iterations reached.')
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = best_fitness
    cond = goal if isinstance(goal, bool) else goal == problem.shape[1]

    print(f'Goal reached? {"Yes" if cond else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [12]:
_ = SAHC(problem, initial_state, fitness, 1_000)

  1%|          | 10/1000 [00:01<03:07,  5.27it/s]

Terminated after 11 iterations.
Terminated after 10 changes.
Number of evaluations: 11001.
Goal reached? Yes
State cost: 10





In [13]:
def SAHCwReplacement(problem, state, fitness, n_neighbors, max_it):
    """
    Steepest-Ascent Hill Climber implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        n_neighbors: number of desired tweaks to try;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """
    changes = 0
    evals = 1
    best_fitness = fitness(problem, state)
    for it in tqdm(range(max_it)):
        index = randint(0, NUM_SETS - 1)
        succ = tweak_by_index(state, index)
        evals += 1
        succ_fitness = fitness(problem, succ)
        for index in set(sample(range(NUM_SETS), n_neighbors)) - {index}:
            new_state = tweak_by_index(state, index)
            evals += 1
            new_state_fitness = fitness(problem, new_state)
            if new_state_fitness > succ_fitness:
                succ = new_state
                succ_fitness = new_state_fitness
        if succ_fitness > best_fitness:
            state = succ
            best_fitness = succ_fitness
            changes += 1
        else:
            break

    it += 1
    if it == max_it:
        print('Maximum number of iterations reached.')
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = best_fitness
    cond = goal if isinstance(goal, bool) else goal == problem.shape[1]

    print(f'Goal reached? {"Yes" if cond else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [14]:
_ = SAHCwReplacement(problem, initial_state, fitness, 100, 1_000)

  1%|          | 11/1000 [00:00<00:19, 50.69it/s]

Terminated after 12 iterations.
Terminated after 11 changes.
Number of evaluations: 1211.
Goal reached? Yes
State cost: 11





For the following method, I took suggestions and code parts from these links:
- [www.geeksforgeeks.org](https://www.geeksforgeeks.org/what-is-tabu-search/);
- [stackoverflow.com](https://stackoverflow.com/questions/60492520/check-if-array-is-in-deque-of-arrays-python)

I implemented the method in a slightly different way than what you can find on the first link. I leave the search is the best successor is not better than the best current solution or each successor is already in the tabu list.

In [15]:
def tabu_search(problem, state, fitness, n_neighbors, max_tabu_list, max_it):
    """
    Steepest-Ascent Hill Climber implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        n_neighbors: number of desired tweaks to try;
        max_tabu_list: tabu list size;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """
    changes = 0
    evals = 1
    best_fitness = fitness(problem, state)
    tabu_list = deque()
    for it in tqdm(range(max_it)):
        index = randint(0, NUM_SETS - 1)
        succ = tweak_by_index(state, index)
        evals += 1
        succ_fitness = fitness(problem, succ)
        for index in set(sample(range(NUM_SETS), n_neighbors)) - {index}:
            new_state = tweak_by_index(state, index)
            if not any((new_state == elem).all() for elem in tabu_list):
                evals += 1
                new_state_fitness = fitness(problem, new_state)
                if new_state_fitness > succ_fitness:
                    succ = new_state
                    succ_fitness = new_state_fitness
        if any((succ == elem).all() for elem in tabu_list):
            break
        tabu_list.append(succ)
        if len(tabu_list) > max_tabu_list:
            tabu_list.popleft()
        if succ_fitness > best_fitness:
            state = succ
            best_fitness = succ_fitness
            changes += 1
        else:
            break

    it += 1
    if it == max_it:
        print('Maximum number of iterations reached.')
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = best_fitness
    cond = goal if isinstance(goal, bool) else goal == problem.shape[1]

    print(f'Goal reached? {"Yes" if cond else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [16]:
_ = tabu_search(problem, initial_state, fitness, 100, 5, 1_000)

  1%|          | 11/1000 [00:00<00:20, 49.38it/s]

Terminated after 12 iterations.
Terminated after 11 changes.
Number of evaluations: 1210.
Goal reached? Yes
State cost: 11





In [17]:
def simulated_annealing(problem, state, fitness, max_it):
    """
    Simulated Annealing implementation.

    Args:
        problem: 2-D boolean ndarray;
        state: 1-D boolean ndarray (dim equal to #rows of problem);
        fitness: fitness function to evaluate a state;
        max_it: maximum number of iterations (int).

    Returns:
        Possible state solution to the problem.
    """
    changes = 0
    evals = 1
    best_fitness = fitness(problem, state)
    for it in tqdm(range(max_it)):
        T = 1 - ((it + 1) / max_it)
        if T == 0:
            break
        new_state = random_tweak(state)
        evals += 1
        new_state_fitness = fitness(problem, new_state)
        if new_state_fitness > best_fitness:
            state = new_state
            best_fitness = new_state_fitness
            changes += 1
        else:
            if uniform(0, 1) < math.exp((new_state_fitness[1] - best_fitness[1]) / T):
                state = new_state
                best_fitness = new_state_fitness
                changes += 1

    it += 1
    print(
        f'Terminated after {it} iterations.',
        f'Terminated after {changes} changes.',
        f'Number of evaluations: {evals}.',
        sep='\n',
    )

    goal, cost = best_fitness
    cond = goal if isinstance(goal, bool) else goal == problem.shape[1]

    print(f'Goal reached? {"Yes" if cond else "No"}', f'State cost: {abs(cost)}', sep='\n')
    return state

In [28]:
_ = simulated_annealing(problem, initial_state, fitness, 5_400)

100%|█████████▉| 5399/5400 [00:01<00:00, 4122.60it/s]

Terminated after 5400 iterations.
Terminated after 1423 changes.
Number of evaluations: 5400.
Goal reached? Yes
State cost: 25





## Results

In [29]:
for NUM_POINTS, DENSITY, PROB in product((100, 1_000, 5_000), (0.3, 0.7), (0.02,)):
    NUM_SETS = NUM_POINTS

    print(f'** Combination: NUM_POINTS={NUM_POINTS}, NUM_SETS={NUM_SETS}, DENSITY={DENSITY} **', end='\n\n')

    problem, initial_state = make_set_covering_problem(NUM_POINTS, NUM_SETS, DENSITY, PROB)
    print(
        f'Problem shape: {problem.shape}',
        f'Initial state shape: {initial_state.shape}, Taken sets: {np.sum(initial_state)}',
        sep='\n',
    )

    print('\n-- RMHC')
    _ = RMHC(problem, initial_state, fitness, 10_000, 2_000)
    print('\n-- SAHC')
    _ = SAHC(problem, initial_state, fitness, 1_000)
    print('\n-- SAHCwReplacement')
    _ = SAHCwReplacement(problem, initial_state, fitness, 100, 1_000)
    print('\n-- tabu_search')
    _ = tabu_search(problem, initial_state, fitness, 100, 5, 1_000)
    print('\n-- simulated_annealing')
    _ = simulated_annealing(problem, initial_state, fitness, 5_400)

    print()

** Combination: NUM_POINTS=100, NUM_SETS=100, DENSITY=0.3 **

Problem shape: (100, 100)
Initial state shape: (100,), Taken sets: 4

-- RMHC


 20%|██        | 2048/10000 [00:00<00:00, 34441.02it/s]


Maximum number of evaluations without improvement reached.
Terminated after 2049 iterations.
Terminated after 12 changes.
Number of evaluations: 2050.
Goal reached? Yes
State cost: 10

-- SAHC


  0%|          | 5/1000 [00:00<00:03, 327.50it/s]


Terminated after 6 iterations.
Terminated after 5 changes.
Number of evaluations: 601.
Goal reached? Yes
State cost: 7

-- SAHCwReplacement


  0%|          | 5/1000 [00:00<00:03, 301.74it/s]


Terminated after 6 iterations.
Terminated after 5 changes.
Number of evaluations: 601.
Goal reached? Yes
State cost: 7

-- tabu_search


  0%|          | 4/1000 [00:00<00:03, 258.51it/s]


Terminated after 5 iterations.
Terminated after 4 changes.
Number of evaluations: 498.
Goal reached? Yes
State cost: 8

-- simulated_annealing


100%|█████████▉| 5399/5400 [00:00<00:00, 29938.72it/s]


Terminated after 5400 iterations.
Terminated after 1741 changes.
Number of evaluations: 5400.
Goal reached? Yes
State cost: 11

** Combination: NUM_POINTS=100, NUM_SETS=100, DENSITY=0.7 **

Problem shape: (100, 100)
Initial state shape: (100,), Taken sets: 4

-- RMHC


 20%|█▉        | 1999/10000 [00:00<00:00, 39376.76it/s]

Maximum number of evaluations without improvement reached.





Terminated after 2000 iterations.
Terminated after 0 changes.
Number of evaluations: 2001.
Goal reached? Yes
State cost: 4

-- SAHC


  0%|          | 0/1000 [00:00<?, ?it/s]


Terminated after 1 iterations.
Terminated after 0 changes.
Number of evaluations: 101.
Goal reached? Yes
State cost: 4

-- SAHCwReplacement


  0%|          | 0/1000 [00:00<?, ?it/s]


Terminated after 1 iterations.
Terminated after 0 changes.
Number of evaluations: 101.
Goal reached? Yes
State cost: 4

-- tabu_search


  0%|          | 0/1000 [00:00<?, ?it/s]


Terminated after 1 iterations.
Terminated after 0 changes.
Number of evaluations: 101.
Goal reached? Yes
State cost: 4

-- simulated_annealing


100%|█████████▉| 5399/5400 [00:00<00:00, 30658.84it/s]

Terminated after 5400 iterations.
Terminated after 1484 changes.
Number of evaluations: 5400.
Goal reached? Yes
State cost: 4

** Combination: NUM_POINTS=1000, NUM_SETS=1000, DENSITY=0.3 **






Problem shape: (1000, 1000)
Initial state shape: (1000,), Taken sets: 10

-- RMHC


 26%|██▌       | 2565/10000 [00:00<00:01, 5296.96it/s]


Maximum number of evaluations without improvement reached.
Terminated after 2566 iterations.
Terminated after 9 changes.
Number of evaluations: 2567.
Goal reached? Yes
State cost: 15

-- SAHC


  0%|          | 3/1000 [00:00<03:50,  4.32it/s]


Terminated after 4 iterations.
Terminated after 3 changes.
Number of evaluations: 4001.
Goal reached? Yes
State cost: 13

-- SAHCwReplacement


  0%|          | 3/1000 [00:00<00:24, 41.47it/s]


Terminated after 4 iterations.
Terminated after 3 changes.
Number of evaluations: 405.
Goal reached? Yes
State cost: 13

-- tabu_search


  0%|          | 3/1000 [00:00<00:24, 40.13it/s]


Terminated after 4 iterations.
Terminated after 3 changes.
Number of evaluations: 403.
Goal reached? Yes
State cost: 13

-- simulated_annealing


100%|█████████▉| 5399/5400 [00:01<00:00, 4110.66it/s]


Terminated after 5400 iterations.
Terminated after 1419 changes.
Number of evaluations: 5400.
Goal reached? Yes
State cost: 23

** Combination: NUM_POINTS=1000, NUM_SETS=1000, DENSITY=0.7 **

Problem shape: (1000, 1000)
Initial state shape: (1000,), Taken sets: 10

-- RMHC


 52%|█████▏    | 5246/10000 [00:00<00:00, 5731.18it/s]


Maximum number of evaluations without improvement reached.
Terminated after 5247 iterations.
Terminated after 5 changes.
Number of evaluations: 5248.
Goal reached? Yes
State cost: 5

-- SAHC


  0%|          | 5/1000 [00:01<03:25,  4.84it/s]


Terminated after 6 iterations.
Terminated after 5 changes.
Number of evaluations: 6001.
Goal reached? Yes
State cost: 5

-- SAHCwReplacement


  0%|          | 0/1000 [00:00<?, ?it/s]


Terminated after 1 iterations.
Terminated after 0 changes.
Number of evaluations: 102.
Goal reached? Yes
State cost: 10

-- tabu_search


  0%|          | 2/1000 [00:00<00:29, 34.41it/s]


Terminated after 3 iterations.
Terminated after 2 changes.
Number of evaluations: 304.
Goal reached? Yes
State cost: 8

-- simulated_annealing


100%|█████████▉| 5399/5400 [00:01<00:00, 4064.02it/s]


Terminated after 5400 iterations.
Terminated after 1351 changes.
Number of evaluations: 5400.
Goal reached? Yes
State cost: 31

** Combination: NUM_POINTS=5000, NUM_SETS=5000, DENSITY=0.3 **

Problem shape: (5000, 5000)
Initial state shape: (5000,), Taken sets: 106

-- RMHC


 88%|████████▊ | 8824/10000 [00:07<00:01, 1164.04it/s]


Maximum number of evaluations without improvement reached.
Terminated after 8825 iterations.
Terminated after 83 changes.
Number of evaluations: 8826.
Goal reached? Yes
State cost: 23

-- SAHC


  9%|▉         | 88/1000 [06:17<1:05:14,  4.29s/it]


Terminated after 89 iterations.
Terminated after 88 changes.
Number of evaluations: 445001.
Goal reached? Yes
State cost: 18

-- SAHCwReplacement


  0%|          | 2/1000 [00:00<02:16,  7.33it/s]


Terminated after 3 iterations.
Terminated after 2 changes.
Number of evaluations: 304.
Goal reached? Yes
State cost: 104

-- tabu_search


  1%|          | 7/1000 [00:00<01:42,  9.68it/s]


Terminated after 8 iterations.
Terminated after 7 changes.
Number of evaluations: 808.
Goal reached? Yes
State cost: 99

-- simulated_annealing


100%|█████████▉| 5399/5400 [00:06<00:00, 877.38it/s]


Terminated after 5400 iterations.
Terminated after 1243 changes.
Number of evaluations: 5400.
Goal reached? Yes
State cost: 377

** Combination: NUM_POINTS=5000, NUM_SETS=5000, DENSITY=0.7 **

Problem shape: (5000, 5000)
Initial state shape: (5000,), Taken sets: 106

-- RMHC


100%|██████████| 10000/10000 [00:08<00:00, 1175.16it/s]


Maximum number of iterations reached.
Terminated after 10000 iterations.
Terminated after 93 changes.
Number of evaluations: 10001.
Goal reached? Yes
State cost: 13

-- SAHC


 10%|█         | 100/1000 [07:08<1:04:15,  4.28s/it]


Terminated after 101 iterations.
Terminated after 100 changes.
Number of evaluations: 505001.
Goal reached? Yes
State cost: 6

-- SAHCwReplacement


  0%|          | 2/1000 [00:00<02:14,  7.41it/s]


Terminated after 3 iterations.
Terminated after 2 changes.
Number of evaluations: 304.
Goal reached? Yes
State cost: 104

-- tabu_search


  1%|          | 6/1000 [00:00<01:45,  9.45it/s]


Terminated after 7 iterations.
Terminated after 6 changes.
Number of evaluations: 708.
Goal reached? Yes
State cost: 100

-- simulated_annealing


100%|█████████▉| 5399/5400 [00:06<00:00, 872.37it/s]

Terminated after 5400 iterations.
Terminated after 1188 changes.
Number of evaluations: 5400.
Goal reached? Yes
State cost: 386




