In [None]:
from itertools import product
from random import random, randint, shuffle, seed, choice
import numpy as np
from scipy import sparse
from functools import *
from copy import copy
import math

In [None]:
def make_set_covering_problem(num_points, num_sets, density):
    """Build a reproducible random set-covering instance.

    The random generator is re-seeded from the three arguments, so the same
    arguments always produce the same instance.

    Args:
        num_points: number of items to cover (columns of the result).
        num_sets: number of candidate sets (rows of the result).
        density: each (set, point) cell is set to True when a uniform draw
            from [0, 1) falls below this value.

    Returns:
        A boolean ``scipy.sparse.lil_array`` of shape ``(num_sets, num_points)``
        where entry ``[s, p]`` is True when set ``s`` covers point ``p``.
    """
    seed(num_points * 2654435761 + num_sets + density)
    sets = sparse.lil_array((num_sets, num_points), dtype=bool)

    # Row-major sweep: one random draw per cell, sets outermost.
    for set_index in range(num_sets):
        for point in range(num_points):
            if random() < density:
                sets[set_index, point] = True

    # Give every point one extra randomly chosen set, so that no column is
    # left empty and the instance is always coverable.
    for point in range(num_points):
        covering_set = randint(0, num_sets - 1)
        sets[covering_set, point] = True

    return sets

## Halloween Challenge

Find the best solution with the fewest calls to the fitness function for:
- num_points = [100, 1_000, 5_000]
- num_sets = num_points
- density = [.3, .7]

In [None]:
# Instance configuration: number of points to cover, number of candidate
# sets (the challenge uses as many sets as points), and fill density.
PROBLEM_SIZE = 5_000
NUM_SETS = PROBLEM_SIZE
DENSITY = 0.3

# Global counter of fitness evaluations; incremented inside fitness().
fc = 0

# The problem instance: rows are sets, columns are the points they cover.
x = make_set_covering_problem(
    num_points=PROBLEM_SIZE,
    num_sets=NUM_SETS,
    density=DENSITY,
)

In [None]:
def fitness(state):
    """Score a selection of sets against the global instance ``x``.

    Every call increments the global evaluation counter ``fc``.

    Args:
        state: sequence of truthy/falsy flags, one per row of ``x``; a truthy
            entry at position ``i`` means set ``i`` is taken.

    Returns:
        A tuple ``(covered, -cost)`` where ``covered`` is the number of points
        covered by the taken sets and ``cost`` is ``sum(state)``. Tuples
        compare lexicographically, so more coverage wins first and, at equal
        coverage, fewer taken sets win.
    """
    global fc
    fc += 1

    # Start from "nothing covered" and OR in the dense row of each taken set.
    covered = np.array([False] * PROBLEM_SIZE)
    for index, taken in enumerate(state):
        if taken:
            covered = np.logical_or(covered, x.getrow(index).toarray()[0])

    cost = sum(state)
    return (np.sum(covered), -cost)

In [None]:
def random_mutation_hill_climber(sets):
    """Hill-climb by accepting any tweak that is not worse than the current state.

    Runs for at most 10,000 steps and stops early as soon as a tweaked state
    covers all ``PROBLEM_SIZE`` points.

    NOTE(review): ``tweak1`` is not defined in the visible part of the
    notebook -- confirm it is defined before this cell is executed.

    Args:
        sets: not used by this function; kept for a signature parallel to the
            other solvers.

    Returns:
        The fitness tuple of the last accepted state, as produced by
        ``fitness``.
    """
    # Every option is False, so the start state is the empty selection; the
    # choice() call is kept so the shared random generator is still drawn
    # from once per set.
    state = [choice([False] * 6) for _ in range(NUM_SETS)]
    state_fitness = fitness(state)

    for _ in range(10_000):
        candidate = tweak1(state)
        candidate_fitness = fitness(candidate)

        # Ties are accepted too, which lets the search drift across plateaus.
        if candidate_fitness >= state_fitness:
            state, state_fitness = candidate, candidate_fitness

        if candidate_fitness[0] == PROBLEM_SIZE:
            break

    return state_fitness

In [None]:
def steepest_ascent_hill_climber(sets, num_nested_iterations):
    """Greedy hill climber that adds, at each step, the best of a few candidate sets.

    Starting from the empty selection, each step evaluates the first
    ``num_nested_iterations`` sets that are not yet taken (at least one is
    always evaluated), and adds the one with the highest fitness if that
    improves on the current selection. Among equally good candidates the last
    one evaluated wins.

    The search stops when all ``PROBLEM_SIZE`` points are covered, when no
    candidate improves the current selection (the neighbourhood is
    deterministic, so further steps would only repeat the same evaluations),
    or after 10,000 steps.

    Args:
        sets: iterable with one entry per set; only its length/order is used
            to enumerate candidate indices.
        num_nested_iterations: maximum number of candidates evaluated per step.

    Returns:
        The fitness tuple of the final selection, as produced by ``fitness``.
    """
    current_solution = [False for _ in range(NUM_SETS)]  # start with nothing taken
    current_fitness = fitness(current_solution)

    for _ in range(10_000):
        # Indices of the sets that could still be added.
        candidate_indices = [
            index for index, _ in enumerate(sets) if not current_solution[index]
        ]

        best_neighbor = None
        best_fitness = None
        for evaluated, index in enumerate(candidate_indices, start=1):
            neighbor = copy(current_solution)
            neighbor[index] = True
            neighbor_fitness = fitness(neighbor)

            # ">=" keeps the original tie-break: the latest best candidate wins.
            if best_fitness is None or neighbor_fitness >= best_fitness:
                best_neighbor, best_fitness = neighbor, neighbor_fitness

            # Budget check comes after the evaluation, so one candidate is
            # always tried even when the budget is zero or negative.
            if evaluated >= num_nested_iterations:
                break

        # Local optimum (or nothing left to add): no further step can differ.
        if best_fitness is None or best_fitness <= current_fitness:
            break

        current_solution, current_fitness = best_neighbor, best_fitness

        if current_fitness[0] == PROBLEM_SIZE:
            break

    return current_fitness

In [None]:
def simulated_annealing(sets, initial_temperature, cooling_rate, max_iterations):
    """Simulated annealing over single-set flips, starting from the empty selection.

    Each iteration flips one randomly chosen set and evaluates the neighbor:

    * a neighbor that is no worse in both coverage and cost is always accepted;
    * otherwise it is accepted with probability ``exp(-loss / temperature)``,
      where ``loss`` is the number of covered points lost (a coverage gain is
      therefore always accepted).

    The temperature is multiplied by ``cooling_rate`` after every iteration.
    Once it has decayed to zero (or if it is not positive to begin with),
    neighbors that do not gain coverage are rejected instead of dividing by
    zero.

    Args:
        sets: not used by this function; kept for a signature parallel to the
            other solvers.
        initial_temperature: starting temperature.
        cooling_rate: multiplicative factor applied to the temperature after
            each iteration.
        max_iterations: maximum number of neighbors to evaluate.

    Returns:
        The best fitness tuple seen among accepted states, as produced by
        ``fitness``.
    """
    current_solution = [False for _ in range(NUM_SETS)]  # start with nothing taken
    current_f = fitness(current_solution)
    best_f = current_f
    temperature = initial_temperature

    for _ in range(max_iterations):
        # Flip one set. The index must range over the sets (the length of the
        # solution vector), not over the points.
        neighbor = copy(current_solution)
        index = randint(0, NUM_SETS - 1)
        neighbor[index] = not neighbor[index]
        neighbor_f = fitness(neighbor)

        coverage_loss = int(current_f[0] - neighbor_f[0])
        no_worse = coverage_loss <= 0 and neighbor_f[1] >= current_f[1]

        if no_worse:
            accepted = True
        else:
            if coverage_loss < 0:
                threshold = 1.0  # coverage gain: exp(positive) would be >= 1
            elif temperature > 0:
                threshold = math.exp(-coverage_loss / temperature)
            else:
                threshold = 0.0  # frozen: avoid dividing by a zero temperature
            # One draw per non-dominating neighbor, so a seeded run consumes
            # the random stream exactly as before.
            accepted = random() < threshold

        if accepted:
            current_solution = neighbor
            current_f = neighbor_f
            if current_f > best_f:
                best_f = current_f

        temperature *= cooling_rate
        if current_f[0] == PROBLEM_SIZE:
            break

    return best_f

In [None]:
def benchmark(iterations=10):
    """Run the three solvers repeatedly and print each one's cheapest run.

    In every iteration the solvers run in a fixed order (RMHC, SAHC, simulated
    annealing); the global fitness-call counter ``fc`` is reset before each
    run and recorded afterwards.

    NOTE(review): the cheapest run is chosen by number of taken sets only;
    the coverage part of the returned fitness is not checked here.

    Args:
        iterations: number of times each solver is run.

    Raises:
        ValueError: if ``iterations`` is 0, because there is no run to pick
            a minimum from.
    """
    global fc

    # The dense matrix is the same for every run, so build it only once.
    dense_sets = x.toarray()

    solvers = (
        ("RMHC", lambda: random_mutation_hill_climber(dense_sets)),
        ("SAHC", lambda: steepest_ascent_hill_climber(dense_sets, 50)),
        (
            "Simulated Annealing",
            lambda: simulated_annealing(dense_sets, 100, 0.75, 10_000),
        ),
    )
    runs = {label: [] for label, _ in solvers}

    for _ in range(iterations):
        for label, solve in solvers:
            fc = 0
            result = solve()
            # result is (coverage, -cost); store the cost as a positive count.
            runs[label].append({"taken_sets": -result[1], "calls": fc})

    # Pick all minima before printing, so nothing is printed if one is missing.
    cheapest = {
        label: min(records, key=lambda record: record["taken_sets"])
        for label, records in runs.items()
    }
    for label, _ in solvers:
        print(
            f"{label} : {PROBLEM_SIZE} | {DENSITY} | Minimum cost among all the iterations :",
            cheapest[label],
        )

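## Results

Each line reports, for one algorithm, problem size and density, the cheapest of the benchmark runs: `taken_sets` is the number of sets selected and `calls` is the number of fitness evaluations used in that run.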
Density = 0.3 :


- RMHC : 100 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 9, 'calls': 15}
- SAHC : 100 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 6, 'calls': 307}
- Simulated Annealing : 100 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 8, 'calls': 9}

- RMHC : 1000 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 16, 'calls': 25}
- SAHC : 1000 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 12, 'calls': 613}
- Simulated Annealing : 1000 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 16, 'calls': 17}

- RMHC : 5000 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 20, 'calls': 22}
- SAHC : 5000 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 16, 'calls': 817}
- Simulated Annealing : 5000 | 0.3 | Minimum cost among all the iterations : {'taken_sets': 20, 'calls': 21}

Density = 0.7 :


- RMHC : 100 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 3, 'calls': 4}
- SAHC : 100 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 4, 'calls': 17}
- Simulated Annealing : 100 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 3, 'calls': 4}

- RMHC : 1000 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 5, 'calls': 6}
- SAHC : 1000 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 5, 'calls': 26}
- Simulated Annealing : 1000 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 5, 'calls': 6}

- RMHC : 5000 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 7, 'calls': 8}
- SAHC : 5000 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 6, 'calls': 43}
- Simulated Annealing : 5000 | 0.7 | Minimum cost among all the iterations : {'taken_sets': 7, 'calls': 8}