# Lab. 9: Design of Experiments

## Introduction

#### <u>The goal of this lab is to observe the effect of the Design of Experiments (DoE) on Bayesian optimization and bio-inspired approaches.</u>

You'll have to implement four sampling methods:

Random sampling
The Halton sequence
The full factorial sampling
The Latin Hypercube sampling

- *Random sampling*
- *The Halton sequence*
- *The full factorial sampling*
- *The Latin Hypercube sampling*
---

Getting started: The following cells contain the implementation of the methods that we will use throughout this lab, together with utilities.

**NOTE**:

When studying the effect of the parameters, it is extremely important to vary just one parameter at a time. Therefore, it is suggested to study one parameter while keeping all the others fixed, and then move on to the next.

Moreover, when comparing different algorithms, it is very important to run each of them several times (e.g., 30) using different initial random seeds.

In [None]:
import functools
from copy import deepcopy
from time import time
from typing import Any, Optional

import benchmark_functions as bf
import matplotlib.pyplot as plt
import numpy as np
import pylab as pl
from inspyred import ec
from inspyred.ec import Individual, replacers, selectors, terminators, variators
from numpy.typing import NDArray

In [None]:
from enum import Enum

# String constants; none of them is referenced by the code visible in this file.
# NOTE(review): presumably strategy modes (GLOBAL / INDIVIDUAL / CORRELATED) and
# swarm topologies (STAR / RING) carried over from other labs — confirm before
# removing.
GLOBAL = "Global"
INDIVIDUAL = "Individual"
CORRELATED = "Correlated"
STAR = "star"
RING = "ring"


class GeneratorType(str, Enum):
    """Names of the supported initial-population sampling methods.

    Each value is the exact string that ``generator`` and
    ``plot_sampled_point`` dispatch on. The ``str`` mixin makes every member
    compare equal to that string, so a member can be passed wherever the
    plain name is expected (``GeneratorType.LHS == "LHS"``).
    """

    # One-dimensional samplers.
    RANDOM = "random"
    LHS = "LHS"
    HALTON = "Halton"
    FF = "FF"
    # Two-dimensional samplers.
    RANDOM_2D = "random_2d"
    LHS_2D = "LHS_2d"
    HALTON_2D = "Halton_2d"
    FF_2D = "FF_2d"


class NumpyRandomWrapper(pl.RandomState):
    def __init__(self, seed: Optional[int] = None) -> None:
        super(NumpyRandomWrapper, self).__init__(seed)

    def sample(self, pop: int | list[float], k: int) -> NDArray[np.float64]:
        population: list[int] | list[float] = []
        if isinstance(pop, int):
            population = list(range(pop))
        else:
            population = deepcopy(pop)

        return np.asarray(
            [
                population[i]
                for i in self._choice_without_replacement(len(population), k)
            ]
        )

    def random(self) -> float:  # type: ignore
        return self.random_sample()

    def gauss(self, mu: float, sigma: float) -> float:
        return self.normal(mu, sigma)

    def _choice_without_replacement(self, n: int, size: int) -> set[int]:
        result: set[int] = set()
        while len(result) < size:
            result.add(self.randint(0, n))
        return result


class OptFun:
    """Callable adapter around a benchmark function that logs evaluated points.

    The wrapped function is evaluated one candidate at a time through
    ``__call__`` (an instance is passed as ``evaluator`` in ``run_ga``). Every
    evaluated candidate is appended to ``history`` so it can be plotted later.
    """

    def __init__(self, wf: bf.BenchmarkFunction) -> None:
        self.f = wf
        # Every candidate passed to __call__, in evaluation order.
        self.history: list[list[float]] = []
        self.__name__ = f"OptFun({wf.__class__})"

    def __call__(
        self, candidates: list[list[float]], *args: Any, **kwargs: Any
    ) -> list[float]:
        """
        Evaluate the objective function for a list of candidates.

        Each candidate is deep-copied into ``history`` before it is evaluated.
        Extra positional and keyword arguments are accepted and ignored.
        """
        y: list[float] = []
        for x0 in candidates:
            self.history.append(deepcopy(x0))
            y.append(self.f(x0))  # type: ignore
        return y

    @property
    def name(self) -> str:
        """Name reported by the wrapped function."""
        return self.f.name()

    def minima(self) -> list[bf.fil.Optimum]:
        """Return the minima reported by the wrapped function."""
        return self.f.minima()

    def bounder(self) -> Any:
        """Return a function that clamps a candidate to the suggested bounds.

        The returned function modifies the candidate in place and returns it;
        extra positional arguments are ignored.
        """

        def fcn(candidate: list[float], *args: Any) -> list[float]:
            bounds: tuple[list[float], list[float]] = self.f.suggested_bounds()

            for i, (m, M) in enumerate(zip(*bounds)):
                if candidate[i] < m:
                    candidate[i] = m
                if candidate[i] > M:
                    candidate[i] = M
            return candidate

        return fcn

    def bounds(self) -> list[tuple[float, float]]:
        """
        Return the bounds of the objective function.

        The result has one ``(lower, upper)`` pair per dimension.
        """
        return self._convert_bounds(self.f.suggested_bounds())

    def _contour_grid(
        self, resolution: int
    ) -> tuple[NDArray[np.float64], NDArray[np.float64], NDArray[np.float64]]:
        """Evaluate the function on a square grid over its first two dimensions.

        Returns:
            ``(x, y, Z)``: the two axis vectors (``resolution`` points each,
            spanning the suggested bounds) and the matrix of function values
            laid out as expected by ``contour``/``contourf``.

        Raises:
            ValueError: If the function has a single dimension.
        """
        # Checked first so that callers fail before any figure is created.
        if self.f.n_dimensions() <= 1:
            raise ValueError("Function has only one dimension")

        bounds_lower, bounds_upper = self.f.suggested_bounds()
        x = np.linspace(bounds_lower[0], bounds_upper[0], resolution)
        y = np.linspace(bounds_lower[1], bounds_upper[1], resolution)
        X, Y = np.meshgrid(x, y)
        Z = np.asarray(
            [
                [self.f((X[i][j], Y[i][j])) for j in range(len(X[i]))]
                for i in range(len(X))
            ]
        )
        return x, y, Z

    def heatmap(self, fn: Optional[str] = None) -> None:
        """Draw a contour heat map of the function with the evaluated points.

        Args:
            fn: If given, the figure is saved to this path (400 dpi);
                otherwise it is shown.

        Raises:
            ValueError: If the function has a single dimension.
        """
        x, y, Z = self._contour_grid(50)

        plt.clf()
        fig = plt.figure()
        fig.suptitle("Benchmark Function: " + self.f.name())

        plt.contour(x, y, Z, 15, linewidths=0.5, colors="k")  # contour lines
        plt.contourf(
            x, y, Z, 15, cmap="viridis", vmin=Z.min(), vmax=Z.max()
        )  # heat map
        plt.xlabel("x")
        plt.ylabel("y")
        cbar = plt.colorbar()
        cbar.set_label("z")
        if self.history:  # plot points
            xdata = [point[0] for point in self.history]
            ydata = [point[1] for point in self.history]
            plt.plot(xdata, ydata, "or-", markersize=3, linewidth=1)
        if fn is None:
            plt.show()
        else:
            plt.savefig(fn, dpi=400)

    def plot_population_evolution(
        self,
        populations: list[list[NDArray[np.float64]]],
        generation_step: int = 1,
        single: bool = False,
        grid: bool = False,
        args: Optional[dict[str, Any]] = None,
        title: str = "",
        optima: Optional[list[float]] = None,
    ) -> None:
        """Scatter stored populations over the function's contour map.

        Args:
            populations: One list of candidates per generation.
            generation_step: Stride between the generations that are drawn;
                panel ``i`` shows generation ``i * generation_step``.
            single: Draw only the first population in one 5x5 panel instead
                of a 2x3 grid with up to six panels.
            grid: With ``single``, overlay a grid whose ticks split
                ``args["pop_init_range"]`` into ``len(populations[0])``
                intervals on both axes.
            args: Must contain ``"pop_init_range"`` when the grid is drawn.
            title: Figure title.
            optima: ``[x, y]`` position marked with a white cross.

        Raises:
            ValueError: If the function has a single dimension.
        """
        # A None default avoids sharing one dict object across calls.
        if args is None:
            args = {}

        x, y, Z = self._contour_grid(50)

        plt.clf()
        if single:
            fig, ax = plt.subplots(1, 1)
            ax = [ax]
            fig.set_figwidth(5)
            fig.set_figheight(5)
        else:
            fig, ax = plt.subplots(2, 3)
            ax = ax.flatten()
            fig.set_figwidth(10)
            fig.set_figheight(7)

        for i in range(min(len(populations), 6)):
            generation = i * generation_step
            if generation >= len(populations):
                continue

            ax[i].contour(x, y, Z, 15, linewidths=0.5, colors="k")
            ax[i].contourf(
                x, y, Z, 15, cmap="viridis", vmin=Z.min(), vmax=Z.max()
            )  # heat map
            ax[i].set_xlabel("x")
            ax[i].set_ylabel("y")
            ax[i].set_title("Generation " + str(generation))
            # scatter plot of the population
            current_pop = populations[generation]
            xdata = [point[0] for point in current_pop]
            ydata = [point[1] for point in current_pop]
            ax[i].scatter(xdata, ydata, color="r", zorder=2, label="population")

            if optima is not None:
                ax[i].plot(
                    optima[0],
                    optima[1],
                    "wx",
                    zorder=3,
                    markersize=8,
                    markeredgewidth=4,
                    label="optimum",
                )

            if single:
                break

        # Add the grid. The optimum marker was already drawn on ax[0] by the
        # loop above; drawing it again here would duplicate its legend entry.
        if single and grid:
            ticks = np.linspace(
                args["pop_init_range"][0],
                args["pop_init_range"][1],
                len(populations[0]) + 1,
            )
            ax[0].set_yticks(ticks, minor=False)
            ax[0].set_xticks(ticks, minor=False)
            ax[0].grid(
                which="both", axis="both", linestyle="-", color="k", linewidth=1.4
            )

        handles, labels = ax[0].get_legend_handles_labels()
        fig.suptitle(title)
        fig.legend(handles, labels, loc="upper right")
        plt.tight_layout()
        plt.show()

    def plot(self) -> None:
        """Plot the function value of every point in ``history``.

        A horizontal red line marks the score of the first reported minimum.

        Raises:
            ValueError: If that minimum has no score.
        """
        plt.clf()
        values = [self.f(v) for v in self.history]
        optimum_score = self.minima()[0].score
        if optimum_score is None:
            raise ValueError("No minimum found")
        plt.plot(values)
        plt.axhline(optimum_score, color="r", label="optimum")
        plt.legend()
        plt.show()

    def _convert_bounds(
        self, bounds: tuple[list[float], list[float]]
    ) -> list[tuple[float, float]]:
        """Turn ``(lowers, uppers)`` into one ``(lower, upper)`` pair per dimension."""
        return [(lower, upper) for lower, upper in zip(bounds[0], bounds[1])]

    def current_calls(self) -> int:
        """Number of candidates evaluated so far."""
        return len(self.history)

In [None]:
def initial_pop_observer(
    population: list[Individual],
    num_generations: int,
    num_evaluations: int,
    args: dict[str, Any],
) -> None:
    """Snapshot the generation-0 population into ``args["initial_pop_storage"]``.

    For any other generation the call does nothing. The candidates are stored
    under ``"individuals"`` and their fitnesses under ``"fitnesses"``, both as
    arrays. ``num_evaluations`` is not used.
    """
    if num_generations != 0:
        return

    candidates = [member.candidate for member in population]
    args["initial_pop_storage"]["individuals"] = np.asarray(candidates)

    scores = [member.fitness for member in population]
    args["initial_pop_storage"]["fitnesses"] = np.asarray(scores)


def generator_wrapper(func: Any) -> Any:
    """Wrap a ``(random, args)`` generator so its result is returned as an array.

    The wrapper keeps the metadata of ``func`` (via ``functools.wraps``).
    """

    def _as_array(
        random: NumpyRandomWrapper, args: dict[str, Any]
    ) -> NDArray[np.float64]:
        produced = func(random, args)
        return np.asarray(produced)

    return functools.wraps(func)(_as_array)


# Archiver hook: records the population it is given on every call, so the
# archive ends up holding one population per call.
def my_archiver(
    random: NumpyRandomWrapper,
    population: list[Individual],
    archive: list[list[Individual]],
    args: dict[str, Any],
) -> list[list[Individual]]:
    """Append ``population`` to ``archive`` in place and return ``archive``.

    ``random`` and ``args`` are accepted but not used.
    """
    archive += [population]
    return archive


def run_ga(
    random: NumpyRandomWrapper,
    generator_type: str,
    func: OptFun,
    num_vars: int = 0,
    maximize: bool = False,
    **kwargs: Any,
) -> tuple[
    NDArray[np.float64], float, list[Individual], list[list[NDArray[np.float64]]]
]:
    """Run the genetic algorithm on the given function.

    The initial population comes from the pre-computed sequence built by
    ``generator(generator_type, ...)``.

    Args:
        random: Random number generator.
        generator_type: Name of the initial-sampling method, as understood
            by ``generator`` (e.g. ``"LHS_2d"``).
        func: Objective function.
        num_vars: Number of variables.
        maximize: Whether to maximize the function. It is forwarded to the
            evolution and also decides which individual counts as best.
        kwargs: Additional arguments forwarded to ``evolve``. Must contain
            ``"pop_size"``, plus whatever the chosen sampler reads.

    Returns:
        tuple: Best candidate, best fitness, final population, all populations
        (one list of candidates per archived generation).
    """
    # Filled by initial_pop_observer at generation 0; it is not returned.
    initial_pop_storage: dict[Any, Any] = {}

    algorithm = ec.EvolutionaryComputation(random)
    algorithm.terminator = terminators.generation_termination
    algorithm.replacer = replacers.generational_replacement
    algorithm.variator = [  # type: ignore
        variators.uniform_crossover,
        variators.gaussian_mutation,
    ]
    algorithm.selector = selectors.tournament_selection
    algorithm.archiver = my_archiver
    algorithm.observer = initial_pop_observer

    kwargs["num_selected"] = kwargs["pop_size"]
    kwargs["bounder"] = func.bounder()
    kwargs["n_vars"] = num_vars
    # The samplers read their settings (pop_size, n_vars, ...) from kwargs,
    # so this must come after the assignments above.
    kwargs["generator"] = generator(generator_type, random, kwargs)  # type: ignore

    final_pop: list[Individual] = algorithm.evolve(
        evaluator=func,
        maximize=maximize,
        initial_pop_storage=initial_pop_storage,
        num_vars=num_vars,
        **kwargs,
    )

    all_populations: list[list[Individual]] = algorithm.archive  # type: ignore
    all_pop: list[list[NDArray[np.float64]]] = [
        [elem.candidate for elem in population]  # type: ignore
        for population in all_populations
    ]

    final_pop_fitnesses = np.asarray([guy.fitness for guy in final_pop])
    final_pop_candidates = np.asarray([guy.candidate for guy in final_pop])

    # First occurrence of the extreme fitness in the direction being optimized.
    if maximize:
        best_index = int(np.argmax(final_pop_fitnesses))
    else:
        best_index = int(np.argmin(final_pop_fitnesses))

    best_guy = final_pop_candidates[best_index]
    best_fitness = final_pop_fitnesses[best_index]

    return best_guy, best_fitness, final_pop, all_pop

## Exercises

### Exercise 1/3: Implement different sampling methods

In [None]:
def random_generator(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Draw ``args["pop_size"]`` uniform samples within ``args["pop_init_range"]``.

    Returns a 1-D array; the range is read as ``[low, high]``.
    """
    low = args["pop_init_range"][0]
    high = args["pop_init_range"][1]
    count = args["pop_size"]
    return random.uniform(low, high, count)


def random_generator_2d(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Pair two independent ``random_generator`` draws into 2-D points.

    The first draw provides the first coordinate of every point and the
    second draw the second coordinate.
    """
    first_axis = random_generator(random, args)
    second_axis = random_generator(random, args)

    return np.asarray([(a, b) for a, b in zip(first_axis, second_axis)])

In [None]:
def Halton_generator(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Return the first ``args["pop_size"]`` terms of a base-``args["base"]`` sequence.

    Term ``i`` (starting at ``i = 0``, which yields 0) is the radical inverse
    of ``i`` in the given base, scaled from ``[0, 1)`` to
    ``args["pop_init_range"]``. ``random`` is not used.
    """
    base = args["base"]
    count = args["pop_size"]

    def radical_inverse(index: int) -> float:
        # Mirror the base-`base` digits of `index` around the radix point.
        fraction = 1
        value = 0
        while index > 0:
            fraction = fraction / base
            value = value + fraction * (index % base)
            index = index // base
        return value

    unit = np.array([radical_inverse(i) for i in range(count)], dtype=float)

    # scale the result to the desired range
    low = args["pop_init_range"][0]
    span = args["pop_init_range"][1] - args["pop_init_range"][0]
    return low + unit * span


def Halton_generator_2d(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Build 2-D points from two 1-D Halton sequences.

    The first coordinate uses ``args["bases"][0]`` and the second
    ``args["bases"][1]``. ``args`` is left untouched: the per-dimension base
    is passed to ``Halton_generator`` through a shallow copy instead of being
    written into the caller's dictionary.

    Returns:
        Array of shape ``(pop_size, 2)``.
    """
    bases = args["bases"]
    seq1 = Halton_generator(random, {**args, "base": bases[0]})
    seq2 = Halton_generator(random, {**args, "base": bases[1]})

    return np.asarray(list(zip(seq1, seq2)))

In [None]:
# Latin Hypercube Sampling
def lhs_generator(
    random: NumpyRandomWrapper, args: dict[str, Any], n: int = 1
) -> NDArray[np.float64]:
    """Draw ``args["pop_size"]`` Latin Hypercube samples in ``n`` dimensions.

    Each dimension is split into ``pop_size`` equal strata, one point is
    drawn per stratum, and the points are shuffled independently per
    dimension before being scaled to ``args["pop_init_range"]``.

    Returns:
        Array of shape ``(pop_size, n)``.
    """
    m: int = args["pop_size"]

    # Stratum k (1-based) covers ((k - 1) / m, k / m]; the column vector
    # broadcasts the same stratum indices over all n dimensions.
    strata = np.arange(1, m + 1).reshape(m, 1)
    offsets = random.uniform(size=(m, n))
    samples = (strata - offsets) / m

    # Shuffling each column in place decouples the strata across dimensions.
    for column in samples.T:
        random.shuffle(column)

    # Rescale the samples to the given ranges
    low = args["pop_init_range"][0]
    high = args["pop_init_range"][1]
    return samples * (high - low) + low


def lhs_generator_2d(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Latin Hypercube samples with one column per variable in ``args["n_vars"]``."""
    dimensions = args["n_vars"]
    return lhs_generator(random, args, dimensions)

In [None]:
# Full Factorial
from itertools import product
from math import ceil


def ff_generator(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Return the evenly spaced levels of one axis of a full-factorial grid.

    The number of levels is the smallest integer ``d`` such that
    ``d ** args["n_vars"] >= args["pop_size"]``, so a grid with ``d`` levels
    per variable holds at least ``pop_size`` points. The levels span
    ``args["pop_init_range"]`` with both ends included. ``random`` is not used.

    Returns:
        1-D array with ``d`` levels.
    """
    pop_size = args["pop_size"]
    n_vars = args["n_vars"]

    # The floating-point n-th root is only an estimate: it can land just above
    # the exact integer root, and ceil() of that would add a spurious level.
    # Start one below the estimate and settle on the answer with exact
    # integer powers.
    divisions = max(int(pop_size ** (1 / n_vars)) - 1, 0)
    while divisions > 0 and divisions**n_vars >= pop_size:
        divisions -= 1
    while divisions**n_vars < pop_size:
        divisions += 1

    return np.linspace(
        args["pop_init_range"][0],
        args["pop_init_range"][1],
        divisions,
    )


def ff_generator_2d(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Return every point of the 2-D full-factorial grid.

    The grid is the Cartesian product of two ``ff_generator`` axes, with the
    second coordinate varying fastest.
    """
    axes = [ff_generator(random, args) for _ in range(2)]
    grid_points = list(product(axes[0], axes[1]))

    return np.asarray(grid_points)

In [None]:
def generator(case: str, random: NumpyRandomWrapper, args: dict[str, Any]) -> Any:
    """Pre-compute the initial population and return a generator that replays it.

    The sampler selected by ``case`` is run once. Its result is stored in
    ``args["sequence"]`` and the cursor ``args["index"]`` is reset to 0, so
    ``fake_generator`` can hand out one point per call.

    Args:
        case: Sampling method name (``"random"``, ``"LHS"``, ``"Halton"``,
            ``"FF"`` or their ``"_2d"`` variants). A ``GeneratorType`` member
            is accepted as well.
        random: Random number generator passed to the sampler.
        args: Sampler settings; it is updated in place as described above.

    Returns:
        The ``fake_generator`` function.

    Raises:
        ValueError: If ``case`` names no known sampling method. Without this
            check ``args["sequence"]`` would stay unset, or keep the sequence
            of an earlier call.
    """
    if isinstance(case, GeneratorType):
        case = case.value

    samplers = {
        "random": random_generator,
        "LHS": lhs_generator,
        "Halton": Halton_generator,
        "FF": ff_generator,
        "random_2d": random_generator_2d,
        "LHS_2d": lhs_generator_2d,
        "Halton_2d": Halton_generator_2d,
        "FF_2d": ff_generator_2d,
    }
    if case not in samplers:
        raise ValueError(f"Unknown generator type: {case!r}")

    args["sequence"] = samplers[case](random, args)
    args["index"] = 0
    return fake_generator


def fake_generator(
    random: NumpyRandomWrapper, args: dict[str, Any]
) -> NDArray[np.float64]:
    """Return the next pre-computed point from ``args["sequence"]``.

    ``args["index"]`` is the cursor; it is advanced before the lookup, so it
    moves forward even when the lookup fails. ``random`` is not used.
    """
    cursor = args["index"]
    args["index"] = cursor + 1

    return args["sequence"][cursor]

### Exercise 2/3: Visualize the implemented methods

In [None]:
def plot_sampled_point(sample_method: str, args: dict[str, Any]) -> None:
    """Scatter-plot the points produced by one sampling method.

    One-dimensional samples are drawn on a horizontal line; two-dimensional
    samples are drawn as x/y points. The sample's shape is printed first.

    Args:
        sample_method: Sampling method name (``"random"``, ``"LHS"``,
            ``"Halton"``, ``"FF"`` or their ``"_2d"`` variants). A
            ``GeneratorType`` member is accepted as well.
        args: Settings read by the chosen sampler (e.g. ``"pop_size"``,
            ``"pop_init_range"``, ``"n_vars"``, ``"base"``/``"bases"``).

    Raises:
        ValueError: If ``sample_method`` names no known sampling method.
    """
    if isinstance(sample_method, GeneratorType):
        sample_method = sample_method.value

    samplers = {
        "random": random_generator,
        "LHS": lhs_generator,
        "Halton": Halton_generator,
        "FF": ff_generator,
        "random_2d": random_generator_2d,
        "LHS_2d": lhs_generator_2d,
        "Halton_2d": Halton_generator_2d,
        "FF_2d": ff_generator_2d,
    }
    if sample_method not in samplers:
        raise ValueError("Invalid sample method")

    sample: NDArray[np.float64] = samplers[sample_method](NumpyRandomWrapper(), args)

    f, ax = plt.subplots()
    f.suptitle("Sampled point from " + sample_method)
    print(sample.shape)
    # lhs_generator returns a single column of shape (m, 1) for the 1-D case;
    # treat it like a flat vector instead of indexing a missing second column.
    if sample.ndim == 1 or sample.shape[1] == 1:
        points = sample.ravel()
        ax.scatter(points, np.ones(len(points)), color="r")
        ax.yaxis.set_visible(False)
    else:
        ax.scatter(sample[:, 0], sample[:, 1], color="r")
    plt.show()

In [None]:
# Preview 100 two-dimensional Latin Hypercube samples drawn in [0, 1].
plot_sampled_point(
    "LHS_2d",
    {
        "n_vars": 2,
        "pop_init_range": [0, 1],
        "pop_size": 100,
    },
)

In [None]:
# Preview 100 two-dimensional Halton points in [0, 1]; "bases" gives one base
# per dimension (2 for the first coordinate, 3 for the second).
plot_sampled_point(
    "Halton_2d",
    {
        "pop_init_range": [0, 1],
        "pop_size": 100,
        "bases": [2, 3],
    },
)

In [None]:
# Preview the two-dimensional full-factorial grid in [0, 1]; with
# pop_size = 100 and n_vars = 2 the grid has 10 levels per axis.
plot_sampled_point(
    "FF_2d",
    {
        "n_vars": 2,
        "pop_init_range": [0, 1],
        "pop_size": 100,
    },
)

In [None]:
# Preview 100 two-dimensional uniform random samples in [0, 1].
plot_sampled_point(
    "random_2d",
    {
        "pop_init_range": [0, 1],
        "pop_size": 100,
    },
)

### Exercise 3/3: Genetic algorithm - testing different initial sampling methods

In [None]:
def run_algorithm(
    fun: OptFun, method: str, num_simulations: int, plot: bool, args: dict[str, Any]
) -> tuple[float, float, float]:
    """Run the GA several times with one sampling method and summarize the results.

    Each run uses a fresh, unseeded ``NumpyRandomWrapper``. Only the population
    history of the best run so far (lowest best fitness) is kept, because it
    is the only one that gets plotted.

    Args:
        fun: Objective function; its dimensionality sets ``num_vars``.
        method: Name of the initial-sampling method passed to ``run_ga``.
        num_simulations: Number of independent GA runs.
        plot: Whether to plot the population evolution of the best run.
            Nothing is plotted when no run was executed.
        args: GA settings forwarded to ``run_ga`` as keyword arguments.

    Returns:
        tuple: Mean and standard deviation of the per-run best fitness, and
        the wall-clock time in seconds spent in the runs.
    """
    best_fitnesses: list[float] = []
    best_sim: Optional[float] = None
    best_sim_pops: list[list[NDArray[np.float64]]] = []

    start = time()
    for _run in range(num_simulations):
        _, best_fitness, _, all_pop = run_ga(
            NumpyRandomWrapper(),
            method,
            fun,
            num_vars=fun.f.n_dimensions(),
            maximize=False,
            **args,
        )
        best_fitnesses.append(best_fitness)

        # Strict comparison: on ties the earliest run stays the best one.
        if best_sim is None or best_fitness < best_sim:
            best_sim = best_fitness
            best_sim_pops = all_pop

    end = time()
    if plot and best_sim is not None:
        fun.plot_population_evolution(
            best_sim_pops,
            generation_step=4,
            single=False,
            grid=True,
            args=args,
            title=f"{fun.name} - {method}",
            optima=fun.minima()[0].position,  # type: ignore
        )

    mean_best_fitness = np.mean(best_fitnesses)
    std_best_fitness = np.std(best_fitnesses)

    return mean_best_fitness.item(), std_best_fitness.item(), end - start

In [None]:
# Ackley (2-D): compare the initial-sampling methods over repeated GA runs.
func = OptFun(bf.Ackley(2))
args: dict[str, Any] = {
    "gaussian_stdev": 0.5,  # Standard deviation of the Gaussian mutations
    "tournament_size": 2,
    "num_elites": 1,  # number of elite individuals to maintain in each gen
    "pop_size": 49,  # population size (7 x 7, a complete full-factorial grid)
    "pop_init_range": func.bounds()[0],  # Range for the initial population
    "max_generations": 20,  # Number of generations of the GA
    "crossover_rate": 0.7,
    "mutation_rate": 0.2,
}
args["initial_pop_size"] = args["pop_size"]
args["bases"] = [2, 3]  # Halton bases, one per dimension

num_simulations = 30
sampling_methods = ["Halton_2d", "LHS_2d", "FF_2d", "random_2d"]

for method in sampling_methods:
    # Fresh wrapper per method so the evaluation history starts empty.
    func = OptFun(bf.Ackley(2))
    mean, std, t = run_algorithm(func, method, num_simulations, True, args)
    print(f"{method}: {mean} ± {std}")
    print(f"Time: {t} s")

In [None]:
# Rosenbrock (2-D): compare the initial-sampling methods over repeated GA runs.
func = OptFun(bf.Rosenbrock(2))

args: dict[str, Any] = {
    "gaussian_stdev": 0.1,  # Standard deviation of the Gaussian mutations
    "tournament_size": 2,
    "num_elites": 1,  # number of elite individuals to maintain in each gen
    "pop_size": 49,  # population size (7 x 7, a complete full-factorial grid)
    "pop_init_range": func.bounds()[0],  # Range for the initial population
    "max_generations": 20,  # Number of generations of the GA
    "crossover_rate": 0.7,
    "mutation_rate": 0.2,
}
args["initial_pop_size"] = args["pop_size"]
args["bases"] = [2, 3]  # Halton bases, one per dimension

sampling_methods = ["Halton_2d", "LHS_2d", "FF_2d", "random_2d"]
num_simulations = 30

for method in sampling_methods:
    # Fresh wrapper per method so the evaluation history starts empty.
    func = OptFun(bf.Rosenbrock(2))
    mean, std, t = run_algorithm(func, method, num_simulations, True, args)
    print(f"{method}: {mean} ± {std}")
    print(f"Time: {t} s")

In [None]:
# De Jong 5: compare the initial-sampling methods over repeated GA runs.
func = OptFun(bf.DeJong5())

args: dict[str, Any] = {
    "gaussian_stdev": 0.1,  # Standard deviation of the Gaussian mutations
    "tournament_size": 2,
    "num_elites": 1,  # number of elite individuals to maintain in each gen
    "pop_size": 81,  # population size (9 x 9, a complete full-factorial grid)
    "pop_init_range": func.bounds()[0],  # Range for the initial population
    "max_generations": 20,  # Number of generations of the GA
    "crossover_rate": 0.7,
    "mutation_rate": 0.2,
}
args["initial_pop_size"] = args["pop_size"]
args["bases"] = [2, 3]  # Halton bases, one per dimension

sampling_methods = ["Halton_2d", "LHS_2d", "FF_2d", "random_2d"]
num_simulations = 30

for method in sampling_methods:
    # Fresh wrapper per method so the evaluation history starts empty.
    func = OptFun(bf.DeJong5())
    mean, std, t = run_algorithm(func, method, num_simulations, True, args)
    print(f"{method}: {mean} ± {std}")
    print(f"Time: {t} s")

In [None]:
# Keane: compare the initial-sampling methods over repeated GA runs.
func = OptFun(bf.Keane())

args: dict[str, Any] = {
    "gaussian_stdev": 0.1,  # Standard deviation of the Gaussian mutations
    "tournament_size": 2,
    "num_elites": 1,  # number of elite individuals to maintain in each gen
    "pop_size": 49,  # population size (7 x 7, a complete full-factorial grid)
    "pop_init_range": func.bounds()[0],  # Range for the initial population
    "max_generations": 20,  # Number of generations of the GA
    "crossover_rate": 0.7,
    "mutation_rate": 0.2,
}
args["initial_pop_size"] = args["pop_size"]
args["bases"] = [2, 3]  # Halton bases, one per dimension

sampling_methods = ["Halton_2d", "LHS_2d", "FF_2d", "random_2d"]
num_simulations = 30

for method in sampling_methods:
    # Fresh wrapper per method so the evaluation history starts empty.
    func = OptFun(bf.Keane())
    mean, std, t = run_algorithm(func, method, num_simulations, True, args)
    print(f"{method}: {mean} ± {std}")
    print(f"Time: {t} s")

In [None]:
# Rastrigin: compare the initial-sampling methods over repeated GA runs.
func = OptFun(bf.Rastrigin())

args: dict[str, Any] = {
    "gaussian_stdev": 0.1,  # Standard deviation of the Gaussian mutations
    "tournament_size": 2,
    "num_elites": 1,  # number of elite individuals to maintain in each gen
    "pop_size": 49,  # population size (7 x 7, a complete full-factorial grid)
    "pop_init_range": func.bounds()[0],  # Range for the initial population
    "max_generations": 20,  # Number of generations of the GA
    "crossover_rate": 0.7,
    "mutation_rate": 0.2,
}
args["initial_pop_size"] = args["pop_size"]
args["bases"] = [2, 3]  # Halton bases, one per dimension

sampling_methods = ["Halton_2d", "LHS_2d", "FF_2d", "random_2d"]
num_simulations = 30

for method in sampling_methods:
    # Fresh wrapper per method so the evaluation history starts empty.
    func = OptFun(bf.Rastrigin())
    mean, std, t = run_algorithm(func, method, num_simulations, True, args)
    print(f"{method}: {mean} ± {std}")
    print(f"Time: {t} s")

## Questions

Compare the GA studied in the previous lessons with its enhanced version with different DOE techniques.

#### 1. **How does the performance improve? Are the algorithms faster to converge, or can they find better solutions?**

#### 2. **Is there an approach better than the others in terms of performance?**

#### 3. **How much do the DOEs affect the search cost?**