In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
from __future__ import annotations
import itertools
%matplotlib widget

from typing import TypeAlias, Iterator
Vector: TypeAlias = np.ndarray  # A 1-D array
Matrix: TypeAlias = np.ndarray  # A 2-D array

In [3]:
class LayerInitializationStrategy:
    """Parent class for initialization strategies of weights and biases in Layer."""
    def run(self, size_of_previous_layer: int, size_of_current_layer: int) -> tuple[np.ndarray, np.ndarray]:
        raise NotImplementedError("The 'run' method is implemented only in child classes")

class RandomUniform(LayerInitializationStrategy):
    """
    Initialization strategy sampling weights and biases uniformly in a given interval.

    Attributes
    ----------
    scale: float
        The half-lenght of the interval [center-scale, center+scale] from which values are sampled.
    center: float
        The center of the aforementioned interval.

    Methods
    -------
    run(self, size_of_previous_layer: int, size_of_current_layer: int) -> tuple[np.ndarray, np.ndarray]
        Returns the tuple (random_weights, random_biases), where random_weights and random_biases are np.ndarrays of the appropriate shape.
    """
    def __init__(self, scale_of_interval: float, center_of_interval: float = 0):
        self.scale: float = scale_of_interval
        self.center: float = center_of_interval
        
    
    def run(self, size_of_previous_layer: int, size_of_current_layer: int) -> tuple[np.ndarray, np.ndarray]:
        random_weights: np.ndarray = np.random.uniform(
            -self.scale + self.center, self.scale + self.center,
            (size_of_previous_layer, size_of_current_layer)
            )
        random_biases: np.ndarray = np.random.uniform(
            -self.scale + self.center, self.scale + self.center,
            size_of_current_layer
            )
        return random_weights, random_biases
    
    def __str__(self):
        return f"Initialization: RandomUniform in [{self.center - self.scale}, {self.center + self.scale}]"

In [3]:
class ActivationFunction:
    """Parent class for activation functions of neural nodes."""
    def __call__(self, x: np.ndarray) -> np.ndarray:
        raise NotImplementedError("The '__call__' method must be implemented in child classes")

    def derivative(self, x: np.ndarray) -> np.ndarray:
        raise NotImplementedError("The 'derivative' method must be implemented in child classes")

class Sigmoid(ActivationFunction):
    """Sigmoid activation function. f(x) = 1 / (1 + np.exp(-x))"""
    def __call__(self, x: np.ndarray) -> np.ndarray:
        return 1 / (1 + np.exp(-x))

    def derivative(self, x: np.ndarray) -> np.ndarray:
        sigmoid = self(x)  # Reuse the __call__ method to compute sigmoid
        return sigmoid * (1 - sigmoid)
    
    def __str__(self):
        return "Sigmoid act. fun."

In [4]:
class Layer:
    """
    A Layer component of a NeuralNetwork.

    Attributes
    ----------
    unit_number: int
        The number of nodes/units in the Layer.
    init_strat: LayerInitializationStrategy
        The initialization strategy for the weights and biases of the Layer
    activation_function: ActivationFunction
        The function.
    """

    def __init__(self, number_of_units: int,
                 initialization_strategy: LayerInitializationStrategy,
                 activation_function: ActivationFunction):
        self.unit_number: int = number_of_units  # The number of nodes/units in the Layer.
        self.init_strat: LayerInitializationStrategy = initialization_strategy
        self.activation_function: ActivationFunction = activation_function

        # The values computed by the units, based on the outputs of the previous layer. Stored for later backprop.
        self.linear_output: Vector = None
        self.output: Vector = None
        
        # The layer preceding the current one in the Neural Network. The NN should connect layers during initialization.
        self.previous_layer: Layer = None
        self.next_layer: Layer = None

        # Weights and biases connecting the layer with the previous layer of the neural network.
        self.weights: Matrix = None; self.biases: Vector = None

        # A variable that needs to be computed from the delta of next layer in the
        # Backprop TrainingAlgorithm
        self.delta: Vector = None
        

    def initialize_weights(self) -> None:
        """
        Initialize the weights and biases of this Layer according to its init_strat.
        """
        self.weights, self.biases = self.init_strat.run(self.previous_layer.unit_number, self.unit_number)

    def compute_output(self):
        """
        Computes the output of this layer as activation_function(np.dot(input, weights) + biases), where
        the input is the output of the previous layer.
        Stores the output as well as just the linear_output np.dot(input, weights) + biases, as it's useful in typical training algorithms.
        """
        self.linear_output = np.dot(self.previous_layer.output, self.weights) + self.biases
        self.output: Vector = self.activation_function(self.linear_output)
        return self.output

class InputLayer(Layer):
    """
    The first Layer of a NeuralNetwork. It has no previous layer, and thus no weights and biases to connect it with.
    Its activation function is the Identity.

    Attributes
    ----------
    unit_number: int
        The number of nodes/units in the Layer.
    """
    def __init__(self, number_of_units: int):
        super().__init__(number_of_units, None, None)
        # An input layer has no previous layer to connect to, so attributes referring to a previous layer are deleted.
        del self.previous_layer, self.weights, self.biases, self.init_strat, self.activation_function
    
    def feed_input(self, value: Vector) -> None:
        """
        Sets the input (which is also the output) of the InputLayer, and thus of the whole NeuralNetwork, to value.
        """
        self.output: Vector = value
    
    def initialize_weights(self):
        raise NotImplementedError("InputLayer does not require weight initialization.")

    def compute_output(self) -> Vector:
        """
        Returns the output (which is also the input) of the InputLayer
        """
        return self.output

class HiddenLayer(Layer):
    def backward(self):
        """
        Computes the delta of this layer from the delta of the next layer
        as np.dot(self.next_layer.weights, self.next_layer.delta) * self.activation_function.derivative(self.linear_output)
        """
        self.delta = np.dot(self.next_layer.weights, self.next_layer.delta) * self.activation_function.derivative(self.linear_output)

class OutputLayer(Layer):
    def __init__(self, number_of_units: int,
                 initialization_strategy: LayerInitializationStrategy,
                 activation_function: ActivationFunction):
        super().__init__(number_of_units, initialization_strategy, activation_function)
        del self.next_layer

In [5]:
class ListOfArrays:
    """
    An utility class for defining element-wise operations on lists containing heteromorphic np.ndarrays.
    Useful for conveniently manipulating network weights and biases in MLP-architecture NeuralNetworks.
    """
    def __init__(self, arrays: list[np.ndarray]):
        self.arrays: list[np.ndarray] = arrays
    
    def __repr__(self):
        return f"ListOfArrays{(self.arrays)}"

    def __getitem__(self, index):
        return self.arrays[index]

    def __setitem__(self, index, value):
        self.arrays[index] = value
    
    def __add__(self, other):
        if not isinstance(other, ListOfArrays): raise TypeError("Operand is not a ListOfArrays")
        return ListOfArrays([x + y for x, y in zip(self.arrays, other.arrays)])
    
    def __mul__(self, scalar: float):
        return ListOfArrays([x * scalar for x in self.arrays])
    
    def __rmul__(self, scalar: float):
        return self.__mul__(scalar)

    def __truediv__(self, scalar: float):
        return ListOfArrays([x / scalar for x in self.arrays])
    
    def __pow__(self, power: float):
        return ListOfArrays([x**power for x in self.arrays])
    
    def sum(self) -> float:
        return np.sum([np.sum(array) for array in self.arrays])
    
    def set_all_values_to(self, value: float) -> None:
        for a in self.arrays:
            a = value*np.ones(a.shape)


class ListOfVectors(ListOfArrays):
    """
    An utility class for convenient manipulation of lists of Vectors of different lengths.
    Useful for network biases in MLP-architecture NeuralNetworks.
    """
    def __init__(self, arrays: list[Vector]):
        super().__init__(arrays)

class ListOfMatrices(ListOfArrays):
    """
    An utility class for convenient manipulation of lists of Matrices of different number of rows and cols.
    Useful for network weights in MLP-architecture NeuralNetworks.
    """
    pass
    def __init__(self, arrays: list[Matrix]):
        super().__init__(arrays)

In [6]:
class MachineLearningModel:
    pass

class NeuralNetworkArchitecture:
    """
    An utility class for storing information about the number of Layers and of units for each Layer of a NeuralNetwork, as well as other
    useful data.

    Used for an alternative constructor of NeuralNetwork when no fine-control for initialization is needed.
    """
    def __init__(self, sizes_of_layers: list[int], activation_function: ActivationFunction, initialization_strategy: LayerInitializationStrategy):
        self.sizes_of_layers: list[int] = sizes_of_layers
        self.activation_function: ActivationFunction = activation_function
        self.initialization_strategy: LayerInitializationStrategy = initialization_strategy
    
    def __str__(self):
        network_shape: str = "(" + ", ".join(map(str, self.sizes_of_layers)) + ")"
        return f"{network_shape}"

class NeuralNetwork(MachineLearningModel):
    """
    A NeuralNetwork.

    Attributes
    ----------
    layers: list[Layer]
        The list of Layer that make the NeuralNetwork.
        Most NeuralNetwork methods work by invoking the appropriate Layer-level methods in the appropriate order.
    input_layer: Layer
        The first Layer of the NN
    hidden_layers: list[Layer]
        The list of non-first-nor-last Layers.
    output_layer: Layer
        The last Layer of the NN
    layers_with_weights: list[Layer]
        The list of all Layers, except the InputLayer.
    """
    def __init__(self, layers: list[Layer]):
        self.layers: list[Layer] = layers
        # Maybe here I should ensure that layers are correctly typed (layers[0] should be an InputLayer, layers[-1] an OutputLayer, all other layers should be HiddenLayer).
        self.input_layer: InputLayer = layers[0]; self.hidden_layers: list[HiddenLayer] = layers[1: -1]; self.output_layer: OutputLayer = layers[-1]
        self.layers_with_weights: list[Layer] = self.layers[1: ]
        self.connect_layers()
        self.initialize_weights()

    @classmethod
    def FromArchitecture(cls, architecture: NeuralNetworkArchitecture):
        sizes: list[int] = architecture.sizes_of_layers; act_fun = architecture.activation_function; init_strat = architecture.initialization_strategy
        il: InputLayer = InputLayer(sizes[0])
        hls: list[HiddenLayer] = [HiddenLayer(n, init_strat, act_fun) for n in sizes[1: -1]]
        ol: OutputLayer = OutputLayer(sizes[-1], init_strat, act_fun)
        layers: list[Layer] = [il] + hls + [ol]
        return cls(layers)

    def connect_layers(self) -> None:
        for (i, layer) in enumerate(self.layers):
            if not isinstance(layer, InputLayer): layer.previous_layer = self.layers[i - 1]
            if not isinstance(layer, OutputLayer): layer.next_layer = self.layers[i + 1]

    def initialize_weights(self) -> None:
        for layer in self.layers_with_weights: layer.initialize_weights()
    
    def feed_input(self, value: np.ndarray) -> None:
        self.input_layer.feed_input(value)

    def activate_network(self) -> np.ndarray:
        for i in range(len(self.layers)): self.layers[i].compute_output()
        return self.output_layer.output
    
    def compute_output(self, value: np.ndarray) -> np.ndarray:
        self.feed_input(value)
        return self.activate_network()
    
    def backward(self) -> None:
        for l in reversed(self.hidden_layers):
            l.backward()
    
    def compute_multiple_outputs(self, x_data: pd.DataFrame | np.ndarray) -> np.ndarray[np.ndarray]:
        if isinstance(x_data, pd.DataFrame): x_data = x_data.to_numpy()
        outputs = np.array(
            [
                self.compute_output(x_data[i]) for i in range(len(x_data))
            ]
        )
        return outputs

In [7]:
class RegularizationTerm:
    def set_network(self, network: NeuralNetwork) -> None:
        self.network = network

    def __call__(self) -> float:
        pass

    def gradient(self) -> tuple[ListOfArrays, ListOfArrays]:
        pass

class NoRegularization(RegularizationTerm):
    def __init__(self):
        pass

    def __call__(self) -> float:
        return 0
    
    def gradient(self) -> tuple[ListOfArrays, ListOfArrays]:
        layers = self.network.layers_with_weights
        return ListOfArrays([np.zeros_like(l.weights) for l in layers]), ListOfArrays([np.zeros_like(l.biases) for l in layers])
    
    def __str__(self):
        return "No reg."

class Tikhonov(RegularizationTerm):
    """
    A regularization penalty term of the form constant*(sum of squares of weights and biases).

    Parameters
    ----------
    penalty: float
        The constant factor multiplying the sum of squares.
    
    Attributes
    ----------
    penalty: float
        The constant factor multiplying the sum of squares.
    network: NeuralNetwork
        The NeuralNetwork that weights and biases are read from.
    """
    def __init__(self, penalty: float):
        self.penalty: float = penalty
        self.network: NeuralNetwork = None

    def __call__(self) -> float:
        layers: list[Layer] = self.network.layers_with_weights
        weights_term = np.sum([np.sum(layer.weigths**2) for layer in layers])  # The sum of squares of all the weights in the NN.
        biases_term = np.sum([np.sum(layer.biases**2) for layer in layers])
        return self.penalty * (weights_term + biases_term) / 2

    def gradient(self) -> tuple[ListOfArrays, ListOfArrays]:
        layers: list[Layer] = self.network.layers_with_weights
        gradient_on_weights: ListOfArrays = ListOfArrays([-self.penalty * l.weights for l in layers])
        gradient_on_biases: ListOfArrays = ListOfArrays([-self.penalty * l.biases for l in layers])
        return gradient_on_weights, gradient_on_biases
    
    def __str__(self):
        return f"Tikhonov({self.penalty})"

In [8]:
class MomentumRule:
    pass

In [9]:
class StoppingCondition:
    def __init__(self):
        self.alg: TrainingAlgorithm = None
    
    def set_alg(self, alg: TrainingAlgorithm) -> None:
        self.alg = alg

    @property
    def is_satisfied(self) -> bool:
        pass

class ThresholdOnTrainingError(StoppingCondition):
    """
    Parameters
    ----------
    threshold: float
    patience: int
    """
    def __init__(self, threshold: float, patience: int):
        super().__init__()
        self.threshold: float = threshold
        self.patience: int = patience
    
    @property
    def is_satisfied(self) -> bool:
        current_training_error: float = self.alg.current_tr_err
        if current_training_error < self.threshold:
            self.consecutive_epochs += 1
            return self.consecutive_epochs > self.patience
        else:
            self.consecutive_epochs = 0
            return False
    
    def __str__(self):
        return f"TR Err threshold: {self.threshold}"

In [10]:
class ErrorFunction:
    def __call__(self, y_data: np.ndarray, y_predicted: np.ndarray) -> float:
        pass
    
    def simple_gradient(self, y_data: np.ndarray, y_predicted: np.ndarray) -> np.ndarray:
        pass

class MSE(ErrorFunction):
    def __call__(self, y_data: np.ndarray, y_predicted: np.ndarray) -> float:
        """
        Returns the average over the dataset of the square euclidean distance between the training outputs and the predictions.
        """
        num_patterns = 1 if y_data.ndim == 1 else len(y_data)
        return 0.5 * np.sum((y_data - y_predicted)**2) / num_patterns
    
    def simple_gradient(self, y_data: np.ndarray, y_predicted: np.ndarray) -> np.ndarray:
        """
        Returns y_data - y_predicted. It is meant to be used on a single pattern at a time, during backpropagation.
        """
        return (y_data - y_predicted)
    
    def __str__(self):
        return "MSE"

In [11]:
class Dataset:
    def __init__(self, x_data: Matrix, y_data: Matrix):
        """
        Initializes the dataset with input (x_data) and output (y_data).
        """

        if isinstance(x_data, pd.DataFrame): x_data = x_data.to_numpy()
        if isinstance(y_data, pd.DataFrame): y_data = y_data.to_numpy()
        
        if x_data.ndim == 1 or y_data.ndim == 1:
            raise ValueError(f"x_data and y_data should be matrices, where each row represents a pattern and each column a feature, but got arguments of shape {x_data.shape} and {y_data.shape}")

        self.x: Matrix = x_data
        self.y: Matrix = y_data
    
    def __len__(self):
        """
        Returns the number of patterns in the dataset.
        """
        return len(self.x)
    
    def __getitem__(self, index):
        """
        Retrieves the input-output pair at the specified index.
        """
        if isinstance(index, slice):
            return Dataset(self.x[index], self.y[index])
        # This should be refactored in the future. The behaviour should be the same as numpy, regardless of index type.
        elif isinstance(index, list) or isinstance(index, np.ndarray):
            return Dataset(self.x[index], self.y[index])
        return self.x[index], self.y[index]
    
    def shuffle(self) -> None:
        indices: np.ndarray = np.arange(len(self))
        np.random.shuffle(indices)
        self.x = self.x[indices, :]
        self.y = self.y[indices, :]
    
    def split(self, fraction: float, shuffle: bool = True) -> tuple[Dataset, Dataset]:
        """
        Returns two datasets, one with fraction*N data and the other with (1-fraction)*N.
        """
        indices: np.ndarray = np.arange(len(self))
        
        if shuffle:
            np.random.shuffle(indices)
        splitting_number: int = int(fraction * len(self))
        indices_1: np.ndarray = indices[:splitting_number]
        
        indices_2: np.ndarray = indices[splitting_number:]
        ds_1: Dataset = Dataset(self.x[indices_1, :], self.y[indices_1, :])
        ds_2: Dataset = Dataset(self.x[indices_2, :], self.y[indices_2, :])
        return ds_1, ds_2


class DataManager:
    """An auxiliary class for extracting minibatches from a Dataset."""
    def __init__(self, dataset: Dataset, minibatch_size: int = None, shuffle: bool = True):
        self.dataset: Dataset = dataset
        self.minibatch_size: int = minibatch_size or len(dataset)
        self.shuffle: bool = shuffle
    
    def __iter__(self):
        """
        An iterator yielding minibatches.
        """
        indices = np.arange(len(self.dataset))
        
        if self.shuffle:
            np.random.shuffle(indices)
        
        for start in range(0, len(indices), self.minibatch_size):
            minibatch_indices = indices[start:start + self.minibatch_size]
            minibatch_x = self.dataset.x[minibatch_indices, :]
            minibatch_y = self.dataset.y[minibatch_indices, :]
            yield minibatch_x, minibatch_y

In [12]:
class TrainingAlgorithm:
    def __init__(self, x_train: pd.DataFrame, y_train: pd.DataFrame, network: NeuralNetwork):
        self.network: NeuralNetwork = network

        if isinstance(x_train, pd.DataFrame): x_train = x_train.to_numpy()
        if isinstance(y_train, pd.DataFrame): y_train = y_train.to_numpy()

        self.training_set = Dataset(x_train, y_train)

        self.current_tr_err: float = float('inf')

        self.history: dict[list] = {'training error': []}
        self.cache: dict = {}

class Backprop(TrainingAlgorithm):
    def __init__(self, x_train: pd.DataFrame, y_train: pd.DataFrame, network: NeuralNetwork,
                 learning_rate: float,
                 error_function: ErrorFunction,
                 stopping_condition: StoppingCondition,
                 regularization_term: RegularizationTerm = None,
                 minibatch_size: int = None
                 ):
        super().__init__(x_train, y_train, network)

        self.learning_rate: float = learning_rate

        self.err_fun: ErrorFunction = error_function
        
        self.stop_cond: StoppingCondition = stopping_condition
        self.stop_cond.set_alg(self)

        self.weights_gradient: ListOfMatrices = ListOfMatrices([np.zeros_like(l.weights) for l in self.network.layers_with_weights])
        self.biases_gradient: ListOfVectors = ListOfVectors([np.zeros_like(l.biases) for l in self.network.layers_with_weights])

        self.regularization_term: RegularizationTerm = regularization_term or NoRegularization()
        self.regularization_term.set_network(self.network)

        self.minibatch_size: int = minibatch_size or len(self.training_set)
        self.minibatch_generator: DataManager = DataManager(self.training_set, self.minibatch_size,
                                                            shuffle = (self.minibatch_size != len(self.training_set))
                                                            )
        
        
        self.current_mb_size: int = None
        self.current_mb_x: Matrix = None; self.current_mb_y: Matrix = None
        
    
    def run(self, max_epochs: int) -> None:
        epoch: int = 0
        while epoch < max_epochs:
            epoch += 1

            for minibatch_x, minibatch_y in self.minibatch_generator:
                self.update_minibatch_metadata(minibatch_x, minibatch_y)
                self.update_gradients()
                self.update_network_parameters()
            
            self.compute_training_error()
            if self.stop_cond.is_satisfied: break

    def update_minibatch_metadata(self, minibatch_x: Matrix, minibatch_y: Matrix):
        self.current_mb_x = minibatch_x; self.current_mb_y = minibatch_y
        self.current_mb_size = len(minibatch_x)
    
    def update_gradients(self):
        self.reset_gradients()

        for x, y in zip(self.current_mb_x, self.current_mb_y):
            predicted_y = self.network.compute_output(x)
            out_l = self.network.output_layer
            out_l.delta = self.err_fun.simple_gradient(y, predicted_y)*out_l.activation_function.derivative(out_l.linear_output)
            self.network.backward()
            self.weights_gradient += ListOfMatrices([np.outer(l.previous_layer.output, l.delta) for l in self.network.layers_with_weights])
            self.biases_gradient += ListOfVectors([l.delta for l in self.network.layers_with_weights])
        self.weights_gradient /= self.current_mb_size; self.biases_gradient /= self.current_mb_size

        self.add_regul_contribution()
        self.add_momentum_contribution()
    
    def reset_gradients(self) -> None:
        self.weights_gradient.set_all_values_to(0)
        self.biases_gradient.set_all_values_to(0)
    
    def add_regul_contribution(self) -> None:
        contribution_to_w, contribution_to_b = self.regularization_term.gradient()
        self.weights_gradient += contribution_to_w; self.biases_gradient += contribution_to_b

    def add_momentum_contribution(self) -> None:
        pass
    
    def update_network_parameters(self) -> None:
        factor = self.learning_rate * self.current_mb_size / len(self.training_set)
        
        for i, l in enumerate(self.network.layers_with_weights):
            l.weights += factor * self.weights_gradient[i]
            l.biases += factor * self.biases_gradient[i]

    def compute_training_error(self) -> None:
        y_prediction = self.network.compute_multiple_outputs(self.training_set.x)
        self.current_tr_err = self.err_fun(self.training_set.y, y_prediction)
        self.history['training error'] += [self.current_tr_err]



In [13]:
class HyperparameterGrid:
    def __init__(self, algorithm_class: type[TrainingAlgorithm], values_of_training_hyperparameters: dict[str, list[object]], list_of_architectures: list[NeuralNetworkArchitecture]):
        self.algorithm_class: type[TrainingAlgorithm] = algorithm_class
        self.tr_hyparams: dict[str, list] = values_of_training_hyperparameters
        self.check_hyperparameter_properness()  # Check the given hyperparameter types are correct and complete for the algorithm class.
        self.architectures: list[NeuralNetworkArchitecture] = list_of_architectures

    def __iter__(self) -> Iterator[tuple[NeuralNetworkArchitecture, dict[str, object]]]:
        for architecture in self.architectures:
            for hyperparameter_combination in itertools.product(*self.tr_hyparams.values()):
                tr_hyparam_comb_dict: dict[str, object] = dict(zip(self.tr_hyparams.keys(), hyperparameter_combination))
                yield architecture, tr_hyparam_comb_dict

    def to_list(self) -> list[tuple[NeuralNetworkArchitecture, dict[str, object]]]:
        return [x for x in self]
    
    def check_hyperparameter_properness(self) -> None:
        pass

In [14]:
class ModelEvaluationMethod:
    pass

class SelectionMethod(ModelEvaluationMethod):
    def __init__(self, dataset: Dataset, algorithm_class: type[TrainingAlgorithm], hyperparameter_combinations: list[tuple[NeuralNetworkArchitecture, dict]]):
        self.dataset: Dataset = dataset
        self.algorithm_class: type[TrainingAlgorithm] = algorithm_class
        self.hyperparameter_combinations: list[tuple[NeuralNetworkArchitecture, dict]] = hyperparameter_combinations

        self.check_hyperparameter_properness()
    
    @classmethod
    def FromGrid(cls, dataset: Dataset, grid: HyperparameterGrid):
        alg_class: type[TrainingAlgorithm] = grid.algorithm_class
        list_of_combinations: list[tuple[NeuralNetworkArchitecture, dict]] = grid.to_list()
        return cls(dataset, alg_class, list_of_combinations)

    def check_hyperparameter_properness(self) -> None:
        pass
    
    @property
    def hyperparam_keys(self) -> list[str]:
        architecture_keys = ["Architecture"]
        training_hyperparameters_keys = [list(item[1].keys()) for item in self.hyperparameter_combinations]
        training_hyperparameters_keys = list(set().union(*[x for x in training_hyperparameters_keys]))
        return architecture_keys + training_hyperparameters_keys

class AssessmentMethod(ModelEvaluationMethod):
    pass

In [None]:
def parenthetical_uncertainty_format(number: float, uncertainty: float, uncertainty_digits: int = 2) -> str:
    import math
    num_exponent = math.floor(math.log10(abs(number))) if number != 0 else 0
    unc_exponent = math.floor(math.log10(abs(uncertainty))) if uncertainty != 0 else 0

    n = num_exponent - unc_exponent + 1  # Number of significant digits
    if n <= 0: return f"{number} +- {uncertainty}"
    scaled_number = number * 10 ** -num_exponent
    num_string = f"{scaled_number:.{n - 1 + uncertainty_digits - 1}f}"

    scaled_uncertainty = uncertainty * 10 ** -unc_exponent
    unc_string = f"{scaled_uncertainty:.{uncertainty_digits - 1}f}".replace('.', '')

    return f"{num_string}({unc_string})e{num_exponent}"

In [70]:
class HoldOutSelection(SelectionMethod):
    def __init__(self, dataset: Dataset | tuple[Dataset, Dataset], validation_split: float,
                 algorithm_class: type[TrainingAlgorithm], hyperparameter_combinations: list[tuple[NeuralNetworkArchitecture, dict]],
                 risk_function: ErrorFunction,
                 shuffle_data: bool = True):
        super().__init__(dataset, algorithm_class, hyperparameter_combinations)

        if isinstance(dataset, Dataset):
            self.vl_set, self.tr_set = self.dataset.split(validation_split, shuffle_data)
        elif isinstance(dataset, tuple):
            self.tr_set, self.vl_set = dataset

        
        self.risk_fun: ErrorFunction = risk_function
        
        self.results: list[dict] = []
    
    def run(self, max_epochs: int, trace_validation_error: bool = True) -> None:
        for architecture, training_hyperparameters in self.hyperparameter_combinations:
            network: NeuralNetwork = NeuralNetwork.FromArchitecture(architecture)
            alg: TrainingAlgorithm = self.algorithm_class(self.tr_set.x, self.tr_set.y, network, **training_hyperparameters)

            if trace_validation_error:
                vl_err_history: list[float] = []
                for _ in range(max_epochs):
                    alg.run(1)
                    y_predicted: np.ndarray = network.compute_multiple_outputs(self.vl_set.x)
                    vl_error: float = self.risk_fun(self.vl_set.y, y_predicted)
                    vl_err_history += [vl_error]
            else:
                alg.run(max_epochs)
                # Measure and store the validation error
                y_predicted: np.ndarray = network.compute_multiple_outputs(self.vl_set.x)
                vl_error: float = self.risk_fun(self.vl_set.y, y_predicted)

            

            result: dict = {
                "Architecture": str(architecture),
                **{k: str(v) if isinstance(v, (int, float)) else v for k, v in training_hyperparameters.items()},
                "Validation Error": vl_error
            }

            if trace_validation_error: result.update({"TR Curve": alg.history["training error"], "VL Curve": vl_err_history})

            self.results.append(result)
    
    def dataframe(self) -> pd.DataFrame:
        tr_hyperparams_keys = self.hyperparameter_combinations[0][1].keys()
        columns: list[str] = ['Architecture', *tr_hyperparams_keys, "Validation Error"]
        return pd.DataFrame(self.results, columns= columns)

class KFoldCrossValidation(SelectionMethod):
    def __init__(self, dataset: Dataset, number_of_folds: int, algorithm_class: type[TrainingAlgorithm], hyperparameter_combinations,
                 risk_function: ErrorFunction,
                 shuffle_data: bool = True):
        super().__init__(dataset, algorithm_class, hyperparameter_combinations)
        self.n_folds: int = number_of_folds
        if shuffle_data: self.dataset.shuffle()
        fold_length: int = int(len(self.dataset) / number_of_folds)
        self.folds: list[Dataset] = [self.dataset[start:start + fold_length] for start in range(number_of_folds)]
        self.risk_fun = risk_function
        
        
        self.results: list[dict] = []
        self.aggregated_results: list[dict] = []

    
    def run(self, max_epochs: int) -> None:
        for k in range(self.n_folds):
            vl_set: Dataset = self.folds[k]
            tr_set: Dataset = Dataset(
                np.concatenate([self.folds[j].x for j in range(self.n_folds) if j != k]),
                np.concatenate([self.folds[j].y for j in range(self.n_folds) if j != k])
            )

            ## Perform hold out validation using vl_set as validation set and tr_set as training set,
            ## and store the performance for each combination of hyperparameters to extract mean and stdev later.
            hold_out = HoldOutSelection(
                dataset = (tr_set, vl_set),
                validation_split = None,
                algorithm_class = self.algorithm_class,
                hyperparameter_combinations = self.hyperparameter_combinations,
                risk_function = self.risk_fun,
                shuffle_data = False
            )

            hold_out.run(max_epochs, trace_validation_error = False)

            fold_results: list[dict] = hold_out.results
            self.results += fold_results
    
    def aggregate_result(self) -> None:

        self.aggregated_results = []

        auxiliary_dictionary: dict = {}
        for item in self.results:
            composite_key: tuple = tuple((k, str(item[k])) for k in self.hyperparam_keys)
            value = item["Validation Error"]
            print(composite_key)
            if composite_key not in auxiliary_dictionary:
                auxiliary_dictionary[composite_key] = []
            auxiliary_dictionary[composite_key].append(value)
    
        for composite_key, list_of_vl_errors in auxiliary_dictionary.items():
            result_dic = dict(composite_key)
            result_dic["Validation Errors"] = np.array(list_of_vl_errors)
            self.aggregated_results.append(result_dic)
        
        for item in self.aggregated_results:
            item["Validation Error"] = f"{np.mean(item["Validation Errors"])} +- {np.std(item["Validation Errors"], ddof = 1)}"
            item["Validation Error"] = parenthetical_uncertainty_format(np.mean(item["Validation Errors"]), np.std(item["Validation Errors"], ddof = 1))

    def dataframe(self) -> pd.DataFrame:
        tr_hyperparams_keys = self.hyperparameter_combinations[0][1].keys()
        columns: list[str] = ['Architecture', *tr_hyperparams_keys, "Validation Error"]
        return pd.DataFrame(self.aggregated_results, columns= columns)

In [16]:
ds = Dataset(np.array([[0], [1], [2]]), np.array([[3], [4], [5]]))
subset = ds[0:2]

print("Is subset a view?", subset.x.base is ds.x)

print(subset.x)


a = np.array([[0], [1], [2]]); b = np.array([[3], [4], [5]])
mask = np.ones((len(a), 1), dtype=bool)

c = np.concatenate((a, b))

print(c)
[n for n in range(5) if n != 3]

Is subset a view? True
[[0]
 [1]]
[[0]
 [1]
 [2]
 [3]
 [4]
 [5]]


[0, 1, 2, 4]

In [74]:
import os
iris_path = os.path.join('..', 'iris', 'iris.data')
iris_df = pd.read_csv(iris_path, names = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class'])
iris_x = iris_df.loc[:, 'sepal length':'petal width']; iris_y = pd.get_dummies(iris_df.loc[:, 'class']).astype(int)

hyperparams: dict[list] = {
    'learning_rate': [0.9],
    'error_function': [MSE()],
    'stopping_condition': [ThresholdOnTrainingError(0.0001, 10)],
    'regularization_term': [Tikhonov(5e-2),
                            Tikhonov(5e-5),
                            NoRegularization()
                            ],
    'minibatch_size': [None]
}

grid = HyperparameterGrid(
    Backprop,
    hyperparams,
    list_of_architectures = [
        NeuralNetworkArchitecture([4, 7, 3], Sigmoid(), RandomUniform(0.3))
    ]
)

validation_method = KFoldCrossValidation(
    Dataset(iris_x, iris_y),
    number_of_folds=6,
    algorithm_class=Backprop,
    hyperparameter_combinations= grid.to_list(),
    risk_function=MSE(),
    shuffle_data=True
)

validation_method.run(1000)
validation_method.aggregate_result()
validation_method.dataframe()

(('Architecture', '(4, 7, 3)'), ('error_function', 'MSE'), ('regularization_term', 'Tikhonov(0.05)'), ('minibatch_size', 'None'), ('stopping_condition', 'TR Err threshold: 0.0001'), ('learning_rate', '0.9'))
(('Architecture', '(4, 7, 3)'), ('error_function', 'MSE'), ('regularization_term', 'Tikhonov(5e-05)'), ('minibatch_size', 'None'), ('stopping_condition', 'TR Err threshold: 0.0001'), ('learning_rate', '0.9'))
(('Architecture', '(4, 7, 3)'), ('error_function', 'MSE'), ('regularization_term', 'No reg.'), ('minibatch_size', 'None'), ('stopping_condition', 'TR Err threshold: 0.0001'), ('learning_rate', '0.9'))
(('Architecture', '(4, 7, 3)'), ('error_function', 'MSE'), ('regularization_term', 'Tikhonov(0.05)'), ('minibatch_size', 'None'), ('stopping_condition', 'TR Err threshold: 0.0001'), ('learning_rate', '0.9'))
(('Architecture', '(4, 7, 3)'), ('error_function', 'MSE'), ('regularization_term', 'Tikhonov(5e-05)'), ('minibatch_size', 'None'), ('stopping_condition', 'TR Err threshold: 0

Unnamed: 0,Architecture,learning_rate,error_function,stopping_condition,regularization_term,minibatch_size,Validation Error
0,"(4, 7, 3)",0.9,MSE,TR Err threshold: 0.0001,Tikhonov(0.05),,3.255(70)e-1
1,"(4, 7, 3)",0.9,MSE,TR Err threshold: 0.0001,Tikhonov(5e-05),,2.01(36)e-2
2,"(4, 7, 3)",0.9,MSE,TR Err threshold: 0.0001,No reg.,,1.91(27)e-2


In [68]:
def parenthetical_uncertainty_format(number: float, uncertainty: float, uncertainty_digits: int = 2) -> str:
    import math
    num_exponent = math.floor(math.log10(abs(number))) if number != 0 else 0
    unc_exponent = math.floor(math.log10(abs(uncertainty))) if uncertainty != 0 else 0

    n = num_exponent - unc_exponent + 1  # Number of significant digits
    if n <= 0: return f"{number} +- {uncertainty}"
    scaled_number = number * 10 ** -num_exponent
    num_string = f"{scaled_number:.{n - 1 + uncertainty_digits - 1}f}"

    scaled_uncertainty = uncertainty * 10 ** -unc_exponent
    unc_string = f"{scaled_uncertainty:.{uncertainty_digits - 1}f}".replace('.', '')

    return f"{num_string}({unc_string})e{num_exponent}"



In [69]:
print(parenthetical_uncertainty_format(0.1817191322531631, 0.01383920501477125, 4))

1.8172(1384)e-1


In [51]:
def parenthetical_uncertainty_format(number: float, uncertainty: float, uncertainty_digits: int = 2) -> str:
    import math
    exponent = math.floor(math.log10(abs(uncertainty))) if uncertainty != 0 else 0
    scale = 10 ** -exponent

    scaled_numer = number  * scale; scaled_unc = uncertainty * scale
    rounded_unc = round(scaled_unc, uncertainty_digits - 1)
    uncertainty_string = f"{rounded_unc:.{uncertainty_digits - 1}f}".replace('.', '')

    number_string = f"{scaled_numer:.{uncertainty_digits - 1}f}".rstrip('0').rstrip('.')
    return f"{number_string}({uncertainty_string})e{exponent}"

In [52]:
print(parenthetical_uncertainty_format(0.1817191322531631, 0.0024035738106961565))

181.7(24)e-3


In [None]:
(('Architecture', '(4, 7, 3)'), ('minibatch_size', 'None'), ('learning_rate', '0.9'), ('error_function', 'MSE'), ('regularization_term', 'Tikhonov(5e-05)'), ('stopping_condition', 'TR Err threshold: 0.0001')) == (('Architecture', '(4, 7, 3)'), ('minibatch_size', 'None'), ('learning_rate', '0.9'), ('error_function', 'MSE'), ('regularization_term', 'Tikhonov(5e-05)'), ('stopping_condition', 'TR Err threshold: 0.0001'))

In [None]:
type(validation_method.results[0])

In [None]:
import os
iris_path = os.path.join('..', 'iris', 'iris.data')
iris_df = pd.read_csv(iris_path, names = ['sepal length', 'sepal width', 'petal length', 'petal width', 'class'])
iris_x = iris_df.loc[:, 'sepal length':'petal width']; iris_y = pd.get_dummies(iris_df.loc[:, 'class']).astype(int)




hyperparams: dict[list] = {
    'learning_rate': [0.9],
    'error_function': [MSE()],
    'stopping_condition': [ThresholdOnTrainingError(0.0001, 10)],
    'regularization_term': [Tikhonov(5e-3),
                            Tikhonov(5e-5),
                            NoRegularization()
                            ],
    'minibatch_size': [None]
}

grid = HyperparameterGrid(
    Backprop,
    hyperparams,
    list_of_architectures = [
        NeuralNetworkArchitecture([4, 7, 3], Sigmoid(), RandomUniform(0.3))
    ]
)

validation_method = HoldOutSelection(
    Dataset(iris_x, iris_y),
    0.85,
    Backprop,
    grid.to_list(),
    MSE(),
    shuffle_data = True
)


validation_method2 = HoldOutSelection(
    Dataset(iris_x, iris_y),
    0.85,
    Backprop,
    grid.to_list(),
    MSE(),
    shuffle_data = True
)

validation_method.run(500, trace_validation_error = True)
validation_method2.run(500, trace_validation_error = False)
df = validation_method.dataframe()
df2 = validation_method2.dataframe()
display(df, df2)

In [38]:
list_dict1 = validation_method.results
list_dict2 = validation_method2.results

total_list_of_dicts = list_dict1 + list_dict2
# forbidden_keys = ['Validation Error', 'VL Curve', 'TR Curve']

aggregate = {}

for dic in total_list_of_dicts:
    composite_key = tuple( (k, dic[k])   for k in dic if k in validation_method.hyperparam_keys )
    value = dic["Validation Error"]
    if composite_key not in aggregate:
        aggregate[composite_key] = []
    aggregate[composite_key] += [value]

final_list = []

for composite_key, list_of_vl_errors in aggregate.items():
    result_dict = dict(composite_key)
    result_dict["Validation Errors"] = np.array(list_of_vl_errors)
    final_list.append(result_dict)

In [None]:
final_list

In [None]:
identification_keys = ["Architecture", *[k for k in [validation_method.hyperparameter_combinations[j][1].keys() for j in len(validation_method.hyperparameter_combinations)]]]
identification_keys

In [None]:

**{k: str(v) if isinstance(v, (int, float)) else v for k, v in training_hyperparameters.items()}
for arch, trhyperp in grid.to_list():
    print(arch)
    print(trhyperp)

In [None]:
{k:v }

In [None]:
ns = [0, 1, 2]
for n in ns:
    tr_hist, vl_hist = validation_method.results[n]["TR Curve"], validation_method.results[n]["VL Curve"]

    plt.plot(tr_hist, ls = '--')
    plt.plot(vl_hist)
#plt.yscale('log')
plt.show()

In [61]:
plt.close()

In [33]:
import os

monk2train_path = os.path.join('..', 'monk+s+problems', 'monks-2.train')
df = pd.read_csv(monk2train_path, sep = ' ', skipinitialspace= True, names= ['class', 'a1','a2','a3','a4','a5','a6', 'ID'])
raw_x = df.loc[:, 'a1':'a6']; raw_y = df.loc[:, 'class']
x = pd.get_dummies(raw_x, columns = raw_x.columns).astype(int); y = pd.get_dummies(raw_y).astype(int)

In [207]:

hyperparams: dict[list] = {
    'learning_rate': [200],
    'error_function': [MSE()],
    'stopping_condition': [ThresholdOnTrainingError(0.0001, 10)],
    'regularization_term': [#Tikhonov(5e-12),
                            #Tikhonov(5e-11),
                            Tikhonov(1e-5)
                            ],
    'minibatch_size': [5]
}

grid = HyperparameterGrid(
    Backprop,
    hyperparams,
    list_of_architectures = [
        NeuralNetworkArchitecture([17, 3, 2], Sigmoid(), RandomUniform(0.3)),
        #NeuralNetworkArchitecture([17, 4, 2], Sigmoid(), RandomUniform(0.3))
    ]
)

validation_method = HoldOutSelection(
    Dataset(x, y),
    0.25,
    Backprop,
    grid.to_list(),
    MSE(),
    shuffle_data = True
)

In [None]:
validation_method.run(500, trace_validation_error = True)
df = validation_method.dataframe()
df

In [None]:
ns = [0]
for n in ns:
    tr_hist, vl_hist = validation_method.results[n]["TR Curve"], validation_method.results[n]["VL Curve"]

    plt.plot(tr_hist, ls = '--')
    plt.plot(vl_hist)
plt.yscale('log')
plt.show()

In [210]:
plt.close()

In [None]:
x.iloc[0:2]

In [64]:
il = InputLayer(17)
hl = HiddenLayer(6, RandomUniform(0.2), Sigmoid())
ol = OutputLayer(2, RandomUniform(0.2), Sigmoid())

nn = NeuralNetwork([il, hl, ol])


training_alg = Backprop(x.iloc[0:122], y.iloc[0:122], nn, 20.00, MSE(), ThresholdOnTrainingError(0.001, 10), regularization_term = Tikhonov(0.002), minibatch_size = 2)

training_alg.run(500)


In [67]:
monk2test_path = os.path.join('..', 'monk+s+problems', 'monks-2.test')
df = pd.read_csv(monk2test_path, sep = ' ', skipinitialspace= True, names= ['class', 'a1','a2','a3','a4','a5','a6', 'ID'])
raw_x = df.loc[:, 'a1':'a6']; raw_y = df.loc[:, 'class']
x_test = pd.get_dummies(raw_x, columns = raw_x.columns).astype(int); y_test = pd.get_dummies(raw_y).astype(int)

y_predicted = nn.compute_multiple_outputs(x_test)
test_error = MSE()(y_test.to_numpy(), y_predicted)

In [None]:
test_error

In [None]:
x = np.array([
    [0,0,0],
    [1,1,1],
    [1,2,3],
    [2,3,4]
])

y = np.array([
    [0,0],
    [1,1],
    [1,2],
    [2,3]
])

ds = Dataset(x, y)

ds_1, ds_2 = ds.split(0.50, shuffle = False)

print(ds_1.x, ds_1.y)

print(ds_2.x, ds_2.y)

In [None]:
x[[0,2], :]

In [154]:
hyperparams: dict[list] = {
    'learning_rate': [0.1, 0.3, 0.5],
    'regularization_term': [Tikhonov(0.1), Tikhonov(0.2)]
}

architectures = [
    NeuralNetworkArchitecture([17, 6, 2], Sigmoid(), RandomUniform(0.3)),
    NeuralNetworkArchitecture([17, 5, 2], Sigmoid(), RandomUniform(0.3))
]

grid = HyperparameterGrid(Backprop, hyperparams, architectures)

In [None]:
for x in grid:
    print(x[0], x[1])

In [None]:
d = {'key_a': [123, 3.45, np.array([0,1,2]), 'hello'], 'key_back': [456, 789]}
#print(d.values())
#print([x for x in d.values()])

import itertools

a = [1,2,3]
b = [4,5,6]
c = [7,8,9]
#for x in itertools.product(*d.values()): print(x)

print(zip(a,b))

d.get('key_b', )

In [None]:
import inspect

inspect.signature(Backprop.__init__)