# Homework 3 - classifiers

In this homework we will use 7 algorithms to classify data.

## Prerequisite knowledge


$$
\begin{align*}
    Precision &= \frac{\text{True positives}}{\text{True positives} + \text{False positives}} \\
    Recall &= \frac{\text{True positives}}{\text{True positives} + \text{False negatives}} \\
    F1 &= 2 \cdot \frac{\text{Precision} \cdot \text{Recall}}{\text{Precision} + \text{Recall}}
\end{align*}
$$

**Note**: This is mostly for learning purposes.


## Dependencies

### General dependencies

In [4]:
import copy
import os
import platform

# On Windows, point R_HOME at the local R installation directory.
# A raw string keeps the backslashes literal; the non-raw form relied on the
# invalid escape sequences "\P" and "\R", which newer Python versions warn about.
if platform.system() == "Windows":
    os.environ["R_HOME"] = r"C:\Program Files\R\R-4.3.3"

In [5]:
import typing as t
import csv
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import xgboost as xgb

from IPython.display import display, HTML, IFrame

import warnings
# Install an "ignore" filter so warnings are not shown from here on.
warnings.filterwarnings("ignore")

# IPython magic selecting the inline matplotlib backend.
%matplotlib inline
# Default resolution (dots per inch) for every figure created afterwards.
mpl.rcParams['figure.dpi'] = 300

In [20]:
from typing import Optional
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.metrics import (
    precision_score,
    recall_score,
    f1_score,
    accuracy_score,
    ConfusionMatrixDisplay,
    confusion_matrix,
)
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.inspection import DecisionBoundaryDisplay
from sklearn.feature_selection import mutual_info_classif
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier, ExtraTreeClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

import keras
from keras import layers

In [7]:
class OneHotToNumericPipeline(BaseEstimator, ClassifierMixin):
    """Adapter that turns a one-hot predicting estimator into a class-index one.

    ``predict`` takes the arg-max over the columns of the wrapped estimator's
    prediction, so callers receive one integer class index per sample.
    The wrapped estimator is expected to be fitted already; ``fit`` is a no-op.
    """

    def __init__(self, base):
        self.base = base
        # Set eagerly so the wrapper already carries a trailing-underscore
        # attribute (the wrapped estimator is fitted by the caller).
        self.is_fitted_ = True

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (scikit-learn ``fit`` convention)."""
        return self

    def predict(self, X, y=None):
        """Return the column index of the largest prediction for each row of ``X``."""
        return np.argmax(self.base.predict(X), axis=1)

    def predict_proba(self, X, y=None):
        """Not supported by this adapter."""
        raise NotImplementedError()

    @property
    def classes_(self):
        """Integer class ids ``0 .. len(base.classes_) - 1``."""
        return np.arange(len(self.base.classes_))

In [8]:
class NumericOnlyPipeline(BaseEstimator, ClassifierMixin):
    """Adapter that lets a class-index estimator consume and emit one-hot labels.

    ``fit`` converts one-hot targets to class indices (arg-max per row) before
    delegating to the wrapped estimator; ``predict`` converts the predicted
    indices back to a one-hot integer matrix.
    """

    # Width of the one-hot output when ``predict`` is called before ``fit``
    # (the value the original implementation hard-coded as ``6 + 1``).
    _DEFAULT_N_CLASSES = 7

    def __init__(self, base):
        self.base = base
        # Set eagerly so the wrapper already carries a trailing-underscore attribute.
        self.is_fitted_ = True

    def fit(self, X, y=None):
        """Fit the wrapped estimator on the arg-max of the one-hot targets ``y``.

        The number of one-hot columns is remembered so ``predict`` can produce
        a matrix of the same width. Returns ``self``.
        """
        y = np.asarray(y)
        self.n_classes_ = y.shape[1]
        self.base.fit(X, np.argmax(y, axis=1))
        return self

    def predict(self, X, y=None):
        """Return a one-hot ``int`` matrix with one row per sample of ``X``."""
        predicted = self.base.predict(X)
        n_classes = getattr(self, "n_classes_", self._DEFAULT_N_CLASSES)

        encoded_arr = np.zeros((predicted.size, n_classes), dtype=int)
        encoded_arr[np.arange(predicted.size), predicted] = 1

        return encoded_arr

    def predict_proba(self, X, y=None):
        """Not supported by this adapter."""
        raise NotImplementedError()

    @property
    def classes_(self):
        """One ``[False, True]`` array per class known to the wrapped estimator."""
        n_classes = len(self.base.classes_)
        return [np.array([False, True]) for _ in range(n_classes)]

### Dataset-specific dependencies

In [9]:
# Name of the target column.
LABEL_VARIABLE = "NObeyesdad"

# Feature columns, split by how they are encoded downstream.
NUMERICAL_VARIABLES = "Age Height Weight FCVC NCP CH2O FAF TUE".split()
CATEGORICAL_VARIABLES_NO_LABEL = (
    "FAVC CAEC CALC SCC MTRANS Gender family_history_with_overweight SMOKE".split()
)
CATEGORICAL_VARIABLES = CATEGORICAL_VARIABLES_NO_LABEL + [LABEL_VARIABLE]

# Convenience unions: numerical columns first, categorical columns after.
ALL_VARIABLES_NO_LABEL = NUMERICAL_VARIABLES + CATEGORICAL_VARIABLES_NO_LABEL
ALL_VARIABLES = NUMERICAL_VARIABLES + CATEGORICAL_VARIABLES

# Human-readable description for every column name.
LABEL_DICTIONARY = dict(
    Age="Age",
    Height="Height (cm)",
    Weight="Weight (kg)",
    FCVC=" Frequency of consumption of vegetables (times per day)",
    NCP="Number of main meals",
    CH2O="Consumption of water daily (Liters)",
    FAF="Physical activity frequency (times per day)",
    TUE="Time using technology devices (hours)",
    FAVC="Frequent consumption of high caloric food",
    CAEC="Consumption of food between meals",
    CALC="Consumption of alcohol",
    SCC="Calories consumption monitoring",
    MTRANS="Transportation used",
    Gender="Gender",
    family_history_with_overweight="Family member suffered or suffers from overweight",
    SMOKE="Smoker or not",
    NObeyesdad="Obesity level",
)

T = t.TypeVar("T")


class Person:
    """Typed record holding the fields of one dataset row.

    Every constructor argument arrives as a string; the numerical fields are
    converted to ``np.float32`` and the categorical fields are stored as given.
    """

    Gender: str
    Age: np.float32
    Height: np.float32
    Weight: np.float32
    family_history_with_overweight: str
    FAVC: str
    FCVC: np.float32
    NCP: np.float32
    CAEC: str
    SMOKE: str
    CH2O: np.float32
    SCC: str
    FAF: np.float32
    TUE: np.float32
    CALC: str
    MTRANS: str
    NObeyesdad: str

    def __init__(
        self,
        Gender: str,
        Age: str,
        Height: str,
        Weight: str,
        family_history_with_overweight: str,
        FAVC: str,
        FCVC: str,
        NCP: str,
        CAEC: str,
        SMOKE: str,
        CH2O: str,
        SCC: str,
        FAF: str,
        TUE: str,
        CALC: str,
        MTRANS: str,
        NObeyesdad: str,
    ):
        self.Gender = Gender
        self.Age = np.float32(Age)
        self.Height = np.float32(Height)
        self.Weight = np.float32(Weight)
        self.family_history_with_overweight = family_history_with_overweight
        self.FAVC = FAVC
        self.FCVC = np.float32(FCVC)
        self.NCP = np.float32(NCP)
        self.CAEC = CAEC
        self.SMOKE = SMOKE
        self.CH2O = np.float32(CH2O)
        self.SCC = SCC
        self.FAF = np.float32(FAF)
        self.TUE = np.float32(TUE)
        self.CALC = CALC
        self.MTRANS = MTRANS
        self.NObeyesdad = NObeyesdad

    def __str__(self) -> str:
        """Return the instance attributes rendered as a dict-style string."""
        # __str__ must return a str; returning the dict itself raises TypeError.
        return str(vars(self))

    def __len__(self) -> int:
        """Return the number of stored fields."""
        return len(vars(self))

    def __repr__(self) -> str:
        """Return ``Person(field=value, ...)`` listing every stored field."""
        fields = ", ".join(f"{name}={value!r}" for name, value in vars(self).items())
        return f"{type(self).__name__}({fields})"


class DatasetManager:
    """Loads the CSV dataset and converts it into feature / label frames."""

    def __init__(self, path_to_csv: str):
        self.path_to_csv = path_to_csv

    def load_as_obj_list(self) -> list[Person]:
        """Read the CSV file and return one ``Person`` per data row."""
        # The csv module asks for files opened with newline="" so that
        # embedded newlines inside quoted fields are parsed correctly.
        with open(self.path_to_csv, newline="") as csv_file:
            return [Person(**row) for row in csv.DictReader(csv_file)]

    @staticmethod
    def _append_columns(
        features: pd.DataFrame,
        new_columns: t.Union[pd.Series, pd.DataFrame],
        variable: str,
        variable_to_indexes: dict,
    ) -> pd.DataFrame:
        """Append ``new_columns`` and record which column positions they occupy."""
        first_new_position = len(features.columns)
        features = pd.concat([features, new_columns], axis=1)
        variable_to_indexes[variable] = tuple(
            range(first_new_position, len(features.columns))
        )
        return features

    def process_dataframe_one_hot(
        self, arg_dataset: pd.DataFrame
    ) -> tuple[pd.DataFrame, pd.DataFrame, dict]:
        """One-hot encode the categorical columns of ``arg_dataset``.

        Returns:
            A tuple ``(features, labels, variable_to_indexes)`` where
            ``features`` holds the numerical columns followed by the float
            dummy columns of each categorical variable, ``labels`` is the
            dummy encoding of the label column, and ``variable_to_indexes``
            maps each original variable name to the tuple of column positions
            it occupies in ``features``.
        """
        result_features_dataframe = pd.DataFrame()
        variable_to_indexes = dict()

        for variable in NUMERICAL_VARIABLES:
            result_features_dataframe = self._append_columns(
                result_features_dataframe,
                arg_dataset[variable],
                variable,
                variable_to_indexes,
            )

        for variable in CATEGORICAL_VARIABLES_NO_LABEL:
            result_features_dataframe = self._append_columns(
                result_features_dataframe,
                pd.get_dummies(arg_dataset[variable]).astype(float),
                variable,
                variable_to_indexes,
            )

        result_labels_dataframe = pd.get_dummies(arg_dataset[LABEL_VARIABLE])
        return result_features_dataframe, result_labels_dataframe, variable_to_indexes

    def process_dataframe_numeric(
        self, arg_dataset: pd.DataFrame
    ) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Label-encode the categorical columns of ``arg_dataset``.

        Returns:
            A tuple ``(features, labels)``: the numerical columns followed by
            one integer-coded column per categorical variable, and a
            single-column frame with the integer-coded label.
        """
        result_features_dataframe = pd.DataFrame()

        for variable in NUMERICAL_VARIABLES:
            result_features_dataframe = pd.concat(
                [result_features_dataframe, arg_dataset[variable]], axis=1
            )

        for variable in CATEGORICAL_VARIABLES_NO_LABEL:
            # Reuse the input index so the axis=1 concat aligns row-for-row
            # even when arg_dataset does not carry a default RangeIndex.
            encoded_column = pd.DataFrame(
                {variable: LabelEncoder().fit_transform(arg_dataset[variable]).tolist()},
                index=arg_dataset.index,
            )
            result_features_dataframe = pd.concat(
                [result_features_dataframe, encoded_column], axis=1
            )

        result_labels_dataframe = pd.DataFrame(
            {
                LABEL_VARIABLE: LabelEncoder()
                .fit_transform(arg_dataset[LABEL_VARIABLE])
                .tolist()
            },
            index=arg_dataset.index,
        )
        return result_features_dataframe, result_labels_dataframe

In [10]:
# Load the CSV into Person objects, then into a DataFrame with one column per field.
dataset_manager = DatasetManager("data/ObesityDataSet.csv")
dataset_obj_list = dataset_manager.load_as_obj_list()
dataset_dataframe = pd.DataFrame.from_records(
    data=[vars(entry) for entry in dataset_obj_list]
)
# Class names are sorted so that position i matches column i of the one-hot
# label frame built with pd.get_dummies (whose columns come out in sorted
# order). Using order of appearance would attach the wrong names to the
# confusion-matrix axes and to the per-class metric tables.
all_output_classes = sorted(dataset_dataframe[LABEL_VARIABLE].unique().tolist())
dataset_output_classes = len(all_output_classes)

## Use information gain to select the best features

In [11]:
# Label-encoded features and labels, both cast to float.
x_numeric, y_numeric = (
    frame.astype(float)
    for frame in dataset_manager.process_dataframe_numeric(dataset_dataframe)
)

# One mutual-information score per feature column, highest first.
mutual_info = pd.DataFrame(
    {"Coefficient": mutual_info_classif(x_numeric, y_numeric)},
    index=x_numeric.columns,
).sort_values(by="Coefficient", ascending=False)

HTML(mutual_info.to_html())

Unnamed: 0,Coefficient
Weight,1.248984
Age,0.582005
Height,0.425246
FCVC,0.406311
CH2O,0.305902
TUE,0.286994
FAF,0.283382
NCP,0.260715
Gender,0.193117
CAEC,0.175757


In [12]:
# Feature that is part of every combination.
MAIN_SELECTED_FEATURES = ["Weight"]

# Features paired one at a time with the main feature(s).
SECONDARY_SELECTED_FEATURES = ["Age", "Height", "FCVC", "FAF", "CH2O", "TUE", "NCP", "Gender"]

SELECTED_FEATURES = [*MAIN_SELECTED_FEATURES, *SECONDARY_SELECTED_FEATURES]


def make_feature_combinations(
    main_features: t.List[str], secondary_features: t.List[str]
) -> list[list[str]]:
    """Return one list per secondary feature: the main features plus that feature."""
    return [main_features + [feature] for feature in secondary_features]


SELECTED_FEATURES_COMBINATIONS = make_feature_combinations(
    main_features=MAIN_SELECTED_FEATURES,
    secondary_features=SECONDARY_SELECTED_FEATURES,
)

In [13]:
class KFoldCrossValidation:
    """Class-balanced k-fold splitter for one-hot encoded labels.

    The rows of every label column are divided into ``n_splits`` chunks and
    chunk ``i`` of every class is stacked into fold ``i``, so each fold gets a
    share of every class. Rows inside a fold are then shuffled with
    ``np.random.permutation``.

    Attributes:
        n_splits: number of folds.
        k_fold_data: list of ``(features, labels)`` NumPy array pairs, one per fold.
        feature_to_indexes: maps a feature name to the column positions it
            occupies in the feature arrays.
    """

    def __init__(
        self,
        features_dataframe: pd.DataFrame,
        labels_dataframe: pd.DataFrame,
        feature_to_indexes: dict,
        n_splits: int = 5,
    ):
        if n_splits < 1:
            raise ValueError("n_splits must be at least 1")

        self.n_splits = n_splits
        self.k_fold_data = self.__get_data_splitting_dict(
            features_dataframe, labels_dataframe
        )

        self.feature_to_indexes = feature_to_indexes

    def __get_data_splitting_dict(
        self, arg_features_dataframe: pd.DataFrame, arg_labels_dataframe: pd.DataFrame
    ) -> list[tuple[np.ndarray, np.ndarray]]:
        """Build the shuffled folds from the feature and one-hot label frames.

        Raises:
            ValueError: if the folds do not contain exactly one copy of every
                input row (a row belongs to no class or to several classes).
        """
        feature_rows = arg_features_dataframe.to_numpy()
        label_rows = arg_labels_dataframe.to_numpy()

        fold_feature_chunks = [[] for _ in range(self.n_splits)]
        fold_label_chunks = [[] for _ in range(self.n_splits)]

        # Split each class separately so every fold receives part of every class.
        for class_position in range(label_rows.shape[1]):
            # Positional boolean mask: independent of the frames' index labels.
            class_mask = label_rows[:, class_position].astype(bool)
            chunk_pairs = zip(
                np.array_split(feature_rows[class_mask], self.n_splits),
                np.array_split(label_rows[class_mask], self.n_splits),
            )
            for fold_number, (features, labels) in enumerate(chunk_pairs):
                fold_feature_chunks[fold_number].append(features)
                fold_label_chunks[fold_number].append(labels)

        k_fold_data: list[tuple[np.ndarray, np.ndarray]] = []
        for feature_chunks, label_chunks in zip(fold_feature_chunks, fold_label_chunks):
            fold_features = np.vstack(feature_chunks)
            fold_labels = np.vstack(label_chunks)
            # Shuffle so the rows of a fold are not grouped by class.
            permutation = np.random.permutation(len(fold_features))
            k_fold_data.append((fold_features[permutation], fold_labels[permutation]))

        total_rows = sum(len(fold_features) for fold_features, _ in k_fold_data)
        if total_rows != len(arg_features_dataframe):
            raise ValueError(
                "every row must belong to exactly one label column: "
                f"folds hold {total_rows} rows, input has {len(arg_features_dataframe)}"
            )
        return k_fold_data

    @staticmethod
    def _stack_folds(
        folds: t.Sequence[tuple[np.ndarray, np.ndarray]],
    ) -> t.Optional[tuple[np.ndarray, np.ndarray]]:
        """Stack several folds into one ``(features, labels)`` pair, or ``None`` if empty."""
        if not folds:
            return None
        return (
            np.vstack([features for features, _ in folds]),
            np.vstack([labels for _, labels in folds]),
        )

    def get_train_val_split(
        self, idx: int, selected_features: list[str]
    ) -> tuple[tuple[np.ndarray, np.ndarray], tuple[np.ndarray, np.ndarray]]:
        """Return ``(train, validation)`` with fold ``idx`` held out for validation.

        Both elements are ``(features, labels)`` pairs restricted to
        ``selected_features``. The validation element is ``None`` when ``idx``
        does not name an existing fold; the train element is ``None`` when no
        fold is left for training.
        """
        val_split = None
        train_folds = []

        for fold_number, fold in enumerate(self.k_fold_data):
            if fold_number == idx:
                val_split = fold
            else:
                train_folds.append(fold)

        return (
            self.trim_to_selected_features(
                self._stack_folds(train_folds), selected_features
            ),
            self.trim_to_selected_features(val_split, selected_features),
        )

    def get_train_val_full(self, selected_features: list[str]):
        """Return all folds stacked together, restricted to ``selected_features``."""
        return self.trim_to_selected_features(
            self._stack_folds(self.k_fold_data), selected_features
        )

    def trim_to_selected_features(
        self, dataset: tuple[np.ndarray, np.ndarray], selected_features: list[str]
    ) -> tuple[np.ndarray, np.ndarray]:
        """Keep only the feature columns that belong to ``selected_features``.

        Returns ``None`` when ``dataset`` is ``None``; labels are passed through
        unchanged.
        """
        if dataset is None:
            return None

        selected_indexes = []
        for sel_feature in selected_features:
            selected_indexes.extend(self.feature_to_indexes[sel_feature])
        return dataset[0][:, selected_indexes], dataset[1]

In [14]:
class ConfusionMatrix:
    """Accumulates one-hot true / predicted labels and derives metrics from them.

    Attributes:
        y_true: all registered true labels, one row per sample (``None`` until
            the first registration).
        y_hat: all registered predictions, same layout as ``y_true``.
        labels: value passed to the constructor, stored unchanged.
    """

    def __init__(self, labels: tuple[str]):
        self.y_true: Optional[pd.DataFrame] = None
        self.y_hat: Optional[pd.DataFrame] = None
        self.labels = labels

    def register_predictions(
        self,
        arg_y_true: t.Union[pd.DataFrame, np.ndarray],
        arg_y_hat: t.Union[pd.DataFrame, np.ndarray],
    ):
        """Append a batch of true labels and predictions to the accumulated ones."""
        assert (self.y_true is None) == (self.y_hat is None)

        # Convert each argument independently so a mixed ndarray / DataFrame
        # pair is accepted as well.
        if isinstance(arg_y_true, np.ndarray):
            arg_y_true = pd.DataFrame(arg_y_true)
        if isinstance(arg_y_hat, np.ndarray):
            arg_y_hat = pd.DataFrame(arg_y_hat)

        if self.y_true is None:
            self.y_true = arg_y_true
            self.y_hat = arg_y_hat
        else:
            self.y_true = pd.concat([self.y_true, arg_y_true], axis=0)
            self.y_hat = pd.concat([self.y_hat, arg_y_hat], axis=0)

    def _require_predictions(self) -> None:
        """Raise ``ValueError`` when nothing has been registered yet."""
        if self.y_true is None or self.y_hat is None:
            raise ValueError("no predictions registered")

    def plot_confusion_matrix(
        self, display_labels: t.Optional[t.Sequence[str]] = None
    ):
        """Plot the confusion matrix of the arg-max class of every sample.

        Args:
            display_labels: tick labels, one per class; defaults to the
                module-level ``all_output_classes``.
        """
        self._require_predictions()
        if display_labels is None:
            display_labels = all_output_classes

        # One class id per one-hot column instead of a hard-coded 0..6 range.
        class_ids = list(range(self.y_true.shape[1]))
        cm = confusion_matrix(
            np.argmax(np.asarray(self.y_true), axis=1),
            np.argmax(np.asarray(self.y_hat), axis=1),
            labels=class_ids,
        )
        disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=display_labels)
        disp.plot()
        plt.xticks(rotation=90)
        plt.show()

    def compute_accuracy_overall(self) -> float:
        """Return ``accuracy_score`` over all registered samples."""
        self._require_predictions()
        return accuracy_score(self.y_true, self.y_hat)

    def compute_precision_foreach(self) -> np.ndarray:
        """Return the precision of every class (``average=None``)."""
        self._require_predictions()
        return precision_score(self.y_true, self.y_hat, average=None)

    def compute_recall_foreach(self) -> np.ndarray:
        """Return the recall of every class (``average=None``)."""
        self._require_predictions()
        return recall_score(self.y_true, self.y_hat, average=None)

    def compute_f1_foreach(self) -> np.ndarray:
        """Return the F1 score of every class (``average=None``)."""
        self._require_predictions()
        return f1_score(self.y_true, self.y_hat, average=None)

    def compute_metrics(self) -> pd.DataFrame:
        """Return a one-row frame with the unweighted class means of precision, recall and F1."""
        result = pd.DataFrame(
            {
                "Precision score": [np.average(self.compute_precision_foreach())],
                "Recall score": [np.average(self.compute_recall_foreach())],
                "F1 score": [np.average(self.compute_f1_foreach())],
            }
        )
        return result

In [15]:
class PlotManager:
    """Hands out the axes of a ``rows x cols`` subplot grid one sample at a time.

    Sample ``i`` is placed row-major: row ``i // cols``, column ``i % cols``.
    The grid is created by ``init`` and released by ``finish``.
    """

    def __init__(self, cols: int, samples: int) -> None:
        # Ceiling division: just enough rows for all samples, at least one.
        self.rows = max(1, -(-samples // cols))
        self.cols = cols
        self.samples = samples
        self.index = 0
        self.fig = None
        self.axs = []

    def init(self):
        """Create the subplot grid and reset the ``next`` cursor."""
        # squeeze=False keeps ``axs`` two-dimensional even for one row or column.
        fig, axs = plt.subplots(self.rows, self.cols, squeeze=False)
        plt.tight_layout(h_pad=2, w_pad=2, pad=0)

        self.index = 0
        self.fig = fig
        self.axs = axs

    def finish(self):
        """Show the figure and drop the references to it."""
        plt.show()

        self.fig = None
        self.axs = []

    def __len__(self):
        return self.samples

    def __getitem__(self, i: int):
        """Return the axes of sample ``i``; raise ``IndexError`` outside ``0..samples-1``."""
        if not 0 <= i < self.samples:
            raise IndexError(f"sample index {i} out of range for {self.samples} samples")
        return self.axs[divmod(i, self.cols)]

    def __iter__(self):
        """Iterate over the axes of all samples in order."""
        return (self[i] for i in range(self.samples))

    def __next__(self):
        """Return the axes at the internal cursor and advance it."""
        if self.index >= self.samples:
            raise StopIteration
        subplot = self[self.index]
        self.index += 1

        return subplot

In [16]:
# One-hot encode the dataset, then build the k-fold splitter from the result.
features_dataframe, labels_dataframe, variable_indexes = (
    dataset_manager.process_dataframe_one_hot(arg_dataset=dataset_dataframe)
)
k_fold_cross_validation = KFoldCrossValidation(
    feature_to_indexes=variable_indexes,
    labels_dataframe=labels_dataframe,
    features_dataframe=features_dataframe,
)

### Algorithm utility functions

In [17]:
class ClassifierRunner:
    """Trains, cross-validates, plots and reports a classifier family.

    For every parameter set the runner evaluates the classifier on each
    feature combination (drawing its decision boundary on a subplot) and then
    on the full feature list (printing per-class metrics and a confusion
    matrix).
    """

    _k_fold_cross_validation: KFoldCrossValidation
    _plot_manager: PlotManager
    _classifier_generator: t.Callable[[t.Dict], BaseEstimator]
    _name: str
    _output_label: str

    def __init__(
        self,
        k_fold_cross_validation: KFoldCrossValidation,
        plot_manager: PlotManager,
        classifier_generator: t.Callable[[t.Dict], BaseEstimator],
        name: str,
        output_label: str,
    ):
        self._k_fold_cross_validation = k_fold_cross_validation
        self._plot_manager = plot_manager
        self._classifier_generator = classifier_generator
        self._name = name
        self._output_label = output_label

    @staticmethod
    def _zero_metrics() -> pd.DataFrame:
        """Return the all-zero metrics frame used as the initial "best" value."""
        return pd.DataFrame(
            {"Precision score": [0], "Recall score": [0], "F1 score": [0]}
        )

    def run(
        self,
        params_sets: t.List[t.Dict],
        label_combinations: t.List[t.Tuple[str, ...]],
        all_features: t.List[str],
    ):
        """Evaluate every parameter set and return the best ``(classifier, metrics)``.

        "Best" is the highest "F1 score"; the classifier is ``None`` when no
        parameter set scores above zero.
        """
        best_classifier = None
        best_metrics = self._zero_metrics()

        for params_set in params_sets:
            classifier, metrics = self.run_for_parameter_set(
                params_set=params_set,
                label_combinations=label_combinations,
                all_features=all_features,
            )

            # Keep the best result across parameter sets, not merely the last one.
            if metrics["F1 score"][0] > best_metrics["F1 score"][0]:
                best_classifier = classifier
                best_metrics = metrics

        return best_classifier, best_metrics

    def run_for_parameter_set(
        self,
        params_set: t.Dict,
        label_combinations: t.List[t.Tuple[str, ...]],
        all_features: t.List[str],
    ):
        """Evaluate one parameter set on every feature combination.

        Each combination is cross-validated, its decision boundary and data
        points are drawn on the next subplot, and its metrics are displayed.
        Afterwards the classifier is evaluated on ``all_features``.

        Returns:
            ``(classifier, metrics)`` of the feature combination with the
            highest "F1 score" (``None`` and zero metrics if none scores above zero).
        """
        best_classifier = None
        best_metrics = self._zero_metrics()

        self._plot_manager.init()
        self._display_header(params_set)

        for subplot, label_combination in zip(self._plot_manager, label_combinations):
            X, Y = self._k_fold_cross_validation.get_train_val_full(label_combination)

            current_classifier, current_metrics, _ = (
                self.run_for_k_fold_cross_validation(
                    params_set=params_set,
                    selected_features=label_combination,
                )
            )

            if current_metrics["F1 score"][0] > best_metrics["F1 score"][0]:
                best_metrics = current_metrics
                best_classifier = current_classifier

            DecisionBoundaryDisplay.from_estimator(
                OneHotToNumericPipeline(current_classifier),
                X,
                cmap=plt.cm.RdYlBu,
                response_method="predict",
                ax=subplot,
                xlabel=label_combination[0],
                ylabel=label_combination[1],
            )

            # Overlay the samples, coloured by their true class index.
            subplot.scatter(
                X[:, 0],
                X[:, 1],
                c=np.argmax(Y, axis=1),
                cmap=plt.cm.RdYlBu,
                linewidths=0.25,
                edgecolor="black",
                s=5,
            )

            self.display_metrics_section(
                label_combination=label_combination,
                current_metrics=current_metrics,
                output_label=self._output_label,
            )

        self._plot_manager.finish()
        self.evaluate_classifier_using_k_fold_for_all(
            params_set=params_set,
            all_features=all_features,
        )

        return best_classifier, best_metrics

    def _display_header(self, params_set):
        """Display the classifier name and the parameter set being evaluated."""
        display(HTML(f"<h1>{self._name} classifier</h1>"))
        display(HTML(f"<p>Parameters: {params_set}.</p>"))

    def display_metrics_section(
        self,
        label_combination: t.Sequence[str],
        current_metrics: pd.DataFrame,
        output_label: str,
    ):
        """Display the metrics obtained for one feature combination."""
        display(
            HTML(
                f"<p>For input label combination {label_combination} and output label '{output_label}', metrics were:</p>"
            )
        )
        display(current_metrics)

    def run_for_k_fold_cross_validation(
        self,
        params_set: t.Dict,
        selected_features: t.Tuple[str, ...],
    ):
        """Cross-validate one parameter set on ``selected_features``.

        A fresh classifier is trained per fold and its validation predictions
        are accumulated; finally a classifier is trained on all folds together.

        Returns:
            ``(classifier_trained_on_all_data, metrics_frame, confusion_matrix)``.
        """
        # Use the splitter injected into this runner rather than a module global.
        splitter = self._k_fold_cross_validation
        current_confusion_matrix = ConfusionMatrix(selected_features)

        for it in range(len(splitter.k_fold_data)):
            (X_train, Y_train), (X_test, Y_test) = splitter.get_train_val_split(
                it, selected_features
            )

            classifier = self._classifier_generator(params_set)
            classifier.fit(X_train, Y_train)

            Y_hat = classifier.predict(X_test)
            current_confusion_matrix.register_predictions(
                arg_y_true=Y_test, arg_y_hat=Y_hat
            )

        X_train, Y_train = splitter.get_train_val_full(selected_features)
        classifier = self._classifier_generator(params_set)
        classifier.fit(X_train, Y_train)

        return (
            classifier,
            current_confusion_matrix.compute_metrics(),
            current_confusion_matrix,
        )

    def _display_per_class_scores(self, heading_html: str, scores) -> None:
        """Display a heading followed by a table of one score per output class."""
        display(HTML(heading_html))
        display(HTML(pd.DataFrame(scores, index=all_output_classes).to_html()))

    def evaluate_classifier_using_k_fold_for_all(
        self,
        params_set: t.Dict,
        all_features: t.List[str],
    ):
        """Cross-validate on ``all_features`` and display per-class metrics and the confusion matrix."""
        display(HTML(f"<h1>Classifier with features: {all_features}</h1>"))

        # Named so it does not shadow the imported ``confusion_matrix`` function.
        _, _, fold_confusion_matrix = self.run_for_k_fold_cross_validation(
            params_set=params_set,
            selected_features=all_features,
        )

        self._display_per_class_scores(
            "<h3>Precision:</h3>", fold_confusion_matrix.compute_precision_foreach()
        )
        self._display_per_class_scores(
            "<h3>Recall</h3>", fold_confusion_matrix.compute_recall_foreach()
        )
        self._display_per_class_scores(
            "<h3>F1 Score</h3>", fold_confusion_matrix.compute_f1_foreach()
        )

        fold_confusion_matrix.plot_confusion_matrix()


class ClassifierRunnerWithPresetFeatures:
    """Binds a ``ClassifierRunner`` to fixed feature lists.

    Callers only supply a parameter set; the feature combinations and the full
    feature list given at construction time are reused for every run.
    """

    _label_combinations: t.List[t.Tuple[str, ...]]
    _all_features: t.List[str]
    _classifier_runner: ClassifierRunner

    def __init__(
        self,
        label_combinations: t.List[t.Tuple[str, ...]],
        all_features: t.List[str],
        classifier_runner: ClassifierRunner,
    ) -> None:
        self._classifier_runner = classifier_runner
        self._all_features = all_features
        self._label_combinations = label_combinations

    def run(self, params_set: t.Dict):
        """Delegate to ``run_for_parameter_set`` with the preset features."""
        runner = self._classifier_runner
        return runner.run_for_parameter_set(
            params_set, self._label_combinations, self._all_features
        )

In [18]:
class GreedyMetaOptimiser:
    """Greedy, hill-climber-like hyperparameter search with patience.

    Each iteration derives trial parameter sets from the best set found so
    far, evaluates them through the classifier runner and keeps the one with
    the highest "F1 score".
    """

    _classifier_runner: "ClassifierRunnerWithPresetFeatures"
    _k: int
    _patience: int

    def __init__(
        self,
        classifier_runner: "ClassifierRunnerWithPresetFeatures",
        k: int = 10,
        patience: int = 5,
    ) -> None:
        self._classifier_runner = classifier_runner
        self._k = k
        self._patience = patience

    def run(
        self,
        params_set: t.Dict[str, t.Union[float, str]],
        params_set_variants: t.Dict[str, t.Tuple[t.Union[float, str], ...]],
    ):
        """
        Greedy, hillclimber-like hyperparameter meta-optimiser, with patience functionality.

        For numerical, params_set_variants[key] is a tuple of multiplicative factors. E.g.: (2, 1, 0.5)
        For categorial, params_set_variants[key] is a tuple of categories. E.g.: ("gini", "log-entropy")

        Runs at most ``k`` iterations and stops early once ``patience``
        consecutive iterations bring no F1 improvement.

        Returns ``(best_params, best_classifier, best_metrics)``.
        """

        best_params = params_set
        best_classifier = None
        best_metrics = pd.DataFrame(
            {"Precision score": [0], "Recall score": [0], "F1 score": [0]}
        )
        previous_best_metrics = best_metrics
        current_patience = self._patience

        for i in range(self._k):
            params_set_trials = self.build_params_trials(
                params_set=best_params, params_set_variants=params_set_variants
            )
            (
                best_classifier,
                best_metrics,
                best_params,
            ) = self.pick_best_params(
                params_set_trials=params_set_trials,
                best_classifier=best_classifier,
                best_metrics=best_metrics,
                best_params=best_params,
            )

            if previous_best_metrics["F1 score"][0] < best_metrics["F1 score"][0]:
                current_patience = self._patience
            else:
                current_patience -= 1

            previous_best_metrics = best_metrics

            self.display_state(best_params, best_metrics, i)

            # Checked after the update so that no further (expensive) round is
            # evaluated once patience has run out.
            if current_patience <= 0:
                break

        return best_params, best_classifier, best_metrics

    def display_state(self, best_params: t.Dict, best_metrics: pd.DataFrame, i: int):
        """Print the best F1 score and parameters after iteration ``i``."""
        print(
            f"Iteration {i}: best F1 score = {best_metrics['F1 score'][0]}, best_params = {best_params}"
        )

    def build_params_trials(self, params_set: t.Dict, params_set_variants: t.Dict):
        """Return one trial per parameter that has variants.

        Each trial is an independent copy of ``params_set`` in which exactly
        one parameter is changed using a randomly picked variant: ints and
        floats are multiplied by it (ints are truncated back to ``int``),
        strings are replaced by it. ``params_set`` itself is left untouched.
        """
        params_set_trials = []

        for params_set_key, param in params_set.items():
            param_variants = params_set_variants.get(params_set_key)

            if not param_variants:
                continue

            picked_variant = param_variants[np.random.randint(0, len(param_variants))]

            # A fresh copy per trial: sharing one dict would make every trial
            # the same object carrying all accumulated changes.
            trial = copy.copy(params_set)

            if isinstance(param, bool):
                # bool is an int subclass; leave it unchanged rather than scaling it.
                pass
            elif isinstance(param, int):
                trial[params_set_key] = int(param * picked_variant)
            elif isinstance(param, float):
                trial[params_set_key] = param * picked_variant
            elif isinstance(param, str):
                trial[params_set_key] = picked_variant

            params_set_trials.append(trial)

        return params_set_trials

    def pick_best_params(
        self,
        params_set_trials: t.List[t.Dict],
        best_classifier: t.Optional["BaseEstimator"],
        best_metrics: pd.DataFrame,
        best_params: t.Dict,
    ):
        """Evaluate every trial and return the best ``(classifier, metrics, params)``.

        The incoming best values are kept unless a trial reaches a strictly
        higher "F1 score".
        """
        for params_set_trial in params_set_trials:
            classifier, metrics = self._classifier_runner.run(
                params_set=params_set_trial
            )

            if metrics["F1 score"][0] > best_metrics["F1 score"][0]:
                best_classifier = classifier
                best_metrics = metrics
                best_params = params_set_trial

        return (
            best_classifier,
            best_metrics,
            best_params,
        )

## Classifiers

### 1. Decision trees

In [None]:
# Gini criterion with each of the two splitter strategies.
params_sets = [
    {"criterion": "gini", "splitter": splitter} for splitter in ("best", "random")
]


classifier_runner = ClassifierRunner(
    name="Decision trees",
    output_label=LABEL_VARIABLE,
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
    classifier_generator=lambda params: DecisionTreeClassifier(
        splitter=params["splitter"], criterion=params["criterion"]
    ),
)
# Assigned to "_" so the notebook does not echo the returned tuple.
_ = classifier_runner.run(
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    params_sets=params_sets,
)

In [None]:
"""
Decision tree with greedy hyperparameter meta-optimisation
"""

# Starting point of the search and the categories each parameter may take.
params_set = dict(criterion="gini", splitter="best")
params_set_variants = dict(
    criterion=("gini", "entropy", "log_loss"),
    splitter=("best", "random"),
)


classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    classifier_runner=ClassifierRunner(
        name="Decision trees",
        output_label=LABEL_VARIABLE,
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
        classifier_generator=lambda params: DecisionTreeClassifier(
            splitter=params["splitter"], criterion=params["criterion"]
        ),
    ),
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set_variants=params_set_variants,
    params_set=params_set,
)

### 2. RandomForest or ExtraTrees

In [None]:
# Gini criterion with each of the two splitter strategies.
params_sets = [
    {"criterion": "gini", "splitter": "best"},
    {"criterion": "gini", "splitter": "random"},
]


# NOTE(review): ExtraTreeClassifier comes from sklearn.tree and is a single
# tree; the section heading mentions RandomForest / ExtraTrees ensembles.
# Confirm which one is intended.
classifier_runner = ClassifierRunner(
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(cols=3, samples=len(SELECTED_FEATURES_COMBINATIONS)),
    classifier_generator=lambda params_set: ExtraTreeClassifier(
        criterion=params_set["criterion"], splitter=params_set["splitter"]
    ),
    # Was "Decision trees" (copy-paste), which mislabelled the report header.
    name="ExtraTrees",
    output_label=LABEL_VARIABLE,
)
_ = classifier_runner.run(
    params_sets=params_sets,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
)

In [None]:
"""
ExtraTrees with greedy hyperparameter meta-optimisation
"""

# Starting point of the search and the categories each parameter may take.
params_set = dict(criterion="gini", splitter="best")
params_set_variants = dict(
    criterion=("gini", "entropy", "log_loss"),
    splitter=("best", "random"),
)


classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    classifier_runner=ClassifierRunner(
        name="ExtraTrees",
        output_label=LABEL_VARIABLE,
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
        classifier_generator=lambda params: ExtraTreeClassifier(
            splitter=params["splitter"], criterion=params["criterion"]
        ),
    ),
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set_variants=params_set_variants,
    params_set=params_set,
)


### 3. XGBoost

In [None]:
# Three (learning rate, boosting rounds) pairs; every other setting is shared.
params_sets = [
    {
        "max_depth": 2,
        "learning_rate": learning_rate,
        "objective": "multi:softmax",
        "num_class": dataset_output_classes,
        "rounds": rounds,
    }
    for learning_rate, rounds in ((0.3, 100), (0.2, 200), (0.02, 500))
]


classifier_runner = ClassifierRunner(
    name="XGBoost",
    output_label=LABEL_VARIABLE,
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
    # The wrapper converts between one-hot labels and the class indices XGBClassifier is fed.
    classifier_generator=lambda params: NumericOnlyPipeline(
        xgb.XGBClassifier(
            tree_method="hist",
            objective=params["objective"],
            n_estimators=params["rounds"],
            learning_rate=params["learning_rate"],
            max_depth=params["max_depth"],
        )
    ),
)
_ = classifier_runner.run(
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    params_sets=params_sets,
)

In [None]:
"""
XGBoost with greedy hyperparameter meta-optimisation
"""

# Initial hyperparameters handed to the greedy optimiser.
params_set = {
    "max_depth": 2,
    "learning_rate": 0.3,
    "objective": "multi:softmax",
    "num_class": dataset_output_classes,
    "rounds": 100,
}
# Variants explored per hyperparameter.
# The original literal listed "rounds" twice; Python keeps only the last value
# for a repeated key, so (2, 1, 0.5) was silently discarded. The dead entry is
# removed here and the effective value (1, 0.5) is kept, so the search is
# unchanged.
# NOTE(review): if doubling the number of rounds (or varying "max_depth") was
# intended, add that entry explicitly.
params_set_variants = {
    "rounds": (1, 0.5),
    "learning_rate": (2, 1, 0.1),
}


# Runner bound to the preselected feature combinations. "rounds" in the
# parameter set maps onto XGBClassifier's n_estimators.
classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
    classifier_runner=ClassifierRunner(
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(cols=3, samples=len(SELECTED_FEATURES_COMBINATIONS)),
        classifier_generator=lambda params_set: NumericOnlyPipeline(
            xgb.XGBClassifier(
                max_depth=params_set["max_depth"],
                learning_rate=params_set["learning_rate"],
                n_estimators=params_set["rounds"],
                objective=params_set["objective"],
                tree_method="hist",
            )
        ),
        name="XGBoost trees",
        output_label=LABEL_VARIABLE,
    ),
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set=params_set,
    params_set_variants=params_set_variants,
)

### 4. Naive Bayes

In [None]:
# One configuration per smoothing value; force_alpha is always enabled.
params_sets = [
    {"alpha": alpha, "force_alpha": True}
    for alpha in (0.25, 0.50, 1.00, 2.00, 2.50)
]


classifier_runner = ClassifierRunner(
    name="Naive Bayes",
    output_label=LABEL_VARIABLE,
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
    # Only the two keys MultinomialNB understands are forwarded.
    classifier_generator=lambda params_set: NumericOnlyPipeline(
        MultinomialNB(
            **{key: params_set[key] for key in ("alpha", "force_alpha")}
        )
    ),
)
_ = classifier_runner.run(
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    params_sets=params_sets,
)

In [None]:
"""
Naive Bayes with greedy hyperparameter meta-optimisation
"""

# Initial hyperparameters handed to the greedy optimiser; only "alpha" has
# variants, so "force_alpha" stays fixed.
params_set = dict(alpha=0.25, force_alpha=True)
params_set_variants = dict(
    alpha=(1.5, 1.25, 1, 0.75, 0.5),
)


# Runner bound to the preselected feature combinations.
classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    classifier_runner=ClassifierRunner(
        name="Naive Bayes",
        output_label=LABEL_VARIABLE,
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
        classifier_generator=lambda params_set: NumericOnlyPipeline(
            MultinomialNB(
                **{key: params_set[key] for key in ("alpha", "force_alpha")}
            )
        ),
    ),
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set_variants=params_set_variants,
    params_set=params_set,
)

### 5. k-NN

In [None]:
# Full grid: neighbour count (5, then 3) x search structure x distance metric,
# with the metric varying fastest.
params_sets = [
    {"n_neighbors": n_neighbors, "algorithm": algorithm, "metric": metric}
    for n_neighbors in (5, 3)
    for algorithm in ("ball_tree", "kd_tree")
    for metric in ("minkowski", "cityblock")
]


classifier_runner = ClassifierRunner(
    name="K-NN",
    output_label=LABEL_VARIABLE,
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
    classifier_generator=lambda params_set: KNeighborsClassifier(
        **{
            key: params_set[key]
            for key in ("n_neighbors", "algorithm", "metric")
        }
    ),
)
_ = classifier_runner.run(
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    params_sets=params_sets,
)

# Additional k-fold evaluation of the first configuration on every non-label
# variable. Kept as the last expression so the notebook displays its result.
classifier_runner.evaluate_classifier_using_k_fold_for_all(
    all_features=ALL_VARIABLES_NO_LABEL,
    params_set=dict(n_neighbors=5, algorithm="ball_tree", metric="minkowski"),
)

In [None]:
# Initial hyperparameters handed to the greedy optimiser.
params_set = dict(n_neighbors=5, algorithm="ball_tree", metric="minkowski")
# NOTE(review): the "n_neighbors" variants are fractional, so they are
# presumably scale factors rather than literal neighbour counts - confirm how
# GreedyMetaOptimiser turns them into the integer KNeighborsClassifier needs.
params_set_variants = dict(
    n_neighbors=(1.25, 0.75),
    algorithm=("ball_tree", "kd_tree"),
    metric=("minkowski", "cityblock"),
)


# Runner bound to the preselected feature combinations.
classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    classifier_runner=ClassifierRunner(
        name="K-NN",
        output_label=LABEL_VARIABLE,
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
        classifier_generator=lambda params_set: KNeighborsClassifier(
            **{
                key: params_set[key]
                for key in ("n_neighbors", "algorithm", "metric")
            }
        ),
    ),
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set_variants=params_set_variants,
    params_set=params_set,
)

### 6. Neural networks

In [112]:
class RNModel(BaseEstimator, ClassifierMixin):
    """Small dense Keras network exposed through a scikit-learn style API.

    Inputs are standardised with a ``StandardScaler`` fitted in ``fit``. The
    network has two hidden dense layers (128 and 64 units) followed by a
    7-unit softmax output layer.

    Parameters
    ----------
    batch_size : int
        Mini-batch size forwarded to ``keras.Model.fit``.
    epochs : int
        Number of training epochs forwarded to ``keras.Model.fit``.
    lr : float
        Learning rate set on the optimizer when the model is compiled.
    hidden_activation : str
        Activation used by both hidden layers.
    optimizer : str or keras optimizer
        Identifier resolved with ``keras.optimizers.get`` (e.g. ``"adam"``).
    """

    def __init__(
        self,
        batch_size=10,
        epochs=4,
        lr=0.001,
        hidden_activation="relu",
        optimizer="adam",
    ):
        # Set eagerly, before any training has happened.
        # NOTE(review): presumably so sklearn helpers that check for fitted
        # attributes accept the wrapper - confirm against the callers.
        self.is_fitted_ = True

        self.batch_size = batch_size
        self.epochs = epochs
        self.lr = lr
        self.optimizer = optimizer

        self.model = None
        self.hidden_activation = hidden_activation
        self.scaler = StandardScaler()

    def make_model(self, size: int):
        """Build and compile the network for inputs with ``size`` features.

        Raises ``AssertionError`` if a model has already been built on this
        instance.
        """
        assert self.model is None

        self.model = keras.Sequential(
            [
                keras.Input(shape=(size,)),
                layers.Dense(128, activation=self.hidden_activation),
                layers.Dense(64, activation=self.hidden_activation),
                layers.Dense(7, activation="softmax"),
            ]
        )

        # Resolve the identifier to an optimizer object so the configured
        # learning rate is actually applied. Passing the bare identifier to
        # ``compile`` left ``self.lr`` unused and trained with the optimizer's
        # default rate.
        optimizer = keras.optimizers.get(self.optimizer)
        optimizer.learning_rate = self.lr

        self.model.compile(
            loss="categorical_crossentropy",
            optimizer=optimizer,
            metrics=["accuracy"],
        )

    def fit(self, X, y=None):
        """Fit the scaler and train the network on ``X`` / ``y``.

        ``X`` must be two-dimensional; its second dimension sizes the input
        layer. ``y`` is presumably one-hot encoded with 7 columns (categorical
        cross-entropy against a 7-unit softmax) - TODO confirm with the caller.

        Returns whatever ``keras.Model.fit`` returns (not ``self``).
        """
        # Was ``np.shapeJ``, which does not exist and raised AttributeError.
        self.make_model(np.shape(X)[1])
        self.scaler.fit(X)

        return self.model.fit(
            self.scaler.transform(X), y, batch_size=self.batch_size, epochs=self.epochs
        )

    def predict(self, X, y=None):
        """Return a boolean array: True where the predicted output exceeds 0.5.

        A row contains no True entry when no output exceeds the threshold.
        ``y`` is accepted but ignored.
        """
        return (self.model.predict(self.scaler.transform(X)) > 0.5).astype(bool)

    def predict_proba(self, X, y=None):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    @property
    def classes_(self):
        """One ``[False, True]`` class array for each of the 7 outputs."""
        return [np.array([False, True]) for _ in range(7)]


In [None]:
# Configurations differ in learning rate, epoch count, hidden activation and
# optimizer.
params_sets = [
    {"lr": 0.004, "epochs": 10, "hidden_activation": "relu", "optimizer": "adam"},
    {"lr": 0.01, "epochs": 10, "hidden_activation": "relu", "optimizer": "adam"},
    {"lr": 0.01, "epochs": 10, "hidden_activation": "relu", "optimizer": "sgd"},
    {"lr": 0.002, "epochs": 5, "hidden_activation": "relu", "optimizer": "adam"},
    {"lr": 0.002, "epochs": 10, "hidden_activation": "sigmoid", "optimizer": "adam"},
]


classifier_runner = ClassifierRunner(
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(cols=3, samples=len(SELECTED_FEATURES_COMBINATIONS)),
    # Forward every key of the parameter set. Previously only "lr" and
    # "epochs" were passed, so the "sgd" and "sigmoid" configurations silently
    # trained with RNModel's defaults ("adam" / "relu").
    classifier_generator=lambda params_set: RNModel(
        lr=params_set["lr"],
        epochs=params_set["epochs"],
        hidden_activation=params_set["hidden_activation"],
        optimizer=params_set["optimizer"],
    ),
    name="Neural Network",
    output_label=LABEL_VARIABLE,
)
_ = classifier_runner.run(
    params_sets=params_sets,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
)

In [None]:
"""
Neural networks with greedy hyperparameter meta-optimisation
"""

# Initial hyperparameters handed to the greedy optimiser and the variants it
# may try for each of them.
params_set = {"lr": 0.002, "epochs": 5, "hidden_activation": "relu", "optimizer": "adam"}
params_set_variants = {
    "lr": (2, 1.5, 1, 0.75, 0.5),
    "epochs": (2, 1.5, 1, 0.5, 0.25),
    "hidden_activation": ("relu", "sigmoid"),
    "optimizer": ("adam", "sgd"),
}


classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
    classifier_runner=ClassifierRunner(
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(cols=3, samples=len(SELECTED_FEATURES_COMBINATIONS)),
        # Forward every searched key. Previously only "lr" and "epochs" were
        # passed, so the "hidden_activation" and "optimizer" variants could
        # never change the model being evaluated.
        classifier_generator=lambda params_set: RNModel(
            lr=params_set["lr"],
            epochs=params_set["epochs"],
            hidden_activation=params_set["hidden_activation"],
            optimizer=params_set["optimizer"],
        ),
        name="Neural Network",
        output_label=LABEL_VARIABLE,
    ),
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set=params_set,
    params_set_variants=params_set_variants,
)

### 7. SVM

In [None]:
# Four kernels with gamma="auto", plus an RBF kernel with an explicit gamma.
params_sets = [
    {"kernel": kernel, "gamma": "auto"}
    for kernel in ("rbf", "sigmoid", "poly", "linear")
] + [{"kernel": "rbf", "gamma": 0.1}]


classifier_runner = ClassifierRunner(
    name="SVM",
    output_label=LABEL_VARIABLE,
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
    # Features are standardised before reaching the SVC.
    classifier_generator=lambda params_set: NumericOnlyPipeline(
        make_pipeline(
            StandardScaler(),
            SVC(**{key: params_set[key] for key in ("kernel", "gamma")}),
        )
    ),
)
_ = classifier_runner.run(
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    params_sets=params_sets,
)

# Additional k-fold evaluation of the first configuration on every non-label
# variable. Kept as the last expression so the notebook displays its result.
classifier_runner.evaluate_classifier_using_k_fold_for_all(
    all_features=ALL_VARIABLES_NO_LABEL,
    params_set=dict(kernel="rbf", gamma="auto"),
)

In [None]:
"""
SVM with greedy hyperparameter meta-optimisation
"""

# Initial hyperparameters handed to the greedy optimiser and the variants it
# may try. "gamma" has no variants and therefore stays fixed.
params_set = {"kernel": "rbf", "gamma": "auto", "C": 1.0, "degree": 3}
params_set_variants = {
    "kernel": ("rbf", "sigmoid", "poly", "linear"),
    "C": (2, 1.5, 1, 0.75, 0.5),
    "degree": (2, 1.5, 1, 0.75, 0.5),
}


classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
    classifier_runner=ClassifierRunner(
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(cols=3, samples=len(SELECTED_FEATURES_COMBINATIONS)),
        # Forward "C" and "degree" as well. Previously only "kernel" and
        # "gamma" reached SVC, so every "C"/"degree" variant evaluated the
        # same model.
        classifier_generator=lambda params_set: NumericOnlyPipeline(
            make_pipeline(
                StandardScaler(),
                SVC(
                    kernel=params_set["kernel"],
                    gamma=params_set["gamma"],
                    C=params_set["C"],
                    # SVC requires an integer degree.
                    # NOTE(review): the variants above are fractional, so the
                    # value is rounded defensively - confirm how
                    # GreedyMetaOptimiser derives integer parameters.
                    degree=int(round(params_set["degree"])),
                ),
            )
        ),
        name="SVM",
        output_label=LABEL_VARIABLE,
    ),
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set=params_set,
    params_set_variants=params_set_variants,
)

### 8. Logistic regression

In [None]:
# Grid over C (1.0, then 0.5) x intercept_scaling, with penalty and tolerance
# held fixed; intercept_scaling varies fastest.
# NOTE(review): per the scikit-learn docs intercept_scaling is only used by the
# "liblinear" solver; no solver is set here, so these variants may behave
# identically - confirm.
params_sets = [
    {"penalty": "l2", "tol": 1e-04, "C": C, "intercept_scaling": intercept_scaling}
    for C in (1.0, 0.5)
    for intercept_scaling in (0.25, 0.50, 1.00)
]


classifier_runner = ClassifierRunner(
    name="Logistic Regression",
    output_label=LABEL_VARIABLE,
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
    classifier_generator=lambda params_set: NumericOnlyPipeline(
        LogisticRegression(
            **{
                key: params_set[key]
                for key in ("penalty", "tol", "C", "intercept_scaling")
            }
        )
    ),
)
_ = classifier_runner.run(
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    params_sets=params_sets,
)

In [None]:
"""
Logistic regression with greedy hyperparameter meta-optimisation
"""

# Initial hyperparameters handed to the greedy optimiser; only "C" and
# "intercept_scaling" have variants, so penalty and tolerance stay fixed.
params_set = dict(penalty="l2", tol=1e-04, C=1.0, intercept_scaling=0.25)
params_set_variants = dict(
    C=(1.5, 1.25, 1, 0.75, 0.5),
    intercept_scaling=(1.5, 1.25, 1, 0.75, 0.5),
)


# Runner bound to the preselected feature combinations.
classifier_runner_with_preset_features = ClassifierRunnerWithPresetFeatures(
    classifier_runner=ClassifierRunner(
        name="Logistic Regression",
        output_label=LABEL_VARIABLE,
        k_fold_cross_validation=k_fold_cross_validation,
        plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
        classifier_generator=lambda params_set: NumericOnlyPipeline(
            LogisticRegression(
                **{
                    key: params_set[key]
                    for key in ("penalty", "tol", "C", "intercept_scaling")
                }
            )
        ),
    ),
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    all_features=SELECTED_FEATURES,
)

greedy_meta_optimiser = GreedyMetaOptimiser(
    classifier_runner=classifier_runner_with_preset_features
)

_ = greedy_meta_optimiser.run(
    params_set_variants=params_set_variants,
    params_set=params_set,
)

### Stacked Model

In [119]:
class StackedModel(BaseEstimator, ClassifierMixin):
    """Two base models whose predictions feed a logistic-regression meta-model.

    The base models are an ``RNModel`` (neural network) and an
    ``ExtraTreeClassifier``. Their predictions are concatenated column-wise
    and used as the input of a ``LogisticRegression`` wrapped in
    ``NumericOnlyPipeline``.

    Parameters
    ----------
    lh_model_config : dict
        Keyword arguments for ``RNModel``.
    rh_model_config : dict
        Keyword arguments for ``ExtraTreeClassifier``.
    logistic : dict
        Must provide "penalty", "tol", "C" and "intercept_scaling" for the
        logistic-regression meta-model.
    """

    def __init__(self, lh_model_config: dict, rh_model_config: dict, logistic: dict):
        # Keep the constructor arguments as same-named attributes:
        # BaseEstimator.get_params (used by clone() and repr()) reads them
        # back with getattr and raises AttributeError when they are missing.
        self.lh_model_config = lh_model_config
        self.rh_model_config = rh_model_config
        self.logistic = logistic

        # Same eager flag as RNModel sets in its constructor.
        # NOTE(review): presumably needed so sklearn helpers that check for
        # fitted attributes accept the wrapper - confirm against the callers.
        self.is_fitted_ = True

        self.lh_model = RNModel(**lh_model_config)
        self.rh_model = ExtraTreeClassifier(**rh_model_config)

        self.logistic_regression = NumericOnlyPipeline(
            LogisticRegression(
                penalty=logistic["penalty"],
                tol=logistic["tol"],
                C=logistic["C"],
                intercept_scaling=logistic["intercept_scaling"],
            )
        )

    def _stack_base_predictions(self, X):
        """Return both base models' predictions for ``X`` joined column-wise."""
        return np.hstack((self.lh_model.predict(X), self.rh_model.predict(X)))

    def fit(self, X, y=None):
        """Fit both base models on ``X`` / ``y``, then fit the meta-model.

        The meta-model is trained on the base models' predictions for the same
        ``X`` they were fitted on. Returns whatever the meta-model's ``fit``
        returns.
        """
        self.lh_model.fit(X, y)
        self.rh_model.fit(X, y)
        return self.logistic_regression.fit(self._stack_base_predictions(X), y)

    def predict(self, X, y=None):
        """Predict with the meta-model from the stacked base predictions.

        ``y`` is accepted but ignored.
        """
        return self.logistic_regression.predict(self._stack_base_predictions(X))

    def predict_proba(self, X, y=None):
        """Not supported; always raises ``NotImplementedError``."""
        raise NotImplementedError()

    @property
    def classes_(self):
        """One ``[False, True]`` class array for each of the 7 outputs."""
        return [np.array([False, True]) for _ in range(7)]


In [None]:
# Single configuration: one sub-dict per component of the stacked model.
params_sets = [
    dict(
        lh_model_config=dict(lr=0.002, epochs=1),
        rh_model_config=dict(criterion="gini", splitter="best"),
        logistic=dict(penalty="l2", tol=1e-04, C=1.0, intercept_scaling=0.25),
    ),
]

classifier_runner = ClassifierRunner(
    name="Stacked model (Neural network + extra trees + logistic regression)",
    output_label=LABEL_VARIABLE,
    k_fold_cross_validation=k_fold_cross_validation,
    plot_manager=PlotManager(samples=len(SELECTED_FEATURES_COMBINATIONS), cols=3),
    # Each of the three sub-dicts is forwarded under its own keyword.
    classifier_generator=lambda params_set: StackedModel(
        **{
            key: params_set[key]
            for key in ("lh_model_config", "rh_model_config", "logistic")
        }
    ),
)
_ = classifier_runner.run(
    all_features=SELECTED_FEATURES,
    label_combinations=SELECTED_FEATURES_COMBINATIONS,
    params_sets=params_sets,
)