### Introduction

Dans cette partie, nous allons tester six modèles sur nos datasets afin de comparer leurs performances.
Les six modèles sont :
 - la régression logistique pénalisée
 - une méthode ensembliste basée sur du bagging
 - les forêts aléatoires
 - une méthode ensembliste basée sur du boosting (AdaBoost)
 - une méthode ensembliste basée sur du stacking
 - le gradient boosting

### Importation des packages

In [1]:
import pandas as pd
import warnings
warnings.filterwarnings("ignore")

from scripts.prepdata import data_recovery
from scripts.models_implementation import *
from scripts.cross_validation import cross_validation
from scripts.convert_to_latex import results_to_latex, inject_table_in_template

### Récupération des données

In [2]:
# Identifiers of the datasets to benchmark. Each name is handed to
# data_recovery() by the model cells, and the list doubles as the row
# index of the results table, so its order matters.
datasets_list = [
    "abalone20", "abalone17", "yeast6",
    "wine4", "libras", "pageblocks",
    "yeast3", "abalone8", "segmentation",
    "hayes", "vehicle", "german",
    "glass", "wine", "pima",
    "iono", "autompg", "balance",
]

### Création du tableau de résultats

In [3]:
# Empty results table: one row per dataset, one column added per model later on.
results_df = pd.DataFrame(index=datasets_list)

### 1. La régression logistique pénalisée

In [4]:
# Hyper-parameter grid handed to cross_validation for the penalized
# logistic regression.
logreg_grid = {
    # 'l1' would also be possible; only l2 and elasticnet are explored here
    "penalty": ["l2", "elasticnet"],
    "C": [0.1, 1.0, 10.0],
    # saga supports the elasticnet penalty
    "solver": ["saga"],
    # only used when penalty='elasticnet'
    "l1_ratio": [0.5],
}

# One cross_validation result per dataset, in datasets_list order.
# map() is lazy, so each dataset is loaded right before it is evaluated.
logreg_list = [
    cross_validation(x, y, logreg_grid, make_penalized_logreg)
    for x, y in map(data_recovery, datasets_list)
]
results_df["logreg"] = logreg_list


### 2. Méthode ensembliste basée sur du bagging

In [5]:
# Hyper-parameter grid handed to cross_validation for the bagging ensemble.
bagging_grid = {
    "n_estimators": [10, 50],
    "max_samples": [0.7, 1.0],
    "max_features": [0.7, 1.0],
    # kept fixed at True (single value, so it is not actually searched)
    "bootstrap": [True],
}

# One cross_validation result per dataset, in datasets_list order.
# map() is lazy, so each dataset is loaded right before it is evaluated.
bagging_list = [
    cross_validation(x, y, bagging_grid, make_bagging)
    for x, y in map(data_recovery, datasets_list)
]
results_df["bagging"] = bagging_list

### 3. Les forêts aléatoires

In [6]:
# Hyper-parameter grid handed to cross_validation for the random forest.
rf_grid = {
    "n_estimators": [50, 100],
    # None = no depth limit
    "max_depth": [None, 10],
    "min_samples_leaf": [1, 4],
    "max_features": ["sqrt", "log2"],
}

# One cross_validation result per dataset, in datasets_list order.
# map() is lazy, so each dataset is loaded right before it is evaluated.
rf_list = [
    cross_validation(x, y, rf_grid, make_random_forest)
    for x, y in map(data_recovery, datasets_list)
]
results_df["randomforest"] = rf_list

### 4. Méthode ensembliste basée sur du boosting : AdaBoost

In [7]:
# Hyper-parameter grid handed to cross_validation for AdaBoost.
adaboost_grid = {"n_estimators": [50, 100], "learning_rate": [0.5, 1.0]}

# One cross_validation result per dataset, in datasets_list order.
# map() is lazy, so each dataset is loaded right before it is evaluated.
adaboost_list = [
    cross_validation(x, y, adaboost_grid, make_adaboost)
    for x, y in map(data_recovery, datasets_list)
]
results_df["adaboost"] = adaboost_list

### 5. Méthode ensembliste basée sur du stacking

In [8]:
# Hyper-parameter grid handed to cross_validation for the stacking ensemble.
stacking_grid = {"cv": [3, 5], "passthrough": [False, True]}

# One cross_validation result per dataset, in datasets_list order.
# map() is lazy, so each dataset is loaded right before it is evaluated.
stacking_list = [
    cross_validation(x, y, stacking_grid, make_stacking)
    for x, y in map(data_recovery, datasets_list)
]
results_df["stacking"] = stacking_list

### 6. Le gradient boosting

In [9]:
# Hyper-parameter grid handed to cross_validation for gradient boosting.
gb_grid = {
    "n_estimators": [50, 100],
    "learning_rate": [0.1, 0.05],
    "max_depth": [3, 5],
    # 0.8 = light bagging, useful as regularization
    "subsample": [1.0, 0.8],
}

# One cross_validation result per dataset, in datasets_list order.
# map() is lazy, so each dataset is loaded right before it is evaluated.
gb_list = [
    cross_validation(x, y, gb_grid, make_gradient_boosting)
    for x, y in map(data_recovery, datasets_list)
]
results_df["gradientboosting"] = gb_list

### Conversion du tableau de résultats en tableau LaTeX

In [10]:
# Turn the dataset index into a regular "Dataset" column before export.
# The guard keeps this cell re-runnable: once "Dataset" is already a column,
# a second reset_index() would raise
# "ValueError: cannot insert Dataset, already exists".
if "Dataset" not in results_df.columns:
    # rename_axis returns a new frame instead of mutating the index name in place
    results_df = results_df.rename_axis("Dataset").reset_index()

# Convert the table with the project helper and inject it into the template,
# requesting algo_performance_results.pdf as the output file.
results_df_latex = results_to_latex(results_df)
inject_table_in_template(results_df_latex, output_pdf="algo_performance_results.pdf")

✅ Compilation réussie → algo_performance_results.pdf
