In [6]:
# import libraries
import sys
import os
# Treat the parent of the current working directory as the project root and
# make its src/ folder importable (the ML_functions import below relies on it).
project_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
src_path = os.path.join(project_root, "src")
# Register the folder only once so that re-running the cell stays idempotent.
if sys.path.count(src_path) == 0:
    sys.path.append(src_path)
from ML_functions import enrich_csv_files_with_distances
import pandas as pd
import numpy as np
import warnings
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error

# NOTE(review): this silences every warning raised from here on, not only
# known-noisy ones, so any convergence or deprecation warning emitted by the
# estimators imported above is hidden too. Consider filtering by category.
warnings.filterwarnings('ignore')

In [2]:

# Feature engineering: run the project helper with ../data/simulated as input
# folder and ../data/enriched as output folder.
# The helper is imported from ML_functions and is not defined in this notebook;
# presumably it adds distance-based columns to each CSV — TODO confirm in src/.
enrich_csv_files_with_distances(
    input_folder="../data/simulated", 
    output_folder="../data/enriched"
)
# Marker printed once the call above has returned.
print("End")

Fichier enrichi : ../data/enriched\12x12_dependance_faible_asymetrie_negative.csv
Fichier enrichi : ../data/enriched\12x12_dependance_faible_asymetrie_nulle.csv
Fichier enrichi : ../data/enriched\12x12_dependance_faible_asymetrie_positive.csv
Fichier enrichi : ../data/enriched\12x12_dependance_forte_asymetrie_negative.csv
Fichier enrichi : ../data/enriched\12x12_dependance_forte_asymetrie_nulle.csv
Fichier enrichi : ../data/enriched\12x12_dependance_forte_asymetrie_positive.csv
Fichier enrichi : ../data/enriched\12x12_dependance_moyenne_asymetrie_negative.csv
Fichier enrichi : ../data/enriched\12x12_dependance_moyenne_asymetrie_nulle.csv
Fichier enrichi : ../data/enriched\12x12_dependance_moyenne_asymetrie_positive.csv
Fichier enrichi : ../data/enriched\15x15_dependance_faible_asymetrie_negative.csv
Fichier enrichi : ../data/enriched\15x15_dependance_faible_asymetrie_nulle.csv
Fichier enrichi : ../data/enriched\15x15_dependance_faible_asymetrie_positive.csv
Fichier enrichi : ../data/en

In [None]:
# Model definitions.
# A fixed seed makes the stochastic estimators (MLP weight initialisation,
# random-forest bootstrapping) give the same scores on every full re-run.
RANDOM_STATE = 42
models = {
    "SVM": SVR(),
    "MLP": MLPRegressor(max_iter=1000, random_state=RANDOM_STATE),
    "Random_Forest": RandomForestRegressor(random_state=RANDOM_STATE)
}

# Output folders: one sub-folder per model under the results root.
base_path = "../results/ML_Results"
os.makedirs(base_path, exist_ok=True)
for model_name in models.keys():
    os.makedirs(os.path.join(base_path, model_name), exist_ok=True)

# Folder holding the enriched datasets.
data_path = "../data/enriched"

# Process every .csv file of the folder. os.listdir() returns entries in
# arbitrary order, so sort them to get the same processing order on every run.
for file in sorted(os.listdir(data_path)):
    if not file.endswith(".csv"):
        continue

    file_path = os.path.join(data_path, file)
    dataset_name = os.path.splitext(file)[0]

    # Load the dataset.
    data = pd.read_csv(file_path)

    # Targets are the columns prefixed with "value_"; every other column is
    # used as a feature.
    # NOTE(review): this assumes all non-target columns are numeric and
    # meaningful predictors — confirm against the enriched file schema.
    target_cols = [col for col in data.columns if col.startswith("value_")]
    feature_cols = [col for col in data.columns if col not in target_cols]
    X = data[feature_cols]
    y = data[target_cols]

    # Positional train/test split (first 70% of the rows / last 30%), no shuffling.
    # NOTE(review): only appropriate if the row order carries no systematic
    # structure, or if an ordered hold-out is intended — confirm.
    n = len(data)
    train_size = int(n * 0.7)

    # Nothing can be fitted or scored without targets or with an empty
    # train/test partition: report and move on instead of failing inside fit().
    if not target_cols or train_size == 0 or train_size == n:
        print(f"SKIPPED_ {file}: no 'value_' columns or not enough rows to split")
        continue

    X_train, X_test = X.iloc[:train_size, :], X.iloc[train_size:, :]
    y_train, y_test = y.iloc[:train_size, :], y.iloc[train_size:, :]

    # Train and evaluate each model, one independent regression per target.
    # NOTE(review): SVR and MLPRegressor are fitted on the features as loaded;
    # no scaling step is visible in this notebook — consider standardising.
    for model_name, model in models.items():
        results = []
        for target_name in target_cols:
            y_train_i = y_train[target_name]
            y_test_i = y_test[target_name]

            # fit() re-estimates the model from scratch, so reusing the same
            # estimator object across targets and datasets does not leak state.
            model.fit(X_train, y_train_i)
            y_pred = model.predict(X_test)

            mae = mean_absolute_error(y_test_i, y_pred)
            rmse = np.sqrt(mean_squared_error(y_test_i, y_pred))

            # Label the row with the real column name rather than a name
            # rebuilt from the loop index, which is wrong whenever the targets
            # are not exactly value_1..value_n in that order.
            results.append([target_name, mae, rmse])

        # One workbook per (model, dataset) pair.
        df_results = pd.DataFrame(results, columns=["Target", "MAE", "RMSE"])
        save_path = os.path.join(base_path, model_name, f"{dataset_name}.xlsx")
        df_results.to_excel(save_path, index=False)

    print(f"OK_ {file}")
