In [3]:
import os
import time
import logging
import numpy as np
import pandas as pd
import tensorflow as tf
import pyswarms as ps
import matplotlib.pyplot as plt
import optuna
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from tensorflow.keras.layers import ConvLSTM1D, Flatten, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

globalpath = "../DataCleaning/A - CSV par bâtiment/"
###############################################################################
# 1) Chargement et préparation des données
###############################################################################
def load_and_prepare_data(csv_path, production_column='production', window_size=24):
    """
    Charge le fichier CSV, scale les données, crée des fenêtres (x,y),
    et renvoie les splits (x_train, y_train, x_test, y_test, x_val, y_val).
    """
    df = pd.read_csv(csv_path)
    # Mise à l'échelle
    scaler = MinMaxScaler(feature_range=(0, 1))
    data_scaled = scaler.fit_transform(df.values)
    # ➕ Scaler dédié uniquement à la colonne cible
    target_scaler = MinMaxScaler()
    target_scaler.fit(df[[production_column]])
    # Retrouver l'index de la colonne cible
    target_col_idx = df.columns.get_loc(production_column)
    
    # Création des fenêtres
    x, y = [], []
    for i in range(window_size, len(data_scaled)):
        x.append(data_scaled[i-window_size:i])
        y.append(data_scaled[i, target_col_idx])
    x, y = np.array(x), np.array(y)
    
    # Split train/test/val
    # Ici, 80% train, 10% test, 10% val (à adapter si besoin)
    train_split_index = int(0.8 * len(x))
    test_split_index  = int(0.9 * len(x))
    
    x_train, y_train = x[:train_split_index], y[:train_split_index]
    x_test,  y_test  = x[train_split_index:test_split_index], y[train_split_index:test_split_index]
    x_val,   y_val   = x[test_split_index:], y[test_split_index:]
    
    # Adapter la forme pour ConvLSTM1D (on insère un channel dimension)
    x_train_conv = np.expand_dims(x_train, axis=2)
    x_test_conv  = np.expand_dims(x_test, axis=2)
    x_val_conv   = np.expand_dims(x_val, axis=2)
    
    return (x_train_conv, y_train,
            x_test_conv,  y_test,
            x_val_conv,   y_val,
            df,target_scaler  )


###############################################################################
# 2) Baseline (e-base) : un simple entraînement avec des hyperparamètres fixes
###############################################################################
def build_baseline_model(input_shape):
    """
    Construit un modèle ConvLSTM basique avec des hyperparamètres
    fixes (par ex. 64 filtres, 64 neurones denses, lr=0.001).
    """
    model = tf.keras.Sequential([
        ConvLSTM1D(filters=64, kernel_size=(1,), activation='tanh',
                   return_sequences=True, input_shape=input_shape),
        ConvLSTM1D(filters=64, kernel_size=(1,), activation='tanh', return_sequences=False),
        Flatten(),
        Dense(units=64, activation='relu'),
        Dense(1, activation="linear")
    ], name="baseline_conv_lstm")
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(loss="mae", optimizer=optimizer)
    return model


###############################################################################
# 3) Modèle paramétrable pour PSO
###############################################################################
def build_convlstm_model(lr, filters1, filters2, dense_units, input_shape):
    """
    Construit et compile un modèle ConvLSTM1D avec hyperparamètres modulables.
    """
    model = tf.keras.Sequential([
        ConvLSTM1D(filters=int(filters1), kernel_size=(1,), activation='tanh',
                   return_sequences=True, input_shape=input_shape),
        ConvLSTM1D(filters=int(filters2), kernel_size=(1,), activation='tanh', return_sequences=False),
        Flatten(),
        Dense(units=int(dense_units), activation='relu'),
        Dense(1, activation="linear")
    ], name="model_conv_lstm")
    
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(loss="mae", optimizer=optimizer)
    return model

def plot_and_save_analysis(y_test, y_pred, save_dir, dataset_name,target_scaler):
    os.makedirs(save_dir, exist_ok=True)
    y_test = target_scaler.inverse_transform(y_test.reshape(-1, 1)).flatten()
    y_pred = target_scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()
    
    # 1. Scatter plot (prédictions vs réel)
    plt.figure(figsize=(6, 6))
    plt.scatter(y_test, y_pred, alpha=0.7, color='orange')
    plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], 'r--', label="Idéal (y = ŷ)")
    plt.xlabel("Valeurs réelles (y)")
    plt.ylabel("Prédictions (ŷ)")
    plt.title(f"{dataset_name} - Prédictions vs Réel")
    plt.legend()
    plt.grid(True)
    scatter_path = os.path.join(save_dir, f"{dataset_name}_scatter.png")
    plt.savefig(scatter_path)
    plt.close()

    # 2. Histogramme des erreurs
    errors = y_test - y_pred
    plt.figure(figsize=(8, 4))
    plt.hist(errors, bins=20, color='orange', edgecolor='black')
    plt.title(f"{dataset_name} - Distribution des erreurs")
    plt.xlabel("Erreur (y - ŷ)")
    plt.ylabel("Fréquence")
    plt.grid(True)
    hist_path = os.path.join(save_dir, f"{dataset_name}_hist.png")
    plt.savefig(hist_path)
    plt.close()

    # 3. Courbe temporelle
    plt.figure(figsize=(10, 4))
    plt.plot(y_test, label="Valeurs réelles", linewidth=2)
    plt.plot(y_pred, '--', label="Prédictions")
    plt.title(f"{dataset_name} - Évolution temporelle")
    plt.xlabel("Index")
    plt.ylabel("Valeur")
    plt.legend()
    plt.grid(True)
    curve_path = os.path.join(save_dir, f"{dataset_name}_courbe.png")
    plt.savefig(curve_path)
    plt.close()

    return scatter_path, hist_path, curve_path
###############################################################################
# 4) Entraînement + évaluation (MAE, MSE, R²) + temps d'exécution
###############################################################################
def train_and_evaluate_model(model, x_train, y_train, x_val, y_val,
                             epochs=50, batch_size=512, verbose=0, dataset_name="dataset"):
    """
    Entraîne le modèle, mesure le temps d'entraînement, et renvoie l'historique.
    """
    stop_early = EarlyStopping(monitor='val_loss', patience=15, restore_best_weights=True)
    start_time = time.time()
    history = model.fit(x_train, y_train,
                        validation_data=(x_val, y_val),
                        epochs=epochs,
                        batch_size=batch_size,
                        verbose=verbose,
                        callbacks=[stop_early])
    training_time = time.time() - start_time
    plt.figure(figsize=(8, 4))
    plt.plot(history.history['loss'], label='train_loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.title(f"{dataset_name}Courbe d'apprentissage (loss)")
    plt.xlabel("Epochs")
    plt.ylabel("MAE")
    plt.legend()
    plt.grid(True)
    plt.savefig(f"{dataset_name}_plots_loss_curve_{model.name}.png")
    plt.close()
    
    return history, training_time


def inference_time_and_metrics(model, x_test, y_test):
    """
    Calcule le temps d'inférence, puis renvoie MAE, MSE, R².
    """
    start_time = time.time()
    preds = model.predict(x_test)
    inference_time = time.time() - start_time
    
    preds = preds.reshape(-1)
    y_test = y_test.reshape(-1)
    
    mae = mean_absolute_error(y_test, preds)
    mse = mean_squared_error(y_test, preds)
    r2  = r2_score(y_test, preds)
    
    return mae, mse, r2, inference_time


###############################################################################
# 5) Fonction objectif pour OPTUNA
###############################################################################

def objective(trial, input_shape, x_train, y_train, x_val, y_val):
    lr = trial.suggest_float('lr', 1e-4, 1e-2, log=True)  # nouvelle syntaxe recommandée
    f1 = trial.suggest_int('filters1', 32, 128)
    f2 = trial.suggest_int('filters2', 32, 128)
    dense = trial.suggest_int('dense_units', 32, 128)

    model = Sequential([
        ConvLSTM1D(filters=f1, kernel_size=(1,), activation='tanh', return_sequences=True, input_shape=input_shape),
        ConvLSTM1D(filters=f2, kernel_size=(1,), activation='tanh', return_sequences=False),
        Flatten(),
        Dense(units=dense, activation='relu'),
        Dense(1)
    ])

    model.compile(optimizer=Adam(learning_rate=lr), loss='mae')

    history = model.fit(
        x_train, y_train,
        validation_data=(x_val, y_val),
        epochs=10,
        batch_size=256,
        verbose=0
    )

    return min(history.history['val_loss'])



###############################################################################
# 6) Boucle sur les datasets et compilation des résultats
###############################################################################
def run_experiments_on_datasets(
    dataset_paths,
    production_column='production',
    window_size=24,
    epochs_baseline=50,
    epochs_optimized=50
):
    """
    - Pour chaque dataset :
        1) Prépare les données
        2) Entraîne le modèle baseline (e-base) et mesure ses métriques
        3) Lance l'optimisation PSO
        4) Entraîne le modèle avec les hyperparams optimisés
        5) Mesure les métriques et temps
        6) Stocke les résultats dans un DataFrame
    """
    results = []
    
    for csv_path in dataset_paths:
        dataset_name = os.path.basename(csv_path).replace('.csv','')
        print(f"\n=== Dataset: {dataset_name} ===")
        
        # 1) Chargement et préparation
        x_train, y_train, x_test, y_test, x_val, y_val, df,target_scaler  = load_and_prepare_data(
            csv_path,
            production_column=production_column,
            window_size=window_size
        )
        input_shape = x_train.shape[1:]
        
        # 2) Modèle baseline
        baseline_model = build_baseline_model(input_shape)
        history_base, t_train_base = train_and_evaluate_model(
            baseline_model, x_train, y_train, x_val, y_val,
            epochs=epochs_baseline, batch_size=512, verbose=0,dataset_name=dataset_name
        )
        mae_base, mse_base, r2_base, t_infer_base = inference_time_and_metrics(baseline_model, x_test, y_test)
        # 3) Optuna
        study = optuna.create_study(direction='minimize')
        study.optimize(lambda trial: objective(trial, input_shape, x_train, y_train, x_val, y_val), n_trials=10)
        print("Best hyperparameters:", study.best_params)

        
        # 4) Entraîner le modèle avec les hyperparamètres optimisés
        best_params = study.best_params
        lr_opt        = best_params['lr']
        filters1_opt  = best_params['filters1']
        filters2_opt  = best_params['filters2']
        dense_opt     = best_params['dense_units']

        # Ajoute ce format dans le dictionnaire des résultats :
        param_opt_string = f"lr={lr_opt:.5f}, f1={filters1_opt}, f2={filters2_opt}, dense={dense_opt}"
        best_model = build_convlstm_model(lr_opt, filters1_opt, filters2_opt, dense_opt, input_shape)
        history_opt, t_train_opt = train_and_evaluate_model(
            best_model, x_train, y_train, x_val, y_val,
            epochs=epochs_optimized, batch_size=512, verbose=0,dataset_name=dataset_name
        )
        
        # 5) Évaluation finale
        mae_opt, mse_opt, r2_opt, t_infer_opt = inference_time_and_metrics(best_model, x_test, y_test)
        
        scatter_path, hist_path, curve_path = plot_and_save_analysis(
            y_test=y_test, 
            y_pred=best_model.predict(x_test).flatten(), 
            save_dir="plots", 
            dataset_name=dataset_name,
            target_scaler=target_scaler
        )
        
        # 6) Stockage des résultats dans un dictionnaire
        result_dict = {
            "Dataset": dataset_name,
            
            # E-base
            "MAE e-base": mae_base,
            "MSE e-base": mse_base,
            "R2 e-base":  r2_base,
            "T(entrainement-e-base)[s]": t_train_base,
            
            
            #Optuna
            "Optuna best R2": r2_opt,
            "Paramètres optimisés": f"lr={lr_opt:.5f}, f1={filters1_opt}, f2={filters2_opt}, dense={dense_opt}",
            
            # Entraînement optimisé
            "T(entrainement-optimisé)[s]": t_train_opt,
            "MAE optimisé": mae_opt,
            "MSE optimisé": mse_opt,
            "R2 optimisé": r2_opt,
            "Graph_scatter": scatter_path,
            "Graph_hist": hist_path,
            "Graph_courbe": curve_path,
            
            # Inférence
            "T(evaluation-inference)[s]": t_infer_opt,
            
            # Nombre d'époques
            "Nombre d'époques e-base": epochs_baseline,
            "Nombre d'époques optimisé": epochs_optimized
        }
        
        results.append(result_dict)
    
    # Conversion en DataFrame
    df_results = pd.DataFrame(results)
    return df_results


###############################################################################
# 7) Lancement final (exemple)
###############################################################################
if __name__ == "__main__":
    # Liste des chemins vers vos CSV
    dataset_paths = [
        #globalpath+"scaled_dataset.csv",
        #globalpath+"batiment_1.csv",
        #globalpath+"batiment_2.csv",
        #globalpath+"batiment_3.csv",
        #globalpath+"batiment_4.csv",
        #globalpath+"batiment_5.csv",
        #globalpath+"batiment_6.csv",
        #globalpath+"batiment_7.csv",
        #globalpath+"batiment_8.csv",
        globalpath+"batiment_9.csv"
    ]
    
    # Paramètres globaux (à adapter)
    production_column = 'production'
    window_size = 24      # taille des fenêtres
    epochs_baseline = 30  # nombre d'époques pour la baseline
    epochs_optimized = 100 # nombre d'époques pour le modèle optimisé
    
    # Lancement des expériences
    df_results = run_experiments_on_datasets(
        dataset_paths,
        production_column=production_column,
        window_size=window_size,
        epochs_baseline=epochs_baseline,
        epochs_optimized=epochs_optimized
    )
    
    # Affichage des résultats finaux
    print("\n=========== RÉSULTATS FINAUX ===========")
    print(df_results)
    # Sauvegarde éventuellement en CSV
    df_results.to_csv("resume_resultatsOptuna.csv", index=False)



=== Dataset: batiment_9 ===


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 193ms/step


[I 2025-06-05 10:39:20,320] A new study created in memory with name: no-name-aed6df85-2f42-495d-990a-b06fb0f0a86f
  super().__init__(**kwargs)
[I 2025-06-05 10:39:32,860] Trial 0 finished with value: 0.19684427976608276 and parameters: {'lr': 0.0022379543000629267, 'filters1': 83, 'filters2': 88, 'dense_units': 113}. Best is trial 0 with value: 0.19684427976608276.
[I 2025-06-05 10:39:44,510] Trial 1 finished with value: 0.2257896363735199 and parameters: {'lr': 0.00011250842680407712, 'filters1': 111, 'filters2': 48, 'dense_units': 79}. Best is trial 0 with value: 0.19684427976608276.
[I 2025-06-05 10:39:57,754] Trial 2 finished with value: 0.20303326845169067 and parameters: {'lr': 0.0011179679623540427, 'filters1': 121, 'filters2': 78, 'dense_units': 42}. Best is trial 0 with value: 0.19684427976608276.
[I 2025-06-05 10:40:08,593] Trial 3 finished with value: 0.1970175802707672 and parameters: {'lr': 0.002140649657360452, 'filters1': 53, 'filters2': 37, 'dense_units': 81}. Best is t

Best hyperparameters: {'lr': 0.0022379543000629267, 'filters1': 83, 'filters2': 88, 'dense_units': 113}
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 201ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step

      Dataset  MAE e-base  MSE e-base  R2 e-base  T(entrainement-e-base)[s]  \
0  batiment_9    0.194877     0.05998   0.051275                  14.943488   

   Optuna best R2                 Paramètres optimisés  \
0        0.060253  lr=0.00224, f1=83, f2=88, dense=113   

   T(entrainement-optimisé)[s]  MAE optimisé  MSE optimisé  R2 optimisé  \
0                    52.714764      0.187082      0.059413     0.060253   

                  Graph_scatter                 Graph_hist  \
0  plots\batiment_9_scatter.png  plots\batiment_9_hist.png   

                  Graph_courbe  T(evaluation-inference)[s]  \
0  plots\batiment_9_courbe.png                     1.20258   

   Nombre d'époques e-base  Nombre d'époques optimisé  
0                