



# Importations

In [None]:
from __future__ import print_function

# Standard libraries
import os
import time
import random
import warnings
from math import sqrt, pow

# Data processing
import pandas as pd
import numpy as np

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Machine learning
import sklearn
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import GroupKFold, GroupShuffleSplit
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score


# Deep learning
import tensorflow as tf
from tensorflow.keras import Sequential, optimizers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout, LSTM, Activation, GRU
import keras
from keras import backend as K

# Optimization
from scipy import optimize
from scipy.stats import spearmanr
from scipy.spatial.distance import pdist, squareform
from hyperopt import fmin, tpe, Trials, hp, STATUS_OK


%matplotlib inline
warnings.filterwarnings('ignore')

SEED = 0


def set_seed(seed=SEED):
    """Seed every random number generator used by this notebook.

    Sets ``PYTHONHASHSEED`` and seeds Python's ``random``, NumPy and
    TensorFlow with the same value.

    Args:
        seed: Value applied to all generators (defaults to ``SEED``).
    """
    # Use the argument, not the module constant, so that callers can
    # actually select a different seed.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)


# Fix the seed once, before any data or model is created
set_seed(SEED)


# Methode.py

In [None]:
# Turn the raw train/test frames into scaled, smoothed frames
def prep_data(train, test, drop_sensors, remaining_sensors, alpha, drop = True):
    """Run the preprocessing chain on the train and test frames.

    Steps: optionally drop ``drop_sensors``, tag rows with their operating
    condition, scale ``remaining_sensors`` per condition, then smooth them
    with ``exponential_smoothing`` (no leading samples are discarded).

    Args:
        train: Training DataFrame.
        test: Test DataFrame.
        drop_sensors: Column names removed when ``drop`` is true.
        remaining_sensors: Sensor columns that are scaled and smoothed.
        alpha: Smoothing factor forwarded to ``exponential_smoothing``.
        drop: Whether ``drop_sensors`` are removed first.

    Returns:
        Tuple ``(train_frame, test_frame)`` after preprocessing.
    """
    if drop:
        train_source = train.drop(drop_sensors, axis=1)
        test_source = test.drop(drop_sensors, axis=1)
    else:
        train_source, test_source = train, test

    train_tagged = add_operating_condition(train_source)
    test_tagged = add_operating_condition(test_source)

    train_scaled, test_scaled = condition_scaler(train_tagged, test_tagged, remaining_sensors)

    train_smoothed = exponential_smoothing(train_scaled, remaining_sensors, 0, alpha)
    test_smoothed = exponential_smoothing(test_scaled, remaining_sensors, 0, alpha)
    return train_smoothed, test_smoothed

def rul_piecewise_fct(X_train, rul):
    """Cap the ``RUL`` column of ``X_train`` at ``rul``.

    The frame is modified in place and also returned.

    Args:
        X_train: DataFrame holding a ``RUL`` column.
        rul: Upper bound applied to every ``RUL`` value.

    Returns:
        The same DataFrame object, with ``RUL`` clipped.
    """
    # Assign the clipped column back instead of calling clip(inplace=True) on
    # the selected column: with pandas Copy-on-Write the chained in-place call
    # works on a temporary and leaves the frame unchanged.
    X_train['RUL'] = X_train['RUL'].clip(upper=rul)

    return X_train

def prepare_data(file_name, dir_path='C:/Users/RA-RV/Documents/Malick/data/'):
    """Load the train, test and test-RUL files of one sub-dataset.

    Reads ``train_<file_name>``, ``test_<file_name>`` and ``RUL_<file_name>``
    from ``dir_path``. All three are parsed as whitespace-separated text
    without a header row. A ``RUL`` column is added to the training frame as
    ``max(Cycle) of the unit - Cycle``.

    Args:
        file_name: Suffix shared by the three files (e.g. ``'FD004.txt'``).
        dir_path: Directory containing the files.

    Returns:
        Tuple ``(df_train, df_test, y_test)``; ``y_test`` has a single
        ``RUL`` column.
    """
    index_names = ['Unit', 'Cycle']
    setting_names = ['Altitude', 'Mach', 'TRA']
    sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']
    col_names = index_names + setting_names + sensor_names

    def _read(prefix, names):
        # sep=r'\s+' is the documented replacement for the deprecated
        # delim_whitespace=True option.
        path = os.path.join(dir_path, prefix + str(file_name))
        return pd.read_csv(path, sep=r'\s+', names=names)

    df_train = _read('train_', col_names)
    # Remaining useful life = last observed cycle of the unit minus current cycle
    last_cycle = df_train.groupby('Unit')['Cycle'].transform('max')
    df_train['RUL'] = last_cycle - df_train['Cycle']

    df_test = _read('test_', col_names)
    y_test = _read('RUL_', ["RUL"])
    return df_train, df_test, y_test


# Tag every row with its operating condition so sensors can later be
# normalized separately for each condition
def add_operating_condition(df):
    """Return a copy of ``df`` with rounded settings and an ``op_cond`` label.

    ``Altitude`` and ``TRA`` are rounded to whole numbers and ``Mach`` to two
    decimals. The three rounded values are rendered as strings and joined with
    ``'_'`` to form the categorical ``op_cond`` column. The input frame is not
    modified.
    """
    tagged = df.copy()

    # (column, number of decimals kept)
    for setting, decimals in (('Altitude', 0), ('Mach', 2), ('TRA', 0)):
        tagged[setting] = tagged[setting].round(decimals=decimals)

    altitude_txt, mach_txt, tra_txt = (
        tagged[setting].astype(str) for setting in ('Altitude', 'Mach', 'TRA')
    )
    tagged['op_cond'] = altitude_txt + '_' + mach_txt + '_' + tra_txt

    return tagged

# normalize the data based on the operational condition
def condition_scaler(df_train, df_test, sensor_names):
    """Min-max scale the sensor columns separately for each operating condition.

    For every distinct ``op_cond`` value in ``df_train`` a ``MinMaxScaler``
    with range (0, 1) is fitted on the matching training rows and applied to
    the matching rows of both frames. Both frames are modified in place.
    Test rows whose condition never occurs in ``df_train`` are left unscaled.

    Args:
        df_train: Training frame with an ``op_cond`` column.
        df_test: Test frame with an ``op_cond`` column.
        sensor_names: Columns to scale.

    Returns:
        Tuple ``(df_train, df_test)`` (the same objects that were passed in).
    """
    # Scaled values are floats; cast first so writing them back into an
    # integer-typed sensor column is not an incompatible-dtype assignment.
    df_train[sensor_names] = df_train[sensor_names].astype('float64')
    df_test[sensor_names] = df_test[sensor_names].astype('float64')

    scaler = MinMaxScaler(feature_range = (0, 1))
    for condition in df_train['op_cond'].unique():
        train_rows = df_train['op_cond'] == condition
        test_rows = df_test['op_cond'] == condition

        scaler.fit(df_train.loc[train_rows, sensor_names])
        df_train.loc[train_rows, sensor_names] = scaler.transform(df_train.loc[train_rows, sensor_names])

        # A condition may be absent from the test frame; scikit-learn refuses
        # to transform an empty selection, so skip it.
        if test_rows.any():
            df_test.loc[test_rows, sensor_names] = scaler.transform(df_test.loc[test_rows, sensor_names])
    return df_train, df_test


# plot one sensor against the RUL
def plot_signal(df, signal_name, unit=None):
    """Plot ``signal_name`` as a function of ``RUL``.

    Args:
        df: DataFrame with ``Unit``, ``RUL`` and ``signal_name`` columns.
        signal_name: Column drawn on the y-axis.
        unit: Unit to plot. When ``None``, every unit whose number is a
            multiple of 10 is drawn on the same figure.
    """
    plt.figure(figsize=(13,5))
    if unit is not None:
        units_to_plot = [unit]
    else:
        # only plotting every 10th unit keeps the figure readable
        units_to_plot = [u for u in df['Unit'].unique() if u % 10 == 0]

    for current_unit in units_to_plot:
        plt.plot('RUL', signal_name,
                 data=df[df['Unit']==current_unit])

    plt.xlim(350, 0)  # reverse the x-axis so RUL counts down to zero
    plt.xticks(np.arange(0, 375, 25))
    plt.ylabel(signal_name)
    plt.xlabel('Remaining Useful Life')
    plt.show()

# denoise the sensor signals with a per-unit exponentially weighted mean
def exponential_smoothing(df, sensors, n_samples, alpha=0.2):
    """Smooth sensor columns per unit and drop each unit's first rows.

    Args:
        df: DataFrame with a ``Unit`` column and the ``sensors`` columns.
        sensors: List of columns to smooth.
        n_samples: Number of leading rows removed from every unit after
            smoothing (expected to be >= 0; 0 keeps everything).
        alpha: Smoothing factor passed to ``ewm`` (default 0.2).

    Returns:
        A new DataFrame; the input frame is left untouched.
    """
    df = df.copy()

    # transform() returns a frame aligned on the original index, so the result
    # can be assigned back without depending on how groupby.apply labels its
    # output (that labelling differs between pandas versions).
    df[sensors] = df.groupby('Unit')[sensors].transform(
        lambda x: x.ewm(alpha=alpha).mean()
    )

    # Drop the first n_samples rows of each unit to reduce filter delay:
    # cumcount() numbers the rows of every unit from 0.
    keep = df.groupby('Unit').cumcount() >= n_samples
    return df[keep]

def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared difference ``y_pred - y_true``."""
    residual = y_pred - y_true
    mean_squared = np.mean(np.square(residual))
    return np.sqrt(mean_squared)

# asymmetric score defined in the paper
def compute_s_score(rul_true, rul_pred):
    """Return the summed asymmetric exponential penalty of the predictions.

    With ``d = rul_pred - rul_true``, each sample contributes
    ``exp(-d/13) - 1`` when ``d < 0`` and ``exp(d/10) - 1`` otherwise.
    """
    error = rul_pred - rul_true
    under_penalty = np.exp(-error/13)-1  # used where the prediction is below the truth
    over_penalty = np.exp(error/10)-1    # used everywhere else
    per_sample = np.where(error < 0, under_penalty, over_penalty)
    return np.sum(per_sample)

# report RMSE and R² of a set of predictions
def evaluate(y_true, y_hat, label='test'):
    """Print the RMSE and R² of ``y_hat`` against ``y_true``.

    Args:
        y_true: Ground-truth values.
        y_hat: Predicted values.
        label: Name of the data set, used as prefix of the printed line.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_hat))
    r2 = r2_score(y_true, y_hat)
    report = '{} set RMSE:{}, R2:{}'.format(label, rmse, r2)
    print(report)

def generate_sequences(data, sequence_length):
    """
    Yield every window of ``sequence_length`` consecutive rows of ``data``.

    ``data`` is indexed as a 2-D array (rows, columns). Windows advance one
    row at a time; nothing is yielded when ``data`` has fewer rows than
    ``sequence_length``.
    """
    last_start = data.shape[0] - sequence_length

    window_start = 0
    while window_start <= last_start:
        yield data[window_start:window_start + sequence_length, :]
        window_start += 1

def generate_data_wrapper(df, sequence_length, columns, unit_nrs=None):
    """
    Build the stacked float32 array of sliding windows for several units.

    For each unit in ``unit_nrs`` (all units of ``df`` when ``None``), the
    rows of that unit restricted to ``columns`` are cut into windows by
    ``generate_sequences``; the windows of all units are stacked in order.
    """
    selected_units = df['Unit'].unique() if unit_nrs is None else unit_nrs

    windows = [
        window
        for unit_nr in selected_units
        for window in generate_sequences(df[df['Unit'] == unit_nr][columns].values, sequence_length)
    ]

    return np.array(windows, dtype=np.float32)


def gen_train_data(df, sequence_length, columns):
    """Yield each run of ``sequence_length`` consecutive rows of ``df[columns]``.

    Windows are NumPy arrays and advance by one row; a frame shorter than
    ``sequence_length`` yields nothing.
    """
    values = df[columns].values
    n_rows = values.shape[0]

    # the last valid window starts at n_rows - sequence_length
    for first in range(n_rows - sequence_length + 1):
        yield values[first:first + sequence_length, :]

def gen_data_wrapper(df, sequence_length, columns, unit_nrs=np.array([])):
    """Stack the training windows of several units into one float32 array.

    Args:
        df: DataFrame with a ``Unit`` column and the ``columns`` to extract.
        sequence_length: Number of rows per window.
        columns: Feature columns placed in each window.
        unit_nrs: Units to include; ``None`` or an empty sequence selects
            every unit of ``df``.

    Returns:
        Array of all windows produced by ``gen_train_data``, in unit order.
    """
    if unit_nrs is None or np.size(unit_nrs) == 0:
        unit_nrs = df['Unit'].unique()

    # Collect the windows in one flat list: concatenating one list per unit
    # fails as soon as a unit is shorter than sequence_length, because its
    # empty list has a different number of dimensions than the others.
    windows = [
        window
        for unit_nr in unit_nrs
        for window in gen_train_data(df[df['Unit']==unit_nr], sequence_length, columns)
    ]
    return np.array(windows, dtype=np.float32)

def create_model(TW , remaining_):
    """Build and compile the LSTM regression network.

    Args:
        TW: Number of time steps per input window.
        remaining_: Sequence of input features; only its length is used.

    Returns:
        A compiled ``Sequential`` model (MSE loss and metric, Adam with a
        learning rate of 0.01).
    """
    n_features = len(remaining_)
    layer_stack = [
        LSTM(units=128, activation='tanh',input_shape=(TW, n_features)),
        Dense(units=128, activation='relu'),
        Dropout(0.1),
        Dense(1, activation='relu'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    adam = tf.keras.optimizers.Adam(learning_rate=0.01)
    model.compile(loss='mse',metrics=['mse'], optimizer=adam)

    return model

def compute_MAPE(y_true, y_hat):
    """Return the mean of ``|(y_true - y_hat) / y_true|`` expressed in percent."""
    relative_error = (y_true - y_hat)/y_true
    return np.mean(np.abs(relative_error))*100

def gen_labels(df, sequence_length, label):
    """Return the label rows that line up with the windows of ``df``.

    The first ``sequence_length - 1`` rows are skipped, so each remaining
    label belongs to the last row of one window (not to the row after it).
    """
    labels = df[label].values
    first_labelled_row = sequence_length - 1
    return labels[first_labelled_row:, :]

def gen_label_wrapper(df, sequence_length, label, unit_nrs=np.array([])):
    """Concatenate the window labels of several units into one float32 array.

    Args:
        df: DataFrame with a ``Unit`` column and the ``label`` columns.
        sequence_length: Window length used to align labels with windows.
        label: List of label column names.
        unit_nrs: Units to include; ``None`` or an empty sequence selects
            every unit of ``df``.

    Returns:
        The labels returned by ``gen_labels`` for each unit, in unit order.
    """
    # np.size also works for plain lists, which have no ``.size`` attribute
    if unit_nrs is None or np.size(unit_nrs) == 0:
        unit_nrs = df['Unit'].unique()

    label_gen = [gen_labels(df[df['Unit']==unit_nr], sequence_length, label)
                for unit_nr in unit_nrs]
    label_array = np.concatenate(label_gen).astype(np.float32)
    return label_array
def gen_test_data(df, sequence_length, columns, mask_value):
    """Yield the single, most recent window of ``df[columns]``.

    When ``df`` has fewer than ``sequence_length`` rows, the window is padded
    at the top with ``mask_value`` and the available rows fill the bottom.

    Args:
        df: Rows of one unit.
        sequence_length: Number of rows in the window.
        columns: Feature columns placed in the window.
        mask_value: Padding value for missing leading rows.

    Yields:
        One array with ``sequence_length`` rows and ``len(columns)`` columns.
    """
    values = df[columns].values
    n_rows = values.shape[0]

    if n_rows < sequence_length:
        # Force a float buffer: letting the dtype follow mask_value would
        # truncate the sensor readings when an integer mask is passed.
        data_matrix = np.full(shape=(sequence_length, len(columns)),
                              fill_value=mask_value, dtype=np.float64)  # pad
        data_matrix[sequence_length - n_rows:, :] = values  # fill with available data
    else:
        data_matrix = values

    # specifically yield the last possible sequence
    stop = data_matrix.shape[0]
    yield data_matrix[stop - sequence_length:stop, :]
def plot_loss(fit_history):
    """Plot training and validation loss per epoch.

    Args:
        fit_history: Object whose ``history`` mapping holds the ``'loss'`` and
            ``'val_loss'`` sequences.
    """
    plt.figure(figsize=(13,5))
    for history_key, legend_label in (('loss', 'train'), ('val_loss', 'validate')):
        curve = fit_history.history[history_key]
        epoch_numbers = range(1, len(curve)+1)  # epochs are numbered from 1
        plt.plot(epoch_numbers, curve, label=legend_label)
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

def new_column (df, column):
    """Write a running row number (1, 2, ..., len(df)) into ``df[column]``.

    The frame is modified in place and returned.
    """
    row_count = len(df)
    df[column] = range(1, row_count + 1)
    return df

# Préparation des données et configuration initiale

In [None]:
# Load the FD004 files and list the sensors that are kept or dropped
train, test, y_test = prepare_data('FD004.txt')
print(train.shape, test.shape, y_test.shape)
sensor_names = ['T20','T24','T30','T50','P20','P15','P30','Nf','Nc','epr','Ps30','phi',
                    'NRf','NRc','BPR','farB','htBleed','Nf_dmd','PCNfR_dmd','W31','W32']

remaining_sensors = ['T24','T30','T50', 'P15', 'P30','Nf','Nc', 'epr','Ps30','phi',
                     'NRf','NRc','BPR', 'farB','htBleed','W31','W32']
drop_sensors = [element for element in sensor_names if element not in remaining_sensors]

# Cap the training RUL. Assign the clipped column back: a chained
# clip(inplace=True) on the selected column does not update the frame when
# pandas Copy-on-Write is active.
rul_piecewise = 130
train['RUL'] = train['RUL'].clip(upper=rul_piecewise)

# Parameter configuration
alpha = 0.2
sequence_length = 40
epochs = 10
activation = 'tanh'
batch_size = 32
input_shape = (sequence_length, len(remaining_sensors))

# Bounds of the hyperparameter search; learning-rate bounds are in log space
space_val = {
    'hidden_size': {
        'min': 32,
        'max': 256,
        'step': 32
    },
    'learning_rate': {
        'min': np.log(1e-5),
        'max': np.log(1e-2)
    },
    'dropout': {
        'min': 0.1,
        'max': 0.5,
        'step': 0.1
    }
}

# Data preparation: preprocess, then cut into windows and labels
X_train_interim, X_test_interim = prep_data(train, test, drop_sensors, remaining_sensors, alpha)
train_array = generate_data_wrapper(X_train_interim, sequence_length, remaining_sensors)
label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

# One (possibly padded) window per test unit; -99. is the padding value
test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit']==unit_nr], sequence_length, remaining_sensors, -99.))
               for unit_nr in X_test_interim['Unit'].unique())
test_array = np.concatenate(list(test_gen)).astype(np.float32)

# The test labels are capped at the same value as the training labels
test_rul = rul_piecewise_fct(y_test, rul_piecewise)
print(train_array.shape, label_array.shape, test_array.shape)

# Output files of the three search stages
path_bootstrap = 'C:/Users/RA-RV/Documents/Malick/data/EO/fd004_bootstrap_s_score.csv'
path_bootstrap2 = 'C:/Users/RA-RV/Documents/Malick/data/EO/fd004_bootstrap2_s_score.csv'
path_grid = 'C:/Users/RA-RV/Documents/Malick/data/EO/fd004_bootbogs_s_score.csv'

(61249, 27) (41214, 26) (248, 1)
(51538, 40, 17) (51538, 1) (248, 40, 17)


# Bayesian optimization avec bootstrap

## Créer n séries bootstrap

In [None]:
def create_multivariate_bootstrap_series(data, sequence_length, n_bootstrap):
    """Create block-bootstrap resamples of a multivariate series.

    ``data`` is cut into consecutive, non-overlapping blocks of
    ``sequence_length`` rows (a trailing partial block is ignored). Each
    bootstrap series is built by drawing as many blocks as there are complete
    blocks, with replacement, and concatenating them. When the length of
    ``data`` is not a multiple of ``sequence_length``, the series is extended
    to the original length with the first rows of its last drawn block.

    Args:
        data: Row-sliceable 2-D data (array or DataFrame).
        sequence_length: Number of rows per block.
        n_bootstrap: Number of series to generate.

    Returns:
        List of ``n_bootstrap`` arrays, each with ``len(data)`` rows.

    Raises:
        ValueError: If series are requested but ``data`` is shorter than one
            block.
    """
    n_timesteps = len(data)
    n_blocks = n_timesteps // sequence_length  # number of complete blocks

    if n_bootstrap > 0 and n_blocks == 0:
        raise ValueError(
            "data has fewer rows than sequence_length; no complete block to resample"
        )

    # Cut the series into blocks
    blocks = [data[i * sequence_length:(i + 1) * sequence_length] for i in range(n_blocks)]

    # Rows still missing after concatenating n_blocks blocks (always smaller
    # than sequence_length, and the result can never be too long)
    remaining_length = n_timesteps - n_blocks * sequence_length

    bootstrap_series_list = []
    for _ in range(n_bootstrap):
        # Resample the blocks with replacement
        sampled_blocks = [blocks[np.random.randint(0, n_blocks)] for _draw in range(n_blocks)]

        if remaining_length:
            # Pad with the beginning of the last drawn block
            sampled_blocks.append(sampled_blocks[-1][:remaining_length])

        bootstrap_series_list.append(np.concatenate(sampled_blocks, axis=0))

    return bootstrap_series_list

## Création et entraînement du modèle

In [None]:
def model_lstm_1layer(input_shape, nodes_per_layer, dropout, activation, learning_rate):
    """Build and compile a single-LSTM-layer regression model.

    Args:
        input_shape: Shape of one input sample, passed to the LSTM layer.
        nodes_per_layer: Number of LSTM units (converted with ``int``).
        dropout: Rate of the dropout layer that follows the LSTM.
        activation: Activation function of the LSTM layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled model (mean squared error loss).
    """
    lstm_units = int(nodes_per_layer)
    layer_stack = [
        LSTM(units=lstm_units, activation=activation, input_shape=input_shape),
        Dropout(dropout),
        Dense(256),
        Dense(1),  # single output for the regression
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

# Hyperopt objective: train one model and score it on the test windows
def train_model(params):
    """Train a model for one hyperparameter sample and return its metrics.

    Reads the module-level ``input_shape``, ``activation``, ``epochs``,
    ``batch_size``, ``train_array``, ``label_array``, ``test_array`` and
    ``test_rul``.

    Args:
        params: Mapping with ``hidden_size``, ``dropout`` and
            ``learning_rate``.

    Returns:
        Dict in Hyperopt's result format. ``loss`` is the S-score (the value
        being minimised); ``s_score``, ``mape`` and ``rmse`` are also
        reported.
    """
    # Build the model
    model = model_lstm_1layer(input_shape, params['hidden_size'], params['dropout'], activation, params['learning_rate'])

    # Train the model
    model.fit(
        train_array, label_array,
        validation_data=(test_array, test_rul),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0  # silence the per-epoch logs
    )

    # Predict on the test windows
    y_pred = model.predict(test_array)

    # Compute RMSE, S-score and MAPE
    rmse = sqrt(mean_squared_error(test_rul, y_pred))
    s_score = compute_s_score(test_rul, y_pred)
    mape = compute_MAPE(test_rul, y_pred)

    # The S-score, not the RMSE, is the quantity Hyperopt minimises
    return {'loss': s_score, 'status': STATUS_OK, 's_score':s_score, 'mape':mape, 'rmse':rmse}

## Apply HyperOpt TPE and store the combination of hyperparameters

In [None]:
# First TPE pass: one Hyperopt search per bootstrap series.
# The search space does not depend on the series, so it is built once.
space = {
    'hidden_size': hp.quniform('hidden_size',
                               space_val['hidden_size']['min'],
                               space_val['hidden_size']['max'],
                               space_val['hidden_size']['step']),

    'learning_rate': hp.loguniform('learning_rate',
                                   space_val['learning_rate']['min'],
                                   space_val['learning_rate']['max']),

    'dropout': hp.quniform('dropout',
                           space_val['dropout']['min'],
                           space_val['dropout']['max'],
                           space_val['dropout']['step'])
}

bootstrap_series_list = create_multivariate_bootstrap_series(train, sequence_length, 10)
results_all = pd.DataFrame()
for i, series in enumerate(bootstrap_series_list):
    print(f"Traitement de la série bootstrap {i + 1}...")

    start_time = time.time()

    series = pd.DataFrame(series, columns=train.columns)
    # Assign the clipped column back; a chained clip(inplace=True) does not
    # update the frame when pandas Copy-on-Write is active.
    series['RUL'] = series['RUL'].clip(upper=rul_piecewise)

    X_train_interim, X_test_interim = prep_data(series, test, drop_sensors, remaining_sensors, alpha)

    # create sequences train, test (train_model reads these module-level arrays)
    train_array = generate_data_wrapper(X_train_interim, sequence_length, remaining_sensors)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit'] == unit_nr], sequence_length, remaining_sensors, -99.))
                for unit_nr in X_test_interim['Unit'].unique())

    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test, rul_piecewise)
    print(train_array.shape, label_array.shape, test_array.shape)

    # Bayesian optimisation with Hyperopt
    trials = Trials()
    best = fmin(
        fn=train_model,
        space=space,
        algo=tpe.suggest,
        max_evals=10,
        trials=trials
    )

    # Retrain with the best hyperparameters found for this series
    model = model_lstm_1layer(input_shape, best['hidden_size'], best['dropout'], activation, best['learning_rate'])

    history = model.fit(
        train_array, label_array,
        validation_data=(test_array, test_rul),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0  # silence the per-epoch logs
    )

    # Predict on the test windows
    y_pred = model.predict(test_array)

    # Compute RMSE, S-score, MAPE and R²
    rmse = sqrt(mean_squared_error(test_rul, y_pred))
    s_score = compute_s_score(test_rul, y_pred)
    mape = compute_MAPE(test_rul, y_pred)
    r2 = r2_score(test_rul, y_pred)

    time_training = time.time() - start_time

    # Append this series' result to the results DataFrame
    results_all = pd.concat([results_all, pd.DataFrame([{
        'bootstrap_series': i + 1,
        'hidden_size': best['hidden_size'],
        'learning_rate': best['learning_rate'],
        'dropout': best['dropout'],
        'rmse': rmse,
        's_score': s_score,
        'mape': mape,
        'r2': r2,
        'training_time': time_training
    }])], ignore_index=True)
    print(results_all)

    # Save to CSV after every iteration so partial results survive a crash
    results_all.to_csv(path_bootstrap, index=False)



Traitement de la série bootstrap 1...
(51545, 40, 17) (51545, 1) (248, 40, 17)
[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 115ms/step
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step 
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step

[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 101ms/step         
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step         
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step          

[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 109ms/step         
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step         
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 21ms/step          

[1m1/8[0m [32m━━[0m[37m━━━━━━━━━━━━━━━━━━[0m [1m0s[0m 100ms/step         
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 19ms/step         
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

# Intervalle de confiance

In [None]:
def intervalle_confiance(file_path):
    """Derive a reduced hyperparameter grid from a results CSV.

    Rows whose ``s_score`` lies outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` are
    removed. Of the remaining rows, only those within one standard deviation
    of the mean ``s_score`` are kept. The distinct hyperparameter values of
    these rows are returned and the size of the resulting grid is printed.

    Args:
        file_path: CSV with ``s_score``, ``dropout``, ``learning_rate`` and
            ``hidden_size`` columns.

    Returns:
        Tuple ``(dropout_values, learning_rate_values, hidden_size_values)``,
        each a list of unique values sorted in descending order.
    """
    df = pd.read_csv(file_path)

    Q1 = df["s_score"].quantile(0.25)
    Q3 = df["s_score"].quantile(0.75)
    IQR = Q3 - Q1

    # Outlier removal. The file must not be read again after this line,
    # otherwise the filtered rows come back and distort the mean and the
    # standard deviation computed below.
    df = df[(df["s_score"] >= (Q1 - 1.5 * IQR)) & (df["s_score"] <= (Q3 + 1.5 * IQR))]

    # Mean and standard deviation of the "s_score" column
    mean_s_score = df["s_score"].mean()
    std_s_score = df["s_score"].std()

    filtered_df = df[(df["s_score"] >= mean_s_score - std_s_score) & (df["s_score"] <= mean_s_score + std_s_score)]

    # remove duplicate values and sort each list in descending order
    dropout_final = sorted(set(filtered_df['dropout'].tolist()), reverse=True)
    learning_final = sorted(set(filtered_df['learning_rate'].tolist()), reverse=True)
    neurons_final = sorted(set(filtered_df['hidden_size'].tolist()), reverse=True)

    print("search space",int(len(dropout_final)*len(learning_final)*len(neurons_final)))
    return dropout_final, learning_final, neurons_final

# 2e TPE


In [None]:
# Second TPE pass: search only among the values retained from the first pass
from hyperopt import space_eval

bootstrap_series_list = create_multivariate_bootstrap_series(train, sequence_length, 10)
results_all = pd.DataFrame()
dropout_first, learning_rate_first, hidden_size_first = intervalle_confiance(path_bootstrap)

# The keys must match what train_model reads ('dropout', not 'dropout_rate');
# the mismatch made every trial fail with KeyError: 'dropout'.
space = {
    'learning_rate': hp.choice('learning_rate', learning_rate_first),
    'dropout': hp.choice('dropout', dropout_first),
    'hidden_size': hp.choice('hidden_size', hidden_size_first)
}

for i, series in enumerate(bootstrap_series_list):
    print(f"Traitement de la série bootstrap {i + 1}...")

    start_time = time.time()

    series = pd.DataFrame(series, columns=train.columns)
    # Assign the clipped column back; a chained clip(inplace=True) does not
    # update the frame when pandas Copy-on-Write is active.
    series['RUL'] = series['RUL'].clip(upper=rul_piecewise)

    X_train_interim, X_test_interim = prep_data(series, test, drop_sensors, remaining_sensors, alpha)

    # create sequences train, test (train_model reads these module-level arrays)
    train_array = generate_data_wrapper(X_train_interim, sequence_length, remaining_sensors)
    label_array = gen_label_wrapper(X_train_interim, sequence_length, ['RUL'])

    test_gen = (list(gen_test_data(X_test_interim[X_test_interim['Unit'] == unit_nr], sequence_length, remaining_sensors, -99.))
                for unit_nr in X_test_interim['Unit'].unique())

    test_array = np.concatenate(list(test_gen)).astype(np.float32)
    test_rul = rul_piecewise_fct(y_test, rul_piecewise)
    print(train_array.shape, label_array.shape, test_array.shape)

    # Bayesian optimisation with Hyperopt
    trials = Trials()
    best_indices = fmin(
        fn=train_model,
        space=space,
        algo=tpe.suggest,
        max_evals=10,
        trials=trials
    )
    # For hp.choice, fmin reports the index of the selected option;
    # space_eval maps those indices back to the actual hyperparameter values.
    best = space_eval(space, best_indices)

    # Retrain with the best hyperparameters found for this series
    model = model_lstm_1layer(input_shape, best['hidden_size'], best['dropout'], activation, best['learning_rate'])

    history = model.fit(
        train_array, label_array,
        validation_data=(test_array, test_rul),
        epochs=epochs,
        batch_size=batch_size,
        verbose=0  # silence the per-epoch logs
    )

    # Predict on the test windows
    y_pred = model.predict(test_array)

    # Compute RMSE, S-score, MAPE and R²
    rmse = sqrt(mean_squared_error(test_rul, y_pred))
    s_score = compute_s_score(test_rul, y_pred)
    mape = compute_MAPE(test_rul, y_pred)
    r2 = r2_score(test_rul, y_pred)

    time_training = time.time() - start_time

    # Append this series' result to the results DataFrame
    results_all = pd.concat([results_all, pd.DataFrame([{
        'bootstrap_series': i + 1,
        'hidden_size': best['hidden_size'],
        'learning_rate': best['learning_rate'],
        'dropout': best['dropout'],
        'rmse': rmse,
        's_score': s_score,
        'mape': mape,
        'r2': r2,
        'training_time': time_training
    }])], ignore_index=True)
    print(results_all)

    # Save to CSV after every iteration so partial results survive a crash
    results_all.to_csv(path_bootstrap2, index=False)



search space 168
<class 'list'>
Traitement de la série bootstrap 1...
(51546, 40, 17) (51546, 1) (248, 40, 17)
  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]

job exception: 'dropout'



  0%|          | 0/10 [00:00<?, ?trial/s, best loss=?]


KeyError: 'dropout'

# intervalle de confiance 2

In [None]:
def intervalle_confiance2(file_path):
    """Derive a reduced hyperparameter grid from a results CSV.

    Rows whose ``s_score`` falls outside ``[Q1 - 1.5*IQR, Q3 + 1.5*IQR]`` are
    dropped first; of the rest, only rows within one standard deviation of
    the mean ``s_score`` are kept. Returns the distinct ``dropout``,
    ``learning_rate`` and ``hidden_size`` values of those rows, each sorted in
    descending order. Prints the grid size and the type of the last list.
    """
    results = pd.read_csv(file_path)

    # Interquartile-range outlier filter
    q1, q3 = (results["s_score"].quantile(q) for q in (0.25, 0.75))
    iqr = q3 - q1
    results = results[results["s_score"].between(q1 - 1.5 * iqr, q3 + 1.5 * iqr)]

    # Keep rows within one standard deviation of the mean score
    center = results["s_score"].mean()
    spread = results["s_score"].std()
    kept = results[results["s_score"].between(center - spread, center + spread)]

    def _unique_descending(column):
        # distinct values of the column, largest first
        return sorted(set(kept[column].tolist()), reverse=True)

    dropout_final = _unique_descending('dropout')
    learning_final = _unique_descending('learning_rate')
    neurons_final = _unique_descending('hidden_size')

    grid_size = len(dropout_final)*len(learning_final)*len(neurons_final)
    print("search space",int(grid_size))
    print(type(neurons_final))
    return dropout_final, learning_final, neurons_final

# Grid Search

In [None]:
def model_lstm_1layer(input_shape, nodes_per_layer, dropout, activation, learning_rate):
    """Build and compile a single-LSTM-layer regression model.

    Args:
        input_shape: Shape of one input sample, passed to the LSTM layer.
        nodes_per_layer: Number of LSTM units (converted with ``int``).
        dropout: Rate of the dropout layer that follows the LSTM.
        activation: Activation function of the LSTM layer.
        learning_rate: Learning rate of the Adam optimizer.

    Returns:
        The compiled model (mean squared error loss).
    """
    lstm_units = int(nodes_per_layer)
    layer_stack = [
        LSTM(units=lstm_units, activation=activation, input_shape=input_shape),
        Dropout(dropout),
        Dense(256),
        Dense(1),  # single output for the regression
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    optimizer = Adam(learning_rate=learning_rate)
    model.compile(loss='mean_squared_error', optimizer=optimizer)
    return model

# Exhaustive grid search over the hyperparameter values retained from the
# second TPE results file. Every combination is trained once and one result
# row per combination is written to path_grid.
dropout, learning_rate, hidden_size = intervalle_confiance2(path_bootstrap2)
# Define the hyperparameter grid
param_grid = {
    'hidden_size': hidden_size,
    'learning_rate': learning_rate,
    'dropout': dropout
}

# Accumulates one result row per trained combination
results_all = pd.DataFrame()
# Running counter of trained combinations (stored in the 'bootstrap_series' column)
i=0

# NOTE: the loop variables reuse the names of the value lists above; this
# works because param_grid already holds references to those lists.
for hidden_size in param_grid['hidden_size']:
    for learning_rate in param_grid['learning_rate']:
        for dropout in param_grid['dropout']:
            print(f"Training with LSTM units={hidden_size}, learning_rate={learning_rate:.4f}, dropout={dropout}")

            start_time = time.time()

            # Build the LSTM model
            model = model_lstm_1layer(input_shape, hidden_size, dropout, activation, learning_rate)

            # Train the model (train_array, label_array, test_array and
            # test_rul are not defined in this cell; they come from earlier cells)
            history = model.fit(
                train_array, label_array,
                validation_data=(test_array, test_rul),
                epochs=epochs,
                batch_size=batch_size,
                verbose=0
            )
            # Evaluate the model on the validation set
            y_pred = model.predict(test_array)
            # Compute RMSE, S-score, MAPE and R²
            rmse = root_mean_squared_error(test_rul, y_pred)
            s_score = compute_s_score(test_rul, y_pred)
            mape = compute_MAPE(test_rul, y_pred)
            r2 = r2_score(test_rul, y_pred)
            #accuracy = accuracy_score(test_rul, y_pred)

            print(f"Validation RMSE: {rmse:.4f}")

            time_training = time.time() - start_time
            i+=1
            # Append this combination's result to the results DataFrame

            results_all = pd.concat([results_all, pd.DataFrame([{
                'bootstrap_series': i,
                'hidden_size': hidden_size,
                'learning_rate': learning_rate,
                'dropout': dropout,
                'rmse': rmse,
                's_score': s_score,
                'mape': mape,
                'r2': r2,
                'training_time': time_training
            }])], ignore_index=True)

            # Rewrite the CSV after every combination so partial results are kept
            results_all.to_csv(path_grid, index=False)
