In [19]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import pandas as pd
import itertools
import matplotlib.pyplot as plt
import numpy as np
import random
import os
import tensorflow as tf
from IPython.display import clear_output
import time

In [20]:
def nash_sutcliffe(obs, sim):
    return 1 - np.sum((obs - sim) ** 2) / np.sum((obs - np.mean(obs)) ** 2)

def PERS(obs, sim):
    shifted = np.roll(obs, -1)
    obs_1 = np.delete(shifted,len(shifted)-1)
    obs_0 = np.delete(obs, len(obs)-1)
    return 1 - np.sum((obs - sim) ** 2) / np.sum((obs_0 - obs_1) ** 2)

In [21]:
#Chargement des données
df_y = pd.read_csv(r"C:\Users\bricaud\PycharmProjects\Bibliographie_\Adaptation\2-Adaptation\adapt_Alsace_Safran.txt", sep=";")

In [22]:
#Adaptation des tailles entre x et y
len_x, len_y = len(df_x), len(df_y)
len_min = min(len_x, len_y)
x_ini = df_y['Humid_sol'].iloc[:len_min]
y = df_y["Q_Obs"].iloc[:len_min]

In [23]:
#Séparer train/test
x_train, x_valid, y_train, y_valid = train_test_split(x, y, test_size=0.2, shuffle=False)
#Normalisation
x_scaler = StandardScaler()
y_scaler = StandardScaler()
x_isol_scaler = StandardScaler()
y_isol_scaler = StandardScaler()

x_train_scaled = x_scaler.fit_transform(x_train)
x_valid_scaled = x_scaler.transform(x_valid)

y_train_scaled = y_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_valid_scaled = y_scaler.transform(y_valid.values.reshape(-1, 1))

len_before, len_after = int(730), int(730)
len_valid = len(x_valid)
x_valid_isol = x_valid.iloc[len_before:len_valid,:]
y_valid_isol = y_valid.iloc[len_before:len_valid]

x_train_scaled = x_isol_scaler.fit_transform(x_train)
x_valid_isol_scaled = x_isol_scaler.transform(x_valid_isol)

y_train_scaled = y_isol_scaler.fit_transform(y_train.values.reshape(-1, 1))
y_valid_isol_scaled = y_isol_scaler.transform(y_valid_isol.values.reshape(-1, 1))

In [24]:
df = pd.read_csv(r'C:\Users\bricaud\PycharmProjects\Bibliographie_\MLP\SEED.csv', header=None)
random_SEED = df.values.flatten().tolist()

In [31]:
A = [256,256,256,256,64]
ini_SEED = [0]

In [26]:
 # t1 = time.time()
        # print(f"Step 1 - Reading CSV: {t1 - t0:.4f} seconds")

In [74]:
def MLP (type):
    if type == 'isol':
        xt_s,yt_s,xv_s,yv_s = x_train_scaled, y_train_scaled, x_valid_isol_scaled, y_valid_isol_scaled
        xv,yv = x_valid_isol, y_valid_isol
        y_scal = y_isol_scaler
    else :
        xt_s,yt_s,xv_s,yv_s = x_train_scaled, y_train_scaled, x_valid_scaled, y_valid_scaled
        xv,yv = x_valid, y_valid
        y_scal = y_scaler
    print(f"{yv}")

    df_seed = pd.DataFrame()
    df_stat = pd.DataFrame()
    for i in range(len(random_SEED)) :
        print(i)
        len_a = len(A)
        SEED = random_SEED[i]
        os.environ['PYTHONHASHSEED'] = str(SEED)
        random.seed(SEED)
        np.random.seed(SEED)
        tf.random.set_seed(SEED)
        model = Sequential()
        for j in range(len_a) :
            model.add(Dense(int(A[j]), activation='relu'))
            # model.add(Dropout(0.2))
        model.add(Dense(1))  # Sortie régression
        model.compile(optimizer='adam', loss='mse', metrics=['mae', 'mape'])
        early_stop = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
        history = model.fit(xt_s, yt_s,
                            validation_split=0.2,
                            epochs=100,
                            batch_size=64,
                            callbacks=[early_stop],
                            verbose=0)

        loss, mae, mape = model.evaluate(xv_s, yv_s, verbose = 0)

        y_pred_scaled = model.predict(xv_s, verbose = 0)
        y_pred = y_scal.inverse_transform(y_pred_scaled)
        y_pred_flat = y_pred.flatten()

        df_stat[f"Seed :{SEED}"] = y_pred_flat

        nash = nash_sutcliffe(yv.values, y_pred_flat)
        pers = PERS(yv.values, y_pred_flat)

        df_seed.loc[i, "SEED"] = SEED
        df_seed.loc[i, "NSE"] = nash
        df_seed.loc[i, "PERS"] = pers
        df_seed.loc[i, "loss"] = loss
        df_seed.loc[i, 'mae'] = mae

        df_seed.to_csv(f"SEED_Alsace_Safran_['Pluie_Liq','ETR','Humid_sol','Drain']_19500.txt", sep=';', index=False)
        df_stat.to_csv(f"STAT_Alsace_Safran_['Pluie_Liq','ETR','Humid_sol','Drain']_19500.txt", sep=';', index=False)
        clear_output()

    mean_nse = df_seed['NSE'].mean()
    median_nse = df_seed['NSE'].median()
    std_nse = df_seed['NSE'].std()
    row_stats = df_stat.T.describe().T

    dif_plus = 0
    P = []
    dif_minus = 0
    M = []
    centered = 0
    excentered = 0

    for i in range(len(row_stats)):
        if yv.iloc[i] > row_stats.loc[i, 'max']:
            dif_plus += 1
            P.append(yv.iloc[i] - row_stats.loc[i, 'max'])
        elif yv.iloc[i] < row_stats.loc[i, 'min']:
            dif_minus += 1
            M.append(row_stats.loc[i, 'min'] - yv.iloc[i])
        elif row_stats.loc[i, '75%'] > yv.iloc[i] > row_stats.loc[i, '25%']:
            centered += 1
        else:
            excentered += 1

    error_sup = dif_plus / len(row_stats)
    error_inf = dif_minus / len(row_stats)
    close_valid = centered / len(row_stats)
    extended_valid = excentered / len(row_stats)
    mean_P = np.mean(P)
    mean_M = np.mean(M)

    print(f"error_sup (%): {error_sup * 100:.4f}, mean distance (m) {mean_P}")
    print(f"error_inf (%): {error_inf * 100:.4f}, mean distance (m) {mean_M}")
    print(f"close_valid (%): {close_valid * 100:.4f}")
    print(f"extended_valid (%): {extended_valid * 100:.4f}")
    y_valid_index = yv.reset_index()
    plt.figure(figsize=(12, 6))
    plt.plot(row_stats['min'], label='Simulation minimale', color='lightblue')
    plt.plot(row_stats['max'], label='Simulation maximale', color='lightblue')
    plt.plot(row_stats['mean'], label='Simulation moyenne', color='yellow')

    plt.plot(y_valid_index['Q_Obs'], label='Observations', color='green')

    plt.fill_between(row_stats.index, row_stats['min'], row_stats['max'], alpha=0.5, label='Enveloppe')
    plt.text(0.05, 0.95, f'mean_NSE = {mean_nse:.3f} \n'
                         f'median_NSE = {median_nse:.3f} \n'
                         f'std_NSE = {std_nse:.3f}', transform=plt.gca().transAxes,fontsize=12, verticalalignment='top', bbox=dict(facecolor='white', alpha=0.7))
    # plt.xlim(0, 500)
    plt.grid(True)
    plt.legend()
    plt.title(f"Enveloppe des prévisions, taille data : {len(x)}"
              f"\n erreur positive : {error_sup * 100:.4f} à {mean_P:.4f}"
              f"\n erreur negative : {error_inf * 100:.4f} à {mean_M:.4f}")
    plt.xlabel("Jour")
    plt.ylabel("Valeur")
    plt.tight_layout()

    plt.savefig(f"Alsace_Humid_SEED_isol_{len(x)}.png", dpi=300)

    plt.show()

    return df_seed, df_stat


In [None]:
df_seed, df_stat = MLP('isol')

16330    181.34
16331    181.33
16332    181.34
16333    181.34
16334    181.38
          ...  
19495    181.39
19496    181.38
19497    181.37
19498    181.36
19499    181.34
Name: Q_Obs, Length: 3170, dtype: float64
0
