In [1]:
import pandas as pd

# Load the main CSV file and drop the "date" column.
df = pd.read_csv('./full_dataset.csv')
df = df.drop("date", axis=1)

# Fraction of each building's rows that is kept, taken from the start of the
# building's block of rows. The code has always used 10 % (earlier comments
# mentioned 20 %, which did not match the value actually applied).
SAMPLE_FRACTION = 0.1

# Unique (latitude, longitude) pairs; each pair identifies one building.
coordinates = df[['latitude', 'longitude']].drop_duplicates()

# Split the frame by building in a single pass instead of re-filtering the
# whole frame once per coordinate pair. sort=False keeps the buildings in order
# of first appearance, which is the order drop_duplicates() would yield.
sampled_dfs = []
for _, building_data in df.groupby(['latitude', 'longitude'], sort=False):
    # Number of leading rows that corresponds to SAMPLE_FRACTION of this building
    num_rows = int(len(building_data) * SAMPLE_FRACTION)
    sampled_df = building_data.head(num_rows)
    sampled_dfs.append(sampled_df)

# Stack the per-building samples into one frame with a fresh 0..n-1 index.
data = pd.concat(sampled_dfs, ignore_index=True)

# Show a preview of the final frame.
print(data.head())


   production   latitude  longitude   vmp  imp   voc   isc  p_per_m2  p_max  \
0         0.8  48.575437   7.768668  27.3  7.7  33.3  8.17     143.0    210   
1        16.9  48.575437   7.768668  27.3  7.7  33.3  8.17     143.0    210   
2         1.4  48.575437   7.768668  27.3  7.7  33.3  8.17     143.0    210   
3         6.6  48.575437   7.768668  27.3  7.7  33.3  8.17     143.0    210   
4         0.3  48.575437   7.768668  27.3  7.7  33.3  8.17     143.0    210   

   panel_area  ...  wind_speed_10m_std  wind_speed_10m_min  wind_speed_10m_q1  \
0        1.72  ...            0.903114                 1.3              1.900   
1        1.72  ...            0.969186                 0.9              2.975   
2        1.72  ...            0.306945                 0.4              1.100   
3        1.72  ...            0.491844                 0.4              0.800   
4        1.72  ...            0.527737                 1.6              2.275   

   wind_speed_10m_q2  wind_speed_10m_q

In [2]:
# Hold out building 8 as the test set; every other building is used for training.
is_test_building = data['building_id'] == 8
data_train = data[~is_test_building]
data_test = data[is_test_building]

data_train.shape, data_test.shape

((959, 164), (125, 164))

In [3]:
# Column the model is trained to predict.
target_column = 'production'

# Features: every column except the target.
x_train = data_train.drop(columns=target_column)
x_test = data_test.drop(columns=target_column)

# Targets as column vectors of shape (n_rows, 1).
y_train = data_train[target_column].to_numpy().reshape(-1, 1)
y_test = data_test[target_column].to_numpy().reshape(-1, 1)

In [4]:
import numpy as np

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error

In [5]:
# Scale every feature to [0, 1]. The scaler is fitted on the training features
# only and the fitted ranges are then reused for the test features.
x_scaler = MinMaxScaler(feature_range=(0, 1))

x_train_scaled = x_scaler.fit_transform(x_train)
x_test_scaled = x_scaler.transform(x_test)

In [6]:
def get_windows(x, y, window_size):
    """Slice aligned feature/target arrays into overlapping look-back windows.

    For every start index ``i`` in ``range(len(x) - window_size)`` one sample is
    built from rows ``i .. i + window_size - 1``: the rows of ``x`` with the
    matching rows of ``y`` appended as extra columns. The label of that sample
    is the row of ``y`` that immediately follows the window.

    Args:
        x: 2-D array of features, one row per time step.
        y: 2-D array of targets, one row per time step, aligned with ``x``.
        window_size: number of consecutive rows in each window.

    Returns:
        A pair ``(inputs, labels)`` of numpy arrays. For 2-D ``x`` of shape
        ``(n, f)`` and ``y`` of shape ``(n, k)`` these have shapes
        ``(n - window_size, window_size, f + k)`` and ``(n - window_size, k)``.
        When ``len(x) <= window_size`` both arrays are empty with shape ``(0,)``.
    """
    def build_sample(first):
        # One (window, label) pair starting at row `first`.
        last = first + window_size
        return np.hstack((x[first:last], y[first:last])), y[last]

    samples = [build_sample(first) for first in range(len(x) - window_size)]
    inputs = [window for window, _ in samples]
    labels = [label for _, label in samples]

    return np.array(inputs), np.array(labels)

In [7]:
# Number of consecutive rows fed to the model for each prediction.
WINDOW_SIZE = 10

# NOTE(review): the training rows are a concatenation of several buildings, so
# windows that straddle the boundary between two buildings mix their series —
# confirm this is intended. The target passed here is unscaled while the
# features are min-max scaled, and get_windows appends it to the features.
x_train_windows, y_train_windows = get_windows(x_train_scaled, y_train, WINDOW_SIZE)
x_test_windows, y_test_windows = get_windows(x_test_scaled, y_test, WINDOW_SIZE)

In [8]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import time

import tensorflow as tf

# Detect GPUs and enable memory growth on each of them; when no GPU is visible,
# report that the CPU will be used instead.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Enable memory growth on every physical GPU (no fixed memory limit is set here)
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Memory growth must be set before GPUs have been initialized
        print(e)
else:
    print("No GPU found, using CPU instead.")

# Two candidate levels (index 0 and index 1) for each of the seven hyperparameters.
LSTM1_units = [192, 320]
LSTM1_activation = ['tanh', 'sigmoid']
DROPOUT1_rate = [0.2, 0.1]
LSTM2_units = [160, 208]
LSTM2_activation = ['tanh', 'sigmoid']
DROPOUT2_rate = [0.2, 0.1]
learning_rate = [0.001, 0.01]
# Number of training epochs used for every run.
epoch = 200

# L8(2^7) orthogonal array: 8 runs x 7 two-level factors. Every pair of columns
# contains each of the four level combinations exactly twice.
# (An earlier array literal that was immediately overwritten by this one has
# been removed; it never took effect and its columns were not balanced.)
orthogonal_array = [
    [0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 1, 1, 1, 1],
    [0, 1, 1, 0, 0, 1, 1],
    [0, 1, 1, 1, 1, 0, 0],
    [1, 0, 1, 0, 1, 0, 1],
    [1, 0, 1, 1, 0, 1, 0],
    [1, 1, 0, 0, 1, 1, 0],
    [1, 1, 0, 1, 0, 0, 1],
    # Optional complementary runs (each is the bitwise inverse of a row above):
    #[1, 1, 1, 1, 1, 1, 1],
    #[1, 1, 1, 0, 0, 0, 0],
    #[1, 0, 0, 1, 1, 0, 0],
    #[1, 0, 0, 0, 0, 1, 1],
    #[0, 1, 0, 1, 0, 1, 0],
    #[0, 1, 0, 0, 1, 0, 1],
    #[0, 0, 1, 1, 0, 0, 1],
    #[0, 0, 1, 0, 1, 1, 0]
]

# Factor name -> candidate levels, listed in the same order as the array columns.
factor_levels = {
    'LSTM1_units': LSTM1_units,
    'LSTM1_activation': LSTM1_activation,
    'DROPOUT1_rate': DROPOUT1_rate,
    'LSTM2_units': LSTM2_units,
    'LSTM2_activation': LSTM2_activation,
    'DROPOUT2_rate': DROPOUT2_rate,
    'learning_rate': learning_rate,
}

# Translate every row of level indices into a dict of concrete hyperparameters.
param_combinations = []
for row in orthogonal_array:
    if len(row) != len(factor_levels):
        raise ValueError(
            f"Design row {row} has {len(row)} entries, expected {len(factor_levels)}"
        )
    params = {
        name: levels[level]
        for (name, levels), level in zip(factor_levels.items(), row)
    }
    param_combinations.append(params)
# The model-building and training helpers below read the module-level
# `x_train_windows` and `y_train_windows` arrays, so the windowing cell must
# have been run before they are called.

# Build and compile the stacked-LSTM regressor for one hyperparameter combination
def create_model(LSTM1_units, LSTM1_activation, DROPOUT1_rate, LSTM2_units, LSTM2_activation, DROPOUT2_rate, learning_rate):
    """Return a compiled Sequential model: LSTM -> Dropout -> LSTM -> Dropout -> Dense(64) -> Dense(1).

    The input shape is taken from the module-level ``x_train_windows`` array
    (all of its dimensions except the first).

    Args:
        LSTM1_units: number of units in the first LSTM layer.
        LSTM1_activation: activation of the first LSTM layer.
        DROPOUT1_rate: dropout rate applied after the first LSTM layer.
        LSTM2_units: number of units in the second LSTM layer.
        LSTM2_activation: activation of the second LSTM layer.
        DROPOUT2_rate: dropout rate applied after the second LSTM layer.
        learning_rate: learning rate passed to the Adam optimizer.

    Returns:
        The model, compiled with Adam and a mean-absolute-error loss.
    """
    window_shape = x_train_windows.shape[1:]

    network = Sequential()
    # The first LSTM returns the full sequence so that the second LSTM can consume it.
    network.add(LSTM(LSTM1_units, activation=LSTM1_activation, input_shape=window_shape, return_sequences=True))
    network.add(Dropout(DROPOUT1_rate))
    network.add(LSTM(LSTM2_units, activation=LSTM2_activation, return_sequences=False))
    network.add(Dropout(DROPOUT2_rate))
    network.add(Dense(64, activation='relu'))
    network.add(Dense(1, activation='linear'))

    network.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_absolute_error')
    return network

# Function to train a model on the windowed training set and time the run
def start_training(model):
    """Fit ``model`` on the training windows and measure how long fitting takes.

    Reads the module-level ``x_train_windows``, ``y_train_windows`` and
    ``epoch``. Training uses a batch size of 128, holds out 20 % of the samples
    via ``validation_split`` and does not shuffle.

    Args:
        model: object exposing a Keras-style ``fit`` method.

    Returns:
        ``(history, training_duration)``: whatever ``model.fit`` returned, and
        the elapsed fitting time in seconds.
    """
    # perf_counter() is monotonic, so the measured duration cannot be distorted
    # (or become negative) when the system clock is adjusted during training.
    start_time = time.perf_counter()
    history = model.fit(
        x=x_train_windows,
        y=y_train_windows,
        epochs=epoch,
        batch_size=128,
        validation_split=0.2,
        shuffle=False,
    )
    training_duration = time.perf_counter() - start_time
    return history, training_duration

# Columns of the sweep summary: the seven hyperparameters plus the best
# (minimum over epochs) training and validation losses of each run.
results_columns = ['LSTM1_units', 'LSTM1_activation', 'DROPOUT1_rate', 'LSTM2_units', 'LSTM2_activation', 'DROPOUT2_rate', 'learning_rate', 'loss', 'val_loss']
# One dict per finished run; turned into a DataFrame once, after the loop.
results_records = []

# Best combination seen so far, judged by validation loss.
best_params = None
best_loss = np.inf
total_combinations = len(param_combinations)
current_combination = 0

# Number of initial epochs left out of the loss plots.
start = 10

try:
    # Iterate over the parameter combinations from the orthogonal array
    for current_combination, params in enumerate(param_combinations, start=1):
        print(f"Training with parameters: {params} ({current_combination}/{total_combinations})")

        # Release the graph/state of the previous run so that memory does not
        # keep growing while many models are created in the same kernel.
        tf.keras.backend.clear_session()
        model = create_model(**params)

        history, training_duration = start_training(model)
        loss = min(history.history['loss'])
        val_loss = min(history.history['val_loss'])
        print(f"Loss: {loss}")
        print(f"Validation loss: {val_loss}")
        print(f"Training duration: {training_duration} seconds")

        results_records.append({**params, 'loss': loss, 'val_loss': val_loss})

        # Plot and save the training history, skipping the first `start` epochs.
        train_curve = history.history['loss'][start:]
        val_curve = history.history['val_loss'][start:]
        # Real 1-based epoch numbers, so the x-axis is not shifted by `start`.
        epoch_numbers = range(start + 1, start + 1 + len(train_curve))

        fig, ax = plt.subplots()
        ax.plot(epoch_numbers, train_curve, label='train')
        ax.plot(epoch_numbers, val_curve, label='validation')
        ax.set_title(f"Loss Curves - Params: {params}")
        ax.set_xlabel("Epochs")
        ax.set_ylabel("Loss")
        ax.legend()
        fig.savefig(f"loss_curve_LSTM1_{params['LSTM1_units']}_LSTM1act_{params['LSTM1_activation']}_DROPOUT1_{params['DROPOUT1_rate']}_LSTM2_{params['LSTM2_units']}_LSTM2act_{params['LSTM2_activation']}_DROPOUT2_{params['DROPOUT2_rate']}_LR_{params['learning_rate']}_EPOCH_{epoch}.png")
        plt.close(fig)

        # Check if this is the best model so far
        if val_loss < best_loss:
            best_loss = val_loss
            best_params = params
            print(f"New best model found: {best_params} with validation loss: {best_loss}")
finally:
    # Always materialize what has been collected, so the finished runs are
    # still available in `results_df` if the sweep is interrupted or fails.
    results_df = pd.DataFrame(results_records, columns=results_columns)

# Save the results DataFrame to a CSV file. The file holds every combination,
# so its name identifies the sweep (design and epoch count) instead of carrying
# the hyperparameters of whichever combination happened to run last.
results_df.to_csv(f"hyperparameter_results_L8_EPOCH_{epoch}.csv", index=False)

print(f"Best parameters: {best_params}")
print(f"Best validation loss: {best_loss}")

No GPU found, using CPU instead.
Training with parameters: {'LSTM1_units': 192, 'LSTM1_activation': 'tanh', 'DROPOUT1_rate': 0.2, 'LSTM2_units': 160, 'LSTM2_activation': 'tanh', 'DROPOUT2_rate': 0.2, 'learning_rate': 0.001} (1/8)
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200

In [10]:
# Display the sweep summary: one row per hyperparameter combination with its losses.
results_df

Unnamed: 0,LSTM1_units,LSTM1_activation,DROPOUT1_rate,LSTM2_units,LSTM2_activation,DROPOUT2_rate,learning_rate,loss,val_loss
0,192,tanh,0.2,160,tanh,0.2,0.001,6.808808,3.701832
1,192,tanh,0.2,208,sigmoid,0.1,0.01,8.802302,3.687266
2,192,sigmoid,0.1,160,tanh,0.1,0.01,15.591359,6.875353
3,192,sigmoid,0.1,208,sigmoid,0.2,0.001,10.608258,6.938266
4,320,tanh,0.1,160,sigmoid,0.2,0.01,13.718943,6.455424
5,320,tanh,0.1,208,tanh,0.1,0.001,4.871476,3.518692
6,320,sigmoid,0.2,160,sigmoid,0.1,0.001,9.684809,5.422256
7,320,sigmoid,0.2,208,tanh,0.2,0.01,19.876245,9.273034
