In [1]:
import pandas as pd 
# Read the raw water-usage readings from disk, then show the frame as the
# cell's rich output.
df = pd.read_csv(
    filepath_or_buffer="/home/rt/A/hackathon/AquaFlow/aquaflow_ml/dataset/water_usage_300_days.csv",
)
df

Unnamed: 0,timestamp,water_usage_liters
0,2025-03-10 08:00:00,1.00
1,2025-03-10 08:01:00,0.64
2,2025-03-10 08:02:00,0.43
3,2025-03-10 08:03:00,0.40
4,2025-03-10 08:04:00,0.67
...,...,...
71995,2026-01-03 11:55:00,0.40
71996,2026-01-03 11:56:00,0.50
71997,2026-01-03 11:57:00,0.53
71998,2026-01-03 11:58:00,0.42


In [2]:
# Count the distinct usage readings; dropna=False keeps NaN counted as one
# value, exactly as len(Series.unique()) does.
df['water_usage_liters'].nunique(dropna=False)

143

In [3]:
# -*- coding: utf-8 -*-
import numpy as np
import pandas as pd
from scipy.stats import zscore
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential

# ================================================
# PARAMETERS
# ================================================
# Cut-off applied to the z-score of the prediction errors when flagging
# anomalies (also reused by calculer_accuracy as an absolute-error cut-off).
SEUIL_ZSCORE = 3.5

# Number of consecutive readings that make up one LSTM input sequence.
SEQ_LENGTH = 10

# Location of the water-usage CSV consumed by preparer_donnees.
FICHIER_DATA = "/home/rt/A/hackathon/AquaFlow/aquaflow_ml/dataset/water_usage_300_days.csv"

# ================================================
# DATA PREPARATION
# ================================================
def preparer_donnees(data_path, seq_length=None):
    """Load the water-usage CSV and build sliding windows for the LSTM.

    The 'water_usage_liters' column is standardized with a StandardScaler
    and cut into overlapping windows: each input is `seq_length`
    consecutive scaled readings and its target is the reading that
    immediately follows the window.

    Args:
        data_path: Path (or file-like object) of a CSV containing the
            'timestamp' and 'water_usage_liters' columns.
        seq_length: Number of readings per input window. Defaults to the
            module-level SEQ_LENGTH when omitted.

    Returns:
        A tuple (X, y, scaler) where X has shape
        (n_rows - seq_length, seq_length, 1), y has shape
        (n_rows - seq_length, 1) and scaler is the fitted StandardScaler.

    Raises:
        ValueError: If seq_length is smaller than 1, or if the file does not
            contain more than seq_length rows (no window could be built).
    """
    # Resolve the default at call time so the global can still be tuned
    # after this function has been defined.
    if seq_length is None:
        seq_length = SEQ_LENGTH
    if seq_length < 1:
        raise ValueError(f"seq_length must be >= 1, got {seq_length}")

    # Load the readings, indexed by their parsed timestamp.
    df = pd.read_csv(data_path, parse_dates=['timestamp'])
    df = df.set_index('timestamp')

    # Fail early with a clear message instead of returning empty arrays
    # whose shape breaks the model construction further down the line.
    n_obs = len(df)
    if n_obs <= seq_length:
        raise ValueError(
            f"need more than {seq_length} rows to build one sequence, got {n_obs}"
        )

    # Standardize the usage column.
    # NOTE(review): the scaler is fitted on the whole series; fit it on the
    # training portion only if a train/test split is introduced later.
    scaler = StandardScaler()
    data_scaled = scaler.fit_transform(df[['water_usage_liters']])

    # Row i of the index matrix is [i, i+1, ..., i+seq_length-1]; indexing
    # with it builds every window at once instead of appending in a loop.
    starts = np.arange(n_obs - seq_length)[:, None]
    offsets = np.arange(seq_length)[None, :]
    X = data_scaled[starts + offsets]

    # The target of window i is the reading right after it.
    y = data_scaled[seq_length:]
    return X, y, scaler

# ================================================
# LSTM MODEL
# ================================================
def creer_modele_lstm(input_shape):
    """Build and compile a simple single-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample, as passed by the caller
            (the main block passes (X.shape[1], X.shape[2])).

    Returns:
        A compiled Sequential model: a 64-unit LSTM with ReLU activation
        followed by a 1-unit Dense output, using the Adam optimizer and
        mean-squared-error loss.
    """
    model = Sequential([
        # Declare the input shape with an explicit Input layer rather than
        # the `input_shape=` layer argument, which recent Keras versions
        # warn about when used inside a Sequential model.
        Input(shape=input_shape),
        LSTM(64, activation='relu'),
        Dense(1),
    ])
    model.compile(optimizer='adam', loss='mse')
    return model

# ================================================
# ANOMALY DETECTION WITH Z-SCORE
# ================================================
def detecter_anomalies(predictions, y, seuil_zscore):
    """Return the positions whose prediction error is a z-score outlier.

    The signed errors (prediction minus target) are standardized with
    scipy's zscore; every position whose absolute z-score is strictly
    greater than `seuil_zscore` is reported.

    Args:
        predictions: Array of model predictions.
        y: Array of target values with the same number of elements.
        seuil_zscore: Absolute z-score cut-off.

    Returns:
        1-D integer array of indices into the flattened error vector.
    """
    residus = predictions.reshape(-1) - y.reshape(-1)
    hors_norme = np.abs(zscore(residus)) > seuil_zscore
    return np.flatnonzero(hors_norme)

# ================================================
# ACCURACY COMPUTATION
# ================================================
def calculer_accuracy(y, predictions, anomalies, seuil_erreur=None):
    """Score how well an absolute-error cut-off agrees with the given anomalies.

    Two binary labelings (0 = normal, 1 = anomaly) of the same samples are
    compared with accuracy_score:
      * reference labels: 1 at every index listed in `anomalies`;
      * candidate labels: 1 where |prediction - target| > `seuil_erreur`.

    NOTE(review): in this notebook `anomalies` is itself derived from the
    same prediction errors, so the result is an agreement rate between two
    thresholding rules, not an accuracy against ground-truth labels.

    Args:
        y: Array of target values.
        predictions: Array of model predictions with the same number of
            elements as `y`.
        anomalies: Integer indices (into the flattened samples) to treat as
            reference anomalies.
        seuil_erreur: Cut-off applied to the absolute error. Defaults to the
            module-level SEUIL_ZSCORE, which is what the function used
            before this parameter existed.

    Returns:
        The fraction of samples on which both labelings agree.
    """
    # Resolve the default at call time so the global can still be tuned.
    if seuil_erreur is None:
        seuil_erreur = SEUIL_ZSCORE

    erreurs_abs = np.abs(np.ravel(predictions) - np.ravel(y))

    # 1-D integer labels: one label per sample, independent of y's shape.
    labels_reels = np.zeros(erreurs_abs.shape[0], dtype=int)  # 0 = normal, 1 = anomaly
    labels_reels[anomalies] = 1

    labels_predits = (erreurs_abs > seuil_erreur).astype(int)

    return accuracy_score(labels_reels, labels_predits)

# ================================================
# MAIN EXECUTION
# ================================================
if __name__ == "__main__":
    # Build the standardized, windowed dataset.
    X, y, scaler = preparer_donnees(FICHIER_DATA)

    # Instantiate the network for the per-sample window shape and train it.
    model = creer_modele_lstm(X.shape[1:])
    model.fit(X, y, batch_size=32, epochs=15, verbose=1)

    # Predict on the very same windows the model was trained on.
    predictions = model.predict(X)

    # Flag the samples whose prediction error has an extreme z-score.
    anomalies = detecter_anomalies(predictions, y, seuil_zscore=SEUIL_ZSCORE)

    # Compute the score and report it as a percentage.
    accuracy = calculer_accuracy(y, predictions, anomalies)
    print(f"\nAccuracy du modèle : {accuracy * 100:.2f}%")

2025-03-11 20:23:43.226253: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-11 20:23:43.230651: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-03-11 20:23:43.243354: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1741742623.264633   18743 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1741742623.270576   18743 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-03-11 20:23:43.291930: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU ins

Epoch 1/15


2025-03-11 20:23:45.863766: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)
  super().__init__(**kwargs)


[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 5ms/step - loss: 0.9862
Epoch 2/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - loss: 0.9915
Epoch 3/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.9934
Epoch 4/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 1.0093
Epoch 5/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 1.0110
Epoch 6/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 1.0086
Epoch 7/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 0.9967
Epoch 8/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 5ms/step - loss: 1.0076
Epoch 9/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 5ms/step - loss: 1.0028
Epoch 10/15
[1m2250/2250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11