In [None]:
import pandas as pd
import numpy as np
import clickhouse_connect
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, GRU, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from datetime import datetime

def connect_to_clickhouse():
    """Return a clickhouse_connect client for the local server.

    Connects to ``localhost:8123`` as user ``default`` with an empty password.
    """
    connection_settings = {
        'host': 'localhost',
        'port': 8123,
        'username': 'default',
        'password': '',
    }
    return clickhouse_connect.get_client(**connection_settings)

def load_data_from_clickhouse(start_date, end_date):
    """Load the solar_data rows whose Date lies between two bounds (inclusive).

    Args:
        start_date: Lower bound of the ``BETWEEN`` filter on ``Date``.
        end_date: Upper bound of the ``BETWEEN`` filter on ``Date``.

    Returns:
        A DataFrame with columns Date, Speed, Density, Bt, Bz and Mag,
        ordered by Date, with every row containing a missing value removed.
    """
    # Bind the bounds as query parameters instead of interpolating them into
    # the SQL text, so a caller-supplied value cannot alter the statement.
    # ORDER BY makes the row order explicit: the sliding windows built later
    # in this file assume chronological rows.
    query = """
        SELECT Date, Speed, Density, Bt, Bz, Mag FROM solar_data
        WHERE Date BETWEEN %(start_date)s AND %(end_date)s
        ORDER BY Date
    """
    client = connect_to_clickhouse()
    try:
        df = client.query_df(
            query,
            parameters={'start_date': start_date, 'end_date': end_date},
        )
    finally:
        # Release the connection even when the query fails.
        client.close()
    df.dropna(inplace=True)  # Drop rows that contain missing values
    return df

def enrich_data(df):
    """Replace the 'Date' column with plain ``datetime.date`` values.

    The frame is modified in place and the same object is returned.
    """
    parsed_dates = pd.to_datetime(df['Date'])
    df['Date'] = parsed_dates.dt.date
    return df

def prepare_data_for_model(df, sequence_length):
    """Scale the data and cut it into sliding windows for sequence training.

    Every column except 'Date' and 'Mag' is used as an input feature; 'Mag'
    is the target. Each sample is ``sequence_length`` consecutive scaled
    feature rows, labelled with the scaled target of the row that follows.

    Args:
        df: Frame with a 'Date' column, a 'Mag' column and numeric feature
            columns. Rows are assumed to be in chronological order.
        sequence_length: Number of consecutive rows per input window.

    Returns:
        A 5-tuple ``(splits, scaler_X, scaler_y, features, target)`` where
        ``splits`` is ``[X_train, X_test, y_train, y_test]`` (the last 20% of
        the windows form the test set), the scalers are the fitted
        StandardScaler objects, and ``features`` / ``target`` are the
        unscaled float32 inputs.

    Raises:
        ValueError: If ``df`` is too short to yield at least two windows
            (one for training, one for testing).
    """
    features = df.drop(columns=['Date', 'Mag']).astype(np.float32)
    target = df['Mag'].astype(np.float32)

    # Each window needs `sequence_length` rows plus one following row for its
    # label, and the split needs at least one window on each side.
    n_windows = len(df) - sequence_length
    if n_windows < 2:
        raise ValueError("Pas assez de données pour créer des séquences de cette longueur.")

    # NOTE(review): both scalers are fitted on the full dataset, test period
    # included. Fitting them on the training rows only would avoid leaking
    # test statistics into training.
    scaler_X = StandardScaler()
    scaler_y = StandardScaler()
    X_scaled = scaler_X.fit_transform(features)
    y_scaled = scaler_y.fit_transform(target.values.reshape(-1, 1))

    X = np.stack([X_scaled[i:i + sequence_length] for i in range(n_windows)])
    # The label of window i is row i + sequence_length, so the labels are the
    # scaled targets from `sequence_length` onwards; shape stays (n_windows, 1).
    y = y_scaled[sequence_length:]

    # Consecutive windows overlap almost entirely, so a shuffled split would
    # place near-duplicates of the training windows in the test set. Keep the
    # split chronological.
    splits = train_test_split(X, y, test_size=0.2, shuffle=False)
    return splits, scaler_X, scaler_y, features, target

def create_model(input_shape):
    """Build and compile the two-layer GRU regression network.

    Layer stack: GRU(64, returning sequences) -> Dropout(0.2) -> GRU(32)
    -> Dropout(0.2) -> Dense(1). Compiled with the Adam optimizer and
    mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, passed to the Input layer.

    Returns:
        The compiled Sequential model.
    """
    layer_stack = [
        Input(shape=input_shape),
        GRU(64, return_sequences=True),
        Dropout(0.2),
        GRU(32),
        Dropout(0.2),
        Dense(1),
    ]
    network = Sequential(layer_stack)
    network.compile(loss='mse', optimizer='adam')
    return network

def train_model(model, X_train, y_train, X_test, y_test, epochs=100):
    """Fit the model, using the test split as validation data.

    Training uses a batch size of 32 with two callbacks that watch
    ``val_loss``: early stopping (patience 10, best weights restored) and
    learning-rate halving on plateau (patience 5).

    Args:
        model: Compiled Keras model to train.
        X_train, y_train: Training inputs and targets.
        X_test, y_test: Data passed as ``validation_data``.
        epochs: Maximum number of epochs.

    Returns:
        ``(model, history)`` where ``history`` is the value returned by
        ``model.fit``.
    """
    stop_early = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    shrink_lr = ReduceLROnPlateau(monitor='val_loss', patience=5, factor=0.5)

    fit_options = {
        'validation_data': (X_test, y_test),
        'epochs': epochs,
        'batch_size': 32,
        'callbacks': [stop_early, shrink_lr],
    }
    history = model.fit(X_train, y_train, **fit_options)
    return model, history

def evaluate_model(model, X_test, y_test, scaler_y):
    """Score the model on the test split and plot predictions against truth.

    Predictions and targets are mapped back to the original scale with
    ``scaler_y`` before the metrics are computed. The first 500 samples of
    each series are drawn on a single figure, which is then shown.

    Args:
        model: Trained model exposing ``predict``.
        X_test: Test inputs.
        y_test: Scaled test targets.
        scaler_y: Fitted scaler used to undo the target scaling.

    Returns:
        ``(mse, rmse, mae, y_test_inv, y_pred_inv)`` in the original scale.
    """
    scaled_predictions = model.predict(X_test)
    y_test_inv = scaler_y.inverse_transform(y_test.reshape(-1, 1))
    y_pred_inv = scaler_y.inverse_transform(scaled_predictions)

    residuals = y_test_inv - y_pred_inv
    mse = np.mean(residuals ** 2)
    rmse = np.sqrt(mse)
    mae = np.mean(np.abs(residuals))

    plot_limit = 500  # only the first samples are drawn
    plt.figure(figsize=(10, 5))
    for series, label, color in (
        (y_test_inv, 'Réel', 'blue'),
        (y_pred_inv, 'Prédit', 'red'),
    ):
        plt.plot(series[:plot_limit], label=label, color=color)
    plt.title('Prédiction des perturbations magnétiques')
    plt.xlabel('Échantillons')
    plt.ylabel('Indice de perturbation magnétique')
    plt.legend()
    plt.show()

    return mse, rmse, mae, y_test_inv, y_pred_inv

def predict_realtime(model, latest_data, scaler_X, scaler_y, features, sequence_length):
    """Predict the next value from the most recent ``sequence_length`` rows.

    Args:
        model: Trained model exposing ``predict``.
        latest_data: Recent observations; must contain the feature columns.
        scaler_X: Fitted feature scaler.
        scaler_y: Fitted target scaler, used to undo the target scaling.
        features: Object whose ``columns`` selects the feature columns.
        sequence_length: Number of trailing rows fed to the model.

    Returns:
        A dict with 'predicted_intensity' (unscaled prediction), 'kp_scale'
        (intensity / 10 rounded to one decimal), 'probable_latitude'
        (90 minus twice the intensity) and 'timestamp' (current local time).

    Raises:
        ValueError: If ``latest_data`` has fewer than ``sequence_length`` rows.
    """
    if sequence_length > len(latest_data):
        raise ValueError("Pas assez de données pour la prédiction en temps réel.")

    scaled_rows = scaler_X.transform(latest_data[features.columns])
    window = scaled_rows[-sequence_length:]
    # Wrap the window in a batch of size one for the model.
    batch = np.array([window])

    scaled_prediction = model.predict(batch)
    intensity = scaler_y.inverse_transform(scaled_prediction)[0][0]

    forecast = {}
    forecast['predicted_intensity'] = intensity
    forecast['kp_scale'] = round(intensity / 10, 1)
    forecast['probable_latitude'] = 90 - (intensity * 2)
    forecast['timestamp'] = datetime.now()
    return forecast

def get_user_input_date(prompt):
    """Prompt on stdin until the user enters a valid YYYY-MM-DD date.

    Args:
        prompt: Text shown before the ``(YYYY-MM-DD): `` format hint.

    Returns:
        The entered date as a ``datetime.date``.
    """
    # Retry in a loop rather than by recursion so that any number of invalid
    # entries cannot exhaust the call stack.
    while True:
        # Surrounding whitespace would make strptime reject a valid date.
        date_input = input(f"{prompt} (YYYY-MM-DD): ").strip()
        try:
            return datetime.strptime(date_input, "%Y-%m-%d").date()
        except ValueError:
            print("Format de date invalide. Veuillez entrer une date valide.")

def main_with_user_input():
    """Run the whole pipeline for a date range entered by the user.

    Steps: ask for the start and end dates, load and enrich the data, build
    24-step windows, create and train the model for up to 100 epochs,
    evaluate it, and save it to 'solar_magnetic_prediction_model.keras'.
    If the data cannot be windowed, the error is printed and the function
    returns early.
    """
    date_format = "%Y-%m-%d"
    start_date = get_user_input_date("Entrez la date de début")
    end_date = get_user_input_date("Entrez la date de fin")
    epochs = 100

    print("Chargement des données...")
    start_text = start_date.strftime(date_format)
    end_text = end_date.strftime(date_format)
    df = load_data_from_clickhouse(start_date=start_text, end_date=end_text)

    print("Enrichissement des données...")
    df = enrich_data(df)

    print("Préparation des données pour le modèle...")
    try:
        prepared = prepare_data_for_model(df, 24)
        splits, scaler_X, scaler_y, features, target = prepared
        X_train, X_test, y_train, y_test = splits
    except ValueError as e:
        # Not enough rows for the requested window length: report and stop.
        print(e)
        return

    print("Création du modèle...")
    timesteps, n_features = X_train.shape[1], X_train.shape[2]
    model = create_model((timesteps, n_features))
    model.summary()

    print("Entraînement du modèle...")
    model, history = train_model(model, X_train, y_train, X_test, y_test, epochs=epochs)

    print("Évaluation du modèle...")
    evaluate_model(model, X_test, y_test, scaler_y)

    print("Sauvegarde du modèle...")
    model.save('solar_magnetic_prediction_model.keras')

    print("Processus terminé!")

# Run the interactive pipeline only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main_with_user_input()


2025-04-03 10:08:41.339679: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-03 10:08:42.221408: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-03 10:08:42.229261: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Entrez la date de début (YYYY-MM-DD):  2017-01-01
Entrez la date de fin (YYYY-MM-DD):  2017-02-01
Entrez le nombre d'epochs pour l'entraînement:  10


Chargement des données...
Enrichissement des données...
Préparation des données pour le modèle...
