# Génération des prédictions des modèles pour l'année 2025

## Modèles à comparer

- GNN sur DataSet barycentrique sans critère d'erreur informé par la physique
- GNN sur DataSet barycentrique avec critère d'erreur informé par la physique
- GNN sur DataSet planétaire sans critère d'erreur informé par la physique
- GNN sur DataSet planétaire avec critère d'erreur informé par la physique
- LSTM sur DataSet planétaire sans critère d'erreur informé par la physique
- LSTM sur DataSet planétaire avec critère d'erreur informé par la physique

## 1. Chargement des librairies

In [9]:
from model.GNN import GNN_NBody, InteractionNetwork
from model.SolarLSTM import SolarLSTM
from data import solarSystemDataSet
import torch
from torch_geometric.data import Data
import torch_geometric.nn
import torch_geometric.inspector
import inspect
import _operator
import typing
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from joblib import load
from tqdm import tqdm
# from google.colab import drive
# drive.mount('/content/drive')

## 2. Définition des chemins des fichiers (à ajuster selon votre environnement local)

In [None]:
# Root folder of the project. This is the only value that needs to be adjusted
# for a different local environment; every path below is derived from it.
project_root_path = "G:/Mon disque/GIF-7005-Project"
data_dir_path = f"{project_root_path}/data"
models_dir_path = f"{project_root_path}/models"

# Reference datasets (per-body states and planetary-centroid states).
body_coordinates_and_velocities_dataset_path = f"{data_dir_path}/body_coordinates_and_velocities_from_1749-12-31_to_2200-01-09.json"
planetary_centroid_coordinates_and_velocities_dataset_path = f"{data_dir_path}/planetary_centroid_coordinates_and_velocities_from_1749-12-31_to_2200-01-09.json"

# GNN on the planetary-centroid dataset, vanilla loss.
# NOTE: this is the only checkpoint named "best_model_weights.pth"; it is loaded as a state dict.
gnn_planetary_centroid_vanilla_model_file_path = f"{models_dir_path}/GNN_Planetary_Centroid_Vanilla/best_model_weights.pth"
gnn_planetary_centroid_vanilla_scaler_file_path = f"{models_dir_path}/GNN_Planetary_Centroid_Vanilla/scaler.joblib"

# GNN on the planetary-centroid dataset, physics-informed loss.
gnn_planetary_centroid_pi_model_file_path = f"{models_dir_path}/GNN_Planetary_Centroid_PI/model.pth"
gnn_planetary_centroid_pi_scaler_file_path = f"{models_dir_path}/GNN_Planetary_Centroid_PI/scaler.joblib"

# GNN on the per-body coordinates dataset, vanilla loss.
gnn_body_coordinates_vanilla_model_file_path = f"{models_dir_path}/GNN_Planet_Coordinates_Vanilla/model.pth"
gnn_body_coordinates_vanilla_scaler_file_path = f"{models_dir_path}/GNN_Planet_Coordinates_Vanilla/scaler.joblib"

# GNN on the per-body coordinates dataset, physics-informed loss.
gnn_body_coordinates_pi_model_file_path = f"{models_dir_path}/GNN_Planet_Coordinates_PI/model.pth"
gnn_body_coordinates_pi_scaler_file_path = f"{models_dir_path}/GNN_Planet_Coordinates_PI/scaler.joblib"

# LSTM, vanilla loss.
lstm_vanilla_model_file_path = f"{models_dir_path}/LSTM_Vanilla/model.pth"
lstm_vanilla_scaler_file_path = f"{models_dir_path}/LSTM_Vanilla/scaler.joblib"

# LSTM, physics-informed loss.
lstm_pi_model_file_path = f"{models_dir_path}/LSTM_PI/model.pth"
lstm_pi_scaler_file_path = f"{models_dir_path}/LSTM_PI/scaler.joblib"

## 3. Chargement des modèles

In [None]:
# Register the classes/callables referenced by the pickled full-model checkpoints
# as "safe globals" for torch's restricted unpickler, so that the torch.load calls
# below that do not pass weights_only=False can deserialize whole GNN modules.
# NOTE(review): SolarLSTM (and torch.nn.LSTM) are not in this list, while
# lstm_vanilla_model is loaded below without weights_only=False — confirm this
# works with the installed torch version.
torch.serialization.add_safe_globals(
    [
        GNN_NBody, 
        torch.nn.modules.linear.Linear, 
        torch.nn.modules.container.ModuleList, 
        InteractionNetwork, 
        torch_geometric.nn.aggr.basic.SumAggregation, 
        torch.nn.modules.container.Sequential,
        torch.nn.modules.activation.ReLU,
        torch_geometric.inspector.Inspector,
        torch_geometric.inspector.Signature,
        torch_geometric.inspector.Parameter,
        inspect._empty,
        _operator.getitem,
        typing.OrderedDict,
        typing.Union,
        type,
        int
    ])

# Use the GPU when one is available; every model is moved to this device after loading.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# This checkpoint is a state dict (weights only), so the architecture is
# instantiated first and the weights are loaded into it.
gnn_planetary_centroid_vanilla_model = GNN_NBody(input_dim=7, model_dim=128).to(device)
gnn_planetary_centroid_vanilla_model.load_state_dict(torch.load(gnn_planetary_centroid_vanilla_model_file_path, map_location=torch.device('cpu')))
gnn_planetary_centroid_vanilla_model.eval()

# The remaining checkpoints are loaded as whole pickled modules: they are
# deserialized on the CPU, switched to evaluation mode, then moved to `device`.
gnn_planetary_centroid_pi_model: GNN_NBody = torch.load(gnn_planetary_centroid_pi_model_file_path, map_location=torch.device('cpu'))
gnn_planetary_centroid_pi_model.eval()
gnn_planetary_centroid_pi_model.to(device)

gnn_body_coordinates_vanilla_model: GNN_NBody = torch.load(gnn_body_coordinates_vanilla_model_file_path, map_location=torch.device('cpu'))
gnn_body_coordinates_vanilla_model.eval()
gnn_body_coordinates_vanilla_model.to(device)

gnn_body_coordinates_pi_model: GNN_NBody = torch.load(gnn_body_coordinates_pi_model_file_path, map_location=torch.device('cpu'))
gnn_body_coordinates_pi_model.eval()
gnn_body_coordinates_pi_model.to(device)

lstm_vanilla_model: SolarLSTM = torch.load(lstm_vanilla_model_file_path, map_location=torch.device('cpu'))
lstm_vanilla_model.eval()
lstm_vanilla_model.to(device)

# SECURITY: weights_only=False runs the full pickle machinery and can execute
# arbitrary code contained in the file; only use it on checkpoints you trust.
# Unlike the loads above, this one maps tensors straight onto `device`.
lstm_pi_model: SolarLSTM = torch.load(lstm_pi_model_file_path, map_location=device, weights_only=False)
lstm_pi_model.eval()
# Last expression of the cell: its return value (the module repr) is what the notebook displays.
lstm_pi_model.to(device)

Using device: cpu


SolarLSTM(
  (lstm): LSTM(63, 128, num_layers=2, batch_first=True, dropout=0.2)
  (decoder): Sequential(
    (0): Linear(in_features=128, out_features=64, bias=True)
    (1): ReLU()
    (2): Linear(in_features=64, out_features=54, bias=True)
  )
)

## 4. Chargement des scalers et des jeux de données

In [None]:
# Load the scaler that was saved alongside each model (one scaler per model).
# joblib files are pickles: only load scaler files that come from a trusted source.
gnn_planetary_centroid_vanilla_scaler: StandardScaler = load(
    gnn_planetary_centroid_vanilla_scaler_file_path
)
gnn_planetary_centroid_pi_scaler: StandardScaler = load(
    gnn_planetary_centroid_pi_scaler_file_path
)
gnn_body_coordinates_vanilla_scaler: StandardScaler = load(
    gnn_body_coordinates_vanilla_scaler_file_path
)
gnn_body_coordinates_pi_scaler: StandardScaler = load(
    gnn_body_coordinates_pi_scaler_file_path
)
lstm_vanilla_scaler: StandardScaler = load(lstm_vanilla_scaler_file_path)
lstm_pi_scaler: StandardScaler = load(lstm_pi_scaler_file_path)

# Read the reference data (JSON Lines: one record per line) into DataFrames.
df_body_coordinates: pd.DataFrame = pd.read_json(
    body_coordinates_and_velocities_dataset_path,
    lines=True,
)
df_planetery_centroid_coordinates: pd.DataFrame = pd.read_json(
    planetary_centroid_coordinates_and_velocities_dataset_path,
    lines=True,
)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [None]:
# Quick sanity check: show five randomly sampled rows from each reference DataFrame.
display(df_body_coordinates.sample(5))
display(df_planetery_centroid_coordinates.sample(5))

Unnamed: 0,body_id,body_name,body_mass,datetime_jd,datetime_str,x,y,z,vx,vy,vz
968976,599,Jupiter,1.8982e+27,2507364.5,A.D. 2152-Oct-30 00:00:00.0000,4.892681,-0.859764,-0.105508,0.001216,0.007786,-6.01951e-05
335756,299,Venus,4.8675e+24,2367251.5,A.D. 1769-Mar-19 00:00:00.0000,-0.481997,0.527069,0.03497,-0.014925,-0.013838,0.0006836823
149139,10,Soleil,1.989e+30,2509372.5,A.D. 2158-Apr-30 00:00:00.0000,0.006962,0.001278,-0.000233,2e-06,8e-06,-8.84e-08
461536,299,Venus,4.8675e+24,2493031.5,A.D. 2113-Aug-03 00:00:00.0000,0.484342,0.541469,-0.020344,-0.015109,0.013439,0.001058368
465787,299,Venus,4.8675e+24,2497282.5,A.D. 2125-Mar-24 00:00:00.0000,0.692955,0.235726,-0.036462,-0.006576,0.019068,0.0006458105


## 5. Prédictions avec les différents modèles

La méthodologie pour la comparaison des modèles sera la suivante :
1. Le point de départ sera la position et la vélocité des planètes (ou du barycentre du système planétaire) le 1er janvier 2025.
2. À partir de ce point de départ, chaque modèle devra prédire la position et la vélocité de la planète ou du barycentre du système planétaire au jour suivant, et ainsi de suite, jusqu'à la fin de l'année.
3. Les positions prédites seront ensuite comparées aux positions "réelles", soit celles récupérées du système Horizons du Jet Propulsion Laboratory de la NASA et déjà présentes dans nos datasets.

In [None]:
# Divisor applied to the raw deltas predicted by the models: used directly by
# gnn_predict_2025_coordinates and passed as `delta_scaler` for the
# physics-informed LSTM.
DELTA_SCALER = 1000.0
# Number of initial states used to seed the LSTM input window.
SEQ_LEN = 15          # Must be identical to the value used during training
# Number of autoregressive steps performed by lstm_predict_2025_coordinates.
DAYS_TO_PREDICT = 365

def gnn_predict_2025_coordinates(X_dataset: solarSystemDataSet.SolarSystemDataset, model: GNN_NBody, scaler: StandardScaler) -> pd.DataFrame:
    """Roll a GNN forward one step per dataset state and return denormalized predictions.

    Starting from the first state of ``X_dataset``, the model repeatedly predicts
    a delta that is divided by ``DELTA_SCALER`` and added to the current
    (normalized) state. Each updated state is denormalized with ``scaler`` and
    recorded. The rollout is autoregressive: only the first dataset state is
    read, every later step feeds on the model's own previous output.

    Args:
        X_dataset: Dataset providing ``states`` (the first entry is the starting
            state and ``len(states)`` sets the number of steps) and the graph
            ``edge_index``.
        model: Trained GNN, called on a ``torch_geometric`` ``Data`` object.
        scaler: Scaler used to map normalized states back to physical values.
            Assumed to have been fitted on the 7 state columns — TODO confirm.

    Returns:
        A DataFrame with one row per body and per step, with columns
        ``day, x, y, z, vx, vy, vz, body_mass``.
    """
    feature_columns = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'body_mass']

    # Run inference on whichever device the model lives on, so the function
    # works for both CPU and CUDA models.
    first_parameter = next(model.parameters(), None)
    model_device = first_parameter.device if first_parameter is not None else torch.device("cpu")
    edge_index = X_dataset.edge_index.to(model_device)

    # Clone the starting state: the rollout must never modify the tensors owned
    # by the dataset, otherwise a second call would start from a corrupted state.
    current_pos_and_velocities = X_dataset.states[0].clone().to(model_device)

    daily_frames = []
    with torch.no_grad():
        for day_index in range(len(X_dataset.states)):
            # Predicted (scaled) deltas of positions and velocities for all bodies.
            predicted_deltas_normalized = model(Data(x=current_pos_and_velocities, edge_index=edge_index))

            # The model predicts fewer columns than the state holds (the mass is
            # not predicted), so the deltas are zero-padded on the right to match
            # the state's width. Sizes are read from the tensors instead of
            # assuming a fixed number of bodies.
            missing_columns = current_pos_and_velocities.shape[1] - predicted_deltas_normalized.shape[1]
            zero_padding = torch.zeros(
                predicted_deltas_normalized.shape[0],
                missing_columns,
                dtype=predicted_deltas_normalized.dtype,
                device=predicted_deltas_normalized.device,
            )
            padded_deltas = torch.cat([predicted_deltas_normalized, zero_padding], dim=1)

            # Out-of-place update: builds a new tensor instead of mutating in place.
            current_pos_and_velocities = current_pos_and_velocities + padded_deltas / DELTA_SCALER

            # Denormalize and tag every row with the step it belongs to.
            predicted_pos_and_velocities_denorm = scaler.inverse_transform(current_pos_and_velocities.cpu().numpy())
            df_day_predictions = pd.DataFrame(predicted_pos_and_velocities_denorm, columns=feature_columns)
            df_day_predictions.insert(loc=0, column="day", value=day_index)
            daily_frames.append(df_day_predictions)

    # Concatenate once at the end rather than growing a DataFrame inside the loop.
    return pd.concat(daily_frames, ignore_index=True)

def lstm_predict_2025_coordinates(X_dataset: solarSystemDataSet.SequentialSolarSystemDataset, model: SolarLSTM, scaler: StandardScaler, body_ids: np.ndarray, delta_scaler: float = 1.0) -> pd.DataFrame:
    """Roll the LSTM forward for ``DAYS_TO_PREDICT`` steps using a sliding window.

    The window is seeded with the first ``SEQ_LEN`` states of ``X_dataset``. At
    each step the model predicts a delta, which is divided by ``delta_scaler``,
    padded with a zero column for the mass and added to the most recent state.
    The resulting state replaces the oldest entry of the window.

    Args:
        X_dataset: Dataset whose ``states`` provide the initial window.
        model: Trained LSTM. The original notes give its output as (1, 9, 6)
            for (9, 7) states — not verifiable from this function alone.
        scaler: Scaler used to map normalized states back to physical values.
        body_ids: Body identifiers written to the ``body_id`` column of every step.
        delta_scaler: Divisor applied to the raw model output.

    Returns:
        A DataFrame with columns ``x, y, z, vx, vy, vz, body_mass, day, body_id``,
        one row per body and per predicted step.
    """
    feature_columns = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'body_mass']
    window = [X_dataset.states[i].to(device) for i in range(SEQ_LEN)]
    daily_frames: list = []

    with torch.no_grad():
        for day_idx in tqdm(range(DAYS_TO_PREDICT)):
            # Batch of one sequence built from the current window.
            batch = torch.stack(window).unsqueeze(0)

            # Predicted movement, rescaled and stripped of its batch dimension.
            delta = (model(batch) / delta_scaler).squeeze(0)

            # The mass is not predicted: pad the delta with a zero column so it
            # lines up with the full state, then apply it to the latest state.
            mass_padding = torch.zeros(delta.shape[0], 1).to(device)
            next_state = window[-1] + torch.cat([delta, mass_padding], dim=1)

            # Slide the window: drop the oldest state, append the new one.
            window = window[1:] + [next_state]

            # Denormalize and record the new state.
            denormalized = scaler.inverse_transform(next_state.cpu().numpy())
            frame = pd.DataFrame(denormalized, columns=feature_columns)
            frame['day'] = day_idx
            frame['body_id'] = body_ids
            daily_frames.append(frame)

    return pd.concat(daily_frames, ignore_index=True)

# Extract the rows of year 2025 into new DataFrames.
# `.copy()` makes each result an independent DataFrame, so the `body_mass`
# assignment below modifies a real object instead of a slice of the source
# frame (this is what triggered pandas' SettingWithCopyWarning).
df_body_coordinates_targets_2025 = df_body_coordinates[df_body_coordinates["datetime_str"].str.startswith("A.D. 2025-")].copy()
# The mass is stored as its base-10 logarithm.
df_body_coordinates_targets_2025['body_mass'] = np.log10(df_body_coordinates_targets_2025['body_mass'])
df_planetery_centroid_coordinates_targets_2025 = df_planetery_centroid_coordinates[df_planetery_centroid_coordinates["datetime_str"].str.startswith("A.D. 2025-")].copy()
df_planetery_centroid_coordinates_targets_2025['body_mass'] = np.log10(df_planetery_centroid_coordinates_targets_2025['body_mass'])

# Build one dataset per model: each model gets the 2025 data paired with the
# scaler that was saved with that model.

# GNN datasets on the per-body coordinates.
X_body_coordinates_vanilla_dataset = solarSystemDataSet.SolarSystemDataset(
    dataframe=df_body_coordinates_targets_2025,
    scaler=gnn_body_coordinates_vanilla_scaler,
)
X_body_coordinates_pi_dataset = solarSystemDataSet.SolarSystemDataset(
    dataframe=df_body_coordinates_targets_2025,
    scaler=gnn_body_coordinates_pi_scaler,
)

# GNN datasets on the planetary-centroid coordinates.
X_planetery_centroid_coordinates_vanilla_dataset = solarSystemDataSet.SolarSystemDataset(
    dataframe=df_planetery_centroid_coordinates_targets_2025,
    scaler=gnn_planetary_centroid_vanilla_scaler,
)
X_planetery_centroid_coordinates_pi_dataset = solarSystemDataSet.SolarSystemDataset(
    dataframe=df_planetery_centroid_coordinates_targets_2025,
    scaler=gnn_planetary_centroid_pi_scaler,
)

# Sequential datasets for the LSTM models, built from the per-body coordinates.
X_lstm_vanilla_dataset = solarSystemDataSet.SequentialSolarSystemDataset(
    dataframe=df_body_coordinates_targets_2025,
    scaler=lstm_vanilla_scaler,
)
X_lstm_pi_dataset = solarSystemDataSet.SequentialSolarSystemDataset(
    dataframe=df_body_coordinates_targets_2025,
    scaler=lstm_pi_scaler,
)

# Predict the 2025 coordinates with every model, each one paired with its own
# dataset and scaler.

# GNN models on the per-body coordinates.
df_predictions_body_coordinates_vanilla_2025 = gnn_predict_2025_coordinates(
    X_dataset=X_body_coordinates_vanilla_dataset,
    model=gnn_body_coordinates_vanilla_model,
    scaler=gnn_body_coordinates_vanilla_scaler,
)
df_predictions_body_coordinates_pi_2025 = gnn_predict_2025_coordinates(
    X_dataset=X_body_coordinates_pi_dataset,
    model=gnn_body_coordinates_pi_model,
    scaler=gnn_body_coordinates_pi_scaler,
)

# GNN models on the planetary-centroid coordinates.
df_predictions_planetery_centroid_coordinates_vanilla_2025 = gnn_predict_2025_coordinates(
    X_dataset=X_planetery_centroid_coordinates_vanilla_dataset,
    model=gnn_planetary_centroid_vanilla_model,
    scaler=gnn_planetary_centroid_vanilla_scaler,
)
df_predictions_planetery_centroid_coordinates_pi_2025 = gnn_predict_2025_coordinates(
    X_dataset=X_planetery_centroid_coordinates_pi_dataset,
    model=gnn_planetary_centroid_pi_model,
    scaler=gnn_planetary_centroid_pi_scaler,
)

# Body identifiers in order of first appearance once the rows are sorted by
# date and then by body id.
body_ids = (
    df_body_coordinates_targets_2025
    .sort_values(['datetime_jd', 'body_id'])['body_id']
    .unique()
)

# LSTM models. Only the physics-informed one is given DELTA_SCALER; the vanilla
# one keeps the default delta_scaler of 1.0.
df_predictions_lstm_vanilla_2025 = lstm_predict_2025_coordinates(
    X_dataset=X_lstm_vanilla_dataset,
    model=lstm_vanilla_model,
    scaler=lstm_vanilla_scaler,
    body_ids=body_ids,
)
df_predictions_lstm_pi_2025 = lstm_predict_2025_coordinates(
    X_dataset=X_lstm_pi_dataset,
    model=lstm_pi_model,
    scaler=lstm_pi_scaler,
    body_ids=body_ids,
    delta_scaler=DELTA_SCALER,
)

# Save the predictions for later comparison, as JSON Lines (one record per line).
# Paths are relative to the current working directory.
# NOTE(review): the target directories presumably need to exist beforehand — confirm.
# NOTE(review): only the last file name carries a "_V2" suffix — confirm this is intentional.
df_predictions_body_coordinates_vanilla_2025.to_json('results/gnn_planet_coord_vanilla/predicted_planet_coordinates_and_velocities_from_2025-01-01_to_2025-12-31.json', orient='records', lines=True)
df_predictions_body_coordinates_pi_2025.to_json('results/gnn_planet_coord_pi/predicted_planet_coordinates_and_velocities_from_2025-01-01_to_2025-12-31.json', orient='records', lines=True)
df_predictions_planetery_centroid_coordinates_vanilla_2025.to_json('results/gnn_planetery_centroid_coord_vanilla/predicted_planetery_centroid_coordinates_and_velocities_from_2025-01-01_to_2025-12-31.json', orient='records', lines=True)
df_predictions_planetery_centroid_coordinates_pi_2025.to_json('results/gnn_planetery_centroid_coord_pi/predicted_planetery_centroid_coordinates_and_velocities_from_2025-01-01_to_2025-12-31.json', orient='records', lines=True)
df_predictions_lstm_vanilla_2025.to_json('results/lstm_vanilla/predicted_planet_coordinates_and_velocities_from_2025-01-01_to_2025-12-31.json', orient='records', lines=True)
df_predictions_lstm_pi_2025.to_json('results/lstm_pi/predicted_planet_coordinates_and_velocities_from_2025-01-01_to_2025-12-31_V2.json', orient='records', lines=True)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_body_coordinates_targets_2025['body_mass'] = np.log10(df_body_coordinates_targets_2025['body_mass'])
Processing sequential data: 100%|██████████| 365/365 [00:00<00:00, 2038.04it/s]
100%|██████████| 365/365 [00:01<00:00, 348.83it/s]
