In [63]:
# Conversion des inputs d'entrainement en numpy (uniquement 126/ 370/ 585)
import xarray as xr
import numpy as np

# Liste des chemins vers vos fichiers NetCDF dans le sous-dossier
file_paths = [
    'extracted_contents/inputs_ssp126.nc',
    'extracted_contents/inputs_ssp370.nc',
    'extracted_contents/inputs_ssp585.nc'
]

# Dictionnaire pour stocker les tableaux NumPy
np_array_x_train = {}

for file_path in file_paths:
    # Ouvrir le fichier NetCDF
    ds = xr.open_dataset(file_path)
    # Convertir en tableau NumPy
    np_array_x_train[file_path] = ds.to_array().values

# Afficher les dimensions de chaque tableau NumPy
for file_path, array in np_array_x_train.items():
    print(f"Dimensions for {file_path}: {array.shape}")




Dimensions for extracted_contents/inputs_ssp126.nc: (4, 86, 96, 144)
Dimensions for extracted_contents/inputs_ssp370.nc: (4, 86, 96, 144)
Dimensions for extracted_contents/inputs_ssp585.nc: (4, 86, 96, 144)


In [64]:
# Conversion des outputs d'entrainement en numpy (uniquement 126/ 370/ 585)
import xarray as xr
import numpy as np

# Liste des chemins vers vos fichiers NetCDF de labels dans le sous-dossier
label_file_paths = [
    'extracted_contents/outputs_ssp126.nc',
    'extracted_contents/outputs_ssp370.nc',
    'extracted_contents/outputs_ssp585.nc'
]

# Dictionnaire pour stocker les tableaux NumPy des labels
np_array_y_train = {}

for file_path in label_file_paths:
    # Ouvrir le fichier NetCDF
    ds = xr.open_dataset(file_path)
    # Convertir en tableau NumPy
    np_array_y_train[file_path] = ds.to_array().values

# Afficher les dimensions de chaque tableau NumPy des labels
for file_path, array in np_array_y_train.items():
    print(f"Dimensions for {file_path}: {array.shape}")


Dimensions for extracted_contents/outputs_ssp126.nc: (4, 1, 86, 96, 144)
Dimensions for extracted_contents/outputs_ssp370.nc: (4, 3, 86, 96, 144)
Dimensions for extracted_contents/outputs_ssp585.nc: (4, 1, 86, 96, 144)


In [65]:
#on fait la moyenne sur les 3 ensembles members pour que ssp370 ait la meme dimension que les autres
import numpy as np

# Supposons que np_labels contient les tableaux NumPy des labels
# Calculer la moyenne pour le fichier outputs_ssp370.nc
file_path_370 = 'extracted_contents/outputs_ssp370.nc'

# Vérifiez si la clé existe dans le dictionnaire
if file_path_370 in np_array_y_train:
    array_370 = np_array_y_train[file_path_370]

    # Calculer la moyenne sur l'axe 1 (l'axe avec les 3 valeurs)
    averaged_array_370 = np.mean(array_370, axis=1, keepdims=True)

    # Remplacer l'ancien tableau par le nouveau tableau avec la moyenne
    np_array_y_train[file_path_370] = averaged_array_370

# Vérifier les nouvelles dimensions
for file_path, array in np_array_y_train.items():
    print(f"Dimensions for {file_path}: {array.shape}")
    
    



Dimensions for extracted_contents/outputs_ssp126.nc: (4, 1, 86, 96, 144)
Dimensions for extracted_contents/outputs_ssp370.nc: (4, 1, 86, 96, 144)
Dimensions for extracted_contents/outputs_ssp585.nc: (4, 1, 86, 96, 144)


In [66]:


import numpy as np

transformed_x_train = {}
transformed_y_train = {}

for scenario in ['ssp126', 'ssp370', 'ssp585']:
    x_key = f'extracted_contents/inputs_{scenario}.nc'
    y_key = f'extracted_contents/outputs_{scenario}.nc'

    x_data = np_array_x_train[x_key]  # Shape: (4, 86, 96, 144)
    y_data = np_array_y_train[y_key]  # Shape: (4, 1, 86, 96, 144)

    num_variables, num_years, lat, lon = x_data.shape
    time_window = 10
    num_sequences = num_years - time_window + 1  # 77 sequences

    # Reshape inputs: (4, 77, 10, 96, 144)
    x_transformed = np.zeros((num_variables, num_sequences, time_window, lat, lon))
    y_transformed = np.zeros((num_variables, num_sequences, lat, lon))  # (4, 77, 96, 144)

    for t in range(num_sequences):
        x_transformed[:, t, :, :, :] = x_data[:, t:t+time_window, :, :]
        y_transformed[:, t, :, :] = y_data[:, 0, t+time_window-1, :, :]  # Prendre la dernière année du bloc

    transformed_x_train[x_key] = x_transformed
    transformed_y_train[y_key] = y_transformed
    
    
    # Configurer les options d'affichage pour voir tous les éléments
#np.set_printoptions(threshold=np.inf)
print(transformed_x_train)





{'extracted_contents/inputs_ssp126.nc': array([[[[[1.53607222e+03, 1.53607222e+03, 1.53607222e+03, ...,
           1.53607222e+03, 1.53607222e+03, 1.53607222e+03],
          [1.53607222e+03, 1.53607222e+03, 1.53607222e+03, ...,
           1.53607222e+03, 1.53607222e+03, 1.53607222e+03],
          [1.53607222e+03, 1.53607222e+03, 1.53607222e+03, ...,
           1.53607222e+03, 1.53607222e+03, 1.53607222e+03],
          ...,
          [1.53607222e+03, 1.53607222e+03, 1.53607222e+03, ...,
           1.53607222e+03, 1.53607222e+03, 1.53607222e+03],
          [1.53607222e+03, 1.53607222e+03, 1.53607222e+03, ...,
           1.53607222e+03, 1.53607222e+03, 1.53607222e+03],
          [1.53607222e+03, 1.53607222e+03, 1.53607222e+03, ...,
           1.53607222e+03, 1.53607222e+03, 1.53607222e+03]],

         [[1.57190649e+03, 1.57190649e+03, 1.57190649e+03, ...,
           1.57190649e+03, 1.57190649e+03, 1.57190649e+03],
          [1.57190649e+03, 1.57190649e+03, 1.57190649e+03, ...,
           

In [67]:
#vérification des dimensions
# Afficher les dimensions de chaque tableau NumPy
for file_path, array in transformed_x_train.items():
    print(f"Dimensions for {file_path}: {array.shape}")
    
    
# Afficher les dimensions de chaque tableau NumPy des labels
for file_path, array in transformed_y_train.items():
    print(f"Dimensions for {file_path}: {array.shape}")


Dimensions for extracted_contents/inputs_ssp126.nc: (4, 77, 10, 96, 144)
Dimensions for extracted_contents/inputs_ssp370.nc: (4, 77, 10, 96, 144)
Dimensions for extracted_contents/inputs_ssp585.nc: (4, 77, 10, 96, 144)
Dimensions for extracted_contents/outputs_ssp126.nc: (4, 77, 96, 144)
Dimensions for extracted_contents/outputs_ssp370.nc: (4, 77, 96, 144)
Dimensions for extracted_contents/outputs_ssp585.nc: (4, 77, 96, 144)


In [68]:
# Concatenation des valeurs dans l'ordre de 126, 370 et 585
x_train = np.concatenate([
    transformed_x_train['extracted_contents/inputs_ssp126.nc'],
    transformed_x_train['extracted_contents/inputs_ssp370.nc'],
    transformed_x_train['extracted_contents/inputs_ssp585.nc']
], axis=1)

y_train = np.concatenate([
    transformed_y_train['extracted_contents/outputs_ssp126.nc'],
    transformed_y_train['extracted_contents/outputs_ssp370.nc'],
    transformed_y_train['extracted_contents/outputs_ssp585.nc']
], axis=1)

# x_train et y_train sont maintenant des tableaux concaténés
print(x_train.shape, y_train.shape)


(4, 231, 10, 96, 144) (4, 231, 96, 144)


In [69]:
from sklearn.preprocessing import StandardScaler
import numpy as np

# Fonction pour normaliser/standardiser les données en supprimant les valeurs nulles
def normalize_data(x_train, y_train):
    # Aplatir les données pour appliquer la standardisation/normalisation à chaque feature
    x_flat = x_train.reshape(-1, x_train.shape[-1])  # Aplatir en 2D pour les features
    y_flat = y_train.reshape(-1, y_train.shape[-1])  # Aplatir en 2D pour les outputs
    
    # On crée un scaler pour la standardisation (moyenne=0, écart-type=1)
    scaler_x = StandardScaler()
    scaler_y = StandardScaler()

    # Appliquer la standardisation (moyenne=0, écart-type=1)
    x_flat_scaled = scaler_x.fit_transform(x_flat)
    y_flat_scaled = scaler_y.fit_transform(y_flat)
    
    # On redimensionne les données pour les remettre dans leur forme d'origine
    x_train_scaled = x_flat_scaled.reshape(x_train.shape)
    y_train_scaled = y_flat_scaled.reshape(y_train.shape)
    
    return x_train_scaled, y_train_scaled

# Normaliser/standardiser les données
x_train = np.transpose(x_train, (1, 2, 3, 4, 0))  # (231, 10, 96, 144, 4)
y_train = np.transpose(y_train, (1, 2, 3, 0))      # (231, 96, 144, 4)

x_train_scaled, y_train_scaled = normalize_data(x_train, y_train)




# Vérifier les dimensions et valeurs
print(x_train_scaled.shape, y_train_scaled.shape)
# Sauvegarder le tableau
# Sauvegarder les tableaux
np.save('x_train_scaled.npy', x_train_scaled)
np.save('y_train_scaled.npy', y_train_scaled)



(231, 10, 96, 144, 4) (231, 96, 144, 4)


In [70]:
#on va maintenant construire x_test et y_test
import xarray as xr
import numpy as np

# Chemin vers les fichiers NetCDF
input_file_path = 'inputs_ssp245.nc'
output_file_path = 'outputs_ssp245.nc'

# Convertir le fichier inputs_ssp245.nc en tableau NumPy
ds_input = xr.open_dataset(input_file_path)
x_test = ds_input.to_array().values

# Convertir le fichier outputs_ssp245.nc en tableau NumPy
ds_output = xr.open_dataset(output_file_path)
y_test = ds_output.to_array().values

# Vérifier les dimensions des tableaux
print("Dimensions de x_test :", x_test.shape)
print("Dimensions de y_test :", y_test.shape)


Dimensions de x_test : (4, 86, 96, 144)
Dimensions de y_test : (4, 3, 86, 96, 144)


In [71]:
# Calculer la moyenne sur l'axe 1 (l'axe avec les 3 valeurs)
averaged_y_test = np.mean(y_test, axis=1, keepdims=True)
# Vérifier les dimensions des tableaux
print("Dimensions de x_test :", x_test.shape)
print("Dimensions de averaged_y_test :", averaged_y_test.shape)




    

Dimensions de x_test : (4, 86, 96, 144)
Dimensions de averaged_y_test : (4, 1, 86, 96, 144)


In [72]:




num_variables, num_years, lat, lon = x_test.shape
time_window = 10
num_sequences = num_years - time_window + 1  # 77 sequences

    # Reshape inputs: (4, 77, 10, 96, 144)
x_test_transformed = np.zeros((num_variables, num_sequences, time_window, lat, lon))
y_test_transformed = np.zeros((num_variables, num_sequences, lat, lon))  # (4, 77, 96, 144)

for t in range(num_sequences):
    x_test_transformed[:, t, :, :, :] = x_test[:, t:t+time_window, :, :]
    y_test_transformed[:, t, :, :] = averaged_y_test[:, 0, t+time_window-1, :, :]  # Prendre la dernière année du bloc


print("Dimensions de x_test_transformed :", x_test_transformed.shape)
print("Dimensions de y_test_transformed :", y_test_transformed.shape)





Dimensions de x_test_transformed : (4, 77, 10, 96, 144)
Dimensions de y_test_transformed : (4, 77, 96, 144)


In [73]:

# Normaliser/standardiser les données
x_test_transformed = np.transpose(x_test_transformed, (1, 2, 3, 4, 0))  # (77, 10, 96, 144, 4)
y_test_transformed = np.transpose(y_test_transformed, (1, 2, 3, 0))      # (77, 96, 144, 4)

# Normaliser/standardiser les données
x_test_scaled, y_test_scaled = normalize_data(x_test_transformed, y_test_transformed)




# Vérifier les dimensions et valeurs
print(x_test_scaled.shape, y_test_scaled.shape)



# Sauvegarder les tableaux
np.save('x_test_scaled.npy', x_test_scaled)
np.save('y_test_scaled.npy', y_test_scaled)


(77, 10, 96, 144, 4) (77, 96, 144, 4)
