In [37]:
import os

import branca.colormap as cm
import folium
import geopandas as gpd
import numpy as np
import pandas as pd
from cartiflette import carti_download
from scipy import stats
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import set_random_seed

# Output directory tree for the LSTM project: one folder for the historical
# maps, one for the 2008 prediction maps. Safe to re-run (exist_ok=True).
folders = [
    "cartes_climat2/historique",
    "cartes_climat2/predictions_2008",
]
for folder in folders:
    os.makedirs(folder, exist_ok=True)

print("‚úÖ Dossiers cr√©√©s : cartes_climat2")

‚úÖ Dossiers cr√©√©s : cartes_climat2


In [38]:
# 1. Load the cleaned weather observations. The first CSV column becomes a
#    datetime index; the sort below refers to it as 'time' (presumably the
#    index name stored in the CSV header -- confirm against df_clean.csv).
df_raw = pd.read_csv("df_clean.csv", index_col=0)
df_raw.index = pd.to_datetime(df_raw.index)
df_raw = df_raw.sort_values(['latitude', 'longitude', 'time'])

# 2. Download the French departement outlines (GeoJSON hosted on GitHub).
print("‚è≥ T√©l√©chargement des fronti√®res administratives...")
# Per the original author's note, this file stores the departement codes
# (01, 02, 03...) as text.
url_geojson = "https://raw.githubusercontent.com/gregoiredavid/france-geojson/master/departements.geojson"

try:
    # Read straight from the remote URL.
    departements_fr = gpd.read_file(url_geojson)
    print("‚úÖ Donn√©es g√©ographiques t√©l√©charg√©es.")
except Exception as e:
    print(f"‚ùå Erreur lors du t√©l√©chargement : {e}")
    # Everything below needs `departements_fr`. Re-raise so the cell stops
    # here with the real cause instead of failing a few lines later with a
    # NameError (or silently reusing a stale frame left in the kernel).
    raise

# 3. Keep the four Auvergne departements
#    (03 = Allier, 15 = Cantal, 43 = Haute-Loire, 63 = Puy-de-Dome).
# Note: in this file the column is called 'code', not 'INSEE_DEP'.
deps_codes = ['03', '15', '43', '63']
auvergne_deps = departements_fr[departements_fr['code'].isin(deps_codes)].copy()

# 4. Turn the weather rows into WGS84 points so they can be filtered spatially.
gdf_total = gpd.GeoDataFrame(
    df_raw,
    geometry=gpd.points_from_xy(df_raw['longitude'], df_raw['latitude']),
    crs="EPSG:4326"
)

# Spatial join: keep only the points lying within one of the Auvergne polygons.
gdf_auvergne = gpd.sjoin(gdf_total, auvergne_deps[['code', 'geometry']], predicate="within")

# The sequence builder used later walks each (latitude, longitude) series in
# row order, so rows must be chronological within each point.
gdf_auvergne = gdf_auvergne.sort_values(['latitude', 'longitude', 'time'])
print(f"‚úÖ Points filtr√©s en Auvergne : {gdf_auvergne.shape[0]} lignes.")

‚è≥ T√©l√©chargement des fronti√®res administratives...
‚úÖ Donn√©es g√©ographiques t√©l√©charg√©es.
‚úÖ Points filtr√©s en Auvergne : 90300 lignes.


In [39]:
# 1. Features: calendar month plus its cyclical (sine / cosine) encoding, so
#    that December and January end up close to each other.
gdf_auvergne['month'] = gdf_auvergne.index.month
month_angle = 2 * np.pi * gdf_auvergne['month'] / 12
gdf_auvergne['month_sin'] = np.sin(month_angle)
gdf_auvergne['month_cos'] = np.cos(month_angle)

# 2. Scalers: one for the target variable, one for the coordinates.
scaler_skt = StandardScaler()
scaler_geo = StandardScaler()

# Temporal split on the index year. 2007 appears in both frames on purpose:
# in the test frame it only serves as look-back context for the 2008 targets.
obs_year = gdf_auvergne.index.year
df_train_full = gdf_auvergne[obs_year <= 2007].copy()
df_test_full = gdf_auvergne[obs_year >= 2007].copy()

geo_cols = ['latitude', 'longitude']
geo_norm_cols = ['lat_norm', 'lon_norm']

# Fit both scalers on the training frame only...
df_train_full['skt_norm'] = scaler_skt.fit_transform(df_train_full[['skt']])
df_train_full[geo_norm_cols] = scaler_geo.fit_transform(df_train_full[geo_cols])

# ...then apply the fitted statistics unchanged to the test frame.
df_test_full['skt_norm'] = scaler_skt.transform(df_test_full[['skt']])
df_test_full[geo_norm_cols] = scaler_geo.transform(df_test_full[geo_cols])

# Column order matters: the sequence builder reads the target from column 0.
features_list = ['skt_norm', 'month_sin', 'month_cos', 'lat_norm', 'lon_norm']

In [40]:
# Number of consecutive time steps given to the LSTM as look-back context for
# each prediction; also the first dimension of the model's Input shape.
# Presumably 12 monthly steps (one year) -- confirm the sampling frequency of
# the input data.
WINDOW_SIZE = 12 

def create_lstm_sequences(df, window, is_test=False, features=None, target_year=2008):
    """Build sliding-window samples for every (latitude, longitude) series.

    For each grid point, every run of ``window`` consecutive rows becomes one
    input sample and the row right after it supplies the target, taken from
    the FIRST feature column.

    Rows are consumed in the order they appear inside each group, so ``df``
    must already be sorted chronologically within each point.

    Parameters
    ----------
    df : DataFrame
        Must contain 'latitude', 'longitude' and every feature column. Its
        index supplies the target dates (it needs a ``.year`` attribute when
        ``is_test`` is True).
    window : int
        Number of look-back steps per sample.
    is_test : bool, default False
        When True, keep only the samples whose target date falls in
        ``target_year``; earlier rows then serve purely as look-back context.
    features : sequence of str, optional
        Feature columns, target first. Defaults to the notebook-level
        ``features_list`` so existing calls keep working.
    target_year : int, default 2008
        Year retained when ``is_test`` is True.

    Returns
    -------
    X : ndarray of shape (n_samples, window, n_features)
        Always 3-D, including when no sample could be built.
    y : ndarray of shape (n_samples,)
    info : list of [target_date, latitude, longitude]
        One entry per sample, in the same order as X and y.
    """
    if features is None:
        # Backward-compatible default: fall back to the notebook global.
        features = features_list
    features = list(features)

    X, y, info = [], [], []
    for (lat, lon), group in df.groupby(['latitude', 'longitude']):
        values = group[features].values
        dates = group.index
        # range() is empty when the series is not longer than the window,
        # so short series contribute nothing.
        for i in range(len(values) - window):
            target_date = dates[i + window]
            if is_test and target_date.year != target_year:
                continue
            X.append(values[i : i + window, :])
            y.append(values[i + window, 0])
            info.append([target_date, lat, lon])

    # np.array([]) would be 1-D; keep the (samples, window, features) layout
    # even when empty so downstream code can rely on a 3-D array.
    X_arr = np.array(X).reshape(len(X), window, len(features))
    return X_arr, np.array(y), info

# Training hyper-parameters, named so they can be tuned in one place.
SEED = 42
EPOCHS = 50
BATCH_SIZE = 256
LEARNING_RATE = 0.001

# Seed the Python, NumPy and TensorFlow generators before the model is built,
# so weight initialisation, dropout masks and batch shuffling repeat from one
# run of this cell to the next.
set_random_seed(SEED)

# Training windows come from the training frame; the test windows keep only
# targets dated 2008 (is_test=True), with earlier rows acting as look-back.
X_train, y_train, _ = create_lstm_sequences(df_train_full, WINDOW_SIZE)
X_test, y_test, info_test = create_lstm_sequences(df_test_full, WINDOW_SIZE, is_test=True)

# LSTM architecture: two stacked recurrent layers with dropout in between,
# followed by a single linear output (the normalised target).
model = Sequential([
    Input(shape=(WINDOW_SIZE, len(features_list))),
    LSTM(64, return_sequences=True, activation='tanh'),
    Dropout(0.2),
    LSTM(32, activation='tanh'),
    Dense(1)
])
model.compile(optimizer=Adam(LEARNING_RATE), loss='mse')

print("üöÄ Entra√Ænement du LSTM Spatial...")
model.fit(X_train, y_train, epochs=EPOCHS, batch_size=BATCH_SIZE)

# Prediction on the 2008 windows, then back from normalised units to the
# original unit of `skt` via the scaler fitted on the training data.
y_pred_norm = model.predict(X_test)
df_2008_lstm = pd.DataFrame(info_test, columns=['time', 'latitude', 'longitude'])
df_2008_lstm['skt'] = scaler_skt.inverse_transform(y_test.reshape(-1, 1))
df_2008_lstm['pred_lstm'] = scaler_skt.inverse_transform(y_pred_norm)
df_2008_lstm.set_index('time', inplace=True)

üöÄ Entra√Ænement du LSTM Spatial...
Epoch 1/50
[1m99/99[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m3s[0m 11ms/step - loss: 0.2461
Epoch 2/50
[1m99/99[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.0812
Epoch 3/50
[1m99/99[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.0753
Epoch 4/50
[1m99/99[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.0724
Epoch 5/50
[1m99/99[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.0703
Epoch 6/50
[1m99/99[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 11ms/step - loss: 0.0682
Epoch 7/50
[1m99/99[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m1s[0m 11ms/st

In [41]:
def generate_pro_map(data, date_label, col_name, output_path, title_text,
                     vmin_k=None, vmax_k=None, sample_step=2):
    """Render one interactive temperature map and save it as an HTML file.

    Every kept row of ``data`` is drawn as a coloured circle marker with a
    popup. Values in ``data[col_name]`` are treated as Kelvin and converted
    to Celsius (value - 273.15) for both the colour scale and the popup.
    The departement outlines come from the notebook-level ``auvergne_deps``.

    Parameters
    ----------
    data : DataFrame
        Needs 'latitude', 'longitude' and ``col_name`` columns.
    date_label : str
        Period shown in the popups and in the title box.
    col_name : str
        Column holding the temperature to plot (Kelvin).
    output_path : str
        Destination of the generated HTML file.
    title_text : str
        Heading shown in the popups and in the title box.
    vmin_k, vmax_k : float, optional
        Colour-scale bounds in Kelvin. When omitted, the bounds are the
        min / max of the plotted column (the original behaviour). Pass the
        same bounds to several calls to make their maps share one scale,
        e.g. a predicted map and the matching observed map.
    sample_step : int, default 2
        Draw every ``sample_step``-th row to keep the map readable
        (2 = every other row, as before; 1 = every row).

    Returns
    -------
    None. The map is written to ``output_path``.
    """
    kelvin_offset = 273.15

    # Rows without a coordinate or a value cannot be placed on the map or on
    # the colour scale, so drop them up front. With complete data this is a
    # no-op.
    plottable = data.dropna(subset=['latitude', 'longitude', col_name])

    # 1. Light basemap centred on Auvergne.
    m = folium.Map(location=[45.7, 3.2], zoom_start=8, tiles="CartoDB positron")

    # 2. Scale bounds converted Kelvin -> Celsius so the legend reads in
    #    degrees Celsius. Explicit bounds win over the data-driven ones.
    low_k = plottable[col_name].min() if vmin_k is None else vmin_k
    high_k = plottable[col_name].max() if vmax_k is None else vmax_k
    vmin_c = low_k - kelvin_offset
    vmax_c = high_k - kelvin_offset

    # Palette reversed so it runs blue (cold) -> yellow -> red (hot).
    colors = cm.linear.RdYlBu_11.colors[::-1]
    colormap = cm.LinearColormap(colors=colors, vmin=vmin_c, vmax=vmax_c)
    colormap.caption = "Temp√©rature (¬∞C)"
    colormap.add_to(m)

    # 3. Thin out the points for readability.
    sampled = plottable.iloc[::sample_step]

    # 4. Departement outlines, drawn without fill.
    folium.GeoJson(auvergne_deps, style_function=lambda x: {
        'fillColor': 'none', 'color': '#333', 'weight': 1.5, 'opacity': 0.4
    }).add_to(m)

    # 5. One marker per sampled row; the Celsius value drives both the marker
    #    colour and the popup text.
    for _, row in sampled.iterrows():
        val_k = row[col_name]
        val_c = val_k - kelvin_offset

        color_point = colormap(val_c)

        popup_html = f"""
        <div style="font-family: Arial; font-size: 12px; width: 160px;">
            <h4 style="margin:0; color:#333;">{title_text}</h4>
            <hr style="margin:5px 0;">
            <b>Mois :</b> {date_label}<br>
            <b>Temp√©rature :</b> <b style="color:{color_point};">{val_c:.2f} ¬∞C</b><br>
            <b>Position :</b> {row['latitude']:.2f}, {row['longitude']:.2f}
        </div>
        """

        folium.CircleMarker(
            location=[row['latitude'], row['longitude']],
            radius=5,
            color=color_point,
            fill=True,
            fill_color=color_point,
            fill_opacity=0.8,
            stroke=True,
            weight=0.8,
            popup=folium.Popup(popup_html, max_width=250)
        ).add_to(m)

    # 6. Fixed title box overlaid on the map.
    title_html = f'''
             <div style="position: fixed; top: 10px; left: 50px; width: 300px; z-index:9999; 
                         background-color: white; border:2px solid black; padding: 10px; border-radius:5px; opacity:0.9;">
                 <b>{title_text}</b><br>P√©riode : {date_label}
             </div>
             '''
    m.get_root().html.add_child(folium.Element(title_html))
    m.save(output_path)

In [42]:
# 1. Predict on the test windows; the output is still in normalised units.
y_pred_norm = model.predict(X_test)

# 2. Undo the standardisation to get back to the original unit of `skt`.
#    NOTE(review): that unit looks like Kelvin, not Celsius -- the map helper
#    subtracts 273.15 from these columns before display. Confirm.
y_true_real = scaler_skt.inverse_transform(y_test.reshape(-1, 1)).flatten()
y_pred_real = scaler_skt.inverse_transform(y_pred_norm).flatten()

# 3. Results frame: one row per test sample, indexed by target date, with the
#    observed value ('skt') next to the model output ('pred_lstm').
df_results_lstm = pd.DataFrame(info_test, columns=['time', 'latitude', 'longitude'])
df_results_lstm['skt'] = y_true_real
df_results_lstm['pred_lstm'] = y_pred_real
df_results_lstm = df_results_lstm.set_index('time')

print(f"‚úÖ df_results_lstm cr√©√© avec {len(df_results_lstm)} lignes.")

[1m113/113[0m [32m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m[37m[0m [1m0s[0m 2ms/step
‚úÖ df_results_lstm cr√©√© avec 3612 lignes.


In [43]:
from scipy import stats
import numpy as np

# --- 1. HISTORICAL MAPS (training frame: every year up to and including 2007) ---
print("‚è≥ Pr√©paration et g√©n√©ration de l'Historique en Celsius...")
# Monthly period label derived from the datetime index; used as grouping key.
df_train_full['period'] = df_train_full.index.to_period('M')

point_keys = ['latitude', 'longitude']

for period, group in df_train_full.groupby('period'):
    # Mean value per grid point for that month.
    monthly = group.groupby(point_keys)[['skt']].mean().reset_index()
    path_h = f"cartes_climat2/historique/HIST_{period}.html"

    generate_pro_map(
        data=monthly,
        date_label=str(period),
        col_name='skt',
        output_path=path_h,
        title_text="Historique Auvergne (¬∞C)",
    )

# --- 2. LSTM PREDICTION MAPS (2008) ---
print("‚è≥ Pr√©paration et g√©n√©ration des Pr√©dictions LSTM en Celsius...")

# Outlier filter on the observed values: keep only the rows whose 'skt' lies
# less than 3 standard deviations away from the mean.
z_scores = np.abs(stats.zscore(df_results_lstm['skt']))
within_3_sigma = z_scores < 3
df_2008_clean = df_results_lstm[within_3_sigma].copy()

# Same monthly period label as for the historical frame.
df_2008_clean['period'] = df_2008_clean.index.to_period('M')

value_cols = ['skt', 'pred_lstm']

for p, group in df_2008_clean.groupby('period'):
    # Mean observed and predicted value per grid point for that month.
    monthly = group.groupby(point_keys)[value_cols].mean().reset_index()

    # Predicted map (model output).
    path_p = f"cartes_climat2/predictions_2008/MAP_PRED_LSTM_{p}.html"
    generate_pro_map(
        data=monthly,
        date_label=str(p),
        col_name='pred_lstm',
        output_path=path_p,
        title_text="IA : Pr√©diction LSTM (¬∞C)",
    )

    # Observed map (control).
    path_r = f"cartes_climat2/predictions_2008/MAP_REEL_{p}.html"
    generate_pro_map(
        data=monthly,
        date_label=str(p),
        col_name='skt',
        output_path=path_r,
        title_text="R√©alit√© : Terrain Auvergne (¬∞C)",
    )

print("‚ú® Termin√© ! Vos cartes en degr√©s Celsius sont pr√™tes dans 'cartes_climat2'.")

‚è≥ Pr√©paration et g√©n√©ration de l'Historique en Celsius...
‚è≥ Pr√©paration et g√©n√©ration des Pr√©dictions LSTM en Celsius...
‚ú® Termin√© ! Vos cartes en degr√©s Celsius sont pr√™tes dans 'cartes_climat2'.
