# notebook_auto.ipynb
#### Analyse automatisée des vents par site
Paramétré pour Papermill : prend **site_ref** et génère figures + tables

In [None]:
# Parameters
# Site identifier; the data directory and every output file name in this
# notebook are derived from it. The notebook header states that Papermill
# supplies this value, so the literal below is only the fallback default.
site_ref = "DEFAULT_SITE"

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import weibull_min, gumbel_r

# Use seaborn's white-grid theme for every figure produced by this notebook.
sns.set(style="whitegrid")

# Per-site layout: the site folder holds the input CSVs, and two sub-folders
# receive the generated figures and tables.
base_dir = f"data/{site_ref}"
figures_dir, tables_dir = (
    os.path.join(base_dir, sub_folder) for sub_folder in ("figures", "tables")
)

# Create both output folders up front; existing folders are left untouched.
for out_dir in (figures_dir, tables_dir):
    os.makedirs(out_dir, exist_ok=True)

print(f"[✅] Notebook auto v2 démarré pour : {site_ref}")


In [None]:
# Load every source CSV of the site into `all_dfs`, keyed by file name without
# its extension. Only files whose name contains one of the known source tags
# are considered.
all_dfs = {}

# Sorted listing: os.listdir returns entries in arbitrary order, which would
# make table rows and legend order vary from one run to the next.
for file in sorted(os.listdir(base_dir)):
    if not file.endswith('.csv'):
        continue
    if not any(tag in file for tag in ('station', 'openmeteo', 'era5', 'nasa_power')):
        continue

    path = os.path.join(base_dir, file)
    try:
        # Only the read itself is guarded: a broken file is reported and
        # skipped so the remaining sources can still be analysed.
        df = pd.read_csv(path, parse_dates=['date'])
    except Exception as e:
        print(f"[⚠️] Erreur chargement {file} : {e}")
        continue

    # splitext removes only the trailing extension, whereas str.replace would
    # also remove a '.csv' occurring in the middle of the name.
    all_dfs[os.path.splitext(file)[0]] = df
    print(f"[✅] Chargé : {file} ({df.shape[0]} lignes)")

if not all_dfs:
    # FileNotFoundError is still an Exception, so existing handlers keep working.
    raise FileNotFoundError("[❌] Aucun CSV trouvé pour ce site.")


In [None]:
# Build one summary row per (source, variable) pair that has at least one
# non-missing value, then export the table as CSV and display it.
stat_results = []
for name, df in all_dfs.items():
    for var in ['windspeed_mean', 'windspeed_gust']:
        if var not in df.columns:
            continue
        series = df[var].dropna()
        if series.empty:
            continue

        q25 = series.quantile(0.25)
        q50 = series.quantile(0.5)
        q75 = series.quantile(0.75)
        stats = {
            'Source': name,
            'Variable': var,
            'Count': len(series),
            'Mean': series.mean(),
            'Std': series.std(),
            'Min': series.min(),
            'Max': series.max(),
            '25%': q25,
            '50%': q50,
            '75%': q75,
        }
        stat_results.append(stats)

df_stats = pd.DataFrame(stat_results)
df_stats.to_csv(f"{tables_dir}/stats_descriptives_{site_ref}.csv", index=False)
# Bare expression: lets the notebook render the table as the cell output.
df_stats


In [None]:
# One overlaid histogram (with KDE) per wind variable, comparing all sources.
for var in ['windspeed_mean', 'windspeed_gust']:
    # Collect the usable series first so that no blank figure is written (and
    # no empty-legend warning raised) when the variable is absent everywhere.
    series_by_source = {}
    for name, df in all_dfs.items():
        if var in df.columns:
            values = df[var].dropna()
            if not values.empty:
                series_by_source[name] = values
    if not series_by_source:
        continue

    plt.figure(figsize=(10, 6))
    try:
        for name, values in series_by_source.items():
            sns.histplot(values, label=name, kde=True, bins=30, alpha=0.6)
        plt.title(f"Histogramme - {var.replace('_',' ').capitalize()} par Source")
        plt.xlabel("Vitesse (m/s)")
        plt.ylabel("Occurrences")
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"{figures_dir}/hist_{var}_{site_ref}.png")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()


In [None]:
# One boxplot per wind variable, with one box per source.
for var in ['windspeed_mean', 'windspeed_gust']:
    # Build the long-format table (one row per observation) with vectorised
    # pandas operations instead of appending one dict per value.
    frames = []
    for name, df in all_dfs.items():
        if var in df.columns:
            values = df[var].dropna()
            if not values.empty:
                frames.append(pd.DataFrame({'Source': name, var: values.to_numpy()}))

    # Skip the variable entirely when no source provides any value for it.
    if frames:
        df_combined = pd.concat(frames, ignore_index=True)
        plt.figure(figsize=(12, 6))
        sns.boxplot(data=df_combined, x='Source', y=var)
        plt.title(f"Boxplot - {var.replace('_',' ').capitalize()} par Source")
        plt.xticks(rotation=45)
        plt.tight_layout()
        plt.savefig(f"{figures_dir}/boxplot_{var}_{site_ref}.png")
        plt.close()


In [None]:
import matplotlib.cm as cm
import matplotlib.colors as mcolors

def plot_wind_rose(df, name, site_ref, var='windspeed_mean'):
    """Save a radar-style wind rose of the mean speed per 30-degree sector.

    Only rows where both the direction and the speed are present are used, so
    every speed is averaged in the sector of its own direction reading.
    Directions are wrapped into [0, 360) before binning, which places a
    reading of 360 in the same sector as 0.

    Nothing is drawn when ``df`` lacks the ``wind_direction`` or ``var``
    column, or when no row has both values.

    Args:
        df: Table with a ``wind_direction`` column (converted from degrees to
            radians for plotting, and drawn clockwise with north at the top)
            and the ``var`` column.
        name: Source label, used in the title and in the output file name.
        site_ref: Site identifier, used in the output file name.
        var: Name of the speed column to average.

    The PNG is written into the module-level ``figures_dir`` folder.
    """
    if 'wind_direction' not in df.columns or var not in df.columns:
        return

    # Joint dropna: dropping missing values column by column would leave rows
    # with only one of the two values and could yield NaN sector means.
    df_rose = pd.DataFrame({
        'direction': df['wind_direction'],
        'speed': df[var],
    }).dropna()
    if df_rose.empty:
        return

    # Lower edge of the 30-degree sector each reading falls into; the modulo
    # folds 360 (and any out-of-range angle) back onto the [0, 360) circle.
    dir_bin = ((df_rose['direction'] % 360) // 30) * 30
    rose_data = df_rose['speed'].groupby(dir_bin).mean()

    # Repeat the first point at the end so the polygon is closed.
    sectors = rose_data.index.tolist()
    means = rose_data.tolist()
    angles = np.deg2rad(sectors + sectors[:1])
    values = means + means[:1]

    fig, ax = plt.subplots(subplot_kw={'projection': 'polar'}, figsize=(8,8))
    try:
        ax.plot(angles, values, 'o-', linewidth=2)
        ax.fill(angles, values, alpha=0.25)
        # Compass convention: north at the top, angles increasing clockwise.
        ax.set_theta_zero_location('N')
        ax.set_theta_direction(-1)
        ax.set_xticks(np.deg2rad(np.arange(0, 360, 30)))
        ax.set_title(f"Rose des vents (radar) - {name} - {var}")
        plt.tight_layout()
        plt.savefig(f"{figures_dir}/rose_radar_{var}_{site_ref}_{name}.png")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)

# Produce one wind rose per source and per wind variable. plot_wind_rose
# returns without drawing when a required column is missing from the source.
for name, df in all_dfs.items():
    for var in ['windspeed_mean', 'windspeed_gust']:
        plot_wind_rose(df, name, site_ref, var)


In [None]:
# One time-series figure per wind variable, overlaying all sources.
for var in ['windspeed_mean', 'windspeed_gust']:
    # Collect the plottable sources first so that no blank figure is written
    # when none of them has both the date and the variable.
    curves = []
    for name, df in all_dfs.items():
        if var in df.columns and 'date' in df.columns:
            # Sort chronologically: a line plot joins points in row order, so
            # an unsorted file would draw lines jumping back and forth in time.
            # mergesort is stable, keeping file order for equal timestamps.
            ordered = df[['date', var]].sort_values('date', kind='mergesort')
            curves.append((name, ordered))
    if not curves:
        continue

    plt.figure(figsize=(14, 7))
    try:
        for name, ordered in curves:
            plt.plot(ordered['date'], ordered[var], label=name, alpha=0.8)
        plt.title(f"Comparaison temporelle - {var.replace('_',' ').capitalize()}")
        plt.xlabel("Date")
        plt.ylabel("Vitesse (m/s)")
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"{figures_dir}/courbe_temporelle_{var}_{site_ref}.png")
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close()


In [None]:
def _save_fit_figure(data, x, pdf_values, curve_style, curve_label, title, out_path):
    """Draw the density histogram of ``data`` with a fitted PDF curve and save it.

    Args:
        data: Observed values shown as a 30-bin density histogram.
        x: Abscissas at which the fitted PDF was evaluated.
        pdf_values: Fitted PDF values at ``x``.
        curve_style: Matplotlib format string for the fitted curve.
        curve_label: Legend label of the fitted curve.
        title: Figure title.
        out_path: Destination path of the PNG file.
    """
    plt.figure(figsize=(10, 6))
    try:
        sns.histplot(data, bins=30, kde=False, stat='density', label='Données', color='gray', alpha=0.6)
        plt.plot(x, pdf_values, curve_style, label=curve_label)
        plt.title(title)
        plt.xlabel("Vitesse (m/s)")
        plt.ylabel("Densité")
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_path)
    finally:
        # Close even on failure; otherwise each failed plot leaves a figure open.
        plt.close()


# Fit a Weibull and a Gumbel distribution to each wind variable of each source
# and save one figure per fit. A failing fit or plot is reported and skipped so
# the remaining sources are still processed.
for name, df in all_dfs.items():
    for var in ['windspeed_mean', 'windspeed_gust']:
        if var not in df.columns:
            continue
        data = df[var].dropna()
        if data.empty:
            continue

        # Common abscissas spanning the observed range, shared by both curves.
        x = np.linspace(data.min(), data.max(), 100)

        # Weibull fit, with the location parameter fixed at 0.
        try:
            # Fit before any figure exists, so a failed fit creates no figure.
            c, loc, scale = weibull_min.fit(data, floc=0)
            _save_fit_figure(
                data,
                x,
                weibull_min.pdf(x, c, loc=loc, scale=scale),
                'r-',
                f'Weibull Fit\nc={c:.2f}',
                f"Ajustement Weibull - {name} - {var}",
                f"{figures_dir}/weibull_{var}_{name}_{site_ref}.png",
            )
        except Exception as e:
            print(f"[Erreur Weibull : {name} {var} : {e}]")

        # Gumbel (right-skewed) fit.
        try:
            loc, scale = gumbel_r.fit(data)
            _save_fit_figure(
                data,
                x,
                gumbel_r.pdf(x, loc=loc, scale=scale),
                'g-',
                'Gumbel Fit',
                f"Ajustement Gumbel - {name} - {var}",
                f"{figures_dir}/gumbel_{var}_{name}_{site_ref}.png",
            )
        except Exception as e:
            print(f"[Erreur Gumbel : {name} {var} : {e}]")
