# notebook_auto.ipynb
#
# #### Analyse automatisée des vents par site
#
# Paramétré pour Papermill : prend **site_ref** et génère figures + tables

In [None]:
# Parameters
# Identifier of the site to analyse; later cells use it to build the data
# directory and every output file name.
# NOTE(review): presumably overridden by Papermill at execution time, with
# "DEFAULT_SITE" acting as the value for a plain interactive run -- confirm.
site_ref = "DEFAULT_SITE"

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import weibull_min, gumbel_r

# Apply the seaborn "whitegrid" theme to every figure drawn afterwards.
sns.set(style="whitegrid")

# Per-site layout: inputs live in data/<site_ref>, outputs go to the
# "figures" and "tables" sub-directories of that same folder.
base_dir = f"data/{site_ref}"
figures_dir, tables_dir = (
    os.path.join(base_dir, sub_dir) for sub_dir in ("figures", "tables")
)

# Create the output folders up front; exist_ok makes re-runs harmless.
for out_dir in (figures_dir, tables_dir):
    os.makedirs(out_dir, exist_ok=True)

print(f"[Site chargé : {site_ref}]")

In [None]:
# Load every CSV in base_dir whose file name mentions one of the known data
# sources. The result, all_dfs, maps the file name without its ".csv"
# extension to the parsed DataFrame (the 'date' column is parsed as datetime).
source_keywords = ('station', 'openmeteo', 'era5', 'nasa_power')

all_dfs = {}
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# sources (table rows, plot legends) identical from one run to the next.
for file in sorted(os.listdir(base_dir)):
    if not file.endswith('.csv'):
        continue
    if not any(keyword in file for keyword in source_keywords):
        continue

    path = os.path.join(base_dir, file)
    try:
        df = pd.read_csv(path, parse_dates=['date'])
    except Exception as e:
        # Deliberate best effort: one unreadable file is reported and
        # skipped so the remaining sources can still be analysed.
        print(f"[Erreur : {file} : {e}]")
        continue

    # Strip only the trailing extension; str.replace would also remove any
    # ".csv" appearing in the middle of the file name.
    all_dfs[os.path.splitext(file)[0]] = df
    print(f"[Données chargées : {file} ({df.shape[0]} lignes)]")

# Nothing usable was loaded: stop here rather than produce empty outputs.
if not all_dfs:
    raise FileNotFoundError("[❌] Aucun CSV trouvé!")

In [None]:
# Descriptive statistics for each (source, wind-speed variable) pair that has
# at least one non-missing value. One row per pair, written to a CSV in
# tables_dir and displayed as the cell output.
stat_results = []
for source_name, frame in all_dfs.items():
    for column in ('windspeed_mean', 'windspeed_gust'):
        if column not in frame.columns:
            continue
        values = frame[column].dropna()
        if values.empty:
            continue
        q25, q50, q75 = (values.quantile(q) for q in (0.25, 0.5, 0.75))
        # Key order fixes the column order of the resulting table.
        stat_results.append({
            'Source': source_name,
            'Variable': column,
            'Count': len(values),
            'Mean': values.mean(),
            'Std': values.std(),
            'Min': values.min(),
            'Max': values.max(),
            '25%': q25,
            '50%': q50,
            '75%': q75,
        })

df_stats = pd.DataFrame(stat_results)
stats_path = f"{tables_dir}/stats_descriptives_{site_ref}.csv"
df_stats.to_csv(stats_path, index=False)
df_stats

In [None]:
# One figure per wind-speed variable: overlay the histogram (with KDE curve)
# of every source that provides that variable, then save it to figures_dir.
for column in ('windspeed_mean', 'windspeed_gust'):
    plt.figure(figsize=(8, 6))

    sources_with_column = (
        (label, frame)
        for label, frame in all_dfs.items()
        if column in frame.columns
    )
    for label, frame in sources_with_column:
        values = frame[column].dropna()
        sns.histplot(values, label=label, kde=True, bins=30, alpha=0.6)

    readable_name = column.replace('_', ' ')
    plt.title(f"Histogramme {readable_name}")
    plt.legend()
    plt.tight_layout()

    out_path = f"{figures_dir}/hist_{column}_{site_ref}.png"
    plt.savefig(out_path)
    plt.close()

In [None]:
# Gather the non-missing mean wind speeds of every source into one long-format
# table (one row per observation, columns 'Source' and 'windspeed_mean') and
# draw one box per source.
combined = []
for name, df in all_dfs.items():
    if 'windspeed_mean' not in df.columns:
        continue
    values = df['windspeed_mean'].dropna()
    if values.empty:
        continue
    # Build each source's rows in one vectorised step instead of creating a
    # Python dict per observation; the scalar source name is broadcast.
    combined.append(
        pd.DataFrame({'Source': name, 'windspeed_mean': values.to_numpy()})
    )

# Skip the figure entirely when no source has any mean wind speed value.
if combined:
    df_combined = pd.concat(combined, ignore_index=True)
    plt.figure(figsize=(10, 6))
    sns.boxplot(data=df_combined, x='Source', y='windspeed_mean')
    plt.title("Boxplot des vitesses moyennes")
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.savefig(f"{figures_dir}/boxplot_wind_speed_{site_ref}.png")
    plt.close()

In [None]:
# For every source with a 'wind_direction' column, save a histogram of the
# non-missing directions using 36 bins over the 0-360 range. Despite the
# "rose" title, this is drawn on regular (non-polar) axes.
for label, frame in all_dfs.items():
    if 'wind_direction' not in frame.columns:
        continue

    directions = frame['wind_direction'].dropna()

    plt.figure(figsize=(6, 6))
    plt.hist(directions, bins=36, range=(0, 360), color='skyblue')
    plt.title(f"Rose des vents - {label}")
    plt.xlabel('Direction (°)')
    plt.ylabel('Observations')
    plt.tight_layout()

    out_path = f"{figures_dir}/rose_wind_direction_{site_ref}_{label}.png"
    plt.savefig(out_path)
    plt.close()

In [None]:
# Single figure comparing the mean wind speed over time across all sources
# that provide a 'windspeed_mean' column; one line per source.
plt.figure(figsize=(12, 6))

mean_speed_sources = {
    label: frame
    for label, frame in all_dfs.items()
    if 'windspeed_mean' in frame.columns
}
for label, frame in mean_speed_sources.items():
    plt.plot(frame['date'], frame['windspeed_mean'], label=label, alpha=0.7)

plt.title("Comparaison des vitesses moyennes par source")
plt.xlabel("Date")
plt.ylabel("Vitesse (m/s)")
plt.legend()
plt.tight_layout()

out_path = f"{figures_dir}/compare_sources_{site_ref}.png"
plt.savefig(out_path)
plt.close()

In [None]:
def _save_density_fit_plot(data, x, pdf_values, line_style, fit_label, title, out_path):
    """Save a density histogram of *data* overlaid with a fitted PDF curve.

    Args:
        data: Observations to histogram (30 bins, normalised to a density).
        x: Abscissae at which the fitted PDF was evaluated.
        pdf_values: Fitted PDF values at *x*.
        line_style: Matplotlib format string for the fitted curve.
        fit_label: Legend label for the fitted curve.
        title: Figure title.
        out_path: Destination path of the saved image.

    The figure is always closed, even when plotting or saving fails, so a
    failure does not leave an open figure behind.
    """
    fig = plt.figure(figsize=(8, 6))
    try:
        sns.histplot(data, bins=30, kde=False, stat='density', label='Data', color='gray', alpha=0.6)
        plt.plot(x, pdf_values, line_style, label=fit_label)
        plt.title(title)
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_path)
    finally:
        plt.close(fig)


# Fit a Weibull and a Gumbel distribution to each wind-speed variable of each
# source and save one figure per (source, variable, distribution).
for name, df in all_dfs.items():
    for var in ('windspeed_mean', 'windspeed_gust'):
        if var not in df.columns:
            continue
        data = df[var].dropna()
        if data.empty:
            continue

        # Evaluation grid spanning the observed range of the data.
        x = np.linspace(data.min(), data.max(), 100)

        # Weibull (location pinned at 0; only shape and scale are estimated).
        # NOTE(review): with floc=0 the Weibull density is defined for x > 0;
        # zero wind speeds in the data may degrade the fit -- confirm whether
        # calm values should be filtered beforehand.
        try:
            c, loc, scale = weibull_min.fit(data, floc=0)
            _save_density_fit_plot(
                data,
                x,
                weibull_min.pdf(x, c, loc=loc, scale=scale),
                'r-',
                f'Weibull Fit\nc={c:.2f}',
                f"Weibull - {name} - {var}",
                f"{figures_dir}/weibull_{var}_{name}_{site_ref}.png",
            )
        except Exception as e:
            # Best effort: a failed fit is reported and the loop carries on.
            print(f"[Weibull fit failed for {name} {var}: {e}]")

        # Gumbel (right-skewed), location and scale both estimated.
        try:
            loc, scale = gumbel_r.fit(data)
            _save_density_fit_plot(
                data,
                x,
                gumbel_r.pdf(x, loc=loc, scale=scale),
                'g-',
                'Gumbel Fit',
                f"Gumbel - {name} - {var}",
                f"{figures_dir}/gumbel_{var}_{name}_{site_ref}.png",
            )
        except Exception as e:
            # Best effort: a failed fit is reported and the loop carries on.
            print(f"[Gumbel fit failed for {name} {var}: {e}]")