# notebook_auto.ipynb
#### Analyse automatisée des vents par site
Paramétré pour Papermill : prend **site_ref** et génère figures + tables

In [None]:
# Papermill parameter: identifier of the site to analyse.
site_ref = "WFR001_PIOLENC"  # Default value used for test runs

In [None]:
# Library imports
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Confirmation message printed once all imports have succeeded.
print("[‚úÖ] Biblioth√®ques import√©es.")


# 📖 Introduction
Ce notebook s'exécute automatiquement pour un site donné (`site_ref`).
- Il scanne le dossier `data/<site_ref>`
- Charge tous les fichiers CSV (hors fichiers `raw_`)
- Calcule les statistiques descriptives
- Génère et sauvegarde les graphiques (boxplots, histogrammes, séries temporelles)


In [None]:
# Scan the site folder and load every processed CSV file.
#
# Result: `all_data`, a dict mapping a source key to the DataFrame read from
# the matching file. The key is the file name without its "_<site_ref>.csv"
# suffix, or without the ".csv" extension when that suffix is absent.
# Files whose name starts with "raw_" are ignored, as are empty tables and
# files that cannot be read.
data_folder = f"data/{site_ref}"
print(f"[üìÅ] Dossier analys√© : {data_folder}")

# os.listdir returns entries in arbitrary order; sorting keeps the order of
# the loaded sources (and of every output derived from them) reproducible.
available_files = sorted(
    f for f in os.listdir(data_folder)
    if f.endswith(".csv") and not f.startswith("raw_")
)
print(f"[‚úÖ] Fichiers CSV d√©tect√©s : {available_files}")

site_suffix = f"_{site_ref}.csv"
all_data = {}
for filename in available_files:
    path = os.path.join(data_folder, filename)

    # Only the read itself is guarded: the scan is best effort, so one
    # unreadable file is reported and skipped instead of aborting the run.
    try:
        df = pd.read_csv(path)
    except Exception as e:
        print(f"[‚ùå] Erreur lecture {filename} : {e}")
        continue

    if df.empty:
        print(f"[‚ö†Ô∏è] Fichier vide ignor√© : {filename}")
        continue

    # Strip the site suffix only when it terminates the name; otherwise drop
    # the extension so that a key never keeps a trailing ".csv".
    # NOTE: if two file names reduce to the same key, the later one wins.
    if filename.endswith(site_suffix):
        key = filename[:-len(site_suffix)]
    else:
        key = os.path.splitext(filename)[0]

    all_data[key] = df
    print(f"[üì¶] Charg√© : {filename} ({len(df)} lignes)")

print("\nüì¶ Sources charg√©es :", list(all_data.keys()))


In [None]:
# Descriptive statistics.
#
# For every loaded source, each numeric column whose name contains "wind"
# contributes one row (count, mean, std, min, max, quartiles) computed on its
# non-missing values. The resulting table is displayed and written to
# <data_folder>/tables/stats_descriptives_<site_ref>.csv.
stats_list = []

for source, df in all_data.items():
    for col in df.columns:
        if 'wind' not in col:
            continue

        # Accept any numeric dtype (int32, float16, unsigned, nullable, ...)
        # instead of a fixed list of three NumPy dtypes, which silently
        # skipped valid numeric columns. pandas treats booleans as numeric,
        # so they are excluded explicitly to keep the previous behaviour.
        dtype = df[col].dtype
        if not pd.api.types.is_numeric_dtype(dtype) or pd.api.types.is_bool_dtype(dtype):
            continue

        series = df[col].dropna()
        if series.empty:
            # Column holds only missing values: nothing to summarise.
            continue

        stats_list.append({
            "Source": source,
            "Variable": col,
            "Count": series.count(),
            "Mean": series.mean(),
            "Std": series.std(),
            "Min": series.min(),
            "Max": series.max(),
            "25%": series.quantile(0.25),
            "50%": series.quantile(0.50),
            "75%": series.quantile(0.75)
        })

if stats_list:
    stats_df = pd.DataFrame(stats_list)
    # `display` is the rich-output helper available in the notebook session.
    display(stats_df)

    # Save the table next to the site data.
    tables_dir = os.path.join(data_folder, "tables")
    os.makedirs(tables_dir, exist_ok=True)
    stats_path = os.path.join(tables_dir, f"stats_descriptives_{site_ref}.csv")
    stats_df.to_csv(stats_path, index=False)
    print(f"[üíæ] Stats sauvegard√©es ‚Üí {stats_path}")
else:
    print("[‚ö†Ô∏è] Aucune statistique calculable.")


In [None]:
# Automatic figures.
#
# For every loaded source this cell saves, under <data_folder>/figures:
#   - one box plot covering all of its wind columns,
#   - one histogram per wind column,
#   - one time series per wind column when the table has a "date" column.
figures_dir = os.path.join(data_folder, "figures")
os.makedirs(figures_dir, exist_ok=True)

for source, df in all_data.items():
    print(f"\nüìà Traitement : {source}")

    # Keep numeric wind columns only, like the statistics cell does:
    # text columns whose name contains "wind" cannot be drawn as box plots
    # or 30-bin histograms.
    wind_cols = [
        c for c in df.columns
        if 'wind' in c
        and pd.api.types.is_numeric_dtype(df[c].dtype)
        and not pd.api.types.is_bool_dtype(df[c].dtype)
    ]
    if not wind_cols:
        continue

    # Box plot
    plt.figure(figsize=(8, 5))
    df[wind_cols].boxplot()
    plt.title(f"Boxplot - {source}")
    plt.ylabel("Vitesse du vent (m/s)")
    plt.xticks(rotation=45)
    # pandas already enables the grid and a bare plt.grid() *toggles* it,
    # which switched it off again; request it explicitly instead.
    plt.grid(True)
    path_plot = os.path.join(figures_dir, f"boxplot_{source}.png")
    plt.savefig(path_plot)
    plt.close()
    print(f"[‚úÖ] Boxplot sauvegard√© : {path_plot}")

    # Histograms
    for col in wind_cols:
        plt.figure(figsize=(8, 5))
        df[col].hist(bins=30)
        plt.title(f"Histogramme - {source} - {col}")
        plt.xlabel("Valeur")
        plt.ylabel("Fr√©quence")
        plt.grid(True)  # explicit for the same reason as the box plot
        path_hist = os.path.join(figures_dir, f"hist_{col}_{source}.png")
        plt.savefig(path_hist)
        plt.close()
        print(f"[‚úÖ] Histogramme sauvegard√© : {path_hist}")

    # Time series
    if "date" in df.columns:
        # Parse the dates once per source rather than once per column.
        dates = pd.to_datetime(df['date'])
        for col in wind_cols:
            plt.figure(figsize=(12, 5))
            plt.plot(dates, df[col])
            plt.title(f"S√©rie temporelle - {source} - {col}")
            plt.xlabel("Date")
            plt.ylabel(col)
            plt.grid(True)
            path_ts = os.path.join(figures_dir, f"timeseries_{col}_{source}.png")
            plt.savefig(path_ts)
            plt.close()
            print(f"[‚úÖ] S√©rie temporelle sauvegard√©e : {path_ts}")
