# Étape 3.1 : Statistiques descriptives

In [7]:
import pandas as pd

# Load the enriched consumption parquet file into `df`.
df = pd.read_parquet(path="../output/consommations_enrichies.parquet")

# Cell output: a single tuple holding the (rows, columns) shape and the first five rows.
(df.shape, df.head(5))


((100000, 33),
   batiment_id           timestamp  conso_clean unite        date type_energie  \
 0     BAT0001 2023-01-01 03:00:00         7.99   kWh  2023-01-01  electricite   
 1     BAT0001 2023-01-01 03:00:00         7.71   kWh  2023-01-01          gaz   
 2     BAT0001 2023-01-01 13:00:00        90.48   kWh  2023-01-01          gaz   
 3     BAT0001 2023-01-01 16:00:00        69.08   kWh  2023-01-01  electricite   
 4     BAT0001 2023-01-01 17:00:00         2.60    m3  2023-01-01          eau   
 
              nom   type commune  surface_m2  ...  date_jour date_debut  \
 0  Ecole Paris 1  ecole   Paris        1926  ... 2023-01-01 2023-01-01   
 1  Ecole Paris 1  ecole   Paris        1926  ... 2023-01-01 2023-01-01   
 2  Ecole Paris 1  ecole   Paris        1926  ... 2023-01-01 2023-01-01   
 3  Ecole Paris 1  ecole   Paris        1926  ... 2023-01-01 2023-01-01   
 4  Ecole Paris 1  ecole   Paris        1926  ... 2023-01-01 2023-01-01   
 
     date_fin tarif_unitaire  cout_eur 

## Calculer les statistiques par type d'énergie, type de bâtiment et commune

In [5]:
# Descriptive statistics by energy type, building type and commune.
# Each table is displayed and exported to ../output as CSV.

# The `unite` column holds both "kWh" and "m3" (water) readings. Totals and
# intensities that are meant to be energy figures must not add m3 to kWh,
# so they are computed on the kWh rows only.
# NOTE(review): assumes every energy reading is tagged exactly "kWh" — confirm.
df_kwh = df[df["unite"] == "kWh"]

# --- By energy type ---
# All rows are kept here: each energy type is aggregated on its own.
# NOTE(review): presumably each type_energie uses a single unit — confirm.
stats_energie = (
    df
    .groupby("type_energie")["conso_clean"]
    .agg(["count", "sum", "mean", "median", "std"])
    .reset_index()
)

# A bare expression in the middle of a cell shows nothing; display explicitly.
display(stats_energie)
stats_energie.to_csv("../output/stats_par_type_energie.csv", index=False)

# --- By building type ---
# Energy intensity per m2, kWh rows only, highest mean first.
stats_type_bat = (
    df_kwh
    .groupby("type")["conso_par_m2"]
    .agg(["count", "mean", "median", "std"])
    .reset_index()
    .sort_values("mean", ascending=False)
)

display(stats_type_bat)
stats_type_bat.to_csv("../output/stats_par_type_batiment.csv", index=False)

# --- By commune ---
# Total consumption in kWh (water in m3 excluded so the column name holds true).
stats_commune = (
    df_kwh
    .groupby("commune")["conso_clean"]
    .sum()
    .reset_index(name="conso_totale_kwh")
    .sort_values("conso_totale_kwh", ascending=False)
)

display(stats_commune)
stats_commune.to_csv("../output/stats_par_commune.csv", index=False)


## Identifier les bâtiments les plus/moins énergivores

In [6]:
# Rank buildings by their mean energy intensity (conso_par_m2) and export
# the 10 highest and 10 lowest.

# Only kWh rows are used: `unite` also contains "m3" (water) readings, which
# are not energy and would distort a per-building energy-intensity mean.
# NOTE(review): assumes every energy reading is tagged exactly "kWh" — confirm.
df_kwh = df[df["unite"] == "kWh"]

# One row per building; nom/type/commune are carried along as grouping keys.
conso_bat = (
    df_kwh
    .groupby(["batiment_id", "nom", "type", "commune"])["conso_par_m2"]
    .mean()
    .reset_index()
)

# Top 10 most energy-intensive buildings
top_plus = conso_bat.sort_values("conso_par_m2", ascending=False).head(10)

# Top 10 least energy-intensive buildings
top_moins = conso_bat.sort_values("conso_par_m2", ascending=True).head(10)

# A bare expression in the middle of a cell shows nothing; display explicitly.
display(top_plus, top_moins)

# Export
top_plus.to_csv("../output/batiments_plus_energivores.csv", index=False)
top_moins.to_csv("../output/batiments_moins_energivores.csv", index=False)


## Calculer la répartition des consommations par classe énergétique DPE

In [None]:
# Breakdown of total consumption by DPE energy class, with each class's
# share of the overall total in percent.

# Only kWh rows are summed: `unite` also contains "m3" (water) readings, and
# the result column is explicitly named "conso_totale_kwh".
# NOTE(review): assumes every energy reading is tagged exactly "kWh" — confirm.
df_kwh = df[df["unite"] == "kWh"]

repartition_dpe = (
    df_kwh
    .groupby("classe_energetique")["conso_clean"]
    .sum()
    .reset_index(name="conso_totale_kwh")
    .sort_values("classe_energetique")
)

# Share of each class in the overall kWh total (percent).
repartition_dpe["part_%"] = (
    repartition_dpe["conso_totale_kwh"]
    / repartition_dpe["conso_totale_kwh"].sum()
    * 100
)

# Display once the percentage column exists, so the shown table matches the export.
display(repartition_dpe)

repartition_dpe.to_csv("../output/repartition_conso_dpe.csv", index=False)


## Analyser l'évolution temporelle (tendances mensuelles, saisonnalité)

In [None]:
# Temporal evolution: total consumption per calendar month and per season.

# Monthly period derived from the daily date. The column is added to `df`
# itself (kept as in the original, in case later cells read df["mois"]).
df["mois"] = pd.to_datetime(df["date_jour"]).dt.to_period("M")

# Only kWh rows are summed: `unite` also contains "m3" (water) readings, and
# a single total mixing kWh and m3 would be meaningless.
# NOTE(review): assumes every energy reading is tagged exactly "kWh" — confirm.
df_kwh = df[df["unite"] == "kWh"]

# --- Monthly totals ---
conso_mensuelle = (
    df_kwh
    .groupby("mois")["conso_clean"]
    .sum()
    .reset_index()
)

# A bare expression in the middle of a cell shows nothing; display explicitly.
display(conso_mensuelle)
conso_mensuelle.to_csv("../output/conso_mensuelle.csv", index=False)

# --- Seasonal totals ---
conso_saison = (
    df_kwh
    .groupby("saison")["conso_clean"]
    .sum()
    .reset_index()
)

display(conso_saison)
conso_saison.to_csv("../output/conso_par_saison.csv", index=False)


## Comparer la consommation théorique (selon DPE) vs réelle

In [None]:
# Compare the measured mean intensity (conso_par_m2) of each DPE class with a
# reference value per class.

# Reference intensity per DPE class.
# NOTE(review): these look like annual kWh/m2 thresholds, while conso_par_m2 is
# averaged over individual readings — confirm both sides share a time basis.
dpe_ref = {
    "A": 50,
    "B": 90,
    "C": 150,
    "D": 230,
    "E": 330,
    "F": 420,
    "G": 500
}

# Classes missing from dpe_ref map to NaN. The column is added to `df` itself
# (kept as in the original, in case later cells read it).
df["conso_theorique_kwh_m2"] = df["classe_energetique"].map(dpe_ref)

# The reference is expressed in kWh/m2, so the measured side must only use kWh
# rows: `unite` also contains "m3" (water) readings.
# NOTE(review): assumes every energy reading is tagged exactly "kWh" — confirm.
df_kwh = df[df["unite"] == "kWh"]

# Per class: measured mean vs reference (constant within a class, so its mean
# is the reference value itself).
comparaison_dpe = (
    df_kwh
    .groupby("classe_energetique")
    .agg(
        conso_reelle_moy=("conso_par_m2", "mean"),
        conso_theorique=("conso_theorique_kwh_m2", "mean")
    )
    .reset_index()
)

# Positive gap = measured intensity above the reference.
comparaison_dpe["ecart_reel_vs_theorique"] = (
    comparaison_dpe["conso_reelle_moy"]
    - comparaison_dpe["conso_theorique"]
)

# A bare expression in the middle of a cell shows nothing; display explicitly.
display(comparaison_dpe)

# Export
comparaison_dpe.to_csv("../output/comparaison_dpe_reel_vs_theorique.csv", index=False)
