In [3]:
import pandas as pd
import numpy as np
from pathlib import Path

path = "../data/donnees_communales_parc_jardin.csv"

WEIGHTS = {
    "surface_parcs_totale": 40,
    "pct_acces_pieton": 20,
    "pct_avec_eau": 15,
    "pct_labellises": 15,
    "diversite_equipements": 30
}


# Infos

In [4]:
df = pd.read_csv(path)
display(df.head(3)) 
df.info()

Unnamed: 0,FID,uid,id_ariane,nom,num,numvoie,voie,codepost,commune,code_insee,...,eau,toilettes,chien,esp_can,photo,gid,the_geom,openinghours,last_update_fme,horaires
0,com_donnees_communales.comparcjardin_1_0_0.371,PAR-69204-001,,Parc historique de Beauregard,1,,Rue Edouard Millaud,69230.0,Saint-Genis-Laval,69204.0,...,oui,oui,Tenus en laisse,Non,,371,"MULTIPOLYGON (((4.786552 45.69293704090367, 4....","[""Mo-Su 08:00-18:00""]",2022-03-31T14:13:25.31,
1,com_donnees_communales.comparcjardin_1_0_0.372,PAR-69204-002,,Parc de la Maison des Champs,2,130.0,Avenue Georges Clemenceau,69230.0,Saint-Genis-Laval,69204.0,...,non,non,Tenus en laisse,Propreté,,372,"MULTIPOLYGON (((4.792575 45.69654604090331, 4....","[""Mo-Su 08:00-18:00""]",2022-03-31T14:13:25.63,
2,com_donnees_communales.comparcjardin_1_0_0.76,PAR-69385-05110,S8220,Jardin André Malraux,5110,,Place des Minimes,69005.0,Lyon 5e Arrondissement,69385.0,...,oui,non,Oui,Non,0.0,76,MULTIPOLYGON (((4.821792696554102 45.758162063...,[],2025-07-25T08:55:31.762,


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 627 entries, 0 to 626
Data columns (total 31 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   FID                        627 non-null    object 
 1   uid                        627 non-null    object 
 2   id_ariane                  318 non-null    object 
 3   nom                        627 non-null    object 
 4   num                        627 non-null    int64  
 5   numvoie                    324 non-null    object 
 6   voie                       623 non-null    object 
 7   codepost                   625 non-null    float64
 8   commune                    625 non-null    object 
 9   code_insee                 625 non-null    float64
 10  reglement                  568 non-null    object 
 11  surf_tot_m2                577 non-null    float64
 12  gestion                    621 non-null    object 
 13  ann_ouvert                 120 non-null    float64

Standardisation des noms et nettoyage de base

In [5]:
# Standardiser les noms de colonnes
df.columns = (
    df.columns.str.strip().str.lower()
      .str.replace(" ", "_")
      .str.replace(r"[^a-z0-9_]", "", regex=True)
)

# Dédoublonner
df = df.drop_duplicates()

# Gérer la clé commune
if "code_insee" not in df.columns:
    raise ValueError("Colonne 'code_insee' absente du fichier source.")
df = df.dropna(subset=["code_insee"])
df["code_commune"] = df["code_insee"].astype(str).str.replace(r"\.0$", "", regex=True).str.zfill(5)

# Libellé commune (facultatif pour l’agrégation, utile pour l’affichage)
df["commune"] = df.get("commune", "").astype(str).str.strip().str.title()


Suppression des colonnes inutiles

In [6]:
colonnes_inutiles = [
    "fid","uid","id_ariane","gid","the_geom","photo",
    "num","numvoie","voie","codepost",
    "openinghours","openinghoursspecification","precision_horaires","horaires",
    "last_update_fme","toilettes","chien","esp_can"
]
df = df.drop(columns=[c for c in colonnes_inutiles if c in df.columns], errors="ignore")

# Ne garder que les colonnes utiles pour créer des KPI
colonnes_utiles = [
    "code_commune","commune","nom","surf_tot_m2","ann_ouvert","gestion",
    "acces","circulation","label","type_equip","eau","clos","reglement"
]
df = df[[c for c in colonnes_utiles if c in df.columns]].copy()
df.head(3)


Unnamed: 0,code_commune,commune,nom,surf_tot_m2,ann_ouvert,gestion,acces,circulation,label,type_equip,eau,clos,reglement
0,69204,Saint-Genis-Laval,Parc historique de Beauregard,36358.95,,Commune,Piéton,,non,Édifice|Fontaine|Jardin à l'anglaise|Jardin po...,oui,oui,
1,69204,Saint-Genis-Laval,Parc de la Maison des Champs,4506.04,,Commune,Piéton,,non,Terrain de pétanque,non,oui,
2,69385,Lyon 5E Arrondissement,Jardin André Malraux,3334.82,,Autre,Piéton,,non,Fontaine,oui,oui,oui


Nettoyage du contenu

In [7]:
# Surface
if "surf_tot_m2" in df.columns:
    df["surf_tot_m2"] = pd.to_numeric(df["surf_tot_m2"], errors="coerce").fillna(0.0)
    df = df[(df["surf_tot_m2"] > 0) & (df["surf_tot_m2"] <= 1_000_000)]

# Booléens/étiquettes → oui/non
def to_oui_non(x):
    x = str(x).strip().lower()
    if x in {"oui","o","yes","true","1"}: return "oui"
    if x in {"non","n","no","false","0"}: return "non"
    return np.nan

for bcol in ["label","eau","clos","reglement"]:
    if bcol in df.columns:
        df[bcol] = df[bcol].map(to_oui_non)

# Annee d'ouverture
if "ann_ouvert" in df.columns:
    df["ann_ouvert"] = pd.to_numeric(df["ann_ouvert"], errors="coerce").astype("Int64")

# Accès / gestion
if "acces" in df.columns:
    df["acces"] = df["acces"].astype(str).str.strip().str.lower()
if "gestion" in df.columns:
    df["gestion"] = df["gestion"].astype(str).str.strip().str.title()

# Flags d’équipements (depuis type_equip)
def has_token(s, token):
    if pd.isna(s): 
        return False
    return any(token.lower() in t.strip().lower() for t in str(s).split("|"))

if "type_equip" in df.columns:
    df["equip_aire_jeux"] = df["type_equip"].apply(lambda x: int(has_token(x, "Aire")))
    df["equip_sport"]     = df["type_equip"].apply(lambda x: int(has_token(x, "Sport")))
    df["equip_fontaine"]  = df["type_equip"].apply(lambda x: int(has_token(x, "Fontaine")))
else:
    df["equip_aire_jeux"] = 0
    df["equip_sport"]     = 0
    df["equip_fontaine"]  = 0


Agrégation par commune (car plusieur parc par commune)

In [8]:
def pct(series_bool_or_01):
    s = pd.Series(series_bool_or_01).astype(float)
    return s.mean()*100 if len(s) else 0.0

group_cols = ["code_commune","commune"]

agg = (
    df.groupby(group_cols, as_index=False)
      .agg(
        nb_parcs                = ("surf_tot_m2","count"),
        surface_parcs_totale_m2 = ("surf_tot_m2","sum"),
        surface_parc_median_m2  = ("surf_tot_m2","median"),
        pct_parcs_clos          = ("clos",  lambda s: pct(s.map(lambda x: 1 if x=="oui" else 0)) if "clos"  in df.columns else 0),
        pct_acces_pieton        = ("acces", lambda s: pct(s.str.lower().isin(["piéton","pieton"])) if "acces" in df.columns else 0),
        pct_labellises          = ("label", lambda s: pct(s.map(lambda x: 1 if x=="oui" else 0)) if "label" in df.columns else 0),
        pct_avec_eau            = ("eau",   lambda s: pct(s.map(lambda x: 1 if x=="oui" else 0)) if "eau"   in df.columns else 0),
      )
)

# % d’équipements
for flag_col in ["equip_aire_jeux","equip_sport","equip_fontaine"]:
    tmp = df.groupby(group_cols)[flag_col].mean().mul(100).round(1).reset_index(name=flag_col.replace("equip_","pct_"))
    agg = agg.merge(tmp, on=group_cols, how="left")

# Diversité d’équipements (nombre de types uniques)
if "type_equip" in df.columns:
    diversite = (
        df.assign(_types=df["type_equip"].fillna("").str.split("|"))
          .explode("_types")
          .assign(_types=lambda d: d["_types"].str.strip().str.lower())
          .query("_types != ''")
          .groupby(group_cols)["_types"].nunique()
          .rename("diversite_equipements")
          .reset_index()
    )
    agg = agg.merge(diversite, on=group_cols, how="left")
agg["diversite_equipements"] = agg["diversite_equipements"].fillna(0).astype(int)

# Score simple 0–100 (min–max pour surface/diversité)
for c, norm_name in [("surface_parcs_totale_m2","surface_norm"),
                     ("diversite_equipements","diversite_norm")]:
    maxv = agg[c].max()
    agg[norm_name] = (agg[c]/maxv) if maxv>0 else 0.0

agg["score_espaces_verts"] = (
    agg["surface_norm"]              * WEIGHTS["surface_parcs_totale"] +
    agg["pct_acces_pieton"].fillna(0)* (WEIGHTS["pct_acces_pieton"]/100) +
    agg["pct_avec_eau"].fillna(0)    * (WEIGHTS["pct_avec_eau"]/100) +
    agg["pct_labellises"].fillna(0)  * (WEIGHTS["pct_labellises"]/100) +
    agg["diversite_norm"]           * WEIGHTS["diversite_equipements"]
).round(1)

agg = agg.drop(columns=["surface_norm","diversite_norm"], errors="ignore")
agg.head(10)


Unnamed: 0,code_commune,commune,nb_parcs,surface_parcs_totale_m2,surface_parc_median_m2,pct_parcs_clos,pct_acces_pieton,pct_labellises,pct_avec_eau,pct_aire_jeux,pct_sport,pct_fontaine,diversite_equipements,score_espaces_verts
0,69029,Bron,8,370700.0,6000.0,12.5,0.0,0.0,62.5,75.0,25.0,0.0,8,34.1
1,69034,Caluire-Et-Cuire,54,542533.0,1778.5,14.814815,0.0,0.0,18.518519,38.9,1.9,0.0,4,28.4
2,69040,Champagne-Au-Mont-D'Or,6,11746.0,2127.5,50.0,0.0,0.0,0.0,33.3,16.7,0.0,4,5.9
3,69044,Charbonnières-Les-Bains,4,25696.0,6885.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4,6.4
4,69081,Écully,7,123620.0,11500.0,85.714286,0.0,0.0,42.857143,42.9,0.0,0.0,3,15.1
5,69091,Givors,17,123444.0,4253.0,23.529412,5.882353,0.0,11.764706,0.0,5.9,0.0,13,25.3
6,69143,Neuville-Sur-Saône,7,28349.0,1430.0,28.571429,0.0,0.0,42.857143,14.3,0.0,0.0,2,10.2
7,69191,Saint-Cyr-Au-Mont-D'Or,2,7640.0,3820.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0.3
8,69194,Saint-Didier-Au-Mont-D'Or,2,23615.0,11807.5,0.0,0.0,0.0,50.0,50.0,0.0,0.0,5,15.2
9,69204,Saint-Genis-Laval,4,106389.55,20626.51,50.0,75.0,0.0,25.0,25.0,0.0,25.0,7,32.3


export des donner utile donc pase tableau juste au dessus

In [9]:
# agg.to_csv("parcs_jardins_communes_agg.csv", index=False)
# print("✅ Fichier écrit : parcs_jardins_communes_agg.csv")


In [11]:
df.head(10)

Unnamed: 0,code_commune,commune,nom,surf_tot_m2,ann_ouvert,gestion,acces,circulation,label,type_equip,eau,clos,reglement,equip_aire_jeux,equip_sport,equip_fontaine
0,69204,Saint-Genis-Laval,Parc historique de Beauregard,36358.95,,Commune,piéton,,non,Édifice|Fontaine|Jardin à l'anglaise|Jardin po...,oui,oui,,0,0,1
1,69204,Saint-Genis-Laval,Parc de la Maison des Champs,4506.04,,Commune,piéton,,non,Terrain de pétanque,non,oui,,0,0,0
2,69385,Lyon 5E Arrondissement,Jardin André Malraux,3334.82,,Autre,piéton,,non,Fontaine,oui,oui,oui,0,0,1
3,69386,Lyon 6E Arrondissement,Jardin Cardinal Jean Villot,2094.41,,Espaces Verts,piéton,,non,Aire de jeux,oui,oui,oui,1,0,0
4,69388,Lyon 8E Arrondissement,Jardin du Presbytère,1330.87,,Espaces Verts,piéton,,non,Aire de jeux,non,oui,oui,1,0,0
5,69387,Lyon 7E Arrondissement,Jardin Garibaldi Berthelot,2037.58,,Espaces Verts,piéton,,non,Aire de jeux,non,non,oui,1,0,0
6,69386,Lyon 6E Arrondissement,Square de l'Ossuaire,2642.52,,Mairie,piéton,,non,,non,oui,oui,0,0,0
7,69286,Rillieux-La-Pape,Parc des Horizons,10804.0,2015.0,Commune,piéton / cycle / motorisé,Piétonne,non,Aire de jeux / Terrain Sportif,oui,non,oui,1,1,0
9,69266,Villeurbanne,Espace de vie de l'Abbe Bordes,3423.625396,1989.0,Commune,piéton,Piéton,non,Fontaine-Terrain de pétanque-Aire de jeux,oui,oui,oui,1,0,1
10,69266,Villeurbanne,Parc Jacob Hugentobler (et Esplanade),6215.485394,2017.0,Commune,piéton,Piéton,non,Fontaine-Aire de jeux,oui,oui,oui,1,0,1
