# Pipeline Page Bundle par commune
Ce notebook génère un Page Bundle Hugo pour chaque commune à partir du fichier Eure_et_Loire.csv.

Pour chaque commune :
- Titre = nom de la commune
- Table récapitulative des informations hiérarchiques
- Graphique Plotly de l'évolution de la population

In [19]:
# Import des librairies nécessaires
import pandas as pd
import plotly.express as px
import os
import re

In [None]:
# Notebook cell: génère un Page Bundle Hugo par commune
# pip install pandas plotly

import pandas as pd
import re
import os
from pathlib import Path
import html

# --- Configuration ---
CSV_PATH = "../data/Eure_et_Loire.csv"   # chemin vers ton CSV
OUTPUT_BASE = Path("../content/communes")  # racine des bundles Hugo
OUTPUT_BASE.mkdir(parents=True, exist_ok=True)
ENCODING = "utf-8"

# Colonnes à considérer comme "hiérarchie"
HIER_COLS_PREFERRED = [
    "insee","section","superficie","intendance","election","subdelegation",
    "grenier","coutume","parlement","bailliage","gouvernement",
    "diocese","archidiacone","doyenne","vocable","presentateur",
    "district_1790","canton_1790","canton_1801","arrondissement_1982","canton _1982"
]

# --- fonctions utilitaires ---

def detect_year_from_col(colname):
    """Retourne l'année trouvée dans le nom de colonne (ex: 'V_1790_f' -> 1790)"""
    m = re.search(r'(1[0-9]{3}|17[0-9]{2}|18[0-9]{2}|19[0-9]{2}|20[0-9]{2}|\d{3,4})', colname)
    if m:
        y = int(m.group(0))
        if 800 <= y <= 2100:
            return y
    return None

def sanitize_numeric(val):
    """Nettoie 'n_c', 'lac.', '29!' etc. et renvoie un int ou None"""
    if pd.isna(val):
        return None
    s = str(val).strip()
    if s in {"n_c", "lac.", "lac", "", "-", "?"}:
        return None
    m = re.search(r'(-?\d+)', s.replace(" ", ""))
    if m:
        try:
            return int(m.group(1))
        except:
            return None
    return None

def safe_dirname(s):
    """Nom de dossier Hugo sûr (sans espaces, accents, ni ponctuation problématique)"""
    s = str(s).strip().lower().replace(" ", "_")
    s = re.sub(r'[^a-z0-9_\-]', '', s)
    return s or "commune"

# --- Lire le CSV ---
df = pd.read_csv(CSV_PATH, encoding=ENCODING, dtype=str).head(1)
df.columns = [c.strip() for c in df.columns]

# Colonnes population
pop_cols = {c: detect_year_from_col(c) for c in df.columns}
pop_cols = {c: y for c,y in pop_cols.items() if y is not None}
pop_cols = dict(sorted(pop_cols.items(), key=lambda kv: kv[1]))

# Colonnes hiérarchiques
hier_cols = [c for c in HIER_COLS_PREFERRED if c in df.columns]
if not hier_cols:
    exclude = set(pop_cols.keys()) | {"nom", "insee"}
    hier_cols = [c for c in df.columns if c not in exclude][:12]

# --- Génération des bundles ---
for idx, row in df.iterrows():
    commune_nom = row.get("nom") or f"commune_{idx}"
    insee = row.get("insee") or ""
    title = str(commune_nom)

    # Créer dossier ../content/communes/<NomCommune>
    dirname = safe_dirname(commune_nom)
    commune_dir = OUTPUT_BASE / dirname
    commune_dir.mkdir(parents=True, exist_ok=True)
    qmd_path = commune_dir / "index.qmd"

    # Table population
    data_pairs = []
    for col, year in pop_cols.items():
        v = sanitize_numeric(row.get(col, ""))
        data_pairs.append((int(year), (int(v) if v is not None else None)))

    # Table hiérarchique
    hier_table_lines = ["| Propriété | Valeur |", "|---|---|"]
    for col in hier_cols:
        val = row.get(col, "")
        safe_val = "" if pd.isna(val) else html.escape(str(val))
        hier_table_lines.append(f"| {col} | {safe_val} |")

    # YAML Hugo/Quarto
    yaml_header = [
        "---",
        f'title: "{title}"',
        "format: html",
        "execute:",
        "  evaluate: true",
        "---",
        ""
    ]

    # Corps QMD
    body_lines = []
    body_lines.append(f"# {title}\n")
    body_lines.append("## Informations hiérarchiques\n")
    body_lines.extend(hier_table_lines)
    body_lines.append("\n## Évolution historique de la population\n")
    body_lines.append("\n```{python}\nimport pandas as pd\nimport plotly.express as px\n\npairs = " + repr(data_pairs) + "\ndf_pop = pd.DataFrame(pairs, columns=['year','population']).sort_values('year')\ndf_pop['population'] = pd.to_numeric(df_pop['population'], errors='coerce')\ndf_plot = df_pop.dropna(subset=['population'])\n\nif df_plot.empty:\n    print('Aucune donnée disponible.')\nelse:\n    fig = px.scatter(df_plot, x='year', y='population',\n                  title='Population par année')\n    fig.update_layout(xaxis_title='Année', yaxis_title='Population')\n    fig.show()\n```\n")

    with open(qmd_path, "w", encoding=ENCODING) as f:
        f.write("\n".join(yaml_header + body_lines))

    print(f"Créé: {qmd_path}")

print("✅ Tous les bundles Hugo sont générés dans:", OUTPUT_BASE.resolve())


Créé: ../content/communes/abondant/index.qmd
✅ Tous les bundles Hugo sont générés dans: /home/bdumenieu/Professionnel/Bac_a_sable/cartoscope-hugo/content/communes


In [21]:
# Génération de bundles Hugo avec index.qmd contenant un graphe R (ggplot2)
# pip install pandas

import pandas as pd
import re
import os
from pathlib import Path
import html

# --- Configuration ---
CSV_PATH = "../data/Eure_et_Loire.csv"   # chemin vers ton CSV
OUTPUT_BASE = Path("../content/communes")  # racine des bundles Hugo
OUTPUT_BASE.mkdir(parents=True, exist_ok=True)
ENCODING = "utf-8"

# Colonnes à considérer comme "hiérarchie"
HIER_COLS_PREFERRED = [
    "insee","section","superficie","intendance","election","subdelegation",
    "grenier","coutume","parlement","bailliage","gouvernement",
    "diocese","archidiacone","doyenne","vocable","presentateur",
    "district_1790","canton_1790","canton_1801","arrondissement_1982","canton _1982"
]

# --- fonctions utilitaires ---

def detect_year_from_col(colname):
    m = re.search(r'(1[0-9]{3}|17[0-9]{2}|18[0-9]{2}|19[0-9]{2}|20[0-9]{2}|\d{3,4})', colname)
    if m:
        y = int(m.group(0))
        if 800 <= y <= 2100:
            return y
    return None

def sanitize_numeric(val):
    if pd.isna(val):
        return None
    s = str(val).strip()
    if s in {"n_c", "lac.", "lac", "", "-", "?"}:
        return None
    m = re.search(r'(-?\d+)', s.replace(" ", ""))
    if m:
        try:
            return int(m.group(1))
        except:
            return None
    return None

def safe_dirname(s):
    s = str(s).strip().lower().replace(" ", "_")
    s = re.sub(r'[^a-z0-9_\-]', '', s)
    return s or "commune"

# --- Lire le CSV ---
df = pd.read_csv(CSV_PATH, encoding=ENCODING, dtype=str).head(1)
df.columns = [c.strip() for c in df.columns]

# Colonnes population
pop_cols = {c: detect_year_from_col(c) for c in df.columns}
pop_cols = {c: y for c,y in pop_cols.items() if y is not None}
pop_cols = dict(sorted(pop_cols.items(), key=lambda kv: kv[1]))

# Colonnes hiérarchiques
hier_cols = [c for c in HIER_COLS_PREFERRED if c in df.columns]
if not hier_cols:
    exclude = set(pop_cols.keys()) | {"nom", "insee"}
    hier_cols = [c for c in df.columns if c not in exclude][:12]

# --- Génération des bundles ---
for idx, row in df.iterrows():
    commune_nom = row.get("nom") or f"commune_{idx}"
    insee = row.get("insee") or ""
    title = str(commune_nom)

    # Dossier ../content/communes/<NomCommune>
    dirname = safe_dirname(commune_nom)
    commune_dir = OUTPUT_BASE / dirname
    commune_dir.mkdir(parents=True, exist_ok=True)
    qmd_path = commune_dir / "index.qmd"

    # Table population
    years, values = [], []
    for col, year in pop_cols.items():
        v = sanitize_numeric(row.get(col, ""))
        years.append(year)
        values.append(v if v is not None else "NA")

    # Encodage R
    r_years = "c(" + ",".join(str(y) for y in years) + ")"
    r_values = "c(" + ",".join(str(v) for v in values) + ")"

    # Table hiérarchique
    hier_table_lines = ["| Propriété | Valeur |", "|---|---|"]
    for col in hier_cols:
        val = row.get(col, "")
        safe_val = "" if pd.isna(val) else html.escape(str(val))
        hier_table_lines.append(f"| {col} | {safe_val} |")

    # YAML
    yaml_header = [
        "---",
        f'title: "{title}"',
        "format: html",
        "execute:",
        "  eval: true",
        "---",
        ""
    ]

    # Corps QMD
    body_lines = []
    body_lines.append(f"# {title}\n")
    body_lines.append("## Informations hiérarchiques\n")
    body_lines.extend(hier_table_lines)
    body_lines.append("\n## Évolution historique de la population\n")
    body_lines.append(
        f"""\n```{{r}}\nlibrary(ggplot2)\nyears <- {r_years}\nvalues <- {r_values}\n\n# Construire data.frame\npop_df <- data.frame(year = years, population = values)\npop_df <- pop_df[!is.na(pop_df$population), ]\n\nif (nrow(pop_df) == 0) {{\n  cat("Aucune donnée disponible.")\n}} else {{\n  ggplot(pop_df, aes(x = year, y = population)) +\n    geom_line(color = "steelblue") +\n    geom_point(color = "darkred") +\n    labs(title = "Population par année", x = "Année", y = "Population") +\n    theme_minimal()\n}}\n```\n"""
    )

    with open(qmd_path, "w", encoding=ENCODING) as f:
        f.write("\n".join(yaml_header + body_lines))

    print(f"Créé: {qmd_path}")

print("✅ Bundles Hugo générés dans:", OUTPUT_BASE.resolve())


Créé: ../content/communes/abondant/index.qmd
✅ Bundles Hugo générés dans: /home/bdumenieu/Professionnel/Bac_a_sable/cartoscope-hugo/content/communes
