In [None]:
!pip install -q beautifulsoup4 requests



[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [25]:
# SCRAPING RECETTES HELLOFRESH → + desc_part_1..6 + total_time, prep_time, difficulty
import json, re, time, math
from pathlib import Path
import requests
from bs4 import BeautifulSoup

UA = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/124.0.0.0 Safari/537.36"
    )
}

UNITS = [
    "g","kg","mg","ml","cl","l",
    "cs","cc","c.à.s","c.à.c",
    "sachet","sachet(s)",
    "pièce","pièce(s)",
    "tranche","tranche(s)",
    "botte","botte(s)",
    "pincée","pincée(s)",
    "brin","brin(s)",
    "bouquet","bouquet(s)",
    "paquet","paquet(s)",
    "pot","pot(s)",
    "cm",
    "boule", "boule(s)",
    "boîte(s)", "boîte",
    "filet(s)",
    "tube(s)", "tube"
]
TEXT_QTY = ["selon le goût", "au goût", "à volonté"]

FRACTION_MAP = {
    "¼": 1/4, "½": 1/2, "¾": 3/4,
    "⅐": 1/7, "⅑": 1/9, "⅒": 1/10,
    "⅓": 1/3, "⅔": 2/3,
    "⅕": 1/5, "⅖": 2/5, "⅗": 3/5, "⅘": 4/5,
    "⅙": 1/6, "⅚": 5/6,
    "ⅈ": None,  # rare, laissé ici au cas où
    "⅛": 1/8, "⅜": 3/8, "⅝": 5/8, "⅞": 7/8,
}
FRACTIONS_CLASS = "".join(k for k,v in FRACTION_MAP.items() if v is not None)
NUM = rf"(?:\d+(?:[.,]\d+)?|\d+/\d+|[{re.escape(FRACTIONS_CLASS)}])"

UNIT = r"(?:{})(?!\S)".format("|".join([re.escape(u) for u in UNITS]))
QTY_CORE = rf"{NUM}(?:\s*{UNIT})?"
LEADING_QTY_RE = re.compile(rf"^\s*({QTY_CORE}(?:\s+{QTY_CORE})*)\s+(.+?)\s*$")
TRAILING_QTY_RE = re.compile(rf"^\s*(.+?)\s+({QTY_CORE}(?:\s+{QTY_CORE})*)\s*$")

def normalize_space(s: str) -> str:
    return re.sub(r"\s+", " ", s.strip())

def frac_to_float(s: str):
    s = s.strip().replace(",", ".")
    if s in FRACTION_MAP and FRACTION_MAP[s] is not None:
        return FRACTION_MAP[s]
    if "/" in s:
        try:
            a, b = s.split("/")
            return float(a) / float(b)
        except Exception:
            return None
    try:
        return float(s)
    except Exception:
        return None

def split_qty_name(line: str):
    s = normalize_space(line)
    for t in TEXT_QTY:
        if s.lower().startswith(t):
            rem = normalize_space(s[len(t):])
            name = rem if rem else s
            return t, name
    m = LEADING_QTY_RE.match(s)
    if m:
        qty_text = normalize_space(m.group(1))
        name = normalize_space(m.group(2))
        return qty_text, name
    m = TRAILING_QTY_RE.match(s)
    if m:
        name = normalize_space(m.group(1))
        qty_text = normalize_space(m.group(2))
        return qty_text, name
    return None, s

def split_number_and_unit(qty_text: str):
    if qty_text is None:
        return None, ""
    t = qty_text.strip()
    for txt in TEXT_QTY:
        if t.lower() == txt:
            return None, txt
    m = re.match(rf"^\s*({NUM})(?:\s+(.+))?\s*$", t)
    if not m:
        return None, t
    n_raw = m.group(1)
    unit = (m.group(2) or "").strip()
    n = frac_to_float(n_raw)
    if n is not None:
        if abs(n - round(n)) < 1e-9:
            n = int(round(n))
        else:
            n = math.floor(n * 100 + 1e-9) / 100.0
    return n, unit

def clean_title(title: str) -> str:
    return re.sub(r"\s*Recette\s*\|\s*HelloFresh\s*$", "", title).strip()

# ---- Instructions
def get_instructions(soup: BeautifulSoup):
    steps = []
    for box in soup.find_all(attrs={"data-test-id": "instruction-step"}):
        items = [li.get_text(" ", strip=True) for li in box.find_all("li")]
        if items:
            steps.append(" ".join(items))
            continue
        txt = box.get_text(" ", strip=True)
        if txt:
            steps.append(txt)
    if steps:
        return steps
    header = None
    for h in soup.find_all(["h2", "h3", "h4"]):
        if h.get_text(strip=True).lower().startswith("instructions"):
            header = h
            break
    if header:
        section_steps = []
        for sib in header.parent.next_siblings:
            name = getattr(sib, "name", None)
            if name in ("h2", "h3", "h4"):
                break
            if hasattr(sib, "find_all"):
                for ul in sib.find_all("ul"):
                    items = [li.get_text(" ", strip=True) for li in ul.find_all("li")]
                    if items:
                        section_steps.append(" ".join(items))
        if section_steps:
            return section_steps
    return steps

# ---- Durées & difficulté
ISO_RE = re.compile(r"^P(T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)$", re.I)

def iso_to_human(iso: str) -> str:
    """
    'PT40M' -> '40 minutes', 'PT1H30M' -> '1 h 30 min'
    """
    if not iso:
        return ""
    m = ISO_RE.match(iso.strip())
    if not m:
        return ""
    h = int(m.group(2) or 0)
    mnt = int(m.group(3) or 0)
    parts = []
    if h:
        parts.append(f"{h} h")
    if mnt:
        parts.append(f"{mnt} min" if not h else f"{mnt} min")
    return " ".join(parts) if parts else ""

# --- Precise DOM fallback for 'Temps total' / 'Temps de préparation' / 'Difficulté'

def _value_after_label(soup: BeautifulSoup, label: str, want_digits=False):
    """
    Find the node whose text == label, then scan the next few spans/divs
    for the first non-empty value (preferring one with digits when want_digits=True).
    """
    n = soup.find(string=re.compile(rf"^\s*{re.escape(label)}\s*$", re.I))
    if not n:
        return ""

    # search nearby first (same row / immediate siblings), then a short forward scan
    anchors = []
    # parent chain for local search
    p = getattr(n, "parent", None)
    for _ in range(4):  # climb a few levels max
        if not p:
            break
        anchors.append(p)
        p = getattr(p, "parent", None)

    # candidates: spans/divs under those anchors (row) and a short forward walk
    seen_ids = set()
    cands = []
    for a in anchors:
        for el in a.find_all(["span", "div"], recursive=True):
            if id(el) in seen_ids: 
                continue
            seen_ids.add(id(el))
            cands.append(el)

    # also scan forward a bit in document order
    for el in n.find_all_next(["span", "div"], limit=12):
        if id(el) in seen_ids:
            continue
        seen_ids.add(id(el))
        cands.append(el)

    # choose first usable text that is not the label itself
    for el in cands:
        t = el.get_text(" ", strip=True)
        if not t:
            continue
        if re.fullmatch(rf"\s*{re.escape(label)}\s*", t, flags=re.I):
            continue
        if want_digits and not re.search(r"\d", t):
            continue
        return t
    return ""

def extract_meta_from_dom(soup: BeautifulSoup):
    """
    DOM-driven fallback: pick the value next to the label.
    """
    total = _value_after_label(soup, "Temps total", want_digits=True)
    prep  = _value_after_label(soup, "Temps de préparation", want_digits=True)
    diff  = _value_after_label(soup, "Difficulté", want_digits=False)
    return total, prep, diff

def parse_recipe_jsonld_only(url: str):
    r = requests.get(url, headers=UA, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    # Titre
    title = None
    og = soup.find(attrs={"property": "og:title"})
    if og and og.get("content"):
        title = clean_title(og["content"])
    if not title:
        h1 = soup.find("h1")
        title = clean_title(h1.get_text(strip=True) if h1 else "Recette")

    ingredients_obj = {}
    found_any = False
    total_time = ""
    prep_time = ""
    difficulty = ""

    # JSON-LD
    for tag in soup.find_all("script", type="application/ld+json"):
        try:
            data = json.loads(tag.string)
        except Exception:
            continue
        objs = data if isinstance(data, list) else [data]
        for obj in objs:
            if not isinstance(obj, dict):
                continue
            t = obj.get("@type")
            if (isinstance(t, list) and "Recipe" in t) or t == "Recipe":
                # 1) Times via JSON-LD si présents
                if not total_time:
                    total_time = iso_to_human(obj.get("totalTime", ""))
                if not prep_time:
                    prep_time = iso_to_human(obj.get("prepTime", ""))
                # parfois difficulté n’existe pas en JSON-LD

                # 2) Ingrédients
                for raw in (obj.get("recipeIngredient") or []):
                    qty_text, name = split_qty_name(str(raw))
                    qty_num, unit = split_number_and_unit(qty_text)
                    if name:
                        ingredients_obj[name] = {"qty": qty_num, "unit": unit or ""}
                        found_any = True
    # Fallback texte/DOM pour time/difficulty si manquants
    if not total_time or not prep_time or not difficulty:
        t_fallback, p_fallback, d_fallback = extract_meta_from_dom(soup)
        if not total_time and t_fallback:
            total_time = t_fallback
        if not prep_time and p_fallback:
            prep_time = p_fallback
        if not difficulty and d_fallback:
            difficulty = d_fallback

    # Instructions
    steps = get_instructions(soup)
    desc = {}
    for i in range(1, 7):
        if i < 6:
            desc[f"desc_part_{i}"] = steps[i-1] if i-1 < len(steps) else ""
        else:
            desc[f"desc_part_{i}"] = " ".join(steps[5:]) if len(steps) > 5 else (steps[5] if len(steps) > 5 else "")

    return {
        "name": title,
        "link": url,
        "ingredients": ingredients_obj if found_any else {},
        "total_time": total_time,         # ex. '40 minutes' ou '1 h 30 min'
        "prep_time": prep_time,           # ex. '35 minutes'
        "difficulty": difficulty,         # ex. 'Intermédiaire'
        **desc
    }

def scrape_many_to_file(urls, out_path="recettes_hellofresh.txt"):
    existing = {}
    out_file = Path(out_path)
    if out_file.exists():
        try:
            with open(out_file, encoding="utf-8") as f:
                old_data = json.load(f)
                for rec in old_data:
                    if rec and rec.get("link"):
                        existing[rec["link"]] = rec
        except Exception as e:
            print(f"[warn] Impossible de charger l’existant ({e})")

    results = dict(existing)
    seen = set()

    for u in urls:
        url = u.strip().rstrip("/")
        if not url or url in seen:
            continue
        seen.add(url)

        if url in existing:
            print(f"[skip] {url} déjà présent, on garde l’ancien")
            continue

        try:
            rec = parse_recipe_jsonld_only(url)
            results[url] = rec
            print(f"[ok] {url} ajouté")
            time.sleep(0.25)
        except Exception as e:
            print(f"[warn] {url}: {e}")
            if url not in results:
                results[url] = {"name": None, "link": url, "ingredients": {}}

    final_list = list(results.values())
    out_file.write_text(json.dumps(final_list, ensure_ascii=False, indent=2), encoding="utf-8")
    return out_path, final_list

# --- Exemple d'utilisation 
# --- # Charger les URLs depuis un fichier JSON (.txt) 
with open("urls_hellofresh.txt", encoding="utf-8") as f: 
    urls = json.load(f) 
out_file, data = scrape_many_to_file(urls) 
print("Fichier écrit:", out_file) 
print(f"{len(data)} recettes sauvegardées")

[skip] https://www.hellofresh.fr/recipes/saumon-en-papillote-and-pistou-maison-625fbe3806486938dc2f4584 déjà présent, on garde l’ancien
[skip] https://www.hellofresh.fr/recipes/galette-complete-jambon-emmental-626fe9dca921ac38f30227a2 déjà présent, on garde l’ancien
[skip] https://www.hellofresh.fr/recipes/salade-de-chevre-nectarine-and-tomate-628b8b61c6ee1b00810d8902 déjà présent, on garde l’ancien
[skip] https://www.hellofresh.fr/recipes/curry-daubergine-rotie-and-pois-chiches-629e031506287fd74e0efc02 déjà présent, on garde l’ancien
[skip] https://www.hellofresh.fr/recipes/linguine-and-pesto-depinards-maison-62b26056828c7ddc7107e451 déjà présent, on garde l’ancien
[skip] https://www.hellofresh.fr/recipes/courgette-grillee-labneh-and-beurre-au-curcuma-62b26217932de228a600313c déjà présent, on garde l’ancien
[skip] https://www.hellofresh.fr/recipes/salade-couscous-perle-feta-rotie-and-grenade-62c31ec1b6367154fe0f1dc1 déjà présent, on garde l’ancien
[skip] https://www.hellofresh.fr/reci

In [1]:
# MATCHING FICHIER
# On regarde le taux de matching de chaque recette avec les ingrédients disponibles (ingredients_disponibles.txt)
# pour les ingrédients jugés indispensables dans ingredients_infos.txt ET dont le rayon == "marché".
# Comparaisons insensibles à la casse et aux accents. ALIASES pris en compte.

import json
import unicodedata
from pathlib import Path
import re

# Charger les fichiers (même si extension .txt, contenu = JSON)
recettes = json.load(open("recettes_hellofresh.txt", encoding="utf-8"))
catalogue = json.load(open("ingredients_infos.txt", encoding="utf-8"))
raw_dispos = set(json.load(open("ingredients_disponibles.txt", encoding="utf-8")))

# --- Alias simples pour homogénéiser les noms ---
ALIASES = {
    "Emmental râpé": "Emmental",
    "Dés de butternut": "Butternut",
    "Fricassée de champignons émincés": "Champignon",
    "Gingembre frais": "Gingembre",
    "Oignon jaune": "Oignon",
    "Poivron rouge": "Poivron",
    "Poivrons grillés": "Poivron",
    "Pommes de terre Franceline": "Pommes de terre",
    "Pommes de terre à chair farineuse": "Pommes de terre",
    "Pommes de terre à chair ferme": "Pommes de terre",
    "Purée de gingembre": "Gingembre",
    "Tomates cerises rouges": "Tomates cerises",
    "Tomates cerises rouges et jaunes": "Tomates cerises",
    "Mélange de jeunes pousses": "Salade",
    "Mélange de salades": "Salade",
    "Chou rouge découpé": "Chou rouge",
    "Champignons blonds": "Champignons",
    "Champignons de Paris": "Champignons",
    "Gousse d'ail": "Ail"
}

def normalize(s: str) -> str:
    """Normalisation : minuscule + suppression des accents + singularisation naïve."""
    if not s:
        return ""
    # 1. Minuscule
    s = s.lower()
    # 2. Supprimer les accents
    s = unicodedata.normalize("NFD", s)
    s = "".join(c for c in s if unicodedata.category(c) != "Mn")
    # 3. Singulariser naïvement (supprimer 's' ou 'x' finaux sauf pour mots courts)
    words = s.split()
    singularized = []
    for w in words:
        if len(w) > 3 and re.search(r"[sx]$", w):  # mot > 3 lettres et fini par s ou x
            w = re.sub(r"[sx]$", "", w)
        singularized.append(w)
    return " ".join(singularized)

# Construire l’index d’alias en mode insensible à la casse/accents
ALIASES_NORM = {normalize(k): v for k, v in ALIASES.items()}

def canon(name: str) -> str:
    """Applique les alias éventuels (lookup insensible casse/accents/ pluriels)."""
    return ALIASES_NORM.get(normalize(name), name)


# Disponibilités → normalisées, avec un mapping vers une forme "jolie" (après alias)
dispo_norm_to_pretty = {}
for d in raw_dispos:
    pretty = canon(d)
    n = normalize(pretty)
    # garde la 1ère forme "jolie" rencontrée
    dispo_norm_to_pretty.setdefault(n, pretty)

dispos_norm = set(dispo_norm_to_pretty.keys())

# Index catalogue par nom normalisé (après alias)
catalogue_norm_index = {normalize(canon(i["name"])): i for i in catalogue}


# Ensemble des indispensables du marché (noms normalisés après alias)
market_indispensables_norm = {
    normalize(canon(i["name"]))
    for i in catalogue
    if i.get("indispensable") and i.get("rayon", "").lower() == "marché"
}

# Table des remplacements (normalisés, aliasés) : base -> {base ∪ remplacements}
REPLACEMENTS_NORM = {}
for item in catalogue:
    base = normalize(canon(item["name"]))
    repls = {normalize(canon(r)) for r in item.get("remplacement", [])}
    REPLACEMENTS_NORM[base] = repls | {base}

def find_available_candidate(norm_name: str):
    """Privilégie l’ingrédient de base s’il est dispo, sinon un remplacement."""
    candidates = [norm_name] + [c for c in REPLACEMENTS_NORM.get(norm_name, {norm_name}) if c != norm_name]
    for cand in candidates:
        if cand in dispos_norm:
            return cand
    return None

def score_recette(recette):
    # Ingrédients de la recette → (canon + normalisés) et version "jolie" pour affichage
    rec_ing_pretty = {canon(n) for n in recette["ingredients"].keys()}
    rec_ing_norm = {normalize(n) for n in rec_ing_pretty}

    # Ingrédients inconnus (pas dans le catalogue) → affichage en "pretty"
    inconnus = sorted(n for n in rec_ing_pretty if normalize(n) not in catalogue_norm_index)

    # Besoins = indispensables du marché qui sont présents dans la recette
    besoins_norm = market_indispensables_norm & rec_ing_norm
    if not besoins_norm:
        return 100.0, [], [], inconnus

    ok = []
    manque = []

    # On juge OK si l’ingrédient ou l’un de ses remplacements est dispo
    for n_pretty in rec_ing_pretty:
        n_norm = normalize(n_pretty)
        if n_norm in besoins_norm:
            cand = find_available_candidate(n_norm)
            if cand is not None:
                # Si on a utilisé un remplacement, on l’affiche : "X (remplacé par : Y)"
                if cand != n_norm:
                    replacement_pretty = dispo_norm_to_pretty.get(cand, cand)
                    ok.append(f"{n_pretty} (remplacé par : {replacement_pretty})")
                else:
                    ok.append(n_pretty)
            else:
                manque.append(n_pretty)

    score = 100 * len(ok) / len(besoins_norm)
    return score, sorted(ok), sorted(manque), inconnus

# Calcul & tri + collecte des inconnus globaux
scored = []
unknown_global_pretty = set()

seen = set()  # dédoublonnage par (name, link)
for r in recettes:
    key = r["name"]
    if key in seen:
        continue
    seen.add(key)

    score, ok, manque, inconnus = score_recette(r)
    scored.append({
        "name": r["name"],
        "link": r["link"],
        "score": round(score, 1),
        "ok": ok,
        "manque": manque,
        "inconnus": inconnus,
    })
    unknown_global_pretty.update(inconnus)

scored.sort(key=lambda x: x["score"], reverse=True)

# Affichage
for r in scored:
    print(f"{r['score']}% - {r['name']} ({r['link']})")
    print("   OK:", ", ".join(r["ok"]) if r["ok"] else "aucun")
    print("   Manque:", ", ".join(r["manque"]) if r["manque"] else "Aucun")
    if r["inconnus"]:
        print("[⚠️] Ingrédients non définis dans ingredients_infos.txt : " + ", ".join(r["inconnus"]))
    print()

# ----- Générer ingredients_a_completer.txt -----
# Ne garder que ceux VRAIMENT absents du catalogue (comparaison normalisée)
unknown_global_pretty = sorted(
    n for n in unknown_global_pretty
    if normalize(n) not in catalogue_norm_index
)

# Mois à insérer (utilisés en one-line dans le rendu)
TEMPLATE_MONTHS = ["janvier", "février", "mars", "avril", "mai", "juin",
                   "juillet", "août", "septembre", "octobre", "novembre", "décembre"]

def render_ing_block(name: str) -> str:
    """Rend un bloc prêt à copier-coller dans ingredients_infos (mois sur une ligne)."""
    return (
        "  {\n"
        f"    \"name\": \"{name}\",\n"
        f"    \"saison\": [\"janvier\", \"février\", \"mars\", \"avril\", \"mai\", \"juin\", \"juillet\", \"août\", \"septembre\", \"octobre\", \"novembre\", \"décembre\"],\n"
        f"    \"rayon\": \"à définir\",\n"
        f"    \"indispensable\": true\n"
        "  }"
    )

out_path = Path("ingredients_a_completer.txt")
with open(out_path, "w", encoding="utf-8") as f:
    blocks = [render_ing_block(n) for n in unknown_global_pretty]
    f.write(",\n".join(blocks))

print(f"→ {len(unknown_global_pretty)} ingrédient(s) à compléter écrit(s) dans {out_path.resolve()}")


100.0% - Lasagnes à la bolognaise (https://www.marmiton.org/recettes/recette_lasagnes-a-la-bolognaise_18215.aspx)
   OK: Carotte, Oignon
   Manque: Aucun

100.0% - La pâte à galettes de blé noir traditionnelle (https://www.marmiton.org/recettes/recette_la-pate-a-galettes-de-ble-noir-traditionnelle_35351.aspx)
   OK: Champignons
   Manque: Aucun

100.0% - Gratin de ravioles du Dauphiné au comté (https://www.750g.com/gratin-de-ravioles-du-dauphine-au-comte-r208023.htm)
   OK: aucun
   Manque: Aucun

100.0% - Spaghetti bolognaise (https://www.marmiton.org/recettes/recette_spaghetti-bolognaise_19840.aspx)
   OK: Carotte, Oignon
   Manque: Aucun

100.0% - Tagliatelles au saumon frais (https://www.marmiton.org/recettes/recette_tagliatelles-au-saumon-frais_11354.aspx)
   OK: aucun
   Manque: Aucun

100.0% - Croziflette (https://www.marmiton.org/recettes/recette_croziflette_165464.aspx)
   OK: Oignon
   Manque: Aucun

100.0% - Croque-monsieur (https://www.marmiton.org/recettes/recette_croque-m

In [1]:
# GÉNÉRER LISTE DE COURSES (réutilise canon(), normalize(), REPLACEMENTS_NORM, dispo_norm_to_pretty, dispos_norm)
import json, math
from collections import defaultdict
from typing import Optional  # Ajouté pour compatibilité Python < 3.10

# Fichiers (ok si déjà chargés plus haut, sinon ces 3 lignes suffisent)
recettes = json.load(open("recettes_hellofresh.txt", encoding="utf-8"))
catalogue = json.load(open("ingredients_infos.txt", encoding="utf-8"))
raw_dispos = set(json.load(open("ingredients_disponibles.txt", encoding="utf-8")))

# IMPORTANT : on réutilise canon(), normalize(), REPLACEMENTS_NORM, dispo_norm_to_pretty du bloc matching.
# Si dispos_norm n'existe pas plus haut, décommente la ligne suivante :
# dispos_norm = {normalize(canon(d)) for d in raw_dispos}

# Index rayons / indispensables (par nom normalisé)
rayons_map = {normalize(canon(i["name"])): i.get("rayon", "").lower() for i in catalogue}
indispensables_map = {normalize(canon(i["name"])): i.get("indispensable", False) for i in catalogue}

def scale_and_round(value, unit, factor):
    """Multiplie une quantité et arrondit (g par 10g, sinon entier)."""
    scaled = value * factor
    if unit.lower().startswith("g"):
        return int(math.ceil(scaled / 10.0) * 10), unit
    return int(math.ceil(scaled)), unit

def pick_available(norm_name: str) -> Optional[str]:
    """
    Retourne le nom normalisé EFFECTIF à utiliser pour l'agrégation:
    - priorité à l'ingrédient de base s'il est dispo,
    - sinon 1er remplacement dispo,
    - sinon None (indisponible).
    """
    candidates = [norm_name] + [c for c in REPLACEMENTS_NORM.get(norm_name, {norm_name}) if c != norm_name]
    return next((c for c in candidates if c in dispos_norm), None)

def courses(recette_names, personnes):
    factor = personnes / 2  # par défaut les recettes sont pour 2 personnes
    result = defaultdict(dict)  # rayon -> {eff_norm -> bucket agrégé}

    selected = set(recette_names)
    seen_names = set()  # évite de compter plusieurs fois la même recette par nom

    for r in recettes:
        name = r["name"]
        if name not in selected:
            continue
        if name in seen_names:
            # DEBUG (optionnel): décommente pour voir les doublons par nom
            # print("[DEBUG] recette en double ignorée:", name, r.get("link", ""))
            continue
        seen_names.add(name)

        for ing_raw, data in r["ingredients"].items():
            # libellé “joli” (après alias) + normalisation
            ing_pretty = canon(ing_raw)
            ing_norm = normalize(ing_pretty)

            # rayon
            rayon = rayons_map.get(ing_norm, "inconnu")
            if rayon == "placard":
                continue

            # lecture quantités (compat ancien format)
            if isinstance(data, dict):
                qty = data.get("qty")
                unit = data.get("unit", "")
                override_indisp = data.get("indispensable", None)
            else:
                qty, unit, override_indisp = None, str(data), None

            indisp_flag = override_indisp if override_indisp is not None else indispensables_map.get(ing_norm, False)

            # scaling
            val = None
            if isinstance(qty, (int, float)):
                val, unit = scale_and_round(qty, unit, factor)

            # choix de l’ingrédient EFFECTIF (base prioritaire; sinon remplacement si MARCHÉ)
            if rayon == "marché":
                candidates = [ing_norm] + [c for c in REPLACEMENTS_NORM.get(ing_norm, {ing_norm}) if c != ing_norm]
                eff_norm = next((c for c in candidates if c in dispos_norm), ing_norm)
                if eff_norm != ing_norm and eff_norm in dispo_norm_to_pretty:
                    label_pretty = dispo_norm_to_pretty[eff_norm]
                else:
                    label_pretty = ing_pretty
            else:
                eff_norm = ing_norm
                label_pretty = ing_pretty

            # agrégation par (rayon, eff_norm)
            bucket = result[rayon].get(eff_norm)
            if not bucket:
                bucket = {
                    "label": label_pretty,
                    "val": 0,
                    "unit": unit,
                    "indispensable": indisp_flag,
                    "recipes": set(),
                    "norm": eff_norm,  # utile pour le split dispo / non-dispo ensuite
                }
                result[rayon][eff_norm] = bucket

            bucket["indispensable"] = bucket["indispensable"] or indisp_flag
            if val is None:
                bucket["val"] = None
            elif bucket["val"] is not None:
                bucket["val"] += val
            else:
                bucket["val"] = val
            if not bucket["unit"] and unit:
                bucket["unit"] = unit
            bucket["recipes"].add(name)

    # sets -> listes triées + structure d’affichage (clé = label_pretty)
    printable = {}
    for rayon, by_eff_norm in result.items():
        printable[rayon] = {}
        for eff_norm, data in by_eff_norm.items():
            data["recipes"] = sorted(data["recipes"])
            printable[rayon][data["label"]] = {
                "val": data["val"],
                "unit": data["unit"],
                "indispensable": data["indispensable"],
                "recipes": data["recipes"],
                "norm": eff_norm,
            }

    return printable, dispos_norm

# -------- Exemple d’utilisation --------
selection = [
    "Poulet thaï au curry vert & lait de coco",
   "Nouilles au bœuf sauté à la thaï",
   "Soupe de marrons & lardons fumés",
   "Risotto à la tomate & au lait de coco",
   "Crumble de légumes au pecorino & à l'origan"
 ]
personnes = 4
liste_courses, _dispos_norm = courses(selection, personnes)


# --- Affichage ---
def _print_block(title, dct):
    if not dct:
        return
    print(f"--- {title} ---")
    sorted_ings = sorted(dct.items(), key=lambda x: (not x[1]["indispensable"], x[0].lower()))
    for ing, data in sorted_ings:
        prefix = "[*] " if data["indispensable"] else "[ ] "
        nb_rec = len(data["recipes"])
        titles = " / ".join(data["recipes"])
        if data["val"] is not None:
            print(f"{prefix}{ing}: {data['val']} {data['unit']}  dans : {nb_rec} recette(s) ({titles})")
        else:
            shown = data["unit"] if data["unit"] else ""
            print(f"{prefix}{ing}: {shown}  dans : {nb_rec} recette(s) ({titles})")
    print()

# 1) MARCHÉ : on peut splitter selon présence effective dans dispos_norm
if "marché" in liste_courses:
    ings = liste_courses["marché"]
    marche_dispo, marche_non = {}, {}
    for label, data in ings.items():
        # un eff_norm qui n'était pas dispo restera non-dispo (label = base), sinon dispo (label = remplaçant/base)
        if data.get("norm") in _dispos_norm:
            marche_dispo[label] = data
        else:
            marche_non[label] = data
    _print_block("MARCHÉ", marche_dispo)
    _print_block("MARCHÉ NON DISPO", marche_non)

# 2) Autres rayons dans l’ordre souhaité
ordre_rayons = ["boucherie", "poissonnerie", "fromagerie", "herbes", "frais", "épicerie"]
deja_affiches = {"marché"}
for key in ordre_rayons:
    if key in liste_courses:
        _print_block(key.upper(), liste_courses[key])
        deja_affiches.add(key)

# 3) Rayons restants éventuels (ordre alpha)
for rayon, ings in sorted(liste_courses.items()):
    if rayon in deja_affiches:
        continue
    _print_block(rayon.upper(), ings)


NameError: name 'normalize' is not defined