In [5]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
import os
from pprint import pprint
import time
import string
from urllib.parse import urljoin

In [6]:


def fetch_url(url):
    headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36"
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response

def driver_setup():
    opts = Options()

    # Ensure Chromium/Chrome headless
    opts.add_argument("--headless=new")
    opts.add_argument("--no-sandbox")
    opts.add_argument("--disable-dev-shm-usage")

    # Optional: allow overriding Chromium binary via env var
    chromium_path = os.getenv("CHROMIUM_PATH")
    if chromium_path:
        opts.binary_location = chromium_path

    # Optional: stable UA for dynamic pages
    opts.add_argument("user-agent=Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36")
    driver = webdriver.Chrome(options=opts)
    driver.set_window_size(1400, 1000)
    return driver

def saq():
    def extract_links(html):
        soup = BeautifulSoup(html, "html.parser")
        links = []
        for a in soup.select(".product-items a[href], a.product-item-link[href]"):
            href = a.get("href", "").strip()
            if not href or href.startswith(("#", "mailto:", "tel:")):
                continue
            full_url = requests.compat.urljoin(url, href)
            if "/fr/" in full_url and full_url.rstrip("/").endswith("-ec"):
                links.append(full_url)
        return list(dict.fromkeys(links))

    all_links = []
    base_url = "https://www.saq.com/fr/cocktails?page_size=96"

    for page in range(1, 11):
        url = base_url + "&p=" + str(page)

        # Fast path: try static HTML first
        try:
            resp = fetch_url(url)
            fast = extract_links(resp.text)
        except Exception:
            fast = []

        if fast:
            all_links.extend(fast)
            continue

        # Fallback to Selenium if needed
        driver = driver_setup()
        try:
            driver.get(url)
            try:
                WebDriverWait(driver, 10).until(
                    lambda d: bool(BeautifulSoup(d.page_source, "html.parser")
                                   .select(".product-items a[href], a.product-item-link[href]"))
                )
            except TimeoutException:
                pass
            slow = extract_links(driver.page_source) or []
            all_links.extend(slow)
        finally:
            driver.quit()

    # Deduplicate and return
    return list(dict.fromkeys(all_links))

if __name__ == "__main__":
    test = saq()
    display(test)

['https://www.saq.com/fr/1-inch-punch-ec',
 'https://www.saq.com/fr/50-jours-ec',
 'https://www.saq.com/fr/536-ec',
 'https://www.saq.com/fr/77e-ciel-ec',
 'https://www.saq.com/fr/911-ec',
 'https://www.saq.com/fr/absinthe-ec',
 'https://www.saq.com/fr/av-ec',
 'https://www.saq.com/fr/africa-colada-ec',
 'https://www.saq.com/fr/agrumes-glaces-ec',
 'https://www.saq.com/fr/grand-marnier-tonique-ec',
 'https://www.saq.com/fr/aloes-fizz-ec',
 'https://www.saq.com/fr/amande-irlandaise-ec',
 'https://www.saq.com/fr/amarante-ec',
 'https://www.saq.com/fr/amaretto-citron-ec',
 'https://www.saq.com/fr/amaretto-sour-ec',
 'https://www.saq.com/fr/amaretto-sour-faible-alcool-ec',
 'https://www.saq.com/fr/americano-ec',
 'https://www.saq.com/fr/amour-sauvage-ec',
 'https://www.saq.com/fr/ananas-veloute-ec',
 'https://www.saq.com/fr/anis-fresco-ec',
 'https://www.saq.com/fr/anis-citronne-ec',
 'https://www.saq.com/fr/anis-petillant-ec',
 'https://www.saq.com/fr/aperi-tiki-colada-ec',
 'https://www.

In [8]:

def fetch_drink(url):
    def parse_ingredients(html):
        soup = BeautifulSoup(html, "html.parser")
        items = []

        blocks = soup.select(
            '.ingredients .value, .ingredients-text .value, '
            '.cocktail-ingredients .value, .cocktail__ingredients .value'
        )
        for b in blocks:
            text = b.get_text("\n", strip=True)
            for raw in text.replace('•', '\n').replace('·', '\n').splitlines():
                raw = raw.strip(" -–—\t\r ")
                if raw:
                    items.append(raw)

        seen, result = set(), []
        for it in items:
            cleaned = " ".join(it.split())
            if cleaned and cleaned not in seen:
                seen.add(cleaned)
                result.append(cleaned)
        return result

    def parse_name(html):
        soup = BeautifulSoup(html, "html.parser")
        el = soup.select_one(".page-title, h1.page-title, [data-ui-id='page-title-wrapper']")
        return el.get_text(strip=True) if el else ""

    def parse_preparation(html):
        soup = BeautifulSoup(html, "html.parser")
        steps = []

        li_nodes = soup.select(
            ".cocktail.preparation-text li, .cocktail .preparation-text li, "
            ".preparation-text li, [itemprop='recipeInstructions'] li"
        )
        if li_nodes:
            for li in li_nodes:
                t = li.get_text(" ", strip=True)
                if t:
                    steps.append(t)
        else:
            block = soup.select_one(
                ".cocktail.preparation-text, .cocktail .preparation-text, "
                ".preparation-text, [itemprop='recipeInstructions']"
            )
            if block:
                text = block.get_text("\n", strip=True)
                for raw in text.replace('•', '\n').replace('·', '\n').splitlines():
                    raw = raw.strip(" -–—\t\r ")
                    if raw:
                        steps.append(raw)

        seen, out = set(), []
        for s in steps:
            s_clean = " ".join(s.split())
            if s_clean and s_clean not in seen:
                seen.add(s_clean)
                out.append(s_clean)
        return out

    def parse_image(html, base_url):
        soup = BeautifulSoup(html, "html.parser")
        candidates = []

        for sel, attr in [
            ('meta[property="og:image"]', 'content'),
            ('meta[property="og:image:secure_url"]', 'content'),
            ('meta[name="twitter:image"]', 'content'),
            ('link[rel="image_src"]', 'href'),
            ('meta[itemprop="image"]', 'content'),
        ]:
            el = soup.select_one(sel)
            if el and el.get(attr):
                candidates.append(el.get(attr).strip())

        def pick_from_srcset(srcset):
            best_url, best_w, best_x = None, -1, -1.0
            for part in srcset.split(","):
                p = part.strip()
                if not p:
                    continue
                pieces = p.split()
                url_only = pieces[0]
                desc = pieces[1] if len(pieces) > 1 else ""
                if desc.endswith("w"):
                    try:
                        w = int(desc[:-1])
                        if w > best_w:
                            best_w, best_url = w, url_only
                    except ValueError:
                        pass
                elif desc.endswith("x"):
                    try:
                        x = float(desc[:-1])
                        if x > best_x:
                            best_x, best_url = x, url_only
                    except ValueError:
                        pass
                else:
                    if not best_url:
                        best_url = url_only
            return best_url

        img_selectors = ['img',]
        for sel in img_selectors:
            for el in soup.select(sel):
                srcset = el.get("srcset") or el.get("data-srcset")
                if srcset:
                    chosen = pick_from_srcset(srcset)
                    if chosen:
                        candidates.append(chosen.strip())

                for attr in ("src", "data-src", "data-original", "data-lazy", "data-image"):
                    v = el.get(attr)
                    if v:
                        candidates.append(v.strip())

        for cand in candidates:
            if not cand:
                continue
            abs_url = urljoin(base_url, cand)
            if any(token in abs_url.lower() for token in ["placeholder", "spacer", "transparent", "data:image"]):
                continue
            return abs_url
        return ""

    name = ""
    html = ""
    ingredients = []
    preparation = []
    image = ""

    resp_url = url
    try:
        resp = fetch_url(url)
        resp_url = getattr(resp, "url", url) or url
        html = resp.text
        ingredients = parse_ingredients(html)
        preparation = parse_preparation(html)
        name = parse_name(html)
        image = parse_image(html, resp_url)
    except Exception:
        pass

    if not name or not ingredients or not preparation or not image:
        driver = driver_setup()
        try:
            driver.get(url)

            def ready(d):
                if d.find_elements(By.CLASS_NAME, "page-title"):
                    return True
                s = BeautifulSoup(d.page_source, "html.parser")
                return bool(s.select(
                    '[itemprop="recipeIngredient"], .ingredients .value, .ingredients-text .value, '
                    '.cocktail__ingredients .value, .cocktail.preparation-text, .cocktail .preparation-text, '
                    '.preparation-text, [itemprop="recipeInstructions"], meta[property="og:image"], img'
                ))

            try:
                WebDriverWait(driver, 12).until(ready)
            except TimeoutException:
                pass

            try:
                title_el = driver.find_element(By.CLASS_NAME, "page-title")
                name = title_el.text.strip() or name
            except NoSuchElementException:
                pass

            if not preparation:
                sels = [".cocktail .preparation-text p"]
                steps = []
                for sel in sels:
                    try:
                        els = driver.find_elements(By.CSS_SELECTOR, sel)
                    except Exception:
                        els = []
                    for el in els:
                        t = el.text.strip()
                        if t:
                            for raw in t.replace('•', '\n').replace('·', '\n').splitlines():
                                raw = raw.strip(" -–—\t\r ")
                                if raw:
                                    steps.append(raw)
                    if steps:
                        break
                seen = set()
                preparation = []
                for s in steps:
                    s_clean = " ".join(s.split())
                    if s_clean and s_clean not in seen:
                        seen.add(s_clean)
                        preparation.append(s_clean)

            html = driver.page_source
            if not ingredients:
                ingredients = parse_ingredients(html)
            if not name:
                name = parse_name(html)
            if not preparation:
                preparation = parse_preparation(html)
            if not image:
                image = parse_image(html, driver.current_url)
        finally:
            driver.quit()

    return {"name": name, "url": url, "ingredients": ingredients, "preparation": preparation, "image": image}


# run for all urls in the existing `test` list
results = []
errors = []

total = len(test)
for i, u in enumerate(test[:1], 1):
    try:
        results.append(fetch_drink(u))
    except Exception as e:
        errors.append({"url": u, "error": str(e)})
    if i % 25 == 0 or i == total:
        print(f"{i}/{total} processed", end="\r")

print(f"\nDone. {len(results)} successes, {len(errors)} errors.")
display(results[:5])



Done. 1 successes, 0 errors.


[{'name': '1 Inch punch',
  'url': 'https://www.saq.com/fr/1-inch-punch-ec',
  'ingredients': ['45 ml (1 ½ oz) de vodka',
   "15 ml (1/2 oz) de liqueur d'agrumes",
   "60 ml (2 oz) de jus d'ananas",
   '15 ml (1/2 oz) de sirop simple',
   'Pâte de chili',
   'Sucre',
   'Flocons de piment',
   'Glaçons'],
  'preparation': ['Préparation',
   "Givrer le bord d'une flûte avec de l’eau et du sucre.",
   'Dans un shaker rempli de glaçons, ajouter tous les ingrédients, excepté le sucre et les flocons de piment.',
   'Agitez vivement de 8 à 10 secondes.',
   'Filtrer le contenu du shaker dans un le verre à l’aide d’une passoire à glaçons.',
   'Garnir de flocons de piments.'],
  'image': 'https://www.saq.com/media/catalog/product/1/-/1-inch-punch-ec-1_1610403034.png?optimize=high&fit=bounds&height=265&width=265&canvas=265:265&format=jpeg'}]

In [1]:
from concurrent.futures import ThreadPoolExecutor, as_completed

# Faster scraper: single-parse per page + concurrent HTTP phase, minimal Selenium fallback

def fetch_drink(url, use_selenium=True):
    def parse_ingredients(soup):
        items = []
        blocks = soup.select(
            '.ingredients .value, .ingredients-text .value, '
            '.cocktail-ingredients .value, .cocktail__ingredients .value, '
            '[itemprop="recipeIngredient"]'
        )
        for b in blocks:
            text = b.get_text("\n", strip=True)
            for raw in text.replace('•', '\n').replace('·', '\n').splitlines():
                raw = raw.strip(" -–—\t\r ")
                if raw:
                    items.append(raw)

        seen, result = set(), []
        for it in items:
            cleaned = " ".join(it.split())
            if cleaned and cleaned not in seen:
                seen.add(cleaned)
                result.append(cleaned)
        return result

    def parse_name(soup):
        el = soup.select_one(".page-title, h1.page-title, [data-ui-id='page-title-wrapper']")
        return el.get_text(strip=True) if el else ""

    def parse_preparation(soup):
        steps = []

        li_nodes = soup.select(
            ".cocktail.preparation-text li, .cocktail .preparation-text li, "
            ".preparation-text li, [itemprop='recipeInstructions'] li"
        )
        if li_nodes:
            for li in li_nodes:
                t = li.get_text(" ", strip=True)
                if t:
                    steps.append(t)
        else:
            block = soup.select_one(
                ".cocktail.preparation-text, .cocktail .preparation-text, "
                ".preparation-text, [itemprop='recipeInstructions']"
            )
            if block:
                text = block.get_text("\n", strip=True)
                for raw in text.replace('•', '\n').replace('·', '\n').splitlines():
                    raw = raw.strip(" -–—\t\r ")
                    if raw:
                        steps.append(raw)

        seen, out = set(), []
        for s in steps:
            s_clean = " ".join(s.split())
            if s_clean and s_clean not in seen:
                seen.add(s_clean)
                out.append(s_clean)
        return out

    def parse_image(soup, base_url):
        candidates = []

        for sel, attr in [
            ('meta[property="og:image"]', 'content'),
            ('meta[property="og:image:secure_url"]', 'content'),
            ('meta[name="twitter:image"]', 'content'),
            ('link[rel="image_src"]', 'href'),
            ('meta[itemprop="image"]', 'content'),
        ]:
            el = soup.select_one(sel)
            if el and el.get(attr):
                candidates.append(el.get(attr).strip())

        def pick_from_srcset(srcset):
            best_url, best_w, best_x = None, -1, -1.0
            for part in srcset.split(","):
                p = part.strip()
                if not p:
                    continue
                pieces = p.split()
                url_only = pieces[0]
                desc = pieces[1] if len(pieces) > 1 else ""
                if desc.endswith("w"):
                    try:
                        w = int(desc[:-1])
                        if w > best_w:
                            best_w, best_url = w, url_only
                    except ValueError:
                        pass
                elif desc.endswith("x"):
                    try:
                        x = float(desc[:-1])
                        if x > best_x:
                            best_x, best_url = x, url_only
                    except ValueError:
                        pass
                else:
                    if not best_url:
                        best_url = url_only
            return best_url

        for el in soup.select('img'):
            srcset = el.get("srcset") or el.get("data-srcset")
            if srcset:
                chosen = pick_from_srcset(srcset)
                if chosen:
                    candidates.append(chosen.strip())
            for attr in ("src", "data-src", "data-original", "data-lazy", "data-image"):
                v = el.get(attr)
                if v:
                    candidates.append(v.strip())

        for cand in candidates:
            if not cand:
                continue
            abs_url = urljoin(base_url, cand)
            if any(t in abs_url.lower() for t in ["placeholder", "spacer", "transparent", "data:image"]):
                continue
            return abs_url
        return ""

    name = ""
    html = ""
    ingredients = []
    preparation = []
    image = ""

    resp_url = url
    try:
        resp = fetch_url(url)
        resp_url = getattr(resp, "url", url) or url
        html = resp.text or ""
        if html:
            soup = BeautifulSoup(html, "html.parser")
            ingredients = parse_ingredients(soup)
            preparation = parse_preparation(soup)
            name = parse_name(soup)
            image = parse_image(soup, resp_url)
    except Exception:
        pass

    need_selenium = use_selenium and (not name or not ingredients or not preparation or not image)
    if need_selenium:
        driver = driver_setup()
        try:
            driver.get(url)

            def ready(d):
                if d.find_elements(By.CLASS_NAME, "page-title"):
                    return True
                s = BeautifulSoup(d.page_source, "html.parser")
                return bool(s.select(
                    '[itemprop="recipeIngredient"], .ingredients .value, .ingredients-text .value, '
                    '.cocktail__ingredients .value, .cocktail.preparation-text, .cocktail .preparation-text, '
                    '.preparation-text, [itemprop="recipeInstructions"], meta[property="og:image"], img'
                ))

            try:
                WebDriverWait(driver, 8).until(ready)
            except TimeoutException:
                pass

            html = driver.page_source
            soup = BeautifulSoup(html, "html.parser")

            if not name:
                try:
                    title_el = driver.find_element(By.CLASS_NAME, "page-title")
                    name = title_el.text.strip() or name
                except NoSuchElementException:
                    name = parse_name(soup) or name

            if not ingredients:
                ingredients = parse_ingredients(soup)
            if not preparation:
                preparation = parse_preparation(soup)
            if not image:
                image = parse_image(soup, driver.current_url)
        finally:
            driver.quit()

    return {"name": name, "url": url, "ingredients": ingredients, "preparation": preparation, "image": image}


# Concurrent run: fast HTTP-only pass, then small Selenium fallback for incomplete ones

# Respect existing results/errors; skip already processed URLs
processed = {r.get("url") for r in results if isinstance(r, dict)}
queue = [u for u in test if u not in processed]

total = len(test)
print(f"Queued {len(queue)} of {total} URLs (skipping {len(processed)} already done).")

# Phase 1: HTTP-only, high concurrency
max_workers = min(16, max(4, len(queue) // 32 or 4))
done = 0
futures = []
if queue:
    with ThreadPoolExecutor(max_workers=max_workers) as ex:
        for u in queue:
            futures.append(ex.submit(fetch_drink, u, False))
        for fut in as_completed(futures):
            try:
                r = fut.result()
                results.append(r)
            except Exception as e:
                # We don't know which URL failed here unless we carry it; keep generic
                errors.append({"url": None, "error": str(e)})
            done += 1
            if done % 25 == 0 or done == len(queue):
                print(f"HTTP phase: {done}/{len(queue)} done", end="\r")
    print()

# Phase 2: Selenium fallback only for incomplete
def is_incomplete(r):
    return not (r.get("name") and r.get("ingredients") and r.get("preparation") and r.get("image"))

url_to_index = {r["url"]: idx for idx, r in enumerate(results) if isinstance(r, dict) and "url" in r}
need_fallback = [r["url"] for r in results if isinstance(r, dict) and is_incomplete(r)]

if need_fallback:
    print(f"Selenium fallback for {len(need_fallback)} pages...")
    # Keep selenium concurrency low to avoid heavy load
    with ThreadPoolExecutor(max_workers=2) as ex:
        futmap = {ex.submit(fetch_drink, u, True): u for u in need_fallback}
        for fut in as_completed(futmap):
            u = futmap[fut]
            try:
                r2 = fut.result()
                idx = url_to_index.get(u)
                if idx is not None:
                    results[idx] = r2
                else:
                    results.append(r2)
            except Exception as e:
                errors.append({"url": u, "error": str(e)})

NameError: name 'results' is not defined

In [None]:
import time
from IPython.display import HTML

def results_to_html(results):
    """
    Display results as interactive cards.
    Expected structure for each item in results:
    {
        'title': str,           # Required: Display title
        'image': str,           # Optional: Image URL
        'description': str,     # Optional: Short description
        'details': str,         # Optional: Full details shown in modal
        'tags': list[str]       # Optional: Tags for filtering
    }
    """
    # Unique container id
    uid = f"res_{int(time.time()*1000)}"

    cards = []
    present_tags = set()

    for item in results:
        title = item.get('title', 'Untitled')
        image = item.get('image', '')
        description = item.get('description', '')
        details = item.get('details', '') or 'No additional details available.'
        tags = item.get('tags', [])
        
        present_tags.update(tags)
        
        cards.append(f"""
        <div class="card" data-tags="{','.join(tags)}" title="{title}">
            <div class="media">
              <img src="{image}" alt="{title}" loading="lazy">
            </div>
            <div class="body">
              <div class="title" title="{title}">{title}</div>
              <div class="description">{description}</div>
              <div class="detail" hidden>
                <div class="full-details">{details}</div>
              </div>
            </div>
        </div>
        """)

    filters = "".join(
        f'<label data-tag="{tag}">'
        f'  <input type="checkbox" value="{tag}">'
        f'  <span class="chip"><span class="name">{tag}</span><span class="badge" data-count>0</span></span>'
        f'</label>'
        for tag in sorted(present_tags)
    )

    styles = f"""
    <style>
      #{uid} {{
        --bg: #ffffff;
        --muted: #6b7280;
        --text: #111827;
        --border: #e5e7eb;
        --chip-bg: #f3f4f6;
        --chip-border: #d1d5db;
        --chip-active-bg: #dbeafe;
        --chip-active-border: #93c5fd;
        --chip-active-text: #1e40af;
        --primary: #2563eb;
        --shadow: 0 1px 2px rgba(0,0,0,.06), 0 4px 10px rgba(0,0,0,.05);
        color: var(--text);
        font-family: system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif;
      }}
      #{uid} .toolbar {{
        position: sticky; top: 0; z-index: 3;
        background: linear-gradient(#fff, rgba(255,255,255,.95));
        backdrop-filter: saturate(1.2) blur(4px);
        padding: 10px 0 8px;
        display: flex; flex-wrap: wrap; gap: 10px 14px; align-items: center;
        border-bottom: 1px solid var(--border);
      }}
      #{uid} .summary {{
        font-size: 13px; color: var(--muted); padding: 6px 8px; border-radius: 8px;
        background: #f8fafc; border: 1px solid var(--border);
      }}
      #{uid} .filter {{ display: flex; flex-wrap: wrap; gap: 8px 10px; }}
      #{uid} .filter label {{ display: inline-flex; align-items: center; position: relative; cursor: pointer; }}
      #{uid} .filter input {{ position: absolute; inset: 0; opacity: 0; pointer-events: none; }}
      #{uid} .chip {{
        display: inline-flex; align-items: center; gap: 8px;
        padding: 6px 10px; font-size: 13px; border-radius: 999px;
        background: var(--chip-bg); border: 1px solid var(--chip-border);
        transition: background .2s, border-color .2s, color .2s, transform .08s ease-in-out;
      }}
      #{uid} .filter label:hover .chip {{ transform: translateY(-1px); }}
      #{uid} .filter input:checked + .chip {{
        background: var(--chip-active-bg); border-color: var(--chip-active-border); color: var(--chip-active-text);
      }}
      #{uid} .badge {{
        display: inline-flex; align-items: center; justify-content: center;
        min-width: 22px; height: 20px; padding: 0 6px; border-radius: 999px;
        font-size: 12px; line-height: 1; color: #0f172a;
        background: #e2e8f0; border: 1px solid #cbd5e1;
      }}
      #{uid} .actions {{ margin-left: auto; display: inline-flex; gap: 8px; align-items: center; }}
      #{uid} .btn {{
        border: 1px solid var(--chip-border); background: #fff; color: var(--primary);
        padding: 6px 10px; font-size: 12px; border-radius: 8px; cursor: pointer;
      }}
      #{uid} .btn:hover {{ border-color: var(--primary); }}
      #{uid} .grid {{
        display: grid; gap: 14px; padding: 14px 0;
        grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
      }}
      #{uid} .card {{
        display: flex; flex-direction: column; gap: 8px; cursor: pointer;
        border: 1px solid var(--border); border-radius: 12px; background: var(--bg);
        box-shadow: var(--shadow); overflow: hidden; transition: transform .12s ease, box-shadow .12s ease;
      }}
      #{uid} .card:hover {{ transform: translateY(-2px); box-shadow: 0 8px 24px rgba(0,0,0,.08); }}
      #{uid} .media {{ aspect-ratio: 4/3; background: #f1f5f9; overflow: hidden; }}
      #{uid} .card img {{ width: 100%; height: 100%; object-fit: cover; display: block; }}
      #{uid} .body {{ padding: 8px 10px 10px; display: grid; gap: 6px; }}
      #{uid} .title {{ font-weight: 700; font-size: 15px; letter-spacing: .2px; }}
      #{uid} .description {{ font-size: 13px; color: #374151; line-height: 1.4; }}
      #{uid} .empty {{ display:none; color:#6b7280; font-size:14px; padding: 10px; }}
      /* Modal */
      #{uid} [data-modal][hidden] {{ display:none; }}
      #{uid} [data-modal] {{
        position: fixed; inset: 0; z-index: 5;
        display: grid; place-items: center;
      }}
      #{uid} [data-modal] .overlay {{
        position: absolute; inset: 0; background: rgba(15, 23, 42, .55);
      }}
      #{uid} [data-modal] .sheet {{
        position: relative; z-index: 1; width: min(720px, 92vw);
        background: #fff; border: 1px solid var(--border); border-radius: 12px;
        box-shadow: 0 20px 40px rgba(0,0,0,.18);
        overflow: hidden;
      }}
      #{uid} .modal-body {{
        display: grid; grid-template-columns: 40% 60%; gap: 0;
      }}
      #{uid} .modal-media img {{ display:block; width: 100%; height: 100%; object-fit: cover; }}
      #{uid} .modal-content {{ padding: 16px; }}
      #{uid} .modal-title {{ margin: 0 0 8px; font-size: 18px; font-weight: 700; }}
      #{uid} .modal-details {{ white-space: pre-wrap; font-size: 14px; line-height: 1.4; color:#111827; }}
      #{uid} .close {{
        position: absolute; top: 8px; right: 8px; border: 1px solid var(--chip-border);
        background: #fff; border-radius: 8px; width: 32px; height: 32px; cursor: pointer;
        font-size: 18px; line-height: 30px;
      }}
      @media (max-width: 720px) {{
        #{uid} .grid {{ grid-template-columns: repeat(auto-fill, minmax(160px, 1fr)); }}
        #{uid} .modal-body {{ grid-template-columns: 1fr; }}
      }}
    </style>
    """

    html = f"""
    <div id="{uid}">
      <div class="toolbar">
        <div class="filter" id="tag-filters">{filters or '<span class="empty">No tags available</span>'}</div>
        <div class="actions">
          <div class="summary" aria-live="polite">Showing 0 of 0</div>
          <button class="btn clear" type="button">Clear</button>
        </div>
      </div>
      <div class="grid">
        {''.join(cards)}
      </div>
      <div class="empty">No matches. Try adjusting filters.</div>

      <div class="modal" data-modal hidden>
        <div class="overlay" data-close></div>
        <div class="sheet" role="dialog" aria-modal="true" aria-labelledby="{uid}-modal-title">
          <button class="close" type="button" aria-label="Close" data-close>&times;</button>
          <div class="modal-body">
            <div class="modal-media"><img alt=""></div>
            <div class="modal-content">
              <h3 class="modal-title" id="{uid}-modal-title"></h3>
              <div class="modal-details"></div>
            </div>
          </div>
        </div>
      </div>
    </div>
    <script>
    (function(){{
      const root = document.getElementById('{uid}');
      const grid = root.querySelector('.grid');
      const summary = root.querySelector('.summary');
      const emptyState = root.querySelector('.empty');
      const filterWrap = root.querySelector('#tag-filters');
      const labels = Array.from(filterWrap.querySelectorAll('label[data-tag]'));
      const boxes = labels.map(l => l.querySelector('input[type=checkbox]'));
      const clearBtn = root.querySelector('.clear');

      const cards = Array.from(grid.querySelectorAll('.card')).map(el => ({{
        el,
        tags: (el.dataset.tags || '').split(',').filter(Boolean)
      }}));

      function selectedSet() {{
        return new Set(boxes.filter(b => b.checked).map(b => b.value));
      }}

      function visibleBySelected(sel) {{
        if (!sel || sel.size === 0) return cards.length;
        let n = 0;
        for (const c of cards) {{
          const show = c.tags.some(t => sel.has(t));
          if (show) n++;
        }}
        return n;
      }}

      function apply() {{
        const sel = selectedSet();
        const none = sel.size === 0;
        let visible = 0;
        for (const c of cards) {{
          const show = none ? true : c.tags.some(t => sel.has(t));
          c.el.style.display = show ? '' : 'none';
          if (show) visible++;
        }}
        emptyState.style.display = visible === 0 ? '' : 'none';
        summary.textContent = `Showing ${{visible}} of ${{cards.length}}`;
        return sel;
      }}

      function updateCounts(sel) {{
        const baseVisible = visibleBySelected(sel);
        for (const l of labels) {{
          const tag = l.dataset.tag;
          const badge = l.querySelector('[data-count]');
          if (!badge) continue;

          let count = 0;
          const augmented = new Set(sel);
          augmented.add(tag);
          if (sel.has(tag)) {{
            count = baseVisible;
          }} else {{
            for (const c of cards) {{
              const show = c.tags.some(t => augmented.has(t));
              if (show) count++;
            }}
          }}
          badge.textContent = count;
        }}
      }}

      function refresh() {{
        const sel = apply();
        updateCounts(sel);
      }}

      boxes.forEach(b => b.addEventListener('change', refresh));
      if (clearBtn) clearBtn.addEventListener('click', () => {{ boxes.forEach(b => b.checked = false); refresh(); }});

      // Modal logic
      const modal = root.querySelector('[data-modal]');
      const modImg = modal.querySelector('.modal-media img');
      const modTitle = modal.querySelector('.modal-title');
      const modDetails = modal.querySelector('.modal-details');

      function openFrom(cardEl) {{
        const title = cardEl.getAttribute('title') || cardEl.querySelector('.title')?.textContent || '';
        const img = cardEl.querySelector('img');
        const detailsEl = cardEl.querySelector('.full-details');
        modTitle.textContent = title;
        if (img) {{
          modImg.src = img.src;
          modImg.alt = title;
        }} else {{
          modImg.removeAttribute('src');
          modImg.alt = '';
        }}
        modDetails.textContent = detailsEl ? detailsEl.textContent : 'No additional details available.';
        modal.hidden = false;
      }}

      function closeModal() {{
        modal.hidden = true;
      }}

      root.addEventListener('click', (e) => {{
        const closeBtn = e.target.closest('[data-close]');
        if (closeBtn) return closeModal();
        const card = e.target.closest('.card');
        if (card && root.contains(card)) {{
          openFrom(card);
        }}
      }});

      document.addEventListener('keydown', (e) => {{
        if (e.key === 'Escape' && !modal.hidden) closeModal();
      }});

      // Initial paint
      refresh();
    }})();
    </script>
    """
    return styles + html

# Usage example:
display(HTML(results_to_html(results)))

NameError: name 'pd' is not defined

In [None]:
def fetch_drinks():
    base_url = 'https://www.thecocktaildb.com/api/json/v1/1/search.php?f='
    all_drinks = []

    for ch in string.ascii_lowercase:
        json_data = fetch_url(base_url + ch).json()
        try:
            data = json_data.get('drinks', [])

            for drink in data:
                name = drink['strDrink']
                image = drink['strDrinkThumb']
                ingredients = [drink.get(f"strIngredient{i}") for i in range(1, 16) if drink.get(f"strIngredient{i}")]
                measures = [drink.get(f"strMeasure{i}") for i in range(1, 16) if drink.get(f"strMeasure{i}")]
                instructionsEN = drink['strInstructions']
                instructionsFR = drink.get('strInstructionsFR', '')
                all_drinks.append({
                    'name': name,
                    'image': image,
                    'ingredients': ingredients,
                    'measures': measures,
                    'instructions': instructionsEN,
                    'instructionsFR': instructionsFR
                })
        except Exception as e:
            print(f"Error processing character '{ch}': {e}")
    return all_drinks

all_drinks = fetch_drinks()
display(pd.DataFrame(all_drinks))

Error processing character 'u': 'NoneType' object is not iterable
Error processing character 'x': 'NoneType' object is not iterable


Unnamed: 0,name,image,ingredients,measures,instructions,instructionsFR
0,A1,https://www.thecocktaildb.com/images/media/dri...,"[Gin, Grand Marnier, Lemon Juice, Grenadine]","[1 3/4 shot , 1 Shot , 1/4 Shot, 1/8 Shot]","Pour all ingredients into a cocktail shaker, m...","Verser tous les ingrédients dans un shaker, mé..."
1,ABC,https://www.thecocktaildb.com/images/media/dri...,"[Amaretto, Baileys irish cream, Cognac]","[1/3 , 1/3 , 1/3 ]",Layered in a shot glass.,
2,Ace,https://www.thecocktaildb.com/images/media/dri...,"[Gin, Grenadine, Heavy cream, Milk, Egg White]","[2 shots , 1/2 shot , 1/2 shot , 1/2 shot, 1/2...",Shake all the ingredients in a cocktail shaker...,Agiter tous les ingrédients dans un shaker ave...
3,ACID,https://www.thecocktaildb.com/images/media/dri...,"[151 proof rum, Wild Turkey]","[1 oz Bacardi , 1 oz ]",Poor in the 151 first followed by the 101 serv...,
4,AT&T,https://www.thecocktaildb.com/images/media/dri...,"[Absolut Vodka, Gin, Tonic water]","[1 oz , 1 oz , 4 oz ]","Pour Vodka and Gin over ice, add Tonic and Stir","Verser la vodka et le gin sur de la glace, ajo..."
...,...,...,...,...,...,...
421,Zima Blaster,https://www.thecocktaildb.com/images/media/dri...,"[Zima, Chambord raspberry liqueur]","[12 oz , 3 oz ]","Fill glass with ice. Pour in Chambord, then fi...",
422,Zizi Coin-coin,https://www.thecocktaildb.com/images/media/dri...,"[Cointreau, Lemon juice, Ice, Lemon]","[5 cl , 2 cl , cubes, or lime\n]","Pour 5cl of Cointreau on ice, add 2cl of fresh...","Verser 5 cl de Cointreau sur de la glace, ajou..."
423,Zimadori Zinger,https://www.thecocktaildb.com/images/media/dri...,"[Midori melon liqueur, Zima]","[1.5 oz , 12 oz ]",Pour Zima in a collins glass over ice and then...,Versez le Zima dans un verre Collins sur de la...
424,Zippy's Revenge,https://www.thecocktaildb.com/images/media/dri...,"[Amaretto, Rum, Kool-Aid]","[2 oz , 2 oz , 4 oz Grape ]",Mix Kool-Aid to taste then add Rum and ammaret...,"Mélangez le Kool-Aid selon votre goût, puis aj..."


In [None]:
import time
from IPython.display import HTML

def drinks_to_html(drinks):
    # Unique container id
    uid = f"alc_{int(time.time()*1000)}"

    # Map ingredients to alcohol families
    alcohol_map = {
        'Vodka': ['vodka'],
        'Rum': [' rum', 'rum '],  # avoid matching 'crumble'
        'Gin': ['gin'],
        'Tequila': ['tequila'],
        'Whiskey': ['whiskey', 'whisky', 'bourbon', 'scotch', 'rye'],
        'Brandy': ['brandy', 'cognac', 'armagnac'],
        'Mezcal': ['mezcal'],
        'Cachaça': ['cachaça', 'cachaca'],
        'Wine': [' red wine', ' white wine', ' wine', 'porto', 'port ', ' sherry'],
        'Sparkling Wine': ['champagne', 'prosecco', 'sparkling wine', 'cava'],
        'Vermouth': ['vermouth'],
        'Aperitif': ['aperol', 'campari', 'lillet'],
        'Absinthe': ['absinthe'],
        'Amaretto': ['amaretto'],
        'Coffee Liqueur': ['kahlua', 'coffee liqueur', 'tia maria'],
        'Cream Liqueur': ['baileys', 'irish cream', 'advocaat'],
        'Herbal Liqueur': ['jägermeister', 'jagermeister', 'chartreuse', 'drambuie', 'benedictine'],
        'Melon Liqueur': ['midori'],
        'Orange Liqueur': ['triple sec', 'cointreau', 'grand marnier', ' curaçao', ' curacao'],
        'Sambuca': ['sambuca'],
        'Ouzo': ['ouzo'],
        'Pisco': ['pisco'],
        'Sake': ['sake'],
        'Beer': [' beer', 'beer '],
    }

    def detect_alcs(ings):
        found = set()
        for ing in ings:
            low = f" {str(ing).lower()} "
            for label, needles in alcohol_map.items():
                if any(n in low for n in needles):
                    found.add(label)
        return sorted(found)

    cards = []
    present_alcs = set()

    for d in drinks:
        name = d.get('name', '')
        image = d.get('image', '')
        ings = d.get('ingredients') or []
        instr = d.get('instructions', '') or 'No instructions available.'
        alcs = detect_alcs(ings)
        present_alcs.update(alcs)
        cards.append(f"""
        <div class="card" data-alc="{','.join(alcs)}" title="{name}">
            <div class="media">
              <img src="{image}" alt="{name}" loading="lazy">
            </div>
            <div class="body">
              <div class="title" title="{name}">{name}</div>
              <ul>{"".join(f"<li>{ing}</li>" for ing in ings)}</ul>
              <div class="detail" hidden>
                <div class="instructions">{instr}</div>
              </div>
            </div>
        </div>
        """)

    filters = "".join(
        f'<label data-alc="{a}">'
        f'  <input type="checkbox" value="{a}">'
        f'  <span class="chip"><span class="name">{a}</span><span class="badge" data-count>0</span></span>'
        f'</label>'
        for a in sorted(present_alcs)
    )

    styles = f"""
    <style>
      #{uid} {{
        --bg: #ffffff;
        --muted: #6b7280;
        --text: #111827;
        --border: #e5e7eb;
        --chip-bg: #f3f4f6;
        --chip-border: #d1d5db;
        --chip-active-bg: #dbeafe;
        --chip-active-border: #93c5fd;
        --chip-active-text: #1e40af;
        --primary: #2563eb;
        --shadow: 0 1px 2px rgba(0,0,0,.06), 0 4px 10px rgba(0,0,0,.05);
        color: var(--text);
        font-family: system-ui,-apple-system,Segoe UI,Roboto,Helvetica,Arial,sans-serif;
      }}
      #{uid} .toolbar {{
        position: sticky; top: 0; z-index: 3;
        background: linear-gradient(#fff, rgba(255,255,255,.95));
        backdrop-filter: saturate(1.2) blur(4px);
        padding: 10px 0 8px;
        display: flex; flex-wrap: wrap; gap: 10px 14px; align-items: center;
        border-bottom: 1px solid var(--border);
      }}
      #{uid} .summary {{
        font-size: 13px; color: var(--muted); padding: 6px 8px; border-radius: 8px;
        background: #f8fafc; border: 1px solid var(--border);
      }}
      #{uid} .filter {{ display: flex; flex-wrap: wrap; gap: 8px 10px; }}
      #{uid} .filter label {{ display: inline-flex; align-items: center; position: relative; cursor: pointer; }}
      #{uid} .filter input {{ position: absolute; inset: 0; opacity: 0; pointer-events: none; }}
      #{uid} .chip {{
        display: inline-flex; align-items: center; gap: 8px;
        padding: 6px 10px; font-size: 13px; border-radius: 999px;
        background: var(--chip-bg); border: 1px solid var(--chip-border);
        transition: background .2s, border-color .2s, color .2s, transform .08s ease-in-out;
      }}
      #{uid} .filter label:hover .chip {{ transform: translateY(-1px); }}
      #{uid} .filter input:checked + .chip {{
        background: var(--chip-active-bg); border-color: var(--chip-active-border); color: var(--chip-active-text);
      }}
      #{uid} .badge {{
        display: inline-flex; align-items: center; justify-content: center;
        min-width: 22px; height: 20px; padding: 0 6px; border-radius: 999px;
        font-size: 12px; line-height: 1; color: #0f172a;
        background: #e2e8f0; border: 1px solid #cbd5e1;
      }}
      #{uid} .actions {{ margin-left: auto; display: inline-flex; gap: 8px; align-items: center; }}
      #{uid} .btn {{
        border: 1px solid var(--chip-border); background: #fff; color: var(--primary);
        padding: 6px 10px; font-size: 12px; border-radius: 8px; cursor: pointer;
      }}
      #{uid} .btn:hover {{ border-color: var(--primary); }}
      #{uid} .grid {{
        display: grid; gap: 14px; padding: 14px 0;
        grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
      }}
      #{uid} .card {{
        display: flex; flex-direction: column; gap: 8px; cursor: pointer;
        border: 1px solid var(--border); border-radius: 12px; background: var(--bg);
        box-shadow: var(--shadow); overflow: hidden; transition: transform .12s ease, box-shadow .12s ease;
      }}
      #{uid} .card:hover {{ transform: translateY(-2px); box-shadow: 0 8px 24px rgba(0,0,0,.08); }}
      #{uid} .media {{ aspect-ratio: 4/3; background: #f1f5f9; overflow: hidden; }}
      #{uid} .card img {{ width: 100%; height: 100%; object-fit: cover; display: block; }}
      #{uid} .body {{ padding: 8px 10px 10px; display: grid; gap: 6px; }}
      #{uid} .title {{ font-weight: 700; font-size: 15px; letter-spacing: .2px; }}
      #{uid} .card ul {{ margin: 0; padding-left: 18px; font-size: 12.5px; color: #374151; max-height: 120px; overflow: auto; }}
      #{uid} .empty {{ display:none; color:#6b7280; font-size:14px; padding: 10px; }}
      /* Modal */
      #{uid} [data-modal][hidden] {{ display:none; }}
      #{uid} [data-modal] {{
        position: fixed; inset: 0; z-index: 5;
        display: grid; place-items: center;
      }}
      #{uid} [data-modal] .overlay {{
        position: absolute; inset: 0; background: rgba(15, 23, 42, .55);
      }}
      #{uid} [data-modal] .sheet {{
        position: relative; z-index: 1; width: min(720px, 92vw);
        background: #fff; border: 1px solid var(--border); border-radius: 12px;
        box-shadow: 0 20px 40px rgba(0,0,0,.18);
        overflow: hidden;
      }}
      #{uid} .modal-body {{
        display: grid; grid-template-columns: 40% 60%; gap: 0;
      }}
      #{uid} .modal-media img {{ display:block; width: 100%; height: 100%; object-fit: cover; }}
      #{uid} .modal-content {{ padding: 16px; }}
      #{uid} .modal-title {{ margin: 0 0 8px; font-size: 18px; font-weight: 700; }}
      #{uid} .modal-instructions {{ white-space: pre-wrap; font-size: 14px; line-height: 1.4; color:#111827; }}
      #{uid} .close {{
        position: absolute; top: 8px; right: 8px; border: 1px solid var(--chip-border);
        background: #fff; border-radius: 8px; width: 32px; height: 32px; cursor: pointer;
        font-size: 18px; line-height: 30px;
      }}
      @media (max-width: 720px) {{
        #{uid} .grid {{ grid-template-columns: repeat(auto-fill, minmax(160px, 1fr)); }}
        #{uid} .modal-body {{ grid-template-columns: 1fr; }}
      }}
    </style>
    """

    html = f"""
    <div id="{uid}">
      <div class="toolbar">
        <div class="filter" id="alc-filters">{filters or '<span class="empty">No alcohol tags detected</span>'}</div>
        <div class="actions">
          <div class="summary" aria-live="polite">Showing 0 of 0</div>
          <button class="btn clear" type="button">Clear</button>
        </div>
      </div>
      <div class="grid">
        {''.join(cards)}
      </div>
      <div class="empty">No matches. Try adjusting filters.</div>

      <div class="modal" data-modal hidden>
        <div class="overlay" data-close></div>
        <div class="sheet" role="dialog" aria-modal="true" aria-labelledby="{uid}-modal-title">
          <button class="close" type="button" aria-label="Close" data-close>&times;</button>
          <div class="modal-body">
            <div class="modal-media"><img alt=""></div>
            <div class="modal-content">
              <h3 class="modal-title" id="{uid}-modal-title"></h3>
              <div class="modal-instructions"></div>
            </div>
          </div>
        </div>
      </div>
    </div>
    <script>
    (function(){{
      const root = document.getElementById('{uid}');
      const grid = root.querySelector('.grid');
      const summary = root.querySelector('.summary');
      const emptyState = root.querySelector('.empty');
      const filterWrap = root.querySelector('#alc-filters');
      const labels = Array.from(filterWrap.querySelectorAll('label[data-alc]'));
      const boxes = labels.map(l => l.querySelector('input[type=checkbox]'));
      const clearBtn = root.querySelector('.clear');

      const cards = Array.from(grid.querySelectorAll('.card')).map(el => ({{
        el,
        tags: (el.dataset.alc || '').split(',').filter(Boolean)
      }}));

      function selectedSet() {{
        return new Set(boxes.filter(b => b.checked).map(b => b.value));
      }}

      function visibleBySelected(sel) {{
        if (!sel || sel.size === 0) return cards.length;
        let n = 0;
        for (const c of cards) {{
          const show = c.tags.some(t => sel.has(t));
          if (show) n++;
        }}
        return n;
      }}

      function apply() {{
        const sel = selectedSet();
        const none = sel.size === 0;
        let visible = 0;
        for (const c of cards) {{
          const show = none ? true : c.tags.some(t => sel.has(t));
          c.el.style.display = show ? '' : 'none';
          if (show) visible++;
        }}
        emptyState.style.display = visible === 0 ? '' : 'none';
        summary.textContent = `Showing ${{visible}} of ${{cards.length}}`;
        return sel;
      }}

      function updateCounts(sel) {{
        const baseVisible = visibleBySelected(sel);
        for (const l of labels) {{
          const alc = l.dataset.alc;
          const badge = l.querySelector('[data-count]');
          if (!badge) continue;

          let count = 0;
          const augmented = new Set(sel);
          augmented.add(alc);
          if (sel.has(alc)) {{
            count = baseVisible;
          }} else {{
            for (const c of cards) {{
              const show = c.tags.some(t => augmented.has(t));
              if (show) count++;
            }}
          }}
          badge.textContent = count;
        }}
      }}

      function refresh() {{
        const sel = apply();
        updateCounts(sel);
      }}

      boxes.forEach(b => b.addEventListener('change', refresh));
      if (clearBtn) clearBtn.addEventListener('click', () => {{ boxes.forEach(b => b.checked = false); refresh(); }});

      // Modal logic
      const modal = root.querySelector('[data-modal]');
      const modImg = modal.querySelector('.modal-media img');
      const modTitle = modal.querySelector('.modal-title');
      const modInstr = modal.querySelector('.modal-instructions');

      function openFrom(cardEl) {{
        const name = cardEl.getAttribute('title') || cardEl.querySelector('.title')?.textContent || '';
        const img = cardEl.querySelector('img');
        const instrEl = cardEl.querySelector('.instructions');
        modTitle.textContent = name;
        if (img) {{
          modImg.src = img.src;
          modImg.alt = name;
        }} else {{
          modImg.removeAttribute('src');
          modImg.alt = '';
        }}
        modInstr.textContent = instrEl ? instrEl.textContent : 'No instructions available.';
        modal.hidden = false;
      }}

      function closeModal() {{
        modal.hidden = true;
      }}

      root.addEventListener('click', (e) => {{
        const closeBtn = e.target.closest('[data-close]');
        if (closeBtn) return closeModal();
        const card = e.target.closest('.card');
        if (card && root.contains(card)) {{
          openFrom(card);
        }}
      }});

      document.addEventListener('keydown', (e) => {{
        if (e.key === 'Escape' && !modal.hidden) closeModal();
      }});

      // Initial paint
      refresh();
    }})();
    </script>
    """
    return styles + html

display(HTML(drinks_to_html(all_drinks)))
