In [4]:
# --- Cell 1 AUTONOMOUS_AVU_OMT_3.ipynb ---
# PARAMETERS (Papermill + UI/Env fallbacks; robust. Only side effect: creates DATA_DIR and DATA_DIR/offers if missing)
import os, json
from pathlib import Path

# 0) Try to read Papermill parameters (works only when executed via papermill)
_pm = {}
try:
    from papermill import get_parameters  # type: ignore
    _pm = get_parameters() or {}
except Exception:
    _pm = {}

def _first(*vals, default=None):
    """Return the first non-empty/non-None value."""
    for v in vals:
        if isinstance(v, str):
            if v.strip() != "":
                return v
        elif v is not None:
            return v
    return default

def _coerce_int(x, fallback=None):
    try:
        return int(x)
    except Exception:
        return fallback

def _maybe_json(v):
    if isinstance(v, (dict, list)):
        return v
    if isinstance(v, str):
        try:
            return json.loads(v)
        except Exception:
            return v  # leave as-is (maybe it's just a plain string like a name)
    return v

# 1) Inbound parameters (priority: papermill -> globals -> env)
#    Each value is the first usable candidate according to _first(). The globals()
#    lookups also see whatever a previous run of this cell left in the kernel, so a
#    stale value there outranks the environment variable on a re-run.

# selected_wine: a dict, or a JSON string decoded by _maybe_json(); no env fallback here.
selected_wine = _maybe_json(_first(
    _pm.get("selected_wine"),
    globals().get("selected_wine"),
    None
))
# client_name: falls back to the literal "Valued Client" when nothing is supplied.
client_name = _first(
    _pm.get("client_name"),
    globals().get("client_name"),
    os.getenv("CLIENT_NAME"),
    "Valued Client"
)
# filters: the env variable FILTER_INPUTS may carry a JSON string; final fallback is {}.
filters = _maybe_json(_first(
    _pm.get("filters"),
    globals().get("filters"),
    os.getenv("FILTER_INPUTS"),
    {}
))
# week_number: None when absent or not convertible by int().
week_number = _coerce_int(_first(
    _pm.get("week_number"),
    globals().get("week_number"),
    os.getenv("WEEK_NUMBER")
), None)

# SERPAPI_KEY: always a str; "" means "no key configured".
SERPAPI_KEY = str(_first(
    _pm.get("SERPAPI_KEY"),
    globals().get("SERPAPI_KEY"),
    os.getenv("SERPAPI_KEY"),
    ""
) or "")

# 2) Resolve data root (IRON_DATA). Prefer explicit param → env → OneDrive default.
#    The environment is consulted twice: IRON_DATA first, then OUTPUT_PATH.
_default_iron = str(Path.home() / "OneDrive - AVU SA" / "AVU CPI Campaign" /
                    "Puzzle_control_Reports" / "IRON_DATA")
input_path = _first(
    _pm.get("input_path"),
    globals().get("input_path"),
    os.getenv("IRON_DATA"),
    os.getenv("OUTPUT_PATH"),
    _default_iron
)
DATA_DIR = Path(input_path)
# Side effect: the data root is created when missing, so a mistyped path yields a new
# empty directory rather than an error.
DATA_DIR.mkdir(parents=True, exist_ok=True)

# Generated offers are written below the data root.
OFFERS_DIR = DATA_DIR / "offers"
OFFERS_DIR.mkdir(parents=True, exist_ok=True)

# 3) Secondary fallbacks if not injected:
#    - selected_wine: try UI export (ui_selection.json) if available
#    - filters: try notebooks/filters.json (UI writes it before running)
def _read_json_file(p: Path):
    try:
        if p.exists() and p.stat().st_size > 0:
            return json.loads(p.read_text(encoding="utf-8"))
    except Exception:
        pass
    return None

# Fall back to the UI export only when no non-empty dict was injected.
if not (isinstance(selected_wine, dict) and selected_wine):
    # Standard locations used by the web app, checked in order; first non-empty dict wins.
    for p in (DATA_DIR / "ui_selection.json", Path("notebooks") / "ui_selection.json"):
        obj = _read_json_file(p)
        if not isinstance(obj, dict) or not obj:
            continue
        selected_wine = obj
        break

# Same idea for filters; anything that is not a dict collapses to {}.
if not (isinstance(filters, dict) and filters):
    obj = _read_json_file(Path("notebooks") / "filters.json")
    filters = obj if isinstance(obj, dict) else {}

# 4) Feature flags & defaults
# ENABLE_OUTLOOK: 1=draft in Outlook if available. Integer values keep their meaning
# (0 -> off, any other integer -> on). The words true/yes/on/y are accepted as "on";
# any other text switches the feature off instead of raising ValueError here.
_outlook_flag = os.getenv("ENABLE_OUTLOOK", "1").strip()
try:
    ENABLE_OUTLOOK = bool(int(_outlook_flag))
except ValueError:
    ENABLE_OUTLOOK = _outlook_flag.lower() in {"true", "yes", "on", "y"}
SERPAPI_ENABLED = False  # Cell 2 will flip to True if serpapi usable
DEFAULT_WINE_IMAGE = "https://www.vinfolio.com/blog/wp-content/uploads/2018/07/red-wine-bottle.jpg"

# 5) Normalize week_number (optional parameter)
# Default to the current ISO week. Index [1] of isocalendar() is the week number on
# every Python 3 version; the `.week` attribute only exists from 3.9 on, and its
# absence used to be swallowed by the except below, silently leaving None.
try:
    if week_number is None:
        from datetime import datetime
        week_number = int(datetime.now().isocalendar()[1])
except Exception:
    week_number = None  # keep None if we cannot infer; not critical for offer drafting

# 6) Promote to globals for later cells
#    Upper-case aliases of the values resolved above; later cells read these names.
SELECTED_WINE = selected_wine
CLIENT_NAME = client_name
FILTERS = filters

# 7) Light validation: we can proceed without filters, but we need a selected wine
#    Only the presence of a truthy "wine" entry is checked here; "vintage" is mentioned
#    in the message but not enforced.
if not isinstance(SELECTED_WINE, dict) or not SELECTED_WINE.get("wine"):
    # Give a precise, actionable message early.
    raise ValueError(
        "❌ No valid 'selected_wine' provided. "
        "Select a wine in the UI first (click a card so it highlights), "
        "then press “Generate Offer”, or pass a JSON object with at least {'wine': ..., 'vintage': ...}."
    )

# 8) Friendly run summary
#    Only key names and a yes/no for the API key are shown, never the key itself.
print("📂 DATA_DIR:", DATA_DIR.resolve())
print("📁 OFFERS_DIR:", OFFERS_DIR.resolve())
print("🧩 SELECTED_WINE keys:", sorted(SELECTED_WINE))
print("👤 CLIENT_NAME:", CLIENT_NAME)
print("🔧 FILTERS keys:", sorted(FILTERS) if isinstance(FILTERS, dict) else "∅")
print("🗓️ WEEK_NUMBER:", week_number)
print("🔑 SERPAPI_KEY set:", SERPAPI_KEY != "")
print("📧 ENABLE_OUTLOOK:", ENABLE_OUTLOOK)


📂 DATA_DIR: C:\Users\Marco.Africani\OneDrive - AVU SA\AVU CPI Campaign\Puzzle_control_Reports\IRON_DATA
📁 OFFERS_DIR: C:\Users\Marco.Africani\OneDrive - AVU SA\AVU CPI Campaign\Puzzle_control_Reports\IRON_DATA\offers
🧩 SELECTED_WINE keys: ['avg_cpi_score', 'day', 'full_type', 'id', 'locked', 'loyalty_level', 'match_quality', 'name', 'price_tier', 'region_group', 'stock', 'type', 'vintage', 'wine']
👤 CLIENT_NAME: Valued Client
🔧 FILTERS keys: []
🗓️ WEEK_NUMBER: 33
🔑 SERPAPI_KEY set: False
📧 ENABLE_OUTLOOK: True


In [5]:
# --- CELL 2 AUTONOMOUS_AVU_OMT_3.ipynb: Dependency checks (no runtime installs in prod) ---
import os

# --- Env overrides (optional) ---
_FORCE_DISABLE = os.getenv("FORCE_DISABLE_SERPAPI", "0") == "1"
_FORCE_ENABLE = os.getenv("FORCE_ENABLE_SERPAPI", "0") == "1"  # cannot enable without pkg+key

# Key: reuse the value already in the kernel when present, else read the environment.
SERPAPI_KEY = str(globals().get("SERPAPI_KEY") or os.getenv("SERPAPI_KEY", "")).strip()

# Optional dependency probe (package name 'google-search-results', import path 'serpapi')
try:
    from serpapi import GoogleSearch  # noqa: F401
except Exception:
    _PKG_OK = False
else:
    _PKG_OK = True

# Feature flag: a forced disable wins; otherwise (forced enable or not) the feature
# is on exactly when both the package and a key are present.
_SERPAPI_AVAILABLE = _PKG_OK and SERPAPI_KEY != ""
SERPAPI_ENABLED = False if _FORCE_DISABLE else _SERPAPI_AVAILABLE

# Make visible to later cells regardless of execution order
globals().update(SERPAPI_ENABLED=SERPAPI_ENABLED, SERPAPI_KEY=SERPAPI_KEY)

# Optional lightweight ping in debug mode (no exception propagation)
# Runs only when DEBUG_SERPAPI=1. When the feature is on, a GoogleSearch object is
# built but its results are never requested in this block (no get_dict() call), so
# presumably no network request is made — confirm against the serpapi client.
if os.getenv("DEBUG_SERPAPI", "0") == "1":
    if SERPAPI_ENABLED:
        try:
            # NOTE: this does not count against free tier meaningfully; it's a minimal query without fetching images
            _ = GoogleSearch({"q": "wine bottle", "location": "Switzerland", "api_key": SERPAPI_KEY})
            print("🔎 SERPAPI: package+key detected (debug ping constructed).")
        except Exception as e:
            print(f"⚠️ SERPAPI debug ping failed: {e}")
    else:
        # Disabled either because package/key are missing or because of FORCE_DISABLE_SERPAPI.
        reason = "missing package/key" if not _SERPAPI_AVAILABLE else "forced disabled"
        print(f"ℹ️ SERPAPI disabled ({reason}). Using stock image fallback.")

# --- Tiny helper for later cells (safe fallback to DEFAULT_WINE_IMAGE) ---
def serpapi_image_or_default(query: str, default_image: str = None):
    """
    Look up an image for `query` through SerpAPI, falling back to a default picture.

    Returns a 2-tuple (image_url, source). `source` is 'serpapi' when a search result
    supplied the URL and 'default' otherwise (feature disabled, no usable result, or
    an error during the lookup). Lookup errors are caught here and printed only when
    DEBUG_SERPAPI=1.
    """
    fallback_url = (
        default_image
        or globals().get("DEFAULT_WINE_IMAGE")
        or "https://www.vinfolio.com/blog/wp-content/uploads/2018/07/red-wine-bottle.jpg"
    )

    if SERPAPI_ENABLED:
        try:
            params = {
                "engine": "google_images",
                "q": query,
                "num": 5,
                "safe": "active",
                "api_key": SERPAPI_KEY,
            }
            payload = GoogleSearch(params).get_dict()
            # Take the first hit that offers any of the three URL fields.
            for hit in payload.get("images_results") or []:
                found = hit.get("original") or hit.get("thumbnail") or hit.get("link")
                if found:
                    return found, "serpapi"
        except Exception as e:
            if os.getenv("DEBUG_SERPAPI", "0") == "1":
                print(f"⚠️ serpapi_image_or_default error: {e}")

    return fallback_url, "default"


In [6]:
# --- CELL 3 AUTONOMOUS_AVU_OMT_3.ipynb: Diagnostics (safe, idempotent) ---
# This cell only reports what's loaded; it leaves client_df/stock_df/recs_df untouched (it does rebind DATA_DIR to a Path).

from pathlib import Path
import os

try:
    import pandas as pd  # type: ignore
except Exception:
    pd = None  # fallback for type checks

def _df_info(df):
    if pd is not None and isinstance(df, pd.DataFrame):
        r, c = df.shape
        return f"{r} rows, {c} cols"
    return "None"

# Snapshot handle on the notebook namespace; used below for read-only lookups.
g = globals()

# --- Resolve DATA_DIR (safe fallbacks) ---
# Order: existing global DATA_DIR -> IRON_DATA env variable -> OneDrive default path.
# Note that this rebinds the global DATA_DIR (always to a Path).
DATA_DIR = g.get("DATA_DIR")
if not DATA_DIR:
    DATA_DIR = os.getenv("IRON_DATA") or str(
        Path.home()
        / "OneDrive - AVU SA"
        / "AVU CPI Campaign"
        / "Puzzle_control_Reports"
        / "IRON_DATA"
    )
DATA_DIR = Path(DATA_DIR)
print("DATA_DIR:", DATA_DIR)

# --- Peek at globals if already present (we won't modify them) ---
# The trailing underscore marks these as this cell's local views; each is None when
# the corresponding global has not been created yet.
client_df_ = g.get("client_df")
stock_df_  = g.get("stock_df")
recs_df_   = g.get("recs_df")

print("client_df:", _df_info(client_df_), "| stock_df:", _df_info(stock_df_), "| recs_df:", _df_info(recs_df_))

# --- File presence check for artifacts produced by the engine ---
# week_guess ends up as an int, or None when week_number is absent/blank/unparseable.
week_guess = g.get("week_number")
try:
    if week_guess in (None, ""):
        week_guess = None
    else:
        week_guess = int(week_guess)
except Exception:
    week_guess = None

# Entries that depend on the week are None when no week is known; they are skipped below.
files_to_check = [
    "client_pref_df_latest.pkl",
    # stock (either of these depending on the run)
    "stock_df_with_seasonality.pkl",
    "stock_df_final.pkl",
    # recommendations / summaries
    "top3_recommendations_per_client_by_type.pkl",
    "top3_recommendations_per_client.pkl",
    "top3_recommendations_per_client.csv",
    # weekly calendars
    "weekly_campaign_schedule.pkl",
    f"weekly_campaign_schedule_week_{week_guess}.pkl" if week_guess else None,
    "weekly_campaign_schedule.json",
    f"weekly_campaign_schedule_week_{week_guess}.json" if week_guess else None,
    # PowerBI export we create in AVU_ignition_1 (Cell 9)
    "powerbi_wine_arrow_layout.xlsx",
]
print("\nArtifacts present:")
for f in files_to_check:
    if not f:
        continue
    print(f"  {f:45} → {(DATA_DIR / f).exists()}")

# --- Lazy, local diagnostics (do NOT modify globals) ---
# Candidate file order and id normalization mirror the loader and _normalize_id_series
# in Cell 4 (stock_df_final first, by-type recommendations first, leading zeros
# stripped), so the figures printed here describe the same data the offer is built from.
# NOTE: pd.read_pickle can execute code embedded in the file; DATA_DIR must only
# contain trusted artifacts.
if pd is not None:
    # Prefer stock_df from globals, else the first existing stock artifact on disk
    if stock_df_ is None:
        stock_df_local = None
        for cand in ["stock_df_final.pkl", "stock_df_with_seasonality.pkl"]:
            stock_path = DATA_DIR / cand
            if not stock_path.exists():
                continue
            try:
                stock_df_local = pd.read_pickle(stock_path)
            except Exception as e:
                print(f"⚠️ Could not read {stock_path.name}: {e}")
            break  # only the first existing candidate is considered
    else:
        stock_df_local = stock_df_

    # Prefer recs from globals, else the first existing recommendations artifact
    if recs_df_ is None:
        recs_df_local = None
        for cand in [
            "top3_recommendations_per_client_by_type.pkl",
            "top3_recommendations_per_client.pkl",
            "top3_recommendations_per_client.csv",  # optional CSV fallback (not always produced)
        ]:
            recs_path = DATA_DIR / cand
            if not recs_path.exists():
                continue
            try:
                recs_df_local = (
                    pd.read_pickle(recs_path) if recs_path.suffix.lower() == ".pkl"
                    else pd.read_csv(recs_path)
                )
            except Exception as e:
                print(f"⚠️ Could not read {recs_path.name}: {e}")
            break  # only the first existing candidate is considered
    else:
        recs_df_local = recs_df_

    # Usable inventory (stock > 0 & has a price); handle column-name drift
    if isinstance(stock_df_local, pd.DataFrame):
        s = None
        for cand in ["stock", "stock_count", "Stock"]:
            if cand in stock_df_local.columns:
                s = pd.to_numeric(stock_df_local[cand], errors="coerce")
                break

        price = None
        for cand in ["CHF Price", "CHF p/bt VAT excl.", "price_chf", "chf_price"]:
            if cand in stock_df_local.columns:
                price = pd.to_numeric(stock_df_local[cand], errors="coerce")
                break

        if s is not None and price is not None:
            usable = int(((s > 0) & price.notna()).sum())
        elif s is not None:
            # No recognizable price column: count on stock alone.
            usable = int((s > 0).sum())
        else:
            usable = 0

        print("\nUsable inventory rows (stock>0 & price present):", usable)

        # ID overlap (recs vs stock)
        if isinstance(recs_df_local, pd.DataFrame) and "id" in stock_df_local.columns:
            s_ids = (
                stock_df_local["id"].astype(str).str.strip()
                .str.replace(r"\.0$", "", regex=True)
                .str.replace(r"^0+(\d+)$", r"\1", regex=True)
            )
            if "id" in recs_df_local.columns:
                r_ids = (
                    recs_df_local["id"].astype(str).str.strip()
                    .str.replace(r"\.0$", "", regex=True)
                    .str.replace(r"^0+(\d+)$", r"\1", regex=True)
                )
                overlap = int(r_ids.isin(s_ids).sum())
                print("recs_df ids found in stock_df:", overlap)
            else:
                print("recs_df has no 'id' column; skipping ID overlap check.")
    else:
        print("\n(stock_df not available; skipping inventory/overlap diagnostics)")
else:
    print("\n(pandas not available; diagnostics limited)")


DATA_DIR: C:\Users\Marco.Africani\OneDrive - AVU SA\AVU CPI Campaign\Puzzle_control_Reports\IRON_DATA
client_df: None | stock_df: None | recs_df: None

Artifacts present:
  client_pref_df_latest.pkl                     → True
  stock_df_with_seasonality.pkl                 → True
  stock_df_final.pkl                            → True
  top3_recommendations_per_client_by_type.pkl   → True
  top3_recommendations_per_client.pkl           → True
  top3_recommendations_per_client.csv           → False
  weekly_campaign_schedule.pkl                  → True
  weekly_campaign_schedule_week_33.pkl          → True
  weekly_campaign_schedule.json                 → True
  weekly_campaign_schedule_week_33.json         → True
  powerbi_wine_arrow_layout.xlsx                → True

Usable inventory rows (stock>0 & price present): 4475
recs_df ids found in stock_df: 201


In [7]:
# --- CELL 4 : Paths, loaders, normalization, selection, compose, save ---
from pathlib import Path
from tempfile import NamedTemporaryFile
import pandas as pd
import random
import re
import math
import json
import os

# ---- Small helpers & fallbacks ----
def atomic_write_text(path: Path, text: str, encoding: str = "utf-8") -> None:
    """Write `text` to `path` through a temporary file and a final rename.

    The temporary file is created in the destination directory: Path.replace cannot
    move a file across drives/filesystems, and the system temp directory is not
    guaranteed to be on the same one as the data directory.

    Parameters
    ----------
    path : Path
        Destination file; missing parent directories are created.
    text : str
        Content to write.
    encoding : str
        Text encoding of the written file.

    Raises
    ------
    OSError
        If the directory cannot be created or the file cannot be written or renamed.
        A temporary file that was already created is removed before re-raising.
    """
    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    tmp_path = None
    try:
        with NamedTemporaryFile(
            "w",
            delete=False,
            encoding=encoding,
            dir=path.parent,
            prefix=f".{path.name}.",
            suffix=".tmp",
        ) as tmp:
            tmp_path = Path(tmp.name)
            tmp.write(text)
        tmp_path.replace(path)
    except BaseException:
        # Do not leave a half-written temp file next to the real offers.
        if tmp_path is not None:
            tmp_path.unlink(missing_ok=True)
        raise

# Fallback template if not defined earlier
try:
    html_template  # noqa: F821
except NameError:
    def html_template(client_name, wine_name, price, note, image, critic_quote):
        """Minimal offer e-mail used when no richer template was defined earlier.

        Every argument is treated as plain text and HTML-escaped before it is placed
        in the markup, so characters such as '&' or '<' in a client or wine name
        cannot break or inject markup. `image` goes into the `src` attribute.
        Callers must therefore pass text, not pre-built HTML fragments.

        Returns the HTML document as a string.
        """
        from html import escape  # local import keeps this fallback self-contained

        def _text(value):
            return escape(str(value), quote=False)

        client_name = _text(client_name)
        wine_name = _text(wine_name)
        price = _text(price)
        note = _text(note)
        critic_quote = _text(critic_quote)
        image = escape(str(image), quote=True)  # attribute context: quotes matter
        return f"""
        <html><body style="font-family:Arial,Helvetica,sans-serif;">
            <h2>Dear {client_name},</h2>
            <p><strong>{wine_name}</strong> is now available for <strong>{price}</strong>.</p>
            <p>{note}</p>
            <img src="{image}" width="300" style="border-radius:6px"/><br>
            <blockquote style="color:#555;font-style:italic">{critic_quote}</blockquote>
        </body></html>
        """

def build_subject(wine_name: str, price_disp: str) -> str:
    """Compose the e-mail subject line from the wine name and its display price."""
    return "🌟 Exclusive Offer: {} — {}".format(wine_name, price_disp)

# Keep an already-defined DEFAULT_WINE_IMAGE (whatever its value); otherwise use the
# stock bottle picture. This lets the cell run even when no earlier cell set the name.
DEFAULT_WINE_IMAGE = globals().get(
    "DEFAULT_WINE_IMAGE",
    "https://www.vinfolio.com/blog/wp-content/uploads/2018/07/red-wine-bottle.jpg"
)

# ---- Flexible loaders ----
def load_any(*candidates: Path) -> pd.DataFrame:
    """Load the first candidate that exists and has a supported extension.

    Supported extensions (matched case-insensitively): .pkl, .csv, .xlsx, .xls.
    Existing files with any other extension are skipped.

    Parameters
    ----------
    *candidates : Path
        Files to try, in priority order.

    Returns
    -------
    Whatever the matching pandas reader returns for the first usable candidate.

    Raises
    ------
    FileNotFoundError
        If no candidate exists with a supported extension. Errors raised by the
        pandas reader itself (corrupt file, missing Excel engine) propagate.
    """
    for candidate in candidates:
        candidate = Path(candidate)
        if not candidate.exists():
            continue
        suffix = candidate.suffix.lower()
        if suffix == ".pkl":
            # NOTE: unpickling can execute code stored in the file; only load
            # artifacts produced by trusted runs.
            return pd.read_pickle(candidate)
        if suffix == ".csv":
            return pd.read_csv(candidate)
        if suffix == ".xlsx":
            return pd.read_excel(candidate, engine="openpyxl")
        if suffix == ".xls":
            # openpyxl cannot open the legacy binary format; let pandas pick the engine.
            return pd.read_excel(candidate)
    raise FileNotFoundError("None of the candidate files exist: " + ", ".join(map(str, candidates)))

def load_any_or_none(*candidates: Path):
    """Like load_any(), but a missing file set prints a warning and yields None.

    Only FileNotFoundError is absorbed; any other error from load_any propagates.
    """
    try:
        frame = load_any(*candidates)
    except FileNotFoundError as missing:
        print(f"⚠️ {missing}")
        frame = None
    return frame

# Prefer PKLs used by your engine; keep fallbacks for resilience.
def try_load_with_fallbacks():
    """Load the client, stock and recommendation frames from the data root.

    The data root is the global DATA_DIR, else the IRON_DATA environment variable,
    else the OneDrive IRON_DATA folder. For every frame still missing afterwards,
    the most recently modified sub-folder of `non_recipient_reports` is tried with
    the same file names.

    Returns
    -------
    tuple
        (client, stock, recs); each element is None when no candidate file was found.
    """
    # Primary DIR comes from Cell 1; fallback to OneDrive IRON_DATA
    base_dir = Path(globals().get("DATA_DIR") or os.getenv("IRON_DATA") or
                    Path.home() / "OneDrive - AVU SA" / "AVU CPI Campaign" / "Puzzle_control_Reports" / "IRON_DATA")

    # File names per frame, in priority order (stock_df_final.pkl first).
    client_files = ("client_pref_df_latest.pkl",)
    stock_files = (
        "stock_df_final.pkl",
        "stock_df_with_seasonality.pkl",
        "preprocessed_wine_data.xlsx",
    )
    recs_files = (
        "top3_recommendations_per_client_by_type.pkl",
        "top3_recommendations_per_client.pkl",
        "top3_recommendations_per_client.csv",
    )

    def _load_from(folder, names):
        return load_any_or_none(*(folder / name for name in names))

    client = _load_from(base_dir, client_files)
    stock = _load_from(base_dir, stock_files)
    recs = _load_from(base_dir, recs_files)

    # Try latest non_recipient_reports/* if anything is missing
    if client is None or stock is None or recs is None:
        nrr = base_dir / "non_recipient_reports"
        if nrr.is_dir():
            subs = [p for p in nrr.iterdir() if p.is_dir()]
            if subs:
                latest = max(subs, key=lambda p: p.stat().st_mtime)
                # Explicit None checks: `frame or ...` would ask a loaded DataFrame
                # for its truth value, which raises ValueError.
                if client is None:
                    client = _load_from(latest, client_files)
                if stock is None:
                    stock = _load_from(latest, stock_files)
                if recs is None:
                    recs = _load_from(latest, recs_files)

    return client, stock, recs

# Load the three engine frames once; each one is None when no candidate file was found.
client_df, stock_df, recs_df = try_load_with_fallbacks()

# ---- Normalize schema (guard if any are None) ----
def canon_cols(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of `df` with cleaned, canonical column names.

    Headers are converted to text, trimmed, lower-cased and have non-breaking spaces
    replaced by normal spaces. Known aliases are then renamed to their canonical
    name. An alias is skipped when its canonical name already exists in the frame or
    was claimed by an earlier alias, so the result never gains duplicate column
    names (e.g. a frame with both "wine" and "name" keeps "name" as it is).

    Also derives `size_ml` from `size_cl` when only the latter exists, and adds a
    constant `wine_name` column ("Unknown Wine") when no name column is available.

    The input frame is not modified.
    """
    df = df.copy()
    df.columns = (
        df.columns.astype(str).str.strip().str.lower().str.replace("\xa0", " ", regex=False)
    )

    # (alias, canonical name) in priority order: earlier entries win on conflicts.
    aliases = [
        # prices
        ("chf p/bt vat excl.", "price_chf"),
        ("chf price", "price_chf"),
        ("price", "price_chf"),
        # sizes / stock
        ("bottle_size_ml", "size_ml"),
        ("size (ml)", "size_ml"),
        ("stock", "stock_count"),
        # wine/meta
        ("avg", "avg_score"),
        ("type", "wine_type"),
        ("wine", "wine_name"),
        ("name", "wine_name"),
        ("region_group", "region"),
        ("grape_list", "grapes"),
        # client
        ("loyalty_level", "loyalty"),
    ]
    taken = set(df.columns)
    mapping = {}
    for alias, canonical in aliases:
        if alias in taken and canonical not in taken:
            mapping[alias] = canonical
            taken.discard(alias)
            taken.add(canonical)
    df = df.rename(columns=mapping)

    # derive size_ml from size_cl if needed
    if "size_ml" not in df.columns and "size_cl" in df.columns:
        df["size_ml"] = pd.to_numeric(df["size_cl"], errors="coerce") * 10
    # ensure wine_name exists (neither "wine" nor "name" was present)
    if "wine_name" not in df.columns:
        df["wine_name"] = "Unknown Wine"
    return df

# Canonicalize whichever frames were loaded; frames that are None stay None.
if stock_df is not None:
    stock_df = canon_cols(stock_df)
if recs_df is not None:
    recs_df = canon_cols(recs_df)
if client_df is not None:
    client_df = canon_cols(client_df)

# --- map/derive customer_no on recs_df + client_df ---
def _ensure_customer_no(df):
    if df is None or not isinstance(df, pd.DataFrame):
        return df
    if "customer_no" in df.columns:
        df["customer_no"] = (
            df["customer_no"].astype(str).str.replace(r"\.0$", "", regex=True).str.strip().replace({"nan": ""})
        )
        return df
    for c in ["customer_id", "client_no", "client_id", "customer", "client", "cust_no", "cust_id"]:
        if c in df.columns:
            df["customer_no"] = (
                df[c].astype(str).str.replace(r"\.0$", "", regex=True).str.strip().replace({"nan": ""})
            )
            return df
    # regex fallback
    for c in df.columns:
        if re.search(r"(customer|client).*(id|no)", str(c), flags=re.I):
            df["customer_no"] = (
                df[c].astype(str).str.replace(r"\.0$", "", regex=True).str.strip().replace({"nan": ""})
            )
            return df
    return df

# Give both frames a cleaned `customer_no` column where a source column exists;
# a frame that is None passes through as None.
recs_df   = _ensure_customer_no(recs_df)
client_df = _ensure_customer_no(client_df)

# ---- ID normalization (critical for matching) ----
def _normalize_id_series(s: pd.Series) -> pd.Series:
    s = s.astype(str).str.strip()
    s = s.str.replace(r"\.0$", "", regex=True)
    s = s.str.replace(r"^0+(\d+)$", r"\1", regex=True)
    return s

# Normalize the `id` column of every loaded frame in place so ids compare equal
# across stock, recommendations and clients.
for df in (stock_df, recs_df, client_df):
    if isinstance(df, pd.DataFrame) and "id" in df.columns:
        df["id"] = _normalize_id_series(df["id"])

# Same text cleanup for `customer_no` (trailing '.0' removed, trimmed, "nan" -> "").
# _ensure_customer_no applies this expression as well, so this pass is a repeat for
# frames that went through it.
for df in (recs_df, client_df):
    if isinstance(df, pd.DataFrame) and "customer_no" in df.columns:
        df["customer_no"] = (
            df["customer_no"].astype(str).str.replace(r"\.0$", "", regex=True).str.strip().replace({"nan": ""})
        )

# ---- Harden stock/price columns if the file uses different headers ----
# Guarantees that stock_df has `stock_count` and `price_chf` columns afterwards.
if stock_df is not None:
    if "stock_count" not in stock_df.columns:
        # First column whose name looks like a quantity; 0 for every row when none does.
        stock_like = [c for c in stock_df.columns if re.search(r"(stock|qty|quantity|available|bottles?)", str(c), re.I)]
        stock_df["stock_count"] = pd.to_numeric(stock_df[stock_like[0]], errors="coerce").fillna(0).astype(int) if stock_like else 0

    if "price_chf" not in stock_df.columns:
        # First column whose name looks like a price, ignoring "tier" columns
        # (e.g. price_tier); decimal commas are turned into dots before parsing.
        # NaN for every row when no such column exists.
        price_like = [c for c in stock_df.columns if re.search(r"(price|chf|p/bt)", str(c), re.I) and "tier" not in str(c).lower()]
        stock_df["price_chf"] = pd.to_numeric(
            stock_df[price_like[0]].astype(str).str.replace(",", "."),
            errors="coerce"
        ) if price_like else math.nan

# ---- Types / numerics ----
def _to_float(x):
    try:
        return float(str(x).replace(",", "."))
    except Exception:
        return math.nan

# Final numeric coercion: price_chf becomes float (NaN when unparseable), stock_count
# becomes int (0 when missing or unparseable).
# NOTE(review): the .get() defaults are only safe because the hardening step above
# creates both columns; a scalar 0 default would not support .fillna — confirm that
# this block is never run without that step.
if stock_df is not None:
    stock_df["price_chf"]   = stock_df.get("price_chf", pd.Series([math.nan]*len(stock_df))).apply(_to_float)
    stock_df["stock_count"] = pd.to_numeric(stock_df.get("stock_count", 0), errors="coerce").fillna(0).astype(int)

# ---- Selection (prefer UI-provided selected_wine) ----
def _row_from_selection(sel: dict):
    """Build a wine record from the UI selection, using the canonical stock keys.

    Parameters
    ----------
    sel : dict
        Selection payload; reads id/wine_id, wine/name, vintage, full_type/type,
        region_group/region, grapes, size_ml, price_chf, avg_cpi_score/avg and stock.

    Returns
    -------
    dict
        Keys: id, wine_name, vintage, wine_type, region, grapes, size_ml, price_chf,
        avg_score, stock_count. `id` is None when absent, `price_chf` is NaN when
        absent or unparseable, `stock_count` is 0 when absent or unparseable.
    """
    raw_id = sel.get("id") or sel.get("wine_id") or ""
    wine_id = _normalize_id_series(pd.Series([raw_id])).iloc[0] or None

    raw_price = sel.get("price_chf")
    if raw_price in (None, ""):
        price = math.nan
    else:
        price = pd.to_numeric(str(raw_price).replace(",", "."), errors="coerce")

    # The payload may carry stock as text ("12", "12.0", "N/A"); plain int() raised
    # ValueError on the last two, so parse leniently and fall back to 0.
    stock = pd.to_numeric(str(sel.get("stock") or 0).replace(",", "."), errors="coerce")
    stock_count = int(stock) if pd.notna(stock) and math.isfinite(stock) else 0

    return {
        "id":          wine_id,
        "wine_name":   sel.get("wine") or sel.get("name") or "Unknown Wine",
        "vintage":     sel.get("vintage") or "",
        "wine_type":   sel.get("full_type") or sel.get("type") or "Wine",
        "region":      sel.get("region_group") or sel.get("region") or "Unknown Region",
        "grapes":      sel.get("grapes") or "Various Grapes",
        "size_ml":     sel.get("size_ml") or 750,
        "price_chf":   price,
        "avg_score":   sel.get("avg_cpi_score") or sel.get("avg") or None,
        "stock_count": stock_count,
    }

def _client_linked_to_wine(wine_id, recs, clients, most_common=True):
    """Return the client row tied to `wine_id` through the recommendations, or None.

    With `most_common=True` the customer number that appears most often for the wine
    is used; otherwise the first one in row order.
    """
    if not isinstance(recs, pd.DataFrame) or not isinstance(clients, pd.DataFrame):
        return None
    if not {"id", "customer_no"} <= set(recs.columns) or "customer_no" not in clients.columns:
        return None
    customer_nos = (
        recs.loc[recs["id"] == wine_id, "customer_no"]
        .astype(str).str.replace(r"\.0$", "", regex=True).str.strip()
    )
    if customer_nos.empty:
        return None
    client_id = customer_nos.value_counts().index[0] if most_common else customer_nos.iloc[0]
    matches = clients.loc[clients["customer_no"].astype(str).str.strip() == str(client_id)]
    return matches.iloc[0] if not matches.empty else None


def _default_client_row(clients):
    """First row of the client frame, or None when there is no non-empty frame."""
    if isinstance(clients, pd.DataFrame) and len(clients):
        return clients.iloc[0]
    return None


def pick_wine_and_client():
    """Choose the wine and the client the offer is written for.

    Reads the globals SELECTED_WINE, CLIENT_NAME, stock_df, recs_df and client_df;
    a global that is not defined is treated as None.

    Resolution order:
      0. No stock frame but a selection dict: use the selection as the wine record.
      1. Selection dict and stock frame: match by normalized id, then by
         case-insensitive name (narrowed by vintage when both sides have one);
         if nothing matches, use the selection as the wine record.
      2. No selection: the wine recommended most often among in-stock, priced wines;
         else the first in-stock priced row; else the first stock row.

    For a wine found in stock, the client is looked up through the recommendations
    and falls back to the first client row.

    Returns
    -------
    tuple
        (wine_row, client_row). wine_row is a pandas Series or a dict; client_row is
        a Series, a {"client_name": ...} dict, or None.

    Raises
    ------
    ValueError
        When there is neither stock data nor a selection.
    """
    g = globals()
    selection = g.get("SELECTED_WINE")
    client_name_param = g.get("CLIENT_NAME")
    stock, recs, clients = g.get("stock_df"), g.get("recs_df"), g.get("client_df")

    has_selection = isinstance(selection, dict)
    has_stock = isinstance(stock, pd.DataFrame)

    def _from_selection():
        client = {"client_name": client_name_param} if client_name_param else None
        return _row_from_selection(selection), client

    def _vintage_text(values):
        # A float vintage column renders as "2015.0"; compare on "2015".
        return values.astype(str).str.strip().str.replace(r"\.0$", "", regex=True)

    # 0) If NO data files at all but we have a selection, use it
    if stock is None and has_selection:
        return _from_selection()

    # 1) Prefer UI-selected wine
    if has_selection and has_stock:
        candidate_id = _normalize_id_series(
            pd.Series([selection.get("id") or selection.get("wine_id") or ""])
        ).iat[0]
        if candidate_id and "id" in stock.columns:
            by_id = stock.loc[stock["id"] == candidate_id]
            if not by_id.empty:
                client_row = _client_linked_to_wine(candidate_id, recs, clients)
                if client_row is None:
                    client_row = _default_client_row(clients)
                return by_id.iloc[0], client_row

        sel_name = str(selection.get("wine") or selection.get("name") or "").strip()
        sel_vintage = re.sub(r"\.0$", "", str(selection.get("vintage") or "").strip())
        if sel_name and "wine_name" in stock.columns:
            names = stock["wine_name"].astype(str).str.strip().str.lower()
            by_name = stock.loc[names == sel_name.lower()]
            if sel_vintage and "vintage" in by_name.columns:
                by_name = by_name.loc[_vintage_text(by_name["vintage"]) == sel_vintage]
            if not by_name.empty:
                wine_row = by_name.iloc[0]
                client_row = None
                if "id" in wine_row.index:
                    client_row = _client_linked_to_wine(wine_row["id"], recs, clients)
                if client_row is None:
                    client_row = _default_client_row(clients)
                return wine_row, client_row

        # selection not found in stock → still use selection row
        return _from_selection()

    # 2) No selection: use engine data
    if has_stock:
        in_stock_priced = stock.loc[(stock["stock_count"] > 0) & stock["price_chf"].notna()]

        valid_recs = pd.DataFrame()
        if (
            isinstance(recs, pd.DataFrame)
            and "id" in recs.columns
            and "id" in stock.columns
            and not in_stock_priced.empty
        ):
            valid_recs = recs.loc[recs["id"].isin(set(in_stock_priced["id"].unique()))]

        if not valid_recs.empty:
            wine_id_sel = valid_recs["id"].value_counts().index[0]
            hits = stock.loc[stock["id"] == wine_id_sel]
            if not hits.empty:
                # Here the first recommended customer is used, not the most frequent one.
                client_row = _client_linked_to_wine(wine_id_sel, valid_recs, clients, most_common=False)
                if client_row is None:
                    client_row = _default_client_row(clients)
                return hits.iloc[0], client_row

        if not in_stock_priced.empty:
            return in_stock_priced.iloc[0], _default_client_row(clients)

        if len(stock):
            return stock.iloc[0], _default_client_row(clients)

    raise ValueError("❌ No data files found and no SELECTED_WINE provided.")

# ---- Perform selection ----
wine_row, client_row = pick_wine_and_client()

# ---- Extract fields safely ----
# Rows coming from a DataFrame carry NaN for missing cells. NaN is truthy, so
# `value or default` keeps it and a later .lower()/.strip()/int() call fails; the
# helpers below treat None, NaN/NA and blank text alike.
def _is_missing(value):
    """True for None, NaN/NA scalars and blank strings."""
    if value is None:
        return True
    if isinstance(value, str):
        return value.strip() == ""
    try:
        return bool(pd.isna(value))
    except (TypeError, ValueError):
        return False  # list-like values: pd.isna returns an array


def _text_or(value, default):
    """`value` as trimmed text, or `default` when it is missing."""
    return default if _is_missing(value) else str(value).strip()


def _int_or(value, default):
    """`value` as int (accepts numeric text such as "750.0"), else `default`."""
    number = _to_float(value)
    return int(number) if math.isfinite(number) else default


getter = (wine_row.get if hasattr(wine_row, "get") else lambda k, d=None: wine_row[k] if k in wine_row else d)

wine_id    = None if _is_missing(getter("id")) else getter("id")
wine_name  = _text_or(getter("wine_name"), "Unknown Wine")
wine_type  = _text_or(getter("wine_type"), "Wine")
region     = _text_or(getter("region"), "Unknown Region")
grapes     = _text_or(getter("grapes"), "Various Grapes")
vintage    = re.sub(r"\.0$", "", _text_or(getter("vintage"), ""))  # 2015.0 -> "2015"
size_ml    = _int_or(getter("size_ml", 750), 750)
price_val  = getter("price_chf", math.nan)
avg_score  = getter("avg_score", None)
stock_cnt  = _int_or(getter("stock_count", 0), 0)

# client_row may be a dict, a pandas Series or None; cget always answers .get()-style.
if isinstance(client_row, dict):
    cget = client_row.get
elif client_row is not None and hasattr(client_row, "get"):
    cget = client_row.get
else:
    cget = lambda *a, **k: None

loyalty     = _text_or(cget("loyalty"), "bronze")
occasion    = _text_or(cget("occasion"), "special occasion").lower()
# An explicit CLIENT_NAME wins over the name stored with the client row.
client_name = (
    _text_or(globals().get("CLIENT_NAME"), "")
    or _text_or(cget("client_name"), "")
    or f"Client {_text_or(cget('customer_no'), '')}".strip()
)

# ---- Formatting helpers ----
def format_price(price):
    """Format a numeric price as 'CHF 1,234.50'.

    Any real number is accepted, including numpy integer and floating scalars.
    Missing, non-numeric, boolean, NaN and infinite values give
    'CHF Ask for quote'.
    """
    import numbers  # local import: not among this cell's top-level imports

    if isinstance(price, bool) or not isinstance(price, numbers.Real):
        return "CHF Ask for quote"
    amount = float(price)
    if not math.isfinite(amount):
        return "CHF Ask for quote"
    return f"CHF {amount:,.2f}"

def critic_line(score):
    """One-sentence critic blurb for the offer.

    A finite numeric score (number or numeric text) is truncated to an integer and
    shown as 'Rated N/100 by top critics.'. Missing or non-numeric scores give a
    generic sentence instead of raising.
    NOTE(review): the '/100' wording assumes the score is on a 0-100 scale — confirm
    for avg_cpi_score.
    """
    try:
        value = float(score)
    except (TypeError, ValueError):
        value = math.nan
    if math.isfinite(value):
        return f"Rated {int(value)}/100 by top critics."
    return "This wine is gaining attention among sommeliers."

# Narrative (deterministic per wine)
# Seeds the module-level RNG of `random` with the wine id as text. generate_narrative
# draws from that same RNG, so its wording repeats for a given wine only while no
# other `random` call happens between this line and the narrative call. A wine_id of
# None seeds with the text "None".
random.seed(str(wine_id))
def generate_narrative(_wine_name, _region, _grapes, _wine_type):
    """Build the two-sentence tasting blurb for the offer.

    The aroma and the finish are each drawn once (aroma first) from fixed word
    lists using the module-level `random` generator.
    """
    aroma_options = ["black cherry", "cedar", "truffle", "licorice", "rose petal"]
    finish_options = ["refined", "lingering", "silky", "persistent"]
    aroma = random.choice(aroma_options)
    finish_word = random.choice(finish_options)
    opening = f"{_wine_name} from {_region} opens with {aroma} aromatics, built on {_grapes}."
    closing = f"A {_wine_type} with a {finish_word} finish."
    return f"{opening} {closing}"

# ---- Image via SerpAPI (optional) ----
def fetch_wine_image(wine, region, vintage, size_ml):
    """Look up a bottle thumbnail through SerpAPI image search (best effort).

    The lookup only runs when the ``SERPAPI_ENABLED`` global is truthy and an
    API key is available (``SERPAPI_KEY`` global first, then the environment
    variable). Missing query parts (``None``, NaN, blank) are left out of the
    search string instead of appearing as "nan" or extra spaces.

    Args:
        wine: Wine name.
        region: Wine region.
        vintage: Vintage year; may be empty or missing.
        size_ml: Bottle size in millilitres.

    Returns:
        The thumbnail URL of the first image result that has one, or
        ``DEFAULT_WINE_IMAGE`` when the lookup is disabled, returns nothing
        usable, or fails for any reason.
    """
    api_key = str(globals().get("SERPAPI_KEY") or os.getenv("SERPAPI_KEY", "") or "").strip()
    serpapi_enabled = bool(globals().get("SERPAPI_ENABLED", False))
    if not (serpapi_enabled and api_key):
        return DEFAULT_WINE_IMAGE

    def _term(part):
        """Return ``part`` as clean query text, or "" when it carries no information."""
        if part is None:
            return ""
        if isinstance(part, float):
            if part != part:  # NaN never equals itself
                return ""
            if part.is_integer():
                part = int(part)  # 2015.0 -> "2015"
        text = str(part).strip()
        return "" if text.lower() in ("", "nan", "none", "<na>") else text

    try:
        from serpapi import GoogleSearch

        terms = [t for t in (_term(wine), _term(region), _term(vintage)) if t]
        size_text = _term(size_ml)
        if size_text:
            terms.append(f"{size_text}ml")
        terms.append("wine bottle")
        query = " ".join(terms)

        results = GoogleSearch({"q": query, "tbm": "isch", "num": 5, "api_key": api_key}).get_dict()
        # Take the first result that actually has a thumbnail rather than
        # giving up when only the very first entry lacks one.
        for img in results.get("images_results") or []:
            thumb = img.get("thumbnail") if isinstance(img, dict) else None
            if thumb:
                return thumb
        return DEFAULT_WINE_IMAGE
    except Exception as e:
        # Deliberately broad: an image is optional and must never block the offer.
        print(f"⚠️ Image fetch failed: {e}")
        return DEFAULT_WINE_IMAGE

# ---- Compose email ----
price_disp  = format_price(price_val)
subject     = build_subject(wine_name, price_disp)
image_url   = fetch_wine_image(wine_name, region, vintage, size_ml)

# Scarcity note only for a genuinely low, non-zero stock: a count of 0 is also
# what a missing stock value falls back to, so "Only 0 bottles left." would be
# misleading.
if 0 < stock_cnt < 3:
    _bottle_word = "bottle" if stock_cnt == 1 else "bottles"
    stock_note = f" Only {stock_cnt} {_bottle_word} left."
else:
    stock_note = ""

narrative   = generate_narrative(wine_name, region, grapes, wine_type)
full_note   = (
    f"We’ve selected something exceptional based on your wine preferences—ideal for your next {occasion}. "
    f"{narrative}{stock_note}"
)
critic_html = critic_line(avg_score)
email_html  = html_template(client_name, wine_name, price_disp, full_note, image_url, critic_html)

# ---- Save HTML ----
# Resolve lazily: an existing OFFERS_DIR wins; otherwise derive it from DATA_DIR,
# falling back to the current directory when DATA_DIR is not defined (the same
# fallback the Outlook cell uses) instead of failing on Path(None).
_offers_dir = globals().get("OFFERS_DIR")
if not _offers_dir:
    _offers_dir = Path(globals().get("DATA_DIR") or ".") / "offers"
OFFERS_DIR = Path(_offers_dir)
OFFERS_DIR.mkdir(parents=True, exist_ok=True)
out_path = OFFERS_DIR / f"offer_{wine_id or 'selected'}.html"
atomic_write_text(out_path, email_html)
print(f"✅ Offer HTML saved: {out_path}")

# ---- Emit JSON summary for the caller ----
summary = {
    "wine_id": wine_id,
    "subject": subject,
    "html_path": str(out_path),
    "client_name": client_name,
    "price": price_disp,
}
# default=str keeps the dump from failing when wine_id is a NumPy scalar taken
# from a DataFrame row (not JSON-serializable by default).
print(json.dumps(summary, indent=2, default=str))


✅ Offer HTML saved: C:\Users\Marco.Africani\OneDrive - AVU SA\AVU CPI Campaign\Puzzle_control_Reports\IRON_DATA\offers\offer_51359.html
{
  "wine_id": "51359",
  "subject": "\ud83c\udf1f Exclusive Offer: Lafite Rothschild \u2014 CHF 725.00",
  "html_path": "C:\\Users\\Marco.Africani\\OneDrive - AVU SA\\AVU CPI Campaign\\Puzzle_control_Reports\\IRON_DATA\\offers\\offer_51359.html",
  "client_name": "Valued Client",
  "price": "CHF 725.00"
}


In [8]:
# --- CELL 5 : Optional Outlook draft creation (robust) ---
import os
import sys
from pathlib import Path


def parse_bool_flag(value, default=False):
    """Interpret ``value`` as an on/off switch.

    Args:
        value: ``None``, a string such as "1"/"0"/"true"/"no", or any other object.
        default: Result for ``None`` and for text that cannot be interpreted.

    Returns:
        ``True`` or ``False``. Recognised words are matched case-insensitively,
        other text is tried as an integer (non-zero means on), blank text is
        off, and non-string values use plain truthiness.
    """
    if value is None:
        return default
    if isinstance(value, str):
        text = value.strip().lower()
        if text in ("1", "true", "yes", "y", "on"):
            return True
        if text in ("", "0", "false", "no", "n", "off"):
            return False
        try:
            return bool(int(text))
        except ValueError:
            return default
    return bool(value)


# 1) Decide whether to draft in Outlook
#    - Env var ENABLE_OUTLOOK takes priority if set
#    - Otherwise reuse any global ENABLE_OUTLOOK defined in earlier cells
_outlook_env = os.getenv("ENABLE_OUTLOOK")
if _outlook_env is not None:
    ENABLE_OUTLOOK = parse_bool_flag(_outlook_env)
else:
    ENABLE_OUTLOOK = parse_bool_flag(globals().get("ENABLE_OUTLOOK"), default=False)

# Defined up front so the error handler can test it even if the imports fail.
pywintypes = None

# 2) Windows-only guard
if sys.platform != "win32":
    print("ℹ️ Non-Windows environment detected; skipping Outlook draft.")
elif not ENABLE_OUTLOOK:
    print("ℹ️ ENABLE_OUTLOOK is false; skipping Outlook draft.")
else:
    try:
        import win32com.client as win32
        try:
            import pywintypes  # for detailed COM errors
        except ImportError:
            pywintypes = None

        # 3) Collect subject & HTML content with fallbacks
        subject = globals().get("subject") or "AVU Offer"
        html = globals().get("email_html")

        if not html:
            # Try to read the file we just saved in Cell 4
            out_path = globals().get("out_path")
            if out_path and Path(out_path).exists():
                html = Path(out_path).read_text(encoding="utf-8")
            else:
                # Fallback: last modified HTML in OFFERS_DIR
                offers_dir = Path(globals().get("OFFERS_DIR", Path(globals().get("DATA_DIR", ".")) / "offers"))
                if offers_dir.exists():
                    html_files = sorted(offers_dir.glob("*.html"), key=lambda p: p.stat().st_mtime, reverse=True)
                    if html_files:
                        html = html_files[0].read_text(encoding="utf-8")

        if not html:
            html = "<html><body><p>(No content)</p></body></html>"

        # 4) Draft the email
        ol = win32.Dispatch("Outlook.Application")
        _ = ol.GetNamespace("MAPI").Folders  # ensure profile is initialized

        mail = ol.CreateItem(0)  # 0 = MailItem
        try:
            mail.BodyFormat = 2  # 2 = olFormatHTML
        except Exception:
            # Best effort only: HTMLBody is assigned below regardless.
            pass

        mail.Subject = subject
        mail.HTMLBody = html
        # mail.To = "client@example.com"  # leave blank to save to Drafts
        mail.Save()
        print("📧 Outlook draft created in Drafts.")
    except Exception as e:
        # Drafting is optional: report the problem and let the notebook continue.
        if pywintypes is not None and isinstance(e, pywintypes.com_error):
            info = getattr(e, "excepinfo", None)
            print(f"⚠️ Outlook COM error: {info or e}")
        else:
            print(f"⚠️ Could not create Outlook draft: {e}")


📧 Outlook draft created in Drafts.
