In [22]:
# JUPYTER HARNESS — Planificador no interactivo
# ------------------------------------------------
# - Sin inputs ni questionary.
# - Parámetros por function-call.
# - Devuelve DataFrames: plan, freq; (opcional) escribe Excel.
# ------------------------------------------------

import os, re, unicodedata, math
import numpy as np
import pandas as pd
from datetime import date, timedelta, datetime

# ==========================
# CONFIG / CONSTANTES
# ==========================
RM3D_MIN_DEFAULT = 0.1

# ==========================
# Utils (copiados de tu script, con mínimos cambios)
# ==========================
def _norm(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and isinstance(s, (int,float)) and np.isnan(s)) else str(s)
    s = s.replace("³", "3")
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    s = s.lower().strip().replace("\xa0"," ")
    s = s.replace("_"," ").replace("-"," ").replace("."," ").replace("\n"," ")
    return " ".join(s.split())

def _pozo_key(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    return "".join(ch for ch in s if ch.isalnum()).upper()

def _canonical_digits(d: str) -> str:
    d = (d or "").lstrip("0")
    return d if d != "" else "0"

def _letters_digits_from_key_both(k: str):
    raw_digits = "".join(re.findall(r"\d+", k))
    digits_canon = _canonical_digits(raw_digits)
    letters = re.sub(r"\d+", "", k)
    return letters, digits_canon, len(raw_digits)

def _ratio_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _fuzzy_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.partial_ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _canon_prefix_pozo(s: str) -> str:
    if s is None or (isinstance(s, float) and np.isnan(s)):
        return s
    raw = str(s).strip()
    raw_up = raw.upper()
    if raw_up.startswith("CÑE"):
        return "CNE" + raw_up[3:]
    raw_ascii = unicodedata.normalize("NFKD", raw_up).encode("ascii", "ignore").decode("ascii")
    if raw_ascii.startswith("CNE"):
        return raw_ascii
    if raw_ascii.startswith("CN"):
        return "CNE" + raw_ascii[2:]
    m = re.match(r"^CE(\d+)$", raw_ascii)
    if m:
        return "CNE" + m.group(1)
    return raw_ascii

def next_monday(d=None):
    d = d or date.today()
    return d + timedelta(days=(7 - d.weekday()) % 7)  # 0=Lunes

def unique_output_path(base_input_path: str) -> str:
    folder = os.path.dirname(os.path.abspath(base_input_path))
    stem   = os.path.splitext(os.path.basename(base_input_path))[0]
    today  = datetime.now().strftime("%Y%m%d")
    base   = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}.xlsx")
    if not os.path.exists(base): return base
    i = 2
    while True:
        cand = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}_({i}).xlsx")
        if not os.path.exists(cand): return cand
        i += 1

EXPECTED_KEYS = {
    "fecha":       ["fecha"],
    "pozo":        ["pozo"],
    "zona":        ["zona"],
    "bateria":     ["bateria", "batería"],
    "m3":          ["m3 bruta","m3","m3_bruta","m3bruta","m 3 bruta","m 3","m3 bruto","m3 recuperado","m3 recupero"],
    "carreras":    ["n de carreras","n° de carreras","nº de carreras","no de carreras","nro de carreras","numero de carreras","n° carreras","n de carrera","n carreras"],
    "nivel_final": ["nivel final pozo","nivel final","nivel final del pozo"],
    "obs_pozo":    ["observaciones del pozo","observaciones","comentarios","comentario"]
}

def _find_header_row(df_raw):
    for i in range(min(200, len(df_raw))):
        row_norm = [_norm(x) for x in df_raw.iloc[i,:].tolist()]
        if not row_norm:
            continue
        colmap = {v:j for v,j in zip(row_norm, range(len(row_norm)))}
        def has_any(keys): return any(k in colmap for k in keys)
        if has_any(EXPECTED_KEYS["fecha"]) and has_any(EXPECTED_KEYS["pozo"]) and has_any(EXPECTED_KEYS["zona"]) and has_any(EXPECTED_KEYS["bateria"]):
            return i, row_norm
    return None, None

# ---------- Lecturas ----------
def read_historial(xlsx_path, sheet_hist=None):
    xl = pd.ExcelFile(xlsx_path)
    sheets = [sheet_hist] if (sheet_hist and sheet_hist in xl.sheet_names) else xl.sheet_names
    for sh in sheets:
        raw = xl.parse(sh, header=None)
        idx, header_norm = _find_header_row(raw)
        if idx is None:
            continue
        data = raw.iloc[idx:, :].copy()
        true_headers = data.iloc[0,:].astype(str).tolist()
        data = data.iloc[1:,:]
        data.columns = true_headers

        name_map = {c: _norm(c) for c in data.columns}
        def find_col(candidates):
            for c, n in name_map.items():
                if n in candidates:
                    return c
            return None

        c_fecha       = find_col(set(EXPECTED_KEYS["fecha"]))
        c_pozo        = find_col(set(EXPECTED_KEYS["pozo"]))
        c_zona        = find_col(set(EXPECTED_KEYS["zona"]))
        c_bateria     = find_col(set(EXPECTED_KEYS["bateria"]))
        c_m3          = find_col(set(EXPECTED_KEYS["m3"]))
        c_carr        = find_col(set(EXPECTED_KEYS["carreras"]))
        c_nivel_final = find_col(set(EXPECTED_KEYS["nivel_final"]))
        c_obs         = find_col(set(EXPECTED_KEYS["obs_pozo"]))  # NUEVO

        if not (c_fecha and c_pozo and c_zona and c_bateria):
            continue

        use_cols = [c_fecha, c_pozo, c_zona, c_bateria]
        headers  = ["FECHA","POZO","ZONA","BATERIA"]
        if c_m3:            use_cols.append(c_m3);            headers.append("M3")
        if c_carr:          use_cols.append(c_carr);          headers.append("CARRERAS")
        if c_nivel_final:   use_cols.append(c_nivel_final);   headers.append("NIVEL_FINAL")
        if c_obs:           use_cols.append(c_obs);           headers.append("OBS_POZO")  # NUEVO

        df = data[use_cols].copy()
        df.columns = headers

        df["FECHA"] = pd.to_datetime(df["FECHA"], errors="coerce")
        if "M3" not in df.columns: df["M3"] = np.nan
        else: df["M3"] = pd.to_numeric(df["M3"], errors="coerce")

        if "CARRERAS" not in df.columns: df["CARRERAS"] = np.nan
        else: df["CARRERAS"] = pd.to_numeric(df["CARRERAS"], errors="coerce")

        if "NIVEL_FINAL" not in df.columns:
            df["NIVEL_FINAL"] = None
        if "OBS_POZO" not in df.columns:
            df["OBS_POZO"] = None

        for col in ["POZO","ZONA","BATERIA","NIVEL_FINAL","OBS_POZO"]:
            df[col] = df[col].astype(str).str.strip().replace({"nan": np.nan})

        df = df.dropna(subset=["FECHA","POZO"]).sort_values(["POZO","FECHA"])
        return df

    raise ValueError("No pude detectar FECHA/POZO/ZONA/BATERÍA en ninguna hoja del Excel.")

def load_pozo_dictionary(xlsx_path: str):
    try:
        ref = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer diccionario de pozos: {xlsx_path}\n{e}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    cols = {c.lower().strip(): c for c in ref.columns}
    if "nombre_corto_pozo" not in cols:
        print(f"\n[AVISO] El diccionario no tiene la columna 'nombre_corto_pozo'. Columnas: {list(ref.columns)}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    c_pozo = cols["nombre_corto_pozo"]
    c_met  = cols.get("met_prod")
    c_n3   = cols.get("nivel_3")
    c_n5   = cols.get("nivel_5")
    c_est  = cols.get("estado")

    refv = ref.loc[ref[c_pozo].notna()].copy()
    refv[c_pozo] = refv[c_pozo].astype(str).str.strip()

    of_list  = refv[c_pozo].tolist()
    met_vals = refv[c_met].astype(str).str.strip() if c_met else np.nan
    n3_vals  = refv[c_n3].astype(str).str.strip()  if c_n3 else np.nan
    n5_vals  = refv[c_n5].astype(str).str.strip()  if c_n5 else np.nan
    est_vals = refv[c_est].astype(str).str.strip() if c_est else np.nan

    keys, letters_, digits_canon_, digits_len_ = [], [], [], []
    for val in of_list:
        k = _pozo_key(val)
        L, Dcanon, Dlen = _letters_digits_from_key_both(k)
        keys.append(k); letters_.append(L); digits_canon_.append(Dcanon); digits_len_.append(Dlen)

    dict_df = pd.DataFrame({
        "oficial": of_list,
        "key": keys,
        "letters": letters_,
        "digits_canon": digits_canon_,
        "digits_len": digits_len_,
        "met_prod": list(met_vals) if isinstance(met_vals, pd.Series) else [np.nan]*len(of_list),
        "nivel_3":  list(n3_vals)  if isinstance(n3_vals,  pd.Series) else [np.nan]*len(of_list),
        "nivel_5":  list(n5_vals)  if isinstance(n5_vals,  pd.Series) else [np.nan]*len(of_list),
        "estado":   list(est_vals) if isinstance(est_vals, pd.Series) else [np.nan]*len(of_list),
    })

    key2off = {}
    for k, off in zip(dict_df["key"], dict_df["oficial"]):
        if k and k not in key2off:
            key2off[k] = off
    return key2off, dict_df

def apply_pozo_normalization(df: pd.DataFrame, key2off: dict, dict_df: pd.DataFrame):
    df = df.copy()
    df["POZO_ORIG"] = df["POZO"].astype(str).str.strip()
    df["POZO_PreCanon"] = df["POZO_ORIG"].apply(_canon_prefix_pozo)
    df["__POZO_KEY"] = df["POZO_PreCanon"].apply(_pozo_key)

    parts = df["__POZO_KEY"].apply(_letters_digits_from_key_both)
    df["__KEY_LET"], df["__KEY_DIG_CANON"], df["__KEY_DIG_LEN"] = zip(*parts)

    df["POZO_MATCH"]   = None
    df["MATCH_TIPO"]   = "NO"
    df["MATCH_SCORE"]  = np.nan
    df["LETTER_SCORE"] = np.nan
    df["APLICADO"]     = "NO"
    df["ALERTA_NORM"]  = ""
    df["VALIDO_POZO"]  = True

    invalid_mask = (df["__KEY_LET"].str.len()==0) | (df["__KEY_DIG_LEN"]==0)
    if invalid_mask.any():
        df.loc[invalid_mask, "ALERTA_NORM"] = "SIN_LETRAS_O_DIGITOS"
        df.loc[invalid_mask, "VALIDO_POZO"] = False

    valid_mask = ~invalid_mask
    exact_mask = valid_mask & df["__POZO_KEY"].isin(key2off.keys())
    df.loc[exact_mask, "POZO_MATCH"]   = df.loc[exact_mask, "__POZO_KEY"].map(key2off)
    df.loc[exact_mask, "MATCH_TIPO"]   = "EXACTO"
    df.loc[exact_mask, "MATCH_SCORE"]  = 100
    df.loc[exact_mask, "LETTER_SCORE"] = 100
    df.loc[exact_mask, "APLICADO"]     = "SI"

    pending = df[valid_mask & (~exact_mask)].index.tolist()
    if pending and not dict_df.empty:
        dict_by_spec = {}
        for spec, sub in dict_df.groupby(["digits_canon","digits_len"]):
            dict_by_spec[spec] = sub

        for idx in pending:
            key_u   = df.at[idx, "__POZO_KEY"]
            let_u   = df.at[idx, "__KEY_LET"]
            digc_u  = df.at[idx, "__KEY_DIG_CANON"]
            digl_u  = int(df.at[idx, "__KEY_DIG_LEN"])

            cand_df = dict_by_spec.get((digc_u, digl_u), pd.DataFrame())
            best_off, best_score, best_lscore = None, -1, -1

            if cand_df is not None and not cand_df.empty:
                for row in cand_df.itertuples():
                    kk = row.key
                    ll = row.letters
                    sc_key = _fuzzy_score(key_u, kk)
                    sc_let = _ratio_score(let_u, ll)
                    if sc_let < 80:
                        continue
                    if sc_key > best_score or (sc_key == best_score and sc_let > best_lscore):
                        best_score = sc_key
                        best_lscore = sc_let
                        best_off   = row.oficial

            if best_off is not None:
                df.at[idx, "POZO_MATCH"]   = best_off
                df.at[idx, "MATCH_TIPO"]   = "SUGERIDO"
                df.at[idx, "MATCH_SCORE"]  = int(best_score)
                df.at[idx, "LETTER_SCORE"] = int(best_lscore)
            else:
                df.at[idx, "ALERTA_NORM"] = "SIN MATCH EN DICCIONARIO"

    # Reemplazos
    df["POZO"] = df["POZO_MATCH"].where(df["POZO_MATCH"].notna(), df["POZO"])
    meta_first = dict_df.groupby("oficial")[["met_prod","nivel_3","nivel_5"]].first()
    df = df.merge(meta_first, how="left", left_on="POZO", right_index=True)

    if "nivel_3" in df.columns:
        df.loc[df["POZO_MATCH"].isna(), "nivel_3"] = ""
        df["ZONA"] = np.where(df["POZO_MATCH"].notna(), df["nivel_3"].fillna(""), "")

    if "nivel_5" in df.columns:
        df["BATERIA"] = np.where(
            df["nivel_5"].notna() & (df["nivel_5"].astype(str).str.strip()!=""),
            df["nivel_5"], df["BATERIA"]
        )

    df["__ZONA_NORM"]    = df["ZONA"].apply(_norm)
    df["__BATERIA_NORM"] = df["BATERIA"].apply(_norm)

    norm_table = (df[["POZO_ORIG","POZO_PreCanon","__POZO_KEY",
                      "__KEY_LET","__KEY_DIG_CANON","__KEY_DIG_LEN",
                      "POZO_MATCH","MATCH_TIPO","MATCH_SCORE","LETTER_SCORE",
                      "APLICADO","ALERTA_NORM","VALIDO_POZO",
                      "met_prod","nivel_3","nivel_5"]]
                  .drop_duplicates())

    alert_table = norm_table[(norm_table["VALIDO_POZO"]==False) | (norm_table["APLICADO"]=="NO") | (norm_table["MATCH_TIPO"]=="NO")].copy()

    return df, alert_table, norm_table

def _to_float_maybe_comma(x):
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip()
    if s == "": return np.nan
    s = s.replace(",", ".")
    try:
        return float(s)
    except Exception:
        return np.nan

def read_coords(xlsx_path):
    try:
        cdf = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer coordenadas: {xlsx_path}\n{e}\n")
        return pd.DataFrame(columns=["POZO","LAT","LON"])
    cols_map = {c.lower().strip(): c for c in cdf.columns}
    c_pozo = cols_map.get("pozo")
    for k in ["geo_latitude","latitude","lat"]:
        if k in cols_map:
            c_lat = cols_map[k]; break
    else:
        c_lat = None
    for k in ["geo_longitude","longitude","lon","long"]:
        if k in cols_map:
            c_lon = cols_map[k]; break
    else:
        c_lon = None

    if not (c_pozo and c_lat and c_lon):
        print(f"[AVISO] Coordenadas: columnas esperadas 'POZO','GEO_LATITUDE','GEO_LONGITUDE'. Columnas encontradas: {list(cdf.columns)}")
        return pd.DataFrame(columns=["POZO","LAT","LON"])

    out = cdf[[c_pozo, c_lat, c_lon]].copy()
    out.columns = ["POZO","LAT","LON"]
    out["POZO"] = out["POZO"].astype(str).str.strip()
    out["LAT"] = out["LAT"].apply(_to_float_maybe_comma)
    out["LON"] = out["LON"].apply(_to_float_maybe_comma)
    out = out.dropna(subset=["POZO"])
    out = out.drop_duplicates(subset=["POZO"], keep="last")
    return out

# ==========================
# Frecuencias / r_m3_d
# ==========================
def _count_trailing_zeros_with_carr(g):
    cnt = 0
    for _, row in g.sort_values("FECHA").iloc[::-1].iterrows():
        m3 = row.get("M3", np.nan)
        car = row.get("CARRERAS", np.nan)
        if pd.notna(m3) and float(m3) == 0.0 and pd.notna(car) and float(car) > 0:
            cnt += 1
        else:
            break
    return cnt

def compute_frecuencias(df, params):
    v_target = params["m3_por_visita_objetivo"]
    min_d    = params["min_dias_freq"]
    max_d    = params["max_dias_freq"]
    k        = int(params["k_visitas"])
    one_days = int(params.get("dias_asumidos_una_visita", 7))
    freq_cero_ultimo = int(params.get("freq_dias_ultimo_cero_valido", 30))

    out = []
    for pozo, g0 in df.groupby("POZO", sort=False):
        g = g0.sort_values("FECHA").copy()

        for col in ["ZONA","BATERIA","NIVEL_FINAL"]:
            if col in g.columns:
                g[col] = g[col].replace({None: np.nan})
                g[col] = g[col].ffill().bfill()

        g["__ZONA_NORM"]    = g["ZONA"].apply(_norm)
        g["__BATERIA_NORM"] = g["BATERIA"].apply(_norm)
        g["__nf_norm"]      = g["NIVEL_FINAL"].apply(_norm) if "NIVEL_FINAL" in g.columns else ""

        med_validas_all = g[g["M3"].notna()].copy()

        m3_eq0 = g["M3"].fillna(0) == 0
        carr   = g.get("CARRERAS", pd.Series(index=g.index, dtype=float)).fillna(np.nan)
        zero_cond_a = m3_eq0 & (carr.fillna(0) >= 1)
        zero_cond_b = m3_eq0 & ((carr.isna()) | (carr.fillna(0) == 0)) & (g["__nf_norm"] == "surge")
        cond_cero_valido = zero_cond_a | zero_cond_b

        validas_rate = g[(g["M3"] > 0) | cond_cero_valido].copy()
        zeros_tail = _count_trailing_zeros_with_carr(g)

        ultima_med = med_validas_all["FECHA"].max() if not med_validas_all.empty else pd.NaT
        ultima_exi = g.loc[g["M3"]>0, "FECHA"].max() if "M3" in g.columns and not g[g["M3"]>0].empty else pd.NaT

        last_zero_valido = False
        if not med_validas_all.empty:
            idx_last = med_validas_all["FECHA"].idxmax()
            m3_last  = g.at[idx_last, "M3"]
            if pd.notna(m3_last) and float(m3_last) == 0.0:
                try:
                    last_zero_valido = bool(cond_cero_valido.loc[idx_last])
                except Exception:
                    last_zero_valido = False

        alerta = ""
        if last_zero_valido:
            alerta = f"ULTIMA_M3_0_VALIDO -> FREQ {freq_cero_ultimo}D"
        elif pd.notna(ultima_med):
            if zeros_tail > 0:
                alerta = f"ALERTA: {zeros_tail} cero(s) consecutivo(s) con Carreras>0"

        # r_m3_d
        r = np.nan
        if not validas_rate.empty:
            v = validas_rate.copy()
            v["delta_d"] = v["FECHA"].diff().dt.days
            v.loc[v["delta_d"] <= 0, "delta_d"] = np.nan
            v["rate"] = v["M3"].fillna(0) / v["delta_d"]
            rates = v["rate"].dropna()
            if len(rates) >= 1:
                r = rates.tail(min(k, len(rates))).mean()
            else:
                row = v.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan
        else:
            if len(med_validas_all) == 1:
                row = med_validas_all.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan

        # FRECUENCIA
        if last_zero_valido:
            delta = int(freq_cero_ultimo)
        else:
            if pd.isna(r):      delta = 7
            elif r <= 0:        delta = max_d
            else:
                delta = max(min_d, min(max_d, float(v_target)/float(r)))
                delta = int(7 * round(delta / 7.0))
                if delta < 7:
                    delta = 7

        prox = (ultima_med + pd.Timedelta(days=int(delta))) if pd.notna(ultima_med) else pd.Timestamp(next_monday())

        out.append({
            "POZO": pozo,
            "ZONA": g["ZONA"].iloc[-1],
            "BATERIA": g["BATERIA"].iloc[-1],
            "ZONA_NORM": g["__ZONA_NORM"].iloc[-1],
            "BATERIA_NORM": g["__BATERIA_NORM"].iloc[-1],
            "r_m3_d": r,
            "ultima_medicion": ultima_med,
            "ultima_exitosa": ultima_exi,
            "delta_star_dias": int(delta),
            "proxima_visita_base": prox,
            "ceros_consec": zeros_tail,
            "alerta": alerta
        })
    return pd.DataFrame(out)

# ==========================
# Geodesia
# ==========================
def haversine_km(lat1, lon1, lat2, lon2):
    try:
        if pd.isna(lat1) or pd.isna(lon1) or pd.isna(lat2) or pd.isna(lon2):
            return np.nan
        R = 6371.0088
        p1 = math.radians(float(lat1)); p2 = math.radians(float(lat2))
        dphi = math.radians(float(lat2) - float(lat1))
        dlmb = math.radians(float(lon2) - float(lon1))
        a = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dlmb/2)**2
        return 2*R*math.asin(math.sqrt(a))
    except Exception:
        return np.nan

# ==========================
# Candidatos
# ==========================
def build_candidates_with_coords(freq, week_start, week_end, excl_pozos,
                                 zonas_norm_incluidas, coords_df,
                                 allowed_bats_by_zone_norm=None,
                                 next_due_map=None,
                                 rm3d_min=RM3D_MIN_DEFAULT):
    F = freq.copy()
    F["due_date"] = F["proxima_visita_base"]
    if next_due_map:
        F["due_date"] = F["POZO"].map(next_due_map).fillna(F["due_date"])

    F["overdue_d"] = (pd.Timestamp(week_start) - pd.to_datetime(F["due_date"])).dt.days
    F["is_overdue"] = F["overdue_d"] > 0

    F["__v"] = F["r_m3_d"].astype(float)

    if "ZONA_NORM" in F.columns and zonas_norm_incluidas:
        F = F[F["ZONA_NORM"].isin(zonas_norm_incluidas)].copy()

    if allowed_bats_by_zone_norm:
        mask = pd.Series(True, index=F.index)
        for zn in zonas_norm_incluidas:
            bats = allowed_bats_by_zone_norm.get(zn)
            if bats is not None:
                mask &= ~ (F["ZONA_NORM"] == zn) | (F["BATERIA_NORM"].isin(bats))
        F = F[mask].copy()

    if excl_pozos:
        F = F[~F["POZO"].isin(excl_pozos)].copy()

    F = F[F["r_m3_d"].fillna(0) > rm3d_min].copy()
    F = F[F["BATERIA"].notna() & (F["BATERIA"].astype(str).str.strip() != "")].copy()

    if "comentario" in F.columns:
        F["__comentario_txt"] = F["comentario"].astype(str).fillna("").str.strip()
        F = F[F["__comentario_txt"] == ""].copy()
        F.drop(columns=["__comentario_txt"], inplace=True, errors="ignore")

    coords_df = coords_df if coords_df is not None else pd.DataFrame(columns=["POZO","LAT","LON"])
    F = F.merge(coords_df, how="left", on="POZO")
    F["has_coords"] = F["LAT"].notna() & F["LON"].notna()

    F = F.sort_values(by=["is_overdue","__v","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    return F

def _v_est_for_day(row, day_date):
    r = row.get("r_m3_d", np.nan)
    u = row.get("ultima_medicion", pd.NaT)
    if pd.isna(u) or pd.isna(r) or r <= 0:
        return 0.0
    dd = max(0, (pd.Timestamp(day_date) - pd.Timestamp(u)).days)
    return max(0.0, float(r) * float(dd))

# ==========================
# Lógica de asignación (round-robin con clúster seed-based original)
# ==========================
def _bb_filter(df, lat0, lon0, rad_km):
    if pd.isna(lat0) or pd.isna(lon0) or df.empty:
        return df.iloc[0:0]
    dlat = rad_km / 110.574
    dlon = rad_km / (111.320 * max(0.1, math.cos(math.radians(float(lat0)))))
    return df[(df["LAT"].between(lat0 - dlat, lat0 + dlat)) &
              (df["LON"].between(lon0 - dlon, lon0 + dlon))].copy()

def _fill_day_star_clusters(day_date, avail_df, cap_per_day, radius_km, used_set,
                            cluster_cap, params=None,
                            clusters_max=None, backfill_nearest=True, umbral_km_backfill=5.0):
    # (idéntica a tu versión actual; abreviada aquí por espacio)
    # Para pruebas rápidas en Jupyter, mantenemos la misma mecánica seed-based.
    assigned = []
    remaining_cap = int(cap_per_day)
    cluster_cap   = max(1, int(cluster_cap))
    TOP_N = 30 if params is None else int(params.get("top_semillas_eval", 30))

    if params is not None and params.get("umbral_km_backfill") is not None:
        umbral_km_backfill = float(params.get("umbral_km_backfill"))

    has_xy = avail_df["has_coords"].fillna(False)
    pool = pd.concat([
        avail_df.loc[has_xy].sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]),
        avail_df.loc[~has_xy].sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]),
    ], ignore_index=True)

    c_lat_acc, c_lon_acc, n_acc = (np.nan, np.nan, 0)

    def _build_cluster_from_seed(seed_row, pool_df, cap_left):
        seed_lat = seed_row.get("LAT", np.nan)
        seed_lon = seed_row.get("LON", np.nan)
        rows_cluster = [seed_row]

        if pd.notna(seed_lat) and pd.notna(seed_lon):
            neigh = _bb_filter(pool_df, seed_lat, seed_lon, radius_km)
            if not neigh.empty:
                neigh = neigh.copy()
                neigh["__dist_seed"] = neigh.apply(
                    lambda r: haversine_km(seed_lat, seed_lon, r["LAT"], r["LON"]), axis=1
                )
                neigh = neigh[neigh["__dist_seed"] <= radius_km]
                neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

                max_neighbors_by_cluster = max(0, cluster_cap - 1)
                max_neighbors_by_day     = max(0, cap_left - 1)
                take_n = min(max_neighbors_by_cluster, max_neighbors_by_day)
                if take_n > 0 and not neigh.empty:
                    rows_cluster.extend([nr for _, nr in neigh.head(take_n).iterrows()])

        coords_cluster = [(r.get("LAT", np.nan), r.get("LON", np.nan))
                          for r in rows_cluster
                          if pd.notna(r.get("LAT", np.nan)) and pd.notna(r.get("LON", np.nan))]
        if coords_cluster:
            c_lat = float(np.mean([x for x,_ in coords_cluster]))
            c_lon = float(np.mean([y for _,y in coords_cluster]))
        else:
            c_lat, c_lon = (np.nan, np.nan)

        used_ids = [r["POZO"] for r in rows_cluster]
        return rows_cluster, (c_lat, c_lon), used_ids

    def _append_cluster(rows_cluster, seed_pozo, seed_lat, seed_lon, c_lat, c_lon):
        nonlocal remaining_cap, c_lat_acc, c_lon_acc, n_acc
        used_now = set()
        for r in rows_cluster:
            if remaining_cap <= 0:
                break
            pozo = r["POZO"]
            if pozo in used_set or pozo in used_now:
                continue

            lat = r.get("LAT", np.nan); lon = r.get("LON", np.nan)
            d_seed = haversine_km(seed_lat, seed_lon, lat, lon) if pd.notna(seed_lat) and pd.notna(seed_lon) else np.nan
            d_cent = haversine_km(c_lat, c_lon, lat, lon)       if pd.notna(c_lat)  and pd.notna(c_lon)  else np.nan

            assigned.append({
                "Plan_Fecha": day_date.date(),
                "Semana_ISO": day_date.isocalendar()[1],
                "ZONA": r["ZONA"],
                "BATERIA": r["BATERIA"],
                "POZO": pozo,
                "r_m3_d": float(r.get("__v", r.get("r_m3_d", np.nan))),
                "ultima_medicion": r.get("ultima_medicion", pd.NaT),
                "Seed_POZO": seed_pozo,
                "Dist_km_semilla": None if pd.isna(d_seed) else round(float(d_seed), 3),
                "Dist_km_centroid": None if pd.isna(d_cent) else round(float(d_cent), 3),
            })
            used_now.add(pozo)
            remaining_cap -= 1

            if pd.notna(lat) and pd.notna(lon):
                if n_acc == 0:
                    c_lat_acc, c_lon_acc, n_acc = float(lat), float(lon), 1
                else:
                    c_lat_acc = (c_lat_acc*n_acc + float(lat)) / (n_acc + 1)
                    c_lon_acc = (c_lon_acc*n_acc + float(lon)) / (n_acc + 1)
                    n_acc += 1

        return used_now

    clusters_hechos = 0

    while (remaining_cap > 0) and (not pool.empty):
        cand_pool = pool[pool["has_coords"]].copy()
        best_total = None
        best_seed_idx = None
        best_cluster_rows = None

        if not cand_pool.empty:
            cand_seeds = cand_pool.sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]).head(max(1, TOP_N))
            for seed_idx, seed_row in cand_seeds.iterrows():
                seed_lat = seed_row.get("LAT", np.nan)
                seed_lon = seed_row.get("LON", np.nan)
                if pd.isna(seed_lat) or pd.isna(seed_lon):
                    continue

                neigh = _bb_filter(pool.drop(index=seed_idx, errors="ignore"), seed_lat, seed_lon, radius_km)
                if not neigh.empty:
                    neigh = neigh.copy()
                    neigh["__dist_seed"] = neigh.apply(
                        lambda r: haversine_km(seed_lat, seed_lon, r["LAT"], r["LON"]), axis=1
                    )
                    neigh = neigh[neigh["__dist_seed"] <= radius_km]
                    neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

                max_neighbors_by_cluster = max(0, cluster_cap - 1)
                max_neighbors_by_day     = max(0, remaining_cap - 1)
                take_n = min(max_neighbors_by_cluster, max_neighbors_by_day)

                if take_n > 0 and not neigh.empty:
                    neigh_take = neigh.head(take_n)
                    cluster_rows = [seed_row] + [nr for _, nr in neigh_take.iterrows()]
                else:
                    cluster_rows = [seed_row]

                total_est = 0.0
                for r in cluster_rows:
                    rr = r.get("__v", r.get("r_m3_d", np.nan))
                    total_est += _v_est_for_day({"r_m3_d": rr,
                                                 "ultima_medicion": r.get("ultima_medicion", pd.NaT)}, day_date)

                if (best_total is None) or (total_est > best_total):
                    best_total = total_est
                    best_seed_idx = seed_idx
                    best_cluster_rows = cluster_rows

        else:
            seed_row = pool.iloc[0]
            seed_lat  = seed_row.get("LAT", np.nan)
            seed_lon  = seed_row.get("LON", np.nan)
            best_cluster_rows = [seed_row]
            if pd.notna(seed_lat) and pd.notna(seed_lon):
                neigh = _bb_filter(pool.iloc[1:], seed_lat, seed_lon, radius_km)
                if not neigh.empty:
                    neigh = neigh.copy()
                    neigh["__dist_seed"] = neigh.apply(
                        lambda r: haversine_km(seed_lat, seed_lon, r["LAT"], r["LON"]), axis=1
                    )
                    neigh = neigh[neigh["__dist_seed"] <= radius_km]
                    neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

                    max_neighbors_by_cluster = max(0, cluster_cap - 1)
                    max_neighbors_by_day     = max(0, remaining_cap - 1)
                    take_n = min(max_neighbors_by_cluster, max_neighbors_by_day)
                    if take_n > 0 and not neigh.empty:
                        for _, nr in neigh.head(take_n).iterrows():
                            best_cluster_rows.append(nr)

        if best_cluster_rows is None:
            break

        seed_row = best_cluster_rows[0]
        seed_lat = seed_row.get("LAT", np.nan)
        seed_lon = seed_row.get("LON", np.nan)

        coords_cluster = [(r.get("LAT", np.nan), r.get("LON", np.nan))
                          for r in best_cluster_rows
                          if pd.notna(r.get("LAT", np.nan)) and pd.notna(r.get("LON", np.nan))]
        if coords_cluster:
            c_lat = float(np.mean([x for x,_ in coords_cluster]))
            c_lon = float(np.mean([y for _,y in coords_cluster]))
        else:
            c_lat, c_lon = (np.nan, np.nan)

        used_now = _append_cluster(best_cluster_rows, seed_row["POZO"], seed_lat, seed_lon, c_lat, c_lon)

        if used_now:
            clusters_hechos += 1
            used_set.update(used_now)
            pool = pool[~pool["POZO"].isin(used_now)].copy()
        else:
            pool = pool.iloc[1:].copy()

        if not backfill_nearest or remaining_cap <= 0 or (clusters_max and clusters_hechos >= int(clusters_max)):
            break

        while (remaining_cap > 0) and (not pool.empty) and (not (clusters_max and clusters_hechos >= int(clusters_max))):
            if pd.isna(c_lat_acc) or pd.isna(c_lon_acc) or (np.isnan(c_lat_acc) and np.isnan(c_lon_acc)):
                break

            pool = pool.copy()
            pool["__dist_centroid"] = pool.apply(
                lambda r: haversine_km(c_lat_acc, c_lon_acc, r.get("LAT", np.nan), r.get("LON", np.nan)), axis=1
            )
            pool = pool[pool["__dist_centroid"] <= float(umbral_km_backfill)].copy()
            if pool.empty:
                break

            pool = pool.sort_values(["__dist_centroid","__v"], ascending=[True, False])

            seed = pool.iloc[0]
            seed_lat = seed.get("LAT", np.nan)
            seed_lon = seed.get("LON", np.nan)

            rows_cluster, (c_lat, c_lon), used_ids = _build_cluster_from_seed(seed, pool.iloc[1:], remaining_cap)
            used_now = _append_cluster(rows_cluster, seed["POZO"], seed_lat, seed_lon, c_lat, c_lon)

            if used_now:
                clusters_hechos += 1
                used_set.update(used_now)
                pool = pool[~pool["POZO"].isin(used_now)].copy()
            else:
                pool = pool.iloc[1:].copy()

    return assigned

def assign_week_round_robin_by_zone(cand_all, team_ids, params, week_start, week_end, radius_km):
    dias   = int(params["dias_por_semana"])
    cap_pz = int(params["max_pozos_dia_equipo"])
    cap_cluster = int(params.get("max_pozos_por_cluster", 4))

    used_glob = set()
    rows = []

    for d in range(dias):
        day_date = pd.Timestamp(week_start) + pd.Timedelta(days=d)

        pool_day = cand_all[~cand_all["POZO"].isin(used_glob)].copy()
        if pool_day.empty:
            continue
        in_window = (pd.to_datetime(pool_day["due_date"]) <= pd.Timestamp(week_end)) | pool_day["is_overdue"]
        pool_day = pool_day[in_window].copy()
        if pool_day.empty:
            continue

        for eq in team_ids:
            avail = pool_day[~pool_day["POZO"].isin(used_glob)].copy()
            if avail.empty:
                continue

            assigned_today = _fill_day_star_clusters(
                day_date, avail, cap_pz, radius_km, used_glob,
                cluster_cap=cap_cluster, params=params,
                clusters_max=params.get("clusters_por_dia_max"),
                backfill_nearest=bool(params.get("backfill_nearest_cluster", True)),
                umbral_km_backfill=float(params.get("umbral_km_backfill", 5.0))
            )

            for ord_idx, a in enumerate(assigned_today, start=1):
                try:
                    v_est = _v_est_for_day({"r_m3_d": a.get("r_m3_d", np.nan),
                                            "ultima_medicion": a.get("ultima_medicion", pd.NaT)}, day_date)
                except Exception:
                    v_est = 0.0

                a.update({
                    "Equipo": int(eq),
                    "Dia_Idx": d+1,
                    "Orden": ord_idx,
                    "Vol_Estimado_m3": round(float(v_est), 2)
                })
                rows.append(a)

    cols = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
            "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
            "Seed_POZO","Dist_km_semilla","Dist_km_centroid","ultima_medicion"]
    return pd.DataFrame(rows, columns=cols) if rows else pd.DataFrame(columns=cols)

# ==========================
# Export y ABM
# ==========================
def build_alertas_abm(freq_df: pd.DataFrame, norm_table: pd.DataFrame, dict_df: pd.DataFrame) -> pd.DataFrame:
    if dict_df is None or dict_df.empty:
        return pd.DataFrame(columns=["POZO","ZONA","BATERIA","ultima_medicion","ultima_exitosa","estado","met_prod"])
    base = freq_df[["POZO","ZONA","BATERIA","ultima_medicion","ultima_exitosa"]].copy()
    meta_first = dict_df.groupby("oficial")[["estado","met_prod","nivel_3","nivel_5"]].first()
    base = base.merge(meta_first[["estado","met_prod"]], left_on="POZO", right_index=True, how="left")
    out = base.copy()
    for c in ["ultima_medicion","ultima_exitosa"]:
        out[c] = pd.to_datetime(out[c], errors="coerce").dt.date
    out = out.sort_values(["ZONA","BATERIA","POZO"]).reset_index(drop=True)
    return out

# ==========================
# RUNNER PARA JUPYTER
# ==========================
def run_pipeline_jupyter(
    input_file,
    nombres_pozo_file,
    coords_file,
    *,
    semanas_plan=8,
    equipos_activos=2,
    dias_por_semana=5,
    max_pozos_dia_equipo=10,
    K_max_pozos_por_cluster=5,
    clusters_por_dia_max=None,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=RM3D_MIN_DEFAULT,
    zonas_incluir=None,              # lista de etiquetas nivel_3 (labels originales)
    baterias_por_zona=None,          # dict norm: {zona_norm: set(bat_norm)} o None
    pozos_excluir=None,              # set/list de POZOS a excluir
    escribir_excel=False
):
    # 1) Leer historial
    df = read_historial(input_file, sheet_hist=None)

    # 2) Normalización via diccionario
    key2off, dict_df = load_pozo_dictionary(nombres_pozo_file)
    df_norm, alert_table, norm_table = apply_pozo_normalization(df, key2off, dict_df)

    # Filtrar inválidos
    df = df_norm[df_norm["VALIDO_POZO"] == True].copy()

    # 3) ZONAS a incluir
    if zonas_incluir:
        zonas_norm = {_norm(z) for z in zonas_incluir}
        df = df[df["__ZONA_NORM"].isin(zonas_norm)].copy()
        zonas_labels = zonas_incluir
    else:
        zonas_labels = sorted(set(df["ZONA"].dropna().astype(str)))

    # Subfiltro baterías (ya viene por param)
    allowed_bats_by_zone_norm = baterias_por_zona or {}

    # 4) Exclusiones
    excl_total = set(pozos_excluir or [])

    # 5) Frecuencias
    params = {
        "equipos_activos": equipos_activos,
        "dias_por_semana": dias_por_semana,
        "semanas_plan": semanas_plan,
        "k_visitas": 1,
        "max_pozos_dia_equipo": max_pozos_dia_equipo,
        "max_pozos_por_cluster": K_max_pozos_por_cluster,
        "m3_por_visita_objetivo": 2.0,
        "min_dias_freq": 7,
        "max_dias_freq": 56,
        "dias_asumidos_una_visita": 7,
        "freq_dias_ultimo_cero_valido": 30,
        "top_semillas_eval": 30,
        "clusters_por_dia_max": clusters_por_dia_max,
        "backfill_nearest_cluster": bool(backfill_nearest),
        "umbral_km_backfill": float(umbral_km_backfill),
    }

    freq = compute_frecuencias(df, params)

    # Comentario desde Observaciones cuando ultima_medicion != ultima_exitosa
    df_obs = df[["POZO", "FECHA", "OBS_POZO"]].copy() if "OBS_POZO" in df.columns else pd.DataFrame(columns=["POZO","FECHA","OBS_POZO"])
    df_obs["FECHA_DATE"] = pd.to_datetime(df_obs["FECHA"], errors="coerce").dt.date
    df_obs = (df_obs.dropna(subset=["FECHA_DATE"])
                    .sort_values(["POZO","FECHA_DATE"])
                    .drop_duplicates(subset=["POZO","FECHA_DATE"], keep="last"))
    obs_map = {(r.POZO, r.FECHA_DATE): (str(r.OBS_POZO).strip() if pd.notna(r.OBS_POZO) else "")
               for r in df_obs.itertuples(index=False)}
    freq["__UMED_DATE"] = pd.to_datetime(freq["ultima_medicion"], errors="coerce").dt.date
    freq["__UEXI_DATE"] = pd.to_datetime(freq["ultima_exitosa"], errors="coerce").dt.date
    freq["comentario"] = [obs_map.get((pz, fmed), "") for pz, fmed in zip(freq["POZO"], freq["__UMED_DATE"])]
    mask_both_valid = freq["__UMED_DATE"].notna() & freq["__UEXI_DATE"].notna()
    mask_diff = mask_both_valid & (freq["__UMED_DATE"] != freq["__UEXI_DATE"])
    freq.loc[~mask_diff, "comentario"] = ""
    freq.drop(columns=["__UMED_DATE","__UEXI_DATE"], errors="ignore", inplace=True)

    # 7) Coordenadas
    coords_df = read_coords(coords_file)

    # 8) Mapas auxiliares
    delta_by_pozo = freq.set_index("POZO")["delta_star_dias"].to_dict()
    r_by_pozo     = freq.set_index("POZO")["r_m3_d"].to_dict()

    # 9) Semanas
    start = next_monday(date.today())
    weeks = [(start + timedelta(weeks=i), start + timedelta(weeks=i, days=6)) for i in range(params["semanas_plan"])]

    # 10) Equipos fijos por zona
    zonas_list = sorted(set(zonas_labels))
    equipo_to_zona = {}
    for i in range(1, params["equipos_activos"]+1):
        zona_asignada = zonas_list[min(i-1, len(zonas_list)-1)]
        equipo_to_zona[i] = zona_asignada

    # 11) Plan semanal
    plan_all = []
    next_due = {row.POZO: row.proxima_visita_base for row in freq.itertuples()}

    zone_to_teams = {}
    for eq, zona_label in equipo_to_zona.items():
        zone_to_teams.setdefault(zona_label, []).append(eq)

    for (w_start, w_end) in weeks:
        for zona_label, team_list in zone_to_teams.items():
            zona_norm_label = _norm(zona_label)

            cand_all = build_candidates_with_coords(
                freq=freq,
                week_start=w_start,
                week_end=w_end,
                excl_pozos=excl_total,
                zonas_norm_incluidas={zona_norm_label},
                coords_df=coords_df,
                allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
                next_due_map=next_due,
                rm3d_min=rm3d_min
            )
            if cand_all.empty:
                continue

            cand_zone = cand_all[[
                "POZO","ZONA","BATERIA","due_date","is_overdue","__v",
                "LAT","LON","has_coords","r_m3_d","ultima_medicion"
            ]].copy()

            plan_week_zone = assign_week_round_robin_by_zone(
                cand_all=cand_zone,
                team_ids=sorted(team_list),
                params=params,
                week_start=w_start,
                week_end=w_end,
                radius_km=radius_km
            )

            if not plan_week_zone.empty:
                plan_all.append(plan_week_zone)
                for pz, fcal in plan_week_zone[["POZO","Plan_Fecha"]].drop_duplicates().itertuples(index=False):
                    dd = int(delta_by_pozo.get(pz, params["min_dias_freq"]))
                    next_due[pz] = pd.Timestamp(fcal) + pd.Timedelta(days=dd)

    plan = pd.concat(plan_all, ignore_index=True) if plan_all else pd.DataFrame(columns=[
        "Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden","ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
        "Seed_POZO","Dist_km_semilla","Dist_km_centroid","ultima_medicion"
    ])

    # 12) Cobertura anual reforzada (igual a tu versión)
    if not freq.empty:
        eligible_mask = (freq["ZONA"].isin(zonas_labels)) & (freq["r_m3_d"].fillna(0) > rm3d_min)
        if "comentario" in freq.columns:
            eligible_mask &= (freq["comentario"].astype(str).fillna("").str.strip() == "")
        if allowed_bats_by_zone_norm:
            for zn, bats in allowed_bats_by_zone_norm.items():
                if bats is not None:
                    eligible_mask &= (~(freq["ZONA_NORM"] == zn)) | (freq["BATERIA_NORM"].isin(bats))

        all_pozos_in_zonas = freq.loc[eligible_mask, ["POZO","ZONA","BATERIA"]].drop_duplicates().copy()
        all_pozos_in_zonas = all_pozos_in_zonas[
            all_pozos_in_zonas["BATERIA"].notna() & (all_pozos_in_zonas["BATERIA"].astype(str).str.strip() != "")
        ].copy()

        # (Atajo simple en el harness: no reinyecto el ensure_annual_coverage para acortar tiempos de prueba)
        # Si lo necesitás idéntico, puedo añadirlo también aquí.

    # 13) Excel (opcional)
    output_path = true
    if escribir_excel:
        output_path = unique_output_path(input_file)
        coords_all = read_coords(coords_file)
        with pd.ExcelWriter(output_path, engine="openpyxl", mode="w") as writer:
            # Frecuencias
            freq_out = freq.copy()
            for c in ["proxima_visita_base","ultima_medicion","ultima_exitosa"]:
                freq_out[c] = pd.to_datetime(freq_out[c], errors="coerce").dt.date
            freq_out = freq_out.sort_values(["ZONA","BATERIA","POZO"])
            cols_pref = ["POZO","ZONA","BATERIA","ZONA_NORM","BATERIA_NORM","r_m3_d",
                         "ultima_medicion","ultima_exitosa","delta_star_dias","comentario",
                         "proxima_visita_base","ceros_consec","alerta"]
            cols_final = [c for c in cols_pref if c in freq_out.columns] + [c for c in freq_out.columns if c not in cols_pref]
            freq_out = freq_out[cols_final]
            freq_out.to_excel(writer, "Frecuencias", index=False)

            # Plan por equipo (con Km_al_siguiente)
            cols_plan = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
                         "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
                         "Seed_POZO","Dist_km_semilla","Dist_km_centroid"]
            for eq in range(1, params["equipos_activos"]+1):
                pe = plan.loc[plan["Equipo"]==eq].copy()
                if pe.empty:
                    pe = pd.DataFrame(columns=cols_plan + ["Km_al_siguiente","Ejecutado"])
                else:
                    pe = pe.sort_values(["Plan_Fecha","Dia_Idx","Orden","POZO"]).copy()
                    pe = pe.merge(coords_all, how="left", on="POZO")
                    pe["LAT_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LAT"].shift(-1)
                    pe["LON_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LON"].shift(-1)
                    def _leg_km(row):
                        if (pd.isna(row.get("LAT")) or pd.isna(row.get("LON")) or
                            pd.isna(row.get("LAT_next")) or pd.isna(row.get("LON_next"))):
                            return None
                        return round(float(haversine_km(row["LAT"], row["LON"], row["LAT_next"], row["LON_next"])), 3)
                    pe["Km_al_siguiente"] = pe.apply(_leg_km, axis=1)
                    pe.drop(columns=["LAT","LON","LAT_next","LON_next"], inplace=True, errors="ignore")
                    pe["Ejecutado"] = ""
                    for c in cols_plan:
                        if c not in pe.columns:
                            pe[c] = ""
                    pe = pe[cols_plan + ["Km_al_siguiente","Ejecutado"]]
                pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)

            if not norm_table.empty:
                norm_table.to_excel(writer, "Normalizacion_Pozos", index=False)
            if not alert_table.empty:
                alert_table.to_excel(writer, "Alertas_Normalizacion", index=False)

            alertas_abm = build_alertas_abm(freq_out, norm_table, dict_df)
            alertas_abm.to_excel(writer, "Alertas de ABM", index=False)

    return plan, freq, output_path




# === EJEMPLO DE USO EN JUPYTER ===
# (Editá las rutas de tus archivos y parámetros)
# plan, freq, out_xlsx = run_pipeline_jupyter(
#     input_file=r"DIAGRAMA SW.xlsx",
#     nombres_pozo_file=r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx",
#     coords_file=r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx",
#     semanas_plan=4,
#     equipos_activos=2,
#     dias_por_semana=5,
#     max_pozos_dia_equipo=10,
#     K_max_pozos_por_cluster=5,
#     clusters_por_dia_max=None,
#     backfill_nearest=True,
#     umbral_km_backfill=5.0,
#     radius_km=3.0,
#     rm3d_min=0.1,
#     zonas_incluir=["Las Heras CG - Canadon Escondida"],  # o None para todas
#     baterias_por_zona=None,         # por ejemplo: {"las heras cg - canadon escondida": {"swabing ce","ce 04"}}
#     pozos_excluir=set(),            # set de pozos a excluir
#     escribir_excel=False            # True para exportar Excel
# )
# display(freq.head(10))
# display(plan.head(20))
# print("Excel:", out_xlsx)


In [25]:
# ============================================
# Monocelda Jupyter: Planificador + Harness + Runner
# ============================================

# -*- coding: utf-8 -*-
import os, re, unicodedata, math
import numpy as np
import pandas as pd
from datetime import date, timedelta, datetime

# ==========================
# CONFIG por defecto (se sobreescriben en el runner)
# ==========================
INPUT_FILE  = r"DIAGRAMA SW.xlsx"   # Excel base (NO se modifica)
SHEET_HIST  = None                  # None => autodetecta hoja/encabezados
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

# Radio en km para agrupar por cercanía
RADIUS_KM = 3.0
# Filtro mínimo de potencial
RM3D_MIN = 0.1

# Umbrales para fuzzy (si se usan)
FUZZY_REPLACE_THRESHOLD = 85
FUZZY_SUGGEST_THRESHOLD = 75
LETTERS_SIMILARITY_MIN  = 80

DEFAULTS = {
    "equipos_activos": 4,                 # 1..4
    "dias_por_semana": 5,                 # 5 o 6
    "semanas_plan": 2,                    # para probar rápido en Jupyter
    "k_visitas": 1,                       # tasas (K=1 por pedido)
    "max_pozos_dia_equipo": 10,           # cupo por día por equipo
    "max_pozos_por_cluster": 5,           # tamaño de clúster (K fijo si usás lógica de clústeres fijos)
    "m3_por_visita_objetivo": 2.0,        # informativo
    "min_dias_freq": 7,                   # 1 semana
    "max_dias_freq": 56,                  # 8 semanas
    "dias_asumidos_una_visita": 7,        # para r si hay 1 sola visita
    "freq_dias_ultimo_cero_valido": 30,

    # Semillas a evaluar (si se usa lógica de semillas)
    "top_semillas_eval": 30,

    # Control de clústeres por día y backfill (si se usa lógica por semilla)
    "clusters_por_dia_max": None,
    "backfill_nearest_cluster": True,
    "umbral_km_backfill": 5.0,
}

# ==========================
# Utils
# ==========================
def _norm(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = s.replace("³", "3")
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    s = s.lower().strip().replace("\xa0"," ")
    s = s.replace("_"," ").replace("-"," ").replace("."," ").replace("\n"," ")
    return " ".join(s.split())

def _pozo_key(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    return "".join(ch for ch in s if ch.isalnum()).upper()

def _canonical_digits(d: str) -> str:
    d = (d or "").lstrip("0")
    return d if d != "" else "0"

def _letters_digits_from_key_both(k: str):
    raw_digits = "".join(re.findall(r"\d+", k))
    digits_canon = _canonical_digits(raw_digits)
    letters = re.sub(r"\d+", "", k)
    return letters, digits_canon, len(raw_digits)

def _ratio_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _fuzzy_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.partial_ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _canon_prefix_pozo(s: str) -> str:
    if s is None or (isinstance(s, float) and np.isnan(s)):
        return s
    raw = str(s).strip()
    raw_up = raw.upper()
    if raw_up.startswith("CÑE"):
        return "CNE" + raw_up[3:]
    raw_ascii = unicodedata.normalize("NFKD", raw_up).encode("ascii", "ignore").decode("ascii")
    if raw_ascii.startswith("CNE"):
        return raw_ascii
    if raw_ascii.startswith("CN"):
        return "CNE" + raw_ascii[2:]
    m = re.match(r"^CE(\d+)$", raw_ascii)
    if m:
        return "CNE" + m.group(1)
    return raw_ascii

def next_monday(d=None):
    d = d or date.today()
    return d + timedelta(days=(7 - d.weekday()) % 7)  # 0=Lunes

def unique_output_path(base_input_path: str) -> str:
    folder = os.path.dirname(os.path.abspath(base_input_path))
    stem   = os.path.splitext(os.path.basename(base_input_path))[0]
    today  = datetime.now().strftime("%Y%m%d")
    base   = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}.xlsx")
    if not os.path.exists(base): return base
    i = 2
    while True:
        cand = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}_({i}).xlsx")
        if not os.path.exists(cand): return cand
        i += 1

EXPECTED_KEYS = {
    "fecha":       ["fecha"],
    "pozo":        ["pozo"],
    "zona":        ["zona"],
    "bateria":     ["bateria", "batería"],
    "m3":          ["m3 bruta","m3","m3_bruta","m3bruta","m 3 bruta","m 3","m3 bruto","m3 recuperado","m3 recupero"],
    "carreras":    ["n de carreras","n° de carreras","nº de carreras","no de carreras","nro de carreras","numero de carreras","n° carreras","n de carrera","n carreras"],
    "nivel_final": ["nivel final pozo","nivel final","nivel final del pozo"],
    "obs_pozo":    ["observaciones del pozo","observaciones","comentarios","comentario"]
}

def _find_header_row(df_raw):
    for i in range(min(200, len(df_raw))):
        row_norm = [_norm(x) for x in df_raw.iloc[i,:].tolist()]
        if not row_norm:
            continue
        colmap = {v:j for v,j in zip(row_norm, range(len(row_norm)))}
        def has_any(keys): return any(k in colmap for k in keys)
        if has_any(EXPECTED_KEYS["fecha"]) and has_any(EXPECTED_KEYS["pozo"]) and has_any(EXPECTED_KEYS["zona"]) and has_any(EXPECTED_KEYS["bateria"]):
            return i, row_norm
    return None, None

# ---------- Nombres pozo ----------
def load_pozo_dictionary(xlsx_path: str):
    try:
        ref = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer diccionario de pozos: {xlsx_path}\n{e}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    cols = {c.lower().strip(): c for c in ref.columns}
    if "nombre_corto_pozo" not in cols:
        print(f"\n[AVISO] El diccionario no tiene la columna 'nombre_corto_pozo'. Columnas: {list(ref.columns)}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    c_pozo = cols["nombre_corto_pozo"]
    c_met  = cols.get("met_prod")
    c_n3   = cols.get("nivel_3")
    c_n5   = cols.get("nivel_5")
    c_est  = cols.get("estado")

    refv = ref.loc[ref[c_pozo].notna()].copy()
    refv[c_pozo] = refv[c_pozo].astype(str).str.strip()

    of_list  = refv[c_pozo].tolist()
    met_vals = refv[c_met].astype(str).str.strip() if c_met else np.nan
    n3_vals  = refv[c_n3].astype(str).str.strip()  if c_n3 else np.nan
    n5_vals  = refv[c_n5].astype(str).str.strip()  if c_n5 else np.nan
    est_vals = refv[c_est].astype(str).str.strip() if c_est else np.nan

    keys, letters_, digits_canon_, digits_len_ = [], [], [], []
    for val in of_list:
        k = _pozo_key(val)
        L, Dcanon, Dlen = _letters_digits_from_key_both(k)
        keys.append(k); letters_.append(L); digits_canon_.append(Dcanon); digits_len_.append(Dlen)

    dict_df = pd.DataFrame({
        "oficial": of_list,
        "key": keys,
        "letters": letters_,
        "digits_canon": digits_canon_,
        "digits_len": digits_len_,
        "met_prod": list(met_vals) if isinstance(met_vals, pd.Series) else [np.nan]*len(of_list),
        "nivel_3":  list(n3_vals)  if isinstance(n3_vals,  pd.Series) else [np.nan]*len(of_list),
        "nivel_5":  list(n5_vals)  if isinstance(n5_vals,  pd.Series) else [np.nan]*len(of_list),
        "estado":   list(est_vals) if isinstance(est_vals, pd.Series) else [np.nan]*len(of_list),
    })

    key2off = {}
    for k, off in zip(dict_df["key"], dict_df["oficial"]):
        if k and k not in key2off:
            key2off[k] = off
    return key2off, dict_df

def apply_pozo_normalization(df: pd.DataFrame, key2off: dict, dict_df: pd.DataFrame):
    df = df.copy()
    df["POZO_ORIG"] = df["POZO"].astype(str).str.strip()
    df["POZO_PreCanon"] = df["POZO_ORIG"].apply(_canon_prefix_pozo)
    df["__POZO_KEY"] = df["POZO_PreCanon"].apply(_pozo_key)

    parts = df["__POZO_KEY"].apply(_letters_digits_from_key_both)
    df["__KEY_LET"], df["__KEY_DIG_CANON"], df["__KEY_DIG_LEN"] = zip(*parts)

    df["POZO_MATCH"]   = None
    df["MATCH_TIPO"]   = "NO"
    df["MATCH_SCORE"]  = np.nan
    df["LETTER_SCORE"] = np.nan
    df["APLICADO"]     = "NO"
    df["ALERTA_NORM"]  = ""
    df["VALIDO_POZO"]  = True

    invalid_mask = (df["__KEY_LET"].str.len()==0) | (df["__KEY_DIG_LEN"]==0)
    if invalid_mask.any():
        df.loc[invalid_mask, "ALERTA_NORM"] = "SIN_LETRAS_O_DIGITOS"
        df.loc[invalid_mask, "VALIDO_POZO"] = False

    valid_mask = ~invalid_mask
    exact_mask = valid_mask & df["__POZO_KEY"].isin(key2off.keys())
    df.loc[exact_mask, "POZO_MATCH"]   = df.loc[exact_mask, "__POZO_KEY"].map(key2off)
    df.loc[exact_mask, "MATCH_TIPO"]   = "EXACTO"
    df.loc[exact_mask, "MATCH_SCORE"]  = 100
    df.loc[exact_mask, "LETTER_SCORE"] = 100
    df.loc[exact_mask, "APLICADO"]     = "SI"

    pending = df[valid_mask & (~exact_mask)].index.tolist()
    if pending and not dict_df.empty:
        dict_by_spec = {}
        for spec, sub in dict_df.groupby(["digits_canon","digits_len"]):
            dict_by_spec[spec] = sub

        for idx in pending:
            key_u   = df.at[idx, "__POZO_KEY"]
            let_u   = df.at[idx, "__KEY_LET"]
            digc_u  = df.at[idx, "__KEY_DIG_CANON"]
            digl_u  = int(df.at[idx, "__KEY_DIG_LEN"])

            cand_df = dict_by_spec.get((digc_u, digl_u), pd.DataFrame())
            best_off, best_score, best_lscore = None, -1, -1

            if cand_df is not None and not cand_df.empty:
                for row in cand_df.itertuples():
                    kk = row.key
                    ll = row.letters
                    sc_key = _fuzzy_score(key_u, kk)
                    sc_let = _ratio_score(let_u, ll)
                    if sc_let < LETTERS_SIMILARITY_MIN:
                        continue
                    if sc_key > best_score or (sc_key == best_score and sc_let > best_lscore):
                        best_score = sc_key
                        best_lscore = sc_let
                        best_off   = row.oficial

            if best_off is not None:
                df.at[idx, "POZO_MATCH"]   = best_off
                df.at[idx, "MATCH_TIPO"]   = "SUGERIDO"
                df.at[idx, "MATCH_SCORE"]  = int(best_score)
                df.at[idx, "LETTER_SCORE"] = int(best_lscore)
            else:
                df.at[idx, "ALERTA_NORM"] = "SIN MATCH EN DICCIONARIO"

    # Reemplazos
    df["POZO"] = df["POZO_MATCH"].where(df["POZO_MATCH"].notna(), df["POZO"])
    meta_first = dict_df.groupby("oficial")[["met_prod","nivel_3","nivel_5"]].first()
    df = df.merge(meta_first, how="left", left_on="POZO", right_index=True)

    # ZONA sólo si hubo match; sino, vacío
    if "nivel_3" in df.columns:
        df.loc[df["POZO_MATCH"].isna(), "nivel_3"] = ""
        df["ZONA"] = np.where(df["POZO_MATCH"].notna(), df["nivel_3"].fillna(""), "")

    # BATERIA si hay nivel_5
    if "nivel_5" in df.columns:
        df["BATERIA"] = np.where(
            df["nivel_5"].notna() & (df["nivel_5"].astype(str).str.strip()!=""),
            df["nivel_5"], df["BATERIA"]
        )

    df["__ZONA_NORM"]    = df["ZONA"].apply(_norm)
    df["__BATERIA_NORM"] = df["BATERIA"].apply(_norm)

    norm_table = (df[["POZO_ORIG","POZO_PreCanon","__POZO_KEY",
                      "__KEY_LET","__KEY_DIG_CANON","__KEY_DIG_LEN",
                      "POZO_MATCH","MATCH_TIPO","MATCH_SCORE","LETTER_SCORE",
                      "APLICADO","ALERTA_NORM","VALIDO_POZO",
                      "met_prod","nivel_3","nivel_5"]]
                  .drop_duplicates()
                  .rename(columns={
                      "POZO_ORIG":"Pozo_Original",
                      "POZO_PreCanon":"Pozo_PreCanon",
                      "__POZO_KEY":"Clave_Normalizada",
                      "__KEY_LET":"Letras",
                      "__KEY_DIG_CANON":"Digitos_Canon",
                      "__KEY_DIG_LEN":"Digitos_Len",
                      "POZO_MATCH":"Match_Oficial",
                      "MATCH_TIPO":"Match_Tipo",
                      "MATCH_SCORE":"Match_Score",
                      "LETTER_SCORE":"Letter_Score",
                      "APLICADO":"Aplicado",
                      "ALERTA_NORM":"Alerta",
                      "VALIDO_POZO":"Valido",
                      "met_prod":"met_prod",
                      "nivel_3":"nivel_3",
                      "nivel_5":"nivel_5"
                  })
                  .sort_values(["Valido","Aplicado","Match_Tipo","Pozo_Original"], ascending=[False, False, True, True]))

    alert_table = norm_table[(norm_table["Valido"]==False) | (norm_table["Aplicado"]=="NO") | (norm_table["Match_Tipo"]=="NO")].copy()
    return df, alert_table, norm_table

def read_historial(xlsx_path, sheet_hist=None):
    xl = pd.ExcelFile(xlsx_path)
    sheets = [sheet_hist] if (sheet_hist and sheet_hist in xl.sheet_names) else xl.sheet_names
    for sh in sheets:
        raw = xl.parse(sh, header=None)
        idx, header_norm = _find_header_row(raw)
        if idx is None:
            continue
        data = raw.iloc[idx:, :].copy()
        true_headers = data.iloc[0,:].astype(str).tolist()
        data = data.iloc[1:,:]
        data.columns = true_headers

        name_map = {c: _norm(c) for c in data.columns}
        def find_col(candidates):
            for c, n in name_map.items():
                if n in candidates:
                    return c
            return None

        c_fecha       = find_col(set(EXPECTED_KEYS["fecha"]))
        c_pozo        = find_col(set(EXPECTED_KEYS["pozo"]))
        c_zona        = find_col(set(EXPECTED_KEYS["zona"]))
        c_bateria     = find_col(set(EXPECTED_KEYS["bateria"]))
        c_m3          = find_col(set(EXPECTED_KEYS["m3"]))
        c_carr        = find_col(set(EXPECTED_KEYS["carreras"]))
        c_nivel_final = find_col(set(EXPECTED_KEYS["nivel_final"]))
        c_obs         = find_col(set(EXPECTED_KEYS["obs_pozo"]))

        if not (c_fecha and c_pozo and c_zona and c_bateria):
            continue

        use_cols = [c_fecha, c_pozo, c_zona, c_bateria]
        headers  = ["FECHA","POZO","ZONA","BATERIA"]
        if c_m3:            use_cols.append(c_m3);            headers.append("M3")
        if c_carr:          use_cols.append(c_carr);          headers.append("CARRERAS")
        if c_nivel_final:   use_cols.append(c_nivel_final);   headers.append("NIVEL_FINAL")
        if c_obs:           use_cols.append(c_obs);           headers.append("OBS_POZO")

        df = data[use_cols].copy()
        df.columns = headers

        df["FECHA"] = pd.to_datetime(df["FECHA"], errors="coerce")
        if "M3" not in df.columns: df["M3"] = np.nan
        else: df["M3"] = pd.to_numeric(df["M3"], errors="coerce")

        if "CARRERAS" not in df.columns: df["CARRERAS"] = np.nan
        else: df["CARRERAS"] = pd.to_numeric(df["CARRERAS"], errors="coerce")

        if "NIVEL_FINAL" not in df.columns:
            df["NIVEL_FINAL"] = None
        if "OBS_POZO" not in df.columns:
            df["OBS_POZO"] = None

        for col in ["POZO","ZONA","BATERIA","NIVEL_FINAL","OBS_POZO"]:
            df[col] = df[col].astype(str).str.strip().replace({"nan": np.nan})

        df = df.dropna(subset=["FECHA","POZO"]).sort_values(["POZO","FECHA"])
        return df

    raise ValueError("No pude detectar FECHA/POZO/ZONA/BATERÍA en ninguna hoja del Excel.")

def read_exclusions_from_sheet(xlsx_path):
    excl = set()
    try:
        xl = pd.ExcelFile(xlsx_path)
        if "ExcluirPozos" in xl.sheet_names:
            e = xl.parse("ExcluirPozos")
            e.columns = [str(c).strip().lower() for c in e.columns]
            if "pozo" in e.columns:
                if "excluir" in e.columns:
                    excl = set(e.loc[e["excluir"].astype(str).str.upper().isin(
                        ["SI","SÍ","YES","1","TRUE"]), "pozo"].astype(str).str.strip())
                else:
                    excl = set(e["pozo"].astype(str).str.strip())
    except Exception:
        pass
    return excl

# ==========================
# Frecuencias / r_m3_d
# ==========================
def _count_trailing_zeros_with_carr(g):
    cnt = 0
    for _, row in g.sort_values("FECHA").iloc[::-1].iterrows():
        m3 = row.get("M3", np.nan)
        car = row.get("CARRERAS", np.nan)
        if pd.notna(m3) and float(m3) == 0.0 and pd.notna(car) and float(car) > 0:
            cnt += 1
        else:
            break
    return cnt

def compute_frecuencias(df, params):
    v_target = params["m3_por_visita_objetivo"]
    min_d    = params["min_dias_freq"]
    max_d    = params["max_dias_freq"]
    k        = int(params["k_visitas"])
    one_days = int(params.get("dias_asumidos_una_visita", 7))
    freq_cero_ultimo = int(params.get("freq_dias_ultimo_cero_valido", 30))

    out = []
    for pozo, g0 in df.groupby("POZO", sort=False):
        g = g0.sort_values("FECHA").copy()

        for col in ["ZONA","BATERIA","NIVEL_FINAL"]:
            if col in g.columns:
                g[col] = g[col].replace({None: np.nan})
                g[col] = g[col].ffill().bfill()

        g["__ZONA_NORM"]    = g["ZONA"].apply(_norm)
        g["__BATERIA_NORM"] = g["BATERIA"].apply(_norm)
        g["__nf_norm"]      = g["NIVEL_FINAL"].apply(_norm) if "NIVEL_FINAL" in g.columns else ""

        med_validas_all = g[g["M3"].notna()].copy()

        m3_eq0 = g["M3"].fillna(0) == 0
        carr   = g.get("CARRERAS", pd.Series(index=g.index, dtype=float)).fillna(np.nan)
        zero_cond_a = m3_eq0 & (carr.fillna(0) >= 1)
        zero_cond_b = m3_eq0 & ((carr.isna()) | (carr.fillna(0) == 0)) & (g["__nf_norm"] == "surge")
        cond_cero_valido = zero_cond_a | zero_cond_b

        validas_rate = g[(g["M3"] > 0) | cond_cero_valido].copy()
        zeros_tail = _count_trailing_zeros_with_carr(g)

        ultima_med = med_validas_all["FECHA"].max() if not med_validas_all.empty else pd.NaT
        ultima_exi = g.loc[g["M3"]>0, "FECHA"].max() if "M3" in g.columns and not g[g["M3"]>0].empty else pd.NaT

        last_zero_valido = False
        if not med_validas_all.empty:
            idx_last = med_validas_all["FECHA"].idxmax()
            m3_last  = g.at[idx_last, "M3"]
            if pd.notna(m3_last) and float(m3_last) == 0.0:
                try:
                    last_zero_valido = bool(cond_cero_valido.loc[idx_last])
                except Exception:
                    last_zero_valido = False

        alerta = ""
        if last_zero_valido:
            alerta = f"ULTIMA_M3_0_VALIDO -> FREQ {freq_cero_ultimo}D"
        elif pd.notna(ultima_med):
            if zeros_tail > 0:
                alerta = f"ALERTA: {zeros_tail} cero(s) consecutivo(s) con Carreras>0"

        # r_m3_d
        r = np.nan
        if not validas_rate.empty:
            v = validas_rate.copy()
            v["delta_d"] = v["FECHA"].diff().dt.days
            v.loc[v["delta_d"] <= 0, "delta_d"] = np.nan
            v["rate"] = v["M3"].fillna(0) / v["delta_d"]
            rates = v["rate"].dropna()
            if len(rates) >= 1:
                r = rates.tail(min(k, len(rates))).mean()
            else:
                row = v.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan
        else:
            if len(med_validas_all) == 1:
                row = med_validas_all.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan

        # FRECUENCIA
        if last_zero_valido:
            delta = int(freq_cero_ultimo)
        else:
            if pd.isna(r):      delta = 7
            elif r <= 0:        delta = max_d
            else:
                delta = max(min_d, min(max_d, float(v_target)/float(r)))
                delta = int(7 * round(delta / 7.0))
                if delta < 7:
                    delta = 7

        prox = (ultima_med + pd.Timedelta(days=int(delta))) if pd.notna(ultima_med) else pd.Timestamp(next_monday())

        out.append({
            "POZO": pozo,
            "ZONA": g["ZONA"].iloc[-1],
            "BATERIA": g["BATERIA"].iloc[-1],
            "ZONA_NORM": g["__ZONA_NORM"].iloc[-1],
            "BATERIA_NORM": g["__BATERIA_NORM"].iloc[-1],
            "r_m3_d": r,
            "ultima_medicion": ultima_med,
            "ultima_exitosa": ultima_exi,
            "delta_star_dias": int(delta),
            "proxima_visita_base": prox,
            "ceros_consec": zeros_tail,
            "alerta": alerta
        })
    return pd.DataFrame(out)

# ==========================
# Coordenadas
# ==========================
def _to_float_maybe_comma(x):
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip()
    if s == "": return np.nan
    s = s.replace(",", ".")
    try:
        return float(s)
    except Exception:
        return np.nan

def read_coords(xlsx_path):
    try:
        cdf = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer coordenadas: {xlsx_path}\n{e}\n")
        return pd.DataFrame(columns=["POZO","LAT","LON"])
    cols_map = {c.lower().strip(): c for c in cdf.columns}
    c_pozo = cols_map.get("pozo")
    for k in ["geo_latitude","latitude","lat"]:
        if k in cols_map:
            c_lat = cols_map[k]; break
    else:
        c_lat = None
    for k in ["geo_longitude","longitude","lon","long"]:
        if k in cols_map:
            c_lon = cols_map[k]; break
    else:
        c_lon = None

    if not (c_pozo and c_lat and c_lon):
        print(f"[AVISO] Coordenadas: columnas esperadas 'POZO','GEO_LATITUDE','GEO_LONGITUDE'. Columnas encontradas: {list(cdf.columns)}")
        return pd.DataFrame(columns=["POZO","LAT","LON"])

    out = cdf[[c_pozo, c_lat, c_lon]].copy()
    out.columns = ["POZO","LAT","LON"]
    out["POZO"] = out["POZO"].astype(str).str.strip()
    out["LAT"] = out["LAT"].apply(_to_float_maybe_comma)
    out["LON"] = out["LON"].apply(_to_float_maybe_comma)
    out = out.dropna(subset=["POZO"])
    out = out.drop_duplicates(subset=["POZO"], keep="last")
    return out

# ==========================
# Candidatos y utilidades
# ==========================
def build_candidates_with_coords(freq, week_start, week_end, excl_pozos,
                                 zonas_norm_incluidas, coords_df,
                                 allowed_bats_by_zone_norm=None,
                                 next_due_map=None):
    F = freq.copy()

    # due_date base (permitimos override con next_due_map)
    F["due_date"] = F["proxima_visita_base"]
    if next_due_map:
        F["due_date"] = F["POZO"].map(next_due_map).fillna(F["due_date"])

    F["overdue_d"] = (pd.Timestamp(week_start) - pd.to_datetime(F["due_date"])).dt.days
    F["is_overdue"] = F["overdue_d"] > 0

    # prioridad
    F["__v"] = F["r_m3_d"].astype(float)

    # Filtro por ZONA (normalizada)
    if "ZONA_NORM" in F.columns and zonas_norm_incluidas:
        F = F[F["ZONA_NORM"].isin(zonas_norm_incluidas)].copy()

    # Sub-filtro por BATERÍA (si corresponde)
    if allowed_bats_by_zone_norm:
        mask = pd.Series(True, index=F.index)
        for zn in zonas_norm_incluidas:
            bats = allowed_bats_by_zone_norm.get(zn)
            if bats is not None:
                mask &= ~ (F["ZONA_NORM"] == zn) | (F["BATERIA_NORM"].isin(bats))
        F = F[mask].copy()

    # Exclusiones
    if excl_pozos:
        F = F[~F["POZO"].isin(excl_pozos)].copy()

    # Potencial mínimo y BATERÍA no vacía
    F = F[F["r_m3_d"].fillna(0) > RM3D_MIN].copy()
    F = F[F["BATERIA"].notna() & (F["BATERIA"].astype(str).str.strip() != "")].copy()

    # Excluir pozos con comentario no vacío en Frecuencias
    if "comentario" in F.columns:
        F["__comentario_txt"] = F["comentario"].astype(str).fillna("").str.strip()
        F = F[F["__comentario_txt"] == ""].copy()
        F.drop(columns=["__comentario_txt"], inplace=True, errors="ignore")

    # Merge coordenadas
    coords_df = coords_df if coords_df is not None else pd.DataFrame(columns=["POZO","LAT","LON"])
    F = F.merge(coords_df, how="left", on="POZO")
    F["has_coords"] = F["LAT"].notna() & F["LON"].notna()

    # Orden base
    F = F.sort_values(by=["is_overdue","__v","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    return F

def _v_est_for_day(row, day_date):
    r = row.get("r_m3_d", np.nan)
    u = row.get("ultima_medicion", pd.NaT)
    if pd.isna(u) or pd.isna(r) or r <= 0:
        return 0.0
    dd = max(0, (pd.Timestamp(day_date) - pd.Timestamp(u)).days)
    return max(0.0, float(r) * float(dd))

def haversine_km(lat1, lon1, lat2, lon2):
    try:
        if pd.isna(lat1) or pd.isna(lon1) or pd.isna(lat2) or pd.isna(lon2):
            return np.nan
        R = 6371.0088
        p1 = math.radians(float(lat1)); p2 = math.radians(float(lat2))
        dphi = math.radians(float(lat2) - float(lat1))
        dlmb = math.radians(float(lon2) - float(lon1))
        a = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dlmb/2)**2
        return 2*R*math.asin(math.sqrt(a))
    except Exception:
        return np.nan

# ==========================
# Lógica de asignación diaria simple (por “semilla”, como en tu versión original)
# ==========================
def _fill_day_star_clusters(day_date, avail_df, cap_per_day, radius_km, used_set,
                            cluster_cap, params=None,
                            clusters_max=None, backfill_nearest=True, umbral_km_backfill=5.0):
    assigned = []
    remaining_cap = int(cap_per_day)
    cluster_cap   = max(1, int(cluster_cap))
    TOP_N = 30 if params is None else int(params.get("top_semillas_eval", 30))

    if params is not None and params.get("umbral_km_backfill") is not None:
        umbral_km_backfill = float(params.get("umbral_km_backfill"))

    has_xy = avail_df["has_coords"].fillna(False)
    pool = pd.concat([
        avail_df.loc[has_xy].sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]),
        avail_df.loc[~has_xy].sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]),
    ], ignore_index=True)

    def _bb_filter(df, lat0, lon0, rad_km):
        if pd.isna(lat0) or pd.isna(lon0) or df.empty:
            return df.iloc[0:0]
        dlat = rad_km / 110.574
        dlon = rad_km / (111.320 * max(0.1, math.cos(math.radians(float(lat0)))))
        return df[(df["LAT"].between(lat0 - dlat, lat0 + dlat)) &
                  (df["LON"].between(lon0 - dlon, lon0 + dlon))].copy()

    c_lat_acc, c_lon_acc, n_acc = (np.nan, np.nan, 0)

    def _build_cluster_from_seed(seed_row, pool_df, cap_left):
        seed_lat = seed_row.get("LAT", np.nan)
        seed_lon = seed_row.get("LON", np.nan)
        rows_cluster = [seed_row]

        if pd.notna(seed_lat) and pd.notna(seed_lon):
            neigh = _bb_filter(pool_df, seed_lat, seed_lon, radius_km)
            if not neigh.empty:
                neigh = neigh.copy()
                neigh["__dist_seed"] = neigh.apply(
                    lambda r: haversine_km(seed_lat, seed_lon, r["LAT"], r["LON"]), axis=1
                )
                neigh = neigh[neigh["__dist_seed"] <= radius_km]
                neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

                max_neighbors_by_cluster = max(0, cluster_cap - 1)
                max_neighbors_by_day     = max(0, cap_left - 1)
                take_n = min(max_neighbors_by_cluster, max_neighbors_by_day)
                if take_n > 0 and not neigh.empty:
                    rows_cluster.extend([nr for _, nr in neigh.head(take_n).iterrows()])

        coords_cluster = [(r.get("LAT", np.nan), r.get("LON", np.nan))
                          for r in rows_cluster
                          if pd.notna(r.get("LAT", np.nan)) and pd.notna(r.get("LON", np.nan))]
        if coords_cluster:
            c_lat = float(np.mean([x for x,_ in coords_cluster]))
            c_lon = float(np.mean([y for _,y in coords_cluster]))
        else:
            c_lat, c_lon = (np.nan, np.nan)

        used_ids = [r["POZO"] for r in rows_cluster]
        return rows_cluster, (c_lat, c_lon), used_ids

    def _append_cluster(rows_cluster, seed_pozo, seed_lat, seed_lon, c_lat, c_lon):
        nonlocal remaining_cap, c_lat_acc, c_lon_acc, n_acc
        used_now = set()
        for r in rows_cluster:
            if remaining_cap <= 0:
                break
            pozo = r["POZO"]
            if pozo in used_set or pozo in used_now:
                continue

            lat = r.get("LAT", np.nan); lon = r.get("LON", np.nan)
            d_seed = haversine_km(seed_lat, seed_lon, lat, lon) if pd.notna(seed_lat) and pd.notna(seed_lon) else np.nan
            d_cent = haversine_km(c_lat, c_lon, lat, lon)       if pd.notna(c_lat)  and pd.notna(c_lon)  else np.nan

            assigned.append({
                "Plan_Fecha": day_date.date(),
                "Semana_ISO": day_date.isocalendar()[1],
                "ZONA": r["ZONA"],
                "BATERIA": r["BATERIA"],
                "POZO": pozo,
                "r_m3_d": float(r.get("__v", r.get("r_m3_d", np.nan))),
                "ultima_medicion": r.get("ultima_medicion", pd.NaT),
                "Seed_POZO": seed_pozo,
                "Dist_km_semilla": None if pd.isna(d_seed) else round(float(d_seed), 3),
                "Dist_km_centroid": None if pd.isna(d_cent) else round(float(d_cent), 3),
            })
            used_now.add(pozo)
            remaining_cap -= 1

            if pd.notna(lat) and pd.notna(lon):
                if n_acc == 0:
                    c_lat_acc, c_lon_acc, n_acc = float(lat), float(lon), 1
                else:
                    c_lat_acc = (c_lat_acc*n_acc + float(lat)) / (n_acc + 1)
                    c_lon_acc = (c_lon_acc*n_acc + float(lon)) / (n_acc + 1)
                    n_acc += 1

        return used_now

    clusters_hechos = 0

    # PRIMER CLÚSTER por mejor volumen
    while (remaining_cap > 0) and (not pool.empty):
        cand_pool = pool[pool["has_coords"]].copy()
        best_total = None
        best_seed_idx = None
        best_cluster_rows = None

        if not cand_pool.empty:
            cand_seeds = cand_pool.sort_values(["__v","is_overdue","due_date"],
                                               ascending=[False, False, True]).head(max(1, TOP_N))
            for seed_idx, seed_row in cand_seeds.iterrows():
                seed_lat = seed_row.get("LAT", np.nan)
                seed_lon = seed_row.get("LON", np.nan)
                if pd.isna(seed_lat) or pd.isna(seed_lon):
                    continue

                neigh = _bb_filter(pool.drop(index=seed_idx, errors="ignore"), seed_lat, seed_lon, radius_km)
                if not neigh.empty:
                    neigh = neigh.copy()
                    neigh["__dist_seed"] = neigh.apply(
                        lambda r: haversine_km(seed_lat, seed_lon, r["LAT"], r["LON"]), axis=1
                    )
                    neigh = neigh[neigh["__dist_seed"] <= radius_km]
                    neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

                max_neighbors_by_cluster = max(0, cluster_cap - 1)
                max_neighbors_by_day     = max(0, remaining_cap - 1)
                take_n = min(max_neighbors_by_cluster, max_neighbors_by_day)

                if take_n > 0 and not neigh.empty:
                    neigh_take = neigh.head(take_n)
                    cluster_rows = [seed_row] + [nr for _, nr in neigh_take.iterrows()]
                else:
                    cluster_rows = [seed_row]

                total_est = 0.0
                for r in cluster_rows:
                    rr = r.get("__v", r.get("r_m3_d", np.nan))
                    total_est += _v_est_for_day({"r_m3_d": rr,
                                                 "ultima_medicion": r.get("ultima_medicion", pd.NaT)}, day_date)

                if (best_total is None) or (total_est > best_total):
                    best_total = total_est
                    best_seed_idx = seed_idx
                    best_cluster_rows = cluster_rows

        else:
            # fallback si nadie tiene coords
            seed_row = pool.iloc[0]
            seed_pozo = seed_row["POZO"]
            seed_lat  = seed_row.get("LAT", np.nan)
            seed_lon  = seed_row.get("LON", np.nan)
            best_cluster_rows = [seed_row]

        # Centroide del clúster
        coords_cluster = [(r.get("LAT", np.nan), r.get("LON", np.nan))
                          for r in best_cluster_rows
                          if pd.notna(r.get("LAT", np.nan)) and pd.notna(r.get("LON", np.nan))]
        if coords_cluster:
            c_lat = float(np.mean([x for x,_ in coords_cluster]))
            c_lon = float(np.mean([y for _,y in coords_cluster]))
        else:
            c_lat, c_lon = (np.nan, np.nan)

        used_now = _append_cluster(best_cluster_rows, best_cluster_rows[0]["POZO"],
                                   best_cluster_rows[0].get("LAT", np.nan),
                                   best_cluster_rows[0].get("LON", np.nan),
                                   c_lat, c_lon)

        if used_now:
            clusters_hechos += 1
            used_set.update(used_now)
            pool = pool[~pool["POZO"].isin(used_now)].copy()
        else:
            pool = pool.iloc[1:].copy()

        # corte si no queremos backfill
        if not backfill_nearest or remaining_cap <= 0 or (clusters_max and clusters_hechos >= int(clusters_max)):
            break

        # Backfill por centroide acumulado (umbral)
        while (remaining_cap > 0) and (not pool.empty) and (not (clusters_max and clusters_hechos >= int(clusters_max))):
            # no implementamos aquí para simplificar la monocelda
            break

    return assigned

def assign_week_round_robin_by_zone(cand_all, team_ids, params, week_start, week_end, radius_km):
    """
    Reparte equitativamente por día entre los equipos que comparten la misma ZONA.
    cand_all: DF con columnas ['POZO','ZONA','BATERIA','due_date','is_overdue','__v','LAT','LON','has_coords','r_m3_d','ultima_medicion']
    """
    dias   = int(params["dias_por_semana"])
    cap_pz = int(params["max_pozos_dia_equipo"])
    cap_cluster = int(params.get("max_pozos_por_cluster", 4))

    used_glob = set()
    rows = []

    for d in range(dias):
        day_date = pd.Timestamp(week_start) + pd.Timedelta(days=d)

        pool_day = cand_all[~cand_all["POZO"].isin(used_glob)].copy()
        if pool_day.empty:
            continue
        in_window = (pd.to_datetime(pool_day["due_date"]) <= pd.Timestamp(week_end)) | pool_day["is_overdue"]
        pool_day = pool_day[in_window].copy()
        if pool_day.empty:
            continue

        for eq in sorted(team_ids):
            avail = pool_day[~pool_day["POZO"].isin(used_glob)].copy()
            if avail.empty:
                continue

            assigned_today = _fill_day_star_clusters(
                day_date, avail, cap_pz, radius_km, used_glob,
                cluster_cap=cap_cluster, params=params,
                clusters_max=params.get("clusters_por_dia_max"),
                backfill_nearest=bool(params.get("backfill_nearest_cluster", True)),
                umbral_km_backfill=float(params.get("umbral_km_backfill", 5.0))
            )

            for ord_idx, a in enumerate(assigned_today, start=1):
                try:
                    v_est = _v_est_for_day({"r_m3_d": a.get("r_m3_d", np.nan),
                                            "ultima_medicion": a.get("ultima_medicion", pd.NaT)}, day_date)
                except Exception:
                    v_est = 0.0

                a.update({
                    "Equipo": int(eq),
                    "Dia_Idx": d+1,
                    "Orden": ord_idx,
                    "Vol_Estimado_m3": round(float(v_est), 2)
                })
                rows.append(a)

    cols = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
            "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
            "Seed_POZO","Dist_km_semilla","Dist_km_centroid","ultima_medicion"]
    return pd.DataFrame(rows, columns=cols) if rows else pd.DataFrame(columns=cols)

def ensure_annual_coverage_zone_locked(all_pozos_df, plan, params, start_date, equipo_to_zona,
                                       allowed_bats_by_zone_norm=None, r_by_pozo=None):
    cap_pz = params["max_pozos_dia_equipo"]

    keys = []
    for w in range(params["semanas_plan"]):
        w_start = start_date + timedelta(weeks=w)
        for d in range(params["dias_por_semana"]):
            f = w_start + timedelta(days=d)
            for e in equipo_to_zona.keys():
                keys.append((e, f))

    if not plan.empty:
        plan["__key"] = plan["Equipo"].astype(int).astype(str) + "|" + plan["Plan_Fecha"].astype(str)
        used_counts = plan.groupby("__key")["POZO"].count().to_dict()
    else:
        used_counts = {}

    planned = set(plan["POZO"].unique()) if not plan.empty else set()
    missing_df = all_pozos_df[~all_pozos_df["POZO"].isin(planned)].copy()
    missing_df = missing_df[missing_df["BATERIA"].notna() & (missing_df["BATERIA"].astype(str).str.strip()!="")].copy()

    add = []
    for _, row in missing_df.iterrows():
        pz = row["POZO"]; z = row["ZONA"]
        bat = row.get("BATERIA", "")

        if not isinstance(bat, str) or bat.strip() == "":
            continue

        if allowed_bats_by_zone_norm:
            zn = _norm(z)
            bats_allowed = allowed_bats_by_zone_norm.get(zn)
            if bats_allowed is not None:
                if _norm(bat) not in bats_allowed:
                    continue

        if r_by_pozo is not None:
            r_val = float(r_by_pozo.get(pz, np.nan))
            if not (r_val > RM3D_MIN):
                continue

        target_teams = [e for e, zona in equipo_to_zona.items() if zona == z]
        if not target_teams:
            continue
        placed = False
        for e in target_teams:
            for (ee, f) in keys:
                if ee != e:
                    continue
                key = f"{e}|{f}"
                cnt = used_counts.get(key, 0)
                if cnt < cap_pz:
                    add.append({
                        "Plan_Fecha": f,
                        "Semana_ISO": f.isocalendar()[1],
                        "Equipo": int(e),
                        "Dia_Idx": f.weekday()+1,
                        "Orden": cnt+1,
                        "ZONA": z,
                        "BATERIA": bat,
                        "POZO": pz,
                        "r_m3_d": np.nan,
                        "Vol_Estimado_m3": 0.0,
                        "Seed_POZO": "",
                        "Dist_km_semilla": None,
                        "Dist_km_centroid": None,
                        "ultima_medicion": pd.NaT,
                    })
                    used_counts[key] = cnt+1
                    placed = True
                    break
            if placed:
                break

    if add:
        plan = pd.concat([plan, pd.DataFrame(add)], ignore_index=True)                 .sort_values(["Plan_Fecha","Equipo","Orden"])
    return plan

def build_alertas_abm(freq_df: pd.DataFrame, norm_table: pd.DataFrame, dict_df: pd.DataFrame) -> pd.DataFrame:
    base = freq_df[["POZO","ZONA","BATERIA","ultima_medicion","ultima_exitosa"]].copy()
    meta_first = dict_df.groupby("oficial")[["estado","met_prod","nivel_3","nivel_5"]].first()
    base = base.merge(meta_first[["estado","met_prod"]], left_on="POZO", right_index=True, how="left")

    out = base.copy()
    for c in ["ultima_medicion","ultima_exitosa"]:
        out[c] = pd.to_datetime(out[c], errors="coerce").dt.date
    out = out.sort_values(["ZONA","BATERIA","POZO"]).reset_index(drop=True)
    return out

# ============================================
# HARNES PARA JUPYTER
# ============================================
def run_pipeline_jupyter(
    input_file,
    nombres_pozo_file,
    coords_file,
    *,
    semanas_plan=2,
    equipos_activos=2,
    dias_por_semana=5,
    max_pozos_dia_equipo=10,
    K_max_pozos_por_cluster=5,
    clusters_por_dia_max=None,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=0.1,
    zonas_incluir=None,
    baterias_por_zona=None,      # {"las heras cg - canadon escondida": {"swabing ce","ce 04"}}
    pozos_excluir=None,
    escribir_excel=False
):
    global INPUT_FILE, NOMBRES_POZO_FILE, COORDS_FILE, RADIUS_KM, RM3D_MIN, DEFAULTS
    INPUT_FILE       = input_file
    NOMBRES_POZO_FILE= nombres_pozo_file
    COORDS_FILE      = coords_file
    RADIUS_KM        = float(radius_km)
    RM3D_MIN         = float(rm3d_min)

    DEFAULTS = DEFAULTS.copy()
    DEFAULTS.update({
        "equipos_activos": int(equipos_activos),
        "dias_por_semana": int(dias_por_semana),
        "semanas_plan": int(semanas_plan),
        "max_pozos_dia_equipo": int(max_pozos_dia_equipo),
        "max_pozos_por_cluster": int(K_max_pozos_por_cluster),
        "clusters_por_dia_max": clusters_por_dia_max,
        "backfill_nearest_cluster": bool(backfill_nearest),
        "umbral_km_backfill": float(umbral_km_backfill),
    })

    # 1) Lee historial (Excel del usuario)
    df = read_historial(INPUT_FILE, SHEET_HIST)

    # 2) Normalización por diccionario
    key2off, dict_df = load_pozo_dictionary(NOMBRES_POZO_FILE)
    df_norm, alert_table, norm_table = apply_pozo_normalization(df, key2off, dict_df)

    # 3) Filtra inválidos
    df = df_norm[df_norm["VALIDO_POZO"] == True].copy()

    # 4) Filtro por ZONA (si se pide explícito)
    if zonas_incluir:
        zonas_incluir = set(zonas_incluir)
        znorm = {_norm(z) for z in zonas_incluir}
        df = df[df["__ZONA_NORM"].isin(znorm)].copy()
        zonas_labels = zonas_incluir
        zonas_norm   = znorm
    else:
        zonas_labels, zonas_norm = set(df["ZONA"].dropna().astype(str)), set(df["__ZONA_NORM"].dropna().astype(str))

    # 5) Sub-filtro de baterías (si lo pasaste por parámetro)
    if baterias_por_zona:
        allowed_bats_by_zone_norm = {zn: set(baterias_por_zona[zn]) if baterias_por_zona[zn] is not None else None
                                     for zn in baterias_por_zona}
    else:
        allowed_bats_by_zone_norm = {zn: None for zn in zonas_norm}

    # 6) Exclusiones (si te pasan un set)
    excl_total = set(pozos_excluir or [])

    # 7) Frecuencias
    params = DEFAULTS.copy()
    freq = compute_frecuencias(df, params)

    # Comentarios desde OBS cuando ultima_medicion != ultima_exitosa
    df_obs = df[["POZO", "FECHA", "OBS_POZO"]].copy() if "OBS_POZO" in df.columns else pd.DataFrame(columns=["POZO","FECHA","OBS_POZO"])
    df_obs["FECHA_DATE"] = pd.to_datetime(df_obs["FECHA"], errors="coerce").dt.date
    df_obs = (df_obs.dropna(subset=["FECHA_DATE"])
                    .sort_values(["POZO","FECHA_DATE"])
                    .drop_duplicates(subset=["POZO","FECHA_DATE"], keep="last"))
    obs_map = {(r.POZO, r.FECHA_DATE): (str(r.OBS_POZO).strip() if pd.notna(r.OBS_POZO) else "")
               for r in df_obs.itertuples(index=False)}
    freq["__UMED_DATE"] = pd.to_datetime(freq["ultima_medicion"], errors="coerce").dt.date
    freq["__UEXI_DATE"] = pd.to_datetime(freq["ultima_exitosa"], errors="coerce").dt.date
    freq["comentario"] = [obs_map.get((pz, fmed), "") for pz, fmed in zip(freq["POZO"], freq["__UMED_DATE"])]
    mask_both_valid = freq["__UMED_DATE"].notna() & freq["__UEXI_DATE"].notna()
    mask_diff = mask_both_valid & (freq["__UMED_DATE"] != freq["__UEXI_DATE"])
    freq.loc[~mask_diff, "comentario"] = ""
    freq.drop(columns=["__UMED_DATE","__UEXI_DATE"], errors="ignore", inplace=True)

    # 8) Coordenadas
    coords_df = read_coords(COORDS_FILE)

    # 9) Mapas auxiliares
    delta_by_pozo = freq.set_index("POZO")["delta_star_dias"].to_dict()
    r_by_pozo     = freq.set_index("POZO")["r_m3_d"].to_dict()

    # 10) Semanas a planificar
    start = next_monday(date.today())
    weeks = [(start + timedelta(weeks=i), start + timedelta(weeks=i, days=6)) for i in range(params["semanas_plan"])]

    # 11) Equipos -> ZONA (fijo)
    zonas_list = sorted(set(zonas_labels))
    equipo_to_zona = {}
    for i in range(1, params["equipos_activos"]+1):
        zona_asignada = zonas_list[min(i-1, len(zonas_list)-1)]
        equipo_to_zona[i] = zona_asignada

    # 12) Plan semanal por ZONA (round-robin entre equipos)
    plan_all = []
    next_due = {row.POZO: row.proxima_visita_base for row in freq.itertuples()}
    zone_to_teams = {}
    for eq, zona_label in equipo_to_zona.items():
        zone_to_teams.setdefault(zona_label, []).append(eq)

    for (w_start, w_end) in weeks:
        for zona_label, team_list in zone_to_teams.items():
            zona_norm_label = _norm(zona_label)
            cand_all = build_candidates_with_coords(
                freq=freq,
                week_start=w_start,
                week_end=w_end,
                excl_pozos=excl_total,
                zonas_norm_incluidas={zona_norm_label},
                coords_df=coords_df,
                allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
                next_due_map=next_due
            )
            if cand_all.empty:
                continue

            cand_zone = cand_all[[
                "POZO","ZONA","BATERIA","due_date","is_overdue","__v",
                "LAT","LON","has_coords","r_m3_d","ultima_medicion"
            ]].copy()

            plan_week_zone = assign_week_round_robin_by_zone(
                cand_all=cand_zone,
                team_ids=sorted(team_list),
                params=params,
                week_start=w_start,
                week_end=w_end,
                radius_km=RADIUS_KM
            )

            if not plan_week_zone.empty:
                plan_all.append(plan_week_zone)

                for pz, fcal in plan_week_zone[["POZO","Plan_Fecha"]].drop_duplicates().itertuples(index=False):
                    dd = int(delta_by_pozo.get(pz, params["min_dias_freq"]))
                    next_due[pz] = pd.Timestamp(fcal) + pd.Timedelta(days=dd)

    plan = (pd.concat(plan_all, ignore_index=True)
            if plan_all else
            pd.DataFrame(columns=[
                "Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden","ZONA","BATERIA",
                "POZO","r_m3_d","Vol_Estimado_m3","Seed_POZO","Dist_km_semilla","Dist_km_centroid","ultima_medicion"
            ]))

    # 13) Cobertura anual reforzada (opcional)
    if not freq.empty:
        eligible_mask = (freq["ZONA"].isin(zonas_labels)) & (freq["r_m3_d"].fillna(0) > RM3D_MIN)
        if "comentario" in freq.columns:
            eligible_mask &= (freq["comentario"].astype(str).fillna("").str.strip() == "")
        if allowed_bats_by_zone_norm:
            for zn, bats in allowed_bats_by_zone_norm.items():
                if bats is not None:
                    eligible_mask &= (~(freq["ZONA_NORM"] == zn)) | (freq["BATERIA_NORM"].isin(bats))

        all_pozos_in_zonas = freq.loc[eligible_mask, ["POZO","ZONA","BATERIA"]].drop_duplicates().copy()
        all_pozos_in_zonas = all_pozos_in_zonas[
            all_pozos_in_zonas["BATERIA"].notna() & (all_pozos_in_zonas["BATERIA"].astype(str).str.strip() != "")
        ].copy()

        plan = ensure_annual_coverage_zone_locked(
            all_pozos_in_zonas,
            plan,
            params,
            start,
            equipo_to_zona,
            allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
            r_by_pozo=r_by_pozo
        )

    # 14) Export opcional
    # 14) Export opcional
    out_xlsx = None
    if escribir_excel:
        out_xlsx = unique_output_path(INPUT_FILE)
        coords_all = read_coords(COORDS_FILE)
        with pd.ExcelWriter(out_xlsx, engine="openpyxl", mode="w") as writer:
            # Frecuencias
            freq_out = freq.copy()
            for c in ["proxima_visita_base","ultima_medicion","ultima_exitosa"]:
                freq_out[c] = pd.to_datetime(freq_out[c], errors="coerce").dt.date
            freq_out = freq_out.sort_values(["ZONA","BATERIA","POZO"])
            cols_pref = ["POZO","ZONA","BATERIA","ZONA_NORM","BATERIA_NORM","r_m3_d",
                         "ultima_medicion","ultima_exitosa","delta_star_dias","comentario",
                         "proxima_visita_base","ceros_consec","alerta"]
            cols_final = [c for c in cols_pref if c in freq_out.columns] + \
                         [c for c in freq_out.columns if c not in cols_pref]
            freq_out = freq_out[cols_final]
            freq_out.to_excel(writer, "Frecuencias", index=False)

            # Plan por equipo + Km_al_siguiente
            cols_plan = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
                         "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
                         "Seed_POZO","Dist_km_semilla","Dist_km_centroid"]
            for eq in range(1, params["equipos_activos"]+1):
                pe = plan.loc[plan["Equipo"]==eq].copy()
                if pe.empty:
                    pe = pd.DataFrame(columns=cols_plan + ["Km_al_siguiente","Ejecutado"])
                else:
                    pe = pe.sort_values(["Plan_Fecha","Dia_Idx","Orden","POZO"]).copy()
                    pe = pe.merge(coords_all, how="left", on="POZO")
                    pe["LAT_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LAT"].shift(-1)
                    pe["LON_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LON"].shift(-1)
                    def _leg_km(row):
                        if (pd.isna(row.get("LAT")) or pd.isna(row.get("LON")) or
                            pd.isna(row.get("LAT_next")) or pd.isna(row.get("LON_next"))):
                            return None
                        return round(float(haversine_km(row["LAT"], row["LON"],
                                                        row["LAT_next"], row["LON_next"])), 3)
                    pe["Km_al_siguiente"] = pe.apply(_leg_km, axis=1)
                    pe.drop(columns=["LAT","LON","LAT_next","LON_next"], inplace=True, errors="ignore")
                    pe["Ejecutado"] = ""
                    for c in cols_plan:
                        if c not in pe.columns: pe[c] = ""
                    pe = pe[cols_plan + ["Km_al_siguiente","Ejecutado"]]
                pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)

            # Auxiliares
            pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)

    return plan, freq, out_xlsx



# ============================================
# RUNNER (EDITÁ TUS RUTAS Y PARÁMETROS ACÁ)
# ============================================

INPUT_FILE = r"C:\Users\ry16123\Downloads\Ultimo (ORIGINAL) TABLERO PRODUCCIÓN FLUG S.A 2025 (1).xlsx"
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

plan, freq, out_xlsx = run_pipeline_jupyter(
    input_file=INPUT_FILE,
    nombres_pozo_file=NOMBRES_POZO_FILE,
    coords_file=COORDS_FILE,
    semanas_plan=2,                 # probá corto para iterar rápido
    equipos_activos=3,              # cantidad de equipos
    dias_por_semana=5,              # 5 ó 6
    max_pozos_dia_equipo=5,
    K_max_pozos_por_cluster=5,      # tamaño máximo de clúster
    clusters_por_dia_max=1,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=0.1,
    zonas_incluir=None,             # o lista como ["Las Heras CG - Canadon Escondida"]
    baterias_por_zona=None,         # dict normalizado (keys en _norm) o None
    pozos_excluir=set(),            # ej.: {"BB-100"}
    escribir_excel=True            # poné True si querés exportar el Excel
)

# Mostrar un vistazo rápido
display(freq.head(10))
display(plan.head(30))
print("Excel generado:", out_xlsx)


  warn(msg)
  g[col] = g[col].replace({None: np.nan})
  plan = (pd.concat(plan_all, ignore_index=True)
  plan = pd.concat([plan, pd.DataFrame(add)], ignore_index=True)                 .sort_values(["Plan_Fecha","Equipo","Orden"])
  freq_out.to_excel(writer, "Frecuencias", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)


Unnamed: 0,POZO,ZONA,BATERIA,ZONA_NORM,BATERIA_NORM,r_m3_d,ultima_medicion,ultima_exitosa,delta_star_dias,proxima_visita_base,ceros_consec,alerta,comentario
0,BB-10,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.015385,2025-07-16,2025-07-16,56,2025-09-10,0,,
1,BB-100,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.142857,2023-08-04,2023-08-04,14,2023-08-18,0,,
2,BB-101,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.035714,2025-08-25,2025-08-25,56,2025-10-20,0,,
3,BB.a-104,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.285714,2025-01-24,2025-01-24,7,2025-01-31,0,,
4,BB-111,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.428571,2025-07-01,2025-07-01,7,2025-07-08,0,,
5,BB-133,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.037037,2025-02-19,2025-02-19,56,2025-04-16,0,,
6,BB-170,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.285714,2024-07-24,2024-07-24,7,2024-07-31,0,,
7,BB-21,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.015564,2025-05-12,2025-05-12,56,2025-07-07,0,,
8,BB497,,,,,0.571429,2025-01-08,2025-01-08,7,2025-01-15,0,,
9,BB-50,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.081633,2023-03-10,2023-03-10,28,2023-04-07,0,,


Unnamed: 0,Plan_Fecha,Semana_ISO,Equipo,Dia_Idx,Orden,ZONA,BATERIA,POZO,r_m3_d,Vol_Estimado_m3,Seed_POZO,Dist_km_semilla,Dist_km_centroid,ultima_medicion,__key
0,2025-09-29,40,1,1,1,,CE10,CNEX1,0.192308,6.73,CNEX1,,,2025-08-25,1|2025-09-29
1,2025-09-29,40,2,1,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE-221,0.157895,3.0,CnE-221,0.0,0.755,2025-09-10,2|2025-09-29
2,2025-09-29,40,2,1,2,Las Heras CG - Canadon Escondida,CE 04,CnE-219,0.291667,186.96,CnE-221,1.351,1.903,2023-12-28,2|2025-09-29
3,2025-09-29,40,2,1,3,Las Heras CG - Canadon Escondida,Swabing CE,CnE-210,0.148148,9.93,CnE-221,2.166,1.45,2025-07-24,2|2025-09-29
4,2025-09-29,40,2,1,4,Las Heras CG - Canadon Escondida,CE 03,CnE-124,0.142857,4.43,CnE-221,0.937,0.441,2025-08-29,2|2025-09-29
5,2025-09-29,40,2,1,5,Las Heras CG - Canadon Escondida,CE 01,CnE-731,0.142857,34.43,CnE-221,2.669,2.275,2025-01-31,2|2025-09-29
6,2025-09-29,40,3,1,1,Las Heras CG - Canadon Escondida,CE 12,CnE-829,0.181818,49.09,CnE-829,0.0,0.735,2025-01-02,3|2025-09-29
7,2025-09-29,40,3,1,2,Las Heras CG - Canadon Escondida,Swabing CE,CnE-1234,0.285714,72.86,CnE-829,2.276,1.591,2025-01-17,3|2025-09-29
8,2025-09-29,40,3,1,3,Las Heras CG - Canadon Escondida,CE 20,CnE-1224(d),0.285714,37.14,CnE-829,2.494,1.928,2025-05-22,3|2025-09-29
9,2025-09-29,40,3,1,4,Las Heras CG - Canadon Escondida,Swabing CE,CnE-543,0.277778,9.72,CnE-829,2.626,2.801,2025-08-25,3|2025-09-29


Excel generado: C:\Users\ry16123\Downloads\Ultimo (ORIGINAL) TABLERO PRODUCCIÓN FLUG S.A 2025 (1)_CRONOGRAMA_20250927_(7).xlsx


In [39]:

#RELATIVAMENTE BIEN, NO DISTRIBUYE BIEN POR CANTIDAD DE EQIUPOS - Y CUANDO NO ENCUNETRA LAS COORDENADAS HACE LIO.

# ============================================
# Monocelda Jupyter: Planificador + Harness + Runner
# ============================================

# -*- coding: utf-8 -*-
import os, re, unicodedata, math
import numpy as np
import pandas as pd
from datetime import date, timedelta, datetime
from typing import Optional


# ==========================
# CONFIG por defecto (se sobreescriben en el runner)
# ==========================
INPUT_FILE  = r"DIAGRAMA SW.xlsx"   # Excel base (NO se modifica)
SHEET_HIST  = None                  # None => autodetecta hoja/encabezados
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

# Radio en km para agrupar por cercanía
RADIUS_KM = 3.0
# Filtro mínimo de potencial
RM3D_MIN = 0.1

# Umbrales para fuzzy (si se usan)
FUZZY_REPLACE_THRESHOLD = 85
FUZZY_SUGGEST_THRESHOLD = 75
LETTERS_SIMILARITY_MIN  = 80

DEFAULTS = {
    "equipos_activos": 4,                 # 1..4
    "dias_por_semana": 5,                 # 5 o 6
    "semanas_plan": 2,                    # para probar rápido en Jupyter
    "k_visitas": 1,                       # tasas (K=1 por pedido)
    "max_pozos_dia_equipo": 10,           # cupo por día por equipo
    "max_pozos_por_cluster": 5,           # tamaño de clúster (K fijo si usás lógica de clústeres fijos)
    "m3_por_visita_objetivo": 2.0,        # informativo
    "min_dias_freq": 7,                   # 1 semana
    "max_dias_freq": 56,                  # 8 semanas
    "dias_asumidos_una_visita": 7,        # para r si hay 1 sola visita
    "freq_dias_ultimo_cero_valido": 30,

    # Semillas a evaluar (si se usa lógica de semillas)
    "top_semillas_eval": 30,

    # Control de clústeres por día y backfill (si se usa lógica por semilla)
    "clusters_por_dia_max": None,
    "backfill_nearest_cluster": True,
    "umbral_km_backfill": 5.0,
}

# ==========================
# Utils
# ==========================
def _norm(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = s.replace("³", "3")
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    s = s.lower().strip().replace("\xa0"," ")
    s = s.replace("_"," ").replace("-"," ").replace("."," ").replace("\n"," ")
    return " ".join(s.split())

def _pozo_key(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    return "".join(ch for ch in s if ch.isalnum()).upper()

def _canonical_digits(d: str) -> str:
    d = (d or "").lstrip("0")
    return d if d != "" else "0"

def _letters_digits_from_key_both(k: str):
    raw_digits = "".join(re.findall(r"\d+", k))
    digits_canon = _canonical_digits(raw_digits)
    letters = re.sub(r"\d+", "", k)
    return letters, digits_canon, len(raw_digits)

def _ratio_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _fuzzy_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.partial_ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _canon_prefix_pozo(s: str) -> str:
    if s is None or (isinstance(s, float) and np.isnan(s)):
        return s
    raw = str(s).strip()
    raw_up = raw.upper()
    if raw_up.startswith("CÑE"):
        return "CNE" + raw_up[3:]
    raw_ascii = unicodedata.normalize("NFKD", raw_up).encode("ascii", "ignore").decode("ascii")
    if raw_ascii.startswith("CNE"):
        return raw_ascii
    if raw_ascii.startswith("CN"):
        return "CNE" + raw_ascii[2:]
    m = re.match(r"^CE(\d+)$", raw_ascii)
    if m:
        return "CNE" + m.group(1)
    return raw_ascii

def next_monday(d=None):
    d = d or date.today()
    return d + timedelta(days=(7 - d.weekday()) % 7)  # 0=Lunes

def unique_output_path(base_input_path: str) -> str:
    folder = os.path.dirname(os.path.abspath(base_input_path))
    stem   = os.path.splitext(os.path.basename(base_input_path))[0]
    today  = datetime.now().strftime("%Y%m%d")
    base   = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}.xlsx")
    if not os.path.exists(base): return base
    i = 2
    while True:
        cand = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}_({i}).xlsx")
        if not os.path.exists(cand): return cand
        i += 1

EXPECTED_KEYS = {
    "fecha":       ["fecha"],
    "pozo":        ["pozo"],
    "zona":        ["zona"],
    "bateria":     ["bateria", "batería"],
    "m3":          ["m3 bruta","m3","m3_bruta","m3bruta","m 3 bruta","m 3","m3 bruto","m3 recuperado","m3 recupero"],
    "carreras":    ["n de carreras","n° de carreras","nº de carreras","no de carreras","nro de carreras","numero de carreras","n° carreras","n de carrera","n carreras"],
    "nivel_final": ["nivel final pozo","nivel final","nivel final del pozo"],
    "obs_pozo":    ["observaciones del pozo","observaciones","comentarios","comentario"]
}

def _find_header_row(df_raw):
    for i in range(min(200, len(df_raw))):
        row_norm = [_norm(x) for x in df_raw.iloc[i,:].tolist()]
        if not row_norm:
            continue
        colmap = {v:j for v,j in zip(row_norm, range(len(row_norm)))}
        def has_any(keys): return any(k in colmap for k in keys)
        if has_any(EXPECTED_KEYS["fecha"]) and has_any(EXPECTED_KEYS["pozo"]) and has_any(EXPECTED_KEYS["zona"]) and has_any(EXPECTED_KEYS["bateria"]):
            return i, row_norm
    return None, None

# ---------- Nombres pozo ----------
def load_pozo_dictionary(xlsx_path: str):
    try:
        ref = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer diccionario de pozos: {xlsx_path}\n{e}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    cols = {c.lower().strip(): c for c in ref.columns}
    if "nombre_corto_pozo" not in cols:
        print(f"\n[AVISO] El diccionario no tiene la columna 'nombre_corto_pozo'. Columnas: {list(ref.columns)}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    c_pozo = cols["nombre_corto_pozo"]
    c_met  = cols.get("met_prod")
    c_n3   = cols.get("nivel_3")
    c_n5   = cols.get("nivel_5")
    c_est  = cols.get("estado")

    refv = ref.loc[ref[c_pozo].notna()].copy()
    refv[c_pozo] = refv[c_pozo].astype(str).str.strip()

    of_list  = refv[c_pozo].tolist()
    met_vals = refv[c_met].astype(str).str.strip() if c_met else np.nan
    n3_vals  = refv[c_n3].astype(str).str.strip()  if c_n3 else np.nan
    n5_vals  = refv[c_n5].astype(str).str.strip()  if c_n5 else np.nan
    est_vals = refv[c_est].astype(str).str.strip() if c_est else np.nan

    keys, letters_, digits_canon_, digits_len_ = [], [], [], []
    for val in of_list:
        k = _pozo_key(val)
        L, Dcanon, Dlen = _letters_digits_from_key_both(k)
        keys.append(k); letters_.append(L); digits_canon_.append(Dcanon); digits_len_.append(Dlen)

    dict_df = pd.DataFrame({
        "oficial": of_list,
        "key": keys,
        "letters": letters_,
        "digits_canon": digits_canon_,
        "digits_len": digits_len_,
        "met_prod": list(met_vals) if isinstance(met_vals, pd.Series) else [np.nan]*len(of_list),
        "nivel_3":  list(n3_vals)  if isinstance(n3_vals,  pd.Series) else [np.nan]*len(of_list),
        "nivel_5":  list(n5_vals)  if isinstance(n5_vals,  pd.Series) else [np.nan]*len(of_list),
        "estado":   list(est_vals) if isinstance(est_vals, pd.Series) else [np.nan]*len(of_list),
    })

    key2off = {}
    for k, off in zip(dict_df["key"], dict_df["oficial"]):
        if k and k not in key2off:
            key2off[k] = off
    return key2off, dict_df

def apply_pozo_normalization(df: pd.DataFrame, key2off: dict, dict_df: pd.DataFrame):
    df = df.copy()
    df["POZO_ORIG"] = df["POZO"].astype(str).str.strip()
    df["POZO_PreCanon"] = df["POZO_ORIG"].apply(_canon_prefix_pozo)
    df["__POZO_KEY"] = df["POZO_PreCanon"].apply(_pozo_key)

    parts = df["__POZO_KEY"].apply(_letters_digits_from_key_both)
    df["__KEY_LET"], df["__KEY_DIG_CANON"], df["__KEY_DIG_LEN"] = zip(*parts)

    df["POZO_MATCH"]   = None
    df["MATCH_TIPO"]   = "NO"
    df["MATCH_SCORE"]  = np.nan
    df["LETTER_SCORE"] = np.nan
    df["APLICADO"]     = "NO"
    df["ALERTA_NORM"]  = ""
    df["VALIDO_POZO"]  = True

    invalid_mask = (df["__KEY_LET"].str.len()==0) | (df["__KEY_DIG_LEN"]==0)
    if invalid_mask.any():
        df.loc[invalid_mask, "ALERTA_NORM"] = "SIN_LETRAS_O_DIGITOS"
        df.loc[invalid_mask, "VALIDO_POZO"] = False

    valid_mask = ~invalid_mask
    exact_mask = valid_mask & df["__POZO_KEY"].isin(key2off.keys())
    df.loc[exact_mask, "POZO_MATCH"]   = df.loc[exact_mask, "__POZO_KEY"].map(key2off)
    df.loc[exact_mask, "MATCH_TIPO"]   = "EXACTO"
    df.loc[exact_mask, "MATCH_SCORE"]  = 100
    df.loc[exact_mask, "LETTER_SCORE"] = 100
    df.loc[exact_mask, "APLICADO"]     = "SI"

    pending = df[valid_mask & (~exact_mask)].index.tolist()
    if pending and not dict_df.empty:
        dict_by_spec = {}
        for spec, sub in dict_df.groupby(["digits_canon","digits_len"]):
            dict_by_spec[spec] = sub

        for idx in pending:
            key_u   = df.at[idx, "__POZO_KEY"]
            let_u   = df.at[idx, "__KEY_LET"]
            digc_u  = df.at[idx, "__KEY_DIG_CANON"]
            digl_u  = int(df.at[idx, "__KEY_DIG_LEN"])

            cand_df = dict_by_spec.get((digc_u, digl_u), pd.DataFrame())
            best_off, best_score, best_lscore = None, -1, -1

            if cand_df is not None and not cand_df.empty:
                for row in cand_df.itertuples():
                    kk = row.key
                    ll = row.letters
                    sc_key = _fuzzy_score(key_u, kk)
                    sc_let = _ratio_score(let_u, ll)
                    if sc_let < LETTERS_SIMILARITY_MIN:
                        continue
                    if sc_key > best_score or (sc_key == best_score and sc_let > best_lscore):
                        best_score = sc_key
                        best_lscore = sc_let
                        best_off   = row.oficial

            if best_off is not None:
                df.at[idx, "POZO_MATCH"]   = best_off
                df.at[idx, "MATCH_TIPO"]   = "SUGERIDO"
                df.at[idx, "MATCH_SCORE"]  = int(best_score)
                df.at[idx, "LETTER_SCORE"] = int(best_lscore)
            else:
                df.at[idx, "ALERTA_NORM"] = "SIN MATCH EN DICCIONARIO"

    # Reemplazos
    df["POZO"] = df["POZO_MATCH"].where(df["POZO_MATCH"].notna(), df["POZO"])
    meta_first = dict_df.groupby("oficial")[["met_prod","nivel_3","nivel_5"]].first()
    df = df.merge(meta_first, how="left", left_on="POZO", right_index=True)

    # ZONA sólo si hubo match; sino, vacío
    if "nivel_3" in df.columns:
        df.loc[df["POZO_MATCH"].isna(), "nivel_3"] = ""
        df["ZONA"] = np.where(df["POZO_MATCH"].notna(), df["nivel_3"].fillna(""), "")

    # BATERIA si hay nivel_5
    if "nivel_5" in df.columns:
        df["BATERIA"] = np.where(
            df["nivel_5"].notna() & (df["nivel_5"].astype(str).str.strip()!=""),
            df["nivel_5"], df["BATERIA"]
        )

    df["__ZONA_NORM"]    = df["ZONA"].apply(_norm)
    df["__BATERIA_NORM"] = df["BATERIA"].apply(_norm)

    norm_table = (df[["POZO_ORIG","POZO_PreCanon","__POZO_KEY",
                      "__KEY_LET","__KEY_DIG_CANON","__KEY_DIG_LEN",
                      "POZO_MATCH","MATCH_TIPO","MATCH_SCORE","LETTER_SCORE",
                      "APLICADO","ALERTA_NORM","VALIDO_POZO",
                      "met_prod","nivel_3","nivel_5"]]
                  .drop_duplicates()
                  .rename(columns={
                      "POZO_ORIG":"Pozo_Original",
                      "POZO_PreCanon":"Pozo_PreCanon",
                      "__POZO_KEY":"Clave_Normalizada",
                      "__KEY_LET":"Letras",
                      "__KEY_DIG_CANON":"Digitos_Canon",
                      "__KEY_DIG_LEN":"Digitos_Len",
                      "POZO_MATCH":"Match_Oficial",
                      "MATCH_TIPO":"Match_Tipo",
                      "MATCH_SCORE":"Match_Score",
                      "LETTER_SCORE":"Letter_Score",
                      "APLICADO":"Aplicado",
                      "ALERTA_NORM":"Alerta",
                      "VALIDO_POZO":"Valido",
                      "met_prod":"met_prod",
                      "nivel_3":"nivel_3",
                      "nivel_5":"nivel_5"
                  })
                  .sort_values(["Valido","Aplicado","Match_Tipo","Pozo_Original"], ascending=[False, False, True, True]))

    alert_table = norm_table[(norm_table["Valido"]==False) | (norm_table["Aplicado"]=="NO") | (norm_table["Match_Tipo"]=="NO")].copy()
    return df, alert_table, norm_table

def read_historial(xlsx_path, sheet_hist=None):
    xl = pd.ExcelFile(xlsx_path)
    sheets = [sheet_hist] if (sheet_hist and sheet_hist in xl.sheet_names) else xl.sheet_names
    for sh in sheets:
        raw = xl.parse(sh, header=None)
        idx, header_norm = _find_header_row(raw)
        if idx is None:
            continue
        data = raw.iloc[idx:, :].copy()
        true_headers = data.iloc[0,:].astype(str).tolist()
        data = data.iloc[1:,:]
        data.columns = true_headers

        name_map = {c: _norm(c) for c in data.columns}
        def find_col(candidates):
            for c, n in name_map.items():
                if n in candidates:
                    return c
            return None

        c_fecha       = find_col(set(EXPECTED_KEYS["fecha"]))
        c_pozo        = find_col(set(EXPECTED_KEYS["pozo"]))
        c_zona        = find_col(set(EXPECTED_KEYS["zona"]))
        c_bateria     = find_col(set(EXPECTED_KEYS["bateria"]))
        c_m3          = find_col(set(EXPECTED_KEYS["m3"]))
        c_carr        = find_col(set(EXPECTED_KEYS["carreras"]))
        c_nivel_final = find_col(set(EXPECTED_KEYS["nivel_final"]))
        c_obs         = find_col(set(EXPECTED_KEYS["obs_pozo"]))

        if not (c_fecha and c_pozo and c_zona and c_bateria):
            continue

        use_cols = [c_fecha, c_pozo, c_zona, c_bateria]
        headers  = ["FECHA","POZO","ZONA","BATERIA"]
        if c_m3:            use_cols.append(c_m3);            headers.append("M3")
        if c_carr:          use_cols.append(c_carr);          headers.append("CARRERAS")
        if c_nivel_final:   use_cols.append(c_nivel_final);   headers.append("NIVEL_FINAL")
        if c_obs:           use_cols.append(c_obs);           headers.append("OBS_POZO")

        df = data[use_cols].copy()
        df.columns = headers

        df["FECHA"] = pd.to_datetime(df["FECHA"], errors="coerce")
        if "M3" not in df.columns: df["M3"] = np.nan
        else: df["M3"] = pd.to_numeric(df["M3"], errors="coerce")

        if "CARRERAS" not in df.columns: df["CARRERAS"] = np.nan
        else: df["CARRERAS"] = pd.to_numeric(df["CARRERAS"], errors="coerce")

        if "NIVEL_FINAL" not in df.columns:
            df["NIVEL_FINAL"] = None
        if "OBS_POZO" not in df.columns:
            df["OBS_POZO"] = None

        for col in ["POZO","ZONA","BATERIA","NIVEL_FINAL","OBS_POZO"]:
            df[col] = df[col].astype(str).str.strip().replace({"nan": np.nan})

        df = df.dropna(subset=["FECHA","POZO"]).sort_values(["POZO","FECHA"])
        return df

    raise ValueError("No pude detectar FECHA/POZO/ZONA/BATERÍA en ninguna hoja del Excel.")

def read_exclusions_from_sheet(xlsx_path):
    excl = set()
    try:
        xl = pd.ExcelFile(xlsx_path)
        if "ExcluirPozos" in xl.sheet_names:
            e = xl.parse("ExcluirPozos")
            e.columns = [str(c).strip().lower() for c in e.columns]
            if "pozo" in e.columns:
                if "excluir" in e.columns:
                    excl = set(e.loc[e["excluir"].astype(str).str.upper().isin(
                        ["SI","SÍ","YES","1","TRUE"]), "pozo"].astype(str).str.strip())
                else:
                    excl = set(e["pozo"].astype(str).str.strip())
    except Exception:
        pass
    return excl

# ==========================
# Frecuencias / r_m3_d
# ==========================
def _count_trailing_zeros_with_carr(g):
    cnt = 0
    for _, row in g.sort_values("FECHA").iloc[::-1].iterrows():
        m3 = row.get("M3", np.nan)
        car = row.get("CARRERAS", np.nan)
        if pd.notna(m3) and float(m3) == 0.0 and pd.notna(car) and float(car) > 0:
            cnt += 1
        else:
            break
    return cnt

def compute_frecuencias(df, params):
    v_target = params["m3_por_visita_objetivo"]
    min_d    = params["min_dias_freq"]
    max_d    = params["max_dias_freq"]
    k        = int(params["k_visitas"])
    one_days = int(params.get("dias_asumidos_una_visita", 7))
    freq_cero_ultimo = int(params.get("freq_dias_ultimo_cero_valido", 30))

    out = []
    for pozo, g0 in df.groupby("POZO", sort=False):
        g = g0.sort_values("FECHA").copy()

        for col in ["ZONA","BATERIA","NIVEL_FINAL"]:
            if col in g.columns:
                g[col] = g[col].replace({None: np.nan})
                g[col] = g[col].ffill().bfill()

        g["__ZONA_NORM"]    = g["ZONA"].apply(_norm)
        g["__BATERIA_NORM"] = g["BATERIA"].apply(_norm)
        g["__nf_norm"]      = g["NIVEL_FINAL"].apply(_norm) if "NIVEL_FINAL" in g.columns else ""

        med_validas_all = g[g["M3"].notna()].copy()

        m3_eq0 = g["M3"].fillna(0) == 0
        carr   = g.get("CARRERAS", pd.Series(index=g.index, dtype=float)).fillna(np.nan)
        zero_cond_a = m3_eq0 & (carr.fillna(0) >= 1)
        zero_cond_b = m3_eq0 & ((carr.isna()) | (carr.fillna(0) == 0)) & (g["__nf_norm"] == "surge")
        cond_cero_valido = zero_cond_a | zero_cond_b

        validas_rate = g[(g["M3"] > 0) | cond_cero_valido].copy()
        zeros_tail = _count_trailing_zeros_with_carr(g)

        ultima_med = med_validas_all["FECHA"].max() if not med_validas_all.empty else pd.NaT
        ultima_exi = g.loc[g["M3"]>0, "FECHA"].max() if "M3" in g.columns and not g[g["M3"]>0].empty else pd.NaT

        last_zero_valido = False
        if not med_validas_all.empty:
            idx_last = med_validas_all["FECHA"].idxmax()
            m3_last  = g.at[idx_last, "M3"]
            if pd.notna(m3_last) and float(m3_last) == 0.0:
                try:
                    last_zero_valido = bool(cond_cero_valido.loc[idx_last])
                except Exception:
                    last_zero_valido = False

        alerta = ""
        if last_zero_valido:
            alerta = f"ULTIMA_M3_0_VALIDO -> FREQ {freq_cero_ultimo}D"
        elif pd.notna(ultima_med):
            if zeros_tail > 0:
                alerta = f"ALERTA: {zeros_tail} cero(s) consecutivo(s) con Carreras>0"

        # r_m3_d
        r = np.nan
        if not validas_rate.empty:
            v = validas_rate.copy()
            v["delta_d"] = v["FECHA"].diff().dt.days
            v.loc[v["delta_d"] <= 0, "delta_d"] = np.nan
            v["rate"] = v["M3"].fillna(0) / v["delta_d"]
            rates = v["rate"].dropna()
            if len(rates) >= 1:
                r = rates.tail(min(k, len(rates))).mean()
            else:
                row = v.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan
        else:
            if len(med_validas_all) == 1:
                row = med_validas_all.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan

        # FRECUENCIA
        if last_zero_valido:
            delta = int(freq_cero_ultimo)
        else:
            if pd.isna(r):      delta = 7
            elif r <= 0:        delta = max_d
            else:
                delta = max(min_d, min(max_d, float(v_target)/float(r)))
                delta = int(7 * round(delta / 7.0))
                if delta < 7:
                    delta = 7

        prox = (ultima_med + pd.Timedelta(days=int(delta))) if pd.notna(ultima_med) else pd.Timestamp(next_monday())

        out.append({
            "POZO": pozo,
            "ZONA": g["ZONA"].iloc[-1],
            "BATERIA": g["BATERIA"].iloc[-1],
            "ZONA_NORM": g["__ZONA_NORM"].iloc[-1],
            "BATERIA_NORM": g["__BATERIA_NORM"].iloc[-1],
            "r_m3_d": r,
            "ultima_medicion": ultima_med,
            "ultima_exitosa": ultima_exi,
            "delta_star_dias": int(delta),
            "proxima_visita_base": prox,
            "ceros_consec": zeros_tail,
            "alerta": alerta
        })
    return pd.DataFrame(out)

# ==========================
# Coordenadas
# ==========================
def _to_float_maybe_comma(x):
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip()
    if s == "": return np.nan
    s = s.replace(",", ".")
    try:
        return float(s)
    except Exception:
        return np.nan

def read_coords(xlsx_path):
    try:
        cdf = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer coordenadas: {xlsx_path}\n{e}\n")
        return pd.DataFrame(columns=["POZO","LAT","LON"])
    cols_map = {c.lower().strip(): c for c in cdf.columns}
    c_pozo = cols_map.get("pozo")
    for k in ["geo_latitude","latitude","lat"]:
        if k in cols_map:
            c_lat = cols_map[k]; break
    else:
        c_lat = None
    for k in ["geo_longitude","longitude","lon","long"]:
        if k in cols_map:
            c_lon = cols_map[k]; break
    else:
        c_lon = None

    if not (c_pozo and c_lat and c_lon):
        print(f"[AVISO] Coordenadas: columnas esperadas 'POZO','GEO_LATITUDE','GEO_LONGITUDE'. Columnas encontradas: {list(cdf.columns)}")
        return pd.DataFrame(columns=["POZO","LAT","LON"])

    out = cdf[[c_pozo, c_lat, c_lon]].copy()
    out.columns = ["POZO","LAT","LON"]
    out["POZO"] = out["POZO"].astype(str).str.strip()
    out["LAT"] = out["LAT"].apply(_to_float_maybe_comma)
    out["LON"] = out["LON"].apply(_to_float_maybe_comma)
    out = out.dropna(subset=["POZO"])
    out = out.drop_duplicates(subset=["POZO"], keep="last")
    return out

# ==========================
# Candidatos y utilidades
# ==========================
def build_candidates_with_coords(freq, week_start, week_end, excl_pozos,
                                 zonas_norm_incluidas, coords_df,
                                 allowed_bats_by_zone_norm=None,
                                 next_due_map=None):
    F = freq.copy()

    # due_date base (permitimos override con next_due_map)
    F["due_date"] = F["proxima_visita_base"]
    if next_due_map:
        F["due_date"] = F["POZO"].map(next_due_map).fillna(F["due_date"])

    F["overdue_d"] = (pd.Timestamp(week_start) - pd.to_datetime(F["due_date"])).dt.days
    F["is_overdue"] = F["overdue_d"] > 0

    # prioridad
    F["__v"] = F["r_m3_d"].astype(float)

    # Filtro por ZONA (normalizada)
    if "ZONA_NORM" in F.columns and zonas_norm_incluidas:
        F = F[F["ZONA_NORM"].isin(zonas_norm_incluidas)].copy()

    # Sub-filtro por BATERÍA (si corresponde)
    if allowed_bats_by_zone_norm:
        mask = pd.Series(True, index=F.index)
        for zn in zonas_norm_incluidas:
            bats = allowed_bats_by_zone_norm.get(zn)
            if bats is not None:
                mask &= ~ (F["ZONA_NORM"] == zn) | (F["BATERIA_NORM"].isin(bats))
        F = F[mask].copy()

    # Exclusiones
    if excl_pozos:
        F = F[~F["POZO"].isin(excl_pozos)].copy()

    # Potencial mínimo y BATERÍA no vacía
    F = F[F["r_m3_d"].fillna(0) > RM3D_MIN].copy()
    F = F[F["BATERIA"].notna() & (F["BATERIA"].astype(str).str.strip() != "")].copy()

    # Excluir pozos con comentario no vacío en Frecuencias
    if "comentario" in F.columns:
        F["__comentario_txt"] = F["comentario"].astype(str).fillna("").str.strip()
        F = F[F["__comentario_txt"] == ""].copy()
        F.drop(columns=["__comentario_txt"], inplace=True, errors="ignore")

    # Merge coordenadas
    coords_df = coords_df if coords_df is not None else pd.DataFrame(columns=["POZO","LAT","LON"])
    F = F.merge(coords_df, how="left", on="POZO")
    F["has_coords"] = F["LAT"].notna() & F["LON"].notna()

    # Orden base
    F = F.sort_values(by=["is_overdue","__v","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    return F

def _v_est_for_day(row, day_date):
    r = row.get("r_m3_d", np.nan)
    u = row.get("ultima_medicion", pd.NaT)
    if pd.isna(u) or pd.isna(r) or r <= 0:
        return 0.0
    dd = max(0, (pd.Timestamp(day_date) - pd.Timestamp(u)).days)
    return max(0.0, float(r) * float(dd))

def haversine_km(lat1, lon1, lat2, lon2):
    try:
        if pd.isna(lat1) or pd.isna(lon1) or pd.isna(lat2) or pd.isna(lon2):
            return np.nan
        R = 6371.0088
        p1 = math.radians(float(lat1)); p2 = math.radians(float(lat2))
        dphi = math.radians(float(lat2) - float(lat1))
        dlmb = math.radians(float(lon2) - float(lon1))
        a = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dlmb/2)**2
        return 2*R*math.asin(math.sqrt(a))
    except Exception:
        return np.nan

# ==========================
# NUEVA LÓGICA DE CLÚSTERES (según prompt)
# ==========================
def _bbox_filter(df, lat0, lon0, rad_km):
    """Bounding-box previo a haversine para acotar vecinos."""
    if pd.isna(lat0) or pd.isna(lon0) or df.empty:
        return df.iloc[0:0]
    dlat = rad_km / 110.574
    dlon = rad_km / (111.320 * max(0.1, math.cos(math.radians(float(lat0)))))
    return df[(df["LAT"].between(lat0 - dlat, lat0 + dlat)) &
              (df["LON"].between(lon0 - dlon, lon0 + dlon))].copy()

def _cluster_centroid(lat_list, lon_list):
    if not lat_list or not lon_list:
        return (np.nan, np.nan)
    return float(np.mean(lat_list)), float(np.mean(lon_list))

def _validate_cluster_by_centroid(lat_list, lon_list, radius_km):
    c_lat, c_lon = _cluster_centroid(lat_list, lon_list)
    if pd.isna(c_lat) or pd.isna(c_lon):
        return False, (np.nan, np.nan), np.inf
    dmax = 0.0
    for la, lo in zip(lat_list, lon_list):
        d = haversine_km(c_lat, c_lon, la, lo)
        if pd.isna(d) or d > radius_km + 1e-9:
            return False, (c_lat, c_lon), np.inf
        dmax = max(dmax, d)
    return True, (c_lat, c_lon), dmax

def build_all_clusters(
    cands: pd.DataFrame,
    K: int,
    radius_km: float,
    score_mode: str = "rm3d",
    top_seeds: int = 30
) -> pd.DataFrame:
    """
    Devuelve DF con:
    ['ClusterID','POZOS','Centroide_LAT','Centroide_LON','Score','ZONA','BATERIAS']
    - Exactamente K pozos por clúster
    - Validación: todos a <= radius_km del CENTROIDE
    - Overlap permitido en generación
    - Semillas: mejores 'top_seeds' por __v
    - Elimina duplicados exactos (mismo conjunto de pozos)
    """
    if cands.empty:
        return pd.DataFrame(columns=["ClusterID","POZOS","Centroide_LAT","Centroide_LON","Score","ZONA","BATERIAS"])

    # trabajar solo con pozos con coords
    base = cands[cands["has_coords"]].copy()
    if base.empty:
        return pd.DataFrame(columns=["ClusterID","POZOS","Centroide_LAT","Centroide_LON","Score","ZONA","BATERIAS"])

    base = base.sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    seeds = base.head(max(1, int(top_seeds))).copy()

    clusters = []
    seen_sets = set()  # para deduplicar por conjunto de pozos
    for _, seed in seeds.iterrows():
        s_lat, s_lon = seed["LAT"], seed["LON"]
        neigh = _bbox_filter(base, s_lat, s_lon, radius_km)
        if neigh.empty:
            continue
        # Orden por valor y cercanía a la semilla
        neigh = neigh.copy()
        neigh["__dist_seed"] = neigh.apply(lambda r: haversine_km(s_lat, s_lon, r["LAT"], r["LON"]), axis=1)
        neigh = neigh[neigh["__dist_seed"] <= radius_km]
        neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

        # Tomar candidatos top K alrededor de la semilla (semilla incluida)
        if seed["POZO"] not in neigh["POZO"].values:
            # asegurar que la semilla esté
            neigh = pd.concat([pd.DataFrame([seed]), neigh], ignore_index=True)
            neigh = neigh.drop_duplicates(subset=["POZO"], keep="first")

        if len(neigh) < K:
            # no alcanza tamaño K dentro del radio de la semilla
            continue

        # Probar ventana de los top K mejor valuados dentro del radio
        topk = neigh.head(K).copy()
        pozos = tuple(topk["POZO"].tolist())
        lats  = topk["LAT"].tolist()
        lons  = topk["LON"].tolist()

        ok, (c_lat, c_lon), dmax = _validate_cluster_by_centroid(lats, lons, radius_km)
        if not ok:
            # Intentar ajustar: expandir lista ordenada y mover una ventana sobre los N mejores vecinos
            N = min(len(neigh), K + 10)  # ventana corta para evitar combinatoria
            window = neigh.head(N).copy()
            found = False
            # estrategia greedy: fijar semilla y tomar los K-1 mejores por __v que cumplan centroide
            # probando reemplazos simples si no valida
            for i in range(0, N-K+1):
                cand = window.iloc[i:i+K]
                lats2 = cand["LAT"].tolist(); lons2 = cand["LON"].tolist()
                ok2, (c_lat2, c_lon2), _ = _validate_cluster_by_centroid(lats2, lons2, radius_km)
                if ok2:
                    topk = cand.copy()
                    c_lat, c_lon = c_lat2, c_lon2
                    pozos = tuple(topk["POZO"].tolist())
                    found = True
                    break
            if not found:
                continue  # no se pudo validar centroide

        # dedupe exacto por set
        key_set = frozenset(pozos)
        if key_set in seen_sets:
            continue
        seen_sets.add(key_set)

        # Score: rm3d o vest (hook)
        if score_mode == "vest":
            # si se usa vest, en generación no sabemos el día; dejamos rm3d como aproximación
            score = float(topk["r_m3_d"].fillna(0).sum())
        else:
            score = float(topk["r_m3_d"].fillna(0).sum())

        # ZONA/BATERIAS: mayoritaria (o homogénea si ya lo está)
        zona_mode = topk["ZONA"].mode()
        zona_val = zona_mode.iloc[0] if not zona_mode.empty else ""
        bats = tuple(sorted(set(str(x) for x in topk["BATERIA"].fillna("").astype(str))))

        clusters.append({
            "ClusterID": f"C{len(seen_sets):05d}",
            "POZOS": pozos,
            "Centroide_LAT": float(c_lat),
            "Centroide_LON": float(c_lon),
            "Score": score,
            "ZONA": zona_val,
            "BATERIAS": bats
        })

    cldf = pd.DataFrame(clusters)
    if cldf.empty:
        return cldf
    cldf = cldf.sort_values("Score", ascending=False).reset_index(drop=True)
    return cldf


def select_clusters_for_day(
    clusters_df: pd.DataFrame,
    used_today: set[str],
    cap_pozos: int,
    backfill_nearest: bool,
    umbral_km_backfill: float,
    clusters_por_dia_max: Optional[int] = None,
    K: int = 5
) -> list[dict]:

    """
    Devuelve lista de dicts: {'POZOS', 'ClusterID', 'Centroide_LAT', 'Centroide_LON', 'Score'}
    - Greedy por Score desc.
    - No repetir pozos del día.
    - Respetar clusters_por_dia_max y cap_pozos (multiplo de K).
    - Si backfill_nearest=True: exigir distancia del centroide nuevo al centroide acumulado ≤ umbral.
    """
    if clusters_df is None or clusters_df.empty:
        return []

    selected = []
    pozos_usados = set(used_today)
    cap_left = int(cap_pozos)
    max_clusters = int(clusters_por_dia_max) if clusters_por_dia_max is not None else None

    # centroide acumulado del día (promedio incremental)
    c_lat_acc, c_lon_acc, n_acc = (np.nan, np.nan, 0)

    def _update_centroid_acc(lat, lon):
        nonlocal c_lat_acc, c_lon_acc, n_acc
        if pd.isna(lat) or pd.isna(lon): 
            return
        if n_acc == 0:
            c_lat_acc, c_lon_acc, n_acc = float(lat), float(lon), 1
        else:
            c_lat_acc = (c_lat_acc*n_acc + float(lat)) / (n_acc + 1)
            c_lon_acc = (c_lon_acc*n_acc + float(lon)) / (n_acc + 1)
            n_acc += 1

    for _, row in clusters_df.iterrows():
        if cap_left < K:
            break
        if max_clusters is not None and len(selected) >= max_clusters:
            break

        pozos = set(row["POZOS"])
        if pozos & pozos_usados:
            # contiene pozo ya tomado hoy
            continue

        if backfill_nearest and len(selected) >= 1 and not (pd.isna(c_lat_acc) or pd.isna(c_lon_acc)):
            dcc = haversine_km(c_lat_acc, c_lon_acc, row["Centroide_LAT"], row["Centroide_LON"])
            if pd.isna(dcc) or dcc > float(umbral_km_backfill) + 1e-9:
                continue

        selected.append({
            "POZOS": list(row["POZOS"]),
            "ClusterID": row["ClusterID"],
            "Centroide_LAT": float(row["Centroide_LAT"]),
            "Centroide_LON": float(row["Centroide_LON"]),
            "Score": float(row["Score"])
        })
        pozos_usados |= pozos
        cap_left -= K
        _update_centroid_acc(row["Centroide_LAT"], row["Centroide_LON"])

    return selected

# ==========================
# ASIGNACIÓN SEMANAL ROUND-ROBIN (usando clústeres precomputados)
# ==========================
def assign_week_round_robin_by_zone(cand_all, team_ids, params, week_start, week_end, radius_km):
    """
    Reparte por día/equipo en una zona, eligiendo clústeres precomputados (no pozos sueltos).
    Reglas duras:
    - Clúster tamaño exacto K
    - Todos los pozos del clúster a ≤ radius_km del centroide (ya validado en build_all_clusters)
    - No repetir POZO en el mismo día (entre equipos de la misma zona)
    """
    dias   = int(params["dias_por_semana"])
    cap_pz = int(params["max_pozos_dia_equipo"])
    K      = int(params.get("max_pozos_por_cluster", 4))
    backfill_nearest = bool(params.get("backfill_nearest_cluster", True))
    umbral_backfill  = float(params.get("umbral_km_backfill", 5.0))
    clusters_por_dia_max = params.get("clusters_por_dia_max", None)

    rows = []

    # NUEVO: pozos ya usados en la semana (para no repetirlos lunes, martes, ...)
    used_week = set()  # <<<

    for d in range(dias):
        day_date = pd.Timestamp(week_start) + pd.Timedelta(days=d)

        # Ventana por DÍA (no por fin de semana) y excluir lo ya usado en la semana
        pool_day = cand_all[~cand_all["POZO"].isin(used_week)].copy()  # <<<
        in_window = (pd.to_datetime(pool_day["due_date"]) <= pd.Timestamp(day_date)) | pool_day["is_overdue"]
        pool_day = pool_day[in_window].copy()
        if pool_day.empty:
            continue

        # Precomputar TODOS los clústeres de este día y zona (overlap permitido)
        clusters_df = build_all_clusters(
            cands=pool_day,
            K=K,
            radius_km=radius_km,
            score_mode="rm3d",
            top_seeds=int(params.get("top_semillas_eval", 30))
        )
        if clusters_df.empty:
            # no hay clúster válido → ese día pueden quedar huecos
            continue

        # Greedy por equipo, sin repetir pozos el mismo día entre equipos
        used_today = set()
        for eq in sorted(team_ids):
            chosen = select_clusters_for_day(
                clusters_df=clusters_df,
                used_today=used_today,
                cap_pozos=cap_pz,
                backfill_nearest=backfill_nearest,
                umbral_km_backfill=umbral_backfill,     # ojo: si tu var se llama umbral_km_backfill, usa ese nombre
                clusters_por_dia_max=clusters_por_dia_max,
                K=K
            )
            if not chosen:
                continue

            # Materializar filas del plan a partir de los clústeres seleccionados
            ord_idx = 1
            for cluster in chosen:
                pozos = cluster["POZOS"]
                c_lat = cluster["Centroide_LAT"]
                c_lon = cluster["Centroide_LON"]
                cid   = cluster["ClusterID"]

                # Traer filas originales para info r_m3_d, zona, batería, ultima_medicion
                info = pool_day[pool_day["POZO"].isin(pozos)].copy()
                info = info.set_index("POZO")

                # asserts (criterios de aceptación)
                assert len(pozos) == K, f"Cluster {cid} no tiene tamaño K={K}"
                # distancias al centroide
                dists = []
                for pz in pozos:
                    la = info.at[pz, "LAT"]; lo = info.at[pz, "LON"]
                    d = haversine_km(c_lat, c_lon, la, lo)
                    dists.append(d)
                    assert (not pd.isna(d)) and d <= radius_km + 1e-6, f"Pozo {pz} excede radio al centroide en cluster {cid}"
                max_d = float(np.max(dists))  # (no lo usamos pero te queda para log)

                # Orden dentro del cluster: por cercanía al centroide, opcional
                pozos_sorted = sorted(pozos, key=lambda p: haversine_km(c_lat, c_lon, info.at[p, "LAT"], info.at[p, "LON"]))

                for pz in pozos_sorted:
                    rec = info.loc[pz]
                    try:
                        v_est = _v_est_for_day({"r_m3_d": rec.get("r_m3_d", np.nan),
                                                "ultima_medicion": rec.get("ultima_medicion", pd.NaT)}, day_date)
                    except Exception:
                        v_est = 0.0

                    rows.append({
                        "Plan_Fecha": day_date.date(),
                        "Semana_ISO": day_date.isocalendar()[1],
                        "Equipo": int(eq),
                        "Dia_Idx": d+1,
                        "Orden": ord_idx,
                        "ZONA": rec.get("ZONA",""),
                        "BATERIA": rec.get("BATERIA",""),
                        "POZO": pz,
                        "r_m3_d": float(rec.get("__v", rec.get("r_m3_d", np.nan))),
                        "Vol_Estimado_m3": round(float(v_est), 2),
                        "Seed_POZO": "",  # ya no trabajamos por semilla en asignación
                        "Dist_km_semilla": None,
                        "Dist_km_centroid": round(float(haversine_km(c_lat, c_lon, rec.get("LAT"), rec.get("LON"))), 3) if not (pd.isna(rec.get("LAT")) or pd.isna(rec.get("LON"))) else None,
                        "ultima_medicion": rec.get("ultima_medicion", pd.NaT),
                        # Nuevas columnas informativas del cluster:
                        "ClusterID": cid,
                        "Centroide_LAT": c_lat,
                        "Centroide_LON": c_lon,
                    })
                    ord_idx += 1

                # Marcar pozos usados hoy y para el resto de la semana
                used_today.update(pozos)
                used_week.update(pozos)  # <<<  clave para que no se repitan en martes/miércoles/etc.

        # verificación de no duplicación diaria
        if rows:
            plan_day = pd.DataFrame(rows)
            same_day = plan_day[plan_day["Plan_Fecha"] == day_date.date()]
            if not same_day.empty:
                dup = same_day.groupby(["Plan_Fecha","POZO"]).size().max()
                assert int(dup) == 1, "Un pozo se repite el mismo día (violación de regla)."

    cols = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
            "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
            "Seed_POZO","Dist_km_semilla","Dist_km_centroid","ultima_medicion",
            "ClusterID","Centroide_LAT","Centroide_LON"]
    return pd.DataFrame(rows, columns=cols) if rows else pd.DataFrame(columns=cols)

# (Se elimina la versión anterior _fill_day_star_clusters: ahora ya no se usa.)

def ensure_annual_coverage_zone_locked(all_pozos_df, plan, params, start_date, equipo_to_zona,
                                       allowed_bats_by_zone_norm=None, r_by_pozo=None):
    cap_pz = params["max_pozos_dia_equipo"]

    keys = []
    for w in range(params["semanas_plan"]):
        w_start = start_date + timedelta(weeks=w)
        for d in range(params["dias_por_semana"]):
            f = w_start + timedelta(days=d)
            for e in equipo_to_zona.keys():
                keys.append((e, f))

    if not plan.empty:
        plan["__key"] = plan["Equipo"].astype(int).astype(str) + "|" + plan["Plan_Fecha"].astype(str)
        used_counts = plan.groupby("__key")["POZO"].count().to_dict()
    else:
        used_counts = {}

    planned = set(plan["POZO"].unique()) if not plan.empty else set()
    missing_df = all_pozos_df[~all_pozos_df["POZO"].isin(planned)].copy()
    missing_df = missing_df[missing_df["BATERIA"].notna() & (missing_df["BATERIA"].astype(str).str.strip()!="")].copy()

    add = []
    for _, row in missing_df.iterrows():
        pz = row["POZO"]; z = row["ZONA"]
        bat = row.get("BATERIA", "")

        if not isinstance(bat, str) or bat.strip() == "":
            continue

        if allowed_bats_by_zone_norm:
            zn = _norm(z)
            bats_allowed = allowed_bats_by_zone_norm.get(zn)
            if bats_allowed is not None:
                if _norm(bat) not in bats_allowed:
                    continue

        if r_by_pozo is not None:
            r_val = float(r_by_pozo.get(pz, np.nan))
            if not (r_val > RM3D_MIN):
                continue

        target_teams = [e for e, zona in equipo_to_zona.items() if zona == z]
        if not target_teams:
            continue
        placed = False
        for e in target_teams:
            for (ee, f) in keys:
                if ee != e:
                    continue
                key = f"{e}|{f}"
                cnt = used_counts.get(key, 0)
                if cnt < cap_pz:
                    add.append({
                        "Plan_Fecha": f,
                        "Semana_ISO": f.isocalendar()[1],
                        "Equipo": int(e),
                        "Dia_Idx": f.weekday()+1,
                        "Orden": cnt+1,
                        "ZONA": z,
                        "BATERIA": bat,
                        "POZO": pz,
                        "r_m3_d": np.nan,
                        "Vol_Estimado_m3": 0.0,
                        "Seed_POZO": "",
                        "Dist_km_semilla": None,
                        "Dist_km_centroid": None,
                        "ultima_medicion": pd.NaT,
                        "ClusterID": "",
                        "Centroide_LAT": np.nan,
                        "Centroide_LON": np.nan,
                    })
                    used_counts[key] = cnt+1
                    placed = True
                    break
            if placed:
                break

    if add:
        plan = pd.concat([plan, pd.DataFrame(add)], ignore_index=True)                 .sort_values(["Plan_Fecha","Equipo","Orden"])
    return plan

def build_alertas_abm(freq_df: pd.DataFrame, norm_table: pd.DataFrame, dict_df: pd.DataFrame) -> pd.DataFrame:
    base = freq_df[["POZO","ZONA","BATERIA","ultima_medicion","ultima_exitosa"]].copy()
    meta_first = dict_df.groupby("oficial")[["estado","met_prod","nivel_3","nivel_5"]].first()
    base = base.merge(meta_first[["estado","met_prod"]], left_on="POZO", right_index=True, how="left")

    out = base.copy()
    for c in ["ultima_medicion","ultima_exitosa"]:
        out[c] = pd.to_datetime(out[c], errors="coerce").dt.date
    out = out.sort_values(["ZONA","BATERIA","POZO"]).reset_index(drop=True)
    return out

# ============================================
# HARNES PARA JUPYTER
# ============================================
def run_pipeline_jupyter(
    input_file,
    nombres_pozo_file,
    coords_file,
    *,
    semanas_plan=2,
    equipos_activos=2,
    dias_por_semana=5,
    max_pozos_dia_equipo=10,
    K_max_pozos_por_cluster=5,
    clusters_por_dia_max=None,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=0.1,
    zonas_incluir=None,
    baterias_por_zona=None,      # {"las heras cg - canadon escondida": {"swabing ce","ce 04"}}
    pozos_excluir=None,
    escribir_excel=False
):
    global INPUT_FILE, NOMBRES_POZO_FILE, COORDS_FILE, RADIUS_KM, RM3D_MIN, DEFAULTS
    INPUT_FILE       = input_file
    NOMBRES_POZO_FILE= nombres_pozo_file
    COORDS_FILE      = coords_file
    RADIUS_KM        = float(radius_km)
    RM3D_MIN         = float(rm3d_min)

    DEFAULTS = DEFAULTS.copy()
    DEFAULTS.update({
        "equipos_activos": int(equipos_activos),
        "dias_por_semana": int(dias_por_semana),
        "semanas_plan": int(semanas_plan),
        "max_pozos_dia_equipo": int(max_pozos_dia_equipo),
        "max_pozos_por_cluster": int(K_max_pozos_por_cluster),
        "clusters_por_dia_max": clusters_por_dia_max,
        "backfill_nearest_cluster": bool(backfill_nearest),
        "umbral_km_backfill": float(umbral_km_backfill),
    })

    # 1) Lee historial (Excel del usuario)
    df = read_historial(INPUT_FILE, SHEET_HIST)

    # 2) Normalización por diccionario
    key2off, dict_df = load_pozo_dictionary(NOMBRES_POZO_FILE)
    df_norm, alert_table, norm_table = apply_pozo_normalization(df, key2off, dict_df)

    # 3) Filtra inválidos
    df = df_norm[df_norm["VALIDO_POZO"] == True].copy()

    # 4) Filtro por ZONA (si se pide explícito)
    if zonas_incluir:
        zonas_incluir = set(zonas_incluir)
        znorm = {_norm(z) for z in zonas_incluir}
        df = df[df["__ZONA_NORM"].isin(znorm)].copy()
        zonas_labels = zonas_incluir
        zonas_norm   = znorm
    else:
        zonas_labels, zonas_norm = set(df["ZONA"].dropna().astype(str)), set(df["__ZONA_NORM"].dropna().astype(str))

    # 5) Sub-filtro de baterías (si lo pasaste por parámetro)
    if baterias_por_zona:
        allowed_bats_by_zone_norm = {zn: set(baterias_por_zona[zn]) if baterias_por_zona[zn] is not None else None
                                     for zn in baterias_por_zona}
    else:
        allowed_bats_by_zone_norm = {zn: None for zn in zonas_norm}

    # 6) Exclusiones (si te pasan un set)
    excl_total = set(pozos_excluir or [])

    # 7) Frecuencias
    params = DEFAULTS.copy()
    freq = compute_frecuencias(df, params)

    # Comentarios desde OBS cuando ultima_medicion != ultima_exitosa
    df_obs = df[["POZO", "FECHA", "OBS_POZO"]].copy() if "OBS_POZO" in df.columns else pd.DataFrame(columns=["POZO","FECHA","OBS_POZO"])
    df_obs["FECHA_DATE"] = pd.to_datetime(df_obs["FECHA"], errors="coerce").dt.date
    df_obs = (df_obs.dropna(subset=["FECHA_DATE"])
                    .sort_values(["POZO","FECHA_DATE"])
                    .drop_duplicates(subset=["POZO","FECHA_DATE"], keep="last"))
    obs_map = {(r.POZO, r.FECHA_DATE): (str(r.OBS_POZO).strip() if pd.notna(r.OBS_POZO) else "")
               for r in df_obs.itertuples(index=False)}
    freq["__UMED_DATE"] = pd.to_datetime(freq["ultima_medicion"], errors="coerce").dt.date
    freq["__UEXI_DATE"] = pd.to_datetime(freq["ultima_exitosa"], errors="coerce").dt.date
    freq["comentario"] = [obs_map.get((pz, fmed), "") for pz, fmed in zip(freq["POZO"], freq["__UMED_DATE"])]
    mask_both_valid = freq["__UMED_DATE"].notna() & freq["__UEXI_DATE"].notna()
    mask_diff = mask_both_valid & (freq["__UMED_DATE"] != freq["__UEXI_DATE"])
    freq.loc[~mask_diff, "comentario"] = ""
    freq.drop(columns=["__UMED_DATE","__UEXI_DATE"], errors="ignore", inplace=True)

    # 8) Coordenadas
    coords_df = read_coords(COORDS_FILE)

    # 9) Mapas auxiliares
    delta_by_pozo = freq.set_index("POZO")["delta_star_dias"].to_dict()
    r_by_pozo     = freq.set_index("POZO")["r_m3_d"].to_dict()

    # 10) Semanas a planificar
    start = next_monday(date.today())
    weeks = [(start + timedelta(weeks=i), start + timedelta(weeks=i, days=6)) for i in range(params["semanas_plan"])]

    # 11) Equipos -> ZONA (fijo)
    zonas_list = sorted(set(zonas_labels))
    equipo_to_zona = {}
    for i in range(1, params["equipos_activos"]+1):
        zona_asignada = zonas_list[min(i-1, len(zonas_list)-1)]
        equipo_to_zona[i] = zona_asignada

    # 12) Plan semanal por ZONA usando la versión V2 (clústeres)
    plan_all = []
    next_due = {row.POZO: row.proxima_visita_base for row in freq.itertuples()}
    zone_to_teams = {}
    for eq, zona_label in equipo_to_zona.items():
        zone_to_teams.setdefault(zona_label, []).append(eq)

    for (w_start, w_end) in weeks:
        for zona_label, team_list in zone_to_teams.items():
            zona_norm_label = _norm(zona_label)
            cand_all = build_candidates_with_coords(
                freq=freq,
                week_start=w_start,
                week_end=w_end,
                excl_pozos=excl_total,
                zonas_norm_incluidas={zona_norm_label},
                coords_df=coords_df,
                allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
                next_due_map=next_due
            )
            if cand_all.empty:
                continue

            cand_zone = cand_all[[  # mantener las columnas necesarias
                "POZO","ZONA","BATERIA","due_date","is_overdue","__v",
                "LAT","LON","has_coords","r_m3_d","ultima_medicion"
            ]].copy()

            plan_week_zone = assign_week_round_robin_by_zone(
                cand_all=cand_zone,
                team_ids=sorted(team_list),
                params=params,
                week_start=w_start,
                week_end=w_end,
                radius_km=RADIUS_KM
            )

            if not plan_week_zone.empty:
                plan_all.append(plan_week_zone)
                # actualizar next_due por pozo asignado
                for pz, fcal in plan_week_zone[["POZO","Plan_Fecha"]].drop_duplicates().itertuples(index=False):
                    dd = int(delta_by_pozo.get(pz, params["min_dias_freq"]))
                    next_due[pz] = pd.Timestamp(fcal) + pd.Timedelta(days=dd)

    plan = (pd.concat(plan_all, ignore_index=True)
            if plan_all else
            pd.DataFrame(columns=[
                "Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden","ZONA","BATERIA",
                "POZO","r_m3_d","Vol_Estimado_m3","Seed_POZO","Dist_km_semilla",
                "Dist_km_centroid","ultima_medicion","ClusterID","Centroide_LAT","Centroide_LON","Cluster_Score"
            ]))

    # 13) Cobertura anual reforzada (opcional) — mantiene tu lógica original (no forma clúster)
    if not freq.empty:
        eligible_mask = (freq["ZONA"].isin(zonas_labels)) & (freq["r_m3_d"].fillna(0) > RM3D_MIN)
        if "comentario" in freq.columns:
            eligible_mask &= (freq["comentario"].astype(str).fillna("").str.strip() == "")
        if allowed_bats_by_zone_norm:
            for zn, bats in allowed_bats_by_zone_norm.items():
                if bats is not None:
                    eligible_mask &= (~(freq["ZONA_NORM"] == zn)) | (freq["BATERIA_NORM"].isin(bats))

        all_pozos_in_zonas = freq.loc[eligible_mask, ["POZO","ZONA","BATERIA"]].drop_duplicates().copy()
        all_pozos_in_zonas = all_pozos_in_zonas[
            all_pozos_in_zonas["BATERIA"].notna() & (all_pozos_in_zonas["BATERIA"].astype(str).str.strip() != "")
        ].copy()

        # usa el filler original (sin clúster) SOLO para cubrir huecos anuales
     #   plan = ensure_annual_coverage_zone_locked(
      #      all_pozos_df=all_pozos_in_zonas,
       #     plan=plan,
        #    params=params,
         #   start_date=start,
          #  equipo_to_zona=equipo_to_zona,
           # allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
            #r_by_pozo=r_by_pozo
       # )

    # 14) Export opcional (agrego columnas nuevas de clúster)
    out_xlsx = None
    if escribir_excel:
        out_xlsx = unique_output_path(INPUT_FILE)
        coords_all = read_coords(COORDS_FILE)
        with pd.ExcelWriter(out_xlsx, engine="openpyxl", mode="w") as writer:
            # Frecuencias
            freq_out = freq.copy()
            for c in ["proxima_visita_base","ultima_medicion","ultima_exitosa"]:
                freq_out[c] = pd.to_datetime(freq_out[c], errors="coerce").dt.date
            freq_out = freq_out.sort_values(["ZONA","BATERIA","POZO"])
            cols_pref = ["POZO","ZONA","BATERIA","ZONA_NORM","BATERIA_NORM","r_m3_d",
                         "ultima_medicion","ultima_exitosa","delta_star_dias","comentario",
                         "proxima_visita_base","ceros_consec","alerta"]
            cols_final = [c for c in cols_pref if c in freq_out.columns] + \
                         [c for c in freq_out.columns if c not in cols_pref]
            freq_out = freq_out[cols_final]
            freq_out.to_excel(writer, "Frecuencias", index=False)

            # Plan por equipo + Km_al_siguiente
            cols_plan = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
                         "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
                         "ClusterID","Centroide_LAT","Centroide_LON","Cluster_Score",
                         "Dist_km_centroid"]
            for eq in range(1, params["equipos_activos"]+1):
                pe = plan.loc[plan["Equipo"]==eq].copy()
                if pe.empty:
                    pe = pd.DataFrame(columns=cols_plan + ["Km_al_siguiente","Ejecutado"])
                else:
                    pe = pe.sort_values(["Plan_Fecha","Dia_Idx","Orden","POZO"]).copy()
                    pe = pe.merge(coords_all, how="left", on="POZO")
                    pe["LAT_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LAT"].shift(-1)
                    pe["LON_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LON"].shift(-1)
                    def _leg_km(row):
                        if (pd.isna(row.get("LAT")) or pd.isna(row.get("LON")) or
                            pd.isna(row.get("LAT_next")) or pd.isna(row.get("LON_next"))):
                            return None
                        return round(float(haversine_km(row["LAT"], row["LON"],
                                                        row["LAT_next"], row["LON_next"])), 3)
                    pe["Km_al_siguiente"] = pe.apply(_leg_km, axis=1)
                    pe.drop(columns=["LAT","LON","LAT_next","LON_next"], inplace=True, errors="ignore")
                    pe["Ejecutado"] = ""
                    for c in cols_plan:
                        if c not in pe.columns: pe[c] = ""
                    pe = pe[cols_plan + ["Km_al_siguiente","Ejecutado"]]
                pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)

            # Auxiliares
            pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)

    # ====== Asserts/chequeos mínimos ======
    # ====== Asserts/chequeos mínimos ======
    if not plan.empty:
        K_chk = int(params.get("max_pozos_por_cluster", 5))

        # ✅ Validar SOLO clústeres reales (ClusterID no vacío)
        mask_real = plan["ClusterID"].notna() & (plan["ClusterID"].astype(str).str.strip() != "")
        gsize = (plan.loc[mask_real]
                 .groupby(["Plan_Fecha","Equipo","ClusterID"])["POZO"]
                 .count())

        if not gsize.empty:
            assert (gsize % K_chk == 0).all(), "Hay clústeres asignados que no cumplen tamaño K exacto."

        # ✅ No duplicación diaria (ningún pozo se repite en el mismo día)
        dupmax = plan.groupby(["Plan_Fecha","POZO"]).size().max()
        assert int(dupmax) == 1, "Un pozo aparece más de una vez en el mismo día."

        # ✅ Radio cumplido (tolerancia numérica)
        if "Dist_km_centroid" in plan.columns and plan["Dist_km_centroid"].notna().any():
            assert float(plan["Dist_km_centroid"].fillna(0).max()) <= float(RADIUS_KM) + 1e-6, \
                "Distancia a centroide excede el radio."

    return plan, freq, out_xlsx




# ============================================
# RUNNER (EDITÁ TUS RUTAS Y PARÁMETROS ACÁ)
# ============================================

INPUT_FILE = r"C:\Users\ry16123\Downloads\Ultimo (ORIGINAL) TABLERO PRODUCCIÓN FLUG S.A 2025 (1).xlsx"
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

plan, freq, out_xlsx = run_pipeline_jupyter(
    input_file=INPUT_FILE,
    nombres_pozo_file=NOMBRES_POZO_FILE,
    coords_file=COORDS_FILE,
    semanas_plan=3,                 # probá corto para iterar rápido
    equipos_activos=3,              # cantidad de equipos
    dias_por_semana=5,              # 5 ó 6
    max_pozos_dia_equipo=5,
    K_max_pozos_por_cluster=5,      # tamaño máximo de clúster
    clusters_por_dia_max=1,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=0.1,
    zonas_incluir=None,             # o lista como ["Las Heras CG - Canadon Escondida"]
    baterias_por_zona=None,         # dict normalizado (keys en _norm) o None
    pozos_excluir=set(),            # ej.: {"BB-100"}
    escribir_excel=True            # poné True si querés exportar el Excel
)

# Mostrar un vistazo rápido
display(freq.head(10))
display(plan.head(30))
print("Excel generado:", out_xlsx)


  warn(msg)
  g[col] = g[col].replace({None: np.nan})
  freq_out.to_excel(writer, "Frecuencias", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)


Unnamed: 0,POZO,ZONA,BATERIA,ZONA_NORM,BATERIA_NORM,r_m3_d,ultima_medicion,ultima_exitosa,delta_star_dias,proxima_visita_base,ceros_consec,alerta,comentario
0,BB-10,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.015385,2025-07-16,2025-07-16,56,2025-09-10,0,,
1,BB-100,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.142857,2023-08-04,2023-08-04,14,2023-08-18,0,,
2,BB-101,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.035714,2025-08-25,2025-08-25,56,2025-10-20,0,,
3,BB.a-104,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.285714,2025-01-24,2025-01-24,7,2025-01-31,0,,
4,BB-111,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.428571,2025-07-01,2025-07-01,7,2025-07-08,0,,
5,BB-133,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.037037,2025-02-19,2025-02-19,56,2025-04-16,0,,
6,BB-170,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.285714,2024-07-24,2024-07-24,7,2024-07-31,0,,
7,BB-21,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.015564,2025-05-12,2025-05-12,56,2025-07-07,0,,
8,BB497,,,,,0.571429,2025-01-08,2025-01-08,7,2025-01-15,0,,
9,BB-50,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.081633,2023-03-10,2023-03-10,28,2023-04-07,0,,


Unnamed: 0,Plan_Fecha,Semana_ISO,Equipo,Dia_Idx,Orden,ZONA,BATERIA,POZO,r_m3_d,Vol_Estimado_m3,Seed_POZO,Dist_km_semilla,Dist_km_centroid,ultima_medicion,ClusterID,Centroide_LAT,Centroide_LON
0,2025-09-29,40,2,2.171666,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE-1234,0.285714,72.86,,,0.29,2025-01-17,C00002,-46.438652,-68.577977
1,2025-09-29,40,2,2.171666,2,Las Heras CG - Canadon Escondida,CE 20,CnE-1224(d),0.285714,37.14,,,0.468,2025-05-22,C00002,-46.438652,-68.577977
2,2025-09-29,40,2,2.171666,3,Las Heras CG - Canadon Escondida,Swabing CE,CnE-473,0.1875,12.75,,,1.172,2025-07-23,C00002,-46.438652,-68.577977
3,2025-09-29,40,2,2.171666,4,Las Heras CG - Canadon Escondida,Swabing CE,CnE-868,0.272727,13.36,,,1.583,2025-08-11,C00002,-46.438652,-68.577977
4,2025-09-29,40,2,2.171666,5,Las Heras CG - Canadon Escondida,Swabing CE,CnE-849,0.2,14.8,,,2.331,2025-07-17,C00002,-46.438652,-68.577977
5,2025-09-29,40,3,2.05588,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE-808,0.1875,3.19,,,1.056,2025-09-12,C00003,-46.403392,-68.562792
6,2025-09-29,40,3,2.05588,2,Las Heras CG - Canadon Escondida,Swabing CE,CnE.a-226,0.238095,5.71,,,1.278,2025-09-05,C00003,-46.403392,-68.562792
7,2025-09-29,40,3,2.05588,3,Las Heras CG - Canadon Escondida,Swabing CE,CnE-199,0.285714,4.86,,,1.414,2025-09-12,C00003,-46.403392,-68.562792
8,2025-09-29,40,3,2.05588,4,Las Heras CG - Canadon Escondida,Swabing CE,CnE-534,0.214286,3.86,,,1.504,2025-09-11,C00003,-46.403392,-68.562792
9,2025-09-29,40,3,2.05588,5,Las Heras CG - Canadon Escondida,Swabing CE,CnE-660,0.2,3.8,,,2.039,2025-09-10,C00003,-46.403392,-68.562792


Excel generado: C:\Users\ry16123\Downloads\Ultimo (ORIGINAL) TABLERO PRODUCCIÓN FLUG S.A 2025 (1)_CRONOGRAMA_20250928_(8).xlsx


In [43]:


# ============================================
# Monocelda Jupyter: Planificador + Harness + Runner
# ============================================

# -*- coding: utf-8 -*-
import os, re, unicodedata, math
import numpy as np
import pandas as pd
from datetime import date, timedelta, datetime
from typing import Optional


# ==========================
# CONFIG por defecto (se sobreescriben en el runner)
# ==========================
INPUT_FILE  = r"DIAGRAMA SW.xlsx"   # Excel base (NO se modifica)
SHEET_HIST  = None                  # None => autodetecta hoja/encabezados
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

# Radio en km para agrupar por cercanía
RADIUS_KM = 3.0
# Filtro mínimo de potencial
RM3D_MIN = 0.1

# Umbrales para fuzzy (si se usan)
FUZZY_REPLACE_THRESHOLD = 85
FUZZY_SUGGEST_THRESHOLD = 75
LETTERS_SIMILARITY_MIN  = 80

DEFAULTS = {
    "equipos_activos": 4,                 # 1..4
    "dias_por_semana": 5,                 # 5 o 6
    "semanas_plan": 2,                    # para probar rápido en Jupyter
    "k_visitas": 1,                       # tasas (K=1 por pedido)
    "max_pozos_dia_equipo": 10,           # cupo por día por equipo
    "max_pozos_por_cluster": 5,           # tamaño de clúster (K fijo si usás lógica de clústeres fijos)
    "m3_por_visita_objetivo": 2.0,        # informativo
    "min_dias_freq": 7,                   # 1 semana
    "max_dias_freq": 56,                  # 8 semanas
    "dias_asumidos_una_visita": 7,        # para r si hay 1 sola visita
    "freq_dias_ultimo_cero_valido": 30,

    # Semillas a evaluar (si se usa lógica de semillas)
    "top_semillas_eval": 30,

    # Control de clústeres por día y backfill (si se usa lógica por semilla)
    "clusters_por_dia_max": None,
    "backfill_nearest_cluster": True,
    "umbral_km_backfill": 5.0,
}

# ==========================
# Utils
# ==========================
def _norm(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = s.replace("³", "3")
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    s = s.lower().strip().replace("\xa0"," ")
    s = s.replace("_"," ").replace("-"," ").replace("."," ").replace("\n"," ")
    return " ".join(s.split())

def _pozo_key(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    return "".join(ch for ch in s if ch.isalnum()).upper()

def _canonical_digits(d: str) -> str:
    d = (d or "").lstrip("0")
    return d if d != "" else "0"

def _letters_digits_from_key_both(k: str):
    raw_digits = "".join(re.findall(r"\d+", k))
    digits_canon = _canonical_digits(raw_digits)
    letters = re.sub(r"\d+", "", k)
    return letters, digits_canon, len(raw_digits)

def _ratio_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _fuzzy_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.partial_ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _canon_prefix_pozo(s: str) -> str:
    if s is None or (isinstance(s, float) and np.isnan(s)):
        return s
    raw = str(s).strip()
    raw_up = raw.upper()
    if raw_up.startswith("CÑE"):
        return "CNE" + raw_up[3:]
    raw_ascii = unicodedata.normalize("NFKD", raw_up).encode("ascii", "ignore").decode("ascii")
    if raw_ascii.startswith("CNE"):
        return raw_ascii
    if raw_ascii.startswith("CN"):
        return "CNE" + raw_ascii[2:]
    m = re.match(r"^CE(\d+)$", raw_ascii)
    if m:
        return "CNE" + m.group(1)
    return raw_ascii

def next_monday(d=None):
    d = d or date.today()
    return d + timedelta(days=(7 - d.weekday()) % 7)  # 0=Lunes

def unique_output_path(base_input_path: str) -> str:
    folder = os.path.dirname(os.path.abspath(base_input_path))
    stem   = os.path.splitext(os.path.basename(base_input_path))[0]
    today  = datetime.now().strftime("%Y%m%d")
    base   = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}.xlsx")
    if not os.path.exists(base): return base
    i = 2
    while True:
        cand = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}_({i}).xlsx")
        if not os.path.exists(cand): return cand
        i += 1

EXPECTED_KEYS = {
    "fecha":       ["fecha"],
    "pozo":        ["pozo"],
    "zona":        ["zona"],
    "bateria":     ["bateria", "batería"],
    "m3":          ["m3 bruta","m3","m3_bruta","m3bruta","m 3 bruta","m 3","m3 bruto","m3 recuperado","m3 recupero"],
    "carreras":    ["n de carreras","n° de carreras","nº de carreras","no de carreras","nro de carreras","numero de carreras","n° carreras","n de carrera","n carreras"],
    "nivel_final": ["nivel final pozo","nivel final","nivel final del pozo"],
    "obs_pozo":    ["observaciones del pozo","observaciones","comentarios","comentario"]
}

def _find_header_row(df_raw):
    for i in range(min(200, len(df_raw))):
        row_norm = [_norm(x) for x in df_raw.iloc[i,:].tolist()]
        if not row_norm:
            continue
        colmap = {v:j for v,j in zip(row_norm, range(len(row_norm)))}
        def has_any(keys): return any(k in colmap for k in keys)
        if has_any(EXPECTED_KEYS["fecha"]) and has_any(EXPECTED_KEYS["pozo"]) and has_any(EXPECTED_KEYS["zona"]) and has_any(EXPECTED_KEYS["bateria"]):
            return i, row_norm
    return None, None

# ---------- Nombres pozo ----------
def load_pozo_dictionary(xlsx_path: str):
    try:
        ref = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer diccionario de pozos: {xlsx_path}\n{e}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    cols = {c.lower().strip(): c for c in ref.columns}
    if "nombre_corto_pozo" not in cols:
        print(f"\n[AVISO] El diccionario no tiene la columna 'nombre_corto_pozo'. Columnas: {list(ref.columns)}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    c_pozo = cols["nombre_corto_pozo"]
    c_met  = cols.get("met_prod")
    c_n3   = cols.get("nivel_3")
    c_n5   = cols.get("nivel_5")
    c_est  = cols.get("estado")

    refv = ref.loc[ref[c_pozo].notna()].copy()
    refv[c_pozo] = refv[c_pozo].astype(str).str.strip()

    of_list  = refv[c_pozo].tolist()
    met_vals = refv[c_met].astype(str).str.strip() if c_met else np.nan
    n3_vals  = refv[c_n3].astype(str).str.strip()  if c_n3 else np.nan
    n5_vals  = refv[c_n5].astype(str).str.strip()  if c_n5 else np.nan
    est_vals = refv[c_est].astype(str).str.strip() if c_est else np.nan

    keys, letters_, digits_canon_, digits_len_ = [], [], [], []
    for val in of_list:
        k = _pozo_key(val)
        L, Dcanon, Dlen = _letters_digits_from_key_both(k)
        keys.append(k); letters_.append(L); digits_canon_.append(Dcanon); digits_len_.append(Dlen)

    dict_df = pd.DataFrame({
        "oficial": of_list,
        "key": keys,
        "letters": letters_,
        "digits_canon": digits_canon_,
        "digits_len": digits_len_,
        "met_prod": list(met_vals) if isinstance(met_vals, pd.Series) else [np.nan]*len(of_list),
        "nivel_3":  list(n3_vals)  if isinstance(n3_vals,  pd.Series) else [np.nan]*len(of_list),
        "nivel_5":  list(n5_vals)  if isinstance(n5_vals,  pd.Series) else [np.nan]*len(of_list),
        "estado":   list(est_vals) if isinstance(est_vals, pd.Series) else [np.nan]*len(of_list),
    })

    key2off = {}
    for k, off in zip(dict_df["key"], dict_df["oficial"]):
        if k and k not in key2off:
            key2off[k] = off
    return key2off, dict_df

def apply_pozo_normalization(df: pd.DataFrame, key2off: dict, dict_df: pd.DataFrame):
    df = df.copy()
    df["POZO_ORIG"] = df["POZO"].astype(str).str.strip()
    df["POZO_PreCanon"] = df["POZO_ORIG"].apply(_canon_prefix_pozo)
    df["__POZO_KEY"] = df["POZO_PreCanon"].apply(_pozo_key)

    parts = df["__POZO_KEY"].apply(_letters_digits_from_key_both)
    df["__KEY_LET"], df["__KEY_DIG_CANON"], df["__KEY_DIG_LEN"] = zip(*parts)

    df["POZO_MATCH"]   = None
    df["MATCH_TIPO"]   = "NO"
    df["MATCH_SCORE"]  = np.nan
    df["LETTER_SCORE"] = np.nan
    df["APLICADO"]     = "NO"
    df["ALERTA_NORM"]  = ""
    df["VALIDO_POZO"]  = True

    invalid_mask = (df["__KEY_LET"].str.len()==0) | (df["__KEY_DIG_LEN"]==0)
    if invalid_mask.any():
        df.loc[invalid_mask, "ALERTA_NORM"] = "SIN_LETRAS_O_DIGITOS"
        df.loc[invalid_mask, "VALIDO_POZO"] = False

    valid_mask = ~invalid_mask
    exact_mask = valid_mask & df["__POZO_KEY"].isin(key2off.keys())
    df.loc[exact_mask, "POZO_MATCH"]   = df.loc[exact_mask, "__POZO_KEY"].map(key2off)
    df.loc[exact_mask, "MATCH_TIPO"]   = "EXACTO"
    df.loc[exact_mask, "MATCH_SCORE"]  = 100
    df.loc[exact_mask, "LETTER_SCORE"] = 100
    df.loc[exact_mask, "APLICADO"]     = "SI"

    pending = df[valid_mask & (~exact_mask)].index.tolist()
    if pending and not dict_df.empty:
        dict_by_spec = {}
        for spec, sub in dict_df.groupby(["digits_canon","digits_len"]):
            dict_by_spec[spec] = sub

        for idx in pending:
            key_u   = df.at[idx, "__POZO_KEY"]
            let_u   = df.at[idx, "__KEY_LET"]
            digc_u  = df.at[idx, "__KEY_DIG_CANON"]
            digl_u  = int(df.at[idx, "__KEY_DIG_LEN"])

            cand_df = dict_by_spec.get((digc_u, digl_u), pd.DataFrame())
            best_off, best_score, best_lscore = None, -1, -1

            if cand_df is not None and not cand_df.empty:
                for row in cand_df.itertuples():
                    kk = row.key
                    ll = row.letters
                    sc_key = _fuzzy_score(key_u, kk)
                    sc_let = _ratio_score(let_u, ll)
                    if sc_let < LETTERS_SIMILARITY_MIN:
                        continue
                    if sc_key > best_score or (sc_key == best_score and sc_let > best_lscore):
                        best_score = sc_key
                        best_lscore = sc_let
                        best_off   = row.oficial

            if best_off is not None:
                df.at[idx, "POZO_MATCH"]   = best_off
                df.at[idx, "MATCH_TIPO"]   = "SUGERIDO"
                df.at[idx, "MATCH_SCORE"]  = int(best_score)
                df.at[idx, "LETTER_SCORE"] = int(best_lscore)
            else:
                df.at[idx, "ALERTA_NORM"] = "SIN MATCH EN DICCIONARIO"

    # Reemplazos
    df["POZO"] = df["POZO_MATCH"].where(df["POZO_MATCH"].notna(), df["POZO"])
    meta_first = dict_df.groupby("oficial")[["met_prod","nivel_3","nivel_5"]].first()
    df = df.merge(meta_first, how="left", left_on="POZO", right_index=True)

    # ZONA sólo si hubo match; sino, vacío
    if "nivel_3" in df.columns:
        df.loc[df["POZO_MATCH"].isna(), "nivel_3"] = ""
        df["ZONA"] = np.where(df["POZO_MATCH"].notna(), df["nivel_3"].fillna(""), "")

    # BATERIA si hay nivel_5
    if "nivel_5" in df.columns:
        df["BATERIA"] = np.where(
            df["nivel_5"].notna() & (df["nivel_5"].astype(str).str.strip()!=""),
            df["nivel_5"], df["BATERIA"]
        )

    df["__ZONA_NORM"]    = df["ZONA"].apply(_norm)
    df["__BATERIA_NORM"] = df["BATERIA"].apply(_norm)

    norm_table = (df[["POZO_ORIG","POZO_PreCanon","__POZO_KEY",
                      "__KEY_LET","__KEY_DIG_CANON","__KEY_DIG_LEN",
                      "POZO_MATCH","MATCH_TIPO","MATCH_SCORE","LETTER_SCORE",
                      "APLICADO","ALERTA_NORM","VALIDO_POZO",
                      "met_prod","nivel_3","nivel_5"]]
                  .drop_duplicates()
                  .rename(columns={
                      "POZO_ORIG":"Pozo_Original",
                      "POZO_PreCanon":"Pozo_PreCanon",
                      "__POZO_KEY":"Clave_Normalizada",
                      "__KEY_LET":"Letras",
                      "__KEY_DIG_CANON":"Digitos_Canon",
                      "__KEY_DIG_LEN":"Digitos_Len",
                      "POZO_MATCH":"Match_Oficial",
                      "MATCH_TIPO":"Match_Tipo",
                      "MATCH_SCORE":"Match_Score",
                      "LETTER_SCORE":"Letter_Score",
                      "APLICADO":"Aplicado",
                      "ALERTA_NORM":"Alerta",
                      "VALIDO_POZO":"Valido",
                      "met_prod":"met_prod",
                      "nivel_3":"nivel_3",
                      "nivel_5":"nivel_5"
                  })
                  .sort_values(["Valido","Aplicado","Match_Tipo","Pozo_Original"], ascending=[False, False, True, True]))

    alert_table = norm_table[(norm_table["Valido"]==False) | (norm_table["Aplicado"]=="NO") | (norm_table["Match_Tipo"]=="NO")].copy()
    return df, alert_table, norm_table

def read_historial(xlsx_path, sheet_hist=None):
    xl = pd.ExcelFile(xlsx_path)
    sheets = [sheet_hist] if (sheet_hist and sheet_hist in xl.sheet_names) else xl.sheet_names
    for sh in sheets:
        raw = xl.parse(sh, header=None)
        idx, header_norm = _find_header_row(raw)
        if idx is None:
            continue
        data = raw.iloc[idx:, :].copy()
        true_headers = data.iloc[0,:].astype(str).tolist()
        data = data.iloc[1:,:]
        data.columns = true_headers

        name_map = {c: _norm(c) for c in data.columns}
        def find_col(candidates):
            for c, n in name_map.items():
                if n in candidates:
                    return c
            return None

        c_fecha       = find_col(set(EXPECTED_KEYS["fecha"]))
        c_pozo        = find_col(set(EXPECTED_KEYS["pozo"]))
        c_zona        = find_col(set(EXPECTED_KEYS["zona"]))
        c_bateria     = find_col(set(EXPECTED_KEYS["bateria"]))
        c_m3          = find_col(set(EXPECTED_KEYS["m3"]))
        c_carr        = find_col(set(EXPECTED_KEYS["carreras"]))
        c_nivel_final = find_col(set(EXPECTED_KEYS["nivel_final"]))
        c_obs         = find_col(set(EXPECTED_KEYS["obs_pozo"]))

        if not (c_fecha and c_pozo and c_zona and c_bateria):
            continue

        use_cols = [c_fecha, c_pozo, c_zona, c_bateria]
        headers  = ["FECHA","POZO","ZONA","BATERIA"]
        if c_m3:            use_cols.append(c_m3);            headers.append("M3")
        if c_carr:          use_cols.append(c_carr);          headers.append("CARRERAS")
        if c_nivel_final:   use_cols.append(c_nivel_final);   headers.append("NIVEL_FINAL")
        if c_obs:           use_cols.append(c_obs);           headers.append("OBS_POZO")

        df = data[use_cols].copy()
        df.columns = headers

        df["FECHA"] = pd.to_datetime(df["FECHA"], errors="coerce")
        if "M3" not in df.columns: df["M3"] = np.nan
        else: df["M3"] = pd.to_numeric(df["M3"], errors="coerce")

        if "CARRERAS" not in df.columns: df["CARRERAS"] = np.nan
        else: df["CARRERAS"] = pd.to_numeric(df["CARRERAS"], errors="coerce")

        if "NIVEL_FINAL" not in df.columns:
            df["NIVEL_FINAL"] = None
        if "OBS_POZO" not in df.columns:
            df["OBS_POZO"] = None

        for col in ["POZO","ZONA","BATERIA","NIVEL_FINAL","OBS_POZO"]:
            df[col] = df[col].astype(str).str.strip().replace({"nan": np.nan})

        df = df.dropna(subset=["FECHA","POZO"]).sort_values(["POZO","FECHA"])
        return df

    raise ValueError("No pude detectar FECHA/POZO/ZONA/BATERÍA en ninguna hoja del Excel.")

def read_exclusions_from_sheet(xlsx_path):
    excl = set()
    try:
        xl = pd.ExcelFile(xlsx_path)
        if "ExcluirPozos" in xl.sheet_names:
            e = xl.parse("ExcluirPozos")
            e.columns = [str(c).strip().lower() for c in e.columns]
            if "pozo" in e.columns:
                if "excluir" in e.columns:
                    excl = set(e.loc[e["excluir"].astype(str).str.upper().isin(
                        ["SI","SÍ","YES","1","TRUE"]), "pozo"].astype(str).str.strip())
                else:
                    excl = set(e["pozo"].astype(str).str.strip())
    except Exception:
        pass
    return excl

# ==========================
# Frecuencias / r_m3_d
# ==========================
def _count_trailing_zeros_with_carr(g):
    cnt = 0
    for _, row in g.sort_values("FECHA").iloc[::-1].iterrows():
        m3 = row.get("M3", np.nan)
        car = row.get("CARRERAS", np.nan)
        if pd.notna(m3) and float(m3) == 0.0 and pd.notna(car) and float(car) > 0:
            cnt += 1
        else:
            break
    return cnt

def compute_frecuencias(df, params):
    v_target = params["m3_por_visita_objetivo"]
    min_d    = params["min_dias_freq"]
    max_d    = params["max_dias_freq"]
    k        = int(params["k_visitas"])
    one_days = int(params.get("dias_asumidos_una_visita", 7))
    freq_cero_ultimo = int(params.get("freq_dias_ultimo_cero_valido", 30))

    out = []
    for pozo, g0 in df.groupby("POZO", sort=False):
        g = g0.sort_values("FECHA").copy()

        for col in ["ZONA","BATERIA","NIVEL_FINAL"]:
            if col in g.columns:
                g[col] = g[col].replace({None: np.nan})
                g[col] = g[col].ffill().bfill()

        g["__ZONA_NORM"]    = g["ZONA"].apply(_norm)
        g["__BATERIA_NORM"] = g["BATERIA"].apply(_norm)
        g["__nf_norm"]      = g["NIVEL_FINAL"].apply(_norm) if "NIVEL_FINAL" in g.columns else ""

        med_validas_all = g[g["M3"].notna()].copy()

        m3_eq0 = g["M3"].fillna(0) == 0
        carr   = g.get("CARRERAS", pd.Series(index=g.index, dtype=float)).fillna(np.nan)
        zero_cond_a = m3_eq0 & (carr.fillna(0) >= 1)
        zero_cond_b = m3_eq0 & ((carr.isna()) | (carr.fillna(0) == 0)) & (g["__nf_norm"] == "surge")
        cond_cero_valido = zero_cond_a | zero_cond_b

        validas_rate = g[(g["M3"] > 0) | cond_cero_valido].copy()
        zeros_tail = _count_trailing_zeros_with_carr(g)

        ultima_med = med_validas_all["FECHA"].max() if not med_validas_all.empty else pd.NaT
        ultima_exi = g.loc[g["M3"]>0, "FECHA"].max() if "M3" in g.columns and not g[g["M3"]>0].empty else pd.NaT

        last_zero_valido = False
        if not med_validas_all.empty:
            idx_last = med_validas_all["FECHA"].idxmax()
            m3_last  = g.at[idx_last, "M3"]
            if pd.notna(m3_last) and float(m3_last) == 0.0:
                try:
                    last_zero_valido = bool(cond_cero_valido.loc[idx_last])
                except Exception:
                    last_zero_valido = False

        alerta = ""
        if last_zero_valido:
            alerta = f"ULTIMA_M3_0_VALIDO -> FREQ {freq_cero_ultimo}D"
        elif pd.notna(ultima_med):
            if zeros_tail > 0:
                alerta = f"ALERTA: {zeros_tail} cero(s) consecutivo(s) con Carreras>0"

        # r_m3_d
        r = np.nan
        if not validas_rate.empty:
            v = validas_rate.copy()
            v["delta_d"] = v["FECHA"].diff().dt.days
            v.loc[v["delta_d"] <= 0, "delta_d"] = np.nan
            v["rate"] = v["M3"].fillna(0) / v["delta_d"]
            rates = v["rate"].dropna()
            if len(rates) >= 1:
                r = rates.tail(min(k, len(rates))).mean()
            else:
                row = v.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan
        else:
            if len(med_validas_all) == 1:
                row = med_validas_all.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan

        # FRECUENCIA
        if last_zero_valido:
            delta = int(freq_cero_ultimo)
        else:
            if pd.isna(r):      delta = 7
            elif r <= 0:        delta = max_d
            else:
                delta = max(min_d, min(max_d, float(v_target)/float(r)))
                delta = int(7 * round(delta / 7.0))
                if delta < 7:
                    delta = 7

        prox = (ultima_med + pd.Timedelta(days=int(delta))) if pd.notna(ultima_med) else pd.Timestamp(next_monday())

        out.append({
            "POZO": pozo,
            "ZONA": g["ZONA"].iloc[-1],
            "BATERIA": g["BATERIA"].iloc[-1],
            "ZONA_NORM": g["__ZONA_NORM"].iloc[-1],
            "BATERIA_NORM": g["__BATERIA_NORM"].iloc[-1],
            "r_m3_d": r,
            "ultima_medicion": ultima_med,
            "ultima_exitosa": ultima_exi,
            "delta_star_dias": int(delta),
            "proxima_visita_base": prox,
            "ceros_consec": zeros_tail,
            "alerta": alerta
        })
    return pd.DataFrame(out)

# ==========================
# Coordenadas
# ==========================
def _to_float_maybe_comma(x):
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip()
    if s == "": return np.nan
    s = s.replace(",", ".")
    try:
        return float(s)
    except Exception:
        return np.nan

def read_coords(xlsx_path):
    try:
        cdf = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer coordenadas: {xlsx_path}\n{e}\n")
        return pd.DataFrame(columns=["POZO","LAT","LON"])
    cols_map = {c.lower().strip(): c for c in cdf.columns}
    c_pozo = cols_map.get("pozo")
    for k in ["geo_latitude","latitude","lat"]:
        if k in cols_map:
            c_lat = cols_map[k]; break
    else:
        c_lat = None
    for k in ["geo_longitude","longitude","lon","long"]:
        if k in cols_map:
            c_lon = cols_map[k]; break
    else:
        c_lon = None

    if not (c_pozo and c_lat and c_lon):
        print(f"[AVISO] Coordenadas: columnas esperadas 'POZO','GEO_LATITUDE','GEO_LONGITUDE'. Columnas encontradas: {list(cdf.columns)}")
        return pd.DataFrame(columns=["POZO","LAT","LON"])

    out = cdf[[c_pozo, c_lat, c_lon]].copy()
    out.columns = ["POZO","LAT","LON"]
    out["POZO"] = out["POZO"].astype(str).str.strip()
    out["LAT"] = out["LAT"].apply(_to_float_maybe_comma)
    out["LON"] = out["LON"].apply(_to_float_maybe_comma)
    out = out.dropna(subset=["POZO"])
    out = out.drop_duplicates(subset=["POZO"], keep="last")
    return out

# ==========================
# Candidatos y utilidades
# ==========================
def build_candidates_with_coords(freq, week_start, week_end, excl_pozos,
                                 zonas_norm_incluidas, coords_df,
                                 allowed_bats_by_zone_norm=None,
                                 next_due_map=None):
    F = freq.copy()

    # due_date base (permitimos override con next_due_map)
    F["due_date"] = F["proxima_visita_base"]
    if next_due_map:
        F["due_date"] = F["POZO"].map(next_due_map).fillna(F["due_date"])

    F["overdue_d"] = (pd.Timestamp(week_start) - pd.to_datetime(F["due_date"])).dt.days
    F["is_overdue"] = F["overdue_d"] > 0

    # prioridad
    F["__v"] = F["r_m3_d"].astype(float)

    # Filtro por ZONA (normalizada)
    if "ZONA_NORM" in F.columns and zonas_norm_incluidas:
        F = F[F["ZONA_NORM"].isin(zonas_norm_incluidas)].copy()

    # Sub-filtro por BATERÍA (si corresponde)
    if allowed_bats_by_zone_norm:
        mask = pd.Series(True, index=F.index)
        for zn in zonas_norm_incluidas:
            bats = allowed_bats_by_zone_norm.get(zn)
            if bats is not None:
                mask &= ~ (F["ZONA_NORM"] == zn) | (F["BATERIA_NORM"].isin(bats))
        F = F[mask].copy()

    # Exclusiones
    if excl_pozos:
        F = F[~F["POZO"].isin(excl_pozos)].copy()

    # Potencial mínimo y BATERÍA no vacía
    F = F[F["r_m3_d"].fillna(0) > RM3D_MIN].copy()
    F = F[F["BATERIA"].notna() & (F["BATERIA"].astype(str).str.strip() != "")].copy()

    # Excluir pozos con comentario no vacío en Frecuencias
    if "comentario" in F.columns:
        F["__comentario_txt"] = F["comentario"].astype(str).fillna("").str.strip()
        F = F[F["__comentario_txt"] == ""].copy()
        F.drop(columns=["__comentario_txt"], inplace=True, errors="ignore")

    # Merge coordenadas
    coords_df = coords_df if coords_df is not None else pd.DataFrame(columns=["POZO","LAT","LON"])
    F = F.merge(coords_df, how="left", on="POZO")
    F["has_coords"] = F["LAT"].notna() & F["LON"].notna()

    # Orden base
    F = F.sort_values(by=["is_overdue","__v","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    return F

def _v_est_for_day(row, day_date):
    r = row.get("r_m3_d", np.nan)
    u = row.get("ultima_medicion", pd.NaT)
    if pd.isna(u) or pd.isna(r) or r <= 0:
        return 0.0
    dd = max(0, (pd.Timestamp(day_date) - pd.Timestamp(u)).days)
    return max(0.0, float(r) * float(dd))

def haversine_km(lat1, lon1, lat2, lon2):
    try:
        if pd.isna(lat1) or pd.isna(lon1) or pd.isna(lat2) or pd.isna(lon2):
            return np.nan
        R = 6371.0088
        p1 = math.radians(float(lat1)); p2 = math.radians(float(lat2))
        dphi = math.radians(float(lat2) - float(lat1))
        dlmb = math.radians(float(lon2) - float(lon1))
        a = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dlmb/2)**2
        return 2*R*math.asin(math.sqrt(a))
    except Exception:
        return np.nan

# ==========================
# NUEVA LÓGICA DE CLÚSTERES (según prompt)
# ==========================
def _bbox_filter(df, lat0, lon0, rad_km):
    """Bounding-box previo a haversine para acotar vecinos."""
    if pd.isna(lat0) or pd.isna(lon0) or df.empty:
        return df.iloc[0:0]
    dlat = rad_km / 110.574
    dlon = rad_km / (111.320 * max(0.1, math.cos(math.radians(float(lat0)))))
    return df[(df["LAT"].between(lat0 - dlat, lat0 + dlat)) &
              (df["LON"].between(lon0 - dlon, lon0 + dlon))].copy()

def _cluster_centroid(lat_list, lon_list):
    if not lat_list or not lon_list:
        return (np.nan, np.nan)
    return float(np.mean(lat_list)), float(np.mean(lon_list))

def _validate_cluster_by_centroid(lat_list, lon_list, radius_km):
    c_lat, c_lon = _cluster_centroid(lat_list, lon_list)
    if pd.isna(c_lat) or pd.isna(c_lon):
        return False, (np.nan, np.nan), np.inf
    dmax = 0.0
    for la, lo in zip(lat_list, lon_list):
        d = haversine_km(c_lat, c_lon, la, lo)
        if pd.isna(d) or d > radius_km + 1e-9:
            return False, (c_lat, c_lon), np.inf
        dmax = max(dmax, d)
    return True, (c_lat, c_lon), dmax

def build_all_clusters(
    cands: pd.DataFrame,
    K: int,
    radius_km: float,
    score_mode: str = "rm3d",
    top_seeds: int = 30
) -> pd.DataFrame:
    """
    Devuelve DF con:
    ['ClusterID','POZOS','Centroide_LAT','Centroide_LON','Score','ZONA','BATERIAS']
    - Exactamente K pozos por clúster
    - Validación: todos a <= radius_km del CENTROIDE
    - Overlap permitido en generación
    - Semillas: mejores 'top_seeds' por __v
    - Elimina duplicados exactos (mismo conjunto de pozos)
    """
    if cands.empty:
        return pd.DataFrame(columns=["ClusterID","POZOS","Centroide_LAT","Centroide_LON","Score","ZONA","BATERIAS"])

    # trabajar solo con pozos con coords
    base = cands[cands["has_coords"]].copy()
    if base.empty:
        return pd.DataFrame(columns=["ClusterID","POZOS","Centroide_LAT","Centroide_LON","Score","ZONA","BATERIAS"])

    base = base.sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    seeds = base.head(max(1, int(top_seeds))).copy()

    clusters = []
    seen_sets = set()  # para deduplicar por conjunto de pozos
    for _, seed in seeds.iterrows():
        s_lat, s_lon = seed["LAT"], seed["LON"]
        neigh = _bbox_filter(base, s_lat, s_lon, radius_km)
        if neigh.empty:
            continue
        # Orden por valor y cercanía a la semilla
        neigh = neigh.copy()
        neigh["__dist_seed"] = neigh.apply(lambda r: haversine_km(s_lat, s_lon, r["LAT"], r["LON"]), axis=1)
        neigh = neigh[neigh["__dist_seed"] <= radius_km]
        neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

        # Tomar candidatos top K alrededor de la semilla (semilla incluida)
        if seed["POZO"] not in neigh["POZO"].values:
            # asegurar que la semilla esté
            neigh = pd.concat([pd.DataFrame([seed]), neigh], ignore_index=True)
            neigh = neigh.drop_duplicates(subset=["POZO"], keep="first")

        if len(neigh) < K:
            # no alcanza tamaño K dentro del radio de la semilla
            continue

        # Probar ventana de los top K mejor valuados dentro del radio
        topk = neigh.head(K).copy()
        pozos = tuple(topk["POZO"].tolist())
        lats  = topk["LAT"].tolist()
        lons  = topk["LON"].tolist()

        ok, (c_lat, c_lon), dmax = _validate_cluster_by_centroid(lats, lons, radius_km)
        if not ok:
            # Intentar ajustar: expandir lista ordenada y mover una ventana sobre los N mejores vecinos
            N = min(len(neigh), K + 10)  # ventana corta para evitar combinatoria
            window = neigh.head(N).copy()
            found = False
            # estrategia greedy: fijar semilla y tomar los K-1 mejores por __v que cumplan centroide
            # probando reemplazos simples si no valida
            for i in range(0, N-K+1):
                cand = window.iloc[i:i+K]
                lats2 = cand["LAT"].tolist(); lons2 = cand["LON"].tolist()
                ok2, (c_lat2, c_lon2), _ = _validate_cluster_by_centroid(lats2, lons2, radius_km)
                if ok2:
                    topk = cand.copy()
                    c_lat, c_lon = c_lat2, c_lon2
                    pozos = tuple(topk["POZO"].tolist())
                    found = True
                    break
            if not found:
                continue  # no se pudo validar centroide

        # dedupe exacto por set
        key_set = frozenset(pozos)
        if key_set in seen_sets:
            continue
        seen_sets.add(key_set)

        # Score: rm3d o vest (hook)
        if score_mode == "vest":
            # si se usa vest, en generación no sabemos el día; dejamos rm3d como aproximación
            score = float(topk["r_m3_d"].fillna(0).sum())
        else:
            score = float(topk["r_m3_d"].fillna(0).sum())

        # ZONA/BATERIAS: mayoritaria (o homogénea si ya lo está)
        zona_mode = topk["ZONA"].mode()
        zona_val = zona_mode.iloc[0] if not zona_mode.empty else ""
        bats = tuple(sorted(set(str(x) for x in topk["BATERIA"].fillna("").astype(str))))

        clusters.append({
            "ClusterID": f"C{len(seen_sets):05d}",
            "POZOS": pozos,
            "Centroide_LAT": float(c_lat),
            "Centroide_LON": float(c_lon),
            "Score": score,
            "ZONA": zona_val,
            "BATERIAS": bats
        })

    cldf = pd.DataFrame(clusters)
    if cldf.empty:
        return cldf
    cldf = cldf.sort_values("Score", ascending=False).reset_index(drop=True)
    return cldf


def select_clusters_for_day(
    clusters_df: pd.DataFrame,
    used_today: set[str],
    cap_pozos: int,
    backfill_nearest: bool,
    umbral_km_backfill: float,
    clusters_por_dia_max: Optional[int] = None,
    K: int = 5
) -> list[dict]:

    """
    Devuelve lista de dicts: {'POZOS', 'ClusterID', 'Centroide_LAT', 'Centroide_LON', 'Score'}
    - Greedy por Score desc.
    - No repetir pozos del día.
    - Respetar clusters_por_dia_max y cap_pozos (multiplo de K).
    - Si backfill_nearest=True: exigir distancia del centroide nuevo al centroide acumulado ≤ umbral.
    """
    if clusters_df is None or clusters_df.empty:
        return []

    selected = []
    pozos_usados = set(used_today)
    cap_left = int(cap_pozos)
    max_clusters = int(clusters_por_dia_max) if clusters_por_dia_max is not None else None

    # centroide acumulado del día (promedio incremental)
    c_lat_acc, c_lon_acc, n_acc = (np.nan, np.nan, 0)

    def _update_centroid_acc(lat, lon):
        nonlocal c_lat_acc, c_lon_acc, n_acc
        if pd.isna(lat) or pd.isna(lon): 
            return
        if n_acc == 0:
            c_lat_acc, c_lon_acc, n_acc = float(lat), float(lon), 1
        else:
            c_lat_acc = (c_lat_acc*n_acc + float(lat)) / (n_acc + 1)
            c_lon_acc = (c_lon_acc*n_acc + float(lon)) / (n_acc + 1)
            n_acc += 1

    for _, row in clusters_df.iterrows():
        if cap_left < K:
            break
        if max_clusters is not None and len(selected) >= max_clusters:
            break

        pozos = set(row["POZOS"])
        if pozos & pozos_usados:
            # contiene pozo ya tomado hoy
            continue

        if backfill_nearest and len(selected) >= 1 and not (pd.isna(c_lat_acc) or pd.isna(c_lon_acc)):
            dcc = haversine_km(c_lat_acc, c_lon_acc, row["Centroide_LAT"], row["Centroide_LON"])
            if pd.isna(dcc) or dcc > float(umbral_km_backfill) + 1e-9:
                continue

        selected.append({
            "POZOS": list(row["POZOS"]),
            "ClusterID": row["ClusterID"],
            "Centroide_LAT": float(row["Centroide_LAT"]),
            "Centroide_LON": float(row["Centroide_LON"]),
            "Score": float(row["Score"])
        })
        pozos_usados |= pozos
        cap_left -= K
        _update_centroid_acc(row["Centroide_LAT"], row["Centroide_LON"])

    return selected

# ==========================
# ASIGNACIÓN SEMANAL ROUND-ROBIN (usando clústeres precomputados)
# ==========================
def assign_week_round_robin_by_zone(cand_all, team_ids, params, week_start, week_end, radius_km):
    """
    Reparte por día/equipo en una zona, eligiendo clústeres precomputados (no pozos sueltos).
    Reglas duras:
    - Clúster tamaño exacto K
    - Todos los pozos del clúster a ≤ radius_km del centroide (ya validado en build_all_clusters)
    - No repetir POZO en el mismo día (entre equipos de la misma zona)
    """
    dias   = int(params["dias_por_semana"])
    cap_pz = int(params["max_pozos_dia_equipo"])
    K      = int(params.get("max_pozos_por_cluster", 4))
    backfill_nearest = bool(params.get("backfill_nearest_cluster", True))
    umbral_backfill  = float(params.get("umbral_km_backfill", 5.0))
    clusters_por_dia_max = params.get("clusters_por_dia_max", None)

    rows = []

    # NUEVO: pozos ya usados en la semana (para no repetirlos lunes, martes, ...)
    used_week = set()  # <<<

    for d in range(dias):
        day_date = pd.Timestamp(week_start) + pd.Timedelta(days=d)

        # Ventana por DÍA (no por fin de semana) y excluir lo ya usado en la semana
        pool_day = cand_all[~cand_all["POZO"].isin(used_week)].copy()  # <<<
        in_window = (pd.to_datetime(pool_day["due_date"]) <= pd.Timestamp(day_date)) | pool_day["is_overdue"]
        pool_day = pool_day[in_window].copy()
        if pool_day.empty:
            continue

        # Precomputar TODOS los clústeres de este día y zona (overlap permitido)
        clusters_df = build_all_clusters(
            cands=pool_day,
            K=K,
            radius_km=radius_km,
            score_mode="rm3d",
            top_seeds=int(params.get("top_semillas_eval", 30))
        )
        if clusters_df.empty:
            # no hay clúster válido → ese día pueden quedar huecos
            continue

        # Greedy por equipo, sin repetir pozos el mismo día entre equipos
        used_today = set()
        for eq in sorted(team_ids):
            chosen = select_clusters_for_day(
                clusters_df=clusters_df,
                used_today=used_today,
                cap_pozos=cap_pz,
                backfill_nearest=backfill_nearest,
                umbral_km_backfill=umbral_backfill,     # ojo: si tu var se llama umbral_km_backfill, usa ese nombre
                clusters_por_dia_max=clusters_por_dia_max,
                K=K
            )
            if not chosen:
                continue

            # Materializar filas del plan a partir de los clústeres seleccionados
            ord_idx = 1
            for cluster in chosen:
                pozos = cluster["POZOS"]
                c_lat = cluster["Centroide_LAT"]
                c_lon = cluster["Centroide_LON"]
                cid   = cluster["ClusterID"]

                # Traer filas originales para info r_m3_d, zona, batería, ultima_medicion
                info = pool_day[pool_day["POZO"].isin(pozos)].copy()
                info = info.set_index("POZO")

                # asserts (criterios de aceptación)
                assert len(pozos) == K, f"Cluster {cid} no tiene tamaño K={K}"
                # distancias al centroide
                dists = []
                for pz in pozos:
                    la = info.at[pz, "LAT"]; lo = info.at[pz, "LON"]
                    d = haversine_km(c_lat, c_lon, la, lo)
                    dists.append(d)
                    assert (not pd.isna(d)) and d <= radius_km + 1e-6, f"Pozo {pz} excede radio al centroide en cluster {cid}"
                max_d = float(np.max(dists))  # (no lo usamos pero te queda para log)

                # Orden dentro del cluster: por cercanía al centroide, opcional
                pozos_sorted = sorted(pozos, key=lambda p: haversine_km(c_lat, c_lon, info.at[p, "LAT"], info.at[p, "LON"]))

                for pz in pozos_sorted:
                    rec = info.loc[pz]
                    try:
                        v_est = _v_est_for_day({"r_m3_d": rec.get("r_m3_d", np.nan),
                                                "ultima_medicion": rec.get("ultima_medicion", pd.NaT)}, day_date)
                    except Exception:
                        v_est = 0.0

                    rows.append({
                        "Plan_Fecha": day_date.date(),
                        "Semana_ISO": day_date.isocalendar()[1],
                        "Equipo": int(eq),
                        "Dia_Idx": d+1,
                        "Orden": ord_idx,
                        "ZONA": rec.get("ZONA",""),
                        "BATERIA": rec.get("BATERIA",""),
                        "POZO": pz,
                        "r_m3_d": float(rec.get("__v", rec.get("r_m3_d", np.nan))),
                        "Vol_Estimado_m3": round(float(v_est), 2),
                        "Seed_POZO": "",  # ya no trabajamos por semilla en asignación
                        "Dist_km_semilla": None,
                        "Dist_km_centroid": round(float(haversine_km(c_lat, c_lon, rec.get("LAT"), rec.get("LON"))), 3) if not (pd.isna(rec.get("LAT")) or pd.isna(rec.get("LON"))) else None,
                        "ultima_medicion": rec.get("ultima_medicion", pd.NaT),
                        # Nuevas columnas informativas del cluster:
                        "ClusterID": cid,
                        "Centroide_LAT": c_lat,
                        "Centroide_LON": c_lon,
                    })
                    ord_idx += 1

                # Marcar pozos usados hoy y para el resto de la semana
                used_today.update(pozos)
                used_week.update(pozos)  # <<<  clave para que no se repitan en martes/miércoles/etc.

        # verificación de no duplicación diaria
        if rows:
            plan_day = pd.DataFrame(rows)
            same_day = plan_day[plan_day["Plan_Fecha"] == day_date.date()]
            if not same_day.empty:
                dup = same_day.groupby(["Plan_Fecha","POZO"]).size().max()
                assert int(dup) == 1, "Un pozo se repite el mismo día (violación de regla)."

    cols = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
            "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
            "Seed_POZO","Dist_km_semilla","Dist_km_centroid","ultima_medicion",
            "ClusterID","Centroide_LAT","Centroide_LON"]
    return pd.DataFrame(rows, columns=cols) if rows else pd.DataFrame(columns=cols)

# (Se elimina la versión anterior _fill_day_star_clusters: ahora ya no se usa.)

def ensure_annual_coverage_zone_locked(all_pozos_df, plan, params, start_date, equipo_to_zona,
                                       allowed_bats_by_zone_norm=None, r_by_pozo=None):
    cap_pz = params["max_pozos_dia_equipo"]

    keys = []
    for w in range(params["semanas_plan"]):
        w_start = start_date + timedelta(weeks=w)
        for d in range(params["dias_por_semana"]):
            f = w_start + timedelta(days=d)
            for e in equipo_to_zona.keys():
                keys.append((e, f))

    if not plan.empty:
        plan["__key"] = plan["Equipo"].astype(int).astype(str) + "|" + plan["Plan_Fecha"].astype(str)
        used_counts = plan.groupby("__key")["POZO"].count().to_dict()
    else:
        used_counts = {}

    planned = set(plan["POZO"].unique()) if not plan.empty else set()
    missing_df = all_pozos_df[~all_pozos_df["POZO"].isin(planned)].copy()
    missing_df = missing_df[missing_df["BATERIA"].notna() & (missing_df["BATERIA"].astype(str).str.strip()!="")].copy()

    add = []
    for _, row in missing_df.iterrows():
        pz = row["POZO"]; z = row["ZONA"]
        bat = row.get("BATERIA", "")

        if not isinstance(bat, str) or bat.strip() == "":
            continue

        if allowed_bats_by_zone_norm:
            zn = _norm(z)
            bats_allowed = allowed_bats_by_zone_norm.get(zn)
            if bats_allowed is not None:
                if _norm(bat) not in bats_allowed:
                    continue

        if r_by_pozo is not None:
            r_val = float(r_by_pozo.get(pz, np.nan))
            if not (r_val > RM3D_MIN):
                continue

        target_teams = [e for e, zona in equipo_to_zona.items() if zona == z]
        if not target_teams:
            continue
        placed = False
        for e in target_teams:
            for (ee, f) in keys:
                if ee != e:
                    continue
                key = f"{e}|{f}"
                cnt = used_counts.get(key, 0)
                if cnt < cap_pz:
                    add.append({
                        "Plan_Fecha": f,
                        "Semana_ISO": f.isocalendar()[1],
                        "Equipo": int(e),
                        "Dia_Idx": f.weekday()+1,
                        "Orden": cnt+1,
                        "ZONA": z,
                        "BATERIA": bat,
                        "POZO": pz,
                        "r_m3_d": np.nan,
                        "Vol_Estimado_m3": 0.0,
                        "Seed_POZO": "",
                        "Dist_km_semilla": None,
                        "Dist_km_centroid": None,
                        "ultima_medicion": pd.NaT,
                        "ClusterID": "",
                        "Centroide_LAT": np.nan,
                        "Centroide_LON": np.nan,
                    })
                    used_counts[key] = cnt+1
                    placed = True
                    break
            if placed:
                break

    if add:
        plan = pd.concat([plan, pd.DataFrame(add)], ignore_index=True)                 .sort_values(["Plan_Fecha","Equipo","Orden"])
    return plan

def build_alertas_abm(freq_df: pd.DataFrame, norm_table: pd.DataFrame, dict_df: pd.DataFrame) -> pd.DataFrame:
    base = freq_df[["POZO","ZONA","BATERIA","ultima_medicion","ultima_exitosa"]].copy()
    meta_first = dict_df.groupby("oficial")[["estado","met_prod","nivel_3","nivel_5"]].first()
    base = base.merge(meta_first[["estado","met_prod"]], left_on="POZO", right_index=True, how="left")

    out = base.copy()
    for c in ["ultima_medicion","ultima_exitosa"]:
        out[c] = pd.to_datetime(out[c], errors="coerce").dt.date
    out = out.sort_values(["ZONA","BATERIA","POZO"]).reset_index(drop=True)
    return out

# ============================================
# HARNES PARA JUPYTER
# ============================================
def run_pipeline_jupyter(
    input_file,
    nombres_pozo_file,
    coords_file,
    *,
    semanas_plan=2,
    equipos_activos=2,
    dias_por_semana=5,
    max_pozos_dia_equipo=10,
    K_max_pozos_por_cluster=5,
    clusters_por_dia_max=None,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=0.1,
    zonas_incluir=None,
    baterias_por_zona=None,      # {"las heras cg - canadon escondida": {"swabing ce","ce 04"}}
    pozos_excluir=None,
    escribir_excel=False
):
    global INPUT_FILE, NOMBRES_POZO_FILE, COORDS_FILE, RADIUS_KM, RM3D_MIN, DEFAULTS
    INPUT_FILE       = input_file
    NOMBRES_POZO_FILE= nombres_pozo_file
    COORDS_FILE      = coords_file
    RADIUS_KM        = float(radius_km)
    RM3D_MIN         = float(rm3d_min)

    DEFAULTS = DEFAULTS.copy()
    DEFAULTS.update({
        "equipos_activos": int(equipos_activos),
        "dias_por_semana": int(dias_por_semana),
        "semanas_plan": int(semanas_plan),
        "max_pozos_dia_equipo": int(max_pozos_dia_equipo),
        "max_pozos_por_cluster": int(K_max_pozos_por_cluster),
        "clusters_por_dia_max": clusters_por_dia_max,
        "backfill_nearest_cluster": bool(backfill_nearest),
        "umbral_km_backfill": float(umbral_km_backfill),
    })

    # 1) Lee historial (Excel del usuario)
    df = read_historial(INPUT_FILE, SHEET_HIST)

    # 2) Normalización por diccionario
    key2off, dict_df = load_pozo_dictionary(NOMBRES_POZO_FILE)
    df_norm, alert_table, norm_table = apply_pozo_normalization(df, key2off, dict_df)

    # 3) Filtra inválidos
    df = df_norm[df_norm["VALIDO_POZO"] == True].copy()

    # 4) Filtro por ZONA (si se pide explícito)
    if zonas_incluir:
        zonas_incluir = set(zonas_incluir)
        znorm = {_norm(z) for z in zonas_incluir}
        df = df[df["__ZONA_NORM"].isin(znorm)].copy()
        zonas_labels = zonas_incluir
        zonas_norm   = znorm
    else:
        mask_valid = df["ZONA"].notna() & (df["ZONA"].astype(str).str.strip() != "")
        zonas_labels = set(df.loc[mask_valid, "ZONA"].astype(str))
        zonas_norm   = set(df.loc[mask_valid, "__ZONA_NORM"].astype(str))

    # 5) Sub-filtro de baterías (si lo pasaste por parámetro)
    if baterias_por_zona:
        allowed_bats_by_zone_norm = {zn: set(baterias_por_zona[zn]) if baterias_por_zona[zn] is not None else None
                                     for zn in baterias_por_zona}
    else:
        allowed_bats_by_zone_norm = {zn: None for zn in zonas_norm}

    # 6) Exclusiones (si te pasan un set)
    excl_total = set(pozos_excluir or [])

    # 7) Frecuencias
    params = DEFAULTS.copy()
    freq = compute_frecuencias(df, params)

    # Comentarios desde OBS cuando ultima_medicion != ultima_exitosa
    df_obs = df[["POZO", "FECHA", "OBS_POZO"]].copy() if "OBS_POZO" in df.columns else pd.DataFrame(columns=["POZO","FECHA","OBS_POZO"])
    df_obs["FECHA_DATE"] = pd.to_datetime(df_obs["FECHA"], errors="coerce").dt.date
    df_obs = (df_obs.dropna(subset=["FECHA_DATE"])
                    .sort_values(["POZO","FECHA_DATE"])
                    .drop_duplicates(subset=["POZO","FECHA_DATE"], keep="last"))
    obs_map = {(r.POZO, r.FECHA_DATE): (str(r.OBS_POZO).strip() if pd.notna(r.OBS_POZO) else "")
               for r in df_obs.itertuples(index=False)}
    freq["__UMED_DATE"] = pd.to_datetime(freq["ultima_medicion"], errors="coerce").dt.date
    freq["__UEXI_DATE"] = pd.to_datetime(freq["ultima_exitosa"], errors="coerce").dt.date
    freq["comentario"] = [obs_map.get((pz, fmed), "") for pz, fmed in zip(freq["POZO"], freq["__UMED_DATE"])]
    mask_both_valid = freq["__UMED_DATE"].notna() & freq["__UEXI_DATE"].notna()
    mask_diff = mask_both_valid & (freq["__UMED_DATE"] != freq["__UEXI_DATE"])
    freq.loc[~mask_diff, "comentario"] = ""
    freq.drop(columns=["__UMED_DATE","__UEXI_DATE"], errors="ignore", inplace=True)

    # 8) Coordenadas
    coords_df = read_coords(COORDS_FILE)

    # 9) Mapas auxiliares
    delta_by_pozo = freq.set_index("POZO")["delta_star_dias"].to_dict()
    r_by_pozo     = freq.set_index("POZO")["r_m3_d"].to_dict()

    # 10) Semanas a planificar
    start = next_monday(date.today())
    weeks = [(start + timedelta(weeks=i), start + timedelta(weeks=i, days=6)) for i in range(params["semanas_plan"])]

    # 11) Equipos -> ZONA (fijo)
    zonas_list = sorted(zonas_labels)
    if not zonas_list:
        raise ValueError("No hay ZONAS válidas (todas vacías).")

    equipo_to_zona = {}
    for i in range(1, params["equipos_activos"]+1):
        zona_asignada = zonas_list[min(i-1, len(zonas_list)-1)]
        equipo_to_zona[i] = zona_asignada

    # 12) Plan semanal por ZONA usando la versión V2 (clústeres)
    plan_all = []
    next_due = {row.POZO: row.proxima_visita_base for row in freq.itertuples()}
    zone_to_teams = {}
    for eq, zona_label in equipo_to_zona.items():
        zone_to_teams.setdefault(zona_label, []).append(eq)

    for (w_start, w_end) in weeks:
        for zona_label, team_list in zone_to_teams.items():
            zona_norm_label = _norm(zona_label)
            cand_all = build_candidates_with_coords(
                freq=freq,
                week_start=w_start,
                week_end=w_end,
                excl_pozos=excl_total,
                zonas_norm_incluidas={zona_norm_label},
                coords_df=coords_df,
                allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
                next_due_map=next_due
            )
            if cand_all.empty:
                continue

            cand_zone = cand_all[[  # mantener las columnas necesarias
                "POZO","ZONA","BATERIA","due_date","is_overdue","__v",
                "LAT","LON","has_coords","r_m3_d","ultima_medicion"
            ]].copy()

            plan_week_zone = assign_week_round_robin_by_zone(
                cand_all=cand_zone,
                team_ids=sorted(team_list),
                params=params,
                week_start=w_start,
                week_end=w_end,
                radius_km=RADIUS_KM
            )

            if not plan_week_zone.empty:
                plan_all.append(plan_week_zone)
                # actualizar next_due por pozo asignado
                for pz, fcal in plan_week_zone[["POZO","Plan_Fecha"]].drop_duplicates().itertuples(index=False):
                    dd = int(delta_by_pozo.get(pz, params["min_dias_freq"]))
                    next_due[pz] = pd.Timestamp(fcal) + pd.Timedelta(days=dd)

    plan = (pd.concat(plan_all, ignore_index=True)
            if plan_all else
            pd.DataFrame(columns=[
                "Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden","ZONA","BATERIA",
                "POZO","r_m3_d","Vol_Estimado_m3","Seed_POZO","Dist_km_semilla",
                "Dist_km_centroid","ultima_medicion","ClusterID","Centroide_LAT","Centroide_LON","Cluster_Score"
            ]))

    # 13) Cobertura anual reforzada (opcional) — mantiene tu lógica original (no forma clúster)
    if not freq.empty:
        eligible_mask = (freq["ZONA"].isin(zonas_labels)) & (freq["r_m3_d"].fillna(0) > RM3D_MIN)
        if "comentario" in freq.columns:
            eligible_mask &= (freq["comentario"].astype(str).fillna("").str.strip() == "")
        if allowed_bats_by_zone_norm:
            for zn, bats in allowed_bats_by_zone_norm.items():
                if bats is not None:
                    eligible_mask &= (~(freq["ZONA_NORM"] == zn)) | (freq["BATERIA_NORM"].isin(bats))

        all_pozos_in_zonas = freq.loc[eligible_mask, ["POZO","ZONA","BATERIA"]].drop_duplicates().copy()
        all_pozos_in_zonas = all_pozos_in_zonas[
            all_pozos_in_zonas["BATERIA"].notna() & (all_pozos_in_zonas["BATERIA"].astype(str).str.strip() != "")
        ].copy()

        # usa el filler original (sin clúster) SOLO para cubrir huecos anuales
     #   plan = ensure_annual_coverage_zone_locked(
      #      all_pozos_df=all_pozos_in_zonas,
       #     plan=plan,
        #    params=params,
         #   start_date=start,
          #  equipo_to_zona=equipo_to_zona,
           # allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
            #r_by_pozo=r_by_pozo
       # )

    # 14) Export opcional (agrego columnas nuevas de clúster)
    out_xlsx = None
    if escribir_excel:
        out_xlsx = unique_output_path(INPUT_FILE)
        coords_all = read_coords(COORDS_FILE)
        with pd.ExcelWriter(out_xlsx, engine="openpyxl", mode="w") as writer:
            # Frecuencias
            freq_out = freq.copy()
            for c in ["proxima_visita_base","ultima_medicion","ultima_exitosa"]:
                freq_out[c] = pd.to_datetime(freq_out[c], errors="coerce").dt.date
            freq_out = freq_out.sort_values(["ZONA","BATERIA","POZO"])
            cols_pref = ["POZO","ZONA","BATERIA","ZONA_NORM","BATERIA_NORM","r_m3_d",
                         "ultima_medicion","ultima_exitosa","delta_star_dias","comentario",
                         "proxima_visita_base","ceros_consec","alerta"]
            cols_final = [c for c in cols_pref if c in freq_out.columns] + \
                         [c for c in freq_out.columns if c not in cols_pref]
            freq_out = freq_out[cols_final]
            freq_out.to_excel(writer, "Frecuencias", index=False)

            # Plan por equipo + Km_al_siguiente
            cols_plan = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
                         "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
                         "ClusterID","Centroide_LAT","Centroide_LON","Cluster_Score",
                         "Dist_km_centroid"]
            for eq in range(1, params["equipos_activos"]+1):
                pe = plan.loc[plan["Equipo"]==eq].copy()
                if pe.empty:
                    pe = pd.DataFrame(columns=cols_plan + ["Km_al_siguiente","Ejecutado"])
                else:
                    pe = pe.sort_values(["Plan_Fecha","Dia_Idx","Orden","POZO"]).copy()
                    pe = pe.merge(coords_all, how="left", on="POZO")
                    pe["LAT_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LAT"].shift(-1)
                    pe["LON_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LON"].shift(-1)
                    def _leg_km(row):
                        if (pd.isna(row.get("LAT")) or pd.isna(row.get("LON")) or
                            pd.isna(row.get("LAT_next")) or pd.isna(row.get("LON_next"))):
                            return None
                        return round(float(haversine_km(row["LAT"], row["LON"],
                                                        row["LAT_next"], row["LON_next"])), 3)
                    pe["Km_al_siguiente"] = pe.apply(_leg_km, axis=1)
                    pe.drop(columns=["LAT","LON","LAT_next","LON_next"], inplace=True, errors="ignore")
                    pe["Ejecutado"] = ""
                    for c in cols_plan:
                        if c not in pe.columns: pe[c] = ""
                    pe = pe[cols_plan + ["Km_al_siguiente","Ejecutado"]]
                pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)

            # Auxiliares
            pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)

    # ====== Asserts/chequeos mínimos ======
    # ====== Asserts/chequeos mínimos ======
    if not plan.empty:
        K_chk = int(params.get("max_pozos_por_cluster", 5))

        # ✅ Validar SOLO clústeres reales (ClusterID no vacío)
        mask_real = plan["ClusterID"].notna() & (plan["ClusterID"].astype(str).str.strip() != "")
        gsize = (plan.loc[mask_real]
                 .groupby(["Plan_Fecha","Equipo","ClusterID"])["POZO"]
                 .count())

        if not gsize.empty:
            assert (gsize % K_chk == 0).all(), "Hay clústeres asignados que no cumplen tamaño K exacto."

        # ✅ No duplicación diaria (ningún pozo se repite en el mismo día)
        dupmax = plan.groupby(["Plan_Fecha","POZO"]).size().max()
        assert int(dupmax) == 1, "Un pozo aparece más de una vez en el mismo día."

        # ✅ Radio cumplido (tolerancia numérica)
        if "Dist_km_centroid" in plan.columns and plan["Dist_km_centroid"].notna().any():
            assert float(plan["Dist_km_centroid"].fillna(0).max()) <= float(RADIUS_KM) + 1e-6, \
                "Distancia a centroide excede el radio."

    return plan, freq, out_xlsx




# ============================================
# RUNNER (EDITÁ TUS RUTAS Y PARÁMETROS ACÁ)
# ============================================

INPUT_FILE = r"C:\Users\ry16123\Downloads\Ultimo (ORIGINAL) TABLERO PRODUCCIÓN FLUG S.A 2025 (1).xlsx"
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

plan, freq, out_xlsx = run_pipeline_jupyter(
    input_file=INPUT_FILE,
    nombres_pozo_file=NOMBRES_POZO_FILE,
    coords_file=COORDS_FILE,
    semanas_plan=2,                 # probá corto para iterar rápido
    equipos_activos=2,              # cantidad de equipos
    dias_por_semana=5,              # 5 ó 6
    max_pozos_dia_equipo=5,
    K_max_pozos_por_cluster=5,      # tamaño máximo de clúster
    clusters_por_dia_max=2,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=0.1,
    zonas_incluir=None,             # o lista como ["Las Heras CG - Canadon Escondida"]
    baterias_por_zona=None,         # dict normalizado (keys en _norm) o None
    pozos_excluir=set(),            # ej.: {"BB-100"}
    escribir_excel=True            # poné True si querés exportar el Excel
)

# Mostrar un vistazo rápido
display(freq.head(10))
display(plan.head(30))
print("Excel generado:", out_xlsx)

  warn(msg)
  g[col] = g[col].replace({None: np.nan})
  freq_out.to_excel(writer, "Frecuencias", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)


Unnamed: 0,POZO,ZONA,BATERIA,ZONA_NORM,BATERIA_NORM,r_m3_d,ultima_medicion,ultima_exitosa,delta_star_dias,proxima_visita_base,ceros_consec,alerta,comentario
0,BB-10,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.015385,2025-07-16,2025-07-16,56,2025-09-10,0,,
1,BB-100,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.142857,2023-08-04,2023-08-04,14,2023-08-18,0,,
2,BB-101,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.035714,2025-08-25,2025-08-25,56,2025-10-20,0,,
3,BB.a-104,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.285714,2025-01-24,2025-01-24,7,2025-01-31,0,,
4,BB-111,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.428571,2025-07-01,2025-07-01,7,2025-07-08,0,,
5,BB-133,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.037037,2025-02-19,2025-02-19,56,2025-04-16,0,,
6,BB-170,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.285714,2024-07-24,2024-07-24,7,2024-07-31,0,,
7,BB-21,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.015564,2025-05-12,2025-05-12,56,2025-07-07,0,,
8,BB497,,,,,0.571429,2025-01-08,2025-01-08,7,2025-01-15,0,,
9,BB-50,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.081633,2023-03-10,2023-03-10,28,2023-04-07,0,,


Unnamed: 0,Plan_Fecha,Semana_ISO,Equipo,Dia_Idx,Orden,ZONA,BATERIA,POZO,r_m3_d,Vol_Estimado_m3,Seed_POZO,Dist_km_semilla,Dist_km_centroid,ultima_medicion,ClusterID,Centroide_LAT,Centroide_LON
0,2025-09-29,40,1,2.171666,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE-1234,0.285714,72.86,,,0.29,2025-01-17,C00002,-46.438652,-68.577977
1,2025-09-29,40,1,2.171666,2,Las Heras CG - Canadon Escondida,CE 20,CnE-1224(d),0.285714,37.14,,,0.468,2025-05-22,C00002,-46.438652,-68.577977
2,2025-09-29,40,1,2.171666,3,Las Heras CG - Canadon Escondida,Swabing CE,CnE-473,0.1875,12.75,,,1.172,2025-07-23,C00002,-46.438652,-68.577977
3,2025-09-29,40,1,2.171666,4,Las Heras CG - Canadon Escondida,Swabing CE,CnE-868,0.272727,13.36,,,1.583,2025-08-11,C00002,-46.438652,-68.577977
4,2025-09-29,40,1,2.171666,5,Las Heras CG - Canadon Escondida,Swabing CE,CnE-849,0.2,14.8,,,2.331,2025-07-17,C00002,-46.438652,-68.577977
5,2025-09-29,40,2,2.05588,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE-808,0.1875,3.19,,,1.056,2025-09-12,C00003,-46.403392,-68.562792
6,2025-09-29,40,2,2.05588,2,Las Heras CG - Canadon Escondida,Swabing CE,CnE.a-226,0.238095,5.71,,,1.278,2025-09-05,C00003,-46.403392,-68.562792
7,2025-09-29,40,2,2.05588,3,Las Heras CG - Canadon Escondida,Swabing CE,CnE-199,0.285714,4.86,,,1.414,2025-09-12,C00003,-46.403392,-68.562792
8,2025-09-29,40,2,2.05588,4,Las Heras CG - Canadon Escondida,Swabing CE,CnE-534,0.214286,3.86,,,1.504,2025-09-11,C00003,-46.403392,-68.562792
9,2025-09-29,40,2,2.05588,5,Las Heras CG - Canadon Escondida,Swabing CE,CnE-660,0.2,3.8,,,2.039,2025-09-10,C00003,-46.403392,-68.562792


Excel generado: C:\Users\ry16123\Downloads\Ultimo (ORIGINAL) TABLERO PRODUCCIÓN FLUG S.A 2025 (1)_CRONOGRAMA_20250928_(12).xlsx


In [1]:
#ESTARIA FUNCIONANDO RELATIVAMENTE BIEN- PROBARLO

# ============================================
# Monocelda Jupyter: Planificador + Harness + Runner
# ============================================

# -*- coding: utf-8 -*-
import os, re, unicodedata, math
import numpy as np
import pandas as pd
from datetime import date, timedelta, datetime
from typing import Optional


# ==========================
# CONFIG por defecto (se sobreescriben en el runner)
# ==========================
INPUT_FILE  = r"DIAGRAMA SW.xlsx"   # Excel base (NO se modifica)
SHEET_HIST  = None                  # None => autodetecta hoja/encabezados
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

# Radio en km para agrupar por cercanía
RADIUS_KM = 3.0
# Filtro mínimo de potencial
RM3D_MIN = 0.1

# Umbrales para fuzzy (si se usan)
FUZZY_REPLACE_THRESHOLD = 85
FUZZY_SUGGEST_THRESHOLD = 75
LETTERS_SIMILARITY_MIN  = 80

DEFAULTS = {
    "equipos_activos": 4,                 # 1..4
    "dias_por_semana": 5,                 # 5 o 6
    "semanas_plan": 2,                    # para probar rápido en Jupyter
    "k_visitas": 1,                       # tasas (K=1 por pedido)
    "max_pozos_dia_equipo": 10,           # cupo por día por equipo
    "max_pozos_por_cluster": 4,           # tamaño de clúster (K fijo si usás lógica de clústeres fijos)
    "m3_por_visita_objetivo": 2.0,        # informativo
    "min_dias_freq": 7,                   # 1 semana
    "max_dias_freq": 56,                  # 8 semanas
    "dias_asumidos_una_visita": 7,        # para r si hay 1 sola visita
    "freq_dias_ultimo_cero_valido": 30,

    # Semillas a evaluar (si se usa lógica de semillas)
    "top_semillas_eval": 30,

    # Control de clústeres por día y backfill (si se usa lógica por semilla)
    "clusters_por_dia_max": None,
    "backfill_nearest_cluster": True,
    "umbral_km_backfill": 5.0,
}

# ==========================
# Utils
# ==========================
def _norm(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = s.replace("³", "3")
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    s = s.lower().strip().replace("\xa0"," ")
    s = s.replace("_"," ").replace("-"," ").replace("."," ").replace("\n"," ")
    return " ".join(s.split())

def _pozo_key(s: str) -> str:
    s = "" if s is None or (isinstance(s, float) and np.isnan(s)) else str(s)
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    return "".join(ch for ch in s if ch.isalnum()).upper()

def _canonical_digits(d: str) -> str:
    d = (d or "").lstrip("0")
    return d if d != "" else "0"

def _letters_digits_from_key_both(k: str):
    raw_digits = "".join(re.findall(r"\d+", k))
    digits_canon = _canonical_digits(raw_digits)
    letters = re.sub(r"\d+", "", k)
    return letters, digits_canon, len(raw_digits)

def _ratio_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _fuzzy_score(a: str, b: str) -> int:
    try:
        from rapidfuzz import fuzz
        return int(fuzz.partial_ratio(a, b))
    except Exception:
        import difflib
        return int(round(difflib.SequenceMatcher(None, a, b).ratio()*100))

def _canon_prefix_pozo(s: str) -> str:
    if s is None or (isinstance(s, float) and np.isnan(s)):
        return s
    raw = str(s).strip()
    raw_up = raw.upper()
    if raw_up.startswith("CÑE"):
        return "CNE" + raw_up[3:]
    raw_ascii = unicodedata.normalize("NFKD", raw_up).encode("ascii", "ignore").decode("ascii")
    if raw_ascii.startswith("CNE"):
        return raw_ascii
    if raw_ascii.startswith("CN"):
        return "CNE" + raw_ascii[2:]
    m = re.match(r"^CE(\d+)$", raw_ascii)
    if m:
        return "CNE" + m.group(1)
    return raw_ascii

def next_monday(d=None):
    d = d or date.today()
    return d + timedelta(days=(7 - d.weekday()) % 7)  # 0=Lunes

def unique_output_path(base_input_path: str) -> str:
    folder = os.path.dirname(os.path.abspath(base_input_path))
    stem   = os.path.splitext(os.path.basename(base_input_path))[0]
    today  = datetime.now().strftime("%Y%m%d")
    base   = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}.xlsx")
    if not os.path.exists(base): return base
    i = 2
    while True:
        cand = os.path.join(folder, f"{stem}_CRONOGRAMA_{today}_({i}).xlsx")
        if not os.path.exists(cand): return cand
        i += 1

EXPECTED_KEYS = {
    "fecha":       ["fecha"],
    "pozo":        ["pozo"],
    "zona":        ["zona"],
    "bateria":     ["bateria", "batería"],
    "m3":          ["m3 bruta","m3","m3_bruta","m3bruta","m 3 bruta","m 3","m3 bruto","m3 recuperado","m3 recupero"],
    "carreras":    ["n de carreras","n° de carreras","nº de carreras","no de carreras","nro de carreras","numero de carreras","n° carreras","n de carrera","n carreras"],
    "nivel_final": ["nivel final pozo","nivel final","nivel final del pozo"],
    "obs_pozo":    ["observaciones del pozo","observaciones","comentarios","comentario"]
}

def _find_header_row(df_raw):
    for i in range(min(200, len(df_raw))):
        row_norm = [_norm(x) for x in df_raw.iloc[i,:].tolist()]
        if not row_norm:
            continue
        colmap = {v:j for v,j in zip(row_norm, range(len(row_norm)))}
        def has_any(keys): return any(k in colmap for k in keys)
        if has_any(EXPECTED_KEYS["fecha"]) and has_any(EXPECTED_KEYS["pozo"]) and has_any(EXPECTED_KEYS["zona"]) and has_any(EXPECTED_KEYS["bateria"]):
            return i, row_norm
    return None, None

# ---------- Nombres pozo ----------
def load_pozo_dictionary(xlsx_path: str):
    try:
        ref = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer diccionario de pozos: {xlsx_path}\n{e}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    cols = {c.lower().strip(): c for c in ref.columns}
    if "nombre_corto_pozo" not in cols:
        print(f"\n[AVISO] El diccionario no tiene la columna 'nombre_corto_pozo'. Columnas: {list(ref.columns)}\n")
        return {}, pd.DataFrame(columns=["oficial","key","letters","digits_canon","digits_len","met_prod","nivel_3","nivel_5","estado"])

    c_pozo = cols["nombre_corto_pozo"]
    c_met  = cols.get("met_prod")
    c_n3   = cols.get("nivel_3")
    c_n5   = cols.get("nivel_5")
    c_est  = cols.get("estado")

    refv = ref.loc[ref[c_pozo].notna()].copy()
    refv[c_pozo] = refv[c_pozo].astype(str).str.strip()

    of_list  = refv[c_pozo].tolist()
    met_vals = refv[c_met].astype(str).str.strip() if c_met else np.nan
    n3_vals  = refv[c_n3].astype(str).str.strip()  if c_n3 else np.nan
    n5_vals  = refv[c_n5].astype(str).str.strip()  if c_n5 else np.nan
    est_vals = refv[c_est].astype(str).str.strip() if c_est else np.nan

    keys, letters_, digits_canon_, digits_len_ = [], [], [], []
    for val in of_list:
        k = _pozo_key(val)
        L, Dcanon, Dlen = _letters_digits_from_key_both(k)
        keys.append(k); letters_.append(L); digits_canon_.append(Dcanon); digits_len_.append(Dlen)

    dict_df = pd.DataFrame({
        "oficial": of_list,
        "key": keys,
        "letters": letters_,
        "digits_canon": digits_canon_,
        "digits_len": digits_len_,
        "met_prod": list(met_vals) if isinstance(met_vals, pd.Series) else [np.nan]*len(of_list),
        "nivel_3":  list(n3_vals)  if isinstance(n3_vals,  pd.Series) else [np.nan]*len(of_list),
        "nivel_5":  list(n5_vals)  if isinstance(n5_vals,  pd.Series) else [np.nan]*len(of_list),
        "estado":   list(est_vals) if isinstance(est_vals, pd.Series) else [np.nan]*len(of_list),
    })

    key2off = {}
    for k, off in zip(dict_df["key"], dict_df["oficial"]):
        if k and k not in key2off:
            key2off[k] = off
    return key2off, dict_df

def apply_pozo_normalization(df: pd.DataFrame, key2off: dict, dict_df: pd.DataFrame):
    df = df.copy()
    df["POZO_ORIG"] = df["POZO"].astype(str).str.strip()
    df["POZO_PreCanon"] = df["POZO_ORIG"].apply(_canon_prefix_pozo)
    df["__POZO_KEY"] = df["POZO_PreCanon"].apply(_pozo_key)

    parts = df["__POZO_KEY"].apply(_letters_digits_from_key_both)
    df["__KEY_LET"], df["__KEY_DIG_CANON"], df["__KEY_DIG_LEN"] = zip(*parts)

    df["POZO_MATCH"]   = None
    df["MATCH_TIPO"]   = "NO"
    df["MATCH_SCORE"]  = np.nan
    df["LETTER_SCORE"] = np.nan
    df["APLICADO"]     = "NO"
    df["ALERTA_NORM"]  = ""
    df["VALIDO_POZO"]  = True

    invalid_mask = (df["__KEY_LET"].str.len()==0) | (df["__KEY_DIG_LEN"]==0)
    if invalid_mask.any():
        df.loc[invalid_mask, "ALERTA_NORM"] = "SIN_LETRAS_O_DIGITOS"
        df.loc[invalid_mask, "VALIDO_POZO"] = False

    valid_mask = ~invalid_mask
    exact_mask = valid_mask & df["__POZO_KEY"].isin(key2off.keys())
    df.loc[exact_mask, "POZO_MATCH"]   = df.loc[exact_mask, "__POZO_KEY"].map(key2off)
    df.loc[exact_mask, "MATCH_TIPO"]   = "EXACTO"
    df.loc[exact_mask, "MATCH_SCORE"]  = 100
    df.loc[exact_mask, "LETTER_SCORE"] = 100
    df.loc[exact_mask, "APLICADO"]     = "SI"

    pending = df[valid_mask & (~exact_mask)].index.tolist()
    if pending and not dict_df.empty:
        dict_by_spec = {}
        for spec, sub in dict_df.groupby(["digits_canon","digits_len"]):
            dict_by_spec[spec] = sub

        for idx in pending:
            key_u   = df.at[idx, "__POZO_KEY"]
            let_u   = df.at[idx, "__KEY_LET"]
            digc_u  = df.at[idx, "__KEY_DIG_CANON"]
            digl_u  = int(df.at[idx, "__KEY_DIG_LEN"])

            cand_df = dict_by_spec.get((digc_u, digl_u), pd.DataFrame())
            best_off, best_score, best_lscore = None, -1, -1

            if cand_df is not None and not cand_df.empty:
                for row in cand_df.itertuples():
                    kk = row.key
                    ll = row.letters
                    sc_key = _fuzzy_score(key_u, kk)
                    sc_let = _ratio_score(let_u, ll)
                    if sc_let < LETTERS_SIMILARITY_MIN:
                        continue
                    if sc_key > best_score or (sc_key == best_score and sc_let > best_lscore):
                        best_score = sc_key
                        best_lscore = sc_let
                        best_off   = row.oficial

            if best_off is not None:
                df.at[idx, "POZO_MATCH"]   = best_off
                df.at[idx, "MATCH_TIPO"]   = "SUGERIDO"
                df.at[idx, "MATCH_SCORE"]  = int(best_score)
                df.at[idx, "LETTER_SCORE"] = int(best_lscore)
            else:
                df.at[idx, "ALERTA_NORM"] = "SIN MATCH EN DICCIONARIO"

    # Reemplazos
    df["POZO"] = df["POZO_MATCH"].where(df["POZO_MATCH"].notna(), df["POZO"])
    meta_first = dict_df.groupby("oficial")[["met_prod","nivel_3","nivel_5"]].first()
    df = df.merge(meta_first, how="left", left_on="POZO", right_index=True)

    # ZONA sólo si hubo match; sino, vacío
    if "nivel_3" in df.columns:
        df.loc[df["POZO_MATCH"].isna(), "nivel_3"] = ""
        df["ZONA"] = np.where(df["POZO_MATCH"].notna(), df["nivel_3"].fillna(""), "")

    # BATERIA si hay nivel_5
    if "nivel_5" in df.columns:
        df["BATERIA"] = np.where(
            df["nivel_5"].notna() & (df["nivel_5"].astype(str).str.strip()!=""),
            df["nivel_5"], df["BATERIA"]
        )

    df["__ZONA_NORM"]    = df["ZONA"].apply(_norm)
    df["__BATERIA_NORM"] = df["BATERIA"].apply(_norm)

    norm_table = (df[["POZO_ORIG","POZO_PreCanon","__POZO_KEY",
                      "__KEY_LET","__KEY_DIG_CANON","__KEY_DIG_LEN",
                      "POZO_MATCH","MATCH_TIPO","MATCH_SCORE","LETTER_SCORE",
                      "APLICADO","ALERTA_NORM","VALIDO_POZO",
                      "met_prod","nivel_3","nivel_5"]]
                  .drop_duplicates()
                  .rename(columns={
                      "POZO_ORIG":"Pozo_Original",
                      "POZO_PreCanon":"Pozo_PreCanon",
                      "__POZO_KEY":"Clave_Normalizada",
                      "__KEY_LET":"Letras",
                      "__KEY_DIG_CANON":"Digitos_Canon",
                      "__KEY_DIG_LEN":"Digitos_Len",
                      "POZO_MATCH":"Match_Oficial",
                      "MATCH_TIPO":"Match_Tipo",
                      "MATCH_SCORE":"Match_Score",
                      "LETTER_SCORE":"Letter_Score",
                      "APLICADO":"Aplicado",
                      "ALERTA_NORM":"Alerta",
                      "VALIDO_POZO":"Valido",
                      "met_prod":"met_prod",
                      "nivel_3":"nivel_3",
                      "nivel_5":"nivel_5"
                  })
                  .sort_values(["Valido","Aplicado","Match_Tipo","Pozo_Original"], ascending=[False, False, True, True]))

    alert_table = norm_table[(norm_table["Valido"]==False) | (norm_table["Aplicado"]=="NO") | (norm_table["Match_Tipo"]=="NO")].copy()
    return df, alert_table, norm_table

def read_historial(xlsx_path, sheet_hist=None):
    xl = pd.ExcelFile(xlsx_path)
    sheets = [sheet_hist] if (sheet_hist and sheet_hist in xl.sheet_names) else xl.sheet_names
    for sh in sheets:
        raw = xl.parse(sh, header=None)
        idx, header_norm = _find_header_row(raw)
        if idx is None:
            continue
        data = raw.iloc[idx:, :].copy()
        true_headers = data.iloc[0,:].astype(str).tolist()
        data = data.iloc[1:,:]
        data.columns = true_headers

        name_map = {c: _norm(c) for c in data.columns}
        def find_col(candidates):
            for c, n in name_map.items():
                if n in candidates:
                    return c
            return None

        c_fecha       = find_col(set(EXPECTED_KEYS["fecha"]))
        c_pozo        = find_col(set(EXPECTED_KEYS["pozo"]))
        c_zona        = find_col(set(EXPECTED_KEYS["zona"]))
        c_bateria     = find_col(set(EXPECTED_KEYS["bateria"]))
        c_m3          = find_col(set(EXPECTED_KEYS["m3"]))
        c_carr        = find_col(set(EXPECTED_KEYS["carreras"]))
        c_nivel_final = find_col(set(EXPECTED_KEYS["nivel_final"]))
        c_obs         = find_col(set(EXPECTED_KEYS["obs_pozo"]))

        if not (c_fecha and c_pozo and c_zona and c_bateria):
            continue

        use_cols = [c_fecha, c_pozo, c_zona, c_bateria]
        headers  = ["FECHA","POZO","ZONA","BATERIA"]
        if c_m3:            use_cols.append(c_m3);            headers.append("M3")
        if c_carr:          use_cols.append(c_carr);          headers.append("CARRERAS")
        if c_nivel_final:   use_cols.append(c_nivel_final);   headers.append("NIVEL_FINAL")
        if c_obs:           use_cols.append(c_obs);           headers.append("OBS_POZO")

        df = data[use_cols].copy()
        df.columns = headers

        df["FECHA"] = pd.to_datetime(df["FECHA"], errors="coerce")
        if "M3" not in df.columns: df["M3"] = np.nan
        else: df["M3"] = pd.to_numeric(df["M3"], errors="coerce")

        if "CARRERAS" not in df.columns: df["CARRERAS"] = np.nan
        else: df["CARRERAS"] = pd.to_numeric(df["CARRERAS"], errors="coerce")

        if "NIVEL_FINAL" not in df.columns:
            df["NIVEL_FINAL"] = None
        if "OBS_POZO" not in df.columns:
            df["OBS_POZO"] = None

        for col in ["POZO","ZONA","BATERIA","NIVEL_FINAL","OBS_POZO"]:
            df[col] = df[col].astype(str).str.strip().replace({"nan": np.nan})

        df = df.dropna(subset=["FECHA","POZO"]).sort_values(["POZO","FECHA"])
        return df

    raise ValueError("No pude detectar FECHA/POZO/ZONA/BATERÍA en ninguna hoja del Excel.")

def read_exclusions_from_sheet(xlsx_path):
    excl = set()
    try:
        xl = pd.ExcelFile(xlsx_path)
        if "ExcluirPozos" in xl.sheet_names:
            e = xl.parse("ExcluirPozos")
            e.columns = [str(c).strip().lower() for c in e.columns]
            if "pozo" in e.columns:
                if "excluir" in e.columns:
                    excl = set(e.loc[e["excluir"].astype(str).str.upper().isin(
                        ["SI","SÍ","YES","1","TRUE"]), "pozo"].astype(str).str.strip())
                else:
                    excl = set(e["pozo"].astype(str).str.strip())
    except Exception:
        pass
    return excl

# ==========================
# Frecuencias / r_m3_d
# ==========================
def _count_trailing_zeros_with_carr(g):
    cnt = 0
    for _, row in g.sort_values("FECHA").iloc[::-1].iterrows():
        m3 = row.get("M3", np.nan)
        car = row.get("CARRERAS", np.nan)
        if pd.notna(m3) and float(m3) == 0.0 and pd.notna(car) and float(car) > 0:
            cnt += 1
        else:
            break
    return cnt

def compute_frecuencias(df, params):
    v_target = params["m3_por_visita_objetivo"]
    min_d    = params["min_dias_freq"]
    max_d    = params["max_dias_freq"]
    k        = int(params["k_visitas"])
    one_days = int(params.get("dias_asumidos_una_visita", 7))
    freq_cero_ultimo = int(params.get("freq_dias_ultimo_cero_valido", 30))

    out = []
    for pozo, g0 in df.groupby("POZO", sort=False):
        g = g0.sort_values("FECHA").copy()

        for col in ["ZONA","BATERIA","NIVEL_FINAL"]:
            if col in g.columns:
                g[col] = g[col].replace({None: np.nan})
                g[col] = g[col].ffill().bfill()

        g["__ZONA_NORM"]    = g["ZONA"].apply(_norm)
        g["__BATERIA_NORM"] = g["BATERIA"].apply(_norm)
        g["__nf_norm"]      = g["NIVEL_FINAL"].apply(_norm) if "NIVEL_FINAL" in g.columns else ""

        med_validas_all = g[g["M3"].notna()].copy()

        m3_eq0 = g["M3"].fillna(0) == 0
        carr   = g.get("CARRERAS", pd.Series(index=g.index, dtype=float)).fillna(np.nan)
        zero_cond_a = m3_eq0 & (carr.fillna(0) >= 1)
        zero_cond_b = m3_eq0 & ((carr.isna()) | (carr.fillna(0) == 0)) & (g["__nf_norm"] == "surge")
        cond_cero_valido = zero_cond_a | zero_cond_b

        validas_rate = g[(g["M3"] > 0) | cond_cero_valido].copy()
        zeros_tail = _count_trailing_zeros_with_carr(g)

        ultima_med = med_validas_all["FECHA"].max() if not med_validas_all.empty else pd.NaT
        ultima_exi = g.loc[g["M3"]>0, "FECHA"].max() if "M3" in g.columns and not g[g["M3"]>0].empty else pd.NaT

        last_zero_valido = False
        if not med_validas_all.empty:
            idx_last = med_validas_all["FECHA"].idxmax()
            m3_last  = g.at[idx_last, "M3"]
            if pd.notna(m3_last) and float(m3_last) == 0.0:
                try:
                    last_zero_valido = bool(cond_cero_valido.loc[idx_last])
                except Exception:
                    last_zero_valido = False

        alerta = ""
        if last_zero_valido:
            alerta = f"ULTIMA_M3_0_VALIDO -> FREQ {freq_cero_ultimo}D"
        elif pd.notna(ultima_med):
            if zeros_tail > 0:
                alerta = f"ALERTA: {zeros_tail} cero(s) consecutivo(s) con Carreras>0"

        # r_m3_d
        r = np.nan
        if not validas_rate.empty:
            v = validas_rate.copy()
            v["delta_d"] = v["FECHA"].diff().dt.days
            v.loc[v["delta_d"] <= 0, "delta_d"] = np.nan
            v["rate"] = v["M3"].fillna(0) / v["delta_d"]
            rates = v["rate"].dropna()
            if len(rates) >= 1:
                r = rates.tail(min(k, len(rates))).mean()
            else:
                row = v.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan
        else:
            if len(med_validas_all) == 1:
                row = med_validas_all.iloc[-1]
                m3 = float(row["M3"]) if pd.notna(row["M3"]) else 0.0
                if m3 > 0:
                    r = m3 / max(1, one_days)
                else:
                    r = np.nan

        # FRECUENCIA
        if last_zero_valido:
            delta = int(freq_cero_ultimo)
        else:
            if pd.isna(r):      delta = 7
            elif r <= 0:        delta = max_d
            else:
                delta = max(min_d, min(max_d, float(v_target)/float(r)))
                delta = int(7 * round(delta / 7.0))
                if delta < 7:
                    delta = 7

        prox = (ultima_med + pd.Timedelta(days=int(delta))) if pd.notna(ultima_med) else pd.Timestamp(next_monday())

        out.append({
            "POZO": pozo,
            "ZONA": g["ZONA"].iloc[-1],
            "BATERIA": g["BATERIA"].iloc[-1],
            "ZONA_NORM": g["__ZONA_NORM"].iloc[-1],
            "BATERIA_NORM": g["__BATERIA_NORM"].iloc[-1],
            "r_m3_d": r,
            "ultima_medicion": ultima_med,
            "ultima_exitosa": ultima_exi,
            "delta_star_dias": int(delta),
            "proxima_visita_base": prox,
            "ceros_consec": zeros_tail,
            "alerta": alerta
        })
    return pd.DataFrame(out)

# ==========================
# Coordenadas
# ==========================
def _to_float_maybe_comma(x):
    if pd.isna(x):
        return np.nan
    if isinstance(x, (int, float, np.number)):
        return float(x)
    s = str(x).strip()
    if s == "": return np.nan
    s = s.replace(",", ".")
    try:
        return float(s)
    except Exception:
        return np.nan

def read_coords(xlsx_path):
    try:
        cdf = pd.read_excel(xlsx_path)
    except Exception as e:
        print(f"\n[AVISO] No pude leer coordenadas: {xlsx_path}\n{e}\n")
        return pd.DataFrame(columns=["POZO","LAT","LON"])
    cols_map = {c.lower().strip(): c for c in cdf.columns}
    c_pozo = cols_map.get("pozo")
    for k in ["geo_latitude","latitude","lat"]:
        if k in cols_map:
            c_lat = cols_map[k]; break
    else:
        c_lat = None
    for k in ["geo_longitude","longitude","lon","long"]:
        if k in cols_map:
            c_lon = cols_map[k]; break
    else:
        c_lon = None

    if not (c_pozo and c_lat and c_lon):
        print(f"[AVISO] Coordenadas: columnas esperadas 'POZO','GEO_LATITUDE','GEO_LONGITUDE'. Columnas encontradas: {list(cdf.columns)}")
        return pd.DataFrame(columns=["POZO","LAT","LON"])

    out = cdf[[c_pozo, c_lat, c_lon]].copy()
    out.columns = ["POZO","LAT","LON"]
    out["POZO"] = out["POZO"].astype(str).str.strip()
    out["LAT"] = out["LAT"].apply(_to_float_maybe_comma)
    out["LON"] = out["LON"].apply(_to_float_maybe_comma)
    out = out.dropna(subset=["POZO"])
    out = out.drop_duplicates(subset=["POZO"], keep="last")
    return out

# ==========================
# Candidatos y utilidades
# ==========================
def build_candidates_with_coords(freq, week_start, week_end, excl_pozos,
                                 zonas_norm_incluidas, coords_df,
                                 allowed_bats_by_zone_norm=None,
                                 next_due_map=None):
    F = freq.copy()

    # due_date base (permitimos override con next_due_map)
    F["due_date"] = F["proxima_visita_base"]
    if next_due_map:
        F["due_date"] = F["POZO"].map(next_due_map).fillna(F["due_date"])

    F["overdue_d"] = (pd.Timestamp(week_start) - pd.to_datetime(F["due_date"])).dt.days
    F["is_overdue"] = F["overdue_d"] > 0

    # prioridad
    F["__v"] = F["r_m3_d"].astype(float)

    # Filtro por ZONA (normalizada)
    if "ZONA_NORM" in F.columns and zonas_norm_incluidas:
        F = F[F["ZONA_NORM"].isin(zonas_norm_incluidas)].copy()

    # Sub-filtro por BATERÍA (si corresponde)
    if allowed_bats_by_zone_norm:
        mask = pd.Series(True, index=F.index)
        for zn in zonas_norm_incluidas:
            bats = allowed_bats_by_zone_norm.get(zn)
            if bats is not None:
                mask &= ~ (F["ZONA_NORM"] == zn) | (F["BATERIA_NORM"].isin(bats))
        F = F[mask].copy()

    # Exclusiones
    if excl_pozos:
        F = F[~F["POZO"].isin(excl_pozos)].copy()

    # Potencial mínimo y BATERÍA no vacía
    F = F[F["r_m3_d"].fillna(0) > RM3D_MIN].copy()
    F = F[F["BATERIA"].notna() & (F["BATERIA"].astype(str).str.strip() != "")].copy()

    # Excluir pozos con comentario no vacío en Frecuencias
    if "comentario" in F.columns:
        F["__comentario_txt"] = F["comentario"].astype(str).fillna("").str.strip()
        F = F[F["__comentario_txt"] == ""].copy()
        F.drop(columns=["__comentario_txt"], inplace=True, errors="ignore")

    # Merge coordenadas
    coords_df = coords_df if coords_df is not None else pd.DataFrame(columns=["POZO","LAT","LON"])
    F = F.merge(coords_df, how="left", on="POZO")
    F["has_coords"] = F["LAT"].notna() & F["LON"].notna()

    # Orden base
    F = F.sort_values(by=["is_overdue","__v","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    return F

def _v_est_for_day(row, day_date):
    r = row.get("r_m3_d", np.nan)
    u = row.get("ultima_medicion", pd.NaT)
    if pd.isna(u) or pd.isna(r) or r <= 0:
        return 0.0
    dd = max(0, (pd.Timestamp(day_date) - pd.Timestamp(u)).days)
    return max(0.0, float(r) * float(dd))

def haversine_km(lat1, lon1, lat2, lon2):
    try:
        if pd.isna(lat1) or pd.isna(lon1) or pd.isna(lat2) or pd.isna(lon2):
            return np.nan
        R = 6371.0088
        p1 = math.radians(float(lat1)); p2 = math.radians(float(lat2))
        dphi = math.radians(float(lat2) - float(lat1))
        dlmb = math.radians(float(lon2) - float(lon1))
        a = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dlmb/2)**2
        return 2*R*math.asin(math.sqrt(a))
    except Exception:
        return np.nan

# ==========================
# NUEVA LÓGICA DE CLÚSTERES (según prompt)
# ==========================
def _bbox_filter(df, lat0, lon0, rad_km):
    """Bounding-box previo a haversine para acotar vecinos."""
    if pd.isna(lat0) or pd.isna(lon0) or df.empty:
        return df.iloc[0:0]
    dlat = rad_km / 110.574
    dlon = rad_km / (111.320 * max(0.1, math.cos(math.radians(float(lat0)))))
    return df[(df["LAT"].between(lat0 - dlat, lat0 + dlat)) &
              (df["LON"].between(lon0 - dlon, lon0 + dlon))].copy()

def _cluster_centroid(lat_list, lon_list):
    if not lat_list or not lon_list:
        return (np.nan, np.nan)
    return float(np.mean(lat_list)), float(np.mean(lon_list))

def _validate_cluster_by_centroid(lat_list, lon_list, radius_km):
    c_lat, c_lon = _cluster_centroid(lat_list, lon_list)
    if pd.isna(c_lat) or pd.isna(c_lon):
        return False, (np.nan, np.nan), np.inf
    dmax = 0.0
    for la, lo in zip(lat_list, lon_list):
        d = haversine_km(c_lat, c_lon, la, lo)
        if pd.isna(d) or d > radius_km + 1e-9:
            return False, (c_lat, c_lon), np.inf
        dmax = max(dmax, d)
    return True, (c_lat, c_lon), dmax

def build_all_clusters(
    cands: pd.DataFrame,
    K: int,
    radius_km: float,
    score_mode: str = "rm3d",
    top_seeds: int = 30
) -> pd.DataFrame:
    """
    Devuelve DF con:
    ['ClusterID','POZOS','Centroide_LAT','Centroide_LON','Score','ZONA','BATERIAS']
    - Exactamente K pozos por clúster
    - Validación: todos a <= radius_km del CENTROIDE
    - Overlap permitido en generación
    - Semillas: mejores 'top_seeds' por __v
    - Elimina duplicados exactos (mismo conjunto de pozos)
    """
    if cands.empty:
        return pd.DataFrame(columns=["ClusterID","POZOS","Centroide_LAT","Centroide_LON","Score","ZONA","BATERIAS"])

    # trabajar solo con pozos con coords
    base = cands[cands["has_coords"]].copy()
    if base.empty:
        return pd.DataFrame(columns=["ClusterID","POZOS","Centroide_LAT","Centroide_LON","Score","ZONA","BATERIAS"])

    base = base.sort_values(["__v","is_overdue","due_date"], ascending=[False, False, True]).reset_index(drop=True)
    seeds = base.head(max(1, int(top_seeds))).copy()

    clusters = []
    seen_sets = set()  # para deduplicar por conjunto de pozos
    for _, seed in seeds.iterrows():
        s_lat, s_lon = seed["LAT"], seed["LON"]
        neigh = _bbox_filter(base, s_lat, s_lon, radius_km)
        if neigh.empty:
            continue
        # Orden por valor y cercanía a la semilla
        neigh = neigh.copy()
        neigh["__dist_seed"] = neigh.apply(lambda r: haversine_km(s_lat, s_lon, r["LAT"], r["LON"]), axis=1)
        neigh = neigh[neigh["__dist_seed"] <= radius_km]
        neigh = neigh.sort_values(["__v","__dist_seed"], ascending=[False, True])

        # Tomar candidatos top K alrededor de la semilla (semilla incluida)
        if seed["POZO"] not in neigh["POZO"].values:
            # asegurar que la semilla esté
            neigh = pd.concat([pd.DataFrame([seed]), neigh], ignore_index=True)
            neigh = neigh.drop_duplicates(subset=["POZO"], keep="first")

        if len(neigh) < K:
            # no alcanza tamaño K dentro del radio de la semilla
            continue

        # Probar ventana de los top K mejor valuados dentro del radio
        topk = neigh.head(K).copy()
        pozos = tuple(topk["POZO"].tolist())
        lats  = topk["LAT"].tolist()
        lons  = topk["LON"].tolist()

        ok, (c_lat, c_lon), dmax = _validate_cluster_by_centroid(lats, lons, radius_km)
        if not ok:
            # Intentar ajustar: expandir lista ordenada y mover una ventana sobre los N mejores vecinos
            N = min(len(neigh), K + 10)  # ventana corta para evitar combinatoria
            window = neigh.head(N).copy()
            found = False
            # estrategia greedy: fijar semilla y tomar los K-1 mejores por __v que cumplan centroide
            # probando reemplazos simples si no valida
            for i in range(0, N-K+1):
                cand = window.iloc[i:i+K]
                lats2 = cand["LAT"].tolist(); lons2 = cand["LON"].tolist()
                ok2, (c_lat2, c_lon2), _ = _validate_cluster_by_centroid(lats2, lons2, radius_km)
                if ok2:
                    topk = cand.copy()
                    c_lat, c_lon = c_lat2, c_lon2
                    pozos = tuple(topk["POZO"].tolist())
                    found = True
                    break
            if not found:
                continue  # no se pudo validar centroide

        # dedupe exacto por set
        key_set = frozenset(pozos)
        if key_set in seen_sets:
            continue
        seen_sets.add(key_set)

        # Score: rm3d o vest (hook)
        if score_mode == "vest":
            # si se usa vest, en generación no sabemos el día; dejamos rm3d como aproximación
            score = float(topk["r_m3_d"].fillna(0).sum())
        else:
            score = float(topk["r_m3_d"].fillna(0).sum())

        # ZONA/BATERIAS: mayoritaria (o homogénea si ya lo está)
        zona_mode = topk["ZONA"].mode()
        zona_val = zona_mode.iloc[0] if not zona_mode.empty else ""
        bats = tuple(sorted(set(str(x) for x in topk["BATERIA"].fillna("").astype(str))))

        clusters.append({
            "ClusterID": f"C{len(seen_sets):05d}",
            "POZOS": pozos,
            "Centroide_LAT": float(c_lat),
            "Centroide_LON": float(c_lon),
            "Score": score,
            "ZONA": zona_val,
            "BATERIAS": bats
        })

    cldf = pd.DataFrame(clusters)
    if cldf.empty:
        return cldf
    cldf = cldf.sort_values("Score", ascending=False).reset_index(drop=True)
    return cldf


def select_clusters_for_day(
    clusters_df: pd.DataFrame,
    used_today: set[str],
    cap_pozos: int,
    backfill_nearest: bool,
    umbral_km_backfill: float,
    clusters_por_dia_max: Optional[int] = None,
    K: int = 5
) -> list[dict]:

    """
    Devuelve lista de dicts: {'POZOS', 'ClusterID', 'Centroide_LAT', 'Centroide_LON', 'Score'}
    - Greedy por Score desc.
    - No repetir pozos del día.
    - Respetar clusters_por_dia_max y cap_pozos (multiplo de K).
    - Si backfill_nearest=True: exigir distancia del centroide nuevo al centroide acumulado ≤ umbral.
    """
    if clusters_df is None or clusters_df.empty:
        return []

    selected = []
    pozos_usados = set(used_today)
    cap_left = int(cap_pozos)
    max_clusters = int(clusters_por_dia_max) if clusters_por_dia_max is not None else None

    # centroide acumulado del día (promedio incremental)
    c_lat_acc, c_lon_acc, n_acc = (np.nan, np.nan, 0)

    def _update_centroid_acc(lat, lon):
        nonlocal c_lat_acc, c_lon_acc, n_acc
        if pd.isna(lat) or pd.isna(lon): 
            return
        if n_acc == 0:
            c_lat_acc, c_lon_acc, n_acc = float(lat), float(lon), 1
        else:
            c_lat_acc = (c_lat_acc*n_acc + float(lat)) / (n_acc + 1)
            c_lon_acc = (c_lon_acc*n_acc + float(lon)) / (n_acc + 1)
            n_acc += 1

    for _, row in clusters_df.iterrows():
        if cap_left < K:
            break
        if max_clusters is not None and len(selected) >= max_clusters:
            break

        pozos = set(row["POZOS"])
        if pozos & pozos_usados:
            # contiene pozo ya tomado hoy
            continue

        if backfill_nearest and len(selected) >= 1 and not (pd.isna(c_lat_acc) or pd.isna(c_lon_acc)):
            dcc = haversine_km(c_lat_acc, c_lon_acc, row["Centroide_LAT"], row["Centroide_LON"])
            if pd.isna(dcc) or dcc > float(umbral_km_backfill) + 1e-9:
                continue

        selected.append({
            "POZOS": list(row["POZOS"]),
            "ClusterID": row["ClusterID"],
            "Centroide_LAT": float(row["Centroide_LAT"]),
            "Centroide_LON": float(row["Centroide_LON"]),
            "Score": float(row["Score"])
        })
        pozos_usados |= pozos
        cap_left -= K
        _update_centroid_acc(row["Centroide_LAT"], row["Centroide_LON"])

    return selected


# ==========================
# ASIGNACIÓN SEMANAL ROUND-ROBIN (usando clústeres precomputados)
# ==========================
def assign_week_round_robin_by_zone(cand_all, team_ids, params, week_start, week_end, radius_km):
    """
    Reparte por día/equipo en una zona, eligiendo clústeres precomputados (no pozos sueltos).
    Reglas duras:
    - Clúster tamaño exacto K
    - Todos los pozos del clúster a ≤ radius_km del centroide (ya validado en build_all_clusters)
    - No repetir POZO en el mismo día (entre equipos de la misma zona)
    """
    dias   = int(params["dias_por_semana"])
    cap_pz = int(params["max_pozos_dia_equipo"])
    K      = int(params.get("max_pozos_por_cluster", 4))
    backfill_nearest = bool(params.get("backfill_nearest_cluster", True))
    umbral_backfill  = float(params.get("umbral_km_backfill", 5.0))
    clusters_por_dia_max = params.get("clusters_por_dia_max", None)

    rows = []
    used_week = set()  # evita repetir el mismo pozo dentro de la semana

    for d in range(dias):
        day_date = pd.Timestamp(week_start) + pd.Timedelta(days=d)

        # Ignorar vencimiento: usar todos los candidatos de la zona,
        # menos los ya usados en la semana
        pool_day = cand_all[~cand_all["POZO"].isin(used_week)].copy()
        if pool_day.empty:
            continue

        # Precomputar TODOS los clústeres del día (overlap permitido en generación)
        clusters_df = build_all_clusters(
            cands=pool_day,
            K=K,
            radius_km=radius_km,
            score_mode="rm3d",
            top_seeds=int(params.get("top_semillas_eval", 30))
        )
        if clusters_df.empty:
            continue

        used_today = set()  # no duplicar pozos en el mismo día entre equipos

        # >>> orden fijo de equipos (sin rotación)
        for eq in sorted(team_ids):
            chosen = select_clusters_for_day(
                clusters_df=clusters_df,
                used_today=used_today,
                cap_pozos=cap_pz,
                backfill_nearest=backfill_nearest,
                umbral_km_backfill=umbral_backfill,
                clusters_por_dia_max=clusters_por_dia_max,
                K=K
            )
            if not chosen:
                continue

            # materializar filas del plan
            ord_idx = 1
            for cluster in chosen:
                pozos = cluster["POZOS"]
                c_lat = cluster["Centroide_LAT"]
                c_lon = cluster["Centroide_LON"]
                cid   = cluster["ClusterID"]

                info = pool_day[pool_day["POZO"].isin(pozos)].copy().set_index("POZO")

                # asserts rápidos (tamaño y radio)
                assert len(pozos) == K
                for pz in pozos:
                    dcent = haversine_km(c_lat, c_lon, info.at[pz, "LAT"], info.at[pz, "LON"])
                    assert (not pd.isna(dcent)) and dcent <= radius_km + 1e-6

                # ordenar por cercanía al centroide (opcional)
                pozos_sorted = sorted(
                    pozos,
                    key=lambda p: haversine_km(c_lat, c_lon, info.at[p, "LAT"], info.at[p, "LON"])
                )

                for pz in pozos_sorted:
                    rec = info.loc[pz]
                    v_est = _v_est_for_day(
                        {"r_m3_d": rec.get("r_m3_d", np.nan),
                         "ultima_medicion": rec.get("ultima_medicion", pd.NaT)},
                        day_date
                    )
                    rows.append({
                        "Plan_Fecha": day_date.date(),
                        "Semana_ISO": day_date.isocalendar()[1],
                        "Equipo": int(eq),
                        "Dia_Idx": d+1,
                        "Orden": ord_idx,
                        "ZONA": rec.get("ZONA",""),
                        "BATERIA": rec.get("BATERIA",""),
                        "POZO": pz,
                        "r_m3_d": float(rec.get("__v", rec.get("r_m3_d", np.nan))),
                        "Vol_Estimado_m3": round(float(v_est), 2),
                        "Seed_POZO": "",
                        "Dist_km_semilla": None,
                        "Dist_km_centroid": round(
                            float(haversine_km(c_lat, c_lon, rec.get("LAT"), rec.get("LON"))), 3
                        ) if not (pd.isna(rec.get("LAT")) or pd.isna(rec.get("LON"))) else None,
                        "ultima_medicion": rec.get("ultima_medicion", pd.NaT),
                        "ClusterID": cid,
                        "Centroide_LAT": c_lat,
                        "Centroide_LON": c_lon,
                    })
                    ord_idx += 1

                # marcar usados: hoy y en la semana
                used_today.update(pozos)
                used_week.update(pozos)

        # verificación de no-duplicación diaria (opcional)
        if rows:
            plan_day = pd.DataFrame(rows)
            same_day = plan_day[plan_day["Plan_Fecha"] == day_date.date()]
            if not same_day.empty:
                dup = same_day.groupby(["Plan_Fecha","POZO"]).size().max()
                assert int(dup) == 1, "Un pozo se repite el mismo día."


    cols = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
            "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
            "Seed_POZO","Dist_km_semilla","Dist_km_centroid","ultima_medicion",
            "ClusterID","Centroide_LAT","Centroide_LON"]
    return pd.DataFrame(rows, columns=cols) if rows else pd.DataFrame(columns=cols)

# (Se elimina la versión anterior _fill_day_star_clusters: ahora ya no se usa.)

def ensure_annual_coverage_zone_locked(all_pozos_df, plan, params, start_date, equipo_to_zona,
                                       allowed_bats_by_zone_norm=None, r_by_pozo=None):
    cap_pz = params["max_pozos_dia_equipo"]

    keys = []
    for w in range(params["semanas_plan"]):
        w_start = start_date + timedelta(weeks=w)
        for d in range(params["dias_por_semana"]):
            f = w_start + timedelta(days=d)
            for e in equipo_to_zona.keys():
                keys.append((e, f))

    if not plan.empty:
        plan["__key"] = plan["Equipo"].astype(int).astype(str) + "|" + plan["Plan_Fecha"].astype(str)
        used_counts = plan.groupby("__key")["POZO"].count().to_dict()
    else:
        used_counts = {}

    planned = set(plan["POZO"].unique()) if not plan.empty else set()
    missing_df = all_pozos_df[~all_pozos_df["POZO"].isin(planned)].copy()
    missing_df = missing_df[missing_df["BATERIA"].notna() & (missing_df["BATERIA"].astype(str).str.strip()!="")].copy()

    add = []
    for _, row in missing_df.iterrows():
        pz = row["POZO"]; z = row["ZONA"]
        bat = row.get("BATERIA", "")

        if not isinstance(bat, str) or bat.strip() == "":
            continue

        if allowed_bats_by_zone_norm:
            zn = _norm(z)
            bats_allowed = allowed_bats_by_zone_norm.get(zn)
            if bats_allowed is not None:
                if _norm(bat) not in bats_allowed:
                    continue

        if r_by_pozo is not None:
            r_val = float(r_by_pozo.get(pz, np.nan))
            if not (r_val > RM3D_MIN):
                continue

        target_teams = [e for e, zona in equipo_to_zona.items() if zona == z]
        if not target_teams:
            continue
        placed = False
        for e in target_teams:
            for (ee, f) in keys:
                if ee != e:
                    continue
                key = f"{e}|{f}"
                cnt = used_counts.get(key, 0)
                if cnt < cap_pz:
                    add.append({
                        "Plan_Fecha": f,
                        "Semana_ISO": f.isocalendar()[1],
                        "Equipo": int(e),
                        "Dia_Idx": f.weekday()+1,
                        "Orden": cnt+1,
                        "ZONA": z,
                        "BATERIA": bat,
                        "POZO": pz,
                        "r_m3_d": np.nan,
                        "Vol_Estimado_m3": 0.0,
                        "Seed_POZO": "",
                        "Dist_km_semilla": None,
                        "Dist_km_centroid": None,
                        "ultima_medicion": pd.NaT,
                        "ClusterID": "",
                        "Centroide_LAT": np.nan,
                        "Centroide_LON": np.nan,
                    })
                    used_counts[key] = cnt+1
                    placed = True
                    break
            if placed:
                break

    if add:
        plan = pd.concat([plan, pd.DataFrame(add)], ignore_index=True)                 .sort_values(["Plan_Fecha","Equipo","Orden"])
    return plan

def build_alertas_abm(freq_df: pd.DataFrame, norm_table: pd.DataFrame, dict_df: pd.DataFrame) -> pd.DataFrame:
    base = freq_df[["POZO","ZONA","BATERIA","ultima_medicion","ultima_exitosa"]].copy()
    meta_first = dict_df.groupby("oficial")[["estado","met_prod","nivel_3","nivel_5"]].first()
    base = base.merge(meta_first[["estado","met_prod"]], left_on="POZO", right_index=True, how="left")

    out = base.copy()
    for c in ["ultima_medicion","ultima_exitosa"]:
        out[c] = pd.to_datetime(out[c], errors="coerce").dt.date
    out = out.sort_values(["ZONA","BATERIA","POZO"]).reset_index(drop=True)
    return out

# ============================================
# HARNES PARA JUPYTER
# ============================================
def run_pipeline_jupyter(
    input_file,
    nombres_pozo_file,
    coords_file,
    *,
    semanas_plan=2,
    equipos_activos=2,
    dias_por_semana=5,
    max_pozos_dia_equipo=10,
    K_max_pozos_por_cluster=5,
    clusters_por_dia_max=None,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=3.0,
    rm3d_min=0.1,
    zonas_incluir=None,
    baterias_por_zona=None,      # {"las heras cg - canadon escondida": {"swabing ce","ce 04"}}
    pozos_excluir=None,
    escribir_excel=False
):
    global INPUT_FILE, NOMBRES_POZO_FILE, COORDS_FILE, RADIUS_KM, RM3D_MIN, DEFAULTS
    INPUT_FILE       = input_file
    NOMBRES_POZO_FILE= nombres_pozo_file
    COORDS_FILE      = coords_file
    RADIUS_KM        = float(radius_km)
    RM3D_MIN         = float(rm3d_min)

    DEFAULTS = DEFAULTS.copy()
    DEFAULTS.update({
        "equipos_activos": int(equipos_activos),
        "dias_por_semana": int(dias_por_semana),
        "semanas_plan": int(semanas_plan),
        "max_pozos_dia_equipo": int(max_pozos_dia_equipo),
        "max_pozos_por_cluster": int(K_max_pozos_por_cluster),
        "clusters_por_dia_max": clusters_por_dia_max,
        "backfill_nearest_cluster": bool(backfill_nearest),
        "umbral_km_backfill": float(umbral_km_backfill),
    })

    # 1) Lee historial (Excel del usuario)
    df = read_historial(INPUT_FILE, SHEET_HIST)

    # 2) Normalización por diccionario
    key2off, dict_df = load_pozo_dictionary(NOMBRES_POZO_FILE)
    df_norm, alert_table, norm_table = apply_pozo_normalization(df, key2off, dict_df)

    # 3) Filtra inválidos
    df = df_norm[df_norm["VALIDO_POZO"] == True].copy()

    # 4) Filtro por ZONA (si se pide explícito)
    if zonas_incluir:
        zonas_incluir = set(zonas_incluir)
        znorm = {_norm(z) for z in zonas_incluir}
        df = df[df["__ZONA_NORM"].isin(znorm)].copy()
        zonas_labels = zonas_incluir
        zonas_norm   = znorm
    else:
        mask_valid = df["ZONA"].notna() & (df["ZONA"].astype(str).str.strip() != "")
        zonas_labels = set(df.loc[mask_valid, "ZONA"].astype(str))
        zonas_norm   = set(df.loc[mask_valid, "__ZONA_NORM"].astype(str))

    # 5) Sub-filtro de baterías (si lo pasaste por parámetro)
    if baterias_por_zona:
        allowed_bats_by_zone_norm = {zn: set(baterias_por_zona[zn]) if baterias_por_zona[zn] is not None else None
                                     for zn in baterias_por_zona}
    else:
        allowed_bats_by_zone_norm = {zn: None for zn in zonas_norm}

    # 6) Exclusiones (si te pasan un set)
    excl_total = set(pozos_excluir or [])

    # 7) Frecuencias
    params = DEFAULTS.copy()
    freq = compute_frecuencias(df, params)

    # Comentarios desde OBS cuando ultima_medicion != ultima_exitosa
    df_obs = df[["POZO", "FECHA", "OBS_POZO"]].copy() if "OBS_POZO" in df.columns else pd.DataFrame(columns=["POZO","FECHA","OBS_POZO"])
    df_obs["FECHA_DATE"] = pd.to_datetime(df_obs["FECHA"], errors="coerce").dt.date
    df_obs = (df_obs.dropna(subset=["FECHA_DATE"])
                    .sort_values(["POZO","FECHA_DATE"])
                    .drop_duplicates(subset=["POZO","FECHA_DATE"], keep="last"))
    obs_map = {(r.POZO, r.FECHA_DATE): (str(r.OBS_POZO).strip() if pd.notna(r.OBS_POZO) else "")
               for r in df_obs.itertuples(index=False)}
    freq["__UMED_DATE"] = pd.to_datetime(freq["ultima_medicion"], errors="coerce").dt.date
    freq["__UEXI_DATE"] = pd.to_datetime(freq["ultima_exitosa"], errors="coerce").dt.date
    freq["comentario"] = [obs_map.get((pz, fmed), "") for pz, fmed in zip(freq["POZO"], freq["__UMED_DATE"])]
    mask_both_valid = freq["__UMED_DATE"].notna() & freq["__UEXI_DATE"].notna()
    mask_diff = mask_both_valid & (freq["__UMED_DATE"] != freq["__UEXI_DATE"])
    freq.loc[~mask_diff, "comentario"] = ""
    freq.drop(columns=["__UMED_DATE","__UEXI_DATE"], errors="ignore", inplace=True)

    # 8) Coordenadas
    coords_df = read_coords(COORDS_FILE)

    # 9) Mapas auxiliares
    delta_by_pozo = freq.set_index("POZO")["delta_star_dias"].to_dict()
    r_by_pozo     = freq.set_index("POZO")["r_m3_d"].to_dict()

    # 10) Semanas a planificar
    start = next_monday(date.today())
    weeks = [(start + timedelta(weeks=i), start + timedelta(weeks=i, days=6)) for i in range(params["semanas_plan"])]

    # 11) Equipos -> ZONA (fijo)
    zonas_list = sorted(zonas_labels)
    if not zonas_list:
        raise ValueError("No hay ZONAS válidas (todas vacías).")

    equipo_to_zona = {}
    for i in range(1, params["equipos_activos"]+1):
        zona_asignada = zonas_list[min(i-1, len(zonas_list)-1)]
        equipo_to_zona[i] = zona_asignada

    # 12) Plan semanal por ZONA usando la versión V2 (clústeres)
    plan_all = []
    next_due = {row.POZO: row.proxima_visita_base for row in freq.itertuples()}
    zone_to_teams = {}
    for eq, zona_label in equipo_to_zona.items():
        zone_to_teams.setdefault(zona_label, []).append(eq)

    for (w_start, w_end) in weeks:
        for zona_label, team_list in zone_to_teams.items():
            zona_norm_label = _norm(zona_label)
            cand_all = build_candidates_with_coords(
                freq=freq,
                week_start=w_start,
                week_end=w_end,
                excl_pozos=excl_total,
                zonas_norm_incluidas={zona_norm_label},
                coords_df=coords_df,
                allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
                next_due_map=next_due
            )
            if cand_all.empty:
                continue

            cand_zone = cand_all[[  # mantener las columnas necesarias
                "POZO","ZONA","BATERIA","due_date","is_overdue","__v",
                "LAT","LON","has_coords","r_m3_d","ultima_medicion"
            ]].copy()

            plan_week_zone = assign_week_round_robin_by_zone(
                cand_all=cand_zone,
                team_ids=sorted(team_list),
                params=params,
                week_start=w_start,
                week_end=w_end,
                radius_km=RADIUS_KM
            )

            if not plan_week_zone.empty:
                plan_all.append(plan_week_zone)
                # actualizar next_due por pozo asignado
                for pz, fcal in plan_week_zone[["POZO","Plan_Fecha"]].drop_duplicates().itertuples(index=False):
                    dd = int(delta_by_pozo.get(pz, params["min_dias_freq"]))
                    next_due[pz] = pd.Timestamp(fcal) + pd.Timedelta(days=dd)

    plan = (pd.concat(plan_all, ignore_index=True)
            if plan_all else
            pd.DataFrame(columns=[
                "Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden","ZONA","BATERIA",
                "POZO","r_m3_d","Vol_Estimado_m3","Seed_POZO","Dist_km_semilla",
                "Dist_km_centroid","ultima_medicion","ClusterID","Centroide_LAT","Centroide_LON","Cluster_Score"
            ]))

    # 13) Cobertura anual reforzada (opcional) — mantiene tu lógica original (no forma clúster)
    if not freq.empty:
        eligible_mask = (freq["ZONA"].isin(zonas_labels)) & (freq["r_m3_d"].fillna(0) > RM3D_MIN)
        if "comentario" in freq.columns:
            eligible_mask &= (freq["comentario"].astype(str).fillna("").str.strip() == "")
        if allowed_bats_by_zone_norm:
            for zn, bats in allowed_bats_by_zone_norm.items():
                if bats is not None:
                    eligible_mask &= (~(freq["ZONA_NORM"] == zn)) | (freq["BATERIA_NORM"].isin(bats))

        all_pozos_in_zonas = freq.loc[eligible_mask, ["POZO","ZONA","BATERIA"]].drop_duplicates().copy()
        all_pozos_in_zonas = all_pozos_in_zonas[
            all_pozos_in_zonas["BATERIA"].notna() & (all_pozos_in_zonas["BATERIA"].astype(str).str.strip() != "")
        ].copy()

        # usa el filler original (sin clúster) SOLO para cubrir huecos anuales
     #   plan = ensure_annual_coverage_zone_locked(
      #      all_pozos_df=all_pozos_in_zonas,
       #     plan=plan,
        #    params=params,
         #   start_date=start,
          #  equipo_to_zona=equipo_to_zona,
           # allowed_bats_by_zone_norm=allowed_bats_by_zone_norm,
            #r_by_pozo=r_by_pozo
       # )

    # 14) Export opcional (agrego columnas nuevas de clúster)
    out_xlsx = None
    if escribir_excel:
        out_xlsx = unique_output_path(INPUT_FILE)
        coords_all = read_coords(COORDS_FILE)
        with pd.ExcelWriter(out_xlsx, engine="openpyxl", mode="w") as writer:
            # Frecuencias
            freq_out = freq.copy()
            for c in ["proxima_visita_base","ultima_medicion","ultima_exitosa"]:
                freq_out[c] = pd.to_datetime(freq_out[c], errors="coerce").dt.date
            freq_out = freq_out.sort_values(["ZONA","BATERIA","POZO"])
            cols_pref = ["POZO","ZONA","BATERIA","ZONA_NORM","BATERIA_NORM","r_m3_d",
                         "ultima_medicion","ultima_exitosa","delta_star_dias","comentario",
                         "proxima_visita_base","ceros_consec","alerta"]
            cols_final = [c for c in cols_pref if c in freq_out.columns] + \
                         [c for c in freq_out.columns if c not in cols_pref]
            freq_out = freq_out[cols_final]
            freq_out.to_excel(writer, "Frecuencias", index=False)

            # Plan por equipo + Km_al_siguiente
            cols_plan = ["Plan_Fecha","Semana_ISO","Equipo","Dia_Idx","Orden",
                         "ZONA","BATERIA","POZO","r_m3_d","Vol_Estimado_m3",
                         "ClusterID","Centroide_LAT","Centroide_LON","Cluster_Score",
                         "Dist_km_centroid"]
            for eq in range(1, params["equipos_activos"]+1):
                pe = plan.loc[plan["Equipo"]==eq].copy()
                if pe.empty:
                    pe = pd.DataFrame(columns=cols_plan + ["Km_al_siguiente","Ejecutado"])
                else:
                    pe = pe.sort_values(["Plan_Fecha","Dia_Idx","Orden","POZO"]).copy()
                    pe = pe.merge(coords_all, how="left", on="POZO")
                    pe["LAT_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LAT"].shift(-1)
                    pe["LON_next"] = pe.groupby(["Plan_Fecha","Dia_Idx"])["LON"].shift(-1)
                    def _leg_km(row):
                        if (pd.isna(row.get("LAT")) or pd.isna(row.get("LON")) or
                            pd.isna(row.get("LAT_next")) or pd.isna(row.get("LON_next"))):
                            return None
                        return round(float(haversine_km(row["LAT"], row["LON"],
                                                        row["LAT_next"], row["LON_next"])), 3)
                    pe["Km_al_siguiente"] = pe.apply(_leg_km, axis=1)
                    pe.drop(columns=["LAT","LON","LAT_next","LON_next"], inplace=True, errors="ignore")
                    pe["Ejecutado"] = ""
                    for c in cols_plan:
                        if c not in pe.columns: pe[c] = ""
                    pe = pe[cols_plan + ["Km_al_siguiente","Ejecutado"]]
                pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)

            # Auxiliares
            pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)

    # ====== Asserts/chequeos mínimos ======
    # ====== Asserts/chequeos mínimos ======
    if not plan.empty:
        K_chk = int(params.get("max_pozos_por_cluster", 5))

        # ✅ Validar SOLO clústeres reales (ClusterID no vacío)
        mask_real = plan["ClusterID"].notna() & (plan["ClusterID"].astype(str).str.strip() != "")
        gsize = (plan.loc[mask_real]
                 .groupby(["Plan_Fecha","Equipo","ClusterID"])["POZO"]
                 .count())

        if not gsize.empty:
            assert (gsize % K_chk == 0).all(), "Hay clústeres asignados que no cumplen tamaño K exacto."

        # ✅ No duplicación diaria (ningún pozo se repite en el mismo día)
        dupmax = plan.groupby(["Plan_Fecha","POZO"]).size().max()
        assert int(dupmax) == 1, "Un pozo aparece más de una vez en el mismo día."

        # ✅ Radio cumplido (tolerancia numérica)
        if "Dist_km_centroid" in plan.columns and plan["Dist_km_centroid"].notna().any():
            assert float(plan["Dist_km_centroid"].fillna(0).max()) <= float(RADIUS_KM) + 1e-6, \
                "Distancia a centroide excede el radio."

    return plan, freq, out_xlsx




# ============================================
# RUNNER (EDITÁ TUS RUTAS Y PARÁMETROS ACÁ)
# ============================================

INPUT_FILE = r"C:\Users\ry16123\Downloads\CÑE   FLUG S.A 2025 2.xlsx"
NOMBRES_POZO_FILE = r"C:\Users\ry16123\export_org_estructural\Nombres-Pozo.xlsx"
COORDS_FILE = r"C:\Users\ry16123\OneDrive - YPF\Escritorio\power BI\GUADAL- POWER BI\Inteligencia Artificial\coordenadas1.xlsx"

plan, freq, out_xlsx = run_pipeline_jupyter(
    input_file=INPUT_FILE,
    nombres_pozo_file=NOMBRES_POZO_FILE,
    coords_file=COORDS_FILE,
    semanas_plan=2,                 # probá corto para iterar rápido
    equipos_activos=2,              # cantidad de equipos
    dias_por_semana=5,              # 5 ó 6
    max_pozos_dia_equipo=4,
    K_max_pozos_por_cluster=4,      # tamaño máximo de clúster
    clusters_por_dia_max=4,
    backfill_nearest=True,
    umbral_km_backfill=5.0,
    radius_km=4.0,
    rm3d_min=0.01,
    zonas_incluir=None,             # o lista como ["Las Heras CG - Canadon Escondida"]
    baterias_por_zona=None,         # dict normalizado (keys en _norm) o None
    pozos_excluir=set(),            # ej.: {"BB-100"}
    escribir_excel=True            # poné True si querés exportar el Excel
)

# Mostrar un vistazo rápido
display(freq.head(10))
display(plan.head(30))
print("Excel generado:", out_xlsx)

  warn(msg)
  warn(msg)
  freq_out.to_excel(writer, "Frecuencias", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pe.to_excel(writer, f"Plan_Equipo_{eq}", index=False)
  pd.DataFrame(list(params.items()), columns=["Parametro","Valor"]).to_excel(writer, "Parametros_Usados", index=False)


Unnamed: 0,POZO,ZONA,BATERIA,ZONA_NORM,BATERIA_NORM,r_m3_d,ultima_medicion,ultima_exitosa,delta_star_dias,proxima_visita_base,ceros_consec,alerta,comentario
0,BB-101,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.035714,2025-08-25,2025-08-25,56,2025-10-20,0,,
1,BB-111,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.428571,2025-07-01,2025-07-01,7,2025-07-08,0,,
2,BB-133,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.012346,2025-10-20,2025-10-20,56,2025-12-15,0,,
3,BB-170,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.026374,2025-10-28,2025-10-22,56,2025-12-23,0,,NO CONTACTA NIVEL A 1500 MTS
4,BB-21,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.015564,2025-05-12,2025-05-12,56,2025-07-07,0,,
5,BB-50,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.001059,2025-10-09,2025-10-09,56,2025-12-04,0,,
6,BB-80,Las Heras CG - Canadon Escondida,BB 02,las heras cg canadon escondida,bb 02,0.05,2025-08-07,2025-08-07,42,2025-09-18,0,,
7,BB-91,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.0033,2025-10-20,2025-10-20,56,2025-12-15,0,,
8,BB.a-104,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.285714,2025-10-15,2025-01-24,7,2025-10-22,0,,NO SE PUEDE AFLOJAR TAPA
9,BB.a-75,Las Heras CG - Canadon Escondida,Swabing CE,las heras cg canadon escondida,swabing ce,0.012552,2025-09-24,2025-09-24,56,2025-11-19,0,,


Unnamed: 0,Plan_Fecha,Semana_ISO,Equipo,Dia_Idx,Orden,ZONA,BATERIA,POZO,r_m3_d,Vol_Estimado_m3,Seed_POZO,Dist_km_semilla,Dist_km_centroid,ultima_medicion,ClusterID,Centroide_LAT,Centroide_LON
0,2025-11-03,45,1,1,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE.a-586,1.8,9.0,,,0.49,2025-10-29,C00001,-46.401416,-68.571995
1,2025-11-03,45,1,1,2,Las Heras CG - Canadon Escondida,Swabing CE,CnE.a-226,0.230769,1.15,,,0.612,2025-10-29,C00001,-46.401416,-68.571995
2,2025-11-03,45,1,1,3,Las Heras CG - Canadon Escondida,Swabing CE,CnE-1091,0.285714,4.86,,,1.744,2025-10-17,C00001,-46.401416,-68.571995
3,2025-11-03,45,1,1,4,Las Heras CG - Canadon Escondida,Swabing CE,CnE-199,0.357143,1.79,,,2.037,2025-10-29,C00001,-46.401416,-68.571995
4,2025-11-03,45,2,1,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE-370(I),0.230769,0.92,,,0.489,2025-10-30,C00002,-46.464236,-68.557056
5,2025-11-03,45,2,1,2,Las Heras CG - Canadon Escondida,CE 10,CnE-826,0.571429,4.0,,,0.608,2025-10-27,C00002,-46.464236,-68.557056
6,2025-11-03,45,2,1,3,Las Heras CG - Canadon Escondida,Swabing CE,CnE-839,0.2,1.4,,,2.743,2025-10-27,C00002,-46.464236,-68.557056
7,2025-11-03,45,2,1,4,Las Heras CG - Canadon Escondida,Swabing CE,ECE.x-1,0.384615,2.69,,,2.885,2025-10-27,C00002,-46.464236,-68.557056
8,2025-11-04,45,1,2,1,Las Heras CG - Canadon Escondida,Swabing CE,CnE-849,0.2,4.0,,,1.127,2025-10-15,C00003,-46.434831,-68.541859
9,2025-11-04,45,1,2,2,Las Heras CG - Canadon Escondida,Swabing CE,CnE-543,0.153846,3.08,,,2.02,2025-10-15,C00003,-46.434831,-68.541859


Excel generado: C:\Users\ry16123\Downloads\CÑE   FLUG S.A 2025 2_CRONOGRAMA_20251031.xlsx
