¿Qué hace esta herramienta?

Esta herramienta estima la probabilidad de que un establecimiento tenga un plan anti-estrés (pregunta Q300) a partir de su país, sector, tamaño y varias prácticas de gestión. Devuelve una probabilidad entre 0 % y 100 % y, con un umbral operativo del 30 %, clasificamos como ‘tiene plan’ cuando la probabilidad es ≥ 30 %. En datos de prueba, el modelo rinde bien: AUC ≈ 0,80, F1 ≈ 0,65, precisión ≈ 0,54, sensibilidad ≈ 0,80 y exactitud ≈ 0,70. Esto significa, en pocas palabras, que distingue correctamente a quienes tienen y no tienen plan alrededor de 8 de cada 10 veces.

Limitaciones y alcance
La predicción usa datos históricos y puede no contemplar eventos extraordinarios (cambios regulatorios, shocks macroeconómicos, etc.).
Los resultados pueden variar si faltan datos o si se introducen valores atípicos.
Revisa periódicamente el umbral para equilibrar falsos positivos y falsos negativos según tu objetivo.

# Flask + API v1 + UI amigable

In [23]:
from _00_env import *

In [24]:
# Apagar servidor previo si existía
try:
    server.shutdown()
except Exception:
    pass

In [30]:
# Artefactos del modelo
MODEL_PATH = "model/model.joblib"
SCHEMA_PATH = "model/schema.json"
META_PATH   = "model/meta.json"

model  = joblib.load(MODEL_PATH)
COLS   = json.load(open(SCHEMA_PATH, "r", encoding="utf-8"))["columns"]
THRESH = float(json.load(open(META_PATH,   "r", encoding="utf-8"))["threshold"])

# API key demo
API_KEY = "demo123"   # cambia si querés

# Helpers de scoring
def _align_df(records):
    df = pd.DataFrame.from_records(records)
    for c in COLS:
        if c not in df: df[c] = np.nan
    return df[COLS]

def _proba_pos(X: pd.DataFrame) -> np.ndarray:
    if hasattr(model, "predict_proba"):
        p = model.predict_proba(X)
        return p[:,1] if getattr(p, "ndim", 2) == 2 else np.asarray(p).ravel()
    if hasattr(model, "decision_function"):
        s = model.decision_function(X)
        s = s if isinstance(s, np.ndarray) else np.asarray(list(s))
        if s.ndim > 1: s = s.max(axis=1)
        s_min, s_max = float(np.min(s)), float(np.max(s))
        return (s - s_min)/(s_max - s_min) if s_max > s_min else np.zeros_like(s)
    return model.predict(X).astype(float)

# App y endpoints
app = Flask(__name__)

@app.get("/health")
def health():
    return jsonify(ok=True, total_campos=len(COLS), threshold=THRESH)

@app.post("/predict")
def predict():
    payload = request.get_json(force=True)
    X = _align_df(payload.get("records", []))
    prob = _proba_pos(X)
    pred = (prob >= THRESH).astype(int)
    return jsonify(ok=True, n_records=len(X), threshold=THRESH,
                predictions=pred.tolist(), probas=prob.tolist())

# Versionado + API key
@app.get("/api/v1/health")
def health_v1():
    return jsonify(ok=True, total_campos=len(COLS), threshold=THRESH)

@app.post("/api/v1/predict")
def predict_v1():
    if request.headers.get("X-API-Key") != API_KEY:
        return jsonify(ok=False, error="Unauthorized"), 401
    payload = request.get_json(force=True)
    X = _align_df(payload.get("records", []))
    prob = _proba_pos(X)
    pred = (prob >= THRESH).astype(int)
    return jsonify(ok=True, n_records=len(X), threshold=THRESH,
                predictions=pred.tolist(), probas=prob.tolist())

#        UI amigable
def _unique(seq):
    seen=set(); out=[]
    for x in seq:
        if x not in seen:
            seen.add(x); out.append(x)
    return out

def _group(prefix):
    return _unique([c for c in COLS if c.startswith(prefix)])

# Países con nombres completos
COUNTRIES = _group("COUNTRY_")
_country_names = {
    "AT":"Austria","BE":"Bélgica","BG":"Bulgaria","CH":"Suiza","CY":"Chipre","CZ":"Chequia",
    "DE":"Alemania","DK":"Dinamarca","EE":"Estonia","EL":"Grecia","ES":"España","FI":"Finlandia",
    "FR":"Francia","HR":"Croacia","HU":"Hungría","IE":"Irlanda","IS":"Islandia","IT":"Italia",
    "LT":"Lituania","LU":"Luxemburgo","LV":"Letonia","ME":"Montenegro", "MK":"Macedonia del Norte", 
    "MT":"Malta","NL":"Países Bajos","NO":"Noruega", "PL":"Polonia","PT":"Portugal","RO":"Rumanía",
    "RS":"Serbia", "SE":"Suecia","SI":"Eslovenia","SK":"Eslovaquia", "TR":"Turquía","UK":"Reino Unido",
    "GB":"Reino Unido"
}
def country_label(col):
    code = col.split("COUNTRY_")[-1].upper()
    return _country_names.get(code, code)

def _options(cols, label_fn):
    cols_sorted = sorted(cols, key=lambda c: label_fn(c))
    return "\n".join(f'<option value="{c}">{label_fn(c)}</option>' for c in cols_sorted)

#------------------------------------------------------------------------------------------ inicia
def _help(texto: str) -> str:
    # Bloque accesible y sin JS para “ayuda” contextual
    return f"""
<details class="help-block">
    <summary class="help-trigger" aria-label="Más información">ⓘ</summary>
    <div class="help-content">{texto}</div>
</details>
"""
#--------------------------------------------------------------------------------------------- finaliza

# --- País
country_html = (f"""
<div class="card">
    <h3 id="hdr-country">País</h3>
    <select id="country" class="input" aria-labelledby="hdr-country">
    <option value="">(elige)</option>{_options(COUNTRIES, country_label)}
    </select>
    {_help("Usamos el país para contextualizar la predicción. Internamente se traduce a variables del modelo.")}
</div>
""") if COUNTRIES else ""


# --- Sectores (lista fija EXACTA) — incluye A como base si no hay dummy ---
def _sector_cols():
    s = [c for c in COLS if c.startswith("SECTOR_")]
    return s if s else [c for c in COLS if c.startswith("NACE_")]

_norm = lambda s: re.sub(r"[^A-Za-z]","",s).upper()

_SECTOR_TARGETS = [
    ("A",                 "Agricultura, silvicultura y pesca"),
    ("B, D, E, F",        "Construcción, residuos, agua y electricidad"),
    ("C",                 "Industrias manufactureras"),
    ("G, H, I, R",        "Comercio, transporte, hostelería y ocio"),
    ("J, K, L, M, N, S",  "TI, finanzas, inmobiliario y servicios técnico-personales"),
    ("O",                 "Administración pública"),
    ("P",                 "Educación"),
    ("Q",                 "Salud humana y trabajo social"),
]
_TARGETS_NORM = [(_norm(code), label) for code, label in _SECTOR_TARGETS]

def _match_sector(code_norm, cols):
    for col in cols:
        raw = col.replace("SECTOR_","").replace("NACE_","")
        if _norm(raw) == code_norm:
            return col
    if len(code_norm) == 1:
        L = code_norm
        for col in cols:
            if col.startswith(f"SECTOR_{L}") or col.startswith(f"NACE_{L}"):
                return col
        for col in cols:
            raw = col.replace("SECTOR_","").replace("NACE_","")
            if raw and raw[0].upper() == L:
                return col
    return None

cols_candidates = _sector_cols()
SECTOR_OPTIONS = []
for code_norm, label in _TARGETS_NORM:
    found = _match_sector(code_norm, cols_candidates)
    if found:
        SECTOR_OPTIONS.append((found, label))
if not any(lbl.startswith("Agricultura") for _, lbl in SECTOR_OPTIONS):
    SECTOR_OPTIONS.insert(0, ("__BASE_A__", "Agricultura, silvicultura y pesca"))

SECTOR_OPTIONS_HTML = "\n".join(
    f'<option value="{col}">{label}</option>' for col, label in SECTOR_OPTIONS
)

sector_html = (f"""
<div class="card">
    <h3>Sector de actividad</h3>
    <select id="sector" class="input">
        <option value="">(elige)</option>{SECTOR_OPTIONS_HTML}
    </select>
    {_help("El sector ayuda a ajustar el cálculo al tipo de actividad. Internamente lo convertimos a variables del modelo.")}
</div>
""") if SECTOR_OPTIONS else ""

print("Sectores detectados:", SECTOR_OPTIONS)

# Tamaño de empresa: adaptar (rangos por dummies o numérico)
def _find_size_numeric_col(cols):
    MANUAL = None
    if MANUAL: return MANUAL
    preferred = {
        "emp_size","size","employees","n_employees","n_emp","emp_n",
        "tamano","tamaño","num_empleados","empleados","staff","workers","n_workers"
    }
    lower = {c.lower(): c for c in cols}
    for k in preferred:
        if k in lower: return lower[k]
    def canon(s): return re.sub(r'[^a-zñ]','', s.lower())
    keys = ["empsize","size","employees","nemployees","empn","tamano","tamaño","emplead","workers","nworkers","staff","numempleados"]
    for c in cols:
        if any(k in canon(c) for k in keys): return c
    return None

def _find_size_dummy_for(bucket: str, cols):
    patt_any = re.compile(r'(SIZE|EMP|WORK|STAFF|EMPL|WORKERS|N_EMP)', re.I)
    patt = {
        "10_49":   re.compile(r'10\s*[_-]?\s*(?:TO|A|-|–|_)?\s*49\b|10_?49', re.I),
        "50_249":  re.compile(r'50\s*[_-]?\s*(?:TO|A|-|–|_)?\s*249\b|50_?249', re.I),
        "250PLUS": re.compile(r'250\+|>=?\s*250|GE\s*250|GT\s*249|250PLUS', re.I),
    }[bucket]
    for c in cols:
        if patt_any.search(c) and patt.search(c):
            return c
    return None

SIZE_NUM_COL = _find_size_numeric_col(COLS)
BUCKETS = [("10_49","10–49",30), ("50_249","50–249",150), ("250PLUS","≥250",300)]
SIZE_DUMMY_MAP = {b: _find_size_dummy_for(b, COLS) for b,_,_ in BUCKETS}
SIZE_DUMMY_COLS = [c for c in SIZE_DUMMY_MAP.values() if c]

SIZE_OPTIONS = []
for b, label, rep in BUCKETS:
    if SIZE_DUMMY_MAP.get(b):
        SIZE_OPTIONS.append((f"DUMMY::{SIZE_DUMMY_MAP[b]}", label))
    elif SIZE_NUM_COL:
        SIZE_OPTIONS.append((f"NUM::{rep}", label))

SIZE_MODE = "dummy" if SIZE_DUMMY_COLS else ("numeric" if SIZE_NUM_COL else "none")
SIZE_OPTIONS_HTML = "\n".join(f'<option value="{v}">{lab}</option>' for v, lab in SIZE_OPTIONS)

size_html = ""
if SIZE_OPTIONS:
    # Texto 100% usuario; evita “dummies/numérico” en la interfaz
    ayuda_size = "Selecciona el tamaño aproximado de tu empresa. Usamos este dato para ajustar la predicción."
    size_html = f"""
<div class="card">
    <h3>Tamaño de empresa</h3>
    <select id="size_sel" class="input">
        <option value="">(elige)</option>{SIZE_OPTIONS_HTML}
    </select>
    {_help(ayuda_size)}
</div>
"""

SIZE_DUMMIES_FOR_JS = SIZE_DUMMY_COLS

print("Tamaño — columna numérica:", SIZE_NUM_COL)
print("Tamaño — dummies detectadas por bucket:")
for b, label, rep in BUCKETS:
    print(f"  {label:<7} -> {SIZE_DUMMY_MAP.get(b)}")
print("Tamaño — estrategia UI:", "solo dummies" if SIZE_MODE=="dummy" else ("solo numérico" if SIZE_MODE=="numeric" else "sin control"))

# Template HTML
HTML_TPL = """
<!doctype html><meta charset="utf-8"><title>Servicio de Predicción</title>
<style>
*{box-sizing:border-box} body{font-family:system-ui,Segoe UI,Arial;max-width:900px;margin:28px auto;line-height:1.5}
.card{border:1px solid #e6e6e6;border-radius:14px;padding:18px;margin:14px 0;box-shadow:0 1px 4px rgba(0,0,0,.04)}
.input{width:100%;padding:12px;border:1px solid #ddd;border-radius:12px}
.btn{padding:10px 16px;border-radius:10px;border:0;background:#0d6efd;color:#fff;cursor:pointer}
.muted{color:#666}

.help-block{display:inline-block;margin-left:.5rem;position:relative}
.help-trigger{
    display:inline-grid;place-items:center;
    width:1.25em;height:1.25em;font-weight:600;
    border:1px solid #cfcfcf;border-radius:50%;
    cursor:pointer;user-select:none
}
.help-content{
    position:absolute;z-index:10;top:1.75em;left:0;
    max-width:36ch;padding:.6rem .75rem;font-size:.92rem;line-height:1.3;
    background:#fff;border:1px solid #e6e6e6;border-radius:.5rem;
    box-shadow:0 6px 18px rgba(0,0,0,.08)
}
details.help-block:not([open]) .help-content{display:none}

.badge{display:inline-block;padding:4px 10px;border-radius:999px}
.ok{background:#e8f5e9;color:#2e7d32} .warn{background:#fff8e1;color:#8d6e63}

/* --- Resultado visual --- */
.result-block{display:grid;gap:8px}
.prob-big{font-size:1.6rem;font-weight:700}
.caption{color:#666;font-size:.95rem}
.meter{
    position:relative;height:18px;border-radius:10px;border:1px solid #e6e6e6;
    background:linear-gradient(90deg,#e8f5e9 0%,#fff8e1 50%,#ffebee 100%);
}
.meter .fill{position:absolute;left:0;top:0;bottom:0;width:0;border-radius:10px}
.ticks{display:flex;justify-content:space-between;font-size:11px;color:#666}


</style>

<h1>Servicio de Predicción</h1>
<p class="muted">Completa los campos y pulsa <b>Predecir</b>.</p>

__COUNTRY_HTML__
__SECTOR_HTML__
__SIZE_HTML__

<button class="btn" onclick="go()">Predecir</button>

<div class="card" id="main" style="display:none;margin-top:12px;"></div>

<script>
const COUNTRIES = __COUNTRIES__;
const SIZE_MODE = "__SIZE_MODE__";
const SIZE_NUM_COL = __SIZE_NUM_COL__;
const SIZE_DUMMIES = __SIZE_DUMMIES__;
const API_KEY   = "__APIKEY__";
const SERVICE_THR = __THRESH__;

function bandInfo(p){ // p en [0,1]
    if (p < 0.20) return {label:"Bajo",     color:"#2e7d32", advice:"Riesgo bajo; operación normalmente viable."};
    if (p < 0.50) return {label:"Moderado", color:"#ef6c00", advice:"Probabilidad moderada; considera solicitar más información."};
    if (p < 0.75) return {label:"Elevado",  color:"#d84315", advice:"Riesgo elevado; revisa garantías y condiciones."};
    return             {label:"Alto",      color:"#b71c1c", advice:"Riesgo muy alto; mejor no avanzar sin medidas adicionales."};
}


let lastProb = null;
let lastServiceThr = null;


function renderRisk(){
    if(lastProb === null) return;

  // Clasificación binaria con el umbral del servicio
    const thr  = Number(lastServiceThr ?? SERVICE_THR);
    const pred = (lastProb >= thr) ? 1 : 0;

  // Banda interpretativa por probabilidad
  const pct  = (lastProb * 100);
    const band = bandInfo(lastProb);

    const main = document.getElementById('main');
    main.style.display = 'block';
    main.innerHTML = `
    <h3>Resultado</h3>
    <div class="result-block">
        <div><span class="${pred ? 'badge warn' : 'badge ok'}">Riesgo: ${pred ? 'ALTO' : 'BAJO'}</span></div>
        <div class="prob-big" aria-live="polite">Probabilidad: ${pct.toLocaleString('es-ES',{minimumFractionDigits:1, maximumFractionDigits:1})}%</div>

        <div class="meter" role="img" aria-label="Probabilidad ${pct.toFixed(1)} por ciento">
        <div class="fill" style="width:${pct.toFixed(1)}%; background:${band.color};"></div>
        </div>
        <div class="ticks"><span>0%</span><span>50%</span><span>100%</span></div>

        <div class="caption"><b>Interpretación:</b> ${band.advice}</div>
    </div>
`;
}

async function go(){
    const rec = {};
    for(const c of COUNTRIES) rec[c]=0;
    for(const d of SIZE_DUMMIES) rec[d]=0; // inicializa dummies de tamaño

    const ctry = document.getElementById('country');
    if(ctry && ctry.value) rec[ctry.value] = 1;

    const sect = document.getElementById('sector');
    if(sect && sect.value && !sect.value.startsWith("__BASE_")) {
    rec[sect.value] = 1;       // "__BASE_A__" => todas 0 (categoría base)
}

    const sizeSel = document.getElementById('size_sel');
    if(sizeSel && sizeSel.value){
    if(sizeSel.value.startsWith("DUMMY::")){
        const col = sizeSel.value.split("::")[1];
        rec[col] = 1;
    } else if(sizeSel.value.startsWith("NUM::") && SIZE_NUM_COL){
        rec[SIZE_NUM_COL] = Number(sizeSel.value.split("::")[1]);
    }
}

    const r = await fetch("/api/v1/predict", {
    method:"POST",
    headers: {"Content-Type":"application/json","X-API-Key": API_KEY},
    body: JSON.stringify({records:[rec]})
    });
    if(r.status === 401){ alert("API Key inválida."); return; }
    const j = await r.json();

    lastProb = (j.probas && j.probas.length) ? j.probas[0] : null;
    lastServiceThr = j.threshold;
    renderRisk();
}
</script>
"""

@app.get("/")
def ui_root():
    html = (HTML_TPL
            .replace("__THRESH__", f"{THRESH:.2f}")
            .replace("__COUNTRY_HTML__", country_html)
            .replace("__SECTOR_HTML__", sector_html)
            .replace("__SIZE_HTML__", size_html)
            .replace("__COUNTRIES__", json.dumps(COUNTRIES))
            .replace("__SIZE_MODE__", SIZE_MODE)
            .replace("__SIZE_NUM_COL__", json.dumps(SIZE_NUM_COL))
            .replace("__SIZE_DUMMIES__", json.dumps(SIZE_DUMMIES_FOR_JS))
            .replace("__APIKEY__", API_KEY))
    return Response(html, mimetype="text/html")

# Levantar servidor
PORT = 5051
server = make_server("127.0.0.1", PORT, app)
thread = threading.Thread(target=server.serve_forever, daemon=True)
thread.start()
print(f"✅ UI lista en: http://127.0.0.1:{PORT}/  |  API: /api/v1/predict  (X-API-Key: {API_KEY})")
try:
    webbrowser.open(f"http://127.0.0.1:{PORT}/")
except Exception:
    pass

def stop_server():
    server.shutdown()
    print("🛑 Servidor detenido")

Sectores detectados: [('__BASE_A__', 'Agricultura, silvicultura y pesca'), ('SECTOR_B, D, E, F', 'Construcción, residuos, agua y electricidad'), ('SECTOR_C', 'Industrias manufactureras'), ('SECTOR_G, H, I, R', 'Comercio, transporte, hostelería y ocio'), ('SECTOR_J, K, L, M, N, S', 'TI, finanzas, inmobiliario y servicios técnico-personales'), ('SECTOR_O', 'Administración pública'), ('SECTOR_P', 'Educación'), ('SECTOR_Q', 'Salud humana y trabajo social')]
Tamaño — columna numérica: SIZE_250+
Tamaño — dummies detectadas por bucket:
  10–49   -> None
  50–249  -> SIZE_50-249
  ≥250    -> SIZE_250+
Tamaño — estrategia UI: solo dummies
✅ UI lista en: http://127.0.0.1:5051/  |  API: /api/v1/predict  (X-API-Key: demo123)
