In [6]:
#CONEXION A LA GILLA DE BM

# -*- coding: utf-8 -*-
import os
from datetime import datetime
import pandas as pd
import pyodbc

SERVER   = "SWPCRDLHSPI05"
DATABASE = "iUpstream"
QUERY    = "SELECT * FROM [dbo].[POZOS_TECSS_HIST]"
OUT_XLSX = f"POZOS_TECSS_HIST1_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"

PREFERIDOS = [
    "ODBC Driver 18 for SQL Server",
    "ODBC Driver 17 for SQL Server",
    "SQL Server",                   # heredado
]

def elegir_driver():
    disponibles = [d.strip() for d in pyodbc.drivers()]
    print("Drivers ODBC detectados por pyodbc:", disponibles)
    for d in PREFERIDOS:
        if d in disponibles:
            return d
    raise RuntimeError(
        "No se encontró ningún driver SQL Server compatible. "
        "Instalá el 'Microsoft ODBC Driver 18 for SQL Server (x64)'."
    )

def build_conn_str(driver):
    if driver in ("ODBC Driver 18 for SQL Server", "ODBC Driver 17 for SQL Server"):
        # Drivers modernos: requieren Encrypt; en intranet solés necesitar TrustServerCertificate
        return (
            f"DRIVER={{{driver}}};"
            f"SERVER={SERVER};"
            f"DATABASE={DATABASE};"
            "Trusted_Connection=yes;"
            "Encrypt=yes;"
            "TrustServerCertificate=yes;"
        )
    else:
        # Driver heredado “SQL Server”: no admite Encrypt/TrustServerCertificate
        return (
            "DRIVER={SQL Server};"
            f"SERVER={SERVER};"
            f"DATABASE={DATABASE};"
            "Trusted_Connection=yes;"
        )

def main():
    driver = elegir_driver()
    conn_str = build_conn_str(driver)
    print("Usando driver:", driver)
    print("Cadena ODBC:", conn_str)

    with pyodbc.connect(conn_str) as conn:
        df = pd.read_sql(QUERY, conn)

    df.to_excel(OUT_XLSX, index=False)
    size_mb = os.path.getsize(OUT_XLSX) / (1024*1024)
    print(f"Exportado {len(df):,} filas → {OUT_XLSX} ({size_mb:.2f} MB)")

if __name__ == "__main__":
    main()


Drivers ODBC detectados por pyodbc: ['SQL Server', 'Oracle en OraClient11g_home1', 'Oracle en OraClient12Home1', 'Oracle en OraClient12Home2', 'HDBODBC']
Usando driver: SQL Server
Cadena ODBC: DRIVER={SQL Server};SERVER=SWPCRDLHSPI05;DATABASE=iUpstream;Trusted_Connection=yes;


  df = pd.read_sql(QUERY, conn)


Exportado 206,378 filas → POZOS_TECSS_HIST1_20251021_194826.xlsx (6.26 MB)


In [2]:
#CREA LOS ARCHIVOS .PKL Y EL SEGUNDO COGIDO LOS PONE EN PRODUCCION
# Requisitos (si hace falta):
# !pip install pandas numpy scikit-learn openpyxl

import json, pickle
from pathlib import Path
import numpy as np
import pandas as pd

from sklearn.model_selection import GroupKFold
from sklearn.ensemble import HistGradientBoostingClassifier
from sklearn.metrics import roc_auc_score, average_precision_score, classification_report
from sklearn.utils.class_weight import compute_class_weight

# ================== PARÁMETROS (ajustables) ==================
INPUT_XLSX = "vibraciones_multi.xlsx"  # histórico para entrenar
RESAMPLE_RULE = "30min"
FEATURE_WINDOW = "24H"
FEATURE_MIN_SAMPLES = 6
ALERTA_HORAS = 48       # etiqueta como "pre-falla" las 48 h previas a cada FALLA
COOLDOWN_HORAS = 6      # ventana corta post-falla que se excluye del entrenamiento
N_SPLITS = 4
SEED = 42

OUT_PKL = "model_aib.pkl"
OUT_CFG = "model_aib_config.json"
OUT_IMP = "aib_feature_importances.csv"
# =============================================================

def _pick_column(df: pd.DataFrame, *opts):
    cols_lower = {c.lower(): c for c in df.columns}
    for o in opts:
        for k in cols_lower:
            if k == o:
                return cols_lower[k]
    return None

def cargar_datos_flexible(path_xlsx: str):
    """
    Soporta:
    (A) multi-hoja: cada hoja = pozo, columnas (Fecha/VIBR), 'FALLA' en VIBR marca evento
    (B) única hoja con Pozo/Timestamp/VIBR (y opcional 'FALLA' en VIBR)
    Devuelve:
        vib: [well_id, timestamp, vibr]
        ev : [well_id, failure_time]
    """
    xls = pd.ExcelFile(path_xlsx)
    frames, eventos = [], []

    if len(xls.sheet_names) > 1:
        # Formato (A)
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(path_xlsx, sheet_name=sheet_name, dtype=str)
            if df is None or df.empty:
                continue
            df.columns = [str(c).strip() for c in df.columns]
            colmap = {}
            for c in df.columns:
                cl = c.lower()
                if cl in ("fecha","timestamp","datetime"):
                    colmap[c] = "timestamp"
                elif (cl in ("vibr","vibraciones","vibracion","valor","value","vibr.") or cl.startswith("vibr")):
                    colmap[c] = "vibr"
            if colmap:
                df = df.rename(columns=colmap)
            else:
                if len(df.columns) >= 2:
                    df = df.rename(columns={df.columns[0]:"timestamp", df.columns[1]:"vibr"})

            if "timestamp" not in df.columns or "vibr" not in df.columns:
                continue

            df["timestamp"] = pd.to_datetime(df["timestamp"], dayfirst=True, errors="coerce")

            falla_mask = df["vibr"].astype(str).str.contains("FALLA", case=False, na=False)
            if falla_mask.any():
                for t in df.loc[falla_mask, "timestamp"]:
                    if pd.notna(t):
                        eventos.append({"well_id": sheet_name, "failure_time": t})

            df_num = df.loc[~falla_mask].copy()
            df_num["vibr"] = pd.to_numeric(df_num["vibr"], errors="coerce")
            df_num = df_num.dropna(subset=["timestamp","vibr"])
            if df_num.empty:
                continue
            df_num.insert(0, "well_id", sheet_name)
            frames.append(df_num[["well_id","timestamp","vibr"]])

    else:
        # Formato (B) (1 sola hoja estilo tabla larga)
        df = pd.read_excel(path_xlsx, sheet_name=xls.sheet_names[0], dtype=str)
        df.columns = [str(c).strip() for c in df.columns]
        c_pozo = _pick_column(df, "pozo","well_id","equipo")
        c_ts   = _pick_column(df, "timestamp","fecha","datetime","ts")
        c_vibr = _pick_column(df, "vibr","vibraciones","vibracion","valor","value")
        if c_pozo and c_ts and c_vibr:
            df = df.rename(columns={c_pozo:"well_id", c_ts:"timestamp", c_vibr:"vibr"}).copy()
            df["timestamp"] = pd.to_datetime(df["timestamp"], dayfirst=True, errors="coerce")
            falla_mask = df["vibr"].astype(str).str.contains("FALLA", case=False, na=False)
            if falla_mask.any():
                tmp = df.loc[falla_mask, ["well_id","timestamp"]].dropna()
                tmp = tmp.rename(columns={"timestamp":"failure_time"})
                eventos.extend(tmp.to_dict(orient="records"))
            df_num = df.loc[~falla_mask].copy()
            df_num["vibr"] = pd.to_numeric(df_num["vibr"], errors="coerce")
            df_num = df_num.dropna(subset=["well_id","timestamp","vibr"])
            frames.append(df_num[["well_id","timestamp","vibr"]])
        else:
            # fallback: reintentar como (A)
            return cargar_datos_flexible(path_xlsx)

    if not frames:
        raise ValueError("No se encontraron datos numéricos de vibración en el Excel.")

    vib = pd.concat(frames, ignore_index=True).sort_values(["well_id","timestamp"])
    ev = pd.DataFrame(eventos)
    if not ev.empty:
        ev["failure_time"] = pd.to_datetime(ev["failure_time"], errors="coerce")
        ev = ev.dropna(subset=["well_id","failure_time"])
    return vib, ev

def build_features(vib: pd.DataFrame) -> pd.DataFrame:
    feats = []
    for well, g in vib.groupby("well_id", sort=False):
        g = g.sort_values("timestamp").set_index("timestamp")
        if g.empty:
            continue
        gs = g["vibr"].resample(RESAMPLE_RULE).median().interpolate(method="time", limit_direction="both")
        r = gs.rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES)
        mean = r.mean(); std = r.std(); p2p = r.max()-r.min()
        rms  = (gs.pow(2).rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean())**0.5
        diff_abs_mean = gs.diff().abs().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean()
        slope = gs.diff().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean()
        cv = std / (mean.abs() + 1e-9)
        F = pd.DataFrame({
            "well_id": well, "t_end": gs.index,
            "vib_last": gs.values, "vib_mean": mean.values, "vib_std": std.values,
            "vib_rms": rms.values, "vib_p2p": p2p.values,
            "vib_diff_mean": diff_abs_mean.values, "vib_slope": slope.values, "vib_cv": cv.values
        }).dropna()
        feats.append(F)
    if not feats:
        return pd.DataFrame(columns=["well_id","t_end"])
    return pd.concat(feats, ignore_index=True).sort_values(["well_id","t_end"])

def etiquetar(F: pd.DataFrame, eventos: pd.DataFrame) -> pd.DataFrame:
    F = F.copy(); F["y"] = 0
    if eventos is None or eventos.empty:
        print("Aviso: no hay eventos de falla detectados; entrenar así no generará un modelo útil.")
        return F
    for well, ge in eventos.groupby("well_id"):
        mask = F["well_id"] == well
        if not mask.any():
            continue
        t_end = F.loc[mask,"t_end"]
        for _, row in ge.iterrows():
            ft = row["failure_time"]
            dt_h = (ft - t_end).dt.total_seconds()/3600.0
            pos = mask & (dt_h>=0) & (dt_h<=ALERTA_HORAS)
            cool= mask & (dt_h<0) & (dt_h>=-COOLDOWN_HORAS)
            F.loc[pos,"y"] = 1
            F.loc[cool,"y"] = np.nan
    F = F.dropna(subset=["y"]).copy()
    F["y"] = F["y"].astype(int)
    return F

# ============ ENTRENAMIENTO ============
path = Path(INPUT_XLSX); assert path.exists(), f"No encontré {path}"
vib, ev = cargar_datos_flexible(str(path))

F = build_features(vib)
DS = etiquetar(F, ev)

pos = int(DS["y"].sum()) if "y" in DS else 0
print(f"Pozos: {vib['well_id'].nunique()} | Muestras brutas: {len(vib):,}")
print(f"Filas de features: {len(DS):,} | Positivos (pre-falla): {pos}")

# Validación por pozo
feat_cols = [c for c in DS.columns if c not in {"well_id","t_end","y"}]
classes = np.unique(DS["y"])
cw = compute_class_weight(class_weight="balanced", classes=classes, y=DS["y"])
class_weight = {int(k): float(v) for k, v in zip(classes, cw)}

gkf = GroupKFold(n_splits=max(2, min(N_SPLITS, DS["well_id"].nunique())))
y_true, y_pred, y_proba = [], [], []
for tr, te in gkf.split(DS[feat_cols], DS["y"], DS["well_id"].astype(str).values):
    Xtr, Xte = DS.iloc[tr][feat_cols], DS.iloc[te][feat_cols]
    ytr, yte = DS.iloc[tr]["y"], DS.iloc[te]["y"]
    sw = ytr.map(lambda yy: class_weight[int(yy)]).values
    m = HistGradientBoostingClassifier(learning_rate=0.08, max_iter=500, min_samples_leaf=20, random_state=SEED)
    m.fit(Xtr, ytr, sample_weight=sw)
    p = m.predict_proba(Xte)[:,1]
    y_proba.extend(p.tolist()); y_true.extend(yte.tolist())
    y_pred.extend((p>=0.5).astype(int).tolist())

print("\n=== MÉTRICAS (validación por pozo; umbral 0.50 informativo) ===")
try:
    print("ROC-AUC:", round(roc_auc_score(y_true,y_proba),4))
    print("PR-AUC :", round(average_precision_score(y_true,y_proba),4))
except Exception as e:
    print("No se pudieron calcular AUCs:", e)
print(classification_report(y_true, y_pred, digits=3))

# Modelo final + guardado
m = HistGradientBoostingClassifier(learning_rate=0.08, max_iter=500, min_samples_leaf=20, random_state=SEED)
sw_full = DS["y"].map(lambda yy: class_weight[int(yy)]).values
m.fit(DS[feat_cols], DS["y"], sample_weight=sw_full)

with open(OUT_PKL,"wb") as f:
    pickle.dump({"model": m, "features": feat_cols}, f)

with open(OUT_CFG,"w") as f:
    json.dump({
        "RESAMPLE_RULE": RESAMPLE_RULE,
        "FEATURE_WINDOW": FEATURE_WINDOW,
        "FEATURE_MIN_SAMPLES": FEATURE_MIN_SAMPLES
    }, f, indent=2)

# Importancias proxy (correlación simple) — orientativo
with np.errstate(invalid="ignore"):
    corrs = {c: np.corrcoef(DS[c].fillna(DS[c].median()), DS["y"])[0, 1] for c in feat_cols}
imp = pd.DataFrame({"feature": list(corrs.keys()), "corr_with_y": list(corrs.values())}) \
        .sort_values("corr_with_y", ascending=False)
imp.to_csv(OUT_IMP, index=False)

print(f"\n[OK] Guardé:\n - {OUT_PKL}\n - {OUT_CFG}\n - {OUT_IMP}")


  r = gs.rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES)
  rms  = (gs.pow(2).rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean())**0.5
  diff_abs_mean = gs.diff().abs().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean()
  slope = gs.diff().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean()
  r = gs.rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES)
  rms  = (gs.pow(2).rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean())**0.5
  diff_abs_mean = gs.diff().abs().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean()
  slope = gs.diff().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean()
  r = gs.rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES)
  rms  = (gs.pow(2).rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean())**0.5
  diff_abs_mean = gs.diff().abs().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SAMPLES).mean()
  slope = gs.diff().rolling(FEATURE_WINDOW, min_periods=FEATURE_MIN_SA

Pozos: 5 | Muestras brutas: 9,058
Filas de features: 12,242 | Positivos (pre-falla): 480

=== MÉTRICAS (validación por pozo; umbral 0.50 informativo) ===
ROC-AUC: 0.5607
PR-AUC : 0.0464
              precision    recall  f1-score   support

           0      0.959     0.957     0.958     11762
           1      0.006     0.006     0.006       480

    accuracy                          0.920     12242
   macro avg      0.483     0.482     0.482     12242
weighted avg      0.922     0.920     0.921     12242


[OK] Guardé:
 - model_aib.pkl
 - model_aib_config.json
 - aib_feature_importances.csv


In [7]:
# === AIB - SCORING V3 (robusto, una sola celda) ===
# Genera prediccion_aib.xlsx con hojas: Scores y Alertas

import json, pickle
import numpy as np
import pandas as pd
from pathlib import Path

# -------- CONFIG --------
INPUT_XLSX = "POZOS_TECSS_HIST1_20251016_103257.xlsx"  # tu Excel "live"
PKL        = "model_aib.pkl"                           # modelo entrenado
CFG        = "model_aib_config.json"                   # config del modelo
OUT_XLSX   = "prediccion_aib.xlsx"                     # salida

UMBRAL     = 0.75   # prob. mínima para alerta
CONSEC     = 2      # N consecutivas
COOLDOWN_H = 6      # horas entre alertas para el mismo pozo
DAYS_BACK  = 14     # acotar a últimos N días (opcional para acelerar)
# ------------------------

def safe_sort(df: pd.DataFrame, cols):
    """Ordena sólo por columnas presentes. Si falla, devuelve el df sin ordenar."""
    present = [c for c in cols if c in df.columns]
    if not present:
        print("Aviso: ninguna columna de orden está presente:", cols)
        return df
    try:
        return df.sort_values(present)
    except Exception as e:
        print("Aviso: sort_values falló:", repr(e), " — devuelvo sin ordenar.")
        return df

def pick_column(df: pd.DataFrame, *opts):
    cols_lower = {c.lower(): c for c in df.columns}
    for o in opts:
        for k in cols_lower:
            if k == o:
                return cols_lower[k]
    return None

def load_excel_first_sheet(path_xlsx: str) -> pd.DataFrame:
    xls = pd.ExcelFile(path_xlsx)
    df = pd.read_excel(path_xlsx, sheet_name=xls.sheet_names[0])
    c_pozo = pick_column(df, "pozo","well_id","equipo")
    c_ts   = pick_column(df, "timestamp","fecha","datetime","ts")
    c_vibr = pick_column(df, "vibr","vibraciones","vibracion","valor","value")
    if not c_pozo or not c_ts or not c_vibr:
        raise ValueError(f"El Excel debe tener columnas tipo Pozo/Timestamp/VIBR. Vistas: {list(df.columns)}")
    df = df.rename(columns={c_pozo:"Pozo", c_ts:"Timestamp", c_vibr:"VIBR"}).copy()
    df["Timestamp"] = pd.to_datetime(df["Timestamp"], errors="coerce", dayfirst=True)
    df["VIBR"] = pd.to_numeric(df["VIBR"], errors="coerce")
    df = df.dropna(subset=["Pozo","Timestamp","VIBR"])
    return df

def maybe_clip_days(df: pd.DataFrame, days_back: int):
    if days_back and days_back > 0 and not df.empty:
        tmax = df["Timestamp"].max()
        tmin = tmax - pd.Timedelta(days=days_back)
        df = df[df["Timestamp"] >= tmin].copy()
    return df

def build_features_live(vib: pd.DataFrame, resample_rule: str, feature_window: str, min_samp: int) -> pd.DataFrame:
    feats = []
    for well, g in vib.groupby("Pozo", sort=False):
        g = safe_sort(g, ["Timestamp"]).set_index("Timestamp")
        if g.empty:
            continue
        gs = (g["VIBR"]
              .resample(resample_rule).median()
              .interpolate(method="time", limit_direction="both"))
        r = gs.rolling(feature_window, min_periods=min_samp)
        mean = r.mean()
        std  = r.std()
        p2p  = r.max() - r.min()
        rms  = (gs.pow(2).rolling(feature_window, min_periods=min_samp).mean())**0.5
        diff_abs_mean = gs.diff().abs().rolling(feature_window, min_periods=min_samp).mean()
        slope = gs.diff().rolling(feature_window, min_periods=min_samp).mean()
        cv = std / (mean.abs() + 1e-9)
        F = pd.DataFrame({
            "well_id": well,
            "t_end": gs.index,
            "vib_last": gs.values,
            "vib_mean": mean.values,
            "vib_std": std.values,
            "vib_rms": rms.values,
            "vib_p2p": p2p.values,
            "vib_diff_mean": diff_abs_mean.values,
            "vib_slope": slope.values,
            "vib_cv": cv.values
        }).dropna()
        feats.append(F)
    if not feats:
        return pd.DataFrame(columns=["well_id","t_end"])
    out = pd.concat(feats, ignore_index=True)
    out["well_id"] = out["well_id"].astype(str)
    out["t_end"]   = pd.to_datetime(out["t_end"], errors="coerce")
    out = safe_sort(out, ["well_id","t_end"])
    return out

def generar_alertas(scores_df: pd.DataFrame, umbral: float, consecutivas: int, cooldown_h: int) -> pd.DataFrame:
    alertas = []
    if scores_df.empty:
        return pd.DataFrame(columns=["well_id","t_alerta","proba"])
    for well, g in scores_df.groupby("well_id", sort=False):
        g = safe_sort(g, ["t_end"]).reset_index(drop=True)
        run = 0; last_alert = None
        for _, row in g.iterrows():
            run = run + 1 if row["proba"] >= umbral else 0
            if run >= consecutivas:
                t = row["t_end"]
                if (last_alert is None) or ((t - last_alert).total_seconds() >= cooldown_h*3600):
                    alertas.append({"well_id": well, "t_alerta": t, "proba": float(row["proba"])})
                    last_alert = t
                run = 0
    return pd.DataFrame(alertas)

# ---------- FLUJO ----------
assert Path(INPUT_XLSX).exists(), f"No encontré: {INPUT_XLSX}"
assert Path(PKL).exists(), f"No encontré: {PKL}"
assert Path(CFG).exists(), f"No encontré: {CFG}"

# 1) Datos “live”
vib_df = load_excel_first_sheet(INPUT_XLSX)
print("Cols en Excel:", list(vib_df.columns))
print("Rango fechas live:", vib_df["Timestamp"].min(), "→", vib_df["Timestamp"].max())
vib_df = safe_sort(vib_df, ["Pozo","Timestamp"]).reset_index(drop=True)
vib_df = maybe_clip_days(vib_df, DAYS_BACK)
print("Pozos en live:", vib_df["Pozo"].nunique(), "| Filas:", len(vib_df))

# 2) Modelo + cfg
with open(PKL, "rb") as f:
    obj = pickle.load(f)
model = obj["model"]
feat_cols_model = list(obj["features"])
with open(CFG, "r") as f:
    cfg = json.load(f)
RESAMPLE_RULE  = cfg.get("RESAMPLE_RULE","30min")
FEATURE_WINDOW = cfg.get("FEATURE_WINDOW","24H")
MIN_SAMP       = int(cfg.get("FEATURE_MIN_SAMPLES", 6))
print("Config:", RESAMPLE_RULE, FEATURE_WINDOW, MIN_SAMP)
print("Features esperadas por el modelo:", feat_cols_model)

# 3) Features
F_df = build_features_live(vib_df, RESAMPLE_RULE, FEATURE_WINDOW, MIN_SAMP)
print("\nCols en F_df:", list(F_df.columns), "| Filas F_df:", len(F_df))
print(F_df.head(3))

# 4) Scores
faltantes = [c for c in feat_cols_model if c not in F_df.columns]
if faltantes:
    raise ValueError(f"Faltan columnas respecto al modelo: {faltantes}")

scores_df = F_df.copy()
scores_df = safe_sort(scores_df, ["well_id","t_end"]).reset_index(drop=True)
scores_df["proba"] = model.predict_proba(scores_df[feat_cols_model])[:, 1]

# 5) Alertas
alertas_df = generar_alertas(scores_df[["well_id","t_end","proba"]], UMBRAL, CONSEC, COOLDOWN_H)
alertas_df = safe_sort(alertas_df, ["well_id","t_alerta"]).reset_index(drop=True)

# 6) Exportar Excel
with pd.ExcelWriter(OUT_XLSX, engine="openpyxl", datetime_format="yyyy-mm-dd hh:mm") as wr:
    cols_scores = ["well_id","t_end","vib_last","vib_std","vib_p2p","vib_diff_mean","vib_slope","proba"]
    cols_scores = [c for c in cols_scores if c in scores_df.columns]
    scores_df[cols_scores].to_excel(wr, sheet_name="Scores", index=False)
    alertas_df.to_excel(wr, sheet_name="Alertas", index=False)

print(f"\n[OK] {OUT_XLSX} creado")
print(f" - Scores:  {len(scores_df)} filas")
print(f" - Alertas: {len(alertas_df)} filas")



  df["Timestamp"] = pd.to_datetime(df["Timestamp"], errors="coerce", dayfirst=True)


Cols en Excel: ['Pozo', 'Estado', 'VIBR', 'GPM', 'PRES. LINEA', '3SIGMA', 'Excesos', 'Timestamp']
Rango fechas live: 2025-10-11 23:37:08 → 2025-10-16 09:50:01
Pozos en live: 2275 | Filas: 258931
Config: 30min 24H 6
Features esperadas por el modelo: ['vib_last', 'vib_mean', 'vib_std', 'vib_rms', 'vib_p2p', 'vib_diff_mean', 'vib_slope', 'vib_cv']


  r = gs.rolling(feature_window, min_periods=min_samp)
  rms  = (gs.pow(2).rolling(feature_window, min_periods=min_samp).mean())**0.5
  diff_abs_mean = gs.diff().abs().rolling(feature_window, min_periods=min_samp).mean()
  slope = gs.diff().rolling(feature_window, min_periods=min_samp).mean()
  r = gs.rolling(feature_window, min_periods=min_samp)
  rms  = (gs.pow(2).rolling(feature_window, min_periods=min_samp).mean())**0.5
  diff_abs_mean = gs.diff().abs().rolling(feature_window, min_periods=min_samp).mean()
  slope = gs.diff().rolling(feature_window, min_periods=min_samp).mean()
  r = gs.rolling(feature_window, min_periods=min_samp)
  rms  = (gs.pow(2).rolling(feature_window, min_periods=min_samp).mean())**0.5
  diff_abs_mean = gs.diff().abs().rolling(feature_window, min_periods=min_samp).mean()
  slope = gs.diff().rolling(feature_window, min_periods=min_samp).mean()
  r = gs.rolling(feature_window, min_periods=min_samp)
  rms  = (gs.pow(2).rolling(feature_window, min_periods=min_sam


Cols en F_df: ['well_id', 't_end', 'vib_last', 'vib_mean', 'vib_std', 'vib_rms', 'vib_p2p', 'vib_diff_mean', 'vib_slope', 'vib_cv'] | Filas F_df: 467206
        well_id               t_end  vib_last  vib_mean   vib_std   vib_rms  \
0  YPF.SC.AB-19 2025-10-12 02:30:00    0.2620  0.254714  0.007241  0.254803   
1  YPF.SC.AB-19 2025-10-12 03:00:00    0.3015  0.260563  0.017848  0.261097   
2  YPF.SC.AB-19 2025-10-12 03:30:00    0.3410  0.269500  0.031586  0.271140   

   vib_p2p  vib_diff_mean  vib_slope    vib_cv  
0   0.0190       0.006167  -0.000167  0.028430  
1   0.0575       0.010929   0.005500  0.068499  
2   0.0970       0.014500   0.009750  0.117201  
Aviso: ninguna columna de orden está presente: ['well_id', 't_alerta']

[OK] prediccion_aib.xlsx creado
 - Scores:  467206 filas
 - Alertas: 0 filas
