In [9]:
from pathlib import Path
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px


# Metallurgist quality-band thresholds, applied to the CLEAN turbidity signal.
GREEN_MAX = 50
DEG_MAX = 100

# Load the processed time series, parse the timestamps, sort chronologically
# and tag every row with its clarity band. pd.cut uses right-closed bins by
# default, so a reading exactly equal to a threshold lands in the lower band.
df = (
    pd.read_parquet(Path("data/processed/thickener_timeseries.parquet"))
    .assign(timestamp=lambda frame: pd.to_datetime(frame["timestamp"]))
    .sort_values("timestamp")
    .assign(
        clar_band=lambda frame: pd.cut(
            frame["Overflow_Turb_NTU_clean"],
            bins=[-np.inf, GREEN_MAX, DEG_MAX, np.inf],
            labels=["GREEN(<50)", "DEGRADED(50-100)", "CRITICAL(>100)"],
        )
    )
)

# Spec limit: the first row's spec_limit_NTU when the dataset carries that
# column, otherwise 200.0.
if "spec_limit_NTU" in df.columns:
    SPEC = float(df["spec_limit_NTU"].iloc[0])
else:
    SPEC = 200.0

# Preview of the key columns (kept as the last expression so the notebook
# renders it).
df.loc[
    :,
    [
        "timestamp",
        "Overflow_Turb_NTU_clean",
        "Overflow_Turb_NTU",
        "event_now",
        "ControlMode",
        "Regime",
    ],
].head()

Unnamed: 0,timestamp,Overflow_Turb_NTU_clean,Overflow_Turb_NTU,event_now,ControlMode,Regime
0,2026-01-01 00:00:00,32.138748,32.138748,0,AUTO,NORMAL
1,2026-01-01 00:05:00,33.334663,33.334663,0,AUTO,NORMAL
2,2026-01-01 00:10:00,20.422268,20.422268,0,AUTO,NORMAL
3,2026-01-01 00:15:00,29.657433,29.657433,0,AUTO,NORMAL
4,2026-01-01 00:20:00,32.088128,32.088128,0,AUTO,NORMAL


In [None]:
# Renderer selection: keep plotly's default renderer unless inline rendering
# looks unavailable. The check treats a missing nbformat (or a missing
# `packaging`, which is only needed for the version comparison) and an
# nbformat older than 4.2.0 the same way: fall back to the "browser" renderer.
try:
    import nbformat
    from packaging import version

    if version.parse(nbformat.__version__) < version.parse("4.2.0"):
        pio.renderers.default = "browser"
except ImportError:
    pio.renderers.default = "browser"

# Empty figure; the sections below add traces, bands and annotations to it.
fig = go.Figure()

# GREEN_MAX, DEG_MAX and SPEC are deliberately NOT reassigned here. They are
# defined in the data-loading cell (SPEC is read from the dataset's
# spec_limit_NTU column when present), and this cell cannot run without that
# cell anyway because it needs `df`. The placeholder values that used to be
# hard-coded at this point (50 / 100 / 150) silently replaced the
# dataset-derived spec limit and leaked into every cell executed afterwards.

# 1. MAIN LINES
# One line trace per signal, added in this order: the clean ("truth") series
# first, then the measured series drawn thinner and semi-transparent.
# Each entry is (column, legend label, line style, extra Scatter kwargs).
for signal_col, legend_label, line_style, extra_kwargs in (
    (
        "Overflow_Turb_NTU_clean",
        "Turbidez CLEAN (truth)",
        {"color": "#1f77b4", "width": 2.5},
        {},
    ),
    (
        "Overflow_Turb_NTU",
        "Turbidez MEDIDA (con fallas)",
        {"color": "#ff7f0e", "width": 1.5},
        {"opacity": 0.7},
    ),
):
    fig.add_trace(
        go.Scatter(
            x=df["timestamp"],
            y=df[signal_col],
            mode="lines",
            name=legend_label,
            line=line_style,
            **extra_kwargs,
        )
    )

# 2. EVENT MARKERS
# Only drawn when the event flag column exists and at least one row is
# flagged; missing flags are treated as "no event".
if "event_now" in df.columns:
    ev_mask = df["event_now"].fillna(False).astype(bool)
    if ev_mask.any():
        flagged_rows = df[ev_mask]
        event_markers = go.Scatter(
            x=flagged_rows["timestamp"],
            y=flagged_rows["Overflow_Turb_NTU_clean"],
            mode="markers",
            name="Evento (>100 NTU)",
            marker={"size": 8, "color": "red", "symbol": "circle"},
        )
        fig.add_trace(event_markers)

# 3. QUALITY BANDS
# GREEN_MAX, DEG_MAX and SPEC are always bound by the time execution gets
# here (the notebook defines them before this point, and `df` itself comes
# from the same place), so the former `'NAME' in locals()` fallbacks and the
# `except NameError` wrapper could never trigger; they are removed so that a
# genuine problem surfaces instead of being printed and skipped.
x0 = df["timestamp"].min()
x1 = df["timestamp"].max()

# Top of the CRITICAL band: 10 % above the highest of both signals and the
# spec limit. np.nanmax ignores a NaN produced by an all-NaN column, which the
# builtin max() would have propagated into the shape coordinates.
y_max = float(
    np.nanmax(
        [
            df["Overflow_Turb_NTU_clean"].max(),
            df["Overflow_Turb_NTU"].max(),
            SPEC,
        ]
    )
) * 1.1

# (lower edge, upper edge, fill colour) for each band.
bands = [
    (0, GREEN_MAX, "green"),         # NORMAL
    (GREEN_MAX, DEG_MAX, "orange"),  # DEGRADED
    (DEG_MAX, y_max, "red"),         # CRITICAL
]

for band_y0, band_y1, band_color in bands:
    # Rectangles are anchored to data coordinates on both axes and drawn
    # underneath the traces.
    fig.add_shape(
        type="rect",
        xref="x", yref="y",
        x0=x0, x1=x1,
        y0=band_y0, y1=band_y1,
        fillcolor=band_color,
        opacity=0.12,
        line_width=0,
        layer="below",
    )

# 4. SPECIFICATION LINE
fig.add_hline(
    y=SPEC,
    line_dash="dash",
    line_color="black",
    annotation_text=f"Especificación: {SPEC:.0f} NTU",
)

# 5. SHADING FOR MANUAL MODE (only when the ControlMode column exists)
if "ControlMode" in df.columns:
    manual_mask = (df["ControlMode"] == "MANUAL").to_numpy()
    timestamps = df["timestamp"].to_numpy()

    if manual_mask.any():
        # Pad the mask with a non-MANUAL sample on each side and difference
        # it: +1 marks the first sample of a MANUAL run, -1 marks the first
        # sample after a run.
        transitions = np.diff(np.concatenate(([0], manual_mask.astype(int), [0])))
        starts = np.flatnonzero(transitions == 1)
        # A run that reaches the end of the data has no "next" sample, so its
        # end is clamped to the last available index.
        ends = np.minimum(np.flatnonzero(transitions == -1), len(manual_mask) - 1)

        for run_start, run_end in zip(starts, ends):
            fig.add_vrect(
                x0=timestamps[run_start],
                x1=timestamps[run_end],
                fillcolor="gray",
                opacity=0.15,
                line_width=0,
                layer="below",
            )

# 6. FINAL LAYOUT
fig.update_layout(
    title="Timeline Operacional: Turbidez Overflow",
    xaxis_title="Tiempo",
    yaxis_title="NTU",
    height=600,
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    hovermode="x unified"
)

# 7. SHOW, OR SAVE AS A FALLBACK
try:
    # Try to render with the active plotly renderer.
    fig.show()
except Exception as e:
    # Deliberate best-effort fallback: whatever went wrong while rendering,
    # report it, write a standalone HTML file and try to open that instead.
    print(f"No se pudo mostrar en Jupyter: {e}")
    print("Guardando como HTML y abriendo en navegador...")

    # Only needed on this fallback path.
    import webbrowser
    from pathlib import Path

    fig.write_html("turbidez_timeline.html")

    # Path.as_uri() builds a well-formed, percent-encoded file:// URL on every
    # platform. Gluing "file://" onto os.path.abspath() yields a malformed URL
    # for Windows drive paths and leaves spaces and '#' unescaped.
    webbrowser.open(Path("turbidez_timeline.html").absolute().as_uri())

    print("Gráfica guardada como 'turbidez_timeline.html'")

In [10]:
# Minutes represented by each row; mirrors cfg.freq_min per the original note
# (cfg is not visible in this notebook section, so confirm the two agree).
freq_min = 5

# Column layout of the per-episode summary. Declared up front so the summary
# frame keeps these columns even when no episode is found.
event_columns = [
    "start",
    "end",
    "duration_min",
    "max_clean_NTU",
    "p95_clean_NTU",
    "event_type_mode",
    "regime_mode",
]


def _mode_or_unknown(segment, column):
    """Return the most frequent value of `column` in `segment`.

    Falls back to "UNKNOWN" when the column is absent or holds only missing
    values (Series.mode() is empty in that case, so `.iat[0]` would raise).
    """
    if column not in segment:
        return "UNKNOWN"
    modes = segment[column].mode()
    return modes.iat[0] if not modes.empty else "UNKNOWN"


# Build episodes from event_now. Missing flags count as "no event", the same
# rule the timeline markers use; a bare astype(int) raises on NaN.
ev = df["event_now"].fillna(False).astype(bool).to_numpy().astype(int)

# Pad with a 0 on each side and difference: +1 is the first row of an episode,
# -1 is the first row after it (an exclusive end, equal to len(ev) when the
# episode runs to the end of the data). This also works on an empty frame,
# where indexing ev[0] / ev[-1] would fail.
transitions = np.diff(np.concatenate(([0], ev, [0])))
starts = np.flatnonzero(transitions == 1)
ends = np.flatnonzero(transitions == -1)

rows = []
for s, e in zip(starts, ends):
    seg = df.iloc[s:e]
    rows.append({
        "start": seg["timestamp"].iloc[0],
        "end": seg["timestamp"].iloc[-1],
        "duration_min": len(seg) * freq_min,
        "max_clean_NTU": float(seg["Overflow_Turb_NTU_clean"].max()),
        "p95_clean_NTU": float(seg["Overflow_Turb_NTU_clean"].quantile(0.95)),
        "event_type_mode": _mode_or_unknown(seg, "event_type"),
        "regime_mode": _mode_or_unknown(seg, "Regime"),
    })

events = pd.DataFrame(rows, columns=event_columns)

if events.empty:
    # Nothing to plot; say so instead of letting the scatter fail on an
    # empty frame.
    print("No se detectaron eventos en 'event_now'; se omite el gráfico.")
else:
    fig = px.scatter(
        events,
        x="duration_min",
        y="max_clean_NTU",
        color="event_type_mode",
        hover_data=["start", "end", "p95_clean_NTU", "regime_mode"],
        title="Eventos: duración vs severidad (max CLEAN NTU)",
        labels={"duration_min": "Duración (min)", "max_clean_NTU": "Max turbidez CLEAN (NTU)"},
    )
    fig.update_layout(height=480)
    fig.show()

# Summary statistics of the episodes (last expression, rendered by the notebook).
events.describe(include="all")

Unnamed: 0,start,end,duration_min,max_clean_NTU,p95_clean_NTU,event_type_mode,regime_mode
count,146,146,146.0,146.0,146.0,146,146
unique,,,,,,2,3
top,,,,,,CLAY,CLAY
freq,,,,,,122,119
mean,2026-01-24 20:17:58.767123,2026-01-24 20:58:27.534246,45.479452,184.520038,175.284138,,
min,2026-01-16 03:50:00,2026-01-16 03:55:00,5.0,100.166662,100.166662,,
25%,2026-01-19 21:57:30,2026-01-19 22:27:30,10.0,139.341425,138.831047,,
50%,2026-01-22 22:50:00,2026-01-23 00:37:30,20.0,162.815493,158.923494,,
75%,2026-01-27 02:26:15,2026-01-27 02:35:00,48.75,223.203995,203.965638,,
max,2026-02-12 20:40:00,2026-02-12 20:45:00,345.0,437.746273,389.011348,,


In [13]:
import plotly.express as px

# Rake torque against underflow yield stress, coloured by regime, with an OLS
# trendline per colour group.
# Plot at most 6000 randomly chosen rows: DataFrame.sample raises ValueError
# when asked for more rows than the frame has, so the request is capped at
# len(df). With 6000 or more rows the sample is exactly the same as before.
fig = px.scatter(
    df.sample(min(6000, len(df)), random_state=0),
    x="UF_YieldStress_Pa",
    y="RakeTorque_kNm",
    color="Regime",
    trendline="ols",
    title="Torque (kNm) vs Yield Stress (Pa) — relación principal (Ricardo)",
    labels={"UF_YieldStress_Pa": "Yield Stress UF (Pa)", "RakeTorque_kNm": "Torque rastra (kNm)"},
)
fig.update_layout(height=480)
fig.show()

In [14]:
# Rake torque against underflow % solids, coloured by regime, with an OLS
# trendline per colour group.
# Plot at most 6000 randomly chosen rows: DataFrame.sample raises ValueError
# when asked for more rows than the frame has, so the request is capped at
# len(df). With 6000 or more rows the sample is exactly the same as before.
fig = px.scatter(
    df.sample(min(6000, len(df)), random_state=1),
    x="Solids_u_pct",
    y="RakeTorque_kNm",
    color="Regime",
    trendline="ols",
    title="Torque (kNm) vs % sólidos UF (Cp) — relación NO directa",
    labels={"Solids_u_pct": "% Sólidos UF", "RakeTorque_kNm": "Torque rastra (kNm)"},
)
fig.update_layout(height=480)
fig.show()

In [15]:
# Work on a copy so the shared frame does not gain the helper column.
df2 = df.copy()
# Signed error: measured minus clean reading, in NTU.
df2["turb_error"] = df2["Overflow_Turb_NTU"] - df2["Overflow_Turb_NTU_clean"]

# Distribution of the error over every row.
fig = px.histogram(
    df2,
    x="turb_error",
    nbins=120,
    title="Error de turbidez medida vs CLEAN (NTU) — efecto de fallas instrumentales",
    labels={"turb_error": "Error (medida - clean) [NTU]"},
)
fig.update_layout(height=420)
fig.show()

# Measured vs clean scatter on at most 7000 randomly chosen rows.
# DataFrame.sample raises ValueError when asked for more rows than the frame
# has, so the request is capped at len(df2); with 7000 or more rows the sample
# is exactly the same as before.
fig = px.scatter(
    df2.sample(min(7000, len(df2)), random_state=2),
    x="Overflow_Turb_NTU_clean",
    y="Overflow_Turb_NTU",
    color="Regime",
    title="Turbidez medida vs CLEAN (dispersión + fallas)",
    labels={"Overflow_Turb_NTU_clean": "CLEAN (truth) [NTU]", "Overflow_Turb_NTU": "Medida [NTU]"},
)
fig.update_layout(height=520)
fig.show()

In [16]:
# Work on a copy so the shared frame does not gain the proxy column.
df3 = df.copy()

# Simple water-recovery proxy:
#     1 - (Qu * Solids_u%) / (Qf_total * Solids_f%)
# i.e. one minus the ratio of the underflow "flow x % solids" product to the
# same product for the feed. The value is clipped to [-1, 1]; it is NOT
# confined to 0-1 as the earlier comment claimed.
# NOTE(review): the earlier comment said that more solids leaving through the
# underflow means more water recovered, but with this formula a larger
# underflow product LOWERS the proxy. Confirm which direction is intended.
solids_out_proxy = df3["Qu_m3h"] * (df3["Solids_u_pct"] / 100.0)
solids_in_proxy = df3["Qf_total_m3h"] * (df3["Solids_f_pct"] / 100.0)
# Floor the denominator at 1e-6 so a zero feed product cannot divide by zero.
df3["water_recovery_proxy"] = 1.0 - (solids_out_proxy / np.maximum(solids_in_proxy, 1e-6))
df3["water_recovery_proxy"] = df3["water_recovery_proxy"].clip(-1, 1)

# Scatter on at most 8000 randomly chosen rows. DataFrame.sample raises
# ValueError when asked for more rows than the frame has, so the request is
# capped at len(df3); with 8000 or more rows the sample is exactly the same
# as before.
fig = px.scatter(
    df3.sample(min(8000, len(df3)), random_state=3),
    x="water_recovery_proxy",
    y="Overflow_Turb_NTU_clean",
    color="Regime",
    title="Trade-off: recuperación agua (proxy) vs calidad (turbidez CLEAN)",
    labels={"water_recovery_proxy": "Recuperación de agua (proxy)", "Overflow_Turb_NTU_clean": "Turbidez CLEAN (NTU)"},
)
fig.update_layout(height=520)
fig.show()