In [1]:
import pandas as pd

# Ruta al archivo parquet de ejemplo
parquet_path = "prueba.parquet"  # ajusta si está en otra carpeta

# Leer parquet
df = pd.read_parquet(parquet_path)

# Ver columnas
print("Columnas:")
print(df.columns.tolist())

print("\nPrimeras filas:")
print(df.head(5))

print("\nTipos de datos:")
print(df.dtypes)


Columnas:
['ControlVariableReadId', 'ProductionLineId', 'LocalTime', 'CreatedAt', 'ModifiedAt', 'Active', 'WorkShiftExecutionId', 'ControlVariableReadDetailId', 'ProductionLineControlVariableId', 'Value', 'MaxValue', 'MinValue', 'CriticalMaxValue', 'CriticalMinValue', 'DetailCreatedAt', 'DetailModifiedAt', 'DetailActive']

Primeras filas:
                  ControlVariableReadId                      ProductionLineId  \
0  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
1  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
2  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
3  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
4  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   

            LocalTime                     CreatedAt  \
0 2026-01-03 08:21:00 2026-01-03 14:21:04.008101800   
1 2026-01-03 08:21:00 2026-01-03 14:21:04.008101800   
2 2026-

In [2]:
# Guardar el DataFrame como CSV para inspección
csv_path = "prueba.csv"

df.to_csv(csv_path, index=False, encoding="utf-8")

print(f"Archivo CSV guardado en: {csv_path}")


Archivo CSV guardado en: prueba.csv


In [3]:
import pandas as pd

df = pd.read_csv("prueba.csv")

print("Filas:", len(df))
print("Variables distintas:", df["ProductionLineControlVariableId"].nunique())
print("Timestamps distintos:", df["LocalTime"].nunique())

print("\nEjemplo de variables:")
print(df["ProductionLineControlVariableId"].value_counts().head(10))

print("\nRango de tiempo:")
df["LocalTime"] = pd.to_datetime(df["LocalTime"], errors="coerce")
print(df["LocalTime"].min(), "->", df["LocalTime"].max())


Filas: 21420
Variables distintas: 42
Timestamps distintos: 510

Ejemplo de variables:
ProductionLineControlVariableId
004d8ca3-a1ca-4e13-9aa1-2b7a609c8064    510
4c9e5d38-3c6d-4bb1-ab56-abc85ac04ded    510
861c2e72-6bf6-4dec-b2f4-988dc43362eb    510
ab2d10bc-b497-4049-ad39-554c2e4bcc24    510
c2cbfc19-a1b9-4dc9-9873-32c83fd03464    510
8eaad38f-ff48-42f1-ba7e-544abfabc3ea    510
f71768ed-3006-4880-a2fd-9f62344870cc    510
9057486c-3a01-417d-b5e0-33f848eb19fb    510
a7b6cd67-bcdc-4eab-88b4-2b39e77743e6    510
1d1760a1-2fe0-48bc-aad1-731011d5bb9b    510
Name: count, dtype: int64

Rango de tiempo:
2026-01-03 07:01:00 -> 2026-01-03 15:30:00


In [4]:
import pandas as pd

df = pd.read_csv("prueba.csv")

critical_ids = [
    "11A4996C-F1AB-47D9-9A60-125D66F41F84",
    "3A84E612-5987-432C-8EF0-28EE3D74C313",
    "9057486C-3A01-417D-B5E0-33F848EB19FB",
    "AB2D10BC-B497-4049-AD39-554C2E4BCC24",
    "F71768ED-3006-4880-A2FD-9F62344870CC",
    "D592EFE2-94FF-4DBF-95C8-C1C01FE37D4F",
    "5EF87231-BDB9-41F1-B0D6-C5371B237684",
    "7AA64D76-1AE9-41DA-85AA-F53A9B5F0162"
]

df["ProductionLineControlVariableId"] = df["ProductionLineControlVariableId"].astype(str).str.upper()

present = df[df["ProductionLineControlVariableId"].isin([x.upper() for x in critical_ids])]

print("Filas que corresponden a variables críticas:", len(present))
print("Variables críticas encontradas:", present["ProductionLineControlVariableId"].nunique())
print("\nConteo por variable crítica:")
print(present["ProductionLineControlVariableId"].value_counts())


Filas que corresponden a variables críticas: 2550
Variables críticas encontradas: 5

Conteo por variable crítica:
ProductionLineControlVariableId
AB2D10BC-B497-4049-AD39-554C2E4BCC24    510
F71768ED-3006-4880-A2FD-9F62344870CC    510
9057486C-3A01-417D-B5E0-33F848EB19FB    510
7AA64D76-1AE9-41DA-85AA-F53A9B5F0162    510
D592EFE2-94FF-4DBF-95C8-C1C01FE37D4F    510
Name: count, dtype: int64


In [8]:
import duckdb

con = duckdb.connect()

df_test = con.execute("""
    SELECT *
    FROM read_parquet('prueba.parquet')
    LIMIT 5
""").df()

print(df_test)
print("\nColumnas:")
print(df_test.columns.tolist())


                  ControlVariableReadId                      ProductionLineId  \
0  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
1  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
2  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
3  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   
4  f7a28355-da3e-493d-b933-0031c0108b63  a1a5d0ea-edb4-4166-f3f8-08ddced0ef5e   

            LocalTime                  CreatedAt                 ModifiedAt  \
0 2026-01-03 08:21:00 2026-01-03 14:21:04.008101 2026-01-03 14:21:04.008101   
1 2026-01-03 08:21:00 2026-01-03 14:21:04.008101 2026-01-03 14:21:04.008101   
2 2026-01-03 08:21:00 2026-01-03 14:21:04.008101 2026-01-03 14:21:04.008101   
3 2026-01-03 08:21:00 2026-01-03 14:21:04.008101 2026-01-03 14:21:04.008101   
4 2026-01-03 08:21:00 2026-01-03 14:21:04.008101 2026-01-03 14:21:04.008101   

   Active                  WorkShiftEx

In [3]:
import duckdb
import pandas as pd

con = duckdb.connect()

var_id = "11a4996c-fa1b-47d9-9a60-125d66f41f84"  # Temperatura interna

df_ts = con.execute(f"""
SELECT
    CAST(LocalTime AS TIMESTAMP) AS LocalTime,
    CAST(Value AS DOUBLE)        AS Value,
    CAST(MinValue AS DOUBLE)     AS MinValue,
    CAST(MaxValue AS DOUBLE)     AS MaxValue,
    CAST(CriticalMinValue AS DOUBLE) AS CriticalMinValue,
    CAST(CriticalMaxValue AS DOUBLE) AS CriticalMaxValue
FROM read_parquet('prueba.parquet')
WHERE ProductionLineControlVariableId = '{var_id}'
ORDER BY LocalTime
""").df()

print("Filas:", len(df_ts))
print("Rango:", df_ts["LocalTime"].min(), "->", df_ts["LocalTime"].max())
print(df_ts.head(10))



Filas: 510
Rango: 2026-01-03 07:01:00 -> 2026-01-03 15:30:00
            LocalTime  Value  MinValue  MaxValue  CriticalMinValue  \
0 2026-01-03 07:01:00  -36.4     -42.0     -20.0             -40.0   
1 2026-01-03 07:02:00  -36.2     -42.0     -20.0             -40.0   
2 2026-01-03 07:03:00  -36.0     -42.0     -20.0             -40.0   
3 2026-01-03 07:04:00  -35.8     -42.0     -20.0             -40.0   
4 2026-01-03 07:05:00  -35.8     -42.0     -20.0             -40.0   
5 2026-01-03 07:06:00  -35.6     -42.0     -20.0             -40.0   
6 2026-01-03 07:07:00  -35.4     -42.0     -20.0             -40.0   
7 2026-01-03 07:08:00  -35.2     -42.0     -20.0             -40.0   
8 2026-01-03 07:09:00  -35.0     -42.0     -20.0             -40.0   
9 2026-01-03 07:10:00  -34.9     -42.0     -20.0             -40.0   

   CriticalMaxValue  
0             -18.0  
1             -18.0  
2             -18.0  
3             -18.0  
4             -18.0  
5             -18.0  
6             

In [4]:
df_ts["IsCriticalOut"] = (
    (df_ts["Value"] < df_ts["CriticalMinValue"]) |
    (df_ts["Value"] > df_ts["CriticalMaxValue"])
)

print("Puntos fuera de rango crítico:", int(df_ts["IsCriticalOut"].sum()))


Puntos fuera de rango crítico: 26


In [5]:
import plotly.graph_objects as go

# Asegurar orden por tiempo
df_plot = df_ts.sort_values("LocalTime").copy()

# Serie principal
fig = go.Figure()

fig.add_trace(go.Scatter(
    x=df_plot["LocalTime"],
    y=df_plot["Value"],
    mode="lines",
    name="Valor",
    line=dict(width=2)
))

# Límites críticos (bandas)
fig.add_trace(go.Scatter(
    x=df_plot["LocalTime"],
    y=df_plot["CriticalMaxValue"],
    mode="lines",
    name="Crítico Máx",
    line=dict(width=1, dash="dash"),
    opacity=0.7
))

fig.add_trace(go.Scatter(
    x=df_plot["LocalTime"],
    y=df_plot["CriticalMinValue"],
    mode="lines",
    name="Crítico Mín",
    line=dict(width=1, dash="dash"),
    opacity=0.7
))

# Relleno entre crítico min y max (banda)
fig.add_trace(go.Scatter(
    x=df_plot["LocalTime"],
    y=df_plot["CriticalMaxValue"],
    mode="lines",
    line=dict(width=0),
    showlegend=False
))
fig.add_trace(go.Scatter(
    x=df_plot["LocalTime"],
    y=df_plot["CriticalMinValue"],
    mode="lines",
    fill="tonexty",
    fillcolor="rgba(255, 0, 0, 0.08)",  # banda suave
    line=dict(width=0),
    name="Banda crítica"
))

# Puntos fuera de rango crítico (markers)
df_out = df_plot[df_plot["IsCriticalOut"]].copy()

fig.add_trace(go.Scatter(
    x=df_out["LocalTime"],
    y=df_out["Value"],
    mode="markers",
    name="Fuera de crítico",
    marker=dict(size=7, symbol="circle"),
))

# Layout moderno
fig.update_layout(
    title="Temperatura interna — serie de tiempo (con límites críticos)",
    xaxis_title="Hora local",
    yaxis_title="Valor",
    template="plotly_dark",  # look moderno
    hovermode="x unified",
    legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1),
    margin=dict(l=40, r=30, t=60, b=40)
)

fig.show()
