In [3]:
import pandas as pd
import numpy as np
from pathlib import Path
from collections import defaultdict

In [2]:
pip install openpyxl

Note: you may need to restart the kernel to use updated packages.


### Carga de datos

#### Datos 2023-2024

In [85]:
# 1) Cargar el archivo una sola vez
xls = pd.ExcelFile('Totalizadores Planta de Cerveza 2023_2024.xlsx')

# 2) Crear un dict con un DataFrame por hoja
dfs_2023_2024 = {}
resumen = []

for hoja in xls.sheet_names:
    df = pd.read_excel(xls, sheet_name=hoja)
    dfs_2023_2024[hoja] = df
    resumen.append({
        "hoja": hoja,
        "filas": len(df),
        "columnas": df.shape[1],
        "nombres_columnas": ", ".join(map(str, df.columns.tolist()))
    })

# 3) Mostrar un resumen amigable
resumen_df = pd.DataFrame(resumen)

print("--- Resumen de hojas y columnas ---")
print(resumen_df)

# Nota: Los DataFrames quedan disponibles en el dict dfs (ej: dfs["NombreDeLaHoja"])

--- Resumen de hojas y columnas ---
                         hoja  filas  columnas                                                                                                                                                                                         nombres_columnas
0             Consolidado KPI  12010       125  DIA, HORA, EE Planta / Hl, EE Elaboracion / Hl, EE Bodega / Hl, EE Cocina / Hl, EE Envasado / Hl, EE Linea 2 / Hl, EE Linea 3 / Hl, EE Linea 4 / Hl, EE Linea 5 / Hl, EE Servicios / Hl, EE Sala Maq...
1                       Metas     48        57  Mes / Año, Año + Mes, Agua Planta, EE Planta, ET Planta, Aire Planta, Unnamed: 6, Meta Agua Elab, Meta Agua Bodega, Meta Agua Cocina, Meta Agua Envas, Meta Agua Linea 2, Meta Agua Linea 3, Meta Ag...
2      Consolidado Produccion  12011        19  DIA, HORA, Hl de Mosto, Hl Cerveza Cocina, Hl Producido Bodega, Hl Cerveza Filtrada, Hl Cerveza Envasada, Hl Cerveza L2, Hl Cerveza L3, Hl Cerveza L4, Hl Cerveza L5, Cocimi

In [5]:
nombre_hoja_para_ver = 'Consolidado KPI'

if nombre_hoja_para_ver in dfs_2023_2024:
    
    print(f"  Mostrando la hoja: {nombre_hoja_para_ver}")
    
    pd.set_option('display.max_columns', None) 
    pd.set_option('display.width', 1000)

    print("\n--- PRIMERAS 3 FILAS (.head()) ---")
    print(dfs_2023_2024[nombre_hoja_para_ver].head(3).to_string())
    
    print("\n\n--- ÚLTIMAS 3 FILAS (.tail()) ---")
    print(dfs_2023_2024[nombre_hoja_para_ver].tail(3).to_string())

else:
    print(f"Error: No se encontró la hoja '{nombre_hoja_para_ver}' en el diccionario dfs.")
    print("Las hojas disponibles son:")
    print(list(dfs_2023_2024.keys()))

  Mostrando la hoja: Consolidado KPI

--- PRIMERAS 3 FILAS (.head()) ---
         DIA      HORA  EE Planta / Hl  EE Elaboracion / Hl  EE Bodega / Hl  EE Cocina / Hl  EE Envasado / Hl  EE Linea 2 / Hl  EE Linea 3 / Hl  EE Linea 4 / Hl  EE Linea 5 / Hl  EE Servicios / Hl  EE Sala Maq / Hl  EE Frio / Hl  EE Aire / Hl  EE CO2 / Hl  EE Caldera / Hl  EE Eflu / Hl  EE Agua / Hl  EE Resto Serv / Hl  EE Resto Planta / Hl  Unnamed: 21  Unnamed: 22  Agua Planta / Hl  Agua Elab / Hl  Agua Bodega / Hl  Agua Cocina / Hl  Agua Envas / Hl  Agua Linea 2/Hl  Agua Linea 3/Hl  Agua Linea 4/Hl  Agua Linea 5/Hl  Agua Servicios/Hl  Agua Planta de Agua/Hl  Produccion Agua / Hl  Unnamed: 35  ET Planta / Hl  ET Elab/Hl  ET Bodega/Hl  ET Cocina/Hl  ET Envasado/Hl  ET Linea 2/Hl  ET Linea 3/Hl  ET Linea 4/Hl  ET Linea 5/Hl  ET Servicios / Hl  Unnamed: 46  Aire Planta / Hl  Aire Elaboracion / Hl  Aire Cocina / Hl  Aire Bodega / Hl  Aire Envasado / Hl  Aire L2 / Hl  Aire L3 / Hl  Aire L4 / Hl  Aire L5 / Hl  Aire Se

In [6]:
print("\n--- Análisis de Cobertura de Datos ---")

# Lista para guardar los resultados de cada hoja
resultados_analisis = []

# Función para formatear las listas de días y que no saturen la salida
def format_lista_dias(lista):
    if not lista:
        return "Ninguno"
    if len(lista) > 3:
        # Mostrar los primeros 3 y el total
        primeros_tres = ', '.join(map(str, lista[:3]))
        return f"{len(lista)} días (Ej: {primeros_tres}, ...)"
    else:
        return ', '.join(map(str, lista))

# Iterar sobre el dict de DataFrames que ya creaste
for hoja, df in dfs_2023_2024.items():
    
    # 1. Verificar si la hoja tiene las columnas 'DIA' y 'HORA'
    if 'DIA' not in df.columns or 'HORA' not in df.columns:
        
        # Intentar analizar hojas solo con fecha (como 'Metas')
        col_fecha_alt = next((col for col in ['Mes / Año', 'Dia'] if col in df.columns), None)
        if col_fecha_alt:
            try:
                fechas_alt = pd.to_datetime(df[col_fecha_alt], errors='coerce').dropna()
                if not fechas_alt.empty:
                    resultados_analisis.append({
                        "hoja": hoja,
                        "primer_dia": fechas_alt.min().date(),
                        "ultimo_dia": fechas_alt.max().date(),
                        "dias_sin_23_59": "N/A (Hoja no horaria)",
                        "dias_con_horas_faltantes": "N/A (Hoja no horaria)"
                    })
            except Exception:
                pass # Omitir si falla
        continue # Saltar esta hoja si no tiene DIA y HORA

    try:
        # 2. Preparar los datos
        df_proc = df.copy()
        
        # Convertir 'DIA' a datetime (solo la fecha)
        # errors='coerce' convierte fechas inválidas en NaT (Not a Time)
        df_proc['DIA_fecha'] = pd.to_datetime(df_proc['DIA'], errors='coerce').dt.date
        
        # Convertir 'HORA' a string para buscar '23:59' de forma segura
        df_proc['HORA_str'] = df_proc['HORA'].astype(str)
        
        # Eliminar filas donde la fecha no se pudo parsear
        df_proc = df_proc.dropna(subset=['DIA_fecha'])
        
        if df_proc.empty:
            continue # Saltar hoja si no hay datos de fecha válidos

        # 3. (Goal 2) Primer y último día
        primer_dia = df_proc['DIA_fecha'].min()
        ultimo_dia = df_proc['DIA_fecha'].max()
        
        # Días únicos que SÍ tienen el registro '23:59'
        # Usamos .str.contains() para capturar '23:59:00' o '23:59'
        dias_con_23_59 = df_proc[df_proc['HORA_str'].str.contains('23:59')]['DIA_fecha'].unique()
        
        # Todos los días únicos en el dataset de esta hoja
        todos_los_dias = df_proc['DIA_fecha'].unique()
        
        # 4. (Goal 1) Días que NO tienen 23:59 (Diferencia de conjuntos)
        dias_sin_23_59_lista = sorted(list(set(todos_los_dias) - set(dias_con_23_59)))

        # 5. (Goal 3) Días con horas faltantes
        # Contamos cuántos registros (horas) hay por cada día
        registros_por_dia = df_proc.groupby('DIA_fecha').size()
        
        # Un día debe tener al menos 24 registros (00:00 a 23:00).
        # Si tiene menos de 24, le faltan horas.
        dias_con_horas_faltantes_sr = registros_por_dia[registros_por_dia < 24]
        dias_con_horas_faltantes_lista = sorted(list(dias_con_horas_faltantes_sr.index))

        # 6. Guardar resultados
        resultados_analisis.append({
            "hoja": hoja,
            "primer_dia": primer_dia,
            "ultimo_dia": ultimo_dia,
            "dias_sin_23_59": format_lista_dias(dias_sin_23_59_lista),
            "dias_con_horas_faltantes": format_lista_dias(dias_con_horas_faltantes_lista)
        })

    except Exception as e:
        # Registrar error si algo falla en una hoja específica
        resultados_analisis.append({
            "hoja": hoja,
            "primer_dia": f"Error: {e}",
            "ultimo_dia": f"Error: {e}",
            "dias_sin_23_59": "Error",
            "dias_con_horas_faltantes": "Error"
        })

# 7. Mostrar el reporte final
if resultados_analisis:
    reporte_df = pd.DataFrame(resultados_analisis).set_index('hoja')
    
    # Configurar pandas para mostrar bien el resultado
    pd.set_option('display.max_colwidth', 200) # Para que no corte las listas
    pd.set_option('display.width', 1000)       # Para que use más ancho de pantalla
    
    print(reporte_df)
else:
    print("No se encontraron hojas con las columnas 'DIA' y 'HORA' para analizar.")


--- Análisis de Cobertura de Datos ---
                           primer_dia  ultimo_dia                                        dias_sin_23_59                              dias_con_horas_faltantes
hoja                                                                                                                                                         
Consolidado KPI            2023-01-01  2024-10-26  4 días (Ej: 2023-02-28, 2023-04-13, 2023-04-19, ...)  5 días (Ej: 2023-01-17, 2023-04-13, 2023-04-19, ...)
Metas                      2021-01-01  2024-12-01                                 N/A (Hoja no horaria)                                 N/A (Hoja no horaria)
Consolidado Produccion     2023-01-01  2024-10-26  4 días (Ej: 2023-02-28, 2023-04-13, 2023-04-19, ...)  5 días (Ej: 2023-01-17, 2023-04-13, 2023-04-19, ...)
Totalizadores Produccion   2023-01-01  2024-10-26  4 días (Ej: 2023-02-28, 2023-04-13, 2023-04-19, ...)  5 días (Ej: 2023-01-17, 2023-04-13, 2023-04-19, ...)
Consolidado 

In [7]:
dias_sin_23_59_lista

[datetime.date(2023, 2, 28),
 datetime.date(2023, 4, 13),
 datetime.date(2023, 4, 19),
 datetime.date(2024, 10, 26)]

In [8]:
if 'dfs_2023_2024' not in globals() or not isinstance(dfs_2023_2024, dict) or not dfs_2023_2024:
    print("Error: El diccionario 'dfs' no se encontró en memoria o está vacío.")
else:
    print("--- Iniciando Análisis de Días Faltantes (Gaps) ---")

    # Lista para guardar los resultados
    resultados_dias_faltantes = []

    # Función para formatear las listas de días
    def format_lista_dias(lista):
        if not lista:
            return "Ninguno"
        # Convertir fechas a strings
        lista_str = [str(d) for d in lista]
        if len(lista_str) > 3:
            primeros_tres = ', '.join(lista_str[:3])
            return f"{len(lista_str)} días (Ej: {primeros_tres}, ...)"
        else:
            return ', '.join(lista_str)

    # Iterar sobre el dict de DataFrames
    for hoja in sorted(dfs_2023_2024.keys()):
        df = dfs_2023_2024[hoja]
        
        # --- 1. Identificar columnas de fecha (lógica ya validada) ---
        date_col = None
        if 'DIA.1' in df.columns and 'HORA.1' in df.columns:
            date_col = 'DIA.1'
        elif 'DIA' in df.columns and 'HORA' in df.columns:
            date_col = 'DIA'
        elif 'Dia' in df.columns and 'Hora' in df.columns:
            date_col = 'Dia'
        elif 'Mes / Año' in df.columns:
            # Lógica para hojas mensuales como 'Metas'
            try:
                fechas_alt = pd.to_datetime(df['Mes / Año'], errors='coerce').dropna().dt.date
                if not fechas_alt.empty:
                    primer_dia_alt = fechas_alt.min()
                    ultimo_dia_alt = fechas_alt.max()
                    
                    # Para 'Metas', chequeamos meses faltantes
                    ideal_range_mes = pd.date_range(start=primer_dia_alt, end=ultimo_dia_alt, freq='MS') # MS = Month Start
                    ideal_meses_set = set(ideal_range_mes.date)
                    presentes_meses_set = set(fechas_alt)
                    
                    meses_faltantes = sorted(list(ideal_meses_set - presentes_meses_set))
                    
                    resultados_dias_faltantes.append({
                        "hoja": hoja,
                        "primer_dia": primer_dia_alt,
                        "ultimo_dia": ultimo_dia_alt,
                        "dias_faltantes": f"N/A (Mensual) - {format_lista_dias(meses_faltantes)}"
                    })
            except Exception:
                pass
            continue # Saltar al siguiente loop
        
        # Si no encontramos columnas, saltar
        if date_col is None:
            continue

        # --- 2. Procesar datos ---
        try:
            df_proc = df.copy()
            
            # Convertir col de fecha a datetime y extraer solo la fecha
            df_proc['DIA_fecha'] = pd.to_datetime(df_proc[date_col], errors='coerce').dt.date
            
            # Limpiar filas donde la fecha no se pudo parsear
            df_proc = df_proc.dropna(subset=['DIA_fecha'])
            
            if df_proc.empty:
                continue # Saltar hoja si no hay datos de fecha válidos

            # --- 3. Análisis de Primer/Último Día ---
            primer_dia = df_proc['DIA_fecha'].min()
            ultimo_dia = df_proc['DIA_fecha'].max()
            
            # --- 4. (NUEVO) Análisis de Días Faltantes ---
            
            # Obtener el set de días únicos PRESENTES en los datos
            dias_presentes = set(df_proc['DIA_fecha'].unique())
            
            # Crear el set de días IDEAL (todos los días desde el inicio al fin)
            # pd.date_range es inclusivo
            ideal_range = pd.date_range(start=primer_dia, end=ultimo_dia, freq='D')
            
            # Convertir el rango ideal a un set de objetos 'date' para comparar
            ideal_dias_set = set(ideal_range.date)
            
            # Calcular la diferencia: Días ideales MENOS Días presentes
            dias_faltantes_lista = sorted(list(ideal_dias_set - dias_presentes))

            # --- 5. Guardar resultados ---
            resultados_dias_faltantes.append({
                "hoja": hoja,
                "primer_dia": primer_dia,
                "ultimo_dia": ultimo_dia,
                "dias_faltantes": format_lista_dias(dias_faltantes_lista)
            })

        except Exception as e:
            resultados_dias_faltantes.append({
                "hoja": hoja,
                "primer_dia": f"Error: {e}",
                "ultimo_dia": "Error",
                "dias_faltantes": "Error"
            })

    # --- 6. Mostrar el reporte final ---
    if resultados_dias_faltantes:
        reporte_df = pd.DataFrame(resultados_dias_faltantes).set_index('hoja')
        
        # Reordenar para que coincida con el orden de carga (alfabético)
        reporte_df = reporte_df.reindex(sorted(dfs_2023_2024.keys()))
        
        pd.set_option('display.max_colwidth', 200)
        pd.set_option('display.width', 1000)
        
        print("\n--- Reporte de Días Faltantes (Gaps) ---")
        print(reporte_df.to_string())
    else:
        print("No se generaron resultados de análisis.")

    print("\n--- Fin del Análisis ---")

--- Iniciando Análisis de Días Faltantes (Gaps) ---

--- Reporte de Días Faltantes (Gaps) ---
                           primer_dia  ultimo_dia                                          dias_faltantes
hoja                                                                                                     
Auxiliar                   2023-01-01  2024-04-26                                                 Ninguno
Consolidado Agua           2023-01-01  2024-10-26  188 días (Ej: 2023-03-31, 2023-05-31, 2023-10-31, ...)
Consolidado Aire           2023-01-01  2024-10-26  188 días (Ej: 2023-03-31, 2023-05-31, 2023-10-31, ...)
Consolidado EE             2023-01-01  2024-10-26  188 días (Ej: 2023-03-31, 2023-05-31, 2023-10-31, ...)
Consolidado GasVapor       2023-01-01  2024-10-26  188 días (Ej: 2023-03-31, 2023-05-31, 2023-10-31, ...)
Consolidado KPI            2023-01-01  2024-10-26  188 días (Ej: 2023-03-31, 2023-05-31, 2023-10-31, ...)
Consolidado Produccion     2023-01-01  2024-10-26  188 día

In [9]:
dias_faltantes_lista

[datetime.date(2023, 3, 31),
 datetime.date(2023, 5, 31),
 datetime.date(2023, 10, 31),
 datetime.date(2023, 12, 31),
 datetime.date(2024, 1, 1),
 datetime.date(2024, 1, 2),
 datetime.date(2024, 1, 3),
 datetime.date(2024, 1, 4),
 datetime.date(2024, 1, 5),
 datetime.date(2024, 1, 6),
 datetime.date(2024, 1, 7),
 datetime.date(2024, 1, 8),
 datetime.date(2024, 1, 9),
 datetime.date(2024, 1, 10),
 datetime.date(2024, 1, 11),
 datetime.date(2024, 1, 12),
 datetime.date(2024, 1, 13),
 datetime.date(2024, 1, 14),
 datetime.date(2024, 1, 15),
 datetime.date(2024, 1, 16),
 datetime.date(2024, 1, 17),
 datetime.date(2024, 1, 18),
 datetime.date(2024, 1, 19),
 datetime.date(2024, 1, 20),
 datetime.date(2024, 1, 21),
 datetime.date(2024, 1, 22),
 datetime.date(2024, 1, 23),
 datetime.date(2024, 1, 24),
 datetime.date(2024, 1, 25),
 datetime.date(2024, 1, 26),
 datetime.date(2024, 1, 27),
 datetime.date(2024, 1, 28),
 datetime.date(2024, 1, 29),
 datetime.date(2024, 1, 30),
 datetime.date(2024, 

Una vez cargada toda la hoja de datos del 2023/2024 en un diccionario, visto los días faltantes y los días en los que no se cargó la última hora (23:59), vamos a crear un diccionario con todos los df con las filas que tengan la última hora de cada día ordenado por orden cronológico. 

In [86]:
DAY_COL  = "DIA"
HOUR_COL = "HORA"

def _to_date(x):
    try:
        return pd.to_datetime(x, errors="coerce").date()
    except Exception:
        return pd.NaT

def _to_minutes(x):
    if pd.isna(x):
        return -1
    ts = pd.to_datetime(x, errors="coerce")
    if pd.notna(ts):
        return int(ts.hour) * 60 + int(ts.minute)
    try:
        h = int(float(str(x).replace(",", ".")))
        if 0 <= h <= 23:
            return h * 60
    except Exception:
        pass
    return -1

dfs_23_24 = {}
hojas_saltadas = []

for hoja, df in dfs_2023_2024.items():
    if DAY_COL not in df.columns or HOUR_COL not in df.columns:
        hojas_saltadas.append((hoja, "Falta DIA u HORA"))
        continue

    tmp = df.copy()
    tmp["_dia"]  = tmp[DAY_COL].map(_to_date)
    tmp["_mins"] = tmp[HOUR_COL].map(_to_minutes)

    # Filtramos filas sin día y agregamos orden determinístico
    tmp = tmp.dropna(subset=["_dia"]).copy()
    if tmp.empty:
        hojas_saltadas.append((hoja, "Sin días válidos"))
        continue

    tmp["_ord"] = np.arange(len(tmp))  # <- evita usar el índice en sort_values

    # Orden por día, hora (minutos) y orden original
    tmp = tmp.sort_values(["_dia", "_mins", "_ord"], kind="stable")

    # Última fila por día (la mayor "_mins"; si empata, la última por "_ord")
    ultimas = tmp.groupby("_dia", as_index=False, sort=True).tail(1)

    # Limpieza de columnas auxiliares y orden final
    ultimas = ultimas.drop(columns=["_dia", "_mins", "_ord"]).sort_values(DAY_COL).reset_index(drop=True)

    dfs_23_24[hoja] = ultimas

In [11]:
dfk = dfs_23_24["Consolidado EE"].copy()

dfk["_hora_dt"] = pd.to_datetime(dfk["HORA"], errors="coerce")

mask_no_2359 = ~(
    (dfk["_hora_dt"].dt.hour == 23) &
    (dfk["_hora_dt"].dt.minute == 59))

df_no_2359 = dfk[mask_no_2359].drop(columns=["_hora_dt"])

# Ver resultados
print(len(df_no_2359), "filas con última hora distinta de 23:59")
print(df_no_2359["HORA"].value_counts(dropna=False).head())
df_no_2359.head(4)

4 filas con última hora distinta de 23:59
HORA
23:00:00    1
19:00:00    1
16:00:00    1
07:00:00    1
Name: count, dtype: int64


  dfk["_hora_dt"] = pd.to_datetime(dfk["HORA"], errors="coerce")


Unnamed: 0,DIA,HORA,Planta (Kw),Elaboracion (Kw),Bodega (Kw),Cocina (Kw),Envasado (Kw),Linea 2 (Kw),Linea 3 (Kw),Linea 4 (Kw),Servicios (Kw),Sala Maq (Kw),Aire (Kw),Calderas (Kw),Efluentes (Kw),Frio (Kw),Pta Agua / Eflu (Kw),Prod Agua (Kw),Resto Serv (Kw),Restos Planta (Kw),KW Gral Planta,KW CO2,Fecha/Hora,Kw de Frio
58,2023-02-28,23:00:00,55102.47,5783.0,4869.0,1277.0,17587.0,5315.47,7026.0,7692.0,27452.0,16937.0,5429.0,267.0,1150.0,15279.0,1475.0,280.0,4664.0,4280.47,56732.0,383.0,2023-08-28 23:00:00,15279.0
101,2023-04-13,19:00:00,30060.18,5109.0,4020.0,1844.0,442.0,643.18,549.0,0.0,22522.0,17006.0,3512.0,212.0,529.0,14315.0,701.0,141.0,3285.0,1987.18,31193.0,528.0,2023-10-13 19:00:00,14315.0
107,2023-04-19,16:00:00,44194.72,4184.0,3156.0,1329.0,13399.0,5103.22,5115.0,5222.0,23010.0,15840.0,4861.0,313.0,543.0,13439.0,869.0,287.0,3393.0,3601.72,45310.0,174.0,2023-10-19 16:00:00,13439.0
476,2024-10-26,07:00:00,8987.480469,842.0,973.0,76.0,1471.0,2104.980469,151.0,0.0,5521.0,3193.0,737.0,69.0,145.0,3124.0,181.0,22.0,1339.0,1153.480469,9467.0,85.0,2024-04-26 07:00:00,3124.0


Vamos a interpolar los 5 días faltantes:

In [87]:
DAY_COL  = "DIA"
HOUR_COL = "HORA"

def completar_e_interpolar_diario(df, day_col=DAY_COL, hour_col=HOUR_COL, hora_por_defecto="23:59:00"):
    g = df.copy()

    # --- fecha como datetime (normalizada al día) ---
    g["_fecha"] = pd.to_datetime(g[day_col], errors="coerce", dayfirst=True).dt.normalize()
    g = g.dropna(subset=["_fecha"]).sort_values("_fecha").drop_duplicates("_fecha", keep="last")

    # --- índice continuo día a día (agrega los días faltantes) ---
    idx_full = pd.date_range(g["_fecha"].min(), g["_fecha"].max(), freq="D")
    g = g.set_index("_fecha").reindex(idx_full)

    # --- reconstruir columnas de fecha/hora ---
    g[day_col] = g.index.date
    if hour_col in g.columns:
        g[hour_col] = g[hour_col].fillna(hora_por_defecto)
    else:
        g[hour_col] = hora_por_defecto

    # --- interpolación SOLO en columnas numéricas ---
    num_cols = g.select_dtypes(include="number").columns
    if len(num_cols):
        # usa el índice temporal para interpolar; luego rellena bordes
        g[num_cols] = g[num_cols].interpolate(method="time").ffill().bfill()

    return g.reset_index(drop=True)

# Aplicarlo a TODO el diccionario (una hoja por vez)
for nombre, df in dfs_23_24.items():
    dfs_23_24[nombre] = completar_e_interpolar_diario(df)

In [88]:
DAY_COL = "DIA"
inicio  = pd.Timestamp("2023-12-31")
fin     = pd.Timestamp("2024-06-30")

for nombre, df in dfs_23_24.items():
    if DAY_COL not in df.columns or df.empty:
        continue

    # Normalizar a fecha y construir máscara para CONSERVAR lo que queda fuera del rango
    fechas = pd.to_datetime(df[DAY_COL], dayfirst=True, errors="coerce").dt.normalize()

    # Rango INCLUSIVO: elimina 31/12/2023 ... 30/06/2024
    mask_keep = (fechas < inicio) | (fechas > fin) | fechas.isna()

    dfs_23_24[nombre] = df.loc[mask_keep].reset_index(drop=True)

#### Datos 2022-2023

Repetimos todo el proceso para los datos del 2022-2023

In [89]:
# 1) Cargar el archivo una sola vez
xls = pd.ExcelFile('Totalizadores Planta de Cerveza - 2022_2023.xlsx')

# 2) Crear un dict con un DataFrame por hoja
dfs_2022_2023 = {}
resumen = []

for hoja in xls.sheet_names:
    df = pd.read_excel(xls, sheet_name=hoja)
    dfs_2022_2023[hoja] = df
    resumen.append({
        "hoja": hoja,
        "filas": len(df),
        "columnas": df.shape[1],
        "nombres_columnas": ", ".join(map(str, df.columns.tolist()))
    })

# 3) Mostrar un resumen amigable
resumen_df = pd.DataFrame(resumen)

print("--- Resumen de hojas y columnas ---")
print(resumen_df)

# Nota: Los DataFrames quedan disponibles en el dict dfs (ej: dfs["NombreDeLaHoja"])

--- Resumen de hojas y columnas ---
                         hoja  filas  columnas                                                                                                                                                                                         nombres_columnas
0             Consolidado KPI  15317       123  DIA, HORA, EE Planta / Hl, EE Elaboracion / Hl, EE Bodega / Hl, EE Cocina / Hl, EE Envasado / Hl, EE Linea 2 / Hl, EE Linea 3 / Hl, EE Linea 4 / Hl, EE Linea 5 / Hl, EE Servicios / Hl, EE Sala Maq...
1                       Metas     36        57  Mes / Año, Año + Mes, Agua Planta, EE Planta, ET Planta, Aire Planta, Unnamed: 6, Meta Agua Elab, Meta Agua Bodega, Meta Agua Cocina, Meta Agua Envas, Meta Agua Linea 2, Meta Agua Linea 3, Meta Ag...
2      Consolidado Produccion  15450        14  DIA, HORA, Hl de Mosto, Hl Cerveza Cocina, Hl Producido Bodega, Hl Cerveza Filtrada, Hl Cerveza Envasada, Hl Cerveza L2, Hl Cerveza L3, Hl Cerveza L4, Hl Cerveza L5, Cocimi

In [15]:
nombre_hoja_para_ver = 'Consolidado KPI'

if nombre_hoja_para_ver in dfs_2022_2023:
    
    print(f"  Mostrando la hoja: {nombre_hoja_para_ver}")
    
    pd.set_option('display.max_columns', None) 
    pd.set_option('display.width', 1000)

    print("\n--- PRIMERAS 3 FILAS (.head()) ---")
    print(dfs_2022_2023[nombre_hoja_para_ver].head(3).to_string())
    
    print("\n\n--- ÚLTIMAS 3 FILAS (.tail()) ---")
    print(dfs_2022_2023[nombre_hoja_para_ver].tail(3).to_string())

else:
    print(f"Error: No se encontró la hoja '{nombre_hoja_para_ver}' en el diccionario dfs.")
    print("Las hojas disponibles son:")
    print(list(dfs_2022_2023.keys()))

  Mostrando la hoja: Consolidado KPI

--- PRIMERAS 3 FILAS (.head()) ---
         DIA      HORA  EE Planta / Hl  EE Elaboracion / Hl  EE Bodega / Hl  EE Cocina / Hl  EE Envasado / Hl  EE Linea 2 / Hl  EE Linea 3 / Hl  EE Linea 4 / Hl  EE Linea 5 / Hl  EE Servicios / Hl  EE Sala Maq / Hl  EE Frio / Hl  EE Aire / Hl  EE CO2 / Hl  EE Caldera / Hl  EE Eflu / Hl  EE Agua / Hl  EE Resto Serv / Hl  EE Resto Planta / Hl  Unnamed: 21  Unnamed: 22  Agua Planta / Hl  Agua Elab / Hl  Agua Bodega / Hl  Agua Cocina / Hl  Agua Envas / Hl  Agua Linea 2/Hl  Agua Linea 3/Hl  Agua Linea 4/Hl  Agua Linea 5/Hl  Agua Servicios/Hl  Agua Planta de Agua/Hl  Produccion Agua / Hl  Unnamed: 35  ET Planta / Hl  ET Elab/Hl  ET Bodega/Hl  ET Cocina/Hl  ET Envasado/Hl  ET Linea 2/Hl  ET Linea 3/Hl  ET Linea 4/Hl  ET Linea 5/Hl  ET Servicios / Hl  Unnamed: 46  Aire Planta / Hl  Aire Elaboracion / Hl  Aire Cocina / Hl  Aire Bodega / Hl  Aire Envasado / Hl  Aire L2 / Hl  Aire L3 / Hl  Aire L4 / Hl  Aire L5 / Hl  Aire Se

In [16]:
print("\n--- Análisis de Cobertura de Datos ---")

# Lista para guardar los resultados de cada hoja
resultados_analisis = []

# Función para formatear las listas de días y que no saturen la salida
def format_lista_dias(lista):
    if not lista:
        return "Ninguno"
    if len(lista) > 3:
        # Mostrar los primeros 3 y el total
        primeros_tres = ', '.join(map(str, lista[:3]))
        return f"{len(lista)} días (Ej: {primeros_tres}, ...)"
    else:
        return ', '.join(map(str, lista))

# Iterar sobre el dict de DataFrames que ya creaste
for hoja, df in dfs_2022_2023.items():
    
    # 1. Verificar si la hoja tiene las columnas 'DIA' y 'HORA'
    if 'DIA' not in df.columns or 'HORA' not in df.columns:
        
        # Intentar analizar hojas solo con fecha (como 'Metas')
        col_fecha_alt = next((col for col in ['Mes / Año', 'Dia'] if col in df.columns), None)
        if col_fecha_alt:
            try:
                fechas_alt = pd.to_datetime(df[col_fecha_alt], errors='coerce').dropna()
                if not fechas_alt.empty:
                    resultados_analisis.append({
                        "hoja": hoja,
                        "primer_dia": fechas_alt.min().date(),
                        "ultimo_dia": fechas_alt.max().date(),
                        "dias_sin_23_59": "N/A (Hoja no horaria)",
                        "dias_con_horas_faltantes": "N/A (Hoja no horaria)"
                    })
            except Exception:
                pass # Omitir si falla
        continue # Saltar esta hoja si no tiene DIA y HORA

    try:
        # 2. Preparar los datos
        df_proc = df.copy()
        
        # Convertir 'DIA' a datetime (solo la fecha)
        # errors='coerce' convierte fechas inválidas en NaT (Not a Time)
        df_proc['DIA_fecha'] = pd.to_datetime(df_proc['DIA'], errors='coerce').dt.date
        
        # Convertir 'HORA' a string para buscar '23:59' de forma segura
        df_proc['HORA_str'] = df_proc['HORA'].astype(str)
        
        # Eliminar filas donde la fecha no se pudo parsear
        df_proc = df_proc.dropna(subset=['DIA_fecha'])
        
        if df_proc.empty:
            continue # Saltar hoja si no hay datos de fecha válidos

        # 3. (Goal 2) Primer y último día
        primer_dia = df_proc['DIA_fecha'].min()
        ultimo_dia = df_proc['DIA_fecha'].max()
        
        # Días únicos que SÍ tienen el registro '23:59'
        # Usamos .str.contains() para capturar '23:59:00' o '23:59'
        dias_con_23_59 = df_proc[df_proc['HORA_str'].str.contains('23:59')]['DIA_fecha'].unique()
        
        # Todos los días únicos en el dataset de esta hoja
        todos_los_dias = df_proc['DIA_fecha'].unique()
        
        # 4. (Goal 1) Días que NO tienen 23:59 (Diferencia de conjuntos)
        dias_sin_23_59_lista = sorted(list(set(todos_los_dias) - set(dias_con_23_59)))

        # 5. (Goal 3) Días con horas faltantes
        # Contamos cuántos registros (horas) hay por cada día
        registros_por_dia = df_proc.groupby('DIA_fecha').size()
        
        # Un día debe tener al menos 24 registros (00:00 a 23:00).
        # Si tiene menos de 24, le faltan horas.
        dias_con_horas_faltantes_sr = registros_por_dia[registros_por_dia < 24]
        dias_con_horas_faltantes_lista = sorted(list(dias_con_horas_faltantes_sr.index))

        # 6. Guardar resultados
        resultados_analisis.append({
            "hoja": hoja,
            "primer_dia": primer_dia,
            "ultimo_dia": ultimo_dia,
            "dias_sin_23_59": format_lista_dias(dias_sin_23_59_lista),
            "dias_con_horas_faltantes": format_lista_dias(dias_con_horas_faltantes_lista)
        })

    except Exception as e:
        # Registrar error si algo falla en una hoja específica
        resultados_analisis.append({
            "hoja": hoja,
            "primer_dia": f"Error: {e}",
            "ultimo_dia": f"Error: {e}",
            "dias_sin_23_59": "Error",
            "dias_con_horas_faltantes": "Error"
        })

# 7. Mostrar el reporte final
if resultados_analisis:
    reporte_df = pd.DataFrame(resultados_analisis).set_index('hoja')
    
    # Configurar pandas para mostrar bien el resultado
    pd.set_option('display.max_colwidth', 200) # Para que no corte las listas
    pd.set_option('display.width', 1000)       # Para que use más ancho de pantalla
    
    print(reporte_df)
else:
    print("No se encontraron hojas con las columnas 'DIA' y 'HORA' para analizar.")


--- Análisis de Cobertura de Datos ---
                           primer_dia  ultimo_dia                                        dias_sin_23_59                              dias_con_horas_faltantes
hoja                                                                                                                                                         
Consolidado KPI            2022-01-01  2023-12-30  4 días (Ej: 2022-03-02, 2022-07-13, 2023-02-28, ...)  5 días (Ej: 2022-07-01, 2022-07-13, 2022-11-01, ...)
Metas                      2021-01-01  2023-12-01                                 N/A (Hoja no horaria)                                 N/A (Hoja no horaria)
Consolidado Produccion     2022-01-01  2023-12-30  4 días (Ej: 2022-03-02, 2022-07-13, 2023-02-28, ...)  5 días (Ej: 2022-07-01, 2022-07-13, 2022-11-01, ...)
Totalizadores Produccion   2022-01-01  2023-12-30  4 días (Ej: 2022-03-02, 2022-07-13, 2023-02-28, ...)  5 días (Ej: 2022-07-01, 2022-07-13, 2022-11-01, ...)
Consolidado 

In [17]:
if 'dfs_2022_2023' not in globals() or not isinstance(dfs_2022_2023, dict) or not dfs_2022_2023:
    print("Error: El diccionario 'dfs' no se encontró en memoria o está vacío.")
else:
    print("--- Iniciando Análisis de Días Faltantes (Gaps) ---")

    # Lista para guardar los resultados
    resultados_dias_faltantes = []

    # Función para formatear las listas de días
    def format_lista_dias(lista):
        if not lista:
            return "Ninguno"
        # Convertir fechas a strings
        lista_str = [str(d) for d in lista]
        if len(lista_str) > 3:
            primeros_tres = ', '.join(lista_str[:3])
            return f"{len(lista_str)} días (Ej: {primeros_tres}, ...)"
        else:
            return ', '.join(lista_str)

    # Iterar sobre el dict de DataFrames
    for hoja in sorted(dfs_2022_2023.keys()):
        df = dfs_2022_2023[hoja]
        
        # --- 1. Identificar columnas de fecha (lógica ya validada) ---
        date_col = None
        if 'DIA.1' in df.columns and 'HORA.1' in df.columns:
            date_col = 'DIA.1'
        elif 'DIA' in df.columns and 'HORA' in df.columns:
            date_col = 'DIA'
        elif 'Dia' in df.columns and 'Hora' in df.columns:
            date_col = 'Dia'
        elif 'Mes / Año' in df.columns:
            # Lógica para hojas mensuales como 'Metas'
            try:
                fechas_alt = pd.to_datetime(df['Mes / Año'], errors='coerce').dropna().dt.date
                if not fechas_alt.empty:
                    primer_dia_alt = fechas_alt.min()
                    ultimo_dia_alt = fechas_alt.max()
                    
                    # Para 'Metas', chequeamos meses faltantes
                    ideal_range_mes = pd.date_range(start=primer_dia_alt, end=ultimo_dia_alt, freq='MS') # MS = Month Start
                    ideal_meses_set = set(ideal_range_mes.date)
                    presentes_meses_set = set(fechas_alt)
                    
                    meses_faltantes = sorted(list(ideal_meses_set - presentes_meses_set))
                    
                    resultados_dias_faltantes.append({
                        "hoja": hoja,
                        "primer_dia": primer_dia_alt,
                        "ultimo_dia": ultimo_dia_alt,
                        "dias_faltantes": f"N/A (Mensual) - {format_lista_dias(meses_faltantes)}"
                    })
            except Exception:
                pass
            continue # Saltar al siguiente loop
        
        # Si no encontramos columnas, saltar
        if date_col is None:
            continue

        # --- 2. Procesar datos ---
        try:
            df_proc = df.copy()
            
            # Convertir col de fecha a datetime y extraer solo la fecha
            df_proc['DIA_fecha'] = pd.to_datetime(df_proc[date_col], errors='coerce').dt.date
            
            # Limpiar filas donde la fecha no se pudo parsear
            df_proc = df_proc.dropna(subset=['DIA_fecha'])
            
            if df_proc.empty:
                continue # Saltar hoja si no hay datos de fecha válidos

            # --- 3. Análisis de Primer/Último Día ---
            primer_dia = df_proc['DIA_fecha'].min()
            ultimo_dia = df_proc['DIA_fecha'].max()
            
            # --- 4. (NUEVO) Análisis de Días Faltantes ---
            
            # Obtener el set de días únicos PRESENTES en los datos
            dias_presentes = set(df_proc['DIA_fecha'].unique())
            
            # Crear el set de días IDEAL (todos los días desde el inicio al fin)
            # pd.date_range es inclusivo
            ideal_range = pd.date_range(start=primer_dia, end=ultimo_dia, freq='D')
            
            # Convertir el rango ideal a un set de objetos 'date' para comparar
            ideal_dias_set = set(ideal_range.date)
            
            # Calcular la diferencia: Días ideales MENOS Días presentes
            dias_faltantes_lista = sorted(list(ideal_dias_set - dias_presentes))

            # --- 5. Guardar resultados ---
            resultados_dias_faltantes.append({
                "hoja": hoja,
                "primer_dia": primer_dia,
                "ultimo_dia": ultimo_dia,
                "dias_faltantes": format_lista_dias(dias_faltantes_lista)
            })

        except Exception as e:
            resultados_dias_faltantes.append({
                "hoja": hoja,
                "primer_dia": f"Error: {e}",
                "ultimo_dia": "Error",
                "dias_faltantes": "Error"
            })

    # --- 6. Mostrar el reporte final ---
    if resultados_dias_faltantes:
        reporte_df = pd.DataFrame(resultados_dias_faltantes).set_index('hoja')
        
        # Reordenar para que coincida con el orden de carga (alfabético)
        reporte_df = reporte_df.reindex(sorted(dfs_2022_2023.keys()))
        
        pd.set_option('display.max_colwidth', 200)
        pd.set_option('display.width', 1000)
        
        print("\n--- Reporte de Días Faltantes (Gaps) ---")
        print(reporte_df.to_string())
    else:
        print("No se generaron resultados de análisis.")

    print("\n--- Fin del Análisis ---")

--- Iniciando Análisis de Días Faltantes (Gaps) ---

--- Reporte de Días Faltantes (Gaps) ---
                           primer_dia  ultimo_dia                                          dias_faltantes
hoja                                                                                                     
Auxiliar                   2022-01-01  2023-12-30  121 días (Ej: 2022-03-31, 2022-05-31, 2022-10-31, ...)
Consolidado Agua           2022-01-01  2023-12-30  121 días (Ej: 2022-03-31, 2022-05-31, 2022-10-31, ...)
Consolidado Aire           2022-01-01  2023-12-30  121 días (Ej: 2022-03-31, 2022-05-31, 2022-10-31, ...)
Consolidado EE             2022-01-01  2023-12-30  121 días (Ej: 2022-03-31, 2022-05-31, 2022-10-31, ...)
Consolidado GasVapor       2022-01-01  2023-12-30  121 días (Ej: 2022-03-31, 2022-05-31, 2022-10-31, ...)
Consolidado KPI            2022-01-01  2023-12-30  121 días (Ej: 2022-03-31, 2022-05-31, 2022-10-31, ...)
Consolidado Produccion     2022-01-01  2023-12-30  121 día

In [18]:
dias_faltantes_lista

[datetime.date(2022, 3, 31),
 datetime.date(2022, 5, 31),
 datetime.date(2022, 10, 31),
 datetime.date(2022, 12, 31),
 datetime.date(2023, 3, 7),
 datetime.date(2023, 3, 8),
 datetime.date(2023, 3, 9),
 datetime.date(2023, 3, 10),
 datetime.date(2023, 3, 11),
 datetime.date(2023, 3, 12),
 datetime.date(2023, 3, 13),
 datetime.date(2023, 3, 14),
 datetime.date(2023, 3, 15),
 datetime.date(2023, 3, 16),
 datetime.date(2023, 3, 17),
 datetime.date(2023, 3, 18),
 datetime.date(2023, 3, 19),
 datetime.date(2023, 3, 20),
 datetime.date(2023, 3, 21),
 datetime.date(2023, 3, 22),
 datetime.date(2023, 3, 23),
 datetime.date(2023, 3, 24),
 datetime.date(2023, 3, 25),
 datetime.date(2023, 3, 26),
 datetime.date(2023, 3, 27),
 datetime.date(2023, 3, 28),
 datetime.date(2023, 3, 29),
 datetime.date(2023, 3, 30),
 datetime.date(2023, 3, 31),
 datetime.date(2023, 4, 1),
 datetime.date(2023, 4, 2),
 datetime.date(2023, 4, 3),
 datetime.date(2023, 4, 4),
 datetime.date(2023, 4, 5),
 datetime.date(2023,

In [90]:
DAY_COL  = "DIA"
HOUR_COL = "HORA"

dfs_22_23 = {}
hojas_saltadas = []

for hoja, df in dfs_2022_2023.items():
    if DAY_COL not in df.columns or HOUR_COL not in df.columns:
        hojas_saltadas.append((hoja, "Falta DIA u HORA"))
        continue

    tmp = df.copy()
    tmp["_dia"]  = tmp[DAY_COL].map(_to_date)
    tmp["_mins"] = tmp[HOUR_COL].map(_to_minutes)

    # Filtramos filas sin día y agregamos orden determinístico
    tmp = tmp.dropna(subset=["_dia"]).copy()
    if tmp.empty:
        hojas_saltadas.append((hoja, "Sin días válidos"))
        continue

    tmp["_ord"] = np.arange(len(tmp))  # <- evita usar el índice en sort_values

    # Orden por día, hora (minutos) y orden original
    tmp = tmp.sort_values(["_dia", "_mins", "_ord"], kind="stable")

    # Última fila por día (la mayor "_mins"; si empata, la última por "_ord")
    ultimas = tmp.groupby("_dia", as_index=False, sort=True).tail(1)

    # Limpieza de columnas auxiliares y orden final
    ultimas = ultimas.drop(columns=["_dia", "_mins", "_ord"]).sort_values(DAY_COL).reset_index(drop=True)

    dfs_22_23[hoja] = ultimas

In [20]:
dfk = dfs_22_23["Consolidado EE"].copy()

dfk["_hora_dt"] = pd.to_datetime(dfk["HORA"], errors="coerce")

mask_no_2359 = ~(
    (dfk["_hora_dt"].dt.hour == 23) &
    (dfk["_hora_dt"].dt.minute == 59))

df_no_2359 = dfk[mask_no_2359].drop(columns=["_hora_dt"])

# Ver resultados
print(len(df_no_2359), "filas con última hora distinta de 23:59")
print(df_no_2359["HORA"].value_counts(dropna=False).head())
df_no_2359.head(4)

4 filas con última hora distinta de 23:59
HORA
23:00:00    3
08:00:00    1
Name: count, dtype: int64


  dfk["_hora_dt"] = pd.to_datetime(dfk["HORA"], errors="coerce")


Unnamed: 0,DIA,HORA,Planta (Kw),Elaboracion (Kw),Bodega (Kw),Cocina (Kw),Envasado (Kw),Linea 2 (Kw),Linea 3 (Kw),Linea 4 (Kw),Servicios (Kw),Sala Maq (Kw),Aire (Kw),Calderas (Kw),Efluentes (Kw),Frio (Kw),Pta Agua / Eflu (Kw),Prod Agua (Kw),Resto Serv (Kw),Restos Planta (Kw),KW Gral Planta,KW CO2,Fecha/Hora,Kw de Frio
60,2022-03-02,23:00:00,50076.11,7775.0,6118.0,2047.0,9258.0,2034.86,1766.0,6652.0,31239.0,20287.0,5390.0,396.0,1029.0,19294.0,1548.0,459.0,4078.0,1804.11,51549.0,593.0,2022-09-02 23:00:00,19294.0
191,2022-07-13,23:00:00,72768.88,6938.5,5845.5,1901.0,17542.0,6024.13,6683.0,7423.0,49265.0,37052.0,7059.0,827.0,781.0,33704.0,1809.0,920.0,4594.0,-976.62,76246.0,1380.0,2022-01-13 23:00:00,33704.0
419,2023-02-28,23:00:00,55102.47,5783.0,4869.0,1277.0,17587.0,5315.47,7026.0,7692.0,27452.0,16937.0,5429.0,267.0,1150.0,15279.0,1475.0,280.0,4664.0,4280.47,56732.0,383.0,2023-08-28 23:00:00,15279.0
425,2023-03-06,08:00:00,20527.51,1663.0,1669.0,109.0,6352.0,1989.01,2561.0,2729.0,10648.0,6671.0,1851.0,0.0,358.0,6595.0,412.0,32.0,1394.0,1864.51,21070.0,418.0,2023-09-06 08:00:00,6595.0


In [91]:
DAY_COL  = "DIA"
HOUR_COL = "HORA"

# Aplicarlo a TODO el diccionario (una hoja por vez)
for nombre, df in dfs_22_23.items():
    dfs_22_23[nombre] = completar_e_interpolar_diario(df)

In [92]:
DAY_COL = "DIA"
inicio  = pd.Timestamp("2023-03-07")
fin     = pd.Timestamp("2023-06-30")

for nombre, df in dfs_22_23.items():
    if DAY_COL not in df.columns or df.empty:
        continue

    # Normalizar a fecha y construir máscara para CONSERVAR lo que queda fuera del rango
    fechas = pd.to_datetime(df[DAY_COL], dayfirst=True, errors="coerce").dt.normalize()

    # Rango INCLUSIVO: elimina 31/12/2023 ... 30/06/2024
    mask_keep = (fechas < inicio) | (fechas > fin) | fechas.isna()

    dfs_22_23[nombre] = df.loc[mask_keep].reset_index(drop=True)

#### Datos 2021-2022

In [93]:
# 1) Cargar el archivo una sola vez
xls = pd.ExcelFile('Totalizadores Planta de Cerveza 2021_2022.xlsx')

# 2) Crear un dict con un DataFrame por hoja
dfs_2021_2022 = {}
resumen = []

for hoja in xls.sheet_names:
    df = pd.read_excel(xls, sheet_name=hoja)
    dfs_2021_2022[hoja] = df
    resumen.append({
        "hoja": hoja,
        "filas": len(df),
        "columnas": df.shape[1],
        "nombres_columnas": ", ".join(map(str, df.columns.tolist()))
    })

# 3) Mostrar un resumen amigable
resumen_df = pd.DataFrame(resumen)

print("--- Resumen de hojas y columnas ---")
print(resumen_df)

# Nota: Los DataFrames quedan disponibles en el dict dfs (ej: dfs["NombreDeLaHoja"])


--- Resumen de hojas y columnas ---
                         hoja  filas  columnas                                                                                                                                                                                         nombres_columnas
0             Consolidado KPI  16049        62  DIA, HORA, EE Planta / Hl, EE Elaboracion / Hl, EE Bodega / Hl, EE Cocina / Hl, EE Envasado / Hl, EE Linea 2 / Hl, EE Linea 3 / Hl, EE Linea 4 / Hl, EE Linea 5 / Hl, EE Servicios / Hl, EE Sala Maq...
1      Consolidado Produccion  15573        12                DIA, HORA, Hl de Mosto, Hl Cerveza Cocina, Hl Producido Bodega, Hl Cerveza Filtrada, Hl Cerveza Envasada, Hl Cerveza L2, Hl Cerveza L3, Hl Cerveza L4, Hl Cerveza L5, Cocimientos Diarios
2    Totalizadores Produccion  15573        40  DIA, HORA, HL Mosto Budweiser, HL Mosto Tecate, HL Mosto Local, HL Mosto Heineken, HL Mosto Negra, HL Mosto Fuerte, HL Mosto Indio, HL Mosto Palermo, HL Mosto Bieckert, HL 

In [24]:
nombre_hoja_para_ver = 'Consolidado KPI'

if nombre_hoja_para_ver in dfs_2021_2022:
    
    print(f"  Mostrando la hoja: {nombre_hoja_para_ver}")
    
    pd.set_option('display.max_columns', None) 
    pd.set_option('display.width', 1000)

    print("\n--- PRIMERAS 3 FILAS (.head()) ---")
    print(dfs_2021_2022[nombre_hoja_para_ver].head(3).to_string())
    
    print("\n\n--- ÚLTIMAS 3 FILAS (.tail()) ---")
    print(dfs_2021_2022[nombre_hoja_para_ver].tail(3).to_string())

else:
    print(f"Error: No se encontró la hoja '{nombre_hoja_para_ver}' en el diccionario dfs.")
    print("Las hojas disponibles son:")
    print(list(dfs_2021_2022.keys()))

  Mostrando la hoja: Consolidado KPI

--- PRIMERAS 3 FILAS (.head()) ---
         DIA      HORA  EE Planta / Hl  EE Elaboracion / Hl  EE Bodega / Hl  EE Cocina / Hl  EE Envasado / Hl  EE Linea 2 / Hl  EE Linea 3 / Hl  EE Linea 4 / Hl  EE Linea 5 / Hl  EE Servicios / Hl  EE Sala Maq / Hl  EE Frio / Hl  EE Aire / Hl  EE CO2 / Hl  EE Caldera / Hl  EE Eflu / Hl  EE Agua / Hl  EE Resto Serv / Hl  EE Resto Planta / Hl  Unnamed: 21  Unnamed: 22  Agua Planta / Hl  Agua Elab / Hl  Agua Bodega / Hl  Agua Cocina / Hl  Agua Envas / Hl  Agua Linea 2/Hl  Agua Linea 3/Hl  Agua Linea 4/Hl  Agua Linea 5/Hl  Agua Servicios/Hl  Agua Planta de Agua/Hl  Produccion Agua / Hl  Unnamed: 35  ET Planta / Hl  ET Elab/Hl  ET Bodega/Hl  ET Cocina/Hl  ET Envasado/Hl  ET Linea 2/Hl  ET Linea 3/Hl  ET Linea 4/Hl  ET Linea 5/Hl  ET Servicios / Hl  Unnamed: 46  Aire Planta / Hl  Aire Elaboracion / Hl  Aire Cocina / Hl  Aire Bodega / Hl  Aire Envasado / Hl  Aire L2 / Hl  Aire L3 / Hl  Aire L4 / Hl  Aire L5 / Hl  Aire Se

In [25]:
print("\n--- Análisis de Cobertura de Datos ---")

# Lista para guardar los resultados de cada hoja
resultados_analisis = []

# Función para formatear las listas de días y que no saturen la salida
def format_lista_dias(lista):
    if not lista:
        return "Ninguno"
    if len(lista) > 3:
        # Mostrar los primeros 3 y el total
        primeros_tres = ', '.join(map(str, lista[:3]))
        return f"{len(lista)} días (Ej: {primeros_tres}, ...)"
    else:
        return ', '.join(map(str, lista))

# Iterar sobre el dict de DataFrames que ya creaste
for hoja, df in dfs_2021_2022.items():
    
    # 1. Verificar si la hoja tiene las columnas 'DIA' y 'HORA'
    if 'DIA' not in df.columns or 'HORA' not in df.columns:
        
        # Intentar analizar hojas solo con fecha (como 'Metas')
        col_fecha_alt = next((col for col in ['Mes / Año', 'Dia'] if col in df.columns), None)
        if col_fecha_alt:
            try:
                fechas_alt = pd.to_datetime(df[col_fecha_alt], errors='coerce').dropna()
                if not fechas_alt.empty:
                    resultados_analisis.append({
                        "hoja": hoja,
                        "primer_dia": fechas_alt.min().date(),
                        "ultimo_dia": fechas_alt.max().date(),
                        "dias_sin_23_59": "N/A (Hoja no horaria)",
                        "dias_con_horas_faltantes": "N/A (Hoja no horaria)"
                    })
            except Exception:
                pass # Omitir si falla
        continue # Saltar esta hoja si no tiene DIA y HORA

    try:
        # 2. Preparar los datos
        df_proc = df.copy()
        
        # Convertir 'DIA' a datetime (solo la fecha)
        # errors='coerce' convierte fechas inválidas en NaT (Not a Time)
        df_proc['DIA_fecha'] = pd.to_datetime(df_proc['DIA'], errors='coerce').dt.date
        
        # Convertir 'HORA' a string para buscar '23:59' de forma segura
        df_proc['HORA_str'] = df_proc['HORA'].astype(str)
        
        # Eliminar filas donde la fecha no se pudo parsear
        df_proc = df_proc.dropna(subset=['DIA_fecha'])
        
        if df_proc.empty:
            continue # Saltar hoja si no hay datos de fecha válidos

        # 3. (Goal 2) Primer y último día
        primer_dia = df_proc['DIA_fecha'].min()
        ultimo_dia = df_proc['DIA_fecha'].max()
        
        # Días únicos que SÍ tienen el registro '23:59'
        # Usamos .str.contains() para capturar '23:59:00' o '23:59'
        dias_con_23_59 = df_proc[df_proc['HORA_str'].str.contains('23:59')]['DIA_fecha'].unique()
        
        # Todos los días únicos en el dataset de esta hoja
        todos_los_dias = df_proc['DIA_fecha'].unique()
        
        # 4. (Goal 1) Días que NO tienen 23:59 (Diferencia de conjuntos)
        dias_sin_23_59_lista = sorted(list(set(todos_los_dias) - set(dias_con_23_59)))

        # 5. (Goal 3) Días con horas faltantes
        # Contamos cuántos registros (horas) hay por cada día
        registros_por_dia = df_proc.groupby('DIA_fecha').size()
        
        # Un día debe tener al menos 24 registros (00:00 a 23:00).
        # Si tiene menos de 24, le faltan horas.
        dias_con_horas_faltantes_sr = registros_por_dia[registros_por_dia < 24]
        dias_con_horas_faltantes_lista = sorted(list(dias_con_horas_faltantes_sr.index))

        # 6. Guardar resultados
        resultados_analisis.append({
            "hoja": hoja,
            "primer_dia": primer_dia,
            "ultimo_dia": ultimo_dia,
            "dias_sin_23_59": format_lista_dias(dias_sin_23_59_lista),
            "dias_con_horas_faltantes": format_lista_dias(dias_con_horas_faltantes_lista)
        })

    except Exception as e:
        # Registrar error si algo falla en una hoja específica
        resultados_analisis.append({
            "hoja": hoja,
            "primer_dia": f"Error: {e}",
            "ultimo_dia": f"Error: {e}",
            "dias_sin_23_59": "Error",
            "dias_con_horas_faltantes": "Error"
        })

# 7. Mostrar el reporte final
if resultados_analisis:
    reporte_df = pd.DataFrame(resultados_analisis).set_index('hoja')
    
    # Configurar pandas para mostrar bien el resultado
    pd.set_option('display.max_colwidth', 200) # Para que no corte las listas
    pd.set_option('display.width', 1000)       # Para que use más ancho de pantalla
    
    print(reporte_df)
else:
    print("No se encontraron hojas con las columnas 'DIA' y 'HORA' para analizar.")


--- Análisis de Cobertura de Datos ---
                           primer_dia  ultimo_dia                                        dias_sin_23_59                              dias_con_horas_faltantes
hoja                                                                                                                                                         
Consolidado KPI            2021-01-01  2022-12-30                    2022-03-02, 2022-03-16, 2022-07-13  4 días (Ej: 2022-03-16, 2022-07-01, 2022-07-13, ...)
Consolidado Produccion     2021-01-01  2022-12-30                    2022-03-02, 2022-03-16, 2022-07-13  4 días (Ej: 2022-03-16, 2022-07-01, 2022-07-13, ...)
Totalizadores Produccion   2021-01-01  2022-12-30                    2022-03-02, 2022-03-16, 2022-07-13  4 días (Ej: 2022-03-16, 2022-07-01, 2022-07-13, ...)
Consolidado EE             2021-01-01  2022-12-30                    2022-03-02, 2022-03-16, 2022-07-13                    2022-03-16, 2022-07-13, 2022-11-01
Totalizadore

In [26]:
dias_sin_23_59_lista

[datetime.date(2021, 7, 1),
 datetime.date(2022, 3, 2),
 datetime.date(2022, 3, 16),
 datetime.date(2022, 7, 13)]

In [27]:
if 'dfs_2021_2022' not in globals() or not isinstance(dfs_2021_2022, dict) or not dfs_2021_2022:
    print("Error: El diccionario 'dfs' no se encontró en memoria o está vacío.")
else:
    print("--- Iniciando Análisis de Días Faltantes (Gaps) ---")

    # Lista para guardar los resultados
    resultados_dias_faltantes = []

    # Función para formatear las listas de días
    def format_lista_dias(lista):
        if not lista:
            return "Ninguno"
        # Convertir fechas a strings
        lista_str = [str(d) for d in lista]
        if len(lista_str) > 3:
            primeros_tres = ', '.join(lista_str[:3])
            return f"{len(lista_str)} días (Ej: {primeros_tres}, ...)"
        else:
            return ', '.join(lista_str)

    # Iterar sobre el dict de DataFrames
    for hoja in sorted(dfs_2021_2022.keys()):
        df = dfs_2021_2022[hoja]
        
        # --- 1. Identificar columnas de fecha (lógica ya validada) ---
        date_col = None
        if 'DIA.1' in df.columns and 'HORA.1' in df.columns:
            date_col = 'DIA.1'
        elif 'DIA' in df.columns and 'HORA' in df.columns:
            date_col = 'DIA'
        elif 'Dia' in df.columns and 'Hora' in df.columns:
            date_col = 'Dia'
        elif 'Mes / Año' in df.columns:
            # Lógica para hojas mensuales como 'Metas'
            try:
                fechas_alt = pd.to_datetime(df['Mes / Año'], errors='coerce').dropna().dt.date
                if not fechas_alt.empty:
                    primer_dia_alt = fechas_alt.min()
                    ultimo_dia_alt = fechas_alt.max()
                    
                    # Para 'Metas', chequeamos meses faltantes
                    ideal_range_mes = pd.date_range(start=primer_dia_alt, end=ultimo_dia_alt, freq='MS') # MS = Month Start
                    ideal_meses_set = set(ideal_range_mes.date)
                    presentes_meses_set = set(fechas_alt)
                    
                    meses_faltantes = sorted(list(ideal_meses_set - presentes_meses_set))
                    
                    resultados_dias_faltantes.append({
                        "hoja": hoja,
                        "primer_dia": primer_dia_alt,
                        "ultimo_dia": ultimo_dia_alt,
                        "dias_faltantes": f"N/A (Mensual) - {format_lista_dias(meses_faltantes)}"
                    })
            except Exception:
                pass
            continue # Saltar al siguiente loop
        
        # Si no encontramos columnas, saltar
        if date_col is None:
            continue

        # --- 2. Procesar datos ---
        try:
            df_proc = df.copy()
            
            # Convertir col de fecha a datetime y extraer solo la fecha
            df_proc['DIA_fecha'] = pd.to_datetime(df_proc[date_col], errors='coerce').dt.date
            
            # Limpiar filas donde la fecha no se pudo parsear
            df_proc = df_proc.dropna(subset=['DIA_fecha'])
            
            if df_proc.empty:
                continue # Saltar hoja si no hay datos de fecha válidos

            # --- 3. Análisis de Primer/Último Día ---
            primer_dia = df_proc['DIA_fecha'].min()
            ultimo_dia = df_proc['DIA_fecha'].max()
            
            # --- 4. (NUEVO) Análisis de Días Faltantes ---
            
            # Obtener el set de días únicos PRESENTES en los datos
            dias_presentes = set(df_proc['DIA_fecha'].unique())
            
            # Crear el set de días IDEAL (todos los días desde el inicio al fin)
            # pd.date_range es inclusivo
            ideal_range = pd.date_range(start=primer_dia, end=ultimo_dia, freq='D')
            
            # Convertir el rango ideal a un set de objetos 'date' para comparar
            ideal_dias_set = set(ideal_range.date)
            
            # Calcular la diferencia: Días ideales MENOS Días presentes
            dias_faltantes_lista = sorted(list(ideal_dias_set - dias_presentes))

            # --- 5. Guardar resultados ---
            resultados_dias_faltantes.append({
                "hoja": hoja,
                "primer_dia": primer_dia,
                "ultimo_dia": ultimo_dia,
                "dias_faltantes": format_lista_dias(dias_faltantes_lista)
            })

        except Exception as e:
            resultados_dias_faltantes.append({
                "hoja": hoja,
                "primer_dia": f"Error: {e}",
                "ultimo_dia": "Error",
                "dias_faltantes": "Error"
            })

    # --- 6. Mostrar el reporte final ---
    if resultados_dias_faltantes:
        reporte_df = pd.DataFrame(resultados_dias_faltantes).set_index('hoja')
        
        # Reordenar para que coincida con el orden de carga (alfabético)
        reporte_df = reporte_df.reindex(sorted(dfs_2021_2022.keys()))
        
        pd.set_option('display.max_colwidth', 200)
        pd.set_option('display.width', 1000)
        
        print("\n--- Reporte de Días Faltantes (Gaps) ---")
        print(reporte_df.to_string())
    else:
        print("No se generaron resultados de análisis.")

    print("\n--- Fin del Análisis ---")

--- Iniciando Análisis de Días Faltantes (Gaps) ---

--- Reporte de Días Faltantes (Gaps) ---
                           primer_dia  ultimo_dia                                          dias_faltantes
hoja                                                                                                     
Auxiliar                   2021-01-01  2022-12-30  111 días (Ej: 2021-03-31, 2021-05-31, 2021-10-31, ...)
Consolidado Agua           2021-01-01  2022-12-30  111 días (Ej: 2021-03-31, 2021-05-31, 2021-10-31, ...)
Consolidado Aire           2021-01-01  2022-12-30  111 días (Ej: 2021-03-31, 2021-05-31, 2021-10-31, ...)
Consolidado EE             2021-01-01  2022-12-30  111 días (Ej: 2021-03-31, 2021-05-31, 2021-10-31, ...)
Consolidado GasVapor       2021-01-01  2022-12-30  111 días (Ej: 2021-03-31, 2021-05-31, 2021-10-31, ...)
Consolidado KPI            2021-01-01  2022-12-30  111 días (Ej: 2021-03-31, 2021-05-31, 2021-10-31, ...)
Consolidado Produccion     2021-01-01  2022-12-30  111 día

In [28]:
dias_faltantes_lista

[datetime.date(2021, 3, 31),
 datetime.date(2021, 5, 31),
 datetime.date(2021, 10, 31),
 datetime.date(2021, 12, 31),
 datetime.date(2022, 3, 17),
 datetime.date(2022, 3, 18),
 datetime.date(2022, 3, 19),
 datetime.date(2022, 3, 20),
 datetime.date(2022, 3, 21),
 datetime.date(2022, 3, 22),
 datetime.date(2022, 3, 23),
 datetime.date(2022, 3, 24),
 datetime.date(2022, 3, 25),
 datetime.date(2022, 3, 26),
 datetime.date(2022, 3, 27),
 datetime.date(2022, 3, 28),
 datetime.date(2022, 3, 29),
 datetime.date(2022, 3, 30),
 datetime.date(2022, 3, 31),
 datetime.date(2022, 4, 1),
 datetime.date(2022, 4, 2),
 datetime.date(2022, 4, 3),
 datetime.date(2022, 4, 4),
 datetime.date(2022, 4, 5),
 datetime.date(2022, 4, 6),
 datetime.date(2022, 4, 7),
 datetime.date(2022, 4, 8),
 datetime.date(2022, 4, 9),
 datetime.date(2022, 4, 10),
 datetime.date(2022, 4, 11),
 datetime.date(2022, 4, 12),
 datetime.date(2022, 4, 13),
 datetime.date(2022, 4, 14),
 datetime.date(2022, 4, 15),
 datetime.date(2022, 

In [94]:
DAY_COL  = "DIA"
HOUR_COL = "HORA"

dfs_21_22 = {}
hojas_saltadas = []

for hoja, df in dfs_2021_2022.items():
    if DAY_COL not in df.columns or HOUR_COL not in df.columns:
        hojas_saltadas.append((hoja, "Falta DIA u HORA"))
        continue

    tmp = df.copy()
    tmp["_dia"]  = tmp[DAY_COL].map(_to_date)
    tmp["_mins"] = tmp[HOUR_COL].map(_to_minutes)

    # Filtramos filas sin día y agregamos orden determinístico
    tmp = tmp.dropna(subset=["_dia"]).copy()
    if tmp.empty:
        hojas_saltadas.append((hoja, "Sin días válidos"))
        continue

    tmp["_ord"] = np.arange(len(tmp))  # <- evita usar el índice en sort_values

    # Orden por día, hora (minutos) y orden original
    tmp = tmp.sort_values(["_dia", "_mins", "_ord"], kind="stable")

    # Última fila por día (la mayor "_mins"; si empata, la última por "_ord")
    ultimas = tmp.groupby("_dia", as_index=False, sort=True).tail(1)

    # Limpieza de columnas auxiliares y orden final
    ultimas = ultimas.drop(columns=["_dia", "_mins", "_ord"]).sort_values(DAY_COL).reset_index(drop=True)

    dfs_21_22[hoja] = ultimas

In [30]:
dfk = dfs_21_22["Consolidado EE"].copy()

dfk["_hora_dt"] = pd.to_datetime(dfk["HORA"], errors="coerce")

mask_no_2359 = ~(
    (dfk["_hora_dt"].dt.hour == 23) &
    (dfk["_hora_dt"].dt.minute == 59))

df_no_2359 = dfk[mask_no_2359].drop(columns=["_hora_dt"])

# Ver resultados
print(len(df_no_2359), "filas con última hora distinta de 23:59")
print(df_no_2359["HORA"].value_counts(dropna=False).head())
df_no_2359.head(4)

3 filas con última hora distinta de 23:59
HORA
23:00:00    2
07:00:00    1
Name: count, dtype: int64


  dfk["_hora_dt"] = pd.to_datetime(dfk["HORA"], errors="coerce")


Unnamed: 0,DIA,HORA,Planta (Kw),Elaboracion (Kw),Bodega (Kw),Cocina (Kw),Envasado (Kw),Linea 2 (Kw),Linea 3 (Kw),Linea 4 (Kw),Servicios (Kw),Sala Maq (Kw),Aire (Kw),Calderas (Kw),Efluentes (Kw),Frio (Kw),Pta Agua / Eflu (Kw),Prod Agua (Kw),Resto Serv (Kw),Restos Planta (Kw),KW Gral Planta
421,2022-03-02,23:00:00,50076.11,7775.0,6118.0,2047.0,9258.0,2034.86,1766.0,6652.0,31239.0,20287.0,5390.0,396.0,1029.0,19294.0,1548.0,459.0,4078.0,1804.11,51549.0
435,2022-03-16,07:00:00,20493.18,2396.5,1937.5,603.0,5623.0,1778.68,2322.0,2412.0,10763.0,7326.0,2259.0,175.0,254.0,6292.0,459.0,191.0,1477.0,1710.68,20932.0
448,2022-07-13,23:00:00,72768.88,6938.5,5845.5,1901.0,17542.0,6024.13,6683.0,7423.0,49265.0,37052.0,7059.0,827.0,781.0,33704.0,1809.0,920.0,4594.0,-976.62,76246.0


In [95]:
DAY_COL  = "DIA"
HOUR_COL = "HORA"

# Aplicarlo a TODO el diccionario (una hoja por vez)
for nombre, df in dfs_21_22.items():
    dfs_21_22[nombre] = completar_e_interpolar_diario(df)

In [96]:
DAY_COL = "DIA"
inicio  = pd.Timestamp("2022-03-17")
fin     = pd.Timestamp("2022-06-30")

for nombre, df in dfs_21_22.items():
    if DAY_COL not in df.columns or df.empty:
        continue

    # Normalizar a fecha y construir máscara para CONSERVAR lo que queda fuera del rango
    fechas = pd.to_datetime(df[DAY_COL], dayfirst=True, errors="coerce").dt.normalize()

    # Rango INCLUSIVO: elimina 31/12/2023 ... 30/06/2024
    mask_keep = (fechas < inicio) | (fechas > fin) | fechas.isna()

    dfs_21_22[nombre] = df.loc[mask_keep].reset_index(drop=True)

### Análisis descriptivo y versionado de datos

Podemos ver que los datos comparten muchos días, por lo que son datos duplicados. Por eso vamos a crear un solo data frame que tenga todos los datos ordenados cronológicamente una sola vez.

In [97]:
DAY_COL  = "DIA"
HOUR_COL = "HORA"

RANGOS = {
    "dfs_21_22": [("2021-01-01", "2021-12-31")],
    "dfs_22_23": [("2022-01-01", "2022-12-31")],
    "dfs_23_24": [("2023-01-01", "2023-12-30"),
                  ("2024-07-01", "2024-10-26")],
}

HOJAS_INCLUIR = [
    "Consolidado KPI", "Consolidado Produccion", "Totalizadores Produccion", "Consolidado EE", "Totalizadores Energia",
    "Consolidado Agua", "Totalizadores Agua", "Consolidado GasVapor", "Totalizadores Gas y Vapor", "Consolidado Aire",
    "Totalizadores Aire", "Totalizadores Efluentes", "Totalizadores Glicol", "Totalizadores CO2"
]

DICS = {
    "dfs_21_22": dfs_21_22,
    "dfs_22_23": dfs_22_23,
    "dfs_23_24": dfs_23_24,
}

def slice_por_fecha(df, start, end, day_col=DAY_COL):
    if df.empty or day_col not in df.columns:
        return df.iloc[0:0]
    fechas = pd.to_datetime(df[day_col], errors="coerce", dayfirst=True).dt.normalize()
    mask = fechas.between(pd.to_datetime(start), pd.to_datetime(end), inclusive="both")
    return df.loc[mask].copy()

def ordenar_crono(df, day_col=DAY_COL, hour_col=HOUR_COL):
    if df.empty:
        return df
    dia = pd.to_datetime(df[day_col], errors="coerce", dayfirst=True)
    if hour_col in df.columns:
        dt = pd.to_datetime(dia.dt.date.astype(str) + " " + df[hour_col].astype(str),
                            errors="coerce", dayfirst=True)
    else:
        dt = dia
    return (df.assign(_dt=dt)
              .sort_values("_dt", kind="stable", na_position="last")
              .drop(columns="_dt").reset_index(drop=True))


partes_por_hoja = defaultdict(list)

for nombre_dic, dic in DICS.items():
    rangos = RANGOS.get(nombre_dic, [])
    for (inicio, fin) in rangos:
        for hoja, df in dic.items():
            if HOJAS_INCLUIR and hoja not in HOJAS_INCLUIR:
                continue
            recorte = slice_por_fecha(df, inicio, fin)
            if not recorte.empty:
                partes_por_hoja[hoja].append(recorte)

dfs_completo = {}
for hoja, partes in partes_por_hoja.items():
    # Unificar columnas: las faltantes se completan con 0
    todas_cols = list(set().union(*(p.columns for p in partes)))
    partes_alineadas = [p.reindex(columns=todas_cols, fill_value=0) for p in partes]
    combinado = pd.concat(partes_alineadas, ignore_index=True, sort=False)

    # Orden temporal final
    if DAY_COL in combinado.columns:
        combinado = ordenar_crono(combinado, DAY_COL, HOUR_COL)
    dfs_completo[hoja] = combinado

In [99]:
dfs_completo['Consolidado Produccion'] = dfs_completo['Consolidado Produccion'].drop(columns="Fecha/Hora", errors="ignore")
dfs_completo['Consolidado EE'] = dfs_completo['Consolidado EE'].drop(columns=['Fecha/Hora', 'Kw de Frio'], errors="ignore")

In [114]:
PATRONES_DIA = ("dia", "Dia", "DIA")

def detectar_col_dia(df, patrones=PATRONES_DIA):
    cols = [str(c) for c in df.columns]
    cand = [c for c in cols if any(p in c.lower() for p in patrones)]
    if not cand:
        raise ValueError("No se encontró columna de día/fecha en un DF.")
    # Heurística: prioriza nombres más específicos
    preferencia = ["dia", "día", "Dia", "DIA"]
    cand_orden = sorted(cand, key=lambda c: next((i for i,p in enumerate(preferencia) if p in c.lower()), 99))
    return cand_orden[0]

def normalizar_dia_col(df, col_dia):
    out = df.copy(deep=True)
    out[col_dia] = pd.to_datetime(out[col_dia], errors="coerce")
    # Si trae hora, nos quedamos con la fecha (día civil)
    out[col_dia] = out[col_dia].dt.normalize()
    # Renombramos a un nombre canónico común
    if col_dia != "dia":
        out = out.rename(columns={col_dia: "dia"})
    return out

def deduplicar_por_dia(df):
    # Si hay múltiples filas por día en un DF, evitamos explosiones en los merges
    # Estrategia simple: nos quedamos con la primera por día (ajusta si necesitas otra agregación)
    if df.duplicated("dia").any():
        df = df.sort_values("dia").drop_duplicates("dia", keep="first")
    return df

def mergear_por_dia(dfs_completo):
    dfs_norm = []
    for k, df in dfs_completo.items():
        col = detectar_col_dia(df)
        tmp = normalizar_dia_col(df, col)
        tmp = deduplicar_por_dia(tmp)
        # Evita choques de nombres: agrega sufijo con la clave del dict a las columnas no-clave
        cols_no_clave = [c for c in tmp.columns if c != "dia"]
        tmp = tmp[["dia"] + cols_no_clave].add_suffix(f"__{k}")
        tmp = tmp.rename(columns={f"dia__{k}": "dia"})
        dfs_norm.append(tmp)

    # Merge iterativo (outer) por 'dia'
    from functools import reduce
    df_unificado = reduce(lambda l, r: pd.merge(l, r, on="dia", how="outer"), dfs_norm)

    # Orden final
    df_unificado = df_unificado.sort_values("dia").reset_index(drop=True)
    return df_unificado

df_unificado = mergear_por_dia(dfs_completo)

Verificación de dimensiones del dataset, tipos de variables y rangos de valores

In [115]:
df_unificado.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1212 entries, 0 to 1211
Columns: 425 entries, dia to Tot  Trasiego__Totalizadores Glicol
dtypes: datetime64[ns](1), float64(403), object(21)
memory usage: 3.9+ MB


In [125]:
df_unificado.describe()

Unnamed: 0,dia,EE Cocina / Hl__Consolidado KPI,Aire Servicios / Hl__Consolidado KPI,Meta ET Linea 3__Consolidado KPI,Agua Linea 2/Hl__Consolidado KPI,Meta EE Elaboracion__Consolidado KPI,Agua Paste L3 / Hl__Consolidado KPI,Meta EE Sala Maq__Consolidado KPI,Meta Agua Servicios__Consolidado KPI,Meta Agua Planta__Consolidado KPI,EE Linea 4 / Hl__Consolidado KPI,Agua Envas / Hl__Consolidado KPI,Meta Agua Cocina__Consolidado KPI,Meta Agua Linea 5__Consolidado KPI,EE Linea 5 / Hl__Consolidado KPI,Meta ET Linea 2__Consolidado KPI,Unnamed: 113__Consolidado KPI,Meta EE Servicios__Consolidado KPI,EE CO2 / Hl__Consolidado KPI,Meta Agua Elab__Consolidado KPI,Meta EE Envasado__Consolidado KPI,Meta ET Cocina__Consolidado KPI,Meta Aire L4__Consolidado KPI,Meta ET Planta__Consolidado KPI,ET Servicios / Hl__Consolidado KPI,Meta ET Linea 4__Consolidado KPI,ET Planta / Hl__Consolidado KPI,EE Aire / Hl__Consolidado KPI,Unnamed: 124__Consolidado KPI,Aire L2 / Hl__Consolidado KPI,EE Caldera / Hl__Consolidado KPI,Meta ET Bodega__Consolidado KPI,EE Envasado / Hl__Consolidado KPI,Meta Agua Bodega__Consolidado KPI,EE Frio / Hl__Consolidado KPI,EE Bodega / Hl__Consolidado KPI,Agua Servicios/Hl__Consolidado KPI,Meta Aire L5__Consolidado KPI,Meta EE Cocina__Consolidado KPI,CO 2 Filtro / Hl__Consolidado KPI,Meta EE Aire__Consolidado KPI,Aire Bodega / Hl__Consolidado KPI,Aire L3 / Hl__Consolidado KPI,Aire L4 / Hl__Consolidado KPI,Meta EE Linea 4__Consolidado KPI,Meta Aire Bodega__Consolidado KPI,Agua Linea 4/Hl__Consolidado KPI,Meta EE Linea 3__Consolidado KPI,Aire Elaboracion / Hl__Consolidado KPI,Meta EE Linea 5__Consolidado KPI,Unnamed: 22__Consolidado KPI,Meta EE Resto Planta__Consolidado KPI,Meta Aire Cocina__Consolidado KPI,Meta ET Linea 5__Consolidado KPI,ET Cocina/Hl__Consolidado KPI,EE Linea 3 / Hl__Consolidado KPI,Unnamed: 72__Consolidado KPI,Meta Aire Servicios__Consolidado KPI,ET Bodega/Hl__Consolidado KPI,CO 2 linea 2 / Hl__Consolidado KPI,EE Eflu / Hl__Consolidado KPI,Meta Aire Elaboracion__Consolidado KPI,ET Linea 5/Hl__Consolidado KPI,EE Linea 2 / Hl__Consolidado KPI,Agua Linea 5/Hl__Consolidado KPI,Unnamed: 46__Consolidado KPI,Agua Planta / Hl__Consolidado KPI,Meta EE Resto Serv__Consolidado KPI,__Consolidado KPI,CO 2 Linea 4 / Hl__Consolidado KPI,Meta ET Elab__Consolidado KPI,Meta Produccion Agua__Consolidado KPI,EE Agua / Hl__Consolidado KPI,Meta EE Planta__Consolidado KPI,Agua Planta de Agua/Hl__Consolidado KPI,Meta ET Envasado__Consolidado KPI,EE Sala Maq / Hl__Consolidado KPI,CO 2 / Hl__Consolidado KPI,Meta Aire Planta__Consolidado KPI,Agua Linea 3/Hl__Consolidado KPI,Aire L5 / Hl__Consolidado KPI,CO 2 linea 3 / Hl__Consolidado KPI,Meta Aire L2__Consolidado KPI,Unnamed: 123__Consolidado KPI,Meta Aire L3__Consolidado KPI,Unnamed: 103__Consolidado KPI,EE Planta / Hl__Consolidado KPI,Meta EE Frio__Consolidado KPI,Meta EE Linea 2__Consolidado KPI,Meta Agua Linea 4__Consolidado KPI,Unnamed: 67__Consolidado KPI,Produccion Agua / Hl__Consolidado KPI,ET Envasado/Hl__Consolidado KPI,Agua Bodega / Hl__Consolidado KPI,Aire Planta / Hl__Consolidado KPI,Meta Aire Envasado__Consolidado KPI,ET Linea 4/Hl__Consolidado KPI,Meta EE Eflu__Consolidado KPI,Unnamed: 66__Consolidado KPI,ET Elab/Hl__Consolidado KPI,Unnamed: 65__Consolidado KPI,Meta EE Bodega__Consolidado KPI,Unnamed: 35__Consolidado KPI,Meta Agua Envas__Consolidado KPI,Agua Lavadora L3 / Hl__Consolidado KPI,Meta EE Agua __Consolidado KPI,Unnamed: 84__Consolidado KPI,EE Elaboracion / Hl__Consolidado KPI,Meta EE CO2__Consolidado KPI,Meta EE Caldera__Consolidado KPI,Meta ET Servicios__Consolidado KPI,Agua Cocina / Hl__Consolidado KPI,Aire Cocina / Hl__Consolidado KPI,ET Linea 2/Hl__Consolidado KPI,EE Servicios / Hl__Consolidado KPI,Meta Agua Planta de Agua__Consolidado KPI,Aire Expulsion / Hl__Consolidado KPI,Agua Elab / Hl__Consolidado KPI,ET Linea 3/Hl__Consolidado KPI,Meta Agua Linea 3__Consolidado KPI,Aire Envasado / Hl__Consolidado KPI,Meta Agua Linea 2__Consolidado KPI,EE Resto Serv / Hl__Consolidado KPI,Unnamed: 21__Consolidado KPI,EE Resto Planta / Hl__Consolidado KPI,Hl Cerveza L5__Consolidado Produccion,Hl Cerveza L4__Consolidado Produccion,Unnamed: 18__Consolidado Produccion,Hl Cerveza Envasada__Consolidado Produccion,Hl Cerveza Cocina__Consolidado Produccion,Hl Cerveza L3__Consolidado Produccion,Unnamed: 16__Consolidado Produccion,Unnamed: 14__Consolidado Produccion,Cocimientos Diarios__Consolidado Produccion,Hl de Mosto__Consolidado Produccion,Unnamed: 15__Consolidado Produccion,Hl Cerveza Filtrada__Consolidado Produccion,Hl Cerveza L2__Consolidado Produccion,Unnamed: 17__Consolidado Produccion,Hl de Mosto Copia__Consolidado Produccion,Hl Producido Bodega__Consolidado Produccion,Hl Cerveza L5__Totalizadores Produccion,HL Mosto Miller__Totalizadores Produccion,HL Mosto Negra__Totalizadores Produccion,HL Mosto Fuerte__Totalizadores Produccion,HL Mosto Palermo__Totalizadores Produccion,Hl Cerveza L4__Totalizadores Produccion,HL Mosto Local__Totalizadores Produccion,Hl Mosto Bieckert BAPA__Totalizadores Produccion,Hl Session IPA__Totalizadores Produccion,HL Mosto APA__Totalizadores Produccion,HL Mosto Bieckert__Totalizadores Produccion,HL Mosto Amstel__Totalizadores Produccion,HL Mosto Amstel Lager__Totalizadores Produccion,HL Mosto Golden__Totalizadores Produccion,HL Cerveza Sin Diliuir A90__Totalizadores Produccion,HL Mosto Frost__Totalizadores Produccion,Nivel Silo Bagazo Sur (2)__Totalizadores Produccion,Hl Cerveza L3__Totalizadores Produccion,HL Mosto Malta__Totalizadores Produccion,HL Mosto Heineken__Totalizadores Produccion,Hl Mosto Cautiva Roja__Totalizadores Produccion,Cocimientos Diarios__Totalizadores Produccion,HL Mosto Indio__Totalizadores Produccion,HL Mosto IPA__Totalizadores Produccion,HL Mosto Tecate__Totalizadores Produccion,Id__Totalizadores Produccion,HL Mosto Budweiser__Totalizadores Produccion,HL Mosto Sol__Totalizadores Produccion,HL Mosto Roja__Totalizadores Produccion,HL Cerceza Sin Diluir A190__Totalizadores Produccion,Hl Mosto Bieckert Urbana__Totalizadores Produccion,Nivel Tk Restos Lev__Totalizadores Produccion,Hl Reserva 7__Totalizadores Produccion,Hl Cerveza L2__Totalizadores Produccion,Hl Mosto Blue Moon__Totalizadores Produccion,Hl Mosto Cautiva Torrontes__Totalizadores Produccion,HL Mosto Isenbeck__Totalizadores Produccion,Hl Reserva 8__Totalizadores Produccion,Hl Mosto Cautiva Blend__Totalizadores Produccion,Frio (Kw)__Consolidado EE,Linea 4 (Kw)__Consolidado EE,Resto Serv (Kw)__Consolidado EE,Aire (Kw)__Consolidado EE,KW Gral Planta__Consolidado EE,Sala Maq (Kw)__Consolidado EE,Pta Agua / Eflu (Kw)__Consolidado EE,KW CO2__Consolidado EE,Planta (Kw)__Consolidado EE,Servicios (Kw)__Consolidado EE,Cocina (Kw)__Consolidado EE,Envasado (Kw)__Consolidado EE,Bodega (Kw)__Consolidado EE,Calderas (Kw)__Consolidado EE,Efluentes (Kw)__Consolidado EE,Restos Planta (Kw)__Consolidado EE,Elaboracion (Kw)__Consolidado EE,Linea 2 (Kw)__Consolidado EE,Linea 3 (Kw)__Consolidado EE,Prod Agua (Kw)__Consolidado EE,KW Enfluente Efl__Totalizadores Energia,KW Linea 3 y 4__Totalizadores Energia,KW Cond 11. 12 y 13__Totalizadores Energia,KW Ilum Dep L3/L4__Totalizadores Energia,KW Gral Planta__Totalizadores Energia,Kw Compresores Aire__Totalizadores Energia,KW Iluminacion L3__Totalizadores Energia,Kw Admininistracion__Totalizadores Energia,KW Trafo 11__Totalizadores Energia,KW Trafo 12__Totalizadores Energia,KW Planta de Agua__Totalizadores Energia,KW Mycom 3__Totalizadores Energia,Kw llum/Serv L2__Totalizadores Energia,KW Trafo 4__Totalizadores Energia,KW Cond 5. 6 y 9__Totalizadores Energia,Kw Molino__Totalizadores Energia,KW Trafo 10__Totalizadores Energia,KW Laboratorio__Totalizadores Energia,KW CO2__Totalizadores Energia,KW Mycom 1__Totalizadores Energia,KW Cocina__Totalizadores Energia,KW Comp Kaeser__Totalizadores Energia,KW Enfr Agua Cocina__Totalizadores Energia,KW Mycom 7__Totalizadores Energia,Unnamed: 55__Totalizadores Energia,Unnamed: 57__Totalizadores Energia,KW Mycom 4__Totalizadores Energia,KW Toma Agua__Totalizadores Energia,KW Enfluentes Hidr__Totalizadores Energia,KW Secador Kaeser__Totalizadores Energia,KW Mycom 2__Totalizadores Energia,Unnamed: 56__Totalizadores Energia,Id__Totalizadores Energia,KW Enfluentes Coc__Totalizadores Energia,KW Bba Glicol Sala MAq__Totalizadores Energia,KW Obrador Contratistas__Totalizadores Energia,KW Servicio L2__Totalizadores Energia,KW Pta Agua/Log__Totalizadores Energia,KW Caldera 4__Totalizadores Energia,Unnamed: 54__Totalizadores Energia,KW Linea 3__Totalizadores Energia,KW Atlas 3__Totalizadores Energia,Unnamed: 58__Totalizadores Energia,Kw Casona__Totalizadores Energia,Kw Linea Barriles__Totalizadores Energia,KW Filtr Carbon__Totalizadores Energia,KW Linea 4__Totalizadores Energia,KW Trafo 9__Totalizadores Energia,KW Caldera 3__Totalizadores Energia,KW Cond 7. 8 y 11__Totalizadores Energia,KW Mycom 6__Totalizadores Energia,Unnamed: 53__Totalizadores Energia,KW Linea 2__Totalizadores Energia,KW Iluminacion L4__Totalizadores Energia,KW Trafo 5__Totalizadores Energia,KW Bba Glicol Bod__Totalizadores Energia,KW Mycom 5__Totalizadores Energia,Agua Elaboracion (Hl)__Consolidado Agua,Red Paste L4__Consolidado Agua,Agua CO2__Consolidado Agua,FC Lavadora L2__Consolidado Agua,Agua Lavadora L3__Consolidado Agua,Agua Cond Evaporativos__Consolidado Agua,FC L1 y L2__Consolidado Agua,Agua Cocina (Hl)__Consolidado Agua,Agua Calderas__Consolidado Agua,Agua Envasado (Hl)__Consolidado Agua,Agua Bodega (Hl)__Consolidado Agua,FC Lavadora L3__Consolidado Agua,Red L1 y L2__Consolidado Agua,Agua Planta (Hl)__Consolidado Agua,Agua Efluentes__Consolidado Agua,Agua Dilucion (Hl)__Consolidado Agua,Produccion (Hl)__Consolidado Agua,Agua Servicios (Hl)__Consolidado Agua,Planta de agua (Hl)__Consolidado Agua,Red L3__Consolidado Agua,Agua Paste L3__Consolidado Agua,FC Barriles__Consolidado Agua,Rep Agua Cist CE__Totalizadores Agua,Red Paste L4__Totalizadores Agua,Agua Cald__Totalizadores Agua,FC Cocina__Totalizadores Agua,Agua Filt FMaCist CE__Totalizadores Agua,Agua Helada Cocina__Totalizadores Agua,FC Lavadora L2__Totalizadores Agua,Red Bodega__Totalizadores Agua,FC Centec II__Totalizadores Agua,Agua Cond REC__Totalizadores Agua,FC L1 y L2__Totalizadores Agua,Rinser__Totalizadores Agua,FC Centec I__Totalizadores Agua,Red Lavadora L3__Totalizadores Agua,Agua Enjuagadora L2__Totalizadores Agua,FC Lavadora L3__Totalizadores Agua,Red Bodega Interno__Totalizadores Agua,Temp Tq Intermedio__Totalizadores Agua,Red Administracion__Totalizadores Agua,Glicol Paste L3__Totalizadores Agua,Dilución A190__Totalizadores Agua,Agua Planta CO2__Totalizadores Agua,Red L1 y L2__Totalizadores Agua,Agua Cist Industrial__Totalizadores Agua,Agua Limpieza y Bbba Vacio L2__Totalizadores Agua,FC CIP Bodega__Totalizadores Agua,Entrada Osmosis__Totalizadores Agua,Id__Totalizadores Agua,FC Linea 3__Totalizadores Agua,Agua Lavadora L2__Totalizadores Agua,Retorno Planta CO2__Totalizadores Agua,Filtro de Carbon__Totalizadores Agua,Dilución A90__Totalizadores Agua,Red Paste L3__Totalizadores Agua,Rep Agua Puerto__Totalizadores Agua,Agua Red Servicios__Totalizadores Agua,Assa__Totalizadores Agua,Red Paste L1__Totalizadores Agua,Red L3__Totalizadores Agua,Red Barriles__Totalizadores Agua,FC Barriles__Totalizadores Agua,Rechazo Osmosis__Totalizadores Agua,Vapor Elaboracion (Kg)__Consolidado GasVapor,ET Envasado (Mj)__Consolidado GasVapor,Tot_Vapor_L3_L4__Consolidado GasVapor,Tot Vap Paste L3 / Hora__Consolidado GasVapor,Vapor _Vapor_L5 (KG)__Consolidado GasVapor,ET Elaboracion (Mj)__Consolidado GasVapor,Vapor Envasado (Kg)__Consolidado GasVapor,Vapor_L5 (KG)__Consolidado GasVapor,VAPOR DE LINEA 4 KG__Consolidado GasVapor,Medicion Gas Planta (M3)__Consolidado GasVapor,VAPOR DE LINEA 1 Y 2 KG__Consolidado GasVapor,Gas Planta (Mj)__Consolidado GasVapor,ET Servicios (Mj)__Consolidado GasVapor,Conversion Kg/Mj__Consolidado GasVapor,Tot Vap Lav L3 / Hora__Consolidado GasVapor,Vapor L3__Consolidado GasVapor,Vapor Cocina (Kg)__Consolidado GasVapor,Vapor Servicio (Kg)__Consolidado GasVapor,Tot_Vapor_CIP_Bodega__Consolidado GasVapor,Tot_Vapor_L3_L4__Totalizadores Gas y Vapor,VAPOR DE CALDERA 4 KG__Totalizadores Gas y Vapor,CAUDAL DE GAS CALDERA 1 M3__Totalizadores Gas y Vapor,VAPOR DE CALDERA 3 KG__Totalizadores Gas y Vapor,Vapor _Vapor_L5 (KG)__Totalizadores Gas y Vapor,Vapor Olla 1 (Ton)__Totalizadores Gas y Vapor,TOT GAS ENTRADA PRINCIPAL M3__Totalizadores Gas y Vapor,Vapor Paste L3__Totalizadores Gas y Vapor,CAUDAL DE BIO-GAS M3__Totalizadores Gas y Vapor,TOT AGUA DESAIREADA__Totalizadores Gas y Vapor,(Vapor Olla 2 (Ton)__Totalizadores Gas y Vapor,VAPOR COCINA 2 KG__Totalizadores Gas y Vapor,Tot_Vapor_Caldera 3__Totalizadores Gas y Vapor,Vapor_L5 (KG)__Totalizadores Gas y Vapor,VAPOR DE LINEA 4 KG__Totalizadores Gas y Vapor,Fuel Oil Tk2 (Kg)__Totalizadores Gas y Vapor,VAPOR DE CALDERA 1 KG__Totalizadores Gas y Vapor,VAPOR DE LINEA 1 Y 2 KG__Totalizadores Gas y Vapor,Id__Totalizadores Gas y Vapor,M3_Tot_Gas__Totalizadores Gas y Vapor,Fuel Oil Tk1 (Kg)__Totalizadores Gas y Vapor,Tot_Vapor_CIP_Bodega__Totalizadores Gas y Vapor,VAPOR COCINA 1 KG__Totalizadores Gas y Vapor,Tot Aire Expulsion__Consolidado Aire,Totalizador_Aire_Bodega__Consolidado Aire,Totalizador_Aire_L2__Consolidado Aire,Totalizador_Aire_L5__Consolidado Aire,Aire Elaboracion (m3)__Consolidado Aire,Aire Producido (M3)__Consolidado Aire,Aire Planta (M3)__Consolidado Aire,Aire Envasado (M3)__Consolidado Aire,Totalizador_Aire_Cocina__Consolidado Aire,Totalizador_Aire_L3__Consolidado Aire,Aire Servicios (M3)__Consolidado Aire,Totaliador_Aire_L4__Consolidado Aire,Tot Aire Expulsion__Totalizadores Aire,Totalizador_Aire_L5__Totalizadores Aire,Totalizador_Aire_L2__Totalizadores Aire,Totalizador_Aire_Bodega__Totalizadores Aire,Id__Totalizadores Aire,Totalizador_Aire_Cocina__Totalizadores Aire,Totalizador_Aire_L3__Totalizadores Aire,Totalizador Sistema Kaeser__Totalizadores Aire,Tot Aire Servicio Logistica__Totalizadores Aire,Totaliador_Aire_L4__Totalizadores Aire,Id__Totalizadores CO2,Totalizador_L2_Barriles__Totalizadores CO2,Totalizador_Tq_Pulmon__Totalizadores CO2,Totaliador_Latas__Totalizadores CO2,Totalizador_Filtracion__Totalizadores CO2,Totalizador_L3__Totalizadores CO2,Totalizador_Bodega__Totalizadores CO2,Id__Totalizadores Efluentes,Totalizador Bba Gas__Totalizadores Efluentes,Tot A130/330/430__Totalizadores Glicol,Id__Totalizadores Glicol,"Tot L3, L4 y Planta de CO2__Totalizadores Glicol",Tot L3. L4 y Planta de CO2__Totalizadores Glicol,Tot A10/20__Totalizadores Glicol,Tot Reposo Superior__Totalizadores Glicol,Tot Reposo Inferior__Totalizadores Glicol,Tot A40/240/50/60/Centec/Filtro__Totalizadores Glicol,Tot Fermantacion_Cocina__Totalizadores Glicol,Tot Trasiego__Totalizadores Glicol
count,1212,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,0.0,1212.0,365.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,730.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,0.0,1212.0,1212.0,1212.0,1212.0,1212.0,365.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,0.0,1212.0,1212.0,847.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,730.0,1212.0,365.0,1212.0,1212.0,1212.0,1212.0,365.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,365.0,1212.0,365.0,1212.0,0.0,1212.0,1212.0,1212.0,365.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,0.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,730.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,1212.0,847.0,1212.0,1212.0,1212.0,1212.0,1212.0
mean,2022-09-16 07:36:14.257425664,0.363995,2.081642,10.248324,3.926518,0.601243,0.689677,3.040012,0.385838,2.411485,2.853538,1.209606,0.964594,1.033494,,13.664431,0.0,2.941035,0.32929,1.260862,1.293773,6.148994,0.424188,45.092624,9.595265,4.44094,78.184406,1.110423,0.0,8.600537,0.194643,0.683222,2.526509,0.551197,4.866212,1.056503,1.778765,2.120939,0.212094,0.44862,0.424188,3.650328,2.110426,1.241689,1.131167,1.060469,1.043851,1.201865,5.045442,1.201865,,0.134326,0.35349,6.832216,8.759241,2.317373,0.0,0.848376,2.483572,0.427264,0.181574,2.474429,41.818566,6.354511,3.080863,,5.186264,0.318141,0.0,0.976545,15.782418,59.401815,0.081604,4.948857,25.227851,26.577319,6.282022,0.486407,4.383274,2.080827,16.314983,0.425237,1.767449,0.0,1.060469,0.0,11.144397,1.908845,1.838147,0.344498,0.0,74.772149,41.517811,1.221508,9.126317,1.060469,12.09623,0.070698,0.0,27.071331,0.0,0.388839,,0.744115,0.454833,0.070698,0.0,1.112703,0.176745,0.070698,2.664564,1.551811,1.272859,54.795283,7.097512,10.334936,0.509413,2.197894,21.359784,0.964594,1.999233,1.446891,0.333766,,0.407672,116.175743,4047.813944,1147.391914,8476.05198,8576.688866,2939.117987,1693.091584,0.0,9.476073,5451.438119,2653.428218,7927.546617,1372.944307,545.69967,3546.405116,8526.370423,116.175743,510.937294,76.65099,0.0,0.0,4047.813944,0.0,2.30198,0.0,124.170792,0.0,21.419142,356.764439,420.82632,2550.303218,0.0,18.764333,2939.117987,0.0,782.167492,35.575083,9.476073,0.0,351.249175,2443.485561,14331.345297,0.0,134.853135,27.549505,2620.27599,4.35231,37.432899,0.0,1372.944307,2.30363,8.224422,115.604373,0.0,33.002475,28173.9,6354.80363,28.27104,6383.493812,67749.68,38186.7,1427.963284,796.974835,65505.96,38585.629538,1718.745462,17823.06,5946.251188,1194.249587,945.570957,2158.496,6938.771403,4404.012245,5373.237624,413.977723,729.176155,11129.193482,1309.507426,207.14934,67749.68,6098.913779,351.146452,140.278366,17172.50165,6292.850248,239.823432,2610.648927,0.0,5255.077145,626.055281,809.638614,10022.363399,687.488036,1446.168729,7210.116,909.106848,8537.443,3.468647,3325.125,101.790429,35.235149,1694.694307,137.719884,151.553218,235.768564,3681.112624,358.976898,14331.345297,216.394802,1286.237624,64.945957,1026.284241,365.999587,1036.689769,360.181518,5022.091172,48.811469,308.382838,56.350248,107.962046,-50.172855,0.0,14364.684406,157.559818,455.015264,1768.063119,52.209158,6110.495,232.224422,4229.760314,2612.330446,1591.523515,15643.803815,159.786498,68.173596,267.73861,9.51055,1175.275539,95.521403,7986.362059,25.991386,8659.8651,7657.441756,89.013216,167.518771,32071.301242,69.619431,2756.9967,24303.668916,7767.632327,39166.785961,326.669346,3.456567,27.477277,0.0,159.786498,51.574063,798.636206,0.0,1056.153259,267.73861,77.813808,151.14755,180.882731,95.521403,6.364851,312.940064,161.117315,81.768465,89.013216,141.163446,19.635639,14.663267,0.018082,1174.426155,123.106868,167.518771,8.459901,86.498135,160.493116,606.191419,14331.345297,107.753409,30.062752,30.697731,1495.447807,1582.570545,68.411074,1438.501717,118.180689,1726.345585,167.670423,326.669346,0.0,27.477277,21.726407,57778.251754,297486.0,72489.570604,-1338.660145,1036.330833,183826.58339,88964.565633,3227.953816,26909.303755,9843.107702,2978.913366,529579.5,48266.89825,3.571146,2784.585771,45659.88058,44285.953795,13492.297959,13492.297959,72489.570604,32755.35,1649.286465,20222.622937,1036.330833,21.961716,2.151411,585.151317,540.963663,26304.63007,21.942987,26189.316007,0.0,3227.953816,26909.303755,0.0,0.0,2978.913366,14331.345297,14802.864559,0.0,13492.297959,18096.637789,2915.888994,18309.850509,4812.924099,1353.536753,26440.420974,53111.112624,37430.756146,13906.224166,5214.681471,4965.020703,12764.467483,2774.742611,3249.927526,1353.536753,4812.924099,18309.850509,14331.345297,5214.681471,4965.020703,53111.112624,10.408981,2774.742611,14331.380776,370.837046,11045.082096,3858.55198,3442.40099,1236.619224,3460.739909,14331.345297,0.25165,3385.6906,18723.569307,1433.609991,364.184223,2114.529681,40.923094,1709.0813,2040.165301,416.580656,1460.432296
min,2021-01-01 00:00:00,0.0,-65.28977,0.0,0.0,0.0,-0.018261,0.0,0.0,0.0,0.0,0.003756,0.0,0.0,,0.0,0.0,0.0,0.018053,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.43285,0.116643,0.0,0.0,0.0,0.0,0.080662,0.0,0.347828,-0.445517,-2.460233,0.0,0.0,0.0,0.0,0.803341,0.0,0.0,0.0,0.0,0.0,0.0,1.142743,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.01163,0.0,0.0,-0.094981,0.0,,1.994283,0.0,0.0,0.0,0.0,0.0,-29.573295,0.0,-80.017895,0.0,0.392412,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.973938,0.0,0.0,0.0,0.0,17.844336,0.042389,0.242152,0.705196,0.0,0.0,0.0,0.0,0.164008,0.0,0.0,,0.0,0.0,0.0,0.0,0.153733,0.0,0.0,0.0,0.0,0.0,0.0,0.957608,0.0,0.0,0.250194,-7.990921,0.0,0.020958,0.0,-707.419891,,-587.600356,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2310.0,0.0,-5535738.0,737.0,9467.0,2706.0,-292540.0,0.0,8987.48,5521.0,76.0,318.0,-6247.0,0.0,33.0,-4492792.0,-672.0,-254.55,0.0,-293679.0,0.0,0.0,0.0,0.0,9467.0,737.0,0.0,8.519531,0.0,0.0,-8533.0,0.0,0.0,0.0,0.0,72.0,1901.0,83.0,85.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,-7603.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,-294331.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1039.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,1015.0,0.0,0.0,4457.905273,0.0,0.0,1045.0,-11178.5,4845.84082,0.0,-0.42,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.900391,0.0,0.0,0.0,0.0,0.0,0.0,13.21,-3.0,2.0,0.0,0.0,16.4,0.0,0.0,0.0,3.400391,0.0,0.0,0.0,0.0,-0.6,0.0,0.0,-1771.0,0.0,0.2,45.41,0.0,0.0,0.0,0.0,0.0,1135.65625,191.8901,0.0,-957112.5,0.0,1678.418196,62.42,0.0,0.0,0.0,0.0,4440.315,0.0,0.023968,0.0,0.0,0.0,0.0,0.0,0.0,-9869770.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1844.4375,0.0,0.0,2562.09375,6603.0,4848.96875,4.5,0.0,0.0,-148994.36,0.0,0.0,0.0,0.0,1844.4375,0.0,0.0,0.0,6603.0,0.0,0.0,0.0,0.0,-805849.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2021-10-30 18:00:00,0.275466,1.168613,0.0,1.132107,0.0,0.0,0.0,0.0,0.0,1.458879,0.733096,0.0,0.0,,0.0,0.0,0.0,0.092822,0.0,0.0,0.0,0.0,0.0,3.35383,0.0,52.981146,0.625259,0.0,2.138847,0.046479,0.0,1.349542,0.0,2.248223,0.552749,0.66161,0.0,0.0,0.364333,0.0,1.619651,1.419102,0.587131,0.0,0.0,0.259538,0.0,2.352911,0.0,,0.0,0.0,0.0,5.149163,1.562273,0.0,0.0,1.066945,0.0,0.07314,0.0,13.90911,2.404806,0.913786,,3.277265,0.0,0.0,0.913387,0.0,0.0,0.042529,0.0,19.102283,0.0,2.575832,0.284434,0.0,1.066234,6.64553,0.30315,0.0,0.0,0.0,0.0,6.312645,0.0,0.0,0.0,0.0,70.301422,26.498394,0.706563,5.239022,0.0,6.052351,0.0,0.0,15.814944,0.0,0.0,,0.0,0.0,0.0,0.0,0.673541,0.0,0.0,0.0,1.373947,0.733447,14.84392,3.783296,0.0,0.0,1.563335,12.868499,0.0,1.237106,0.0,0.44517,,0.282513,0.0,2950.75,0.0,5763.75,5573.25,501.25,0.0,0.0,4.0,3648.75,0.0,5768.0,262.25,0.0,0.0,5968.895,0.0,0.0,0.0,0.0,0.0,2950.75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1545.75,0.0,0.01,501.25,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1509.25,0.0,23.33,0.0,262.25,0.0,0.0,0.0,0.0,0.0,17928.75,5220.5,4186.875,5606.0,53976.75,20777.25,1217.5,0.0,51678.17,31379.0,1259.75,9760.5,4846.75,335.75,706.0,2468.448,5700.0,2237.565,2577.75,326.0,599.0,8176.5,775.0,167.0,53976.75,5261.5,339.75,66.3525,13480.0,3068.25,191.0,472.0,0.0,3797.0,328.75,642.75,8653.9375,544.0,690.0,0.0,577.75,1657.75,0.0,0.0,0.0,0.0,0.0,37.75,105.0,213.0,0.0,0.0,0.0,81.0,879.375,51.0,746.0,253.75,271.0,0.0,2252.25,0.0,0.0,29.0,74.0,5.0,0.0,11155.25,28.0,1.0,80.75,0.0,678.75,216.75,1829.75,2261.0,0.0,12909.325,86.95,0.0,0.0,0.0,0.0,26.875,5845.25,0.0,5643.75,6355.65,3.375,34.275,26551.125,0.0,1782.75,19478.175,6173.7,30654.5,201.7575,0.0,8.3,0.0,86.95,18.3,584.525,0.0,0.0,0.0,0.0,1.4675,140.12,26.875,0.0,208.575,91.0675,6.0,3.375,88.1425,18.1375,10.9,0.0,308.5,56.5,34.275,0.0,36.685,116.275,0.0,0.0,25.75,0.0,0.0,1097.25,817.25,8.4675,995.54,69.45,1295.845,53.675,201.7575,0.0,8.3,0.0,27913.44,227046.8,47832.1075,0.0,0.0,116258.983971,65262.4625,0.0,23583.17125,0.0,721.25,445249.5,32573.815206,2.83736,0.0,23039.2475,15692.5,10127.7625,10127.7625,47832.1075,0.0,0.0,5997.5,0.0,14.7,0.0,0.0,0.0,62.665,12.7,0.0,0.0,0.0,23583.17125,0.0,0.0,721.25,0.0,12428.0625,0.0,10127.7625,0.0,694.8375,16036.83,2086.8125,146.83,21513.4375,46238.75,32757.7,9740.5225,4362.3425,2797.08,9423.5025,2619.2625,428.43,146.83,2086.8125,16036.83,0.0,4362.3425,2797.08,46238.75,0.0,2619.2625,0.0,0.0,8790.5,2685.75,2418.25,98.75,2326.44,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,587.4875
50%,2022-08-29 12:00:00,0.308599,1.72745,13.409091,1.949128,0.777143,0.0,3.931429,0.528,3.3,1.556587,0.980745,1.32,1.414286,,17.878788,0.0,3.803429,0.159259,1.725429,1.673143,8.045455,0.548571,59.0,5.081013,5.810606,61.145463,0.737346,0.0,3.532668,0.071812,0.893939,1.633681,0.754286,2.72773,0.673711,0.879828,2.742857,0.274286,0.415926,0.548571,2.063082,1.665484,0.681744,1.462857,1.371429,0.389493,1.554286,2.967122,1.554286,,0.173714,0.457143,8.939394,8.652797,1.807153,0.0,1.097143,1.536172,0.0,0.096732,3.2,23.196051,3.781748,1.534085,,3.716827,0.411429,0.0,0.936175,20.65,85.0,0.063969,6.4,23.525765,34.774242,3.077804,0.391729,5.668571,1.352359,9.866061,0.355819,2.285714,0.0,1.371429,0.0,7.295102,2.468571,2.377143,0.471429,0.0,76.474235,33.760495,0.873666,6.127528,1.371429,6.668367,0.091429,0.0,20.754016,0.0,0.502857,,1.018286,0.0,0.091429,0.0,0.792819,0.228571,0.091429,3.486364,1.446984,0.874746,20.472909,4.457838,14.142857,0.14415,1.767705,14.769014,1.32,1.596221,1.98,0.608472,,0.457026,0.0,5131.5,0.0,9341.5,9313.795,3723.5,0.0,0.0,12.0,6446.5,0.0,8357.0,1132.0,0.0,3448.0,9162.575,0.0,0.0,0.0,0.0,0.0,5131.5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2867.0,0.0,15.215,3723.5,0.0,0.0,0.0,12.0,0.0,0.0,1717.0,0.0,0.0,0.0,0.0,2921.5,0.0,38.33,0.0,1132.0,0.0,0.0,0.0,0.0,0.0,23655.5,8004.0,5097.0,6533.0,68781.0,26798.75,1516.0,618.0,66852.34,38999.0,1966.5,16357.0,6086.125,505.0,844.5,3572.28,7145.125,5270.225,7094.0,479.5,739.0,14024.0,1391.0,192.0,68781.0,6219.0,359.0,110.36,18464.0,6836.5,252.0,2481.5,0.0,5446.5,548.0,802.5,10257.75,663.0,1207.5,263.5,1052.5,2372.0,0.0,4509.0,0.0,0.0,775.5,72.0,148.0,267.0,5455.5,0.0,0.0,107.0,1201.5,62.0,825.0,367.5,384.5,0.0,6728.0,0.0,0.0,50.0,100.0,16.0,0.0,16730.0,70.0,5.0,1629.0,0.0,2826.0,245.0,3506.5,2688.0,0.0,16917.5,153.65,53.85,0.0,2.395,1289.5,101.9,9385.1,7.45,9261.35,7890.05,97.005,141.8,33391.9,53.9,2800.0,25560.95,7457.0,41140.245,368.3,0.0,21.595,0.0,153.65,48.795,938.51,0.0,0.0,0.0,0.0,141.5,182.9,101.9,0.0,299.95,176.049707,46.55,97.005,124.55,20.335,14.1,0.0,1024.0,100.45,141.8,0.8,96.91,154.6,1.0,0.0,109.255,7.605,0.0,1556.1,1651.0,39.82,1485.205,142.6,1739.345,120.395,368.3,0.0,21.595,0.0,62130.83,314507.1,85540.705,0.0,0.0,191610.806573,97545.89,3627.98,32444.97,12220.94,2821.5,541723.0,43008.857981,3.232668,1370.82,56486.705,50385.0,12931.46,12931.46,85540.705,0.0,0.0,13695.0,0.0,25.0,0.0,237.88,130.695,118.66,25.35,10435.0,0.0,3627.98,32444.97,0.0,0.0,2821.5,0.0,15156.205,0.0,12931.46,800.0,1109.91,18559.97,5394.375,769.46,25483.545,54445.0,38289.125,14987.23,5490.72,6229.7,13834.515,3207.29,964.575,769.46,5394.375,18559.97,0.0,5490.72,6229.7,54445.0,1.2,3207.29,0.0,0.0,12647.0,4792.0,3512.0,1341.5,3495.305,0.0,0.0,4385.23,16058.5,0.0,0.0,0.0,0.0,0.0,1349.75,0.0,1158.68
75%,2023-06-28 06:00:00,0.343724,2.261211,14.460227,3.812334,0.880357,0.0,4.453571,0.5584,3.49,1.762264,1.333608,1.396,1.495714,,19.280303,0.0,4.308571,0.240396,1.824771,1.895357,8.676136,0.621429,63.625,8.351621,6.266098,74.940577,0.938419,0.0,6.959357,0.093678,0.964015,1.987688,0.797714,3.502035,0.917068,1.255007,3.107143,0.310714,0.481967,0.621429,2.938873,2.09038,0.822825,1.657143,1.553571,0.593414,1.760714,3.948812,1.760714,,0.196786,0.517857,9.640152,10.722372,2.230597,0.0,1.242857,2.329922,0.681649,0.136213,3.625,32.84977,5.368043,2.61975,,4.576956,0.466071,0.0,1.029637,22.26875,85.0,0.090689,7.25,29.698578,37.500189,3.912567,0.549335,6.421429,1.749723,14.239591,0.463871,2.589286,0.0,1.553571,0.0,9.101131,2.796429,2.692857,0.498571,0.0,80.897717,44.001203,1.165661,7.861524,1.553571,7.728369,0.103571,0.0,28.182528,0.0,0.569643,,1.076914,0.704817,0.103571,0.0,1.030311,0.258929,0.103571,3.759659,1.532224,1.114978,36.081948,5.569244,14.957143,0.177706,2.173486,18.419416,1.396,2.065748,2.094,0.887435,,0.64239,206.0,5562.25,3139.0,11683.25,12383.55,4681.0,4537.0,0.0,14.0,7555.5,7066.0,10746.75,2444.25,1398.0,6826.5,11263.0125,206.0,0.0,0.0,0.0,0.0,5562.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3687.75,0.0,32.19,4681.0,0.0,426.0,0.0,14.0,0.0,0.0,4408.0,34505.25,0.0,0.0,0.0,3823.25,0.0,43.33,0.0,2444.25,0.0,0.0,0.0,0.0,0.0,29300.0,8266.75,6687.125,7385.5,79069.25,32742.0,1743.25,1192.0,76545.96,46320.875,2236.0,19053.75,7185.0,905.0,1000.25,5066.783,8362.5,6357.9375,7659.0,845.25,885.25,15157.0,1686.0,269.25,79069.25,7093.0,374.0,195.105,21518.5,9453.0,304.0,4397.5,0.0,6615.25,1050.0,1039.0,11541.6875,800.0,2108.5,5929.25,1279.25,2850.25,0.0,5786.5,338.0,117.0,3034.0,233.0,189.0,299.0,6103.25,1192.0,34505.25,133.0,1735.25,78.0,1345.0,458.0,839.25,1196.0,7283.0,0.0,1024.0,76.25,133.0,139.5,0.0,18686.0,132.0,1240.75,3023.75,16.0,3762.0,257.0,6517.75,3013.5,3491.25,19260.9,223.1,111.2,0.0,15.1975,1892.75,137.325,10600.0,46.325,11659.55,9149.175,128.705,265.45,38422.4875,136.725,3778.5,29933.625,9031.525,47956.425,436.725,1.28,38.425,0.0,223.1,74.3275,1060.0,0.0,0.0,0.0,9.02,258.075,228.65,137.325,0.1,411.475,225.725,71.2,128.705,169.125,22.16,18.0,0.0,1833.5,145.8,265.45,3.5,122.95,194.11,1160.0,34505.25,133.6075,28.6,9.325,1913.51,2368.75,75.4875,1803.82,158.5625,2159.4325,215.0875,436.725,0.0,38.425,31.9075,82660.435,384356.9,97290.395,143.975,421.37,244387.811771,111479.6,5070.3775,35290.3075,15840.84,4578.0,642621.5,56619.382761,4.002301,2476.15,64493.225,68085.0,16431.795,16431.795,97290.395,84785.0,0.0,26130.0,421.37,29.7,0.0,396.6175,998.14,158.9975,30.0,56395.0,0.0,5070.3775,35290.3075,0.0,0.0,4578.0,34505.25,17990.6175,0.0,16431.795,33180.0,3808.0,21040.8675,7513.09,2567.8875,29742.3725,61503.5,43652.885,18922.6375,6167.255,7005.895,18021.1125,3634.16,1356.5875,2567.8875,7513.09,21040.8675,34505.25,6167.255,7005.895,61503.5,2.55,3634.16,34505.25,19.0,15136.0,5337.25,4592.5,1691.0,4607.845,34505.25,0.0,5613.755,34505.25,628.9775,528.33,4870.5,0.9025,3780.2575,2931.5175,461.5475,1688.0475
max,2024-10-26 00:00:00,17.475728,156.647143,19.090909,102.193548,1.055214,131.7,5.338143,0.6672,4.17,129.732143,55.821429,1.668,1.787143,,25.454545,0.0,5.164343,89.714286,2.180314,2.271814,11.454545,0.744857,84.0,860.543522,8.272727,3506.412338,105.178571,0.0,226.34,87.753154,1.272727,589.066178,0.953143,878.785714,156.964286,429.453571,3.724286,0.372429,6.0,0.744857,438.362558,65.517368,57.37,1.986286,1.862143,35.666667,2.110429,611.646429,2.110429,,0.235871,0.620714,12.727273,232.038835,97.5,0.0,1.489714,146.731163,5.105263,19.357143,4.345,3712.865,95.648824,302.5,,580.917857,0.558643,0.0,4.666667,29.4,85.0,27.357143,8.69,82.155664,49.509091,962.321429,17.053721,7.696857,175.171429,1575.985,6.707424,3.103571,0.0,1.862143,0.0,1849.173571,3.351857,3.227714,0.595714,0.0,180.017895,2025.687786,93.785714,855.571429,1.862143,500.03,0.124143,0.0,1043.384593,0.0,0.682786,,1.286743,39.972857,0.124143,0.0,134.678571,0.310357,0.124143,4.963636,24.874603,56.651068,5165.0,1341.5,17.871429,125.515185,95.642857,1192.786316,1.668,87.277857,2.502,202.678571,,283.150905,727.0,6544.0,3139.0,15989.0,16318.8,6316.0,4537.0,0.0,24.0,9823.0,9193.0,15883.0,4136.0,3287.0,9823.0,15315.5,727.0,9148.0,4054.0,0.0,0.0,6544.0,0.0,1869.0,0.0,6318.0,0.0,4792.0,5387.0,7866.0,6311.0,0.0,94.53,6316.0,0.0,7389.0,4386.0,24.0,0.0,7310.0,9066.0,42008.0,0.0,5938.0,5675.0,6621.0,2432.0,100.0,0.0,4136.0,1411.0,4775.0,5138.0,0.0,4393.0,5572801.0,15621.0,289142.5,11186.0,2413859.0,5575053.0,198770.0,3866.0,2412967.0,65902.5,3125.0,4504000.0,16918.0,524487.0,125882.0,2368897.0,11575.0,8747.36,8500.0,197630.0,1118.0,19952.0,4134.0,425.0,2413859.0,10893.0,578.0,568.05,30512.0,13852.0,534.0,8176.0,0.0,25801.0,1202.0,1414.0,17366.0,1451.0,4118.0,5540000.0,2253.0,4129585.0,91.0,26937.0,338.0,117.0,9684.0,755.0,348.0,1525.0,12790.0,1192.0,42008.0,125244.0,4691.0,4995.0,5057.0,2491.0,524289.0,1196.0,8131.0,831.0,1024.0,214.0,335.0,196644.0,0.0,20187.0,65593.0,1749.0,4932.0,749.0,4495238.0,338.0,13507.0,8413.0,17134.0,26351.9,733.8,394.1,32768.0,559.62,3441.0,364.6,14001.4,183.0,21329.1,15978.0,1181.77,852.5,54361.0,285.5,7256.0,40803.2,24203.5,65355.9,928.7,745.5,207.8,0.0,733.8,203.1,1400.14,0.0,10506.5,32768.0,12649.6,520.71,374.03,364.6,1368.3,804.88,580.3,3056.63,1181.77,773.7,30.66,38.8,11.73,4290.0,595.86,852.5,1241.3,234.0,687.7,6576.0,42008.0,13264.1,4621.46,363.43,3330.3,4149.0,18625.57,37585.7,340.41,3647.6,16462.01,928.7,0.0,207.8,4747.1,132041.54,1019735.0,127997.0,552.5,9405.35,416220.86321,178496.31,9824.0,56967.44,23515.73,39230.0,1471832.0,226048.235357,21.619245,963270.35,98976.56,124090.0,27894.75,27894.75,127997.0,213340.0,87355.73,132390.0,9405.35,151.0,150.0,132107.63,51081.01,259029.0,158.0,101030.0,0.0,9824.0,56967.44,0.0,0.0,39230.0,42008.0,23713.06,0.0,27894.75,124090.0,154752.7,31508.88,11039.0,5049.97,179673.93,85740.0,59860.38,25706.66,8675.78,8094.19,36442.71,6012.5,154752.7,5049.97,11039.0,31508.88,42008.0,8675.78,8094.19,85740.0,395.66,6012.5,42008.0,4831.0,23787.0,6663.0,7742.0,12670.0,12421.81,42008.0,305.0,11548.22,42008.0,27378.38,9275.44,11548.22,4054.27,11066.5,12927.96,10870.69,296218.41
std,,0.593883,6.744098,6.911801,7.520377,0.402309,7.401922,2.034258,0.255215,1.595095,7.752609,1.91324,0.638038,0.683612,,9.215735,0.0,1.968027,2.847671,0.834007,0.865743,4.147081,0.28385,30.411926,30.92728,2.995114,138.891451,4.227681,0.0,19.120606,2.604678,0.460787,19.023499,0.364593,33.797188,4.960329,13.566319,1.41925,0.141925,0.216308,0.28385,18.048528,3.095848,3.076648,0.756933,0.709625,3.301631,0.804242,24.427674,0.804242,,0.089886,0.236542,4.607868,10.708174,4.006887,0.0,0.5677,6.07747,0.78426,0.898584,1.655792,178.949644,10.092119,14.845113,,18.306864,0.212888,0.0,0.363322,10.644174,39.010694,1.574183,3.311583,10.119307,17.924605,40.591956,0.679998,2.933117,7.721049,71.552196,0.4008,1.182708,0.0,0.709625,0.0,56.857988,1.277325,1.230017,0.227871,0.0,10.119307,78.455566,2.953216,33.139607,0.709625,30.242795,0.047308,0.0,44.317499,0.0,0.260196,,0.492201,1.601294,0.047308,0.0,4.123468,0.118271,0.047308,1.797068,1.161819,2.540393,232.756349,41.789095,6.83612,5.445524,3.171711,65.056054,0.638038,3.193377,0.957057,21.434228,,18.880012,171.810063,2174.523417,1507.008223,4183.647607,4660.785521,2038.708739,2172.488617,0.0,5.724175,2776.679252,3394.330364,3572.82259,1175.72957,700.404016,3351.753892,3484.767679,171.810063,1429.765672,458.321754,0.0,0.0,2174.523417,0.0,59.830337,0.0,658.969602,0.0,248.158345,1032.719352,1278.068409,1492.594188,0.0,19.746667,2038.708739,0.0,1658.061585,356.143216,5.724175,0.0,1025.468986,2628.860007,17779.405377,0.0,644.814238,348.785916,1552.03276,86.284986,23.209025,0.0,1175.72957,56.688482,173.018762,642.747361,0.0,324.378462,159584.9,3157.192828,160617.8,1295.443857,69603.59,224482.0,14665.337674,822.738715,69565.5,10009.254345,700.815743,129115.6,1829.599138,15389.775026,3599.348261,146036.1,1958.037789,2398.03489,2849.716502,14219.117807,199.852472,5033.623608,694.708219,78.485928,69603.59,1262.918265,41.571164,98.664889,5912.449614,3911.886727,268.017326,2109.231405,0.0,1782.440707,385.718646,283.025865,2106.977425,202.315957,901.868493,159095.6,478.699871,156122.4,10.320076,2783.689438,155.124879,53.697074,1985.921236,128.736352,59.87733,112.856684,2861.908791,547.067621,17779.405377,3596.552773,482.887464,268.240343,359.854214,161.375167,15265.797222,548.903419,2825.249353,179.076217,469.964131,34.056003,49.673411,14218.706274,0.0,5294.733421,1927.711907,653.32664,1530.712469,175.899767,129065.6,42.854631,2658.145126,558.779535,2136.999281,4948.672349,108.977228,66.805225,2119.351171,22.78747,939.783656,72.082771,3687.760135,33.393425,4158.136217,2194.234804,93.216749,150.423917,8407.355122,70.881827,1432.315273,7402.508994,2791.022956,11434.576276,171.174287,27.657955,26.659185,0.0,108.977228,39.998436,368.776013,0.0,2650.41939,2119.351171,887.849345,135.042224,62.335288,72.082771,53.364138,150.496196,95.394224,182.448997,93.216749,87.100489,4.841244,5.210003,0.377247,976.311074,98.875439,150.423917,76.663548,57.634316,64.713951,865.336593,17779.405377,386.278058,177.048783,85.565167,641.143292,1032.126741,539.96461,1484.386694,62.572658,616.080371,599.820332,171.174287,0.0,26.659185,138.446708,30550.626587,130835.0,32074.482067,35021.769838,2084.341019,81753.169278,39674.704647,2690.546197,14008.725486,7457.689329,2742.811326,152254.0,24660.40064,1.302078,35216.575224,25158.47314,30390.850293,4547.516416,4547.516416,32074.482067,292261.9,7777.910114,21233.618529,2084.341019,12.143224,17.232073,6045.73096,1796.94144,68687.995276,13.071442,29482.639851,0.0,2690.546197,14008.725486,0.0,0.0,2742.811326,17779.405377,4295.249257,0.0,4547.516416,30327.936031,10185.035609,3944.124992,2972.540129,1368.637117,11843.920107,11086.673678,8398.441418,6050.742543,1519.761432,2566.333544,12025.771335,1349.616989,10946.561485,1368.637117,2972.540129,3944.124992,17779.405377,1519.761432,2566.333544,11086.673678,36.089833,1349.616989,17779.44967,794.956143,23936.891255,2090.881763,1576.600386,1419.417442,1816.156489,17779.405377,8.760896,2838.281533,15395.123305,3599.171219,783.549679,2629.050309,314.082434,2826.658002,2296.003593,1018.553256,8515.193813


In [131]:
df_unificado.select_dtypes(include=["object"]).columns.tolist()

['HORA__Consolidado KPI',
 'HORA__Consolidado Produccion',
 'Nivel Silo Bagazo Norte (1)__Totalizadores Produccion',
 'HORA__Totalizadores Produccion',
 'HORA__Consolidado EE',
 'KW Trafo 8__Totalizadores Energia',
 'HORA__Totalizadores Energia',
 'HORA__Consolidado Agua',
 'HORA__Totalizadores Agua',
 'HORA__Consolidado GasVapor',
 'HORA__Totalizadores Gas y Vapor',
 'HORA__Consolidado Aire',
 'HORA__Totalizadores Aire',
 'HORA__Totalizadores CO2',
 'Totalizador Bba P51__Totalizadores Efluentes',
 'Totalizador Bba P2__Totalizadores Efluentes',
 'Totalizador Bba P4__Totalizadores Efluentes',
 'Totalizador Bba Envasado__Totalizadores Efluentes',
 'HORA__Totalizadores Efluentes',
 'Totalizador Bba P1__Totalizadores Efluentes',
 'HORA__Totalizadores Glicol']

In [132]:
cols_float = [
    "Nivel Silo Bagazo Norte (1)__Totalizadores Produccion",
    "Totalizador Bba P51__Totalizadores Efluentes",
    "Totalizador Bba P2__Totalizadores Efluentes",
    "Totalizador Bba P4__Totalizadores Efluentes",
    "Totalizador Bba Envasado__Totalizadores Efluentes",
    "Totalizador Bba P1__Totalizadores Efluentes",
    "KW Trafo 8__Totalizadores Energia",
]

df_unificado[cols_float] = (df_unificado[cols_float].astype(str).apply(lambda s: s.str.replace(r"\.", "", regex=True).str.replace(",", ".", regex=False))
      .apply(pd.to_numeric, errors="coerce").astype("float64")
)

In [None]:
cols_hora = [
    'HORA__Consolidado KPI',
    'HORA__Consolidado Produccion',
    'HORA__Totalizadores Produccion',
    'HORA__Consolidado EE',
    'HORA__Totalizadores Energia',
    'HORA__Consolidado Agua',
    'HORA__Totalizadores Agua',
    'HORA__Consolidado GasVapor',
    'HORA__Totalizadores Gas y Vapor',
    'HORA__Consolidado Aire',
    'HORA__Totalizadores Aire',
    'HORA__Totalizadores CO2',
    'HORA__Totalizadores Efluentes',
    'HORA__Totalizadores Glicol',
]

# 1) Igualdad exacta columna a columna (por fila), tratando NaN como iguales
base = df_unificado[cols_hora[0]].fillna("__NA__")
iguales_mask = df_unificado[cols_hora].fillna("__NA__").eq(base, axis=0)

# 2) ¿Todas las columnas son iguales en todas las filas?
todas_iguales = bool(iguales_mask.all().all())
print("¿Todas las HORA__ son iguales en todas las filas?:", todas_iguales)

# 3) Filas donde NO coinciden todas
filas_ok = iguales_mask.all(axis=1)
diff_rows = df_unificado.loc[~filas_ok, cols_hora]
print("Filas con diferencias:", len(diff_rows))

print(diff_rows.head(10))

¿Todas las HORA__ son iguales en todas las filas?: False
Filas con diferencias: 2
    HORA__Consolidado KPI HORA__Consolidado Produccion HORA__Totalizadores Produccion HORA__Consolidado EE HORA__Totalizadores Energia HORA__Consolidado Agua HORA__Totalizadores Agua HORA__Consolidado GasVapor HORA__Totalizadores Gas y Vapor HORA__Consolidado Aire HORA__Totalizadores Aire HORA__Totalizadores CO2 HORA__Totalizadores Efluentes HORA__Totalizadores Glicol
788              23:00:00                     23:00:00                       23:00:00             23:00:00                    23:00:00               23:00:00                 23:00:00                   23:00:00                        23:00:00               23:00:00                 23:00:00                23:59:00                      23:00:00                   23:00:00
828              23:59:00                     23:59:00                       23:59:00             23:59:00                    23:59:00               23:59:00                 23

In [136]:
# crea la columna HORA
df_unificado["HORA"] = pd.to_datetime(df_unificado["HORA__Consolidado KPI"], errors="coerce")

# elimina todas las columnas de hora originales
df_unificado = df_unificado.drop(columns=[c for c in cols_hora if c in df_unificado.columns])

  df_unificado["HORA"] = pd.to_datetime(df_unificado["HORA__Consolidado KPI"], errors="coerce")


Identificación y cuantificación de valores faltantes, detección de valores atípicos y errores

In [139]:
df_unificado.isnull().sum().sort_values(ascending=False).head(25)

Unnamed: 35__Consolidado KPI                             1212
Unnamed: 46__Consolidado KPI                             1212
Unnamed: 21__Consolidado KPI                             1212
EE Linea 5 / Hl__Consolidado KPI                         1212
Unnamed: 22__Consolidado KPI                             1212
Unnamed: 84__Consolidado KPI                              847
Unnamed: 67__Consolidado KPI                              847
Unnamed: 66__Consolidado KPI                              847
Unnamed: 113__Consolidado KPI                             847
Unnamed: 65__Consolidado KPI                              847
Unnamed: 103__Consolidado KPI                             847
Unnamed: 72__Consolidado KPI                              847
Unnamed: 14__Consolidado Produccion                       482
Unnamed: 123__Consolidado KPI                             482
Unnamed: 124__Consolidado KPI                             482
 __Consolidado KPI                                        365
Tot A10/

In [163]:
# Cuenta ceros
zero_counts = (df_unificado.eq(0) | df_unificado.eq("0")).sum(axis=0)
zero_ratio  = ((zero_counts / len(df_unificado)) * 100).sort_values(ascending=False)

umbral = 75.0
muchos_ceros = zero_ratio[zero_ratio >= umbral]

print("\nColumnas con ≥", umbral, "% ceros:", len(muchos_ceros), 'de', len(df_unificado.columns), "columnas totales")
print()
print(muchos_ceros.apply(lambda x: f"{x:.2f}%"))


Columnas con ≥ 75.0 % ceros: 46 de 412 columnas totales

VAPOR DE CALDERA 1 KG__Totalizadores Gas y Vapor           100.00%
Tot_Vapor_Caldera 3__Totalizadores Gas y Vapor             100.00%
Fuel Oil Tk2 (Kg)__Totalizadores Gas y Vapor               100.00%
Fuel Oil Tk1 (Kg)__Totalizadores Gas y Vapor               100.00%
HL Mosto Indio__Totalizadores Produccion                   100.00%
HL Mosto Bieckert__Totalizadores Produccion                100.00%
Hl Reserva 8__Totalizadores Produccion                     100.00%
Hl Session IPA__Totalizadores Produccion                   100.00%
HL Mosto Palermo__Totalizadores Produccion                 100.00%
Agua Filt FMaCist CE__Totalizadores Agua                   100.00%
Rep Agua Cist CE__Totalizadores Agua                       100.00%
HL Mosto Fuerte__Totalizadores Produccion                  100.00%
Red Barriles__Totalizadores Agua                           100.00%
KW Linea 4__Totalizadores Energia                          100.00%
Hl R