Lo primero que hacemos es cargar todas las hojas a utilizar para posteriormente concatenarlas.

In [1]:
import pandas as pd
from pathlib import Path
from functools import reduce
import numpy as np

# Workbook for the 2020-2021 period.
# NOTE(review): absolute, machine-specific Windows path — consider a path relative to a configurable data directory.
ruta_archivo = Path(r'C:\Users\agusm\Trabajo-Final-Lab-Datos-\Archivos_xlsx\Planta_2020_2021.xlsx')
# Cut-off date used by the date filter further down in this cell.
fecha_limite_str = '2021-07-01'  # ISO format avoids day/month ambiguity

def preparar_hoja(df: pd.DataFrame, nombre_hoja: str) -> "pd.DataFrame | None":
    """Reduce one workbook sheet to a single row per timestamp.

    The timestamp column FECHA_HORA is taken from, in order of preference:
    an existing 'FECHA_HORA' column, the pair 'DIA' + 'HORA', or 'DIA' alone.
    Rows whose timestamp cannot be parsed are dropped. Rows sharing a
    timestamp are collapsed: numeric columns with 'mean', every other column
    with 'first'. All value columns are renamed to '<nombre_hoja>__<column>'
    so that sheets can be merged without name collisions.

    Parameters
    ----------
    df : pd.DataFrame
        Raw sheet; it is copied, never modified.
    nombre_hoja : str
        Sheet name, used in the diagnostic prints and as column prefix.

    Returns
    -------
    pd.DataFrame or None
        Frame with 'FECHA_HORA' first followed by the prefixed value columns,
        or None when the sheet has none of the time columns.
    """
    df = df.copy()
    columnas_tiempo = ['DIA', 'HORA', 'FECHA_HORA']

    # Build FECHA_HORA
    if 'FECHA_HORA' in df.columns:
        ts = pd.to_datetime(df['FECHA_HORA'], errors='coerce')
    elif {'DIA', 'HORA'}.issubset(df.columns):
        # Keep only the H:MM:SS part of HORA before joining it with DIA.
        df['HORA'] = df['HORA'].astype(str).str.extract(r'(\d{1,2}:\d{2}:\d{2})')[0]
        ts = pd.to_datetime(df['DIA'].astype(str) + ' ' + df['HORA'].astype(str), errors='coerce')
    elif 'DIA' in df.columns:
        ts = pd.to_datetime(df['DIA'], errors='coerce')
    else:
        return None

    df['FECHA_HORA'] = ts

    # Diagnostics on unparseable timestamps. DIA / HORA are only shown when
    # the sheet actually has them: the FECHA_HORA-only and DIA-only branches
    # above used to fail here with KeyError.
    sin_fecha = df['FECHA_HORA'].isna()
    print(f"Hoja {nombre_hoja}: Nulos en FECHA_HORA antes de dropna: {sin_fecha.sum()}")
    if sin_fecha.any():
        for etiqueta, columna in (('Días nulos', 'DIA'), ('Horas nulas', 'HORA')):
            if columna in df.columns:
                print(f"{etiqueta}:\n{df.loc[sin_fecha, columna]}")
    df = df.dropna(subset=['FECHA_HORA']).reset_index(drop=True)

    # 1) One row per timestamp within the sheet
    #    - numeric columns: 'mean' (use 'sum' instead if the columns are totals)
    #    - non-numeric columns: 'first'
    # Time columns are excluded from both groups: DIA / HORA are dropped before
    # aggregating, so a numeric DIA or HORA must not end up in agg_map.
    numeric_cols = [
        c for c in df.select_dtypes(include=[np.number]).columns
        if c not in columnas_tiempo
    ]
    other_cols = [c for c in df.columns if c not in numeric_cols and c not in columnas_tiempo]
    print(f"Duplicadas por FECHA_HORA en hoja {nombre_hoja} antes de procesmiento: {df.duplicated(subset=['FECHA_HORA']).sum()}")

    agg_map = {**{c: 'mean' for c in numeric_cols}, **{c: 'first' for c in other_cols}}
    df = (
        df.drop(columns=['DIA', 'HORA'], errors='ignore')
          .groupby('FECHA_HORA', as_index=False)
          .agg(agg_map)
    )
    print(f"Duplicadas por FECHA_HORA en hoja {nombre_hoja} despues de procesmiento: {df.duplicated(subset=['FECHA_HORA']).sum()}")

    # Sanity check: list any timestamp that is still repeated (printed once).
    filas_duplicadas = df[df.duplicated(subset=['FECHA_HORA'], keep=False)]
    if not filas_duplicadas.empty:
        print(f"Filas duplicadas por FECHA_HORA en hoja {nombre_hoja}:\n{filas_duplicadas}")

    # 2) Prefix the value columns with the sheet name to avoid collisions between sheets
    keep = [c for c in df.columns if c not in ['FECHA_HORA']]
    df = df[['FECHA_HORA'] + keep].rename(columns={c: f'{nombre_hoja}__{c}' for c in keep})
    return df

# Load every sheet of the workbook (sheet_name=None returns a dict keyed by sheet name)
dict_de_hojas = pd.read_excel(ruta_archivo, sheet_name=None)
# Keep only the sheets that could be prepared (not None) and still have rows
hojas_preparadas = [h for nombre, df in dict_de_hojas.items()
                    if (h := preparar_hoja(df, nombre)) is not None and len(h) > 0]

# Outer-merge all prepared sheets on FECHA_HORA (each sheet is already one row per timestamp)
# NOTE(review): reduce() without an initial value raises TypeError if hojas_preparadas is empty.
df_combinado = reduce(lambda l, r: pd.merge(l, r, on='FECHA_HORA', how='outer'), hojas_preparadas)
df_combinado = df_combinado.sort_values('FECHA_HORA').reset_index(drop=True)

# Duplicate diagnostics after the merge
print("Duplicadas exactas:", df_combinado.duplicated().sum())
print("Duplicadas por FECHA_HORA:", df_combinado.duplicated(subset=['FECHA_HORA']).sum())



# Date filter (pick "before" or "after" explicitly)
fecha_limite = pd.to_datetime(fecha_limite_str)

# Case A: keep rows strictly BEFORE the limit (the comparison is `<`, so the limit itself is excluded)
df_filtrado = df_combinado[df_combinado['FECHA_HORA'] < fecha_limite].copy()

# Case B: keep rows strictly AFTER the limit (limit excluded)
# df_filtrado = df_combinado[df_combinado['FECHA_HORA'] > fecha_limite].copy()

print(f"Rango: {df_filtrado['FECHA_HORA'].min()} -> {df_filtrado['FECHA_HORA'].max()}")
print(f"Filas: {len(df_filtrado)}, Columnas: {df_filtrado.shape[1]}")


Hoja Consolidado KPI: Nulos en FECHA_HORA antes de dropna: 462
Días nulos:
15573   NaT
15574   NaT
15575   NaT
15576   NaT
15577   NaT
         ..
16030   NaT
16031   NaT
16032   NaT
16033   NaT
16034   NaT
Name: DIA, Length: 462, dtype: datetime64[ns]
Horas nulas:
15573    NaN
15574    NaN
15575    NaN
15576    NaN
15577    NaN
        ... 
16030    NaN
16031    NaN
16032    NaN
16033    NaN
16034    NaN
Name: HORA, Length: 462, dtype: object
Duplicadas por FECHA_HORA en hoja Consolidado KPI antes de procesmiento: 298
Duplicadas por FECHA_HORA en hoja Consolidado KPI despues de procesmiento: 0
Hoja Consolidado Produccion: Nulos en FECHA_HORA antes de dropna: 0
Duplicadas por FECHA_HORA en hoja Consolidado Produccion antes de procesmiento: 298
Duplicadas por FECHA_HORA en hoja Consolidado Produccion despues de procesmiento: 0
Hoja Consolidado EE: Nulos en FECHA_HORA antes de dropna: 0
Duplicadas por FECHA_HORA en hoja Consolidado EE antes de procesmiento: 297
Duplicadas por FECHA_HORA 

In [2]:
# Calendar days that have at least one timestamp in the filtered data
dias_raw = pd.DatetimeIndex(
    df_filtrado['FECHA_HORA'].dt.normalize().unique()
).sort_values()
inicio, fin = dias_raw.min(), dias_raw.max()
full = pd.date_range(start=inicio, end=fin, freq='D')

# Days of the full calendar range for which no timestamp exists at all
faltan_en_raw = full.difference(dias_raw)

print(
    f"Días en raw: {len(dias_raw)}",
    f"Días desde {inicio.date()} hasta {fin.date()}: {len(full)}",
    f"Días esperados: {len(full)}",
    f"Faltan en raw (todas las hojas): {len(faltan_en_raw)}",
    sep="\n",
)
# List the missing days themselves
print(faltan_en_raw)

Días en raw: 358
Días desde 2020-07-01 hasta 2021-06-30: 365
Días esperados: 365
Faltan en raw (todas las hojas): 7
DatetimeIndex(['2020-08-29', '2020-08-30', '2020-08-31', '2020-10-31',
               '2020-12-31', '2021-03-31', '2021-05-31'],
              dtype='datetime64[ns]', freq=None)


In [3]:
# Independent snapshot of the filtered frame; df_filtrado is reassigned when the next workbook is processed.
df1=df_filtrado.copy()

In [4]:
# Display df1 as the cell output.
df1

Unnamed: 0,FECHA_HORA,Consolidado KPI__EE Planta / Hl,Consolidado KPI__EE Elaboracion / Hl,Consolidado KPI__EE Bodega / Hl,Consolidado KPI__EE Cocina / Hl,Consolidado KPI__EE Envasado / Hl,Consolidado KPI__EE Linea 2 / Hl,Consolidado KPI__EE Linea 3 / Hl,Consolidado KPI__EE Linea 4 / Hl,Consolidado KPI__EE Servicios / Hl,...,Consolidado Aire__Aire Elaboracion (m3),Consolidado Aire__Aire Envasado (M3),Consolidado Aire__Aire Servicios (M3),Consolidado Aire__Tot Aire Expulsion,Consolidado Aire__Totalizador_Aire_Bodega,Consolidado Aire__Totalizador_Aire_Cocina,Consolidado Aire__Totalizador_Aire_L2,Consolidado Aire__Totalizador_Aire_L3,Consolidado Aire__Totaliador_Aire_L4,Consolidado Aire__Totalizador_Aire_L5
0,2020-07-01 00:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000,0.000,0.000,0.000,0.000,0.00,0.000,0.00,0.000,0.000
1,2020-07-01 01:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000,0.000,1328.000,0.000,0.000,0.00,0.000,0.00,0.000,0.000
2,2020-07-01 02:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1077.820,49.800,1528.380,0.000,834.530,243.29,44.450,0.12,0.220,5.010
3,2020-07-01 03:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,2158.360,100.170,1725.470,0.000,1670.980,487.38,89.200,0.37,0.260,10.340
4,2020-07-01 04:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,3241.720,153.700,1916.580,0.000,2509.870,731.85,135.950,0.49,1.850,15.410
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8946,2021-06-30 21:00:00,9.086660,0.769001,0.760380,0.293222,1.954775,2.353923,1.797698,1.527574,5.849461,...,30427.245,18227.665,2254.090,11866.760,14019.565,4540.92,6147.755,5459.78,3772.660,2847.470
8947,2021-06-30 22:00:00,9.268396,0.777740,0.775656,0.297887,1.967890,2.371261,1.781836,1.520340,6.000492,...,31138.805,18809.365,2789.830,11886.465,14547.810,4704.53,6370.485,5613.81,3883.315,2941.755
8948,2021-06-30 23:00:00,9.318111,0.778922,0.779851,0.294093,1.954111,2.393310,1.788795,1.515841,6.062856,...,31901.425,19370.485,3305.090,11906.050,15111.015,4884.36,6594.890,5764.56,3991.720,3019.315
8949,2021-06-30 23:59:00,9.475108,0.784620,0.790020,0.298222,1.960522,2.427161,1.780074,1.514144,6.200286,...,32657.340,19939.355,3797.305,11925.535,15664.915,5066.89,6816.975,5918.17,4094.880,3109.330


In [5]:
import pandas as pd
from pathlib import Path
from functools import reduce
import numpy as np

# Second workbook (file name says 2021-2022); this rebinds the ruta_archivo / fecha_limite_str set in the first cell.
# NOTE(review): absolute, machine-specific Windows path — consider a path relative to a configurable data directory.
ruta_archivo = Path(r'C:\Users\agusm\Trabajo-Final-Lab-Datos-\Archivos_xlsx\Planta_2021_2022.xlsx')
# Cut-off date used by the date filter further down in this cell.
fecha_limite_str = '2023-03-06'  # ISO format avoids day/month ambiguity

def preparar_hoja(df: pd.DataFrame, nombre_hoja: str) -> "pd.DataFrame | None":
    """Reduce one workbook sheet to a single row per timestamp.

    NOTE(review): this redefines the `preparar_hoja` of the first cell and
    shadows it from here on. Unlike that version, HORA is used as-is (no
    HH:MM:SS extraction) when it is joined with DIA — confirm whether the two
    definitions should be unified.

    The timestamp column FECHA_HORA is taken from, in order of preference:
    an existing 'FECHA_HORA' column, the pair 'DIA' + 'HORA', or 'DIA' alone.
    Rows whose timestamp cannot be parsed are dropped. Rows sharing a
    timestamp are collapsed: numeric columns with 'mean', every other column
    with 'first'. All value columns are renamed to '<nombre_hoja>__<column>'.

    Parameters
    ----------
    df : pd.DataFrame
        Raw sheet; it is copied, never modified.
    nombre_hoja : str
        Sheet name, used in the diagnostic prints and as column prefix.

    Returns
    -------
    pd.DataFrame or None
        Frame with 'FECHA_HORA' first followed by the prefixed value columns,
        or None when the sheet has none of the time columns.
    """
    df = df.copy()
    columnas_tiempo = ['DIA', 'HORA', 'FECHA_HORA']

    # Build FECHA_HORA
    if 'FECHA_HORA' in df.columns:
        ts = pd.to_datetime(df['FECHA_HORA'], errors='coerce')
    elif {'DIA', 'HORA'}.issubset(df.columns):
        ts = pd.to_datetime(df['DIA'].astype(str) + ' ' + df['HORA'].astype(str), errors='coerce')
    elif 'DIA' in df.columns:
        ts = pd.to_datetime(df['DIA'], errors='coerce')
    else:
        return None

    df['FECHA_HORA'] = ts

    # Diagnostics on unparseable timestamps. DIA / HORA are only shown when
    # the sheet actually has them: the FECHA_HORA-only and DIA-only branches
    # above used to fail here with KeyError.
    sin_fecha = df['FECHA_HORA'].isna()
    print(f"Hoja {nombre_hoja}: Nulos en FECHA_HORA antes de dropna: {sin_fecha.sum()}")
    if sin_fecha.any():
        for etiqueta, columna in (('Días nulos', 'DIA'), ('Horas nulas', 'HORA')):
            if columna in df.columns:
                print(f"{etiqueta}:\n{df.loc[sin_fecha, columna]}")
    df = df.dropna(subset=['FECHA_HORA']).reset_index(drop=True)

    # 1) One row per timestamp within the sheet
    #    - numeric columns: 'mean' (use 'sum' instead if the columns are totals)
    #    - non-numeric columns: 'first'
    # Time columns are excluded from both groups: DIA / HORA are dropped before
    # aggregating, so a numeric DIA or HORA must not end up in agg_map.
    numeric_cols = [
        c for c in df.select_dtypes(include=[np.number]).columns
        if c not in columnas_tiempo
    ]
    other_cols = [c for c in df.columns if c not in numeric_cols and c not in columnas_tiempo]
    print(f"Duplicadas por FECHA_HORA en hoja {nombre_hoja} antes de procesmiento: {df.duplicated(subset=['FECHA_HORA']).sum()}")

    agg_map = {**{c: 'mean' for c in numeric_cols}, **{c: 'first' for c in other_cols}}
    df = (
        df.drop(columns=['DIA', 'HORA'], errors='ignore')
          .groupby('FECHA_HORA', as_index=False)
          .agg(agg_map)
    )
    print(f"Duplicadas por FECHA_HORA en hoja {nombre_hoja} despues de procesmiento: {df.duplicated(subset=['FECHA_HORA']).sum()}")

    # Sanity check: list any timestamp that is still repeated (printed once).
    filas_duplicadas = df[df.duplicated(subset=['FECHA_HORA'], keep=False)]
    if not filas_duplicadas.empty:
        print(f"Filas duplicadas por FECHA_HORA en hoja {nombre_hoja}:\n{filas_duplicadas}")

    # 2) Prefix the value columns with the sheet name to avoid collisions between sheets
    keep = [c for c in df.columns if c not in ['FECHA_HORA']]
    df = df[['FECHA_HORA'] + keep].rename(columns={c: f'{nombre_hoja}__{c}' for c in keep})
    return df


# Load every sheet of the workbook (sheet_name=None returns a dict keyed by sheet name)
dict_de_hojas = pd.read_excel(ruta_archivo, sheet_name=None)
# Keep only the sheets that could be prepared (not None) and still have rows
hojas_preparadas = [h for nombre, df in dict_de_hojas.items()
                    if (h := preparar_hoja(df, nombre)) is not None and len(h) > 0]

# Outer-merge all prepared sheets on FECHA_HORA (each sheet is already one row per timestamp)
# NOTE(review): reduce() without an initial value raises TypeError if hojas_preparadas is empty.
df_combinado = reduce(lambda l, r: pd.merge(l, r, on='FECHA_HORA', how='outer'), hojas_preparadas)
df_combinado = df_combinado.sort_values('FECHA_HORA').reset_index(drop=True)

# Duplicate diagnostics after the merge
print("Duplicadas exactas:", df_combinado.duplicated().sum())
print("Duplicadas por FECHA_HORA:", df_combinado.duplicated(subset=['FECHA_HORA']).sum())

# Optional: collapse any timestamp that is still repeated
# NOTE(review): 'mean' is applied to every column — presumably all merged columns are numeric here; verify.
df_combinado = (
    df_combinado
    .groupby('FECHA_HORA', as_index=False)   # last safeguard
    .agg('mean')
    .sort_values('FECHA_HORA')
    .reset_index(drop=True)
)

# Date filter (pick "before" or "after" explicitly)
fecha_limite = pd.to_datetime(fecha_limite_str)

# Case A: keep rows strictly BEFORE the limit (the comparison is `<`, so the limit itself is excluded)
df_filtrado = df_combinado[df_combinado['FECHA_HORA'] < fecha_limite].copy()

# Case B: keep rows strictly AFTER the limit (limit excluded)
# df_filtrado = df_combinado[df_combinado['FECHA_HORA'] > fecha_limite].copy()

print(f"Rango: {df_filtrado['FECHA_HORA'].min()} -> {df_filtrado['FECHA_HORA'].max()}")
print(f"Filas: {len(df_filtrado)}, Columnas: {df_filtrado.shape[1]}")


Hoja Consolidado KPI: Nulos en FECHA_HORA antes de dropna: 0
Duplicadas por FECHA_HORA en hoja Consolidado KPI antes de procesmiento: 299
Duplicadas por FECHA_HORA en hoja Consolidado KPI despues de procesmiento: 0
Hoja Consolidado Produccion: Nulos en FECHA_HORA antes de dropna: 1
Días nulos:
15316   NaT
Name: DIA, dtype: datetime64[ns]
Horas nulas:
15316    NaN
Name: HORA, dtype: object
Duplicadas por FECHA_HORA en hoja Consolidado Produccion antes de procesmiento: 299
Duplicadas por FECHA_HORA en hoja Consolidado Produccion despues de procesmiento: 0
Hoja Consolidado EE: Nulos en FECHA_HORA antes de dropna: 1
Días nulos:
15316   NaT
Name: DIA, dtype: datetime64[ns]
Horas nulas:
15316    NaN
Name: HORA, dtype: object
Duplicadas por FECHA_HORA en hoja Consolidado EE antes de procesmiento: 298
Duplicadas por FECHA_HORA en hoja Consolidado EE despues de procesmiento: 0
Hoja Consolidado Agua: Nulos en FECHA_HORA antes de dropna: 159
Días nulos:
496     2021-07-21
497     2021-07-21
498  

In [6]:
# Distinct calendar days present in the filtered data, in chronological order
dias_raw = pd.DatetimeIndex(
    df_filtrado['FECHA_HORA'].dt.normalize().unique()
).sort_values()
inicio = dias_raw.min()
fin = dias_raw.max()
full = pd.date_range(inicio, fin, freq='D')

# Days with no timestamp at all between the first and the last day
faltan_en_raw = full.difference(dias_raw)

for linea in (
    f"Días en raw: {len(dias_raw)}",
    f"Días desde {inicio.date()} hasta {fin.date()}: {len(full)}",
    f"Días esperados: {len(full)}",
    f"Faltan en raw (todas las hojas): {len(faltan_en_raw)}",
):
    print(linea)
# Which days are missing
print(faltan_en_raw)

Días en raw: 601
Días desde 2021-07-01 hasta 2023-03-05: 613
Días esperados: 613
Faltan en raw (todas las hojas): 12
DatetimeIndex(['2021-08-29', '2021-08-30', '2021-08-31', '2021-10-31',
               '2021-12-31', '2022-03-31', '2022-05-31', '2022-08-29',
               '2022-08-30', '2022-08-31', '2022-10-31', '2022-12-31'],
              dtype='datetime64[ns]', freq=None)


In [7]:
# Independent snapshot of the filtered frame built from the second workbook, then display it.
df2=df_filtrado.copy()
df2

Unnamed: 0,FECHA_HORA,Consolidado KPI__EE Planta / Hl,Consolidado KPI__EE Elaboracion / Hl,Consolidado KPI__EE Bodega / Hl,Consolidado KPI__EE Cocina / Hl,Consolidado KPI__EE Envasado / Hl,Consolidado KPI__EE Linea 2 / Hl,Consolidado KPI__EE Linea 3 / Hl,Consolidado KPI__EE Linea 4 / Hl,Consolidado KPI__EE Servicios / Hl,...,Consolidado Aire__Aire Elaboracion (m3),Consolidado Aire__Aire Envasado (M3),Consolidado Aire__Aire Servicios (M3),Consolidado Aire__Tot Aire Expulsion,Consolidado Aire__Totalizador_Aire_Bodega,Consolidado Aire__Totalizador_Aire_Cocina,Consolidado Aire__Totalizador_Aire_L2,Consolidado Aire__Totalizador_Aire_L3,Consolidado Aire__Totaliador_Aire_L4,Consolidado Aire__Totalizador_Aire_L5
0,2021-07-01 02:00:00,0.207674,0.006979,0.015400,0.002115,0.001821,0.013610,0.013274,0.000000,0.189959,...,22361.05,16687.95,15468.00,0.00,17523.48,4837.57,6440.43,4903.54,3200.90,2143.08
1,2021-07-01 03:00:00,0.419615,0.014110,0.030876,0.003929,0.003945,0.027048,0.026549,0.000000,0.384015,...,23053.16,16897.16,15698.68,0.00,18048.57,5004.59,6529.94,4950.46,3273.15,2143.61
2,2021-07-01 04:00:00,0.625781,0.021393,0.046428,0.006044,0.005766,0.040458,0.039191,0.000000,0.573443,...,23759.91,17110.51,16011.58,0.00,18584.67,5175.24,6620.61,4997.80,3345.37,2146.73
3,2021-07-01 05:00:01,0.822242,0.027690,0.060917,0.007857,0.007586,0.053429,0.051833,0.000000,0.754146,...,24463.00,17325.05,16326.95,0.00,19118.36,5344.64,6712.13,5045.33,3417.79,2149.80
4,2021-07-01 06:00:01,1.012486,0.034593,0.075711,0.009973,0.009407,0.066299,0.063843,0.000000,0.929236,...,25172.39,17541.75,16641.86,0.00,19657.22,5515.17,6805.28,5093.20,3490.21,2153.06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15004,2023-03-05 20:00:00,6.556630,0.713452,0.566726,0.305691,1.825698,2.426241,1.725187,1.615697,3.459979,...,17965.24,13438.12,16355.64,807.95,13658.30,3498.99,2494.08,5646.22,3037.55,2260.27
15005,2023-03-05 21:00:00,6.621998,0.720256,0.578699,0.308625,1.856028,2.408729,1.724542,1.594413,3.479915,...,18631.49,14123.25,17072.26,834.89,14179.85,3616.75,2681.53,5909.38,3202.69,2329.65
15006,2023-03-05 22:00:00,6.687534,0.724845,0.587880,0.311559,1.888087,2.380853,1.735978,1.579945,3.500317,...,19302.02,14818.72,17773.26,861.57,14701.79,3738.66,2878.77,6173.29,3367.22,2399.44
15007,2023-03-05 23:00:00,6.789452,0.730969,0.597780,0.314492,1.925381,2.376754,1.760276,1.574925,3.548271,...,19955.22,15496.92,18335.86,867.67,15226.81,3860.74,3061.42,6439.03,3526.59,2469.88


In [8]:
# Display df1 again, for comparison with df2 before concatenating.
df1

Unnamed: 0,FECHA_HORA,Consolidado KPI__EE Planta / Hl,Consolidado KPI__EE Elaboracion / Hl,Consolidado KPI__EE Bodega / Hl,Consolidado KPI__EE Cocina / Hl,Consolidado KPI__EE Envasado / Hl,Consolidado KPI__EE Linea 2 / Hl,Consolidado KPI__EE Linea 3 / Hl,Consolidado KPI__EE Linea 4 / Hl,Consolidado KPI__EE Servicios / Hl,...,Consolidado Aire__Aire Elaboracion (m3),Consolidado Aire__Aire Envasado (M3),Consolidado Aire__Aire Servicios (M3),Consolidado Aire__Tot Aire Expulsion,Consolidado Aire__Totalizador_Aire_Bodega,Consolidado Aire__Totalizador_Aire_Cocina,Consolidado Aire__Totalizador_Aire_L2,Consolidado Aire__Totalizador_Aire_L3,Consolidado Aire__Totaliador_Aire_L4,Consolidado Aire__Totalizador_Aire_L5
0,2020-07-01 00:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000,0.000,0.000,0.000,0.000,0.00,0.000,0.00,0.000,0.000
1,2020-07-01 01:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000,0.000,1328.000,0.000,0.000,0.00,0.000,0.00,0.000,0.000
2,2020-07-01 02:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,1077.820,49.800,1528.380,0.000,834.530,243.29,44.450,0.12,0.220,5.010
3,2020-07-01 03:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,2158.360,100.170,1725.470,0.000,1670.980,487.38,89.200,0.37,0.260,10.340
4,2020-07-01 04:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,3241.720,153.700,1916.580,0.000,2509.870,731.85,135.950,0.49,1.850,15.410
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8946,2021-06-30 21:00:00,9.086660,0.769001,0.760380,0.293222,1.954775,2.353923,1.797698,1.527574,5.849461,...,30427.245,18227.665,2254.090,11866.760,14019.565,4540.92,6147.755,5459.78,3772.660,2847.470
8947,2021-06-30 22:00:00,9.268396,0.777740,0.775656,0.297887,1.967890,2.371261,1.781836,1.520340,6.000492,...,31138.805,18809.365,2789.830,11886.465,14547.810,4704.53,6370.485,5613.81,3883.315,2941.755
8948,2021-06-30 23:00:00,9.318111,0.778922,0.779851,0.294093,1.954111,2.393310,1.788795,1.515841,6.062856,...,31901.425,19370.485,3305.090,11906.050,15111.015,4884.36,6594.890,5764.56,3991.720,3019.315
8949,2021-06-30 23:59:00,9.475108,0.784620,0.790020,0.298222,1.960522,2.427161,1.780074,1.514144,6.200286,...,32657.340,19939.355,3797.305,11925.535,15664.915,5066.89,6816.975,5918.17,4094.880,3109.330


In [9]:
# Stack the two per-workbook frames row-wise; ignore_index=True renumbers the rows from 0.
# pd.concat uses an outer join on columns by default, so a column present in only one frame is NaN in the other's rows.
vector_df=[df1,df2]
df_concatenado = pd.concat(vector_df, ignore_index=True)

In [10]:
# Display the concatenated frame as the cell output.
df_concatenado

Unnamed: 0,FECHA_HORA,Consolidado KPI__EE Planta / Hl,Consolidado KPI__EE Elaboracion / Hl,Consolidado KPI__EE Bodega / Hl,Consolidado KPI__EE Cocina / Hl,Consolidado KPI__EE Envasado / Hl,Consolidado KPI__EE Linea 2 / Hl,Consolidado KPI__EE Linea 3 / Hl,Consolidado KPI__EE Linea 4 / Hl,Consolidado KPI__EE Servicios / Hl,...,Consolidado Aire__Aire Envasado (M3),Consolidado Aire__Aire Servicios (M3),Consolidado Aire__Tot Aire Expulsion,Consolidado Aire__Totalizador_Aire_Bodega,Consolidado Aire__Totalizador_Aire_Cocina,Consolidado Aire__Totalizador_Aire_L2,Consolidado Aire__Totalizador_Aire_L3,Consolidado Aire__Totaliador_Aire_L4,Consolidado Aire__Totalizador_Aire_L5,Consolidado GasVapor__Vapor_L5 (KG)
0,2020-07-01 00:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,
1,2020-07-01 01:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.00,1328.00,0.00,0.00,0.00,0.00,0.00,0.00,0.00,
2,2020-07-01 02:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,49.80,1528.38,0.00,834.53,243.29,44.45,0.12,0.22,5.01,
3,2020-07-01 03:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,100.17,1725.47,0.00,1670.98,487.38,89.20,0.37,0.26,10.34,
4,2020-07-01 04:00:00,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,153.70,1916.58,0.00,2509.87,731.85,135.95,0.49,1.85,15.41,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23955,2023-03-05 20:00:00,6.556630,0.713452,0.566726,0.305691,1.825698,2.426241,1.725187,1.615697,3.459979,...,13438.12,16355.64,807.95,13658.30,3498.99,2494.08,5646.22,3037.55,2260.27,3819.67
23956,2023-03-05 21:00:00,6.621998,0.720256,0.578699,0.308625,1.856028,2.408729,1.724542,1.594413,3.479915,...,14123.25,17072.26,834.89,14179.85,3616.75,2681.53,5909.38,3202.69,2329.65,3987.42
23957,2023-03-05 22:00:00,6.687534,0.724845,0.587880,0.311559,1.888087,2.380853,1.735978,1.579945,3.500317,...,14818.72,17773.26,861.57,14701.79,3738.66,2878.77,6173.29,3367.22,2399.44,4153.38
23958,2023-03-05 23:00:00,6.789452,0.730969,0.597780,0.314492,1.925381,2.376754,1.760276,1.574925,3.548271,...,15496.92,18335.86,867.67,15226.81,3860.74,3061.42,6439.03,3526.59,2469.88,4316.91


In [11]:
# Sheet prefixes added by preparar_hoja ('<sheet name>__')
prefijos = [
    "Consolidado Agua__",
    "Consolidado EE__",
    "Consolidado KPI__",
    "Consolidado Produccion__",
    "Consolidado GasVapor__",
    "Consolidado Aire__"
]


def _quitar_prefijo(nombre):
    """Return `nombre` without its leading sheet prefix, if it has one.

    Only a prefix at the very start of the name is removed; the same text
    appearing elsewhere in the name is left untouched.
    """
    if isinstance(nombre, str):
        for prefijo in prefijos:
            if nombre.startswith(prefijo):
                return nombre[len(prefijo):]
    return nombre


df_concatenado.columns = [_quitar_prefijo(c) for c in df_concatenado.columns]

# The prefixes existed to keep same-named columns from different sheets apart,
# so removing them can create repeated names; report them instead of letting
# later column selections silently return several columns.
columnas_repetidas = df_concatenado.columns[df_concatenado.columns.duplicated()].unique().tolist()
if columnas_repetidas:
    print(f"⚠️ Columnas repetidas tras quitar los prefijos: {columnas_repetidas}")

print(df_concatenado.columns.tolist())

['FECHA_HORA', 'EE Planta / Hl', 'EE Elaboracion / Hl', 'EE Bodega / Hl', 'EE Cocina / Hl', 'EE Envasado / Hl', 'EE Linea 2 / Hl', 'EE Linea 3 / Hl', 'EE Linea 4 / Hl', 'EE Servicios / Hl', 'EE Sala Maq / Hl', 'EE Frio / Hl', 'EE Aire / Hl', 'EE CO2 / Hl', 'EE Caldera / Hl', 'EE Eflu / Hl', 'EE Agua / Hl', 'EE Resto Serv / Hl', 'EE Resto Planta / Hl', 'Agua Planta / Hl', 'Agua Elab / Hl', 'Agua Bodega / Hl', 'Agua Cocina / Hl', 'Agua Envas / Hl', 'Agua Linea 2/Hl', 'Agua Linea 3/Hl', 'Agua Linea 4/Hl', 'Agua Linea 5/Hl', 'Agua Servicios/Hl', 'Agua Planta de Agua/Hl', 'Produccion Agua / Hl', 'ET Planta / Hl', 'ET Elab/Hl', 'ET Bodega/Hl', 'ET Cocina/Hl', 'ET Envasado/Hl', 'ET Linea 2/Hl', 'ET Linea 3/Hl', 'ET Linea 4/Hl', 'ET Linea 5/Hl', 'ET Servicios / Hl', 'Aire Planta / Hl', 'Aire Elaboracion / Hl', 'Aire Cocina / Hl', 'Aire Bodega / Hl', 'Aire Envasado / Hl', 'Aire L2 / Hl', 'Aire L3 / Hl', 'Aire L4 / Hl', 'Aire L5 / Hl', 'Aire Servicios / Hl', 'CO 2 / Hl', 'CO 2 Filtro / Hl', 'CO 

In [12]:
import pandas as pd

# Group by calendar day (date part of 'FECHA_HORA') and take the row label of
# the latest timestamp (idxmax) within each day.
indices_ultima_medicion = df_concatenado.groupby(
    df_concatenado['FECHA_HORA'].dt.date
)['FECHA_HORA'].idxmax()

# Keep only the last reading of each day.
# .copy() makes df_final an independent frame: columns are added to it later,
# which on a bare .loc selection triggers SettingWithCopyWarning.
df_final = df_concatenado.loc[indices_ultima_medicion].copy()



# Show the result
print("✅ Proceso completado. El DataFrame final está listo.")
print(df_final.head())
print(f"Dimensiones del DataFrame final: {df_final.shape}")
# Days present at each stage
dias_final = pd.DatetimeIndex(df_final['FECHA_HORA'].dt.normalize().unique()).sort_values()
dias_raw   = pd.DatetimeIndex(df_concatenado['FECHA_HORA'].dt.normalize().unique()).sort_values()

# Common range used for the comparison
inicio = max(dias_final.min(), dias_raw.min())
fin    = min(dias_final.max(), dias_raw.max())
full   = pd.date_range(inicio, fin, freq='D')

# Days missing from the raw data (no timestamp at all on that day)
faltan_en_raw = full.difference(dias_raw)

# Days present in the raw data but lost while building df_final
# (computed for inspection; not printed by this cell)
perdidos_en_proceso = dias_raw.difference(dias_final)

print(f"Días esperados: {len(full)}")
print(f"Dias en df_final: {len(dias_final)}")
print(f"Va desde {dias_final.min().date()} hasta {dias_final.max().date()}")

✅ Proceso completado. El DataFrame final está listo.
             FECHA_HORA  EE Planta / Hl  EE Elaboracion / Hl  EE Bodega / Hl  \
24  2020-07-01 23:59:00      642.727209            47.145349       69.023256   
49  2020-07-02 23:59:00        7.767254             0.769609        0.798838   
74  2020-07-03 23:59:00        8.801205             0.862593        0.835762   
99  2020-07-04 23:59:00        5.175639             0.439225        0.371077   
124 2020-07-05 23:59:00        7.924665             0.802365        0.717787   

     EE Cocina / Hl  EE Envasado / Hl  EE Linea 2 / Hl  EE Linea 3 / Hl  \
24         0.000000         13.813953        14.578784         0.000000   
49         0.319229          2.358593         4.158962         1.506838   
74         0.260924          1.985462        39.076667         1.448962   
99         0.258048          1.442114         4.348182         1.355238   
124        0.301592          1.664726         5.125920         2.704348   

     EE Linea 4

In [13]:
import pandas as pd
import io 

# --- 2. Calendar components (no hour: df_final has one row per day) ---
# The year is a trend rather than a cycle, so it is not encoded here.
# Both components are kept as standalone Series, so no helper column is ever
# added to df_final and nothing has to be dropped afterwards.
mes = df_final['FECHA_HORA'].dt.month              # 1-12
dia_semana = df_final['FECHA_HORA'].dt.dayofweek   # 0=Monday, 6=Sunday

# --- 3. Cyclical (sine/cosine) encoding ---
# assign() returns a new frame instead of writing into the existing one, which
# avoids SettingWithCopyWarning when df_final comes from a .loc selection.
df_final = df_final.assign(
    mes_sin=np.sin(2 * np.pi * mes / 12),
    mes_cos=np.cos(2 * np.pi * mes / 12),
    dia_semana_sin=np.sin(2 * np.pi * dia_semana / 7),
    dia_semana_cos=np.cos(2 * np.pi * dia_semana / 7),
)

# --- 4. Result ---
# FECHA_HORA is kept on purpose for traceability and is meant to be removed
# later, so the frame is not purely numeric yet despite the message below.
print("DataFrame 100% numérico y listo para escalar:")
print(df_final.head())

def descargar_dataframe_como_csv(df, nombre_archivo="datos_descargados.csv"):
    """Write `df` to `nombre_archivo` as a UTF-8 CSV file without the index.

    Prints a confirmation on success. Any exception raised while saving is
    caught and reported with a printed message instead of being propagated.
    The function only writes the file; it does not trigger a browser download.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to save.
    nombre_archivo : str
        Destination path of the CSV file.

    Returns
    -------
    None
    """
    opciones_csv = {"index": False, "encoding": "utf-8"}
    try:
        # Save the file on the local disk of the environment
        df.to_csv(nombre_archivo, **opciones_csv)
        mensaje_ok = f"✅ DataFrame guardado exitosamente como '{nombre_archivo}' en el entorno local."
        print(mensaje_ok)
    except Exception as error:
        print(f"❌ Ocurrió un error al guardar o descargar el archivo: {error}")

# NOTE(review): df_final is already a DataFrame at this point, so this re-wrap looks redundant — confirm before removing.
df_final=pd.DataFrame(df_final)

# Save the daily dataset to 'foundational_dataset.csv' (path relative to the working directory).
descargar_dataframe_como_csv(df_final, "foundational_dataset.csv")

DataFrame 100% numérico y listo para escalar:
             FECHA_HORA  EE Planta / Hl  EE Elaboracion / Hl  EE Bodega / Hl  \
24  2020-07-01 23:59:00      642.727209            47.145349       69.023256   
49  2020-07-02 23:59:00        7.767254             0.769609        0.798838   
74  2020-07-03 23:59:00        8.801205             0.862593        0.835762   
99  2020-07-04 23:59:00        5.175639             0.439225        0.371077   
124 2020-07-05 23:59:00        7.924665             0.802365        0.717787   

     EE Cocina / Hl  EE Envasado / Hl  EE Linea 2 / Hl  EE Linea 3 / Hl  \
24         0.000000         13.813953        14.578784         0.000000   
49         0.319229          2.358593         4.158962         1.506838   
74         0.260924          1.985462        39.076667         1.448962   
99         0.258048          1.442114         4.348182         1.355238   
124        0.301592          1.664726         5.125920         2.704348   

     EE Linea 4 / Hl  

### ESTO ES TODO, YA ESTÁ. EL CÓDIGO A CONTINUACIÓN YA NO SE UTILIZA EN EL PIPELINE:

In [242]:
# Candidate columns whose correlation with the target column is evaluated in the next cell.
analizar_corr=[
 'Agua Bodega / Hl', 'Agua Cocina / Hl', 'Agua Dilucion (Hl)', 'Agua Elab / Hl', 
 'Agua Envas / Hl', 'Agua Linea 2/Hl', 'Agua Linea 3/Hl', 'Agua Linea 4/Hl', 'Agua Planta / Hl', 'Agua Planta de Agua/Hl', 
 'Agua Servicios (Hl)', 'Agua Servicios/Hl', 'Aire Bodega / Hl', 
 'Aire Cocina / Hl', 'Aire Elaboracion (m3)', 'Aire Elaboracion / Hl', 
 'Aire Envasado (M3)', 'Aire Envasado / Hl', 'Aire L2 / Hl', 'Aire L3 / Hl', 
 'Aire L4 / Hl', 'Aire Planta / Hl', 'Aire Servicios (M3)', 
 'Aire Servicios / Hl', 'CO 2 / Hl', 'CO 2 Filtro / Hl', 'CO 2 Linea 4 / Hl', 
 'CO 2 linea 3 / Hl', 'Conversion Kg/Mj', 'EE Agua / Hl', 
 'EE Aire / Hl', 'EE CO2 / Hl', 'EE Caldera / Hl', 'EE Eflu / Hl', 
 'EE Linea 2 / Hl', 'EE Linea 3 / Hl', 'EE Linea 4 / Hl', 
 'EE Resto Planta / Hl', 'EE Resto Serv / Hl', 
 'EE Servicios / Hl', 'ET Bodega/Hl', 'ET Cocina/Hl', 'ET Elab/Hl', 
 'ET Elaboracion (Mj)', 'ET Envasado (Mj)', 'ET Envasado/Hl', 
 'ET Linea 2/Hl', 'ET Linea 3/Hl', 'ET Linea 4/Hl', 
 'ET Planta / Hl', 'ET Servicios (Mj)', 'ET Servicios / Hl', 
 'FC Barriles', 'FC L1 y L2', 'FC Lavadora L2', 'FC Lavadora L3', 
 'Produccion Agua / Hl', 'Red L1 y L2', 'Red L3', 'Red Paste L4',
 'Tot Aire Expulsion', 'Tot_Vapor_CIP_Bodega', 'Tot_Vapor_L3_L4', 
 'Totaliador_Aire_L4', 'Totalizador_Aire_Bodega', 'Totalizador_Aire_Cocina', 
 'Totalizador_Aire_L2', 'Totalizador_Aire_L3', 'Totalizador_Aire_L5', 
 'VAPOR DE LINEA 1 Y 2 KG', 'VAPOR DE LINEA 4 KG', 'Vapor L3', 
 'Vapor Servicio (Kg)'
]

In [243]:
import numpy as np
import pandas as pd

TARGET = 'Frio (Kw)'  # column every candidate is correlated against
MIN_PARES = 30  # minimum number of valid rows required to compute a correlation

# Alternative: restrict the candidates to the analizar_corr columns that exist in the frame:
# candidate_cols = [c for c in analizar_corr if c in df_concatenado.columns]
# Currently used: the whole analizar_corr list, minus the target if it is in it.
candidate_cols = analizar_corr.copy()
candidate_cols = [c for c in candidate_cols if c != TARGET]

def pair_corr(df, col, target=TARGET, min_pairs=MIN_PARES, method='pearson'):
    """Correlation between `df[col]` and `df[target]` over rows where both are present.

    Both columns are cast to float. Only rows in which neither value is
    missing are used; when fewer than `min_pairs` such rows exist the result
    is NaN. `method` is forwarded to `Series.corr`.
    """
    objetivo = df[target].astype(float)
    candidato = df[col].astype(float)
    ambos_presentes = objetivo.notna() & candidato.notna()
    if ambos_presentes.sum() >= min_pairs:
        return objetivo[ambos_presentes].corr(candidato[ambos_presentes], method=method)
    # Not enough overlapping observations
    return np.nan

# Correlation of every candidate with the target; candidates without enough valid pairs (NaN) are discarded
corrs = pd.Series(
    {c: pair_corr(df_final, c) for c in candidate_cols}
).dropna()

# Split the candidates by absolute correlation threshold
umbral = 0.30
es_baja = corrs.abs() < umbral
cols_bajas = corrs.index[es_baja].tolist()
cols_altas = corrs.index[~es_baja].tolist()

print(f"Columnas evaluadas: {len(corrs)}")
print(f"Con |corr| < {umbral}: {len(cols_bajas)}")
print(f"Con |corr| >= {umbral}: {len(cols_altas)}")

Columnas evaluadas: 73
Con |corr| < 0.3: 69
Con |corr| >= 0.3: 4


In [244]:
# Correlations of the candidates at or above the threshold.
corrs[cols_altas]

Agua Linea 3/Hl       0.573846
EE Linea 3 / Hl       0.529771
EE Resto Serv / Hl   -0.833393
ET Linea 3/Hl         0.349395
dtype: float64

In [245]:
# Drop the candidates whose absolute correlation with the target is below the threshold.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and drop() would otherwise raise KeyError.
df_final = df_final.drop(columns=cols_bajas, errors='ignore')


In [246]:
# Display df_final after dropping the low-correlation columns.
df_final

Unnamed: 0,FECHA_HORA,EE Planta / Hl,EE Elaboracion / Hl,EE Bodega / Hl,EE Cocina / Hl,EE Envasado / Hl,EE Linea 3 / Hl,EE Sala Maq / Hl,EE Frio / Hl,EE Resto Serv / Hl,...,Vapor Cocina (Kg),Vapor Envasado (Kg),Vapor _Vapor_L5 (KG),Aire Producido (M3),Aire Planta (M3),Vapor_L5 (KG),mes_sin,mes_cos,dia_semana_sin,dia_semana_cos
24,2020-07-01 23:59:00,642.727209,47.145349,69.023256,0.000000,13.813953,0.000000,397.209302,333.139535,52.116279,...,470.0,12619.87,923.69,31871.0,26192.02,,-0.5,-8.660254e-01,0.974928,-0.222521
49,2020-07-02 23:59:00,7.767254,0.769609,0.798838,0.319229,2.358593,1.506838,3.885211,3.419545,0.544181,...,30740.0,114069.87,270.75,51079.0,43136.07,,-0.5,-8.660254e-01,0.433884,-0.900969
74,2020-07-03 23:59:00,8.801205,0.862593,0.835762,0.260924,1.985462,1.448962,4.205406,3.787626,0.442502,...,46380.0,143068.86,299.68,62958.0,45528.85,,-0.5,-8.660254e-01,-0.433884,-0.900969
99,2020-07-04 23:59:00,5.175639,0.439225,0.371077,0.258048,1.442114,1.355238,2.165177,1.870213,0.308385,...,64050.0,124411.10,4046.11,69435.0,52367.44,,-0.5,-8.660254e-01,-0.974928,-0.222521
124,2020-07-05 23:59:00,7.924665,0.802365,0.717787,0.301592,1.664726,2.704348,3.756100,3.354482,0.424516,...,65890.0,102608.87,5017.28,66431.0,48039.29,,-0.5,-8.660254e-01,-0.781831,0.623490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23859,2023-03-01 23:59:00,6.960582,0.907813,0.715616,0.356634,1.829548,1.696989,2.566239,2.266825,0.553679,...,84630.0,107194.93,,60947.0,40059.96,4221.38,1.0,6.123234e-17,0.974928,-0.222521
23884,2023-03-02 23:59:00,9.307652,1.326883,1.062212,0.323571,1.221995,9.078431,4.049660,3.696766,0.975361,...,74360.0,45374.07,,46734.0,28308.05,3814.44,1.0,6.123234e-17,0.433884,-0.900969
23909,2023-03-03 23:59:00,10.349507,1.383240,1.289095,0.360673,0.524170,,5.171581,5.037719,0.657542,...,34610.0,17512.84,,27958.0,17607.70,4529.47,1.0,6.123234e-17,-0.433884,-0.900969
23934,2023-03-04 23:59:00,5.403903,0.556676,0.351268,0.344653,1.502742,1.691494,1.924659,1.779488,0.399285,...,80780.0,115864.40,,56101.0,35510.51,4119.21,1.0,6.123234e-17,-0.974928,-0.222521


A partir de esto comienzo con el EDA.

Lo primero que hago es detectar valores faltantes.


In [247]:
# Number of rows, used to turn null counts into fractions
n = len(df_final)

# Null count per column, ascending, keeping only the columns that have nulls
null_counts = (
    df_final.isna()
    .sum()
    .sort_values()
    .loc[lambda conteo: conteo > 0]
)

# Fraction of missing values per column
null_counts.div(n)

EE Frio / Hl            0.019812
EE Bodega / Hl          0.019812
EE Envasado / Hl        0.019812
EE Sala Maq / Hl        0.019812
EE Elaboracion / Hl     0.019812
EE Planta / Hl          0.019812
EE Resto Serv / Hl      0.019812
EE Cocina / Hl          0.079249
EE Linea 3 / Hl         0.155370
Agua Linea 3/Hl         0.155370
ET Linea 3/Hl           0.155370
Vapor_L5 (KG)           0.373306
Aire L5 / Hl            0.391032
Agua Linea 5/Hl         0.391032
ET Linea 5/Hl           0.391032
Vapor _Vapor_L5 (KG)    0.626694
dtype: float64

In [248]:
# Exploratory check: for every column that has missing values, drop the rows
# where either that column or the target is NaN and measure how correlated the
# two are. Columns with many nulls that are only weakly correlated with the
# target are removed instead of being imputed.
import numpy as np
import pandas as pd

TARGET_COL = 'Frio (Kw)'
MIN_ABS_CORR = 0.3  # columns whose |Pearson r| against the target is below this are dropped

# Collect the columns first and drop them once at the end, so df_final is not
# mutated while the loop is still reading from it.
weak_cols = []
for col in null_counts.index:
    # Skip the target itself (selecting it twice would yield duplicate columns
    # and a meaningless correlation) and columns already removed by a previous
    # run of this cell (otherwise re-running raises KeyError).
    if col == TARGET_COL or col not in df_final.columns:
        continue

    # Temporary two-column frame without the rows that have NaN in either column
    pair = df_final[[TARGET_COL, col]].dropna()

    # At least two observations are needed to compute a correlation
    if len(pair) > 1:
        correlacion = np.corrcoef(pair[TARGET_COL], pair[col])
        print(f"Correlación entre '{TARGET_COL}' y '{col}':")
        print(correlacion)
        # A NaN correlation (e.g. a constant column) compares False here,
        # so such a column is kept, exactly as before.
        if abs(correlacion[0, 1]) < MIN_ABS_CORR:
            weak_cols.append(col)
    else:
        print(f"No hay suficientes datos válidos (después de eliminar NaN) para calcular la correlación entre '{TARGET_COL}' y '{col}'.")

df_final = df_final.drop(columns=weak_cols)

Correlación entre 'Frio (Kw)' y 'EE Frio / Hl':
[[1.         0.35009968]
 [0.35009968 1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Bodega / Hl':
[[ 1.         -0.00474077]
 [-0.00474077  1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Envasado / Hl':
[[ 1.         -0.00479217]
 [-0.00479217  1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Sala Maq / Hl':
[[1.         0.27312795]
 [0.27312795 1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Elaboracion / Hl':
[[ 1.         -0.00440825]
 [-0.00440825  1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Planta / Hl':
[[ 1.         -0.00507589]
 [-0.00507589  1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Resto Serv / Hl':
[[ 1.         -0.83339295]
 [-0.83339295  1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Cocina / Hl':
[[ 1.         -0.00887645]
 [-0.00887645  1.        ]]
Correlación entre 'Frio (Kw)' y 'EE Linea 3 / Hl':
[[1.         0.52977137]
 [0.52977137 1.        ]]
Correlación entre 'Frio (Kw)' y 'Agua Linea 3/Hl':
[[1.

In [249]:
# Fill with 0 the columns that have a large share of missing values.
# `null_counts` stores absolute NaN counts per column (the fraction was only
# displayed, never stored), so it must be converted to a fraction of rows
# before comparing against the 15 % threshold. Comparing the raw counts with
# 0.15 would select every column that has at least one NaN.
NULL_FRACTION_THRESHOLD = 0.15
null_counts = null_counts[null_counts / len(df_final) > NULL_FRACTION_THRESHOLD]
for col in null_counts.index:
    # Some of these columns may already have been dropped from df_final
    if col in df_final.columns:
        df_final[col] = df_final[col].fillna(0)

In [250]:
# Show df_final as the cell output to inspect the frame after the zero-fill
# (pandas abbreviates the middle rows/columns with "..." in the rendering).
df_final

Unnamed: 0,FECHA_HORA,EE Linea 3 / Hl,EE Frio / Hl,EE Resto Serv / Hl,Agua Linea 3/Hl,ET Linea 3/Hl,Hl de Mosto,Hl Cerveza Cocina,Hl Producido Bodega,Hl Cerveza Filtrada,...,Vapor Elaboracion (Kg),Vapor Cocina (Kg),Vapor Envasado (Kg),Aire Producido (M3),Aire Planta (M3),Vapor_L5 (KG),mes_sin,mes_cos,dia_semana_sin,dia_semana_cos
24,2020-07-01 23:59:00,0.000000,333.139535,52.116279,0.000000,0.000000,0.0,0.00,43.000,0.0,...,6779.44,470.0,12619.87,31871.0,26192.02,0.00,-0.5,-8.660254e-01,0.974928,-0.222521
49,2020-07-02 23:59:00,1.506838,3.419545,0.544181,1.204334,12.047473,2907.0,3343.05,7005.025,13361.0,...,45103.22,30740.0,114069.87,51079.0,43136.07,0.00,-0.5,-8.660254e-01,0.433884,-0.900969
74,2020-07-03 23:59:00,1.448962,3.787626,0.442502,1.454557,15.512192,4829.0,4902.50,7463.250,9493.0,...,61087.32,46380.0,143068.86,62958.0,45528.85,0.00,-0.5,-8.660254e-01,-0.433884,-0.900969
99,2020-07-04 23:59:00,1.355238,1.870213,0.308385,1.150267,11.598608,7828.0,13705.60,12964.300,8450.0,...,80588.95,64050.0,124411.10,69435.0,52367.44,0.00,-0.5,-8.660254e-01,-0.974928,-0.222521
124,2020-07-05 23:59:00,2.704348,3.354482,0.424516,3.245652,24.778109,6406.0,9459.95,8908.975,12894.0,...,78034.90,65890.0,102608.87,66431.0,48039.29,0.00,-0.5,-8.660254e-01,-0.781831,0.623490
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23859,2023-03-01 23:59:00,1.696989,2.266825,0.553679,1.033182,14.177334,6595.0,9452.08,10343.540,9437.0,...,100145.99,84630.0,107194.93,60947.0,40059.96,4221.38,1.0,6.123234e-17,0.974928,-0.222521
23884,2023-03-02 23:59:00,9.078431,3.696766,0.975361,8.896078,108.461098,6385.0,8364.35,5100.675,5795.0,...,94099.88,74360.0,45374.07,46734.0,28308.05,3814.44,1.0,6.123234e-17,0.433884,-0.900969
23909,2023-03-03 23:59:00,0.000000,5.037719,0.657542,0.000000,0.000000,2853.0,4011.83,2039.415,0.0,...,47088.59,34610.0,17512.84,27958.0,17607.70,4529.47,1.0,6.123234e-17,-0.433884,-0.900969
23934,2023-03-04 23:59:00,1.691494,1.779488,0.399285,1.023778,16.740129,7715.0,11938.52,11620.760,4486.0,...,95852.94,80780.0,115864.40,56101.0,35510.51,4119.21,1.0,6.123234e-17,-0.974928,-0.222521
