In [159]:
import gzip
import shutil

import pandas as pd

from functions.columns import *

In [54]:
def new_df(dff,columns_1,columns_2):
    """
    Interleave two column subsets of `dff`, row by row, into a single DataFrame.

    Parameters:
    - dff: DataFrame. The source DataFrame.
    - columns_1: list. Column names of the first subset; its rows are placed at the
      even positions (0, 2, 4, ...) of the result.
    - columns_2: list. Column names of the second subset; its rows are placed at the
      odd positions (1, 3, 5, ...) of the result.

    Returns:
    - DataFrame with ``2 * len(dff)`` rows, indexed ``0 .. 2*len(dff) - 1``. Row ``2*i``
      holds the `columns_1` values of the i-th row of `dff` and row ``2*i + 1`` holds
      its `columns_2` values.

    Notes:
    - The original column names are dropped: both subsets are relabelled with the
      integers ``0 .. k-1`` so that the vertical concatenation lines them up by position.
    - If the two subsets have a different number of columns, the concatenation takes
      the union of the integer labels and the narrower subset gets NaN in the extra ones.
    """
    n_rows = len(dff)

    # Built-in `range` produces the target positions, so the function does not depend
    # on `np` happening to be available in the notebook namespace.
    even_rows = dff[columns_1].set_index(pd.Index(list(range(0, 2 * n_rows, 2))))
    odd_rows = dff[columns_2].set_index(pd.Index(list(range(1, 2 * n_rows, 2))))

    # Positional labels make the two subsets share the same columns when stacked.
    even_rows.columns = list(range(len(even_rows.columns)))
    odd_rows.columns = list(range(len(odd_rows.columns)))

    # Stack, then sort by the even/odd index to obtain the interleaved order.
    return pd.concat([even_rows, odd_rows], ignore_index=False, axis=0).sort_index()


# Compress a file with gzip.
def CompressFile(name_file, name_file_compress):
    """
    Write a gzip-compressed copy of `name_file` to `name_file_compress`.

    Parameters:
    - name_file: path of the existing file to read (opened in binary mode).
    - name_file_compress: path of the gzip file to write (opened with mode 'wb').

    A progress message is printed before and after the copy. The source file is
    left in place.
    """
    print("Comprimiendo archivo")
    # Copy in fixed-size chunks. Iterating a binary file "by line" splits on newline
    # bytes, which buffers arbitrarily large newline-free stretches in memory and
    # issues one write per line otherwise.
    with open(name_file, 'rb') as file, gzip.open(name_file_compress, 'wb') as file_compress:
        shutil.copyfileobj(file, file_compress)

    print("Archivo comprimido exitosamente: ", name_file_compress)

In [193]:
# Column groups keyed by the rounding rule applied to them in the rounding cell:
#   round_int    -> missing values become -999, rounded to 0 decimals and cast to int
#   round_float1 -> missing values become -99.9, rounded to 1 decimal
#   round_float2 -> missing values become -99.99, rounded to 2 decimals
# Each column is listed once; the UPWARD_SW_*/UPWARD_LW_* entries used to be repeated,
# which only made the rounding loop process them twice.
rounds=dict(round_int=["GLOBAL_Avg","GLOBAL_Min","GLOBAL_Max","DIRECT_Avg","DIRECT_Min","DIRECT_Max","DIFFUSE_Avg",
                       "DIFFUSE_Min","DIFFUSE_Max","DOWNWARD_Avg","DOWNWARD_Min","DOWNWARD_Max","UPWARD_SW_Avg",
                       "UPWARD_SW_Min","UPWARD_SW_Max","UPWARD_LW_Avg","UPWARD_LW_Min","UPWARD_LW_Max","PRESSURE_Avg"],
           round_float1=["GLOBAL_Std","DIRECT_Std","DIFFUSE_Std","DOWNWARD_Std","AIR_TEMPERATURE_Avg",
                         "RELATIVE_HUMIDITY_Avg","UPWARD_SW_Std","UPWARD_LW_Std","UVB_Avg","UVB_Std","UVB_Min","UVB_Max",
                        "DWTERMO_Avg","UWTERMO_Avg"],
           round_float2=["DWIRTEMPC_Avg","UWIRTEMPC_Avg"])


#########################################
#      Options per file category        #
#########################################

# Ordered column labels selected from the data frame for each category code.
# A label may appear more than once in a list (e.g. "null_int", "null_float2").
options = {
    "C0100": [
        "day", "minute",
        "GLOBAL_Avg", "GLOBAL_Std", "GLOBAL_Min", "GLOBAL_Max",
        "DIRECT_Avg", "DIRECT_Std", "DIRECT_Min", "DIRECT_Max",
        "DIFFUSE_Avg", "DIFFUSE_Std", "DIFFUSE_Min", "DIFFUSE_Max",
        "DOWNWARD_Avg", "DOWNWARD_Std", "DOWNWARD_Min", "DOWNWARD_Max",
        "AIR_TEMPERATURE_Avg", "RELATIVE_HUMIDITY_Avg", "PRESSURE_Avg",
        "empy",
    ],
    "C0200": [
        "day", "minute",
        "GLOBAL_Avg", "GLOBAL_Std", "GLOBAL_Min", "GLOBAL_Max",
    ],
    "C0300": [
        "day", "minute",
        "UPWARD_SW_Avg", "UPWARD_SW_Std", "UPWARD_SW_Min", "UPWARD_SW_Max",
        "UPWARD_LW_Avg", "UPWARD_LW_Std", "UPWARD_LW_Min", "UPWARD_LW_Max",
        "null_int", "null_float1", "null_int", "null_int",
    ],
    "C0500": [
        "day", "minute",
        "null_float1",
        "UVB_Avg", "UVB_Std", "UVB_Min", "UVB_Max",
        "empy",
    ],
    "C4000": [
        "day", "minute",
        "DWIRTEMPC_Avg", "null_float2", "null_float2", "null_float2", "DWTERMO_Avg",
        "UWIRTEMPC_Avg", "null_float2", "null_float2", "null_float2", "UWTERMO_Avg",
        "empy",
    ],
}

#########################################
#           Spacing options             #
#########################################

# One list of integers per category code; there is no entry for C0200.
# NOTE(review): presumably the character widths used when writing each field —
# confirm against the code that consumes `spaces`.
spaces = {
    "C0100": [3, 4, 6, 5, 4, 4, 6, 5, 4, 4, 8, 5, 4],
    "C0300": [3, 4, 6, 5, 4, 4, 6, 5, 4, 4, 6, 5, 4, 4],
    "C0500": [3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
    "C4000": [3, 4, 6, 5, 5, 5, 5, 7, 5, 5, 5, 6],
}



In [69]:
# Derive day-of-month, minute-of-day and month lists from the TIMESTAMP column.
# NOTE(review): `month` is not used in the cells visible here — presumably consumed later; confirm.

day = list(map(lambda stamp: stamp.day, df["TIMESTAMP"]))
minute = list(map(lambda stamp: stamp.hour * 60 + stamp.minute, df["TIMESTAMP"]))
month = list(map(lambda stamp: stamp.month, df["TIMESTAMP"]))

# Work on a copy so `df` keeps its original columns. Inserting "minute" and then
# "day" at position 0 leaves "day" as the first column and "minute" as the second.
df_aux = df.copy()
df_aux.insert(0, "minute", minute)
df_aux.insert(0, "day", day)

In [70]:
# Replace missing values with the group's sentinel and round to the group's precision.
# Each entry is (key in `rounds`, sentinel for missing values, decimals kept).
for group, sentinel, decimals in (
    ("round_int", -999, 0),
    ("round_float1", -99.9, 1),
    ("round_float2", -99.99, 2),
):
    for column in rounds[group]:
        cleaned = df_aux[column].fillna(sentinel).round(decimals)
        # Only the integer group is cast to int; the float groups stay as rounded floats.
        df_aux[column] = cleaned.astype(int) if group == "round_int" else cleaned

In [189]:
# Select the C0300 columns in the order listed in `options`; a label repeated in that
# list (e.g. "null_int") is selected once per occurrence.
dff=df_aux[list(options["C0300"])]


In [190]:
# Display the C0300 selection (the notebook output below is a truncated preview).
dff

Unnamed: 0,day,minute,UPWARD_SW_Avg,UPWARD_SW_Std,UPWARD_SW_Min,UPWARD_SW_Max,UPWARD_LW_Avg,UPWARD_LW_Std,UPWARD_LW_Min,UPWARD_LW_Max,null_int,null_float1,null_int.1,null_int.2
10440,1,0,4,0.2,4,5,469,0.3,469,470,-999,-99.9,-999,-999
10441,1,1,4,0.2,3,4,470,0.3,469,470,-999,-99.9,-999,-999
10442,1,2,3,0.1,3,4,469,0.2,469,470,-999,-99.9,-999,-999
10443,1,3,3,0.0,3,3,469,0.3,468,469,-999,-99.9,-999,-999
10444,1,4,3,0.0,3,3,469,0.1,468,469,-999,-99.9,-999,-999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55075,31,1435,3,0.0,3,3,477,0.1,476,477,-999,-99.9,-999,-999
55076,31,1436,3,0.0,3,3,476,0.4,475,477,-999,-99.9,-999,-999
55077,31,1437,3,0.0,3,3,476,0.2,475,476,-999,-99.9,-999,-999
55078,31,1438,3,0.1,3,3,476,0.3,476,477,-999,-99.9,-999,-999
