In [None]:
import csv
import pandas as pd
from pathlib import Path

In [None]:
# Resolve the repository root. When the kernel was started from inside the
# "notebooks" folder the root is one directory up; otherwise the current
# working directory is taken to be the root already.
ROOT = Path.cwd()
ROOT = ROOT.parent if ROOT.name == "notebooks" else ROOT

# Location of the raw wind measurements, stored as a plain string.
wind_data = str(ROOT / "data" / "SyslabWind.csv")

In [None]:
def clean_wind_data(file_path,resolution,delim=",",save_as_new_file=False):
    """
    Load the wind CSV, drop the Gaia turbine columns and downsample in time.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path of the raw CSV file. It is handed to ``pd.read_csv`` unchanged.
    resolution : int
        Width of the resampling bins, in minutes.
    delim : str, default ","
        Field delimiter passed to ``pd.read_csv``.
    save_as_new_file : bool, default False
        When truthy, the result is also written next to the input file as
        ``<input stem>_<resolution>min.csv``.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by ``ts`` containing the per-bin mean of every remaining
        column. ``None`` is returned (after printing the error) when the file
        cannot be read.

    Raises
    ------
    KeyError
        If one of the expected Gaia columns or the ``ts`` column is missing.
    """
    try:
        df = pd.read_csv(file_path, delimiter=delim)
    except Exception as e:
        # Best-effort read: report the problem and hand back None instead of
        # raising, so callers must check the result.
        print(f"Error reading file: {e}")
        return None

    # Gaia turbine channels that are not used downstream (the original note
    # describes them as empty columns).
    unused_gaia_columns = [
        "EGen_GaiaM_CT_STATE | syslab-03/gaia1/330_12/State_Gaia | 1205565",
        "EGen_GaiaM_MT_WINDSPEED | syslab-03/gaia1/330_12/W_Gaia | 1205561",
        "EGen_GaiaM_ME_RPM0 | syslab-03/gaia1/330_12/RPM0_Gaia | 1205562",
        "EGen_GaiaM_ME_RPM1 | syslab-03/gaia1/330_12/RPM1_Gaia | 1205563",
        "EGen_GaiaM_ED_U | syslab-03/gaia1/330_12/U_Gaia | 1205564",
    ]
    df = df.drop(columns=unused_gaia_columns)

    # Give the two power channels short, readable names.
    df = df.rename(columns={
        "EGen_GaiaM_ED_P | syslab-50/330_12/P_Gaia | 804165": "Gaia_WT Power",
        "EGen_AirconM_ED_P | syslab-01/319_2/P_Aircon | 804166": "Aircon_WT Power",
    })

    # Only the Aircon turbine is kept; the Gaia power channel is discarded.
    df = df.drop(columns="Gaia_WT Power")

    # A DatetimeIndex is required for time-based resampling.
    df["ts"] = pd.to_datetime(df["ts"])
    df = df.set_index("ts")

    # Average all samples that fall into each `resolution`-minute bin.
    df = df.resample(f"{resolution}min").mean()

    if save_as_new_file:
        # Build the output name from the stem so that Path inputs and
        # extensions of any length are handled (slicing off the last four
        # characters only works for str paths ending in a 3-letter suffix).
        source = Path(file_path)
        df.to_csv(source.with_name(f"{source.stem}_{resolution}min.csv"), index=True)

    return df



In [None]:
def clean_wind_data_nozeros(file_path,resolution,delim=",",save_as_new_file=False):
    """
    Load the wind CSV, clamp negative Aircon power to zero and downsample in time.

    Despite the name, this function does not remove zeros: it replaces negative
    ``Aircon_WT Power`` readings with 0 *before* averaging, so negative samples
    cannot pull the bin means down.

    Parameters
    ----------
    file_path : str or os.PathLike
        Path of the raw CSV file. It is handed to ``pd.read_csv`` unchanged.
    resolution : int
        Width of the resampling bins, in minutes.
    delim : str, default ","
        Field delimiter passed to ``pd.read_csv``.
    save_as_new_file : bool, default False
        When truthy, the result is also written next to the input file as
        ``<input stem>_<resolution>min_nozeros.csv``.

    Returns
    -------
    pandas.DataFrame or None
        Frame indexed by ``ts`` containing the per-bin mean of every remaining
        column. ``None`` is returned (after printing the error) when the file
        cannot be read.

    Raises
    ------
    KeyError
        If one of the expected Gaia/Aircon columns or the ``ts`` column is missing.
    """
    try:
        df = pd.read_csv(file_path, delimiter=delim)
    except Exception as e:
        # Best-effort read: report the problem and hand back None instead of
        # raising, so callers must check the result.
        print(f"Error reading file: {e}")
        return None

    # Gaia turbine channels that are not used downstream (the original note
    # describes them as empty columns).
    unused_gaia_columns = [
        "EGen_GaiaM_CT_STATE | syslab-03/gaia1/330_12/State_Gaia | 1205565",
        "EGen_GaiaM_MT_WINDSPEED | syslab-03/gaia1/330_12/W_Gaia | 1205561",
        "EGen_GaiaM_ME_RPM0 | syslab-03/gaia1/330_12/RPM0_Gaia | 1205562",
        "EGen_GaiaM_ME_RPM1 | syslab-03/gaia1/330_12/RPM1_Gaia | 1205563",
        "EGen_GaiaM_ED_U | syslab-03/gaia1/330_12/U_Gaia | 1205564",
    ]
    df = df.drop(columns=unused_gaia_columns)

    # Give the two power channels short, readable names.
    df = df.rename(columns={
        "EGen_GaiaM_ED_P | syslab-50/330_12/P_Gaia | 804165": "Gaia_WT Power",
        "EGen_AirconM_ED_P | syslab-01/319_2/P_Aircon | 804166": "Aircon_WT Power",
    })

    # Only the Aircon turbine is kept; the Gaia power channel is discarded.
    df = df.drop(columns="Gaia_WT Power")

    # Clamp negative readings to zero. This must happen before resampling:
    # clamping the bin means afterwards would give different numbers.
    df["Aircon_WT Power"] = df["Aircon_WT Power"].clip(lower=0)

    # A DatetimeIndex is required for time-based resampling.
    df["ts"] = pd.to_datetime(df["ts"])
    df = df.set_index("ts")

    # Average all samples that fall into each `resolution`-minute bin.
    df = df.resample(f"{resolution}min").mean()

    if save_as_new_file:
        # Build the output name from the stem so that Path inputs and
        # extensions of any length are handled (slicing off the last four
        # characters only works for str paths ending in a 3-letter suffix).
        source = Path(file_path)
        df.to_csv(
            source.with_name(f"{source.stem}_{resolution}min_nozeros.csv"),
            index=True,
        )

    return df



In [None]:
# Downsample the raw wind file to 15-minute bins and also write the result to disk.
clean_wind_data(
    file_path=wind_data,
    resolution=15,
    save_as_new_file=True,
)

In [None]:
# Same 15-minute downsampling, using the variant that sets negative Aircon
# power readings to zero; the result is also written to disk.
clean_wind_data_nozeros(
    file_path=str(wind_data),
    resolution=15,
    save_as_new_file=True,
)