In [None]:
import pandas as pd
from pathlib import Path

In [None]:
def clean_pv_data(file_path,resolution,delim=",",save_as_new_file=False):
    """
    Resample the building 715 PV series of a PV measurement CSV.

    The CSV is read, its ``ts`` column is parsed to datetimes and used as the
    index, the PV columns of the other units are dropped, the building 715
    column is renamed to ``"PV B715"`` and the frame is resampled to the mean
    of each ``resolution``-minute bin. Columns that are neither dropped nor
    renamed are left in place.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Location of the input CSV.
    resolution : int or str
        Bin width in minutes; it is interpolated into the pandas frequency
        string ``f"{resolution}min"``.
    delim : str, default ","
        Field delimiter handed to ``pd.read_csv``.
    save_as_new_file : bool, default False
        When truthy, the result is also written next to the input file as
        ``<input stem>_<resolution>min.csv``.

    Returns
    -------
    pandas.DataFrame or None
        The resampled frame indexed by ``ts``, or ``None`` when the file
        could not be read (the error is printed instead of raised).

    Raises
    ------
    KeyError
        If the ``ts`` column or any of the columns to drop is missing.
    """
    # Reading is best-effort: report the problem and hand back None.
    try:
        df = pd.read_csv(file_path, delimiter=delim)
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

    # resample() needs a datetime-like index.
    df["ts"] = pd.to_datetime(df["ts"])
    df = df.set_index("ts")

    # PV columns of every unit except building 715.
    other_units = [
        "117_2_P_PVkW | syslab-11/117_2/P_PV | 804158",
        "319_2_P_PVkW | syslab-01/319_2/P_PV | 804159",
        "330_12_P_PV_1kW | syslab-50/330_12/P_PV_1 | 804160",
        "330_12_P_PVBatt_2kW | syslab-50/330_12/P_PVBatt_2 | 804161",
        "330_12_P_PVBatt_3kW | syslab-50/330_12/P_PVBatt_3 | 804162",
        "716_2_P_PVkW | syslab-29/716_2/P_PV | 804164",
    ]
    df = df.drop(columns=other_units)
    df = df.rename(columns={"715_2_P_PVkW | syslab-09/715_2/P_PV | 804163": "PV B715"})

    # Average all samples that fall into the same resolution-minute bin.
    df = df.resample(f"{resolution}min").mean()

    if save_as_new_file:
        # Build the output name from the stem rather than slicing off four
        # characters, so Path arguments and extensions of any length work.
        source = Path(file_path)
        target = source.with_name(f"{source.stem}_{resolution}min.csv")
        df.to_csv(target, index=True)
    return df

In [None]:
def clean_pv_data_no_zeros(file_path,resolution,delim=",",save_as_new_file=False):
    """
    Resample the building 715 PV series after flooring negative readings at 0.

    The CSV is read, its ``ts`` column is parsed to datetimes and used as the
    index, the PV columns of the other units are dropped and the building 715
    column is renamed to ``"PV B715"``. Negative ``"PV B715"`` values are then
    replaced by 0 and the frame is resampled to the mean of each
    ``resolution``-minute bin.

    Note that, despite the function name and the ``_nozeros`` file suffix,
    zero readings are not removed; only negative values are changed.

    Parameters
    ----------
    file_path : str or pathlib.Path
        Location of the input CSV.
    resolution : int or str
        Bin width in minutes; it is interpolated into the pandas frequency
        string ``f"{resolution}min"``.
    delim : str, default ","
        Field delimiter handed to ``pd.read_csv``.
    save_as_new_file : bool, default False
        When truthy, the result is also written next to the input file as
        ``<input stem>_<resolution>min_nozeros.csv``.

    Returns
    -------
    pandas.DataFrame or None
        The resampled frame indexed by ``ts``, or ``None`` when the file
        could not be read (the error is printed instead of raised).

    Raises
    ------
    KeyError
        If the ``ts`` column, any of the columns to drop, or the building
        715 column is missing.
    """
    # Reading is best-effort: report the problem and hand back None.
    try:
        df = pd.read_csv(file_path, delimiter=delim)
    except Exception as e:
        print(f"Error reading file: {e}")
        return None

    # resample() needs a datetime-like index.
    df["ts"] = pd.to_datetime(df["ts"])
    df = df.set_index("ts")

    # PV columns of every unit except building 715.
    other_units = [
        "117_2_P_PVkW | syslab-11/117_2/P_PV | 804158",
        "319_2_P_PVkW | syslab-01/319_2/P_PV | 804159",
        "330_12_P_PV_1kW | syslab-50/330_12/P_PV_1 | 804160",
        "330_12_P_PVBatt_2kW | syslab-50/330_12/P_PVBatt_2 | 804161",
        "330_12_P_PVBatt_3kW | syslab-50/330_12/P_PVBatt_3 | 804162",
        "716_2_P_PVkW | syslab-29/716_2/P_PV | 804164",
    ]
    df = df.drop(columns=other_units)
    df = df.rename(columns={"715_2_P_PVkW | syslab-09/715_2/P_PV | 804163": "PV B715"})

    # Floor negative readings at 0 before averaging, so they cannot pull the
    # bin means below zero.
    negative = df["PV B715"] < 0
    df.loc[negative, "PV B715"] = 0

    # Average all samples that fall into the same resolution-minute bin.
    df = df.resample(f"{resolution}min").mean()

    if save_as_new_file:
        # Build the output name from the stem rather than slicing off four
        # characters, so Path arguments and extensions of any length work.
        source = Path(file_path)
        target = source.with_name(f"{source.stem}_{resolution}min_nozeros.csv")
        df.to_csv(target, index=True)
    return df


In [None]:
# Resolve the repository root from the working directory: when the kernel was
# started inside a "notebooks" folder the root is one level up, otherwise the
# working directory itself is used.
ROOT = Path.cwd()
ROOT = ROOT.parent if ROOT.name == "notebooks" else ROOT

# PV data file, addressed relative to the repository root.
pv_data = ROOT.joinpath("data", "SyslabPV.csv")

In [None]:
# Resample the PV file to 15-minute means and also write the result to a new
# CSV next to the input file.
clean_pv_data(
    file_path=str(pv_data),
    resolution=15,
    save_as_new_file=True,
)

In [None]:
# Same 15-minute resampling, with negative readings set to 0 first.
# The resolution is passed as an int, matching the clean_pv_data call; the
# resulting frequency string and output file name are unchanged.
clean_pv_data_no_zeros(str(pv_data), 15, save_as_new_file=True)