# Pre-Processing Module: 

In [1]:
import pandas as pd
from pathlib import Path
import os

In [7]:
## Function to convert AC power at point of interconnection (Grid storage) to DC power at battery terminals
def ACtoDC(df, efficiences):
    """Convert AC-side cycle quantities to DC quantities at the battery terminals.

    Discharge columns are divided by ``inverter_eff * batt_dc_disch_eff`` and
    charge columns are multiplied by ``inverter_eff * batt_dc_ch_eff``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``max_disch_MW``, ``max_ch_MW``, ``avg_disch_MW``,
        ``avg_ch_MW``, ``disch_th_kWh`` and ``ch_th_kWh``.
    efficiences : mapping
        Must contain ``inverter_eff``, ``batt_dc_disch_eff`` and
        ``batt_dc_ch_eff``. (The parameter name keeps its historical spelling
        so existing keyword callers continue to work.)

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object; the six columns are overwritten in place, so
        calling this twice on one frame applies the efficiencies twice.
    """
    # Read from the argument itself; the body previously referenced a
    # differently spelled name and therefore never used what was passed in.
    inverter_eff = efficiences['inverter_eff']
    disch_factor = inverter_eff * efficiences['batt_dc_disch_eff']
    ch_factor = inverter_eff * efficiences['batt_dc_ch_eff']

    # (discharge column, charge column) pairs sharing the same treatment.
    column_pairs = (
        ('max_disch_MW', 'max_ch_MW'),
        ('avg_disch_MW', 'avg_ch_MW'),
        ('disch_th_kWh', 'ch_th_kWh'),
    )
    for disch_col, ch_col in column_pairs:
        df[disch_col] = df[disch_col] / disch_factor
        df[ch_col] = df[ch_col] * ch_factor

    return df

In [2]:
## Function to read power timeseries data from csv
def get_gd(path):
    """Read a battery power timeseries from CSV.

    Parameters
    ----------
    path : str, path-like or file-like
        Location of the timeseries CSV. It must contain a
        ``"Battery Power Output (MW)"`` column and a ``"time/s"`` column
        holding seconds, which is converted with ``pd.to_datetime(unit='s')``.

    Returns
    -------
    pandas.DataFrame
        Two columns: ``date`` (datetime) and ``disch_MW``.

    Raises
    ------
    KeyError
        If either expected column is missing from the file.
    """
    # ISO-8859-1 is used because UTF-8 decoding throws errors on these files.
    raw = pd.read_csv(path, encoding = "ISO-8859-1")
    raw = raw.rename(columns = {"Battery Power Output (MW)":"disch_MW",
                                "time/s":"date"})
    raw['date'] = pd.to_datetime(raw['date'], unit = 's')

    # Explicit copy so the assignment below writes to an independent frame
    # rather than to a selection of ``raw`` (avoids SettingWithCopyWarning).
    df = raw[['date','disch_MW']].copy()
    df.loc[df['disch_MW']==0,'disch_MW']=0 # ensuring all zeros are positive (-0.0 -> 0)

    return df

In [3]:
## Rainflow algorithm to characterise timeseries into cycle dataframe
def process_gd(f):
    """Segment a power timeseries into cycles and summarise each one.

    The series is walked sample by sample. Every reversal of the sign of
    ``disch_MW`` (ignoring zeros) toggles an internal flag, and a cycle is
    closed on every second reversal. Positive power is treated as discharge,
    negative power as charge, and zero as rest.

    Parameters
    ----------
    f : pandas.DataFrame
        Needs a datetime ``date`` column and a numeric ``disch_MW`` column.
        Rows are processed in their stored order.

    Returns
    -------
    pandas.DataFrame
        One row per cycle with columns ``cycle_num``, ``time``, ``disch_dur``,
        ``ch_dur``, ``rest_dur``, ``max_disch_MW``, ``max_ch_MW``,
        ``avg_disch_MW``, ``avg_ch_MW``, ``disch_th_kWh``, ``ch_th_kWh`` and
        ``rest_imb_kWh``. Durations and ``time`` are in seconds. Charge values
        keep their negative sign (``max_ch_MW`` is the most negative sample).
        An empty input yields an empty frame with these columns.

    Notes
    -----
    * Leading samples with zero power are skipped and are not counted in
      ``time``.
    * The final sample only closes the last cycle; its own power value is not
      accumulated.
    * Averages divide by ``duration / step`` using the most recent time step,
      i.e. they assume a uniform sampling interval.
    """
    # Imported locally: the module's import cell does not bring numpy in.
    import numpy as np

    df_cols = ['cycle_num','time','disch_dur','ch_dur','rest_dur',
                'max_disch_MW','max_ch_MW','avg_disch_MW','avg_ch_MW',
                'disch_th_kWh','ch_th_kWh', 'rest_imb_kWh']
    kwh_per_mw_second = 1000/3600  # MW * s -> kWh

    def _per_sample_mean(total, duration, step, message=None):
        """Return ``total`` averaged over ``duration / step`` samples, or 0 if none."""
        try:
            return total/(duration/step)
        except ZeroDivisionError:
            if message is not None:
                print(message)
            return 0

    def _blank_totals():
        """Return zeroed accumulators for a new cycle."""
        return {'disch_dur': 0, 'ch_dur': 0, 'rest_dur': 0,
                'max_disch': 0, 'max_ch': 0,
                'disch_sum': 0, 'ch_sum': 0,
                'disch_th': 0, 'ch_th': 0, 'rest_th_imb': 0}

    def _cycle_row(number, began, acc, step, announce):
        """Build one output row (ordered like ``df_cols``) from the accumulators."""
        avg_disch = _per_sample_mean(acc['disch_sum'], acc['disch_dur'], step,
                                     'avg_disch error' if announce else None)
        avg_ch = _per_sample_mean(acc['ch_sum'], acc['ch_dur'], step,
                                  'avg_ch error' if announce else None)
        rest_imb = _per_sample_mean(acc['rest_th_imb'], acc['rest_dur'], step)
        return [number, began, acc['disch_dur'], acc['ch_dur'], acc['rest_dur'],
                acc['max_disch'], acc['max_ch'], avg_disch, avg_ch,
                acc['disch_th'], acc['ch_th'], rest_imb]

    if len(f) == 0:
        return pd.DataFrame(columns = df_cols)

    # Plain lists give positional access and avoid per-row Series creation.
    dates = f['date'].tolist()
    powers = f['disch_MW'].tolist()

    rows = []
    cycle_num = 0
    start_time = 0
    acc = _blank_totals()

    dt = 0  # stays 0 only for a single-row input, where every average falls back to 0
    half_cycle_flag = 1  # flips on each sign reversal; a cycle closes when it returns to +1
    seen_activity = False
    prev_power = powers[0]

    # Every sample except the last one is accumulated.
    for pos, power in enumerate(powers[:-1]):
        dt = (dates[pos+1]-dates[pos]).total_seconds()

        if not seen_activity:
            if power == 0:
                continue  # ignore zero-power rows at the start
            seen_activity = True
            prev_power = power

        # Zero samples never count as a reversal; they are rest within a cycle.
        if np.sign(power) != np.sign(prev_power) and power != 0:
            half_cycle_flag = -half_cycle_flag
            if half_cycle_flag > 0:
                rows.append(_cycle_row(cycle_num, start_time, acc, dt, True))
                start_time = (start_time + acc['disch_dur']
                              + acc['ch_dur'] + acc['rest_dur'])
                cycle_num += 1
                acc = _blank_totals()

        if power > 0:  # discharging
            acc['disch_dur'] += dt
            acc['disch_sum'] += power
            acc['disch_th'] += power*dt*kwh_per_mw_second
            if power > acc['max_disch']:
                acc['max_disch'] = power
            prev_power = power
        elif power < 0:  # charging
            acc['ch_dur'] += dt
            acc['ch_sum'] += power
            acc['ch_th'] += power*dt*kwh_per_mw_second
            if power < acc['max_ch']:
                acc['max_ch'] = power
            prev_power = power
        else:  # resting
            acc['rest_dur'] += dt
            # sum of imbalance volumes at rest (averaged when the cycle closes)
            acc['rest_th_imb'] += acc['disch_th'] + acc['ch_th']

    # The last sample closes whatever cycle is still open.
    rows.append(_cycle_row(cycle_num, start_time, acc, dt, False))

    # Build the frame once; DataFrame.append no longer exists in pandas 2.x.
    return pd.DataFrame(rows, columns = df_cols)

In [None]:
def graphGridData(res_path,df):
    """Plot per-cycle throughput and power side by side and save the figure.

    The left panel shows discharge and charge throughput against cycle number;
    the right panel shows average and maximum discharge/charge power. The
    figure is saved under ``res_path/'01_Load'``.

    NOTE(review): relies on ``plt`` and ``today`` being defined elsewhere in
    the notebook (presumably matplotlib.pyplot and a date string) - confirm.

    Parameters
    ----------
    res_path : path-like supporting the ``/`` operator
        Results directory.
    df : pandas.DataFrame
        Frame with ``cycle_num`` plus the throughput and power columns plotted
        below.
    """
    # Each panel: (y-axis label, ((column, legend label, colour), ...)).
    panels = (
        ('Throughput (kWh)', (
            ('disch_th_kWh', 'Discharge', 'turquoise'),
            ('ch_th_kWh', 'Charge', 'tomato'),
        )),
        ('Power (MW)', (
            ('avg_disch_MW', 'Avg Discharge', 'teal'),
            ('avg_ch_MW', 'Avg Charge', 'orange'),
            ('max_disch_MW', 'Max Discharge', 'springgreen'),
            ('max_ch_MW', 'Max Charge', 'r'),
        )),
    )

    fig, (left_ax, right_ax) = plt.subplots(1,2, figsize =(10,6))

    for axis, (y_label, traces) in zip((left_ax, right_ax), panels):
        for column, legend_label, colour in traces:
            axis.plot(df['cycle_num'], df[column], label = legend_label, c = colour)
        axis.set_ylabel(y_label)
        axis.set_xlabel('Cycle Number')
        axis.legend()

    out_file = res_path/'01_Load'/f'{today}_Load Plot'
    fig.savefig(out_file)