# Formatting DEOP Data

In [20]:
import pandas as pd
import numpy as np
import os

In [None]:
# Folder holding the raw monthly DEOP exports (relative to this notebook).
DEOP_Path = '../../Private_Data/DEOP/'

# Checking if the necessary files exist for formatting.
# os.listdir raises FileNotFoundError on a missing folder, so test for the
# folder first; otherwise the warning could never be shown in that case.
if not os.path.isdir(DEOP_Path) or not any(
        os.path.isfile(os.path.join(DEOP_Path, f)) for f in os.listdir(DEOP_Path)):
    print(f"Warning: This script will not run without the original DEOP data in {DEOP_Path}.")

In [22]:
# Year and inclusive month range of the export files to load.
year = '2023'
start_month = 1
end_month = 12

# Layout of the combined 'date0 time0' text (12-hour clock with AM/PM marker),
# so that it can be parsed into real timestamps.
format_date = "%m/%d/%Y %I:%M:%S %p"

months = []

# Read in each month file individually.
for ii in range(start_month, end_month + 1):
    month = f'{year}{ii:02d}'
    print(month)

    # Build the path from DEOP_Path so the files read here are the ones the
    # existence check at the top of the notebook looked at.
    month_file = os.path.join(DEOP_Path, f'DEOP-export-AD Campus Renewables {month}.csv')
    df = pd.read_csv(month_file, sep=';',
                     dtype={'date0': str, 'time0': str, 'power-con-ave': float,
                            'power-gen-wt-ave': float, 'power-gen-pv-ave': float})

    # Format the date and time so they are readable by Python.
    df['DateTime'] = pd.to_datetime(df['date0'] + ' ' + df['time0'], format=format_date)
    months.append(df)

# Stack the monthly frames into one frame with a fresh 0..n-1 index.
combined_months = pd.concat(months, ignore_index=True)
print(combined_months.columns)

202301
202302
202303
202304
202305
202306
202307
202308
202309
202310
202311
202312
Index(['date0', 'time0', 'power-ss13-max', 'power-ss13-min', 'power-ss13-ave',
       'date1', 'time1', 'power-gen-max', 'power-gen-min', 'power-gen-ave',
       'date2', 'time2', 'power-con-max', 'power-con-min', 'power-con-ave',
       'date3', 'time3', 'power-gen-wt-max', 'power-gen-wt-min',
       'power-gen-wt-ave', 'date4', 'time4', 'power-gen-pv-max',
       'power-gen-pv-min', 'power-gen-pv-ave', 'date5', 'time5',
       'storage-charge-max', 'storage-charge-min', 'storage-charge-ave',
       'DateTime'],
      dtype='object')


In [23]:
# Set the index to DateTime.
combined_months.index = combined_months['DateTime']

# Filter so the data are of the same length as the Solcast data.
# The window is derived from `year` (rather than a literal 2023) so that
# changing the year in the loading cell does not silently drop every row.
window_start = pd.Timestamp(f'{year}-01-01 00:05')
window_end = pd.Timestamp(f'{year}-12-31 23:55')
combined_months = combined_months.loc[(combined_months.index >= window_start)
                     & (combined_months.index <= window_end)]

In [24]:
# Every missing value in the frame becomes zero.
combined_months = combined_months.fillna(0)

# Floor the averaged consumption and production series at zero so that no
# negative readings remain.
for power_column in ('power-con-ave', 'power-gen-wt-ave', 'power-gen-pv-ave'):
    combined_months[power_column] = combined_months[power_column].clip(lower=0)

In [25]:
# Work on an independent copy so the original frame stays untouched.
averaged_comb_months = combined_months.copy()

# Interpolating power consumption: zeros are marked as missing and then
# filled in from the neighbouring readings.
# NOTE(review): interpolate() fills forward by default, so zeros at the very
# start of the series would remain NaN — confirm this cannot occur in the data.
averaged_comb_months['power-con-ave'] = (
    averaged_comb_months['power-con-ave']
    .replace(0, np.nan)
    .interpolate()
)


In [26]:
# Both exports carry the same three power series plus the timestamp column;
# the DateTime index itself is not written.
export_columns = ['power-con-ave', 'power-gen-wt-ave', 'power-gen-pv-ave', 'DateTime']

# The original data.
combined_months.to_csv(f'../../Data/DEOP/{year}_DEOP.csv',
                       columns=export_columns, index=False)

# The dataset which includes interpolated data.
averaged_comb_months.to_csv(f'../../Data/DEOP/{year}_DEOP_Interp.csv',
                            columns=export_columns, index=False)