In [1]:
import pandas as pd
import numpy as np

# Settings

In [2]:
# Charging-session export to process; the commented-out line is an alternative input file.
series_csv = "../../HERON/20210101_to_20211014/EVB-P1840070_Heron_HQ_Parking.csv"
# series_csv = "../../HERON/20210101_to_20211014/EVB-P1941076_Heron_Parking.csv"

# Width of one output time step, in minutes.
resolution = 15

# The output file is named after the input file: the part of the path after the last
# "/" and before the first ".", followed by a suffix with a hard-coded date range and
# the chosen resolution.
input_file_name = series_csv.rsplit("/", 1)[-1]
input_stem = input_file_name.partition(".")[0]
output_csv = f"{input_stem}_mean_demand_20210102_to_20211013_{resolution}min.csv"

# Read csv

In [3]:
# Load the raw session table. The first line of the file supplies the column names,
# no column is used as the index and no date parsing happens at this stage.
read_options = dict(sep=",", header=0, index_col=None, parse_dates=False)
ts = pd.read_csv(series_csv, **read_options)
ts.head()

Unnamed: 0,Connector ID,Card ID,Start date,End date,Duration,kWh,Charging cost (EUR),Transaction fee (EUR),Reimbursable (EUR),Charging Type
0,1838140,GR-ENG-C00106872-P,1/2/2021 10:36,1/2/2021 20:09,9:33:21,6.6,0,0,1.32,Private
1,1838140,GR-ENG-C00106872-P,1/4/2021 8:40,1/4/2021 19:54,11:14:11,3.57,0,0,0.71,Private
2,1838254,GR-ENG-C00106872-P,1/4/2021 9:17,1/4/2021 15:20,6:02:07,41.6,0,0,8.32,Private
3,1838254,GR-ENG-C00106872-P,1/4/2021 16:07,1/4/2021 17:13,1:06:24,5.94,0,0,1.19,Private
4,1838140,GR-ENG-C00106872-P,1/5/2021 8:53,1/5/2021 17:40,8:46:23,15.24,0,0,3.05,Private


# Drop useless columns and create useful ones
Sessions lasting five minutes or less are dropped as well.

In [4]:
# Columns that play no part in the demand computation. The textual "Duration" column
# goes too, because the duration is recomputed below from the two timestamps.
unused_columns = [
    "Card ID",
    "Charging cost (EUR)",
    "Transaction fee (EUR)",
    "Reimbursable (EUR)",
    "Charging Type",
    "Duration",
]
ts = ts.drop(columns=unused_columns)

# NOTE(review): no explicit format is passed, so the day/month order is left to
# pandas' inference - confirm it matches the source data.
for date_column in ("Start date", "End date"):
    ts[date_column] = pd.to_datetime(ts[date_column])

# Session length in hours.
ts["Duration (h)"] = (ts["End date"] - ts["Start date"]).dt.total_seconds() / 3600

# Keep only the sessions that last longer than 5 minutes.
ts = ts.loc[ts["Duration (h)"] > 5 / 60].copy()

# Average power of a session: delivered energy divided by session length.
ts["Mean Demand (kw)"] = ts["kWh"] / ts["Duration (h)"]
ts.head()

Unnamed: 0,Connector ID,Start date,End date,kWh,Duration (h),Mean Demand (kw)
0,1838140,2021-01-02 10:36:00,2021-01-02 20:09:00,6.6,9.55,0.691099
1,1838140,2021-01-04 08:40:00,2021-01-04 19:54:00,3.57,11.233333,0.317804
2,1838254,2021-01-04 09:17:00,2021-01-04 15:20:00,41.6,6.05,6.876033
3,1838254,2021-01-04 16:07:00,2021-01-04 17:13:00,5.94,1.1,5.4
4,1838140,2021-01-05 08:53:00,2021-01-05 17:40:00,15.24,8.783333,1.735104


# Create in / out power and total power dataset

## in (plug) / out (unplug) power datasets

In [5]:
# Plug-in events: indexed by the session start, each row carries the (positive)
# differential contribution of that session to the station demand.
plug = (
    ts[["Start date", "Mean Demand (kw)"]]
    .rename(columns={"Mean Demand (kw)": "Diff Demand (kw)"})
    .set_index("Start date")
)

# Plug-out events: indexed by the session end, each row carries the same contribution
# with its sign flipped, because unplugging removes the session from the demand.
unplug = -(
    ts[["End date", "Mean Demand (kw)"]]
    .rename(columns={"Mean Demand (kw)": "Diff Demand (kw)"})
    .set_index("End date")
)
unplug.head(20)


Unnamed: 0_level_0,Diff Demand (kw)
End date,Unnamed: 1_level_1
2021-01-02 20:09:00,-0.691099
2021-01-04 19:54:00,-0.317804
2021-01-04 15:20:00,-6.876033
2021-01-04 17:13:00,-5.4
2021-01-05 17:40:00,-1.735104
2021-01-05 10:01:00,-4.287805
2021-01-05 18:43:00,-0.699723
2021-01-08 18:43:00,-0.25142
2021-01-07 19:05:00,-3.02862
2021-01-08 18:48:00,-1.451309


## Regularize time scales and normalize middle values
Irregular timestamps have to be removed in order to obtain a time series at the selected resolution.
### Create regular time index

In [6]:
def build_regular_frame(event_times, step_minutes):
    """Return an all-NaN frame indexed by a regular grid spanning ``event_times``.

    Parameters
    ----------
    event_times : pd.DatetimeIndex
        Timestamps of the events the grid has to span.
    step_minutes : int
        Distance between two consecutive grid timestamps, in minutes.

    Returns
    -------
    pd.DataFrame
        A frame with a single, empty "Diff Demand (kw)" column whose index runs from
        midnight of the earliest event's date to midnight of the latest event's date.

    Notes
    -----
    The earliest / latest timestamps are looked up with ``min()`` / ``max()`` instead
    of being read from the first / last position: the events are not guaranteed to be
    stored in chronological order (the unplug times are not), so the positional
    lookup could produce a grid that misses events.
    """
    # NOTE(review): the grid stops at midnight of the last date, so events later on
    # that day lie beyond the last grid timestamp - confirm this is intended.
    grid = pd.date_range(
        start=event_times.min().date(),
        end=event_times.max().date(),
        freq=f"{step_minutes}min",
    )
    return pd.DataFrame(index=grid, columns=["Diff Demand (kw)"])


# in
regular_index_plug = build_regular_frame(plug.index, resolution)

# out
regular_index_unplug = build_regular_frame(unplug.index, resolution)

# This name used to hold the most recently built grid; it stays defined for any cell
# that still reads it.
periodic_index = regular_index_unplug.index
regular_index_unplug.head()


Unnamed: 0,Diff Demand (kw)
2021-01-02 00:00:00,
2021-01-02 00:15:00,
2021-01-02 00:30:00,
2021-01-02 00:45:00,
2021-01-02 01:00:00,


### Merge scales
Both datasets are merged with their regular indices. The cumulative sum of the differential contributions then gives the demand contribution of each dataset at every timestamp.

In [7]:
def merge_with_regular_index(events, regular_frame, cumulative_column):
    """Merge event differentials with a regular grid and accumulate them.

    Parameters
    ----------
    events : pd.DataFrame
        Frame indexed by event time with a "Diff Demand (kw)" column.
    regular_frame : pd.DataFrame
        Frame indexed by the regular grid with an (empty) "Diff Demand (kw)" column.
    cumulative_column : str
        Name of the column that receives the running total of the differentials.

    Returns
    -------
    pd.DataFrame
        One row per distinct timestamp, in chronological order, with the columns
        "Diff Demand (kw)" and ``cumulative_column``. Missing differentials count
        as 0.

    Notes
    -----
    Rows that share a timestamp are summed *before* the cumulative sum is taken.
    Removing duplicates after sorting instead would keep an arbitrary row among the
    tied ones (the default sort is not stable), so an event that falls exactly on a
    grid timestamp, or two events in the same minute, could lose its differential.
    """
    diff_column = "Diff Demand (kw)"
    # The grid carries no demand of its own: make it numeric zeros up front so the
    # concatenation only ever sees float columns.
    grid_zeros = regular_frame[[diff_column]].astype("float64").fillna(0.0)
    stacked = pd.concat(
        [events[[diff_column]].astype("float64"), grid_zeros], join="outer"
    ).fillna(0.0)
    # groupby sorts the timestamps and leaves exactly one row for each of them
    merged = stacked.groupby(level=0).sum()
    merged[cumulative_column] = merged[diff_column].cumsum()
    return merged


# in
plug_aligned = merge_with_regular_index(plug, regular_index_plug, "inDemand (kw)")

# out
unplug_aligned = merge_with_regular_index(unplug, regular_index_unplug, "outDemand (kw)")
plug_aligned.head(30)

Unnamed: 0,Diff Demand (kw),inDemand (kw)
2021-01-02 00:00:00,0.0,0.0
2021-01-02 00:15:00,0.0,0.0
2021-01-02 00:30:00,0.0,0.0
2021-01-02 00:45:00,0.0,0.0
2021-01-02 01:00:00,0.0,0.0
2021-01-02 01:15:00,0.0,0.0
2021-01-02 01:30:00,0.0,0.0
2021-01-02 01:45:00,0.0,0.0
2021-01-02 02:00:00,0.0,0.0
2021-01-02 02:15:00,0.0,0.0


### Move to regular scale, normalizing mid values
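Both datasets are then converted to the regular index scale through normalization: the value of each irregular timestamp is folded proportionally into the preceding regular timestamp. Irregular indices are dropped afterwards.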

In [8]:
def collapse_onto_regular_index(aligned, cumulative_column, step_minutes):
    """Fold irregular rows into the regular row that precedes them.

    A row is regular when its minute is a multiple of ``step_minutes``. Each regular
    timestamp ``t`` stands for the slot that starts at ``t``. An event that happens a
    fraction ``f`` into the slot only affects the remaining ``1 - f`` of it, so the
    slot value is the cumulative value after the last event of the slot minus
    ``diff * f`` for every event of the slot.

    Parameters
    ----------
    aligned : pd.DataFrame
        Frame with a DatetimeIndex and the columns "Diff Demand (kw)" and
        ``cumulative_column`` (the running total of the differentials).
    cumulative_column : str
        Name of the cumulative column to normalize.
    step_minutes : int
        Resolution of the regular index, in minutes.

    Returns
    -------
    pd.DataFrame
        A copy of the regular rows of ``aligned`` in which ``cumulative_column``
        holds the slot values. The "Diff Demand (kw)" column is left as it was.

    Notes
    -----
    Differences from the row-by-row loop this replaces:

    * several events inside one slot are accumulated; previously the last event
      overwrote the earlier ones, which were then counted at full weight;
    * plug and unplug events use the same weighting; previously plug events were
      weighted by the elapsed share ``f`` instead of the remaining share ``1 - f``,
      so the energy of a session was not preserved;
    * only ``cumulative_column`` is written; previously the scalar was assigned to
      the whole row;
    * irregular rows that come before every regular row are dropped; previously
      their value was written to the last row of the frame.
    """
    diff_column = "Diff Demand (kw)"
    aligned = aligned.sort_index()

    minute_offset = np.asarray(aligned.index.minute) % step_minutes
    is_irregular = minute_offset != 0
    regular = aligned.loc[~is_irregular].copy()
    if not is_irregular.any():
        return regular

    # Position, within ``regular``, of the closest regular row at or before each row;
    # -1 marks rows located before the first regular row.
    previous_regular = np.cumsum(~is_irregular) - 1
    usable = is_irregular & (previous_regular >= 0)
    if not usable.any():
        return regular

    cumulative = aligned[cumulative_column].to_numpy(dtype="float64")
    differential = aligned[diff_column].to_numpy(dtype="float64")
    events = pd.DataFrame(
        {
            "cumulative": cumulative[usable],
            # part of the differential that does not belong to the slot
            "elapsed_share": differential[usable] * minute_offset[usable] / step_minutes,
        },
        index=previous_regular[usable],
    )

    per_slot = events.groupby(level=0)
    slot_value = per_slot["cumulative"].last() - per_slot["elapsed_share"].sum()

    target_column = regular.columns.get_loc(cumulative_column)
    regular.iloc[slot_value.index.to_numpy(), target_column] = slot_value.to_numpy()
    return regular


# in
plug_aligned = collapse_onto_regular_index(plug_aligned, "inDemand (kw)", resolution)

# out
unplug_aligned = collapse_onto_regular_index(unplug_aligned, "outDemand (kw)", resolution)

unplug_aligned.head(30)


Unnamed: 0,Diff Demand (kw),outDemand (kw)
2021-01-02 00:00:00,0.0,0.0
2021-01-02 00:15:00,0.0,0.0
2021-01-02 00:30:00,0.0,0.0
2021-01-02 00:45:00,0.0,0.0
2021-01-02 01:00:00,0.0,0.0
2021-01-02 01:15:00,0.0,0.0
2021-01-02 01:30:00,0.0,0.0
2021-01-02 01:45:00,0.0,0.0
2021-01-02 02:00:00,0.0,0.0
2021-01-02 02:15:00,0.0,0.0


In [9]:
# The per-event differentials are no longer needed; only the cumulative columns remain.
plug_aligned = plug_aligned.drop(columns="Diff Demand (kw)")
unplug_aligned = unplug_aligned.drop(columns="Diff Demand (kw)")


## Create total power dataset by summing up contributions of plugs and unplugs at each timestep

In [10]:
# Put the two cumulative series side by side on the union of their timestamps.
# A cumulative series keeps its last value until the next event, so a timestamp that
# exists in only one of the two frames is forward-filled in the other one. Filling
# such gaps with 0 would remove every earlier event from that series and distort the
# total. Only timestamps before the first value of a series are set to 0.
combined = (
    pd.concat([plug_aligned, unplug_aligned], axis=1)
    .sort_index()  # forward-filling is only meaningful in chronological order
    .ffill()
    .fillna(0)
)

# Plug contributions are positive and unplug contributions negative, so their sum is
# the demand of the sessions that are active at each timestamp.
demand = (combined["inDemand (kw)"] + combined["outDemand (kw)"]).to_frame(
    "Total Demand (kw)"
)
demand.head(50)

Unnamed: 0,Total Demand (kw)
2021-01-02 00:00:00,0.0
2021-01-02 00:15:00,0.0
2021-01-02 00:30:00,0.0
2021-01-02 00:45:00,0.0
2021-01-02 01:00:00,0.0
2021-01-02 01:15:00,0.0
2021-01-02 01:30:00,0.0
2021-01-02 01:45:00,0.0
2021-01-02 02:00:00,0.0
2021-01-02 02:15:00,0.0


In [11]:
# Persist the total-demand frame under the file name built in the settings cell.
demand.to_csv(path_or_buf=output_csv)

# Time covariates

In [12]:
from etl import get_time_covariates

# Build the time covariates for the demand series and store them next to the demand
# file, reusing its name with a "_time_covariates" suffix.
# NOTE(review): 'GR' is passed through to the helper unchanged; presumably a country
# code - confirm against etl.get_time_covariates.
demand_series = demand.squeeze()
time_covariates = get_time_covariates(demand_series, 'GR')
covariates_csv = output_csv.split(".")[0] + "_time_covariates" + ".csv"
time_covariates.to_csv(covariates_csv)