In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from glob import glob
plt.style.use('ggplot')

In [None]:
# Cluster analysed in this notebook. The cells below use it as the filename
# fragment and column-name prefix of the prediction CSV, as the filter value
# for the plug-capacity table, and in plot titles.
CLUSTER = "WEBSTER"

In [None]:
# Locate the prediction file(s) for this cluster. glob() returns matches in
# arbitrary order, so sort them to make the choice reproducible, and fail with
# an explicit message instead of an IndexError when nothing matches.
prediction_files = sorted(glob(f"../predictions/best_sweep_tgcn_cpnll/predictions_TGCN_{CLUSTER}*.csv"))
if not prediction_files:
    raise FileNotFoundError(f"No prediction CSV found for cluster {CLUSTER!r}")

df = pd.read_csv(prediction_files[0], index_col=0, parse_dates=["Date"])
# Keep only the columns related to the cluster + the date
df = df.loc[:, (df.columns.str.startswith(CLUSTER) | (df.columns == "Date"))]
df.head(5)

In [None]:
# Load the plug-capacity table and keep only the rows of the selected cluster.
plug_capacity = pd.read_csv(
    "../data/MaximumPlugsPerCluster.csv",
    index_col=0,
    parse_dates=["StartDate", "EndDate"],
).loc[lambda capacity: capacity["Cluster"] == CLUSTER]
plug_capacity

In [None]:
# Plot every cluster column against the date, restricted to rows before the cut-off date.
df.loc[df["Date"] < '2019-05-30'].plot(
    x="Date",
    figsize=(20, 7),
    title=f"Predictions for {CLUSTER} cluster",
    ylabel='Number of sessions',
)

In [None]:
# Assume we have forecast horizon of 1. Then we rename the columns to something more readable
COLUMN_LABELS = {
    f"{CLUSTER}_1": "Censored Observations",
    f"{CLUSTER}_1_pred": "Predicted",
    f"{CLUSTER}_1_true": "True Observations",
}
# Rebind rather than rename in place, so the frame shown by earlier cells is
# not mutated behind the reader's back. Re-running this cell is still a no-op.
df = df.rename(columns=COLUMN_LABELS)
# Single source of truth for the readable series names used by the cells below.
LEVELS = list(COLUMN_LABELS.values())

# rename() silently ignores keys that are absent, so check here that the
# horizon-1 columns really existed instead of failing later with a KeyError.
missing_levels = [level for level in LEVELS if level not in df.columns]
assert not missing_levels, f"Expected columns missing after rename: {missing_levels}"

In [None]:
# Daily peak: the largest value of each series within every calendar day.
fig, ax = plt.subplots(figsize=(15, 5))

daily_peak = df.groupby(df["Date"].dt.date)[LEVELS].max()
daily_peak.plot(ax=ax)

ax.set_title(f"Max number of sessions for each day in cluster {CLUSTER}")
ax.set_ylabel("Number of sessions")
# The legend shows the readable names in LEVELS rather than the raw CLUSTER_1* column names.

In [None]:

def plot_utilisation_per_day(df):
    """
    Plot the daily utilisation of every series in ``LEVELS`` for ``CLUSTER``.

    The utilisation of a day is the sum of a series over that day's rows
    divided by the capacity available on that day, i.e. ``tau`` times the
    number of rows of that day. ``tau`` is taken to be the overall maximum of
    the series itself.

    Parameters
    ----------
    df : DataFrame with a datetime ``Date`` column and one column per entry
        of ``LEVELS``. It is not modified.

    Returns
    -------
    None. The curves are drawn on a newly created figure.
    """
    demand = df[LEVELS]
    day = df["Date"].dt.date

    # We should provide tau from the model training
    tau = demand.max()

    # Aggregate only the level columns. Summing the whole frame would also try
    # to sum the datetime "Date" column, which pandas >= 2.0 rejects.
    sessions_per_day = demand.groupby(day).sum()
    rows_per_day = demand.groupby(day).size()

    # Capacity per day: tau offered on each of that day's rows.
    capacity_per_day = pd.DataFrame({col: rows_per_day * tau[col] for col in LEVELS})

    # Calculate utilisation
    utilisation_per_day = sessions_per_day / capacity_per_day

    # Plot
    fig, ax = plt.subplots(figsize=(15, 5))
    utilisation_per_day.plot(ax=ax)
    ax.set_title(f"Utilisation per day in cluster {CLUSTER}")
    ax.set_ylabel("Utilisation")

plot_utilisation_per_day(df)

In [None]:
def calculate_utility(df, CLUSTER, max_tau, levels=None):
    """
    Calculate the mean daily utilisation for hypothetical capacities 1..max_tau.

    For every column in ``levels`` and every integer capacity ``tau`` in
    ``1..max_tau``, the column is clipped at ``tau``, summed per calendar day
    of ``df["Date"]`` and divided by ``tau`` times the number of rows of that
    day. The mean of those daily ratios is recorded.

    Parameters
    ----------
    df : DataFrame with a datetime ``Date`` column and one column per level.
        It is not modified.
    CLUSTER : unused; kept so that existing calls keep working.
    max_tau : largest hypothetical capacity to evaluate (inclusive).
    levels : column names to evaluate. Defaults to the notebook-level
        ``LEVELS`` so that existing calls behave as before.

    Returns
    -------
    dict mapping each level to a list with one mean utilisation per capacity;
    position ``i`` of a list belongs to capacity ``i + 1``.
    """
    if levels is None:
        levels = LEVELS

    # The calendar-day key and the number of rows per day do not depend on the
    # level or on tau, so compute them once instead of once per iteration.
    day = df["Date"].dt.date
    rows_per_day = df.groupby(day).size()

    util_values = {}
    for level in levels:
        demand = df[level]
        util_per_tau = []
        for tau_hypothetical in range(1, max_tau + 1):
            # Demand above the hypothetical capacity cannot be served: clip it,
            # then sum the intra-day rows to get the daily served demand.
            served_per_day = demand.clip(upper=tau_hypothetical).groupby(day).sum()
            # Daily capacity is tau on each of the day's rows.
            utilisation = served_per_day / (tau_hypothetical * rows_per_day)
            util_per_tau.append(utilisation.mean())
        util_values[level] = util_per_tau

    return util_values
# Evaluate hypothetical capacities 1 through 12 for the selected cluster.
utility_values = calculate_utility(
    df,
    CLUSTER,
    max_tau=12,
)

In [None]:
# One row per hypothetical capacity; row i of the utility lists belongs to capacity i + 1.
df_utilisation = pd.DataFrame(utility_values)
df_utilisation['Capacity'] = range(1, len(df_utilisation) + 1)
df_utilisation

In [None]:
# Curves to draw: (column of df_utilisation, legend label)
utilisation_curves = [
    ("Predicted", "Utilisation for predicted demand"),
    ("True Observations", "Utilisation for true demand"),
]

fig, ax = plt.subplots(figsize=(12, 7), sharey=True)
capacity = df_utilisation['Capacity']
for column, legend_label in utilisation_curves:
    ax.plot(capacity, df_utilisation[column], '-o', label=legend_label)

# Horizontal reference line at 20% utilisation
ax.axhline(y=0.2, linestyle='--', color='g', label="20% utilisation")

ax.set_xlabel("Capacity")
ax.set_ylabel("Utilisation")
ax.yaxis.set_tick_params(labelleft=True)
ax.legend()
fig.suptitle(f"Predicted vs Real utilisation for cluster {CLUSTER}")
plt.show()

## Utilization is key (PwC)
Once an EV-charging station is built, all costs are essentially fixed, so utilization is key to achieving efficiency. Still, even a quite low utilization is likely to result in lines at busy times. In practice, the stand-alone, fast-charger industry uses a 20% utilization as a rule of thumb. But if a charging station is utilized at more than this 20% threshold, the operator will likely look to expand capacity or, more likely, add another site nearby.