# Utilisation Visualizations

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import os
from glob import glob
plt.style.use('ggplot')

# Use colormap='tab10'
plt.set_cmap('tab10')

In [None]:
# Number of plugs per cluster, keyed by cluster name.
# The loader writes this value into the "No. Plugs per Hub" column.
PLUGS_PER_CLUSTER = {
    'WEBSTER': 6,
    'BRYANT': 9,
    'HIGH': 8,
    'HAMILTON': 3,
    'CAMBRIDGE': 10,
    'MPL': 6,
    'RINCONADA': 3,
    'TED': 8
}

## Data preprocessing

### Define which model to use

In [None]:
# Model whose predictions are analysed. Read as a global by the loader (to build
# the prediction directory and file names) and by the plot helpers (titles and
# saved figure names).
model = 'TGCN'

### Load predictions by models trained on censoring strategies

In [None]:
def add_censoring_strategy_predictions(df=None, censoring_level=1, censoring_strategy="Static", keep_obs=False, prefix=None, cluster=None):
    """Load one prediction CSV and optionally merge it into ``df``.

    The file is searched for in
    ``../predictions/{prefix}_{model}_{strategy}_{level}`` (model and strategy
    lower-cased) using the pattern ``predictions_{model}_{cluster}*.csv``,
    where ``model`` is the notebook-level global.

    Args:
        df: Frame to merge the loaded predictions into (on "Date"), or None
            to return the loaded predictions on their own.
        censoring_level: Censoring level, used in the directory name and in
            the renamed column labels.
        censoring_strategy: Censoring strategy name (e.g. "Static",
            "Dynamic"), used the same way.
        keep_obs: Keep the "True Observations", "No. Plugs per Hub" and
            "Cluster" columns. Leave False when merging into a frame that
            already has them.
        prefix: Leading part of the prediction directory name. Required.
        cluster: Cluster name; must be a key of ``PLUGS_PER_CLUSTER``. Required.

    Returns:
        The loaded predictions, or ``df`` merged with them on "Date".

    Raises:
        ValueError: If ``prefix`` or ``cluster`` is None.
        NotADirectoryError: If the prediction directory does not exist.
        FileNotFoundError: If no CSV in the directory matches the pattern.
    """
    # Validate the arguments before touching the filesystem so that a missing
    # argument is never reported as a missing directory.
    if prefix is None:
        raise ValueError("Prefix cannot be None")
    if cluster is None:
        raise ValueError("Cluster cannot be None")

    directory_path = f"../predictions/{prefix}_{model.lower()}_{censoring_strategy.lower()}_{censoring_level}"
    if not os.path.exists(directory_path):
        raise NotADirectoryError(directory_path)

    pattern = f"{directory_path}/predictions_{model}_{cluster}*.csv"
    # glob returns matches in arbitrary order; sort so the chosen file is stable.
    matches = sorted(glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No prediction file matches {pattern}")
    df_preds = pd.read_csv(matches[0], parse_dates=["Date"], index_col=0)

    # Because we HAD a bug of double predictions being generated
    df_preds = df_preds.dropna(how='all')

    # Assume we have forecast horizon of 1. Then we rename the columns to something more readable
    df_preds = df_preds.rename(columns={
        f"{cluster}_1": f"Censored Observations {censoring_strategy} {censoring_level}",
        f"{cluster}_1_pred": f"Predicted {censoring_strategy} {censoring_level}",
        f"{cluster}_1_true": "True Observations",
    })

    df_preds["No. Plugs per Hub"] = PLUGS_PER_CLUSTER[cluster]
    df_preds["Cluster"] = cluster

    if not keep_obs:
        df_preds = df_preds.drop(["True Observations", "No. Plugs per Hub", "Cluster"], axis=1)

    if df is None:
        return df_preds
    return df.merge(df_preds, on="Date")

### Visualization functions

In [None]:
def get_relevant_cols(censoring_level, censoring_strategy):
    """Return the column names needed to analyse one censoring configuration.

    A fresh list is built on every call, so callers may mutate the result
    (the helpers below remove entries from it).
    """
    return [
        'Date',
        'No. Plugs per Hub',
        "True Observations",
        f"Censored Observations {censoring_strategy} {censoring_level}",
        f"Predicted {censoring_strategy} {censoring_level}",
    ]

def plot_predictions(df, censoring_level, censoring_strategy, max_date='2019-05-30'):
    """Plot true observations and predictions over time for one configuration.

    Only rows dated before ``max_date`` are drawn. A dashed horizontal line
    marks the highest censored observation in that window. The figure is
    saved under ``../Figures`` and then shown.

    Args:
        df: Frame with a "Cluster" column plus the columns returned by
            ``get_relevant_cols`` for this configuration.
        censoring_level: Censoring level of the columns to plot.
        censoring_strategy: Censoring strategy of the columns to plot.
        max_date: Exclusive upper bound on "Date" for the plotted window.
    """
    censored_col = f"Censored Observations {censoring_strategy} {censoring_level}"
    cluster = df.Cluster.unique()[0]

    # Keep only the relevant columns and the plotted window (filtered once).
    df_plot = df[get_relevant_cols(censoring_level, censoring_strategy)]
    df_plot = df_plot[df_plot.Date < max_date]

    fig, ax = plt.subplots(figsize=(15, 7))
    ax.axhline(y=df_plot[censored_col].max(), linestyle='--', label='Censored Threshold', color='black')
    df_plot\
        .drop(columns=['No. Plugs per Hub', censored_col])\
        .plot(x="Date", ax=ax, title=f"{model} Predictions for {cluster} cluster with censoring strategy {censoring_strategy} {censoring_level}", ylabel='Number of sessions', colormap='tab10')

    # Rebuild the legend so that it also lists the threshold line.
    ax.legend()
    fig.savefig(f"../Figures/prediction_timeseries_{model}_{cluster}_{censoring_strategy}_{censoring_level}.png")
    plt.show()

In [None]:
def plot_daily_max_sessions(df, censoring_level, censoring_strategy):
    """Plot, for every day, the maximum true and predicted number of sessions.

    A dashed horizontal line marks the highest censored observation dated
    before 2019-05-30. The daily maxima themselves cover all dates in ``df``.

    Args:
        df: Frame with a "Cluster" column plus the columns returned by
            ``get_relevant_cols`` for this configuration.
        censoring_level: Censoring level of the columns to plot.
        censoring_strategy: Censoring strategy of the columns to plot.
    """
    censored_col = f"Censored Observations {censoring_strategy} {censoring_level}"
    cluster = df.Cluster.unique()[0]
    df_plot = df[get_relevant_cols(censoring_level, censoring_strategy)]

    fig, ax = plt.subplots(figsize=(20, 7))
    ax.axhline(y=df_plot[df_plot.Date < '2019-05-30'][censored_col].max(), linestyle='--', label='Censored Threshold', color='black')
    # Highest value per calendar day; the "Date" column keeps each day's last timestamp.
    df_plot\
        .drop(columns=['No. Plugs per Hub', censored_col]) \
        .groupby(df_plot["Date"].dt.date).max() \
        .plot(x="Date", ax=ax, colormap='tab10')
    ax.set_title(f"Max number of sessions for each day in cluster {cluster}")
    ax.set_ylabel("Number of sessions")
    # Rebuild the legend so that it also lists the threshold line
    # (same as plot_predictions does).
    ax.legend()
    plt.show()

In [None]:
def plot_utilisation_per_day(df, censoring_level, censoring_strategy):
    """Plot daily utilisation for true, censored and predicted sessions.

    Daily utilisation is the sum of sessions over the day divided by the sum
    of "No. Plugs per Hub" over the same rows. The figure is saved under
    ``../Figures``.

    Args:
        df: Frame with a "Cluster" column plus the columns returned by
            ``get_relevant_cols`` for this configuration.
        censoring_level: Censoring level of the columns to plot.
        censoring_strategy: Censoring strategy of the columns to plot.
    """
    cluster = df.Cluster.unique()[0]
    session_cols = [
        col for col in get_relevant_cols(censoring_level, censoring_strategy)
        if col not in ('Date', 'No. Plugs per Hub')
    ]

    # Sum only the columns that are used: summing the whole frame would also
    # hit the datetime "Date" and string "Cluster" columns, which recent
    # pandas versions refuse to sum.
    daily_totals = df.groupby(df["Date"].dt.date)[session_cols + ['No. Plugs per Hub']].sum()
    utilisation = daily_totals[session_cols].div(daily_totals['No. Plugs per Hub'], axis=0)

    fig, ax = plt.subplots(figsize=(15, 5))
    utilisation.plot(ax=ax, colormap='tab10')

    ax.set_title(f"Utilisation per day in cluster {cluster}")
    ax.set_ylabel("Utilisation")
    fig.savefig(f"../Figures/daily_utilisation_{model}_{cluster}_{censoring_strategy}_{censoring_level}.png")

### Utilisation functions

In [None]:
def calculate_utility(df, max_tau, censoring_level, censoring_strategy, mean=True):
    """Calculate utilisation for hypothetical capacities 1..``max_tau``.

    For each of the true, censored and predicted columns of the given
    configuration, and for each hypothetical capacity tau, the values are
    clipped at tau and summed per day. That daily sum is divided by
    ``tau * (number of rows that day)`` to give the daily utilisation.

    Args:
        df: Frame with a datetime "Date" column and the columns returned by
            ``get_relevant_cols`` for this configuration.
        max_tau: Largest hypothetical capacity to evaluate (inclusive).
        censoring_level: Censoring level of the columns to evaluate.
        censoring_strategy: Censoring strategy of the columns to evaluate.
        mean: If truthy, aggregate the daily utilisations with their mean,
            otherwise with their standard deviation.

    Returns:
        Dict mapping each column name to a list of ``max_tau`` values, where
        position ``i`` belongs to capacity ``i + 1``.
    """
    levels = [
        col for col in get_relevant_cols(censoring_level, censoring_strategy)
        if col not in ('Date', 'No. Plugs per Hub')
    ]

    # The grouping key and the per-day row counts do not depend on the column
    # or on tau, so compute them once instead of copying the frame every time.
    day = df["Date"].dt.date
    rows_per_day = df.groupby(day).size()

    util_values = {}
    for level in levels:
        per_tau = []
        for tau_hypothetical in range(1, max_tau + 1):
            # Sessions that a hub with tau plugs could have served, per day.
            served = df[level].clip(upper=tau_hypothetical).groupby(day).sum()
            utilisation = served / (tau_hypothetical * rows_per_day)
            per_tau.append(utilisation.mean() if mean else utilisation.std())
        util_values[level] = per_tau

    return util_values

In [None]:
def plot_utility(df, max_tau, censoring_level, censoring_strategy, ax=None, plot_std=False):
    """Plot mean utilisation against hypothetical capacity for one configuration.

    When ``ax`` is None a new figure is created and the true-demand curve is
    drawn first. The predicted curve is always drawn as two lines: solid up
    to the capacity the model could observe, then a dashed, semi-transparent
    continuation. With ``plot_std`` a +/- one standard deviation band is
    shaded around both parts.

    Args:
        df: Frame with "Cluster", "No. Plugs per Hub", "Date" and the
            columns of this configuration.
        max_tau: Largest hypothetical capacity to evaluate.
        censoring_level: Censoring level; a string is reduced to its first
            character converted to int after the column name is built.
        censoring_strategy: "Dynamic" puts the observed limit at capacity
            minus level; any other value puts it at the level itself.
        ax: Existing axes to draw on, or None to create a new figure.
        plot_std: Whether to shade the standard deviation band.

    Returns:
        The axes that were drawn on.
    """
    cluster = df.Cluster.unique()[0]

    mean_by_tau = calculate_utility(df, max_tau, censoring_level, censoring_strategy)
    if plot_std:
        std_by_tau = calculate_utility(df, max_tau, censoring_level, censoring_strategy, mean=False)
        df_utilisation_std = pd.DataFrame(std_by_tau)
        df_utilisation_std['Capacity'] = df_utilisation_std.index + 1

    df_utilisation = pd.DataFrame(mean_by_tau)
    df_utilisation['Capacity'] = df_utilisation.index + 1
    capacity = df['No. Plugs per Hub'].max()

    if ax is None:
        # Fresh figure: draw the true demand first, solid up to the installed
        # capacity and dashed beyond it.
        fig, ax = plt.subplots(figsize=(12, 7), sharey=True)
        up_to_capacity = df_utilisation[df_utilisation.Capacity <= capacity]
        from_capacity = df_utilisation[df_utilisation.Capacity >= capacity]
        true_line, = ax.plot(
            up_to_capacity['Capacity'], up_to_capacity["True Observations"],
            linestyle='-', marker='o', label="True demand"
        )
        ax.plot(
            from_capacity['Capacity'], from_capacity["True Observations"],
            linestyle='dashed', marker='o', color=true_line.get_color(), alpha=0.5
        )
        fig.suptitle(f"{model} Predicted vs observed utilisation for cluster {cluster} for different capacities")

    # The column name is built from the level as given; the numeric level is
    # only needed afterwards to locate the observed limit.
    predicted_col_name = f"Predicted {censoring_strategy} {censoring_level}"
    if isinstance(censoring_level, str):
        censoring_level = int(censoring_level[0])
    max_observed_by_model = capacity - censoring_level if censoring_strategy == "Dynamic" else censoring_level

    observed = df_utilisation[df_utilisation.Capacity <= max_observed_by_model]
    extrapolated = df_utilisation[df_utilisation.Capacity >= max_observed_by_model]

    # Solid line first, dashed continuation last: callers relabel ax.lines[-2].
    line, = ax.plot(
        observed['Capacity'], observed[predicted_col_name],
        linestyle='-', marker='o', label=f"Predicted demand {censoring_strategy} {censoring_level}"
    )
    color = line.get_color()
    ax.plot(
        extrapolated['Capacity'], extrapolated[predicted_col_name],
        linestyle='dashed', marker='o', color=color, alpha=0.5
    )

    if plot_std:
        std_observed = df_utilisation_std[df_utilisation_std.Capacity <= max_observed_by_model][predicted_col_name]
        std_extrapolated = df_utilisation_std[df_utilisation_std.Capacity >= max_observed_by_model][predicted_col_name]
        ax.fill_between(
            observed['Capacity'],
            observed[predicted_col_name] - std_observed,
            observed[predicted_col_name] + std_observed,
            alpha=0.2, color=color
        )
        ax.fill_between(
            extrapolated['Capacity'],
            extrapolated[predicted_col_name] - std_extrapolated,
            extrapolated[predicted_col_name] + std_extrapolated,
            alpha=0.2, color=color
        )

    ax.set_xlabel("Capacity")
    ax.set_ylabel("Utilisation")
    ax.yaxis.set_tick_params(labelleft=True)
    ax.legend()
    return ax

### 30 min forecast lead

Load predictions

In [None]:
# Add the predictions for the different censoring strategies
df = add_censoring_strategy_predictions(None, 2, "Dynamic", keep_obs=True, prefix="forecast_1_aware", cluster='WEBSTER').reset_index()
# df = add_censoring_strategy_predictions(df, 1, "Dynamic", prefix="forecast_1_aware")
df = add_censoring_strategy_predictions(df, 2, "Static", prefix="forecast_1_aware", cluster='WEBSTER')
df = add_censoring_strategy_predictions(df, 3, "Static", prefix="forecast_1_aware", cluster='WEBSTER')

df.head()

In [None]:
# Mean daily utilisation of the Static 3 columns for hypothetical capacities 1 to 12
calculate_utility(df, 12, 3, 'Static')

Visualize the predictions made on test set

In [None]:
# Draws the Dynamic 2 time series and also writes the figure to ../Figures
plot_predictions(df, 2, "Dynamic")

Max number of daily sessions

In [None]:
# Daily maxima for Dynamic 2; this plot is only shown, not saved
plot_daily_max_sessions(df, 2, "Dynamic")


Daily utilisation

In [None]:
# Daily utilisation for Dynamic 2; the figure is also written to ../Figures
plot_utilisation_per_day(df, 2, "Dynamic")

Predicted mean utilisation for each strategy and the actual utilisation

In [None]:
# The first call creates the figure and draws the true demand; later calls add
# one predicted curve each to the same axes.
ax = plot_utility(df, 12, 2, "Dynamic")
# ax = plot_utility(df, 12, 1, "Dynamic", ax=ax)
ax = plot_utility(df, 12, 2, "Static", ax=ax)
ax = plot_utility(df, 12, 3, "Static", ax=ax)


# Reference line at 20% utilisation; the legend is rebuilt afterwards so the new line is listed.
ax.axhline(y=0.2, linestyle='--', color='g', label="20% utilisation")
plt.legend()
plt.show()

Once an EV-charging station is built, all costs are essentially fixed, so utilization is key to achieving efficiency. Still, even a quite low utilization is likely to result in lines at busy times. In practice, the stand-alone, fast-charger industry uses a 20% utilization as a rule of thumb. But if a charging station is utilized at more than this 20% threshold, the operator will likely look to expand capacity or, more likely, add another site nearby.

### 24 hours forecast lead

Load predictions

In [None]:
# Same three strategies as the 30 min section, read from the "forecast_48_aware"
# directories. No reset_index() here: the cells below call it when they pass df_48 on.
df_48 = add_censoring_strategy_predictions(None, 2, "Dynamic", keep_obs=True, prefix="forecast_48_aware", cluster='WEBSTER')
# df_48 = add_censoring_strategy_predictions(df_48, 1, "Dynamic", prefix="forecast_48_aware", cluster='WEBSTER')
df_48 = add_censoring_strategy_predictions(df_48, 2, "Static", prefix="forecast_48_aware", cluster='WEBSTER')
df_48 = add_censoring_strategy_predictions(df_48, 3, "Static", prefix="forecast_48_aware", cluster='WEBSTER')

df_48.head()

Max number of daily sessions

In [None]:
# df_48 was built without reset_index(); the plot helper reads a "Date" column,
# so the index is turned back into columns first.
plot_daily_max_sessions(df_48.reset_index(), 2, "Dynamic")

Utilisation plot

In [None]:
# The first call creates the figure and draws the true demand; later calls add
# one predicted curve each to the same axes.
ax = plot_utility(df_48.reset_index(), 12, 2, "Dynamic")
#ax = plot_utility(df_48.reset_index(), 12, 1, "Dynamic", ax=ax)
ax = plot_utility(df_48.reset_index(), 12, 2, "Static", ax=ax)
ax = plot_utility(df_48.reset_index(), 12, 3, "Static", ax=ax)


# Reference line at 20% utilisation; the legend is rebuilt afterwards so the new line is listed.
ax.axhline(y=0.2, linestyle='--', color='g', label="20% utilisation")
plt.legend()
plt.show()

### 24 hours forecast lead plotted together with unaware model

In [None]:
# NOTE: in a notebook only a cell's last expression is displayed, so this head() call shows nothing here.
df_48.head()
CLUSTER = 'WEBSTER' # Cluster for which we want to plot the predictions aware vs unaware

In [None]:
# Predictions of the unaware model, read from the "forecast_48_unaware" directory.
df_48_unaware = add_censoring_strategy_predictions(None, 3, "Static", prefix="forecast_48_unaware", cluster=CLUSTER, keep_obs=True)
# As we're not using a censored dataset, the column the loader labels as
# censored observations is used as the true observations.
df_48_unaware["True Observations"] = df_48_unaware["Censored Observations Static 3"]
# keep_obs=True already filled this column from PLUGS_PER_CLUSTER; this rewrites the same value.
df_48_unaware["No. Plugs per Hub"] = PLUGS_PER_CLUSTER[CLUSTER]

df_48_unaware.head()

In [None]:
# Plot aware
ax = plot_utility(df_48.reset_index(), 12, 3, "Static", plot_std=True)
ax.lines[-2].set_label("Aware model Static 3")
# Plot unaware
ax = plot_utility(df_48_unaware.reset_index(), 12, 3, "Static", ax=ax, plot_std=True)
# Change label of the last plot
ax.lines[-2].set_label("Unaware model Static 3")
ax.axhline(y=0.2, linestyle='--', color='g', label="20% utilisation")
plt.legend()
plt.savefig(f"../Figures/aware_vs_unaware_{CLUSTER}.png", bbox_inches='tight')
plt.show()

In [None]:
# Preview the unaware-model frame used in the comparison plot
df_48_unaware.head()

Time series plot

In [None]:
import matplotlib.pyplot as plt


def _rows_before(frame, until):
    """Return the rows of ``frame`` dated before ``until``, with Date as a column."""
    # Frames straight from the loader carry Date in the index; move it to a
    # column so that ``frame.Date`` works for every input.
    if "Date" not in frame.columns:
        frame = frame.reset_index()
    return frame[frame.Date < until]


def _plot_lead_panel(axis, aware, unaware, observed_col, title, cmap):
    """Draw threshold, observations and aware/unaware predictions on one panel."""
    axis.set_title(title)
    # Plot y=3 (Static 3 censor threshold)
    axis.axhline(y=3, linestyle='--', color=cmap(0), label="Censor threshold")
    axis.plot(aware.Date, aware[observed_col], color=cmap(1), label="True Observations")
    axis.plot(aware.Date, aware["Predicted Static 3"], color=cmap(2), label="Aware model")
    axis.plot(unaware.Date, unaware["Predicted Static 3"], color=cmap(3), label="Unaware model")
    axis.legend()


fig, ax = plt.subplots(3, figsize=(15, 12))
# define colormap
cmap = plt.get_cmap("tab20")

max_date = '2019-04-29' # 2 weeks

## 24 hours (forecast_48 predictions, loaded in the cells above)

df_48_unaware_plot = _rows_before(df_48_unaware, max_date)
df_48_aware_plot = _rows_before(df_48, max_date)

_plot_lead_panel(
    ax[0], df_48_aware_plot, df_48_unaware_plot, "True Observations",
    f"24 hour forecast lead predictions for {CLUSTER} cluster Static 3", cmap,
)

## 12 hours (forecast_24 predictions)

# The loader raises ValueError without a cluster, so pass it explicitly.
# NOTE(review): the aware prefix is "forecast_24" while every other aware
# directory ends in "_aware" - confirm the directory name.
df_24_unaware = add_censoring_strategy_predictions(None, 3, "Static", prefix="forecast_24_unaware", cluster=CLUSTER)
df_24_aware = add_censoring_strategy_predictions(None, 3, "Static", prefix="forecast_24", cluster=CLUSTER)

df_24_unaware["True Observations"] = df_24_unaware["Censored Observations Static 3"]
df_24_unaware["No. Plugs per Hub"] = PLUGS_PER_CLUSTER[CLUSTER]

df_24_unaware_plot = _rows_before(df_24_unaware, max_date)
df_24_aware_plot = _rows_before(df_24_aware, max_date)

# NOTE(review): this panel and the next plot the aware model's censored
# observations under the "True Observations" label, because the true column
# is dropped when keep_obs is False - confirm this is intended.
_plot_lead_panel(
    ax[1], df_24_aware_plot, df_24_unaware_plot, "Censored Observations Static 3",
    f"12 hour forecast lead predictions for {CLUSTER} cluster Static 3", cmap,
)

## 30 min (forecast_1 predictions)

df_1_unaware = add_censoring_strategy_predictions(None, 3, "Static", prefix="forecast_1_unaware", cluster=CLUSTER)
df_1_aware = add_censoring_strategy_predictions(None, 3, "Static", prefix="forecast_1_aware", cluster=CLUSTER)

df_1_unaware_plot = _rows_before(df_1_unaware, max_date)
# Date is still the index of df_1_aware here; _rows_before resets it before filtering.
df_1_aware_plot = _rows_before(df_1_aware, max_date)

_plot_lead_panel(
    ax[2], df_1_aware_plot, df_1_unaware_plot, "Censored Observations Static 3",
    f"30 minute forecast lead predictions for {CLUSTER} cluster Static 3", cmap,
)

plt.show()



## 24 hour vs 30 min forecast
For Dynamic 2 

In [None]:
# Switch the cluster used by the cells below (it was 'WEBSTER' for the aware vs unaware comparison)
CLUSTER = 'CAMBRIDGE'

In [None]:
# Load the 30 min (forecast_1) Dynamic 2 predictions with the observation columns kept.
df_24_vs_30 = add_censoring_strategy_predictions(None, 2, "Dynamic", keep_obs=True, prefix="forecast_1_aware", cluster=CLUSTER)

# Suffix every column so the forecast_48 columns merged below do not collide
# with them, then restore the plain name of the true observations.
df_24_vs_30 = df_24_vs_30.add_suffix('_forecast_1')
df_24_vs_30 = df_24_vs_30.rename(columns={'True Observations_forecast_1': 'True Observations'})
# Merge the forecast_48 Dynamic 2 predictions on "Date"; these keep their unsuffixed names.
df_24_vs_30 = add_censoring_strategy_predictions(df_24_vs_30, 2, "Dynamic", prefix="forecast_48_aware", cluster=CLUSTER)
# The original plug-count column now carries the suffix, so add an unsuffixed one.
df_24_vs_30["No. Plugs per Hub"] = PLUGS_PER_CLUSTER[CLUSTER]

df_24_vs_30.head()

In [None]:
fig, ax = plt.subplots(figsize=(15, 5))
max_date = '2019-04-29' # 2 weeks
df_24_vs_30_plot = df_24_vs_30.copy().reset_index()
df_24_vs_30_plot = df_24_vs_30_plot[df_24_vs_30_plot.Date < max_date]

# Threshold drawn for Dynamic 2: plugs - 2, floored at 2. numpy is never
# imported in this notebook, so the pandas equivalent of
# np.maximum(2, plugs - 2) is used.
censor_threshold = (df_24_vs_30_plot["No. Plugs per Hub"] - 2).clip(lower=2)

# Draw everything on the axes created above instead of the implicit current axes.
ax.plot(df_24_vs_30_plot.Date, censor_threshold, linestyle='--', label="Censor threshold")
ax.plot(df_24_vs_30_plot.Date, df_24_vs_30_plot["True Observations"], label="True Observations")
ax.plot(df_24_vs_30_plot.Date, df_24_vs_30_plot["Predicted Dynamic 2_forecast_1"], label="30 min forecast")
ax.plot(df_24_vs_30_plot.Date, df_24_vs_30_plot["Predicted Dynamic 2"], label="24 hour forecast")

ax.set_title(f"24 hour vs 30 min forecast lead predictions for {CLUSTER} cluster Dynamic 2")
ax.legend()
plt.show()

In [None]:
# Utilisation curve of the forecast_48 Static 2 predictions with the standard deviation band shaded
ax = plot_utility(df_48.reset_index(), 12, 2, "Static", plot_std=True)