In [38]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.seasonal import MSTL

from sktime.utils.plotting import plot_series

### Data
We will work with the hourly electricity demand dataset. It is the electricity demand for the state of Victoria in Australia from 2002 to the start of 2015. 

In [4]:
# Load only the columns we need, indexed by the parsed `date_time` column,
# and keep just the 2010-onwards subset for this demo.
data = pd.read_csv(
    '../../Datasets/victoria_electricity_demand.csv',
    usecols=["demand", "temperature", "date_time"],
    parse_dates=['date_time'],
    index_col='date_time',
).loc["2010":]

# Plot the demand series; rotate the x tick labels by 30 degrees.
plot_series(data['demand'])
plt.xticks(rotation=30);

<img src='./plots/victoria-electricity-deman-2010-2015.png'>

In [5]:
# Work on an independent copy; `data` keeps the frame exactly as loaded.
df = data.copy()

### Computing exponential weights for rolling window features using Pandas

$exponential\:smoothing$

$$\frac{y_{t} + (1-\alpha) y_{t-1} + (1-\alpha)^2 y_{t-2} + \dots + (1-\alpha)^{n-1} y_{t-n+1}}{1 + (1-\alpha) + (1-\alpha)^2 + \dots + (1-\alpha)^{n-1}}$$

where $\alpha$ is the rate and $n$ is the window-size



In [8]:
# compute exponential weights
def compute_exp_weights(alpha, window_size):
    """Return exponentially decaying weights for a window, oldest point first.

    Parameters
    ----------
    alpha : float
        Decay rate. The observation ``i`` steps before the most recent one
        receives the weight ``(1 - alpha) ** i``.
    window_size : int
        Number of weights to generate.

    Returns
    -------
    numpy.ndarray
        Float array of length ``window_size``. The last element (aligned with
        the most recent observation) is ``1`` and earlier elements carry
        successively higher powers of ``(1 - alpha)``.
    """
    # Exponents count down to 0 so the weights come out already ordered
    # oldest -> newest; this replaces the per-element Python loop, which is
    # costly because the function is called once per window.
    exponents = np.arange(window_size - 1, -1, -1)
    return np.power(1.0 - alpha, exponents)
    

In [16]:
# Example: twelve weights with alpha = 0.05, drawn in window order
# (index 0 is the oldest point, index 11 the most recent).
w = compute_exp_weights(window_size=12, alpha=0.05)
plt.figure(figsize=(15, 3))
plt.gca().stem(w)

<img src='./plots/exponential-weigts-example.png'>

In [14]:
def exponential_weighted_window_mean(x, alpha=0.05):
    """Exponentially weighted mean of the values in a single window.

    Parameters
    ----------
    x : array-like
        Window values; the last element receives the largest weight
        (``1``) and earlier elements are discounted by powers of
        ``(1 - alpha)``.
    alpha : float, default 0.05
        Decay rate forwarded to ``compute_exp_weights``.

    Returns
    -------
    float
        Weighted sum of ``x`` divided by the sum of the weights.
    """
    decay = compute_exp_weights(alpha=alpha, window_size=len(x))
    weighted_total = np.dot(x, decay)
    return weighted_total / np.sum(decay)

In [27]:
# One-week (7 * 24 hourly rows) rolling window: plain mean and exponentially
# weighted mean of demand.
rolling_stats_pandas = (
    df['demand']
    .rolling(window=7*24)
    .agg(["mean", exponential_weighted_window_mean])
    # Move the index forward one hour so the row at time t only contains
    # information up to t-1. Lowercase 'h' is used because the uppercase 'H'
    # alias is deprecated in pandas >= 2.2 and the later cells already use 'h'.
    .shift(periods=1, freq='h')
    .add_prefix('rolling_')
)

In [36]:
# One panel per rolling statistic; the trailing ';' suppresses the text repr.
rolling_stats_pandas.plot(subplots=True, figsize=(15, 4));

<img src='./plots/exponentially-weighted-window_stats.png'>

#### Let's look at the last week

In [37]:
# Index labels of the final 7 * 24 rows, i.e. the last week of hourly data.
index = df.index[-24*7:]
# Draw the actual demand first, then overlay both rolling means on the same axes.
ax = df.loc[index].plot(figsize=(15,5), y=['demand'])
rolling_stats_pandas.loc[index].plot(ax=ax);

<img src='./plots/exponentially-weighted-window_stats-feb-22-29-2015.png'>

#### The rolling mean with exponential weights is a lot more sensitive to changes in the original data because there is a lot more weight given to recent data points. This means that the daily seasonality causes the exponentially weighted data to oscillate.

#### Overall, we can see the weighted mean moves more quickly in response to changing trends in the data.

#### If we want to use the weighted window functions to be more responsive to short term **trends** then de-seasonalising the data first may be helpful. Let's try this and see what happens.

### Smoothing the data after de-seasonalising

In [62]:
# Decompose demand using two seasonal periods: 24 rows (daily) and 24 * 7 rows
# (weekly). `seasonal_deg=0` is forwarded to the underlying STL fits.
res = MSTL(
    endog=df['demand'],
    periods=[24, 24 * 7],
    stl_kwargs={'seasonal_deg': 0},
).fit()

In [67]:
# One-week rolling statistics computed on the MSTL trend component.
result = (
    res.trend
    .rolling(window=24 * 7)
    .agg(["mean", exponential_weighted_window_mean])
    # Shift the index one hour forward so each row uses only earlier values.
    .shift(freq="h", periods=1)
    .add_prefix("demand_trend_window_168_")
)

#### The exponentially weighted window mean clearly responds faster to the changing trend relative to the non-weighted window mean.

In [72]:
# Index labels of the final 7 * 24 rows (the last week of hourly data).
index = df.index[-24*7:]
# Trend component first, then both rolling means of the trend on the same axes.
ax = res.trend.loc[index].plot(label="demand trend", legend=True, figsize=(15,5))
result.loc[index].plot(ax=ax);

<img src='./plots/exponentially-weighted-window_stats-on-demand-trend-feb-22-29-2015.png'>

### Smoothing the data after de-trending and de-seasonalising

In [39]:
# Fit MSTL on demand with 24-row and 24*7-row seasonal periods.
# NOTE(review): the arguments are identical to the MSTL fit in the previous
# section, so the existing `res` could presumably be reused instead of refitting.
res = MSTL(endog=df['demand'], periods=[24, 24*7], stl_kwargs={'seasonal_deg':0}).fit()

In [60]:
# One-week rolling statistics computed on the MSTL residual component.
result = (
    res.resid
    .rolling(window=24 * 7)
    .agg(["mean", exponential_weighted_window_mean])
    # Shift the index one hour forward so each row uses only earlier values.
    .shift(freq="h", periods=1)
    .add_prefix("demand_window_168_")
)

### Residuals and exponential window rolling mean

In [73]:
# Index labels of the final 7 * 24 rows (the last week of hourly data).
index = df.index[-24*7:]
# Residual component first, then both rolling means of it on the same axes.
ax = res.resid.loc[index].plot(label="demand de-trend and de-season", legend=True, figsize=(15,5))
result.loc[index].plot(ax=ax);

<img src='./plots/exponentially-weighted-window_stats-on-demand-after-detrend-and-deseason-feb-22-29-2015.png'>

### Computing expanding windows features with exponential weights using Pandas `ewm`

We want to compute the following expanding-window statistics with exponential weights:
- mean
- standard deviation

As the weights are computed internally in Pandas when using the `ewm` (exponentially weighted moving) method we cannot pass custom functions.

In [74]:
# Exponentially weighted mean and standard deviation of demand (alpha = 0.05).
result_ewm = (
    df['demand']
    .ewm(alpha=0.05)
    .agg(['mean', 'std'])
    # Move the index forward one hour so the row at time t only contains
    # information up to t-1. Lowercase 'h' replaces the uppercase 'H' alias,
    # which is deprecated in pandas >= 2.2, and matches the other cells.
    .shift(periods=1, freq='h')
    .add_prefix("demand_ewm_0.05_")
)

In [83]:
# One panel per column of `result_ewm`; the trailing ';' suppresses the text repr.
result_ewm.plot(subplots=True, figsize=(15,4));

<img src='./plots/pandas-ewm-exponential-weighted-moving.png'>

### Custom Metrics

If we want to compute custom metrics with exponential weights and an expanding window, we can use `expanding`. Let's illustrate this by manually implementing the expanding mean with exponential weights.

In [78]:
# Expanding-window statistics of demand: plain mean and the custom
# exponentially weighted mean (weights are recomputed for each window length).
result_expanding = (
    df['demand']
    .expanding()
    .agg(["mean", exponential_weighted_window_mean])
    # Move the index forward one hour so the row at time t only contains
    # information up to t-1. Lowercase "h" replaces the uppercase "H" alias,
    # which is deprecated in pandas >= 2.2, and matches the other cells.
    .shift(periods=1, freq="h")
    .add_prefix("expanding_window_")
)

In [82]:
# One panel per column of `result_expanding`; the trailing ';' suppresses the text repr.
result_expanding.plot(subplots=True, figsize=(16,8));

<img src='./plots/expanding--weighted-window-stats-electricity-demand-pandas.png'>