In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt


from sktime.utils.plotting import plot_series, plot_windows
from feature_engine.timeseries.forecasting import WindowFeatures
from sktime.transformations.series.summarize import WindowSummarizer

### Data
We will work with the hourly electricity demand dataset. It contains the electricity demand for the state of Victoria in Australia from 2002 to the start of 2015.


In [3]:
# Read only the columns used in this demo; the parsed `date_time` column
# becomes the index. The trailing `.loc` keeps the rows from 2010 onwards so
# that the demo works on a subset of the data.
data = pd.read_csv(
    '../../Datasets/victoria_electricity_demand.csv',
    usecols=["demand", "temperature", "date_time"],
    parse_dates=['date_time'],
    index_col='date_time',
).loc["2010":]

# Plot the demand series, tilting the tick labels so the dates stay readable.
plot_series(data['demand'])
plt.xticks(rotation=30);

<img src='./plots/victoria-electricity-deman-2010-2015.png'>

In [4]:
# Work on an independent (deep) copy so that `data` itself is left untouched.
df = data.copy(deep=True)

In [43]:
def MAD(x):
    """Return the median absolute deviation of `x`.

    Deviations are measured from the median of `x`; the result is the median
    of the absolute values of those deviations.
    """
    centre = np.median(x)
    abs_deviation = np.abs(x - centre)
    return np.median(abs_deviation)


# Rolling statistics of demand over a 24-observation window. The custom `MAD`
# function is aggregated alongside the built-in mean and std, so its column is
# named after the function.
result_pandas = (
    df['demand']
    .rolling(24)
    .aggregate(['mean', 'std', MAD])
    # Move every timestamp forward by one hour: the value stored at time t is
    # then computed from a window that ended one hour earlier.
    .shift(1, freq='H')
    .add_prefix('demand_window_24_')
)


In [24]:
# One panel per rolling statistic (mean, std, MAD).
result_pandas.plot(subplots=True, figsize=(16, 8));

<img src='./plots/rolling-stats-pandas-24h-window.png'>

## Computing weighted rolling windows features using Pandas

$$weighted \: mean \: (\mu_w)= \frac{ \sum_i w_i x_i }{ \sum_i w_i }$$

$$weighted \: standard \: deviation  \: (\sigma_w)= \sqrt{ \frac{ \sum_i w_i (x_i - \mu_w)^2 }{ \sum_i w_i } }$$

In [55]:
def weighted_mean(x, weights):
    """Return the weighted mean of `x`.

    Parameters
    ----------
    x : array-like
        Observations.
    weights : array-like
        Weights, matched to the observations by position.

    Returns
    -------
    scalar
        ``sum(w_i * x_i) / sum(w_i)``.
    """
    weighted_total = np.dot(x, weights)
    total_weight = np.sum(weights)
    return weighted_total / total_weight

def weighted_standard_deviation(x, weights):
    """Return the weighted standard deviation of `x`.

    Parameters
    ----------
    x : array-like
        Observations.
    weights : array-like
        Weights, matched to the observations by position.

    Returns
    -------
    scalar
        ``sqrt(sum(w_i * (x_i - mu_w)**2) / sum(w_i))`` where ``mu_w`` is the
        weighted mean of `x` under the same weights.
    """
    centre = weighted_mean(x, weights)
    weighted_sq_dev = np.dot(weights, np.square(x - centre))
    return np.sqrt(weighted_sq_dev / np.sum(weights))


#### Let's compute the rolling weighted mean and standard deviation. The weights vector needs exactly one weight per observation in the window, so its length must equal the window size.

In [25]:
# Linearly increasing weights, one per observation in the window: the first
# position in each window gets weight 1 and the last gets weight `window_len`.
# Assuming rows are in chronological order, the most recent observation
# therefore carries the largest weight.
window_len = 24
weights = np.arange(1, window_len+1)

result_weighted_mean = (
    df['demand']
    .rolling(window=window_len)
    # raw=True passes each window to `weighted_mean` as a NumPy array instead
    # of building a Series per window; the function only uses NumPy
    # reductions, so the values are unchanged and the computation is faster.
    .apply(weighted_mean, raw=True, args=(weights,))
    # Move every timestamp forward by one hour: the value stored at time t is
    # then computed from a window that ended one hour earlier.
    .shift(periods=1, freq='H')
)


In [56]:
# Linearly increasing weights, one per observation in the window: the first
# position in each window gets weight 1 and the last gets weight `window_len`.
window_len = 24
weights = np.arange(1, window_len+1)

result_weighted_std = (
    df['demand']
    .rolling(window=window_len)
    # raw=True passes each window to `weighted_standard_deviation` as a NumPy
    # array instead of building a Series per window; the function only uses
    # NumPy operations, so the values are unchanged and the computation is
    # faster.
    .apply(weighted_standard_deviation, raw=True, args=(weights,))
    # Move every timestamp forward by one hour: the value stored at time t is
    # then computed from a window that ended one hour earlier.
    .shift(periods=1, freq='H')
)

#### Because the weights increase towards the most recent observation, the weighted mean is far more sensitive to recent data, so it fluctuates more than the unweighted rolling mean.


In [76]:
# Hourly demand drawn semi-transparent as the backdrop.
ax = df.plot(y=['demand'], alpha=0.5, figsize=(16, 6))

# Overlay the weighted and the unweighted 24-hour rolling means on the same axes.
result_weighted_mean.plot(ax=ax, color='seagreen', label='Weighted window 24hr mean')
result_pandas.plot(y=['demand_window_24_mean'], ax=ax, color='yellow', alpha=0.5)

ax.legend();

<img src='./plots/weighted_window_mean_electricity_demand.png'>

In [77]:
# Zoom in on the final 24 rows of the data.
index = df.index[-24:]

ax = df.loc[index].plot(y=['demand'], figsize=(15,5))
result_pandas.loc[index].plot(y=['demand_window_24_mean'], ax=ax)
result_weighted_mean.loc[index].plot(ax=ax, label="weighted_mean_24hr_window")
ax.legend()

# Title the figure with the date of the last timestamp shown.
plt.suptitle(f'Electricity Demand on {index[-1].year}-{index[-1].month}-{index[-1].day}');

<img src='./plots/Electricity-demand-rolling-mean-and-weighted_mean-on-28-feb-2015.png'>

In [93]:
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(16, 6), constrained_layout=True)

# Top panel: the weighted rolling standard deviation.
result_weighted_std.plot(ax=ax[0], color='seagreen')
ax[0].legend(['weighted_standard_deviation'])

# Bottom panel: the plain (unweighted) 24-hour rolling standard deviation.
# The legend text is set explicitly on each axes so that every label matches
# the series actually drawn in that panel.
result_pandas.plot(y=['demand_window_24_std'], color='blue', alpha=0.5, ax=ax[1])
ax[1].legend(['rolling_standard_deviation']);

<img src='./plots/weighted_window_STD_of_demand.png'>

## Weekly pattern

In [101]:
# Number of hourly observations in one week; shared by both weekly features.
weekly_window_len = 7 * 24

# Unweighted rolling mean over one week of observations.
result_weekly = (
    df['demand']
    .rolling(window=weekly_window_len)
    .agg(['mean'])
    # Move every timestamp forward by one hour: the value stored at time t is
    # then computed from a window that ended one hour earlier.
    .shift(periods=1, freq='H')
    .add_prefix('demand_weekly_window_')
)

# Linearly increasing weights, one per observation in the weekly window.
weekly_weights = np.arange(1, weekly_window_len + 1)

# Weighted rolling mean over the same one-week window.
result_weighted_weekly = (
    df['demand']
    .rolling(window=weekly_window_len)
    # raw=True passes each window to `weighted_mean` as a NumPy array instead
    # of building a Series per window; the values are unchanged and the
    # computation is faster.
    .apply(weighted_mean, raw=True, args=(weekly_weights,))
    .shift(periods=1, freq='H')
)

### As the weighted mean is far more sensitive to recent data, we can see that the weighted mean fluctuates more due to the daily seasonality.


In [115]:
# Zoom in on the final week of data (7 * 24 rows).
index = df.iloc[-7*24:].index
ax = df.loc[index].plot(y=['demand'], figsize=(15,4))
result_weekly.loc[index].plot(y=['demand_weekly_window_mean'], ax=ax)
# Plot the weighted mean computed over the weekly window so that both overlays
# use the same 7-day window (not the 24-hour weighted mean).
result_weighted_weekly.loc[index].plot(ax=ax, label="Weighted mean")
plt.legend()
plt.suptitle(t='Weekly pattern', size=24);

<img src='./plots/weighted_mean_weekly_window.png'>