In [62]:
import pandas as pd

from pandasql import sqldf
mysql = lambda q: sqldf(q, globals())

# Dataset: Bike Sharing Daily
* Source: https://www.kaggle.com/contactprad/bike-share-daily-data
* Licence: [1] Fanaee-T, Hadi, and Gama, Joao, "Event labeling combining ensemble detectors and background knowledge", Progress in Artificial Intelligence (2013): pp. 1-15, Springer Berlin Heidelberg, doi:10.1007/s13748-013-0040-3.

@article{
year={2013},
issn={2192-6352},
journal={Progress in Artificial Intelligence},
doi={10.1007/s13748-013-0040-3},
title={Event labeling combining ensemble detectors and background knowledge},
url={http://dx.doi.org/10.1007/s13748-013-0040-3},
publisher={Springer Berlin Heidelberg},
keywords={Event labeling; Event detection; Ensemble learning; Background knowledge},
author={Fanaee-T, Hadi and Gama, Joao},
pages={1-15}
}

In [58]:
data = pd.read_csv('../sklearn-pipelines/datasets/bike_sharing_daily.csv')

df = data[['season', 'dteday', 'temp']].copy()
df['dteday'] = pd.to_datetime(df['dteday'])
df

Unnamed: 0,season,dteday,temp
0,1,2011-01-01,0.344167
1,1,2011-01-02,0.363478
2,1,2011-01-03,0.196364
3,1,2011-01-04,0.200000
4,1,2011-01-05,0.226957
...,...,...,...
726,1,2012-12-27,0.254167
727,1,2012-12-28,0.253333
728,1,2012-12-29,0.253333
729,1,2012-12-30,0.255833


# Python - Moving average overall

#### using 'rolling'
* Pandas rolling documentation: https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html

In [59]:
period = 5
df['average_5_periods_rolling'] = df['temp'].rolling(window=period).mean()
df['sum_5_periods_rolling'] = df['temp'].rolling(window=period).sum()
df

Unnamed: 0,season,dteday,temp,average_5_periods_rolling,sum_5_periods_rolling
0,1,2011-01-01,0.344167,,
1,1,2011-01-02,0.363478,,
2,1,2011-01-03,0.196364,,
3,1,2011-01-04,0.200000,,
4,1,2011-01-05,0.226957,0.266193,1.330966
...,...,...,...,...,...
726,1,2012-12-27,0.254167,0.253188,1.265941
727,1,2012-12-28,0.253333,0.254688,1.273441
728,1,2012-12-29,0.253333,0.259094,1.295470
729,1,2012-12-30,0.255833,0.252000,1.259999


# Python - Moving average by group

In [60]:
df['average_5_periods_rolling_by_season'] = df.groupby('season')['temp'].transform(lambda x: x.rolling(window=period).mean())
df

Unnamed: 0,season,dteday,temp,average_5_periods_rolling,sum_5_periods_rolling,average_5_periods_rolling_by_season
0,1,2011-01-01,0.344167,,,
1,1,2011-01-02,0.363478,,,
2,1,2011-01-03,0.196364,,,
3,1,2011-01-04,0.200000,,,
4,1,2011-01-05,0.226957,0.266193,1.330966,0.266193
...,...,...,...,...,...,...
726,1,2012-12-27,0.254167,0.253188,1.265941,0.253188
727,1,2012-12-28,0.253333,0.254688,1.273441,0.254688
728,1,2012-12-29,0.253333,0.259094,1.295470,0.259094
729,1,2012-12-30,0.255833,0.252000,1.259999,0.252000


# SQL - Moving average overall

In [66]:
data = pd.read_csv('../sklearn-pipelines/datasets/bike_sharing_daily.csv')

df = data[['season', 'dteday', 'temp']].copy()
df['dteday'] = pd.to_datetime(df['dteday'])

query = 'SELECT *, ' \
        '       avg(temp) OVER (ORDER BY dteday ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS moving_average, ' \
        '       sum(temp) OVER (ORDER BY dteday ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS moving_sum ' \
        'FROM df' \

mysql(query)

Unnamed: 0,season,dteday,temp,moving_average,moving_sum
0,1,2011-01-01 00:00:00.000000,0.344167,0.344167,0.344167
1,1,2011-01-02 00:00:00.000000,0.363478,0.353823,0.707645
2,1,2011-01-03 00:00:00.000000,0.196364,0.301336,0.904009
3,1,2011-01-04 00:00:00.000000,0.200000,0.276002,1.104009
4,1,2011-01-05 00:00:00.000000,0.226957,0.266193,1.330966
...,...,...,...,...,...
726,1,2012-12-27 00:00:00.000000,0.254167,0.255296,1.531774
727,1,2012-12-28 00:00:00.000000,0.253333,0.253212,1.519274
728,1,2012-12-29 00:00:00.000000,0.253333,0.254462,1.526774
729,1,2012-12-30 00:00:00.000000,0.255833,0.258550,1.551303


# SQL - Moving average per group

In [68]:
query = 'SELECT *, ' \
        '       avg(temp) OVER (ORDER BY dteday ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS moving_average, ' \
        '       avg(temp) OVER (PARTITION BY season ORDER BY dteday ROWS BETWEEN 5 PRECEDING AND CURRENT ROW) AS moving_average ' \
        'FROM df' \

mysql(query)

Unnamed: 0,season,dteday,temp,moving_average,moving_average.1
0,1,2011-01-01 00:00:00.000000,0.344167,0.344167,0.344167
1,1,2011-01-02 00:00:00.000000,0.363478,0.353823,0.353823
2,1,2011-01-03 00:00:00.000000,0.196364,0.301336,0.301336
3,1,2011-01-04 00:00:00.000000,0.200000,0.276002,0.276002
4,1,2011-01-05 00:00:00.000000,0.226957,0.266193,0.266193
...,...,...,...,...,...
726,1,2012-12-27 00:00:00.000000,0.254167,0.255296,0.255296
727,1,2012-12-28 00:00:00.000000,0.253333,0.253212,0.253212
728,1,2012-12-29 00:00:00.000000,0.253333,0.254462,0.254462
729,1,2012-12-30 00:00:00.000000,0.255833,0.258550,0.258550
