In [1]:
import os
import math
import pandas as pd
import numpy as np
from workalendar.europe import Netherlands
from azureml.core import Workspace, Dataset

In [2]:
def add_fourier_features(df, column_name, period, n, period_name = "f"):
    """Append ``n`` Fourier terms of ``df[column_name]`` to ``df`` in place.

    Terms alternate sine/cosine and share a harmonic in pairs: columns
    ``{period_name}_0`` and ``{period_name}_1`` hold the sine and cosine of the
    first harmonic, ``_2`` and ``_3`` those of the second harmonic, and so on.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that is extended with the new columns (modified in place).
    column_name : str
        Column holding the value the harmonics are computed from.
    period : number
        Length of one full cycle, in the same unit as ``df[column_name]``.
    n : int
        Number of Fourier columns to add.
    period_name : str
        Prefix used for the generated column names.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    values = df[column_name]
    for idx in range(n):
        # Even idx -> sine, odd idx -> cosine; both share harmonic idx // 2 + 1
        harmonic = idx // 2 + 1
        trig = np.cos if idx % 2 else np.sin
        df[f'{period_name}_{idx}'] = trig(harmonic * 2 * np.pi * values / period)
    return df

In [3]:
def create_holiday_features(df):
    """Add a boolean ``is_holiday`` column to ``df`` (modified in place).

    A row is flagged when the calendar date of its ``data_index_`` timestamp is
    a holiday in the ``Netherlands`` calendar (created with
    ``include_carnival=True``).

    The holiday list is built for every year that occurs in ``data_index_``
    instead of a fixed set of years, so data from any year is flagged
    correctly.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime column named ``data_index_``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the ``is_holiday`` column added.
    """
    # holiday indicator feature
    cal = Netherlands(include_carnival=True)
    timestamps = df['data_index_']

    # Years actually covered by the data; missing timestamps (NaT) are ignored
    years = sorted({int(year) for year in timestamps.dt.year.dropna().unique()})

    # Make a pandas index with the holidays of interest; the first element of
    # each entry returned by cal.holidays() is the date
    holidates = [entry for year in years for entry in cal.holidays(year)]
    pd_holidays = pd.to_datetime([d[0] for d in holidates])

    # Compare on calendar date only, so the time of day does not matter
    df['is_holiday'] = pd.to_datetime(timestamps.dt.date).isin(pd_holidays)
    return df

In [4]:
def create_workday_weekend_features(df, fourier_order):
    """Split the model inputs into workday-only and weekend/holiday-only copies.

    Adds a boolean ``is_workday`` column to ``df`` (in place): a row is a
    workday when it is not a holiday and ``day_of_week`` is neither 5 nor 6.
    Each input feature is then multiplied by the workday mask and by its
    complement, so every value ends up in exactly one of the two groups and is
    zero in the other.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``is_holiday``, ``day_of_week``, ``temperature``,
        ``solar_ghi`` and ``f_quarter_0`` .. ``f_quarter_{fourier_order - 1}``.
    fourier_order : int
        Number of ``f_quarter_*`` columns to include.

    Returns
    -------
    tuple(dict, dict)
        ``(workday_data, weekend_data)``, keyed ``workday_<feature>`` and
        ``weekend_<feature>`` respectively, each value a pandas Series.
    """
    # A workday is: not a holiday, not day 5 and not day 6
    not_holiday = ~df.is_holiday.astype(bool)
    df['is_workday'] = not_holiday & (df.day_of_week != 5) & (df.day_of_week != 6)

    feature_names = ['temperature', 'solar_ghi']
    feature_names += [f'f_quarter_{order}' for order in range(fourier_order)]

    workday_data = {}
    weekend_data = {}
    for name in feature_names:
        workday_data[f'workday_{name}'] = df[name] * df.is_workday.astype(int)
        weekend_data[f'weekend_{name}'] = df[name] * (~df.is_workday).astype(int)
    return workday_data, weekend_data

In [5]:
# Load the Azure ML workspace with Workspace.from_config()
# (get the workspace from config.json).
ws = Workspace.from_config()
# Default datastore of the workspace; the cells below upload the generated
# sample files to it and build the registered datasets on top of it.
datastore = ws.get_default_datastore()

## Generate real-time data

In [54]:
# Retrieve the dataset registered as "energy_data_realtime" from the workspace
# and load it into a pandas DataFrame.
ds = Dataset.get_by_name(ws, name="energy_data_realtime")
df = ds.to_pandas_dataframe()
# Keep only the last 10 rows. The displayed rows are 15 minutes apart, so this
# covers roughly the last 2.25 hours rather than exactly two hours.
X = df.iloc[-10:]
# Bare expression: shows the selected rows as the cell output.
X

Unnamed: 0,data_index_,solar_prediction_mw,wind_prediction_mw,load_actuals_mw
50487,2021-06-09 21:45:00,0.0,36.29834,96.104623
50488,2021-06-09 22:00:00,0.0,52.180125,94.768896
50489,2021-06-09 22:15:00,0.0,52.216332,92.284519
50490,2021-06-09 22:30:00,0.0,52.100467,90.824687
50491,2021-06-09 22:45:00,0.0,51.901324,90.218876
50492,2021-06-09 23:00:00,0.0,51.823736,90.339627
50493,2021-06-09 23:15:00,0.0,50.666638,88.413236
50494,2021-06-09 23:30:00,0.0,49.584024,87.441277
50495,2021-06-09 23:45:00,0.0,48.517445,85.329757
50496,2021-06-10 00:00:00,0.0,47.509316,84.122788


In [51]:
# Local directory in which the per-sample real-time CSV files are written.
REALTIME_FOLDER = 'realtime-data'
# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(REALTIME_FOLDER, exist_ok=True)

In [52]:
# Save each sample as a separate file.
# Every file holds two lines: the column names, then the values of one row
# (no trailing newline). The file name is the row's index label plus one.
header = X.columns.tolist()
header_line = ','.join(str(c) for c in header)

for index, row in X.iterrows():
    fname = str(index+1) + '.csv'
    # `with` closes the file even if one of the writes raises
    with open(os.path.join(REALTIME_FOLDER, fname), 'w') as f:
        f.write(header_line)
        f.write('\n')
        f.write(','.join(str(v) for v in row.values))

In [62]:
# Upload the files written to REALTIME_FOLDER to a same-named folder on the
# datastore. Reusing the constant keeps the upload source in step with the
# directory the sample files were actually written to.
datastore.upload(src_dir=REALTIME_FOLDER, target_path=REALTIME_FOLDER, overwrite=True, show_progress=True)

Uploading an estimated of 10 files
Uploading realtime-data\50488.csv
Uploaded realtime-data\50488.csv, 1 files out of an estimated total of 10
Uploading realtime-data\50491.csv
Uploaded realtime-data\50491.csv, 2 files out of an estimated total of 10
Uploading realtime-data\50494.csv
Uploaded realtime-data\50494.csv, 3 files out of an estimated total of 10
Uploading realtime-data\50489.csv
Uploaded realtime-data\50489.csv, 4 files out of an estimated total of 10
Uploading realtime-data\50490.csv
Uploaded realtime-data\50490.csv, 5 files out of an estimated total of 10
Uploading realtime-data\50492.csv
Uploaded realtime-data\50492.csv, 6 files out of an estimated total of 10
Uploading realtime-data\50493.csv
Uploaded realtime-data\50493.csv, 7 files out of an estimated total of 10
Uploading realtime-data\50495.csv
Uploaded realtime-data\50495.csv, 8 files out of an estimated total of 10
Uploading realtime-data\50496.csv
Uploaded realtime-data\50496.csv, 9 files out of an estimated total

$AZUREML_DATAREFERENCE_bcd282478332449881f4a9b4317f60d8

In [63]:
# Register a dataset for the real-time input data.
# The real-time sample files are uploaded to the 'realtime-data' folder of the
# datastore, so the file dataset has to be built from that folder; building it
# from 'batch-data/' would register the batch files under the real-time name.
# NOTE(review): the variable is still called `batch_data_set` to keep the
# notebook's existing names unchanged; it holds the real-time dataset here.
batch_data_set = Dataset.File.from_files(path=(datastore, 'realtime-data/'), validate=False)
try:
    batch_data_set = batch_data_set.register(workspace=ws,
                                             name='realtime-data',
                                             description='realtime data for pytown demand energy forecast',
                                             create_new_version=True)
except Exception as ex:
    # Best effort: report the failure without aborting the notebook run
    print(ex)
else:
    # Only report success when the registration actually went through
    print("Done!")

Done!


## Generate batch data

In [55]:
# Retrieve the dataset registered as "energy_data_15_min" from the workspace
# and load it into a pandas DataFrame.
ds = Dataset.get_by_name(ws, name="energy_data_15_min")
df = ds.to_pandas_dataframe()
# Keep only the last 10 rows. The displayed rows are 15 minutes apart, so this
# covers roughly the last 2.25 hours rather than exactly two hours.
X = df.iloc[-10:]
# Bare expression: shows the selected rows as the cell output.
X

Unnamed: 0,data_index_,temperature,solar_ghi,solar_prediction_mw,wind_prediction_mw,load_actuals_mw
50487,2021-06-09 21:45:00,289.144745,0.0,0.0,36.29834,96.104623
50488,2021-06-09 22:00:00,288.980194,0.0,0.0,52.180125,94.768896
50489,2021-06-09 22:15:00,288.802124,0.0,0.0,52.216332,92.284519
50490,2021-06-09 22:30:00,288.624084,0.0,0.0,52.100467,90.824687
50491,2021-06-09 22:45:00,288.446014,0.0,0.0,51.901324,90.218876
50492,2021-06-09 23:00:00,288.254852,0.0,0.0,51.823736,90.339627
50493,2021-06-09 23:15:00,288.050537,0.0,0.0,50.666638,88.413236
50494,2021-06-09 23:30:00,287.846252,0.0,0.0,49.584024,87.441277
50495,2021-06-09 23:45:00,287.641937,0.0,0.0,48.517445,85.329757
50496,2021-06-10 00:00:00,287.930267,0.0,0.0,47.509316,84.122788


In [56]:
# Remove the target column, then derive the calendar features for the model
X = X.drop(columns=['load_actuals_mw'])

timestamps = X['data_index_']
hour_of_day = timestamps.dt.hour

X['day_of_week'] = timestamps.dt.dayofweek
# Six-hour block of the day: hours 1-6 -> 1, 7-12 -> 2, 13-18 -> 3, 19-23 -> 4;
# hour 0 (midnight) is assigned to block 4
six_hour_block = np.ceil(hour_of_day / 6.).astype('int64')
X['quarter_of_day'] = np.where(hour_of_day > 0, six_hour_block, 4)
X = create_holiday_features(X)

# Fourier terms intended to capture the daily pattern in the model
fourier_order = 6

# NOTE(review): the period 4 * 24 = 96 looks like the number of 15-minute slots
# in a day, while quarter_of_day only takes the values 1..4 - confirm this
# matches the feature definition used when the model was trained.
X = add_fourier_features(X, "quarter_of_day", 4 * 24, fourier_order, "f_quarter")

# Separate workday features from weekend/holiday features and combine both
# groups into the frame used as linear-regression input
workday_data, weekend_data = create_workday_weekend_features(X, fourier_order)
X_linregr = pd.DataFrame({**workday_data, **weekend_data})

In [58]:
# Input feature columns: all workday features first, then all weekend features
feat_columns = [*workday_data, *weekend_data]

In [60]:
# Local directory in which the per-sample batch CSV files are written.
BATCH_FOLDER = 'batch-data'
# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(BATCH_FOLDER, exist_ok=True)

In [64]:
# Write one file per sample: a single comma-separated line of feature values
# (no header), named 1.csv, 2.csv, ... in row order
X = X_linregr[feat_columns].to_numpy()
for sample_number, sample in enumerate(X, start=1):
    fname = f'{sample_number}.csv'
    sample.tofile(os.path.join(BATCH_FOLDER, fname), sep=",")

In [65]:
# Upload the files written to BATCH_FOLDER to a same-named folder on the
# datastore. Reusing the constant keeps the upload source in step with the
# directory the sample files were actually written to.
datastore.upload(src_dir=BATCH_FOLDER, target_path=BATCH_FOLDER, overwrite=True, show_progress=True)

Uploading an estimated of 10 files
Uploading batch-data\1.csv
Uploaded batch-data\1.csv, 1 files out of an estimated total of 10
Uploading batch-data\10.csv
Uploaded batch-data\10.csv, 2 files out of an estimated total of 10
Uploading batch-data\2.csv
Uploaded batch-data\2.csv, 3 files out of an estimated total of 10
Uploading batch-data\3.csv
Uploaded batch-data\3.csv, 4 files out of an estimated total of 10
Uploading batch-data\4.csv
Uploaded batch-data\4.csv, 5 files out of an estimated total of 10
Uploading batch-data\5.csv
Uploaded batch-data\5.csv, 6 files out of an estimated total of 10
Uploading batch-data\6.csv
Uploaded batch-data\6.csv, 7 files out of an estimated total of 10
Uploading batch-data\7.csv
Uploaded batch-data\7.csv, 8 files out of an estimated total of 10
Uploading batch-data\8.csv
Uploaded batch-data\8.csv, 9 files out of an estimated total of 10
Uploading batch-data\9.csv
Uploaded batch-data\9.csv, 10 files out of an estimated total of 10
Uploaded 10 files


$AZUREML_DATAREFERENCE_123392323f5a4b78b59f7de882000c33

In [66]:
# Register a dataset for the batch input data, built from the 'batch-data/'
# folder of the datastore. create_new_version=True is passed so that the
# registration can be repeated under the same name.
batch_data_set = Dataset.File.from_files(path=(datastore, 'batch-data/'), validate=False)
try:
    batch_data_set = batch_data_set.register(workspace=ws,
                                             name='batch-data',
                                             description='batch data for pytown demand energy forecast',
                                             create_new_version=True)
except Exception as ex:
    # Best effort: report the failure without aborting the notebook run
    print(ex)
else:
    # Only report success when the registration actually went through
    print("Done!")

Done!
