In [1]:
import datetime as dt
import numpy as np
import pandas as pd

import pickle

from sklearn import base
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn import ensemble


In [2]:
#define custom transformers
class HourofDay(base.BaseEstimator, base.TransformerMixin):
    """One-hot encode the hour of day of a timestamp column.

    Parameters
    ----------
    column_name : str, default 'target'
        Name of the datetime column of ``X`` to read.
    horizon : int, default 0
        Number of hours added to every timestamp before the hour is extracted.
    """

    def __init__(self, column_name='target', horizon=0):
        self.column_name = column_name
        self.horizon = horizon

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def hour_vector(self, hour):
        """Return a length-24 float vector that is 1 at index ``hour`` and 0 elsewhere."""
        v = np.zeros(24)
        v[hour] = 1
        return v

    def transform(self, X):
        """Return an ``(n_rows, 24)`` float array of one-hot encoded hours.

        The hour is taken from ``X[column_name]`` after shifting it forward by
        ``horizon`` hours.
        """
        # Adjust the timestamp with the horizon offset
        adjusted_timestamp = X[self.column_name] + pd.DateOffset(hours=self.horizon)

        # Extract the hour of the day (0-23) for the adjusted timestamp
        hour_of_day = adjusted_timestamp.dt.hour.to_numpy()

        # Selecting rows of the identity matrix encodes every row in one
        # vectorised step and, unlike np.stack over a per-row list, returns an
        # empty (0, 24) array for empty input instead of raising ValueError.
        return np.eye(24)[hour_of_day]
    
class DayofWeek(base.BaseEstimator, base.TransformerMixin):
    """One-hot encode the day of week of a timestamp column.

    Parameters
    ----------
    column_name : str, default 'target'
        Name of the datetime column of ``X`` to read.
    horizon : int, default 0
        Number of hours added to every timestamp before the weekday is extracted.
    """

    def __init__(self, column_name='target', horizon=0):
        self.column_name = column_name
        self.horizon = horizon

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def weekday_vector(self, weekday):
        """Return a length-7 float vector that is 1 at index ``weekday`` and 0 elsewhere."""
        v = np.zeros(7)
        v[weekday] = 1
        return v

    def transform(self, X):
        """Return an ``(n_rows, 7)`` float array of one-hot encoded weekdays.

        The weekday is taken from ``X[column_name]`` after shifting it forward
        by ``horizon`` hours; index 0 is Monday (pandas ``dt.dayofweek``).
        """
        # Adjust the timestamp with the horizon offset
        adjusted_timestamp = X[self.column_name] + pd.DateOffset(hours=self.horizon)

        # Extract the day of the week (0-6) for the adjusted timestamp
        day_of_week = adjusted_timestamp.dt.dayofweek.to_numpy()

        # Selecting rows of the identity matrix encodes every row in one
        # vectorised step and, unlike np.stack over a per-row list, returns an
        # empty (0, 7) array for empty input instead of raising ValueError.
        return np.eye(7)[day_of_week]

class MonthofYear(base.BaseEstimator, base.TransformerMixin):
    """One-hot encode the calendar month of a timestamp column.

    Parameters
    ----------
    column_name : str, default 'target'
        Name of the datetime column of ``X`` to read.
    horizon : int, default 0
        Number of hours added to every timestamp before the month is extracted.
    """

    def __init__(self, column_name='target', horizon=0):
        self.column_name = column_name
        self.horizon = horizon

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def month_vector(self, month):
        """Return a length-12 float vector that is 1 at index ``month - 1`` and 0 elsewhere."""
        v = np.zeros(12)
        v[month-1] = 1
        return v

    def transform(self, X):
        """Return an ``(n_rows, 12)`` float array of one-hot encoded months.

        The month is taken from ``X[column_name]`` after shifting it forward by
        ``horizon`` hours; column 0 corresponds to month 1.
        """
        # Adjust the timestamp with the horizon offset
        adjusted_timestamp = X[self.column_name] + pd.DateOffset(hours=self.horizon)

        # Extract the month of the year (1-12) for the adjusted timestamp
        month_of_year = adjusted_timestamp.dt.month.to_numpy()

        # Months are 1-based, so subtract 1 to get the column index. Selecting
        # rows of the identity matrix encodes every row in one vectorised step
        # and returns an empty (0, 12) array for empty input instead of the
        # ValueError np.stack raises on an empty list.
        return np.eye(12)[month_of_year - 1]
    
class LagTransformer(base.BaseEstimator, base.TransformerMixin):
    """Produce a lagged copy of one column as a single-column DataFrame.

    Parameters
    ----------
    column_name : str, default 'label'
        Name of the column of ``X`` to shift.
    lag : int, default 1
        Number of rows to shift the column down by.
    """

    def __init__(self, column_name='label', lag=1):
        self.lag = lag
        self.column_name = column_name

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def transform(self, X):
        """Return the lagged column as a one-column DataFrame.

        The output column is named ``'<column_name>-<lag>'`` and keeps the index
        of ``X``. Rows left empty by the shift are back-filled with the next
        available value. If ``lag`` exceeds ``len(X) - 1`` the shift is reduced
        to ``len(X) - 1`` and a message is printed. ``X`` itself is not modified.
        """
        largest_shift = len(X) - 1
        effective_lag = self.lag

        if effective_lag > largest_shift:
            # Fall back to the largest shift that still leaves one real value.
            print(f'Requested shift {self.lag} > number of records. Returning {self.column_name}-{largest_shift}')
            effective_lag = largest_shift

        lagged_name = f'{self.column_name}-{effective_lag}'

        # Series.bfill() replaces the deprecated fillna(method='bfill').
        lagged = X[self.column_name].shift(effective_lag).bfill()

        # Always return a 2-D, one-column frame. The fallback path used to return
        # a 1-D Series, which is inconsistent with the normal path and not a valid
        # feature matrix for downstream estimators.
        return lagged.to_frame(name=lagged_name)

In [3]:
# Restore the three fitted models from their pickle files.
# pickle.load can execute arbitrary code contained in the file, so these
# artifacts must come from a trusted source.
def _read_pickle(path):
    """Open ``path`` in binary mode and return the object unpickled from it."""
    with open(path, 'rb') as handle:
        return pickle.load(handle)


season_model_f0 = _read_pickle('base_nowcast.bin')
season_model_f2 = _read_pickle('base_forecast_f2.bin')
residual_model_f2 = _read_pickle('residual_forecast_f2.bin')

In [24]:
# Sample payload: five consecutive hourly demand readings, shaped like the
# JSON request body that predict-style code receives.
input_json = {
    'data': [
        {'date_time': stamp, 'demand_MW': megawatts}
        for stamp, megawatts in (
            ('2019-07-02 00:00:00', 17918),
            ('2019-07-02 01:00:00', 17400),
            ('2019-07-02 02:00:00', 16700),
            ('2019-07-02 03:00:00', 15190),
            ('2019-07-02 04:00:00', 13623),
        )
    ]
}

# Build the frame and fix the dtypes in one chained expression:
# timestamps are parsed to datetime64, demand is cast to float.
test_df = pd.DataFrame(input_json['data']).assign(
    date_time=lambda frame: pd.to_datetime(frame['date_time']),
    demand_MW=lambda frame: frame['demand_MW'].astype(float),
)

In [25]:
# Sanity check: confirm the column dtypes and non-null counts of the sample frame.
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   date_time  5 non-null      datetime64[ns]
 1   demand_MW  5 non-null      float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 212.0 bytes


In [5]:
# Store the f0 seasonal model's prediction as a new column. np.expm1 is the
# inverse of log1p, so this presumably undoes a log1p transform applied to the
# training target — TODO confirm against the training notebook.
# NOTE: this mutates test_df in place; the cells below rely on this column existing.
test_df['base_demand_f0'] = np.expm1(season_model_f0.predict(test_df))

In [6]:
# Same inverse transform (expm1) applied to the f2 seasonal model's prediction;
# the result is kept as a standalone array (one value per input row) rather
# than a column of test_df.
base_demand_f2 = np.expm1(season_model_f2.predict(test_df))
base_demand_f2

array([17934.82994512, 16594.01968172, 15269.32974356, 14378.77351081,
       13959.767421  ])

In [7]:
# Anomaly = observed demand minus the f0 seasonal baseline, stored as a new
# column (in-place mutation of test_df).
test_df['demand_anomaly_f0'] = test_df['demand_MW'] - test_df.base_demand_f0
test_df.head()

Unnamed: 0,date_time,demand_MW,base_demand_f0,demand_anomaly_f0
0,2019-07-02 00:00:00,17918.0,19200.860439,-1282.860439
1,2019-07-02 01:00:00,17400.0,18864.549754,-1464.549754
2,2019-07-02 02:00:00,16700.0,17987.453369,-1287.453369
3,2019-07-02 03:00:00,15190.0,16642.709191,-1452.709191
4,2019-07-02 04:00:00,13623.0,15313.665261,-1690.665261


In [8]:
# Run the residual model on test_df, which by now carries the base_demand_f0 and
# demand_anomaly_f0 columns added by the earlier cells. Unlike the seasonal
# models, no expm1 is applied to this output.
demand_anomaly_f2 = residual_model_f2.predict(test_df)
demand_anomaly_f2

array([-1071.01611934, -1435.8496306 , -1062.95375993, -1389.62678814,
       -1714.02232646])

In [9]:
# Final figure: f2 seasonal baseline plus predicted anomaly, both taken from the
# last row only (the most recent record in the sample input).
pred_demand_f2 = base_demand_f2[-1] + demand_anomaly_f2[-1]
pred_demand_f2

12245.745094542379

In [30]:
# Timestamp of the last row of test_df.
# NOTE(review): despite the name, no horizon offset is added here, whereas
# predict_f2 adds pd.DateOffset(hours=2) to the same value — confirm which one
# this exploratory cell was meant to show.
pred_time = test_df['date_time'].iloc[-1]

pred_time

Timestamp('2019-07-02 04:00:00')

In [31]:
# Define a function to make predictions
def predict_f2(input_json):
    """Forecast demand two hours after the most recent supplied record.

    Relies on the module-level models ``season_model_f0``, ``season_model_f2``
    and ``residual_model_f2``.

    Parameters
    ----------
    input_json : dict
        Payload whose ``'data'`` entry is a list of records, each with a
        ``'date_time'`` value parseable by ``pd.to_datetime`` and a numeric
        ``'demand_MW'`` value. The records may be supplied in any order; they
        are sorted chronologically before use.

    Returns
    -------
    tuple of (str, int)
        The forecast timestamp formatted as ``'%Y-%m-%d %H:%M:%S'`` and the
        forecast demand, truncated toward zero to a whole number by ``int``.

    Raises
    ------
    ValueError
        If ``input_json['data']`` contains no records.
    """
    horizon_hours = 2

    records = input_json['data']
    if len(records) == 0:
        # Fail with a clear message instead of a KeyError on the missing column.
        raise ValueError("input_json['data'] must contain at least one demand record")

    # Convert the input data into a DataFrame with parsed timestamps and float demand
    input_df = pd.DataFrame(records)
    input_df['date_time'] = pd.to_datetime(input_df['date_time'])
    input_df['demand_MW'] = input_df['demand_MW'].astype(float)

    # "Last row" is used below as "latest observation", so enforce chronological
    # order rather than trusting the caller. mergesort keeps ties in input order.
    input_df = input_df.sort_values('date_time', kind='mergesort').reset_index(drop=True)

    # Seasonal baselines for the current hour and the forecast horizon; expm1 is
    # the inverse of log1p and is applied to both seasonal model outputs.
    input_df['base_demand_f0'] = np.expm1(season_model_f0.predict(input_df))
    base_demand_f2 = np.expm1(season_model_f2.predict(input_df))

    # Anomaly of the observed demand against the current-hour baseline, then the
    # residual model's anomaly prediction (it sees the columns added above).
    input_df['demand_anomaly_f0'] = input_df['demand_MW'] - input_df['base_demand_f0']
    demand_anomaly_f2 = residual_model_f2.predict(input_df)

    # Final forecast: baseline plus anomaly, taken from the most recent record
    pred_demand_f2 = base_demand_f2[-1] + demand_anomaly_f2[-1]

    # Column-first access keeps the Timestamp dtype instead of building a
    # mixed-dtype row Series first.
    pred_time = input_df['date_time'].iloc[-1] + pd.DateOffset(hours=horizon_hours)
    pred_timestring = pred_time.strftime('%Y-%m-%d %H:%M:%S')

    return pred_timestring, int(pred_demand_f2)

In [34]:
# Sample request: five consecutive hourly demand readings
records = dict(
    data=[
        dict(date_time=stamp, demand_MW=megawatts)
        for stamp, megawatts in (
            ('2019-07-02 00:00:00', 17918),
            ('2019-07-02 01:00:00', 17400),
            ('2019-07-02 02:00:00', 16700),
            ('2019-07-02 03:00:00', 15190),
            ('2019-07-02 04:00:00', 13623),
        )
    ]
)

# Ask for the forecast 2 hours ahead and unpack the (timestamp string, MW) pair
time_f2, forecast = predict_f2(records)

print(f"Forecasted demand for {time_f2}: {forecast} MW")

Forecasted demand for 2019-07-02 06:00:00: 12245 MW


In [33]:
# NOTE(review): the saved output below (`tuple`) carries execution count 33, so it
# was recorded before the In [34] cell above was last run. In the current code
# `forecast` is the second element unpacked from predict_f2's return value, which
# is built with int(...), so a fresh run should display `int`. Restart the kernel
# and run all cells to refresh this output.
type(forecast)

tuple