In [4]:
import datetime as dt
import numpy as np
import pandas as pd

import pickle

from sklearn import base
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn import metrics
from sklearn import ensemble

In [5]:
# Define custom transformers
class HourofDay(base.BaseEstimator, base.TransformerMixin):
    """One-hot encode the hour of day of a timestamp column.

    Each timestamp is shifted forward by ``horizon`` hours before the hour is
    read, so the encoding describes the shifted time rather than the raw one.

    Parameters
    ----------
    column_name : str, default 'target'
        Column of ``X`` holding the timestamps; it must support the pandas
        ``.dt`` accessor.
    horizon : int, default 0
        Number of hours added to every timestamp before extracting the hour.
    """

    def __init__(self, column_name='target', horizon=0):
        self.column_name = column_name
        self.horizon = horizon

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def hour_vector(self, hour):
        """Return a length-24 float vector that is 1 at index ``hour`` and 0 elsewhere."""
        v = np.zeros(24)
        v[hour] = 1
        return v

    def transform(self, X):
        """Return an ``(n_rows, 24)`` float array one-hot encoding the shifted hour.

        An input with no rows yields an array of shape ``(0, 24)``.
        """
        # Adjust the timestamp with the horizon offset
        adjusted_timestamp = X[self.column_name] + pd.DateOffset(hours=self.horizon)

        # Extract the hour of the day (0-23) for the adjusted timestamp
        hour_of_day = adjusted_timestamp.dt.hour.to_numpy()

        # Fill the whole matrix in one indexing step instead of stacking one
        # vector per row: avoids the Python-level loop and, unlike np.stack on
        # an empty list, does not raise when X has no rows.
        encoded = np.zeros((len(hour_of_day), 24))
        encoded[np.arange(len(hour_of_day)), hour_of_day] = 1
        return encoded
    
class DayofWeek(base.BaseEstimator, base.TransformerMixin):
    """One-hot encode the day of week of a timestamp column.

    Each timestamp is shifted forward by ``horizon`` hours before the weekday
    is read, so the encoding describes the shifted time rather than the raw one.

    Parameters
    ----------
    column_name : str, default 'target'
        Column of ``X`` holding the timestamps; it must support the pandas
        ``.dt`` accessor.
    horizon : int, default 0
        Number of hours added to every timestamp before extracting the weekday.
    """

    def __init__(self, column_name='target', horizon=0):
        self.column_name = column_name
        self.horizon = horizon

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def weekday_vector(self, weekday):
        """Return a length-7 float vector that is 1 at index ``weekday`` and 0 elsewhere."""
        v = np.zeros(7)
        v[weekday] = 1
        return v

    def transform(self, X):
        """Return an ``(n_rows, 7)`` float array one-hot encoding the shifted weekday.

        Column 0 corresponds to Monday (pandas ``dt.dayofweek`` numbering).
        An input with no rows yields an array of shape ``(0, 7)``.
        """
        # Adjust the timestamp with the horizon offset
        adjusted_timestamp = X[self.column_name] + pd.DateOffset(hours=self.horizon)

        # Extract the day of the week (0-6) for the adjusted timestamp
        day_of_week = adjusted_timestamp.dt.dayofweek.to_numpy()

        # Fill the whole matrix in one indexing step instead of stacking one
        # vector per row: avoids the Python-level loop and, unlike np.stack on
        # an empty list, does not raise when X has no rows.
        encoded = np.zeros((len(day_of_week), 7))
        encoded[np.arange(len(day_of_week)), day_of_week] = 1
        return encoded

class MonthofYear(base.BaseEstimator, base.TransformerMixin):
    """One-hot encode the month of year of a timestamp column.

    Each timestamp is shifted forward by ``horizon`` hours before the month is
    read, so the encoding describes the shifted time rather than the raw one.

    Parameters
    ----------
    column_name : str, default 'target'
        Column of ``X`` holding the timestamps; it must support the pandas
        ``.dt`` accessor.
    horizon : int, default 0
        Number of hours added to every timestamp before extracting the month.
    """

    def __init__(self, column_name='target', horizon=0):
        self.column_name = column_name
        self.horizon = horizon

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def month_vector(self, month):
        """Return a length-12 float vector that is 1 at index ``month - 1`` and 0 elsewhere."""
        v = np.zeros(12)
        v[month-1] = 1
        return v

    def transform(self, X):
        """Return an ``(n_rows, 12)`` float array one-hot encoding the shifted month.

        Column 0 corresponds to January (month number 1).
        An input with no rows yields an array of shape ``(0, 12)``.
        """
        # Adjust the timestamp with the horizon offset
        adjusted_timestamp = X[self.column_name] + pd.DateOffset(hours=self.horizon)

        # Extract the month of the year (1-12) for the adjusted timestamp
        month_of_year = adjusted_timestamp.dt.month.to_numpy()

        # Fill the whole matrix in one indexing step instead of stacking one
        # vector per row: avoids the Python-level loop and, unlike np.stack on
        # an empty list, does not raise when X has no rows.
        # Months are 1-based, columns are 0-based, hence the "- 1".
        encoded = np.zeros((len(month_of_year), 12))
        encoded[np.arange(len(month_of_year)), month_of_year - 1] = 1
        return encoded
    
class LagTransformer(base.BaseEstimator, base.TransformerMixin):
    """Produce a lagged copy of one column as a single-column DataFrame.

    The column is shifted down by ``lag`` rows and the rows left empty at the
    top are back-filled with the first available value.

    Parameters
    ----------
    column_name : str, default 'label'
        Name of the column in ``X`` to lag.
    lag : int, default 1
        Number of rows to shift by. When it exceeds ``len(X) - 1`` the shift is
        reduced to ``len(X) - 1`` and a message is printed.
    """

    def __init__(self, column_name='label', lag=1):
        self.lag = lag
        self.column_name = column_name

    def fit(self, X, y=None):
        """Nothing is learned from the data; return ``self`` unchanged."""
        return self

    def transform(self, X):
        """Return a one-column DataFrame named ``'<column_name>-<lag used>'``.

        The input ``X`` is not modified.
        """
        max_lag = len(X) - 1

        if self.lag <= max_lag:
            lag = self.lag
        else:
            # Not enough rows for the requested shift: fall back to the largest
            # shift that still leaves one original value to back-fill from.
            print(f'Requested shift {self.lag} > number of records. Returning {self.column_name}-{max_lag}')
            lag = max_lag

        # shift() returns a new Series, so no copy of X is needed.
        # .bfill() replaces the deprecated fillna(method='bfill').
        lagged = X[self.column_name].shift(lag).bfill()

        # Always return a 2-D frame. The fallback path previously returned a
        # 1-D Series, which is inconsistent with the normal path and cannot be
        # horizontally stacked with other 2-D feature blocks.
        return lagged.to_frame(name=f'{self.column_name}-{lag}')

In [6]:
# Load the pickled models
def _load_pickled_model(path):
    """Open ``path`` in binary mode and return the unpickled object."""
    with open(path, 'rb') as f_in:
        return pickle.load(f_in)


# SECURITY: pickle.load can execute arbitrary code; only load files from a
# trusted source.
# NOTE(review): presumably these pickles contain the custom transformer
# classes defined earlier in this notebook, in which case those classes must
# be defined before this cell runs — confirm.
season_model_f0 = _load_pickled_model('base_nowcast.bin')
season_model_f2 = _load_pickled_model('base_forecast_f2.bin')
residual_model_f2 = _load_pickled_model('residual_forecast_f2.bin')

In [7]:
# Define a function to make predictions
def predict_demand(input_data):
    """Forecast demand two hours ahead from recent demand readings.

    The forecast is the sum of a base prediction from ``season_model_f2`` and
    a residual prediction from ``residual_model_f2``; the residual model is fed
    the current-hour anomaly (observed demand minus the ``season_model_f0``
    base prediction) together with shifted copies of it.

    Parameters
    ----------
    input_data : sequence of numbers
        Demand readings in MW, placed in a ``'demand_MW'`` column. The original
        author's note says this should be the past 5 hours of demand.

    Returns
    -------
    The element-wise sum of the base prediction and the residual prediction
    (presumably one value per input row — confirm against the fitted models).
    """
    # NOTE(review): the fitted models' expected input columns are not visible
    # here; confirm that a single 'demand_MW' column matches their training data.
    input_df = pd.DataFrame({'demand_MW': input_data})

    # Use the fitted season models to predict base demand for the current and
    # future hours. expm1 undoes what is presumably a log1p-transformed target
    # — confirm against the training code.
    base_demand_f0 = np.expm1(season_model_f0.predict(input_df))
    base_demand_f2 = np.expm1(season_model_f2.predict(input_df))

    # Demand anomaly for the current hour: observed minus predicted base demand
    demand_anomaly_f0 = input_df['demand_MW'] - base_demand_f0

    # Create lag features for residual modeling (.bfill() replaces the
    # deprecated fillna(method='bfill')).
    # NOTE(review): range(5) starts at lag 0, so the first lag column duplicates
    # demand_anomaly_f0, and every column is named 'demand_MW'. Left unchanged
    # because the feature layout the residual model was fitted on is not
    # visible here — confirm this is intended.
    lags = [demand_anomaly_f0.shift(lag).bfill() for lag in range(5)]

    # Use the fitted residual model to predict the residual demand for 2 hours in the future
    input_features = pd.concat([demand_anomaly_f0] + lags, axis=1)
    pred_residual_f2 = residual_model_f2.predict(input_features)

    # Final forecast for 2 hours in the future: base demand plus predicted residual
    pred_demand_f2 = base_demand_f2 + pred_residual_f2

    return pred_demand_f2

In [None]:
# Gather five hourly demand readings to feed the forecaster.
# NOTE(review): data_hour_1 ... data_hour_5 are placeholders that are not
# defined anywhere in the visible notebook; assign them before running this cell.
input_data = [
    data_hour_1,
    data_hour_2,
    data_hour_3,
    data_hour_4,
    data_hour_5,
]

# Run the two-hour-ahead demand forecast on those readings
forecast = predict_demand(input_data)

print(f"Forecasted demand for 2 hours in the future: {forecast} MW")