# Pepco Substation Load Forecasting
This notebook goes through the process used to fit the models and create the load forecasts.

In [1]:
from Code import OSI as osi
from Code import featureengineering as fe

from Code import pipeline as pipe

from sklearn.externals import joblib
import pandas as pd
import numpy as np
import time
from datetime import datetime

Welcome to PIthon!!


In [2]:
# Show the pipeline used to create the model: the Pipeline object exposed by
# Code.pipeline (a FeatureUnion of date/time transformers feeding an estimator).
# It is left as a bare last expression so the notebook renders its repr.
pipe.pipeline

Pipeline(steps=[('features', FeatureUnion(n_jobs=1,
       transformer_list=[('Hour of Day', HourOfDayTransformer()), ('Day of Week', DayofWeekTransformer()), ('Month_of_year', MonthTransformer()), ('Day_of_month', DayofMonthTransformer()), ('Year', YearTransformer()), ('hour dummies', HourDummies()), ('mont...imators=300, n_jobs=1, oob_score=False, random_state=None,
           verbose=0, warm_start=False))])

In [6]:
def build_and_save_models(load_tags, 
                          weather_tags = ['NWS_KDCA_DewPoint_F','NWS_KDCA_Temperature_F','NWS_KDCA_WindSpeed_mph'],
                         start ='Jan 1 2010',
                          end = '*',
                          freq = '1h',
                         summarytype = osi.AFSummaryTypes.Average):
    """Fit one model per load tag on log(load) and save each fit to ``<tag>.sav``.

    All load and weather tags are pulled in a single call to
    ``osi.Pull_Multi_PIData``. For every load tag, the rows where that tag and
    all weather tags are present are used to fit ``pipe.pipeline`` against the
    natural log of the load, and the fitted pipeline is written with
    ``joblib.dump`` to ``<tag>.sav`` in the current working directory.

    Parameters
    ----------
    load_tags : list of str, or str
        Load tag name(s) to model. A single string is treated as one tag.
    weather_tags : sequence of str
        Tag names used as model inputs.
    start, end, freq, summarytype
        Forwarded unchanged to ``osi.Pull_Multi_PIData``
        (presumably PI time expressions / interval / summary type - see that helper).

    Notes
    -----
    Rows whose load is zero or negative are dropped before fitting because the
    log target is undefined there. A tag left with no usable rows is skipped
    (a message is printed) instead of aborting the remaining tags.
    """
    # Set starttime
    start_time = time.time()
    print(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

    # Normalise inputs: accept a bare tag name, and work on copies so the
    # shared default list (and the caller's lists) are never aliased.
    if isinstance(load_tags, str):
        load_tags = [load_tags]
    load_tags = list(load_tags)
    weather_tags = list(weather_tags)

    # Create an all tags list
    all_tags = load_tags + weather_tags
    # Pull all weather and historic tags
    print("Pulling PI data for the following points:")
    print(*all_tags, sep='\n')
    df = osi.Pull_Multi_PIData(all_tags, start, end, freq, complete_cases=False, summarytype=summarytype)
    print("Done pulling at " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")) + " Taking " + str(time.time() - start_time))
    # Loop through each load tag
    for load in load_tags:
        # Make mini dataframe with only the one load tag, complete cases only
        print("Filtering data for only: "+str(load))
        df_mini = df[[load]+weather_tags].dropna(axis=0, how='any')

        # log() of a non-positive load is -inf/NaN, which would make the fit
        # fail; keep only strictly positive loads and report what was removed.
        positive_load = df_mini[load] > 0
        n_dropped = int((~positive_load).sum())
        if n_dropped:
            print("Dropping " + str(n_dropped) + " rows with non-positive load for: " + str(load))
            df_mini = df_mini[positive_load]
        if df_mini.empty:
            print("No usable rows for: " + str(load) + " - skipping this tag")
            continue

        # Create X and y variables; the target is passed as a 1-D array
        # (a column-shaped target reportedly caused an error in fit).
        X = df_mini.drop([load], axis = 1)
        y_log = np.log(df_mini[load].values)

        print("Fitting Model for: " + str(load) + " at time: " + str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
        # pipe.pipeline is one shared object that is refit for every tag; it is
        # dumped right after fitting so each file holds that tag's fit.
        pipe.pipeline.fit(X, y_log)

        # Save model as file
        filename = str(load)+'.sav'
        print("Saving fitted model for: " + str(load) + " with filename: " + str(filename))
        print("So far this script has been running for: " + str(time.time() - start_time))
        joblib.dump(pipe.pipeline, filename)

In [5]:
# Fit and save one model per substation load tag, using the default
# weather tags, date range and summary type.
build_and_save_models(
    load_tags=[
        'PEPCO.BEVFRM_MVA.AV',
        'PEPCO.BLADBG_MVA.AV',
        'PEPCO.BML-13_MVA.AV',
        'PEPCO.BELTVL_MVA.AV',
    ]
)

Pulling PI data for all of the points
Filtering data for only: PEPCO.BEVFRM_MVA.AV
Fitting Model for: PEPCO.BEVFRM_MVA.AV
Saving fitted model for: PEPCO.BEVFRM_MVA.AV with filename: PEPCO.BEVFRM_MVA.AV.sav
Filtering data for only: PEPCO.BLADBG_MVA.AV
Fitting Model for: PEPCO.BLADBG_MVA.AV
Saving fitted model for: PEPCO.BLADBG_MVA.AV with filename: PEPCO.BLADBG_MVA.AV.sav
Filtering data for only: PEPCO.BML-13_MVA.AV
Fitting Model for: PEPCO.BML-13_MVA.AV
Saving fitted model for: PEPCO.BML-13_MVA.AV with filename: PEPCO.BML-13_MVA.AV.sav
Filtering data for only: PEPCO.BELTVL_MVA.AV
Fitting Model for: PEPCO.BELTVL_MVA.AV
Saving fitted model for: PEPCO.BELTVL_MVA.AV with filename: PEPCO.BELTVL_MVA.AV.sav


In [26]:
from sklearn.externals import joblib
import pandas as pd
import numpy as np

def predict_and_store(points):
    """Predict the next 7 days of load for each tag and store the forecasts.

    Hourly forecast weather is pulled once with ``osi.Pull_Multi_PIData``
    (``'t'`` to ``'t+7d'``), passed through ``osi.rename_forecast_cols``
    (presumably so column names match those used at fit time - confirm in
    that helper), and then fed to each saved model. Models are fit on
    log(load), so predictions are exponentiated before being stored.

    Parameters
    ----------
    points : iterable of str, or str
        Load tag names. For each tag the model file ``<tag>.sav`` is loaded
        from the current working directory and the forecast is written via
        ``osi.Store_Preds`` to the point named by replacing the tag's final
        dot-separated component with ``Forecast``
        (``PEPCO.X_MVA.AV`` -> ``PEPCO.X_MVA.Forecast``).

    Raises
    ------
    ValueError
        If the weather forecast pull returns no rows.
    """
    # Accept a bare tag name; iterating a str would yield single characters.
    if isinstance(points, str):
        points = [points]

    X_future = osi.Pull_Multi_PIData(
        pitags = ['NWS_KDCA_DewPoint_F.Forecast','NWS_KDCA_Temperature_F.Forecast','NWS_KDCA_WindSpeed_mph.Forecast'], 
        start= 't',
        end = 't+7d',
        freq = '1h',
        complete_cases=True,
        summarytype=osi.AFSummaryTypes.Average)
    
    X_future = osi.rename_forecast_cols(X_future)

    # Fail early with a clear message rather than inside the model's predict().
    if len(X_future) == 0:
        raise ValueError("Weather forecast pull returned no rows; nothing to predict")
    
    for model in points:
        tag = str(model)
        filename = tag+'.sav'
        # NOTE: joblib.load unpickles the file, which can execute arbitrary
        # code - only load model files produced by a trusted process.
        loaded_model = joblib.load(filename)

        # Undo the log transform applied to the target at fit time.
        fut_predictions = pd.DataFrame(np.exp(loaded_model.predict(X_future)), index=X_future.index, columns=['Future_Predictions'])
        
        # Swap the last dot-separated component (e.g. 'AV') for 'Forecast'
        # instead of assuming the suffix is exactly two characters long.
        base, dot, _suffix = tag.rpartition('.')
        storepoint = (base if dot else tag) + '.Forecast'
        print("Storing forecast for: " + str(storepoint))
        osi.Store_Preds(fut_predictions, valuecol='Future_Predictions',pointname=storepoint)

In [25]:
# Produce and store forecasts for the same substation load tags
# whose models were saved above.
predict_and_store(
    [
        'PEPCO.BEVFRM_MVA.AV',
        'PEPCO.BLADBG_MVA.AV',
        'PEPCO.BML-13_MVA.AV',
        'PEPCO.BELTVL_MVA.AV',
    ]
)

Sotring forecast for: PEPCO.SHDYGR_MVA.Forecast
Sotring forecast for: PEPCO.POTMAC_MVA.Forecast
