In [1]:
#!/usr/bin/env python
# coding: utf-8


#libraries

import pandas as pd
import numpy  as np 
import pickle
import joblib
import http.client
import json
import time
import csv
from   datetime import datetime, timedelta
import holidays
import sqlite3
import re
import sys
if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

# Configure the NRGStream API client

class NRGStreamApi:
    """Small client for the NRGStream REST API.

    The client obtains a bearer token with the password grant, downloads
    stream data (or stream data options) over HTTPS and releases the token
    after each request.

    Parameters
    ----------
    username, password : str, optional
        API credentials. When omitted they are read from the environment
        variables ``NRGSTREAM_USERNAME`` and ``NRGSTREAM_PASSWORD`` so that
        no secret has to be stored in the notebook.
    """

    # Maps the short format name accepted by the public methods to the
    # value sent in the HTTP ``Accept`` header.
    _DATA_FORMATS = {'csv': 'text/csv', 'json': 'Application/json'}

    def __init__(self, username=None, password=None):
        # Local imports: the notebook's import cell does not provide these names.
        import os
        from urllib.parse import urlencode

        self.username = username if username is not None else os.environ.get('NRGSTREAM_USERNAME', '')
        self.password = password if password is not None else os.environ.get('NRGSTREAM_PASSWORD', '')
        self.server = 'api.nrgstream.com'
        self.tokenPath = '/api/security/token'
        self.releasePath = '/api/ReleaseToken'
        # urlencode keeps credentials containing '&', '=', spaces, ... intact.
        self.tokenPayload = urlencode({
            'grant_type': 'password',
            'username': self.username,
            'password': self.password,
        })
        # Start with an already-expired token so the first call fetches one.
        self.tokenExpiry = datetime.now() - timedelta(seconds=60)
        self.accessToken = ""

    def getToken(self):
        """Fetch a new access token unless the current one is still valid.

        Failures are printed rather than raised; callers check
        ``isTokenValid()`` afterwards.
        """
        if self.isTokenValid():
            return
        conn = None
        try:
            conn = http.client.HTTPSConnection(self.server)
            conn.request('POST', self.tokenPath, self.tokenPayload, {})
            res = conn.getresponse()
            body = res.read().decode('utf-8')
            if res.status == 200:
                jsonData = json.loads(body)
                self.accessToken = jsonData['access_token']
                self.tokenExpiry = datetime.now() + timedelta(seconds=jsonData['expires_in'])
            else:
                print(body)
        except Exception as e:
            print("getToken: " + str(e))
            # Release the token if an error occurred after it was issued.
            self.releaseToken()
        finally:
            if conn is not None:
                conn.close()

    def releaseToken(self):
        """Release the current access token on the server (best effort)."""
        if not self.accessToken:
            # Nothing was issued (or it was already released).
            return
        conn = None
        try:
            headers = {'Authorization': f'Bearer {self.accessToken}'}
            conn = http.client.HTTPSConnection(self.server)
            conn.request('DELETE', self.releasePath, None, headers)
            res = conn.getresponse()
            res.read()
            if res.status == 200:
                # Move the expiry into the past so isTokenValid() returns False.
                self.tokenExpiry = datetime.now() - timedelta(seconds=60)
                self.accessToken = ""
        except Exception as e:
            print("releaseToken: " + str(e))
        finally:
            if conn is not None:
                conn.close()

    def isTokenValid(self):
        """Return True while the locally stored token has not expired."""
        return self.tokenExpiry is not None and datetime.now() < self.tokenExpiry

    def _fetch(self, path, dataFormat):
        """GET ``path`` with the current token; return (status, reason, text).

        Raises ``KeyError`` for an unknown ``dataFormat``. The connection is
        always closed, whatever the outcome.
        """
        headers = {
            'Accept': self._DATA_FORMATS[dataFormat],
            'Authorization': f'Bearer {self.accessToken}',
        }
        conn = http.client.HTTPSConnection(self.server)
        try:
            conn.request('GET', path, None, headers)
            res = conn.getresponse()
            return res.status, res.reason, res.read().decode('utf-8')
        finally:
            conn.close()

    @staticmethod
    def _formatBody(text, dataFormat):
        """Normalise a response body: LF newlines for csv, pretty-printed json."""
        if dataFormat == 'csv':
            return text.replace('\r\n', '\n')
        if dataFormat == 'json':
            return json.dumps(json.loads(text), indent=2, sort_keys=False)
        return ""

    def GetStreamDataByStreamId(self, streamIds, fromDate, toDate, dataFormat='csv', dataOption=''):
        """Download data for every id in ``streamIds`` and concatenate it.

        Parameters
        ----------
        streamIds : iterable
            Stream ids to request, one HTTP request per id.
        fromDate, toDate : str
            Date range; only sent when both are non-empty.
        dataFormat : {'csv', 'json'}
        dataOption : str
            Optional ``dataOption`` query parameter.

        Returns
        -------
        str or None
            The concatenated response bodies, or ``None`` if any step raised.
            Non-200 responses are printed and contribute nothing.
        """
        stream_data = ""
        try:
            for streamId in streamIds:
                self.getToken()
                if self.isTokenValid():
                    path = f'/api/StreamData/{streamId}'
                    query = []
                    if fromDate != '' and toDate != '':
                        query.append(f'fromDate={fromDate.replace(" ", "%20")}')
                        query.append(f'toDate={toDate.replace(" ", "%20")}')
                    if dataOption != '':
                        query.append(f'dataOption={dataOption}')
                    if query:
                        path += '?' + '&'.join(query)

                    status, reason, body = self._fetch(path, dataFormat)
                    if status == 200:
                        print(f'{datetime.now()} Outputing stream {path} res code {status}')
                        stream_data += self._formatBody(body, dataFormat)
                    else:
                        print(str(status) + " - " + str(reason) + " - " + str(body))

                self.releaseToken()
                # Wait 1 second before the next request.
                time.sleep(1)
            return stream_data
        except Exception as e:
            print(str(e))
            self.releaseToken()
            return None

    def StreamDataOptions(self, streamId, dataFormat='csv'):
        """Return the data options of one or several streams.

        Parameters
        ----------
        streamId : scalar or list/tuple/set
            A single stream id or a collection of ids (duplicates are
            requested only once).
        dataFormat : {'csv', 'json'}

        Returns
        -------
        dict or None
            ``{stream id: response text}``, or ``None`` if any step raised.
        """
        if isinstance(streamId, (list, tuple, set, frozenset)):
            streamIds = streamId
        else:
            streamIds = [streamId]
        resultSet = {}
        try:
            for currentId in streamIds:
                if currentId in resultSet:
                    continue
                self.getToken()
                if self.isTokenValid():
                    path = f'/api/StreamDataOptions/{currentId}'
                    status, reason, body = self._fetch(path, dataFormat)
                    self.releaseToken()
                    resultSet[currentId] = self._formatBody(body, dataFormat)
                time.sleep(1)
            return resultSet
        except Exception as e:
            print(str(e))
            self.releaseToken()
            return None


# Authenticate with your NRG Stream username and password.
# Credentials are read from the NRGSTREAM_USERNAME / NRGSTREAM_PASSWORD
# environment variables; never commit them to the notebook.
nrgStreamApi = NRGStreamApi()

# in-memory weather database loading- deleted

#conn = sqlite3.connect('weather_db.db')
#c = conn.cursor()
#c.execute('''  
#SELECT * FROM HISTORICALFCAST
#          ''')
#results = c.fetchall()
#weather_df = pd.DataFrame(results)

#c.close()
#conn.close()

# Hourly timestamps for the forecast day: 24 steps starting at local midnight today.
now_time = datetime.now()
year_of_run, month_of_run, day_of_run = now_time.year, now_time.month, now_time.day
forecast_start = datetime(year=year_of_run, month=month_of_run, day=day_of_run)
forecast_interval_stamps = pd.date_range(start=forecast_start, periods=24, freq='h')

# Input frame for the 24-hour forecast; the column order is assembled from
# three groups: calendar features, lagged AIL features, per-city weather.
_calendar_columns = [
    'hour_of_day', 'day', 'hour_x_day',
    'sin.day', 'cos.day', 'sin.hour', 'cos.hour',
    'weekend', 'month', 'year', 'sunlight_avaialbility',
]
_lag_columns = ['AIL_previous_hour', 'AIL_24h_lagged']
_lag_columns += ['AIL_{}day_lagged'.format(n_days) for n_days in range(2, 7)]
_lag_columns += ['AIL_oneweek_lagged']
_weather_columns = [
    '{}_{}'.format(measure, city)
    for city in ('calgary', 'edmonton', 'ftmcmry', 'lthbrg', 'mdcnht', 'rddr', 'slvlk')
    for measure in ('temp', 'wind')
]
column_list = _calendar_columns + _lag_columns + _weather_columns

prediction_df = pd.DataFrame(columns=column_list)


###########################################################################
####################### Feature Generation ################################
###########################################################################

hrs = list(range(24))
prediction_df['hour_of_day'] = hrs

# Calendar features.
# NOTE(review): 'day' holds the day of the MONTH, yet the cyclic encoding
# below uses a period of 365 -- confirm this matches how the model was trained.
prediction_df['day'] = forecast_interval_stamps.day
prediction_df['hour_x_day'] = prediction_df['day'] * prediction_df['hour_of_day']

day_angle = prediction_df['day'] * 2 * np.pi / 365
hour_angle = prediction_df['hour_of_day'] * 2 * np.pi / 24
prediction_df['sin.day'] = np.sin(day_angle)
prediction_df['cos.day'] = np.cos(day_angle)
prediction_df['sin.hour'] = np.sin(hour_angle)
prediction_df['cos.hour'] = np.cos(hour_angle)

# weekday() is 5 for Saturday and 6 for Sunday.
weekdays = [stamp.weekday() for stamp in forecast_interval_stamps]
prediction_df['weekend'] = [int(weekday >= 5) for weekday in weekdays]

In [2]:
# List every column with its position and dtype.
# Iterating over dtypes.items() avoids `prediction_df.dtypes[i]`, which relies
# on the deprecated positional fallback of Series.__getitem__ for a
# label-indexed Series.
for position, (column_name, column_dtype) in enumerate(prediction_df.dtypes.items()):
    print(position, '--->', column_name, '--->', column_dtype)

0 ---> hour_of_day ---> int64
1 ---> day ---> int64
2 ---> hour_x_day ---> int64
3 ---> sin.day ---> float64
4 ---> cos.day ---> float64
5 ---> sin.hour ---> float64
6 ---> cos.hour ---> float64
7 ---> weekend ---> int64
8 ---> month ---> object
9 ---> year ---> object
10 ---> sunlight_avaialbility ---> object
11 ---> AIL_previous_hour ---> object
12 ---> AIL_24h_lagged ---> object
13 ---> AIL_2day_lagged ---> object
14 ---> AIL_3day_lagged ---> object
15 ---> AIL_4day_lagged ---> object
16 ---> AIL_5day_lagged ---> object
17 ---> AIL_6day_lagged ---> object
18 ---> AIL_oneweek_lagged ---> object
19 ---> temp_calgary ---> object
20 ---> wind_calgary ---> object
21 ---> temp_edmonton ---> object
22 ---> wind_edmonton ---> object
23 ---> temp_ftmcmry ---> object
24 ---> wind_ftmcmry ---> object
25 ---> temp_lthbrg ---> object
26 ---> wind_lthbrg ---> object
27 ---> temp_mdcnht ---> object
28 ---> wind_mdcnht ---> object
29 ---> temp_rddr ---> object
30 ---> wind_rddr ---> object
31 ---> 

In [3]:
# Calendar month of each forecast timestamp, then display the frame.
prediction_df['month'] = forecast_interval_stamps.month
prediction_df

Unnamed: 0,hour_of_day,day,hour_x_day,sin.day,cos.day,sin.hour,cos.hour,weekend,month,year,...,temp_ftmcmry,wind_ftmcmry,temp_lthbrg,wind_lthbrg,temp_mdcnht,wind_mdcnht,temp_rddr,wind_rddr,temp_slvlk,wind_slvlk
0,0,16,0,0.271958,0.962309,0.0,1.0,0,2,,...,,,,,,,,,,
1,1,16,16,0.271958,0.962309,0.258819,0.9659258,0,2,,...,,,,,,,,,,
2,2,16,32,0.271958,0.962309,0.5,0.8660254,0,2,,...,,,,,,,,,,
3,3,16,48,0.271958,0.962309,0.7071068,0.7071068,0,2,,...,,,,,,,,,,
4,4,16,64,0.271958,0.962309,0.8660254,0.5,0,2,,...,,,,,,,,,,
5,5,16,80,0.271958,0.962309,0.9659258,0.258819,0,2,,...,,,,,,,,,,
6,6,16,96,0.271958,0.962309,1.0,6.123234000000001e-17,0,2,,...,,,,,,,,,,
7,7,16,112,0.271958,0.962309,0.9659258,-0.258819,0,2,,...,,,,,,,,,,
8,8,16,128,0.271958,0.962309,0.8660254,-0.5,0,2,,...,,,,,,,,,,
9,9,16,144,0.271958,0.962309,0.7071068,-0.7071068,0,2,,...,,,,,,,,,,


In [4]:
# Broadcast the calendar year of the run to every forecast row.
prediction_df['year'] = year_of_run

In [5]:
# Sunlight-availability feature: 1 when the forecast hour lies between the
# sunrise and sunset hours read from the Edmonton table, else 0.

# Full Gregorian leap-year rule (century years are leap only if divisible by 400).
_run_year = int(year_of_run)
is_leapyear = (_run_year % 4 == 0 and _run_year % 100 != 0) or _run_year % 400 == 0

# df = dataframe, e = Edmonton, ss = sunrise and sunset.
# The '2015' sheet is used for non-leap years and the '2016' sheet for leap years.
with pd.ExcelFile('edmonton_sunrise_sunset.xls') as xls:
    dfess_2015 = pd.read_excel(xls, sheet_name='2015')[['Sunrise_hr', 'Sunset_hr']]
    dfess_2016 = pd.read_excel(xls, sheet_name='2016')[['Sunrise_hr', 'Sunset_hr']]

sun_table = dfess_2016 if is_leapyear else dfess_2015

# Table rows are addressed positionally with (day - 1), as before.
# NOTE(review): 'day' is the day of the MONTH here, while the table presumably
# has one row per day of the YEAR -- if so, every lookup lands in January.
# Confirm against the training pipeline before switching to dayofyear.
table_rows = prediction_df['day'].to_numpy(dtype='int64') - 1
sunrise_hr = sun_table['Sunrise_hr'].to_numpy()[table_rows]
sunset_hr = sun_table['Sunset_hr'].to_numpy()[table_rows]
forecast_hour = prediction_df['hour_of_day'].to_numpy()

prediction_df['sunlight_avaialbility'] = (
    (forecast_hour >= sunrise_hr) & (forecast_hour <= sunset_hr)
).astype('int64')

prediction_df['sunlight_avaialbility']

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     1
9     1
10    1
11    1
12    1
13    1
14    1
15    1
16    1
17    0
18    0
19    0
20    0
21    0
22    0
23    0
Name: sunlight_avaialbility, dtype: int64

In [6]:
# Lagged AIL features: the 24 hourly AIL values of each of the previous 7 days.

AIL_STREAM_ID = 3      # stream id requested for AIL
AIL_FIRST_ROW = 14     # first parsed row that holds an hourly record
HOURS_PER_DAY = 24

# Feature column -> how many days before the run date it is taken from.
AIL_LAG_COLUMNS = {
    'AIL_24h_lagged': 1,
    'AIL_2day_lagged': 2,
    'AIL_3day_lagged': 3,
    'AIL_4day_lagged': 4,
    'AIL_5day_lagged': 5,
    'AIL_6day_lagged': 6,
    'AIL_oneweek_lagged': 7,
}


def fetch_ail_for_day(days_back):
    """Return the 24 hourly AIL values of the day `days_back` days before the run.

    Parameters
    ----------
    days_back : int
        Number of days to go back from ``now_time``.

    Returns
    -------
    numpy.ndarray
        float64 array of length 24; values that cannot be parsed become NaN.

    Raises
    ------
    RuntimeError
        If the API returned no data (request failed or empty response).
    ValueError
        If the response does not contain 24 hourly rows.
    """
    target_day = now_time - timedelta(days=days_back)
    day_string = '{}/{}/{}'.format(target_day.month, target_day.day, target_day.year)
    raw_csv = nrgStreamApi.GetStreamDataByStreamId(
        [AIL_STREAM_ID], day_string, day_string, 'csv', ''
    )
    if not raw_csv:
        raise RuntimeError('No AIL data returned for {}'.format(day_string))

    # sep=";" keeps every response line intact in one column; the slice then
    # takes the 24 rows used as hourly "Datetime,AIL" records (the rows before
    # them are presumably stream metadata -- confirm against a raw response).
    response_lines = pd.read_csv(StringIO(raw_csv), sep=";")
    hourly_lines = response_lines.iloc[AIL_FIRST_ROW:AIL_FIRST_ROW + HOURS_PER_DAY, 0]
    ail_values = pd.to_numeric(
        hourly_lines.str.split(",", n=2, expand=True)[1], errors='coerce'
    )
    if len(ail_values) != HOURS_PER_DAY:
        raise ValueError(
            'Expected {} hourly AIL rows for {}, got {}'.format(
                HOURS_PER_DAY, day_string, len(ail_values)
            )
        )
    return ail_values.to_numpy(dtype='float64')


for lag_column, days_back in AIL_LAG_COLUMNS.items():
    prediction_df[lag_column] = fetch_ail_for_day(days_back)

2021-02-16 11:06:25.898540 Outputing stream /api/StreamData/3?fromDate=2/15/2021&toDate=2/15/2021 res code 200
2021-02-16 11:06:27.433597 Outputing stream /api/StreamData/3?fromDate=2/14/2021&toDate=2/14/2021 res code 200
2021-02-16 11:06:28.970669 Outputing stream /api/StreamData/3?fromDate=2/13/2021&toDate=2/13/2021 res code 200
2021-02-16 11:06:30.652932 Outputing stream /api/StreamData/3?fromDate=2/12/2021&toDate=2/12/2021 res code 200
2021-02-16 11:06:32.781548 Outputing stream /api/StreamData/3?fromDate=2/11/2021&toDate=2/11/2021 res code 200
2021-02-16 11:06:34.837509 Outputing stream /api/StreamData/3?fromDate=2/10/2021&toDate=2/10/2021 res code 200
2021-02-16 11:06:36.318343 Outputing stream /api/StreamData/3?fromDate=2/9/2021&toDate=2/9/2021 res code 200


In [7]:
# Display the feature frame as it stands at this point of the notebook.
prediction_df

Unnamed: 0,hour_of_day,day,hour_x_day,sin.day,cos.day,sin.hour,cos.hour,weekend,month,year,...,temp_ftmcmry,wind_ftmcmry,temp_lthbrg,wind_lthbrg,temp_mdcnht,wind_mdcnht,temp_rddr,wind_rddr,temp_slvlk,wind_slvlk
0,0,16,0,0.271958,0.962309,0.0,1.0,0,2,2021,...,,,,,,,,,,
1,1,16,16,0.271958,0.962309,0.258819,0.9659258,0,2,2021,...,,,,,,,,,,
2,2,16,32,0.271958,0.962309,0.5,0.8660254,0,2,2021,...,,,,,,,,,,
3,3,16,48,0.271958,0.962309,0.7071068,0.7071068,0,2,2021,...,,,,,,,,,,
4,4,16,64,0.271958,0.962309,0.8660254,0.5,0,2,2021,...,,,,,,,,,,
5,5,16,80,0.271958,0.962309,0.9659258,0.258819,0,2,2021,...,,,,,,,,,,
6,6,16,96,0.271958,0.962309,1.0,6.123234000000001e-17,0,2,2021,...,,,,,,,,,,
7,7,16,112,0.271958,0.962309,0.9659258,-0.258819,0,2,2021,...,,,,,,,,,,
8,8,16,128,0.271958,0.962309,0.8660254,-0.5,0,2,2021,...,,,,,,,,,,
9,9,16,144,0.271958,0.962309,0.7071068,-0.7071068,0,2,2021,...,,,,,,,,,,


In [8]:
# Weather features: hourly temperature and wind per city for the forecast day.

fromDate = '{}/{}/{}'.format(month_of_run, day_of_run, year_of_run)
toDate = fromDate

# City suffix used in the feature column names -> stream id requested for it.
WEATHER_STREAMS = {
    'calgary': 242498,
    'edmonton': 242497,
    'ftmcmry': 242500,
    'lthbrg': 242508,
    'mdcnht': 242511,
    'rddr': 242519,
    'slvlk': 242522,
}
WEATHER_FIRST_ROW = 16   # rows before this one are header information
WEATHER_HOURS = 24


def fetch_city_weather(city, stream_id):
    """Return a 24-row float frame with columns ``temp_<city>`` and ``wind_<city>``.

    Raises
    ------
    RuntimeError
        If the API returned no data (request failed or empty response).
    ValueError
        If the response does not contain 24 hourly rows, or a temperature /
        wind value cannot be converted to float.
    """
    raw_csv = nrgStreamApi.GetStreamDataByStreamId([stream_id], fromDate, toDate, 'csv', '')
    if not raw_csv:
        raise RuntimeError('No weather data returned for {} (stream {})'.format(city, stream_id))

    # sep=";" keeps every response line intact in one column; each hourly line
    # is then split as "Datetime,temp,wind,direction".
    response_lines = pd.read_csv(StringIO(raw_csv), sep=";")
    hourly_lines = response_lines.iloc[WEATHER_FIRST_ROW:WEATHER_FIRST_ROW + WEATHER_HOURS, 0]
    if len(hourly_lines) != WEATHER_HOURS:
        raise ValueError(
            'Expected {} hourly weather rows for {}, got {}'.format(
                WEATHER_HOURS, city, len(hourly_lines)
            )
        )
    fields = hourly_lines.str.split(",", n=4, expand=True)
    city_weather = pd.DataFrame({
        'temp_' + city: fields[1],
        'wind_' + city: fields[2],
    }).reset_index(drop=True)
    # Builtin float dtype: the `np.float` alias was removed in NumPy 1.24.
    return city_weather.astype('float64')


alberta_weather_merged = pd.concat(
    [fetch_city_weather(city, stream_id) for city, stream_id in WEATHER_STREAMS.items()],
    axis=1,
)

# Assign by column name instead of by position (19:33) so the cell keeps
# working if the layout of prediction_df changes.
for weather_column in alberta_weather_merged.columns:
    prediction_df[weather_column] = alberta_weather_merged[weather_column].to_numpy()

prediction_df

2021-02-16 11:06:37.866874 Outputing stream /api/StreamData/242498?fromDate=2/16/2021&toDate=2/16/2021 res code 200
2021-02-16 11:06:39.975171 Outputing stream /api/StreamData/242497?fromDate=2/16/2021&toDate=2/16/2021 res code 200
2021-02-16 11:06:41.491373 Outputing stream /api/StreamData/242500?fromDate=2/16/2021&toDate=2/16/2021 res code 200
2021-02-16 11:06:43.060062 Outputing stream /api/StreamData/242508?fromDate=2/16/2021&toDate=2/16/2021 res code 200
2021-02-16 11:06:44.642203 Outputing stream /api/StreamData/242511?fromDate=2/16/2021&toDate=2/16/2021 res code 200
2021-02-16 11:06:46.292510 Outputing stream /api/StreamData/242519?fromDate=2/16/2021&toDate=2/16/2021 res code 200
2021-02-16 11:06:47.789652 Outputing stream /api/StreamData/242522?fromDate=2/16/2021&toDate=2/16/2021 res code 200


Unnamed: 0,hour_of_day,day,hour_x_day,sin.day,cos.day,sin.hour,cos.hour,weekend,month,year,...,temp_ftmcmry,wind_ftmcmry,temp_lthbrg,wind_lthbrg,temp_mdcnht,wind_mdcnht,temp_rddr,wind_rddr,temp_slvlk,wind_slvlk
0,0,16,0,0.271958,0.962309,0.0,1.0,0,2,2021,...,-21.0,15.0,-20.0,10.0,-20.0,10.0,-22.0,10.0,-21.0,15.0
1,1,16,16,0.271958,0.962309,0.258819,0.9659258,0,2,2021,...,-21.0,15.0,-24.0,10.0,-21.0,10.0,-23.0,10.0,-21.0,15.0
2,2,16,32,0.271958,0.962309,0.5,0.8660254,0,2,2021,...,-22.0,15.0,-25.0,10.0,-21.0,10.0,-23.0,10.0,-21.0,15.0
3,3,16,48,0.271958,0.962309,0.7071068,0.7071068,0,2,2021,...,-22.0,15.0,-24.0,10.0,-22.0,10.0,-23.0,10.0,-21.0,15.0
4,4,16,64,0.271958,0.962309,0.8660254,0.5,0,2,2021,...,-23.0,15.0,-24.0,10.0,-22.0,10.0,-24.0,10.0,-22.0,15.0
5,5,16,80,0.271958,0.962309,0.9659258,0.258819,0,2,2021,...,-22.0,5.0,-22.0,10.0,-23.0,10.0,-24.0,5.0,-21.0,10.0
6,6,16,96,0.271958,0.962309,1.0,6.123234000000001e-17,0,2,2021,...,-22.0,5.0,-22.0,10.0,-22.0,10.0,-24.0,5.0,-21.0,10.0
7,7,16,112,0.271958,0.962309,0.9659258,-0.258819,0,2,2021,...,-23.0,5.0,-21.0,10.0,-20.0,10.0,-23.0,5.0,-20.0,10.0
8,8,16,128,0.271958,0.962309,0.8660254,-0.5,0,2,2021,...,-23.0,5.0,-21.0,10.0,-19.0,10.0,-23.0,5.0,-20.0,10.0
9,9,16,144,0.271958,0.962309,0.7071068,-0.7071068,0,2,2021,...,-22.0,5.0,-18.0,10.0,-18.0,10.0,-21.0,5.0,-19.0,10.0


In [9]:
# Keep only the columns listed below, in this order.
reduced_column_list = [
    # calendar
    'hour_of_day', 'day', 'hour_x_day', 'weekend', 'month', 'year',
    'sunlight_avaialbility',
    # lagged AIL
    'AIL_previous_hour', 'AIL_24h_lagged', 'AIL_2day_lagged', 'AIL_3day_lagged',
    'AIL_4day_lagged', 'AIL_5day_lagged', 'AIL_6day_lagged', 'AIL_oneweek_lagged',
    # weather (temperature only)
    'temp_calgary', 'temp_edmonton',
]

prediction_df = prediction_df.reindex(columns=reduced_column_list)

In [10]:
#prediction_df.to_csv('input_data_{}_{}_{}_generate_date_{}_v8_only_temp.csv'.format(day_of_run, month_of_run, year_of_run, now_time))

In [11]:
# Generate the forecast

# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files from a trusted source.
#loaded_model = joblib.load('model_generated_on_13_2_2021_v6_non_holiday.joblib.dat')
loaded_model = joblib.load('model_generated_on_14_2_2021_v8_non_holiday_mape_loss_only_temp.joblib.dat')

# Forecast output: one row per forecast timestamp. 'values' starts as float
# zeros so the float predictions can be written without a dtype change.
forecast = pd.DataFrame({
    'datetime_of_forecast': forecast_interval_stamps.values,
    'values': np.zeros(len(forecast_interval_stamps), dtype='float64'),
})
values_col = forecast.columns.get_loc('values')

previous_hour_col = prediction_df.columns.get_loc('AIL_previous_hour')
lagged_24h_col = prediction_df.columns.get_loc('AIL_24h_lagged')
n_hours = len(prediction_df)

# Seed the first hour with the last hour of the previous day.
prediction_df.iloc[0, previous_hour_col] = prediction_df.iloc[n_hours - 1, lagged_24h_col]

# Recursive forecast: each prediction becomes AIL_previous_hour of the next hour.
for hour in range(n_hours):
    # Explicit float64 row: the frame contains object columns, so a plain
    # np.array(row) would hand the model an object-dtype array.
    input_data = prediction_df.iloc[hour].to_numpy(dtype='float64').reshape((1, -1))
    predictions = loaded_model.predict(input_data)
    forecast.iloc[hour, values_col] = predictions[0]
    if hour + 1 < n_hours:
        prediction_df.iloc[hour + 1, previous_hour_col] = predictions[0]

forecast
#forecast.to_csv('forecast_{}_{}_{}_generate_date_{}.csv'.format(day_of_run, month_of_run, year_of_run, now_time ))

Unnamed: 0,datetime_of_forecast,values
0,2021-02-16 00:00:00,10217.751953
1,2021-02-16 01:00:00,10248.555664
2,2021-02-16 02:00:00,10381.03125
3,2021-02-16 03:00:00,10533.449219
4,2021-02-16 04:00:00,10663.148438
5,2021-02-16 05:00:00,10849.947266
6,2021-02-16 06:00:00,11086.061523
7,2021-02-16 07:00:00,11346.711914
8,2021-02-16 08:00:00,11264.489258
9,2021-02-16 09:00:00,11269.237305
