In [1]:
#!/usr/bin/env python
# coding: utf-8


#libraries

import calendar
import csv
import http.client
import json
import os
import pickle
import re
import sqlite3
import sys
import time
import urllib.parse
from datetime import datetime, timedelta

import holidays
import joblib
import numpy as np
import pandas as pd

if sys.version_info[0] < 3: 
    from StringIO import StringIO
else:
    from io import StringIO

# Configure the NRGStream API client

class NRGStreamApi:
    """Small client for the NRGStream REST API.

    The client requests a bearer token before each download, fetches stream
    data (or stream data options) over HTTPS and releases the token again
    afterwards. Errors are reported with ``print`` and signalled to the
    caller through a ``None`` return value rather than by raising.
    """

    def __init__(self, username=None, password=None):
        """Store the credentials and initialise an (expired) token state.

        Args:
            username: NRGStream user name. When omitted, the
                ``NRGSTREAM_USERNAME`` environment variable is used.
            password: NRGStream password. When omitted, the
                ``NRGSTREAM_PASSWORD`` environment variable is used.
        """
        # Never keep real credentials in the notebook source: take them from
        # the caller or, failing that, from the environment.
        if username is None:
            username = os.environ.get('NRGSTREAM_USERNAME')
        if password is None:
            password = os.environ.get('NRGSTREAM_PASSWORD')
        self.username = username
        self.password = password
        self.server = 'api.nrgstream.com'
        self.tokenPath = '/api/security/token'
        self.releasePath = '/api/ReleaseToken'
        # URL-encode the form body so credentials containing '&', '=' or
        # spaces do not corrupt the request.
        self.tokenPayload = urllib.parse.urlencode({
            'grant_type': 'password',
            'username': self.username or '',
            'password': self.password or '',
        })
        # Start with an already-expired token so the first call fetches one.
        self.tokenExpiry = datetime.now() - timedelta(seconds=60)
        self.accessToken = ""

    def getToken(self):
        """Request a new access token unless the current one is still valid.

        On success ``accessToken`` and ``tokenExpiry`` are updated. On failure
        the problem is printed and the token state is left invalid.
        """
        conn = None
        try:
            if self.isTokenValid():
                return
            if not self.username or not self.password:
                raise ValueError(
                    'NRGStream credentials are missing; pass them to NRGStreamApi() '
                    'or set NRGSTREAM_USERNAME / NRGSTREAM_PASSWORD'
                )
            headers = {}
            # Connect to API server to get a token
            conn = http.client.HTTPSConnection(self.server)
            conn.request('POST', self.tokenPath, self.tokenPayload, headers)
            res = conn.getresponse()
            res_data = res.read()
            if res.status == 200:
                # Decode the token into an object
                jsonData = json.loads(res_data.decode('utf-8'))
                self.accessToken = jsonData['access_token']
                # 'expires_in' is added to the current time as a number of seconds
                self.tokenExpiry = datetime.now() + timedelta(seconds=jsonData['expires_in'])
            else:
                print(res_data.decode('utf-8'))
        except Exception as e:
            print("getToken: " + str(e))
            # Release token if an error occurred
            self.releaseToken()
        finally:
            if conn is not None:
                conn.close()

    def releaseToken(self):
        """Release the current access token on the server, if one is held.

        A successful release marks the local token as expired so that
        ``isTokenValid()`` returns False. Errors are printed, not raised.
        """
        if not self.accessToken:
            # Nothing to release; avoid a pointless round trip.
            return
        conn = None
        try:
            headers = {'Authorization': f'Bearer {self.accessToken}'}
            conn = http.client.HTTPSConnection(self.server)
            conn.request('DELETE', self.releasePath, None, headers)
            res = conn.getresponse()
            res.read()
            if res.status == 200:
                # Set expiration date back to guarantee isTokenValid() returns false
                self.tokenExpiry = datetime.now() - timedelta(seconds=60)
                self.accessToken = ""
        except Exception as e:
            print("releaseToken: " + str(e))
        finally:
            if conn is not None:
                conn.close()

    def isTokenValid(self):
        """Return True when a token expiry is set and lies in the future."""
        return self.tokenExpiry is not None and datetime.now() < self.tokenExpiry

    def GetStreamDataByStreamId(self, streamIds, fromDate, toDate, dataFormat='csv', dataOption=''):
        """Download one or more streams and return their concatenated text.

        Args:
            streamIds: Iterable of stream ids to download, in order.
            fromDate: Start date string; sent only when both dates are non-empty.
            toDate: End date string; sent only when both dates are non-empty.
            dataFormat: ``'csv'`` or ``'json'``; selects the ``Accept`` header.
            dataOption: Optional ``dataOption`` query value ('' to omit).

        Returns:
            The concatenated response text of all successfully downloaded
            streams (CSV with ``\\r\\n`` normalised to ``\\n``, or re-indented
            JSON), or ``None`` when an exception occurred.
        """
        DataFormats = {'csv': 'text/csv', 'json': 'Application/json'}
        stream_data = ""

        try:
            for streamId in streamIds:
                # Get an access token
                self.getToken()
                if self.isTokenValid():
                    # Setup the path for data request. Pass dates in via function call
                    path = f'/api/StreamData/{streamId}'
                    query = []
                    if fromDate != '' and toDate != '':
                        query.append(f'fromDate={fromDate.replace(" ", "%20")}')
                        query.append(f'toDate={toDate.replace(" ", "%20")}')
                    if dataOption != '':
                        query.append(f'dataOption={dataOption}')
                    if query:
                        path += '?' + '&'.join(query)

                    # Create request header
                    headers = {
                        'Accept': DataFormats[dataFormat],
                        'Authorization': f'Bearer {self.accessToken}',
                    }

                    # Connect to API server; always close the connection,
                    # whatever the status code or outcome.
                    conn = http.client.HTTPSConnection(self.server)
                    try:
                        conn.request('GET', path, None, headers)
                        res = conn.getresponse()
                        res_code = res.status
                        body = res.read().decode('utf-8')
                    finally:
                        conn.close()

                    if res_code == 200:
                        try:
                            print(f'{datetime.now()} Outputing stream {path} res code {res_code}')
                            if dataFormat == 'csv':
                                stream_data += body.replace('\r\n', '\n')
                            elif dataFormat == 'json':
                                stream_data += json.dumps(json.loads(body), indent=2, sort_keys=False)
                        except Exception as e:
                            print(str(e))
                            self.releaseToken()
                            return None
                    else:
                        print(str(res_code) + " - " + str(res.reason) + " - " + str(body))

                self.releaseToken()
                # Wait 1 second before next request
                time.sleep(1)
            return stream_data
        except Exception as e:
            print(str(e))
            self.releaseToken()
            return None

    def StreamDataOptions(self, streamId, dataFormat='csv'):
        """Fetch the available data options for one or several streams.

        Args:
            streamId: A single stream id, or an iterable of stream ids.
            dataFormat: ``'csv'`` or ``'json'``; selects the ``Accept`` header.

        Returns:
            A dict mapping each stream id to its response text, or ``None``
            when an exception occurred.
        """
        # The parameter is named in the singular but the body iterates, so
        # accept both a scalar id and a collection of ids.
        if isinstance(streamId, (str, bytes)) or not hasattr(streamId, '__iter__'):
            streamIds = [streamId]
        else:
            streamIds = list(streamId)

        try:
            DataFormats = {'csv': 'text/csv', 'json': 'Application/json'}
            resultSet = {}
            for currentId in streamIds:
                # Skip ids that were already fetched
                if currentId not in resultSet:
                    self.getToken()
                    if self.isTokenValid():
                        # Setup the path for data request.
                        path = f'/api/StreamDataOptions/{currentId}'
                        # Create request header
                        headers = {
                            'Accept': DataFormats[dataFormat],
                            'Authorization': f'Bearer {self.accessToken}',
                        }
                        # Connect to API server
                        conn = http.client.HTTPSConnection(self.server)
                        try:
                            conn.request('GET', path, None, headers)
                            res = conn.getresponse()
                            body = res.read().decode('utf-8')
                        finally:
                            conn.close()
                        self.releaseToken()
                        if dataFormat == 'csv':
                            resultSet[currentId] = body.replace('\r\n', '\n')
                        elif dataFormat == 'json':
                            resultSet[currentId] = json.dumps(json.loads(body), indent=2, sort_keys=False)
                    time.sleep(1)
            return resultSet
        except Exception as e:
            print(str(e))
            self.releaseToken()
            return None

# Authenticate with your NRG Stream username and password.
# The credentials are read from the NRGSTREAM_USERNAME / NRGSTREAM_PASSWORD
# environment variables so that no secret is stored in the notebook.
nrgStreamApi = NRGStreamApi()

# in-memory weather database loading- deleted

#conn = sqlite3.connect('weather_db.db')
#c = conn.cursor()
#c.execute('''  
#SELECT * FROM HISTORICALFCAST
#          ''')
#results = c.fetchall()
#weather_df = pd.DataFrame(results)

#c.close()
#conn.close()

# Build the 24 hourly timestamps of the forecast day (midnight of the run date onwards)

now_time                 = datetime.now()
forecast_start           = now_time.replace(hour=0, minute=0, second=0, microsecond=0)
year_of_run, month_of_run, day_of_run = (
    forecast_start.year,
    forecast_start.month,
    forecast_start.day,
)
forecast_interval_stamps = pd.date_range(start=forecast_start, periods=24, freq='h')

# Empty frame that will hold the model input for the 24-hour forecast.
# Column order is significant: later cells address columns by position and
# each row is handed to the model as a plain positional array.

calendar_columns = [
    'hour_of_day', 'off_peak', 'on_peak',
    'day', 'sin.day', 'cos.day', 'sin.hour', 'cos.hour', 'weekend',
]
weekday_flag_columns = [
    'monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday',
]
month_flag_columns = ['month_{}'.format(month_index) for month_index in range(12)]
hour_flag_columns  = ['hour_{}'.format(hour_index) for hour_index in range(24)]
load_columns = [
    'year', 'sunlight_avaialbility', 'AIL_previous_hour', 'AIL_24h_lagged',
    'AIL_2day_lagged', 'AIL_3day_lagged', 'AIL_4day_lagged',
    'AIL_5day_lagged', 'AIL_6day_lagged', 'AIL_oneweek_lagged',
]

column_list = (
    calendar_columns
    + weekday_flag_columns
    + month_flag_columns
    + hour_flag_columns
    + load_columns
)

prediction_df            = pd.DataFrame(columns = column_list)


###########################################################################
####################### Feature Generation ################################
###########################################################################

# One row per forecast hour (0..23)
hrs                          = list(range(24))
prediction_df['hour_of_day'] = hrs

# On-peak hours are 07:00-19:00 inclusive; every other hour is off-peak
in_peak_window            = prediction_df['hour_of_day'].between(7, 19)
prediction_df['on_peak']  = np.where(in_peak_window, 1, 0)
prediction_df['off_peak'] = np.where(in_peak_window, 0, 1)

# Calendar features and their cyclic encodings.
# NOTE(review): 'day' is the day of the month (DatetimeIndex.day) although it is
# scaled by 365 below as if it were the day of the year - confirm which of the
# two the model was trained on before changing either side.
prediction_df['day'] = forecast_interval_stamps.day

day_angle  = prediction_df['day']*2*np.pi/365 + np.pi/4
hour_angle = prediction_df['hour_of_day']*2*np.pi/24

prediction_df['sin.day']  = np.sin(day_angle)
prediction_df['cos.day']  = np.cos(day_angle)
prediction_df['sin.hour'] = np.sin(hour_angle)
prediction_df['cos.hour'] = np.cos(hour_angle)

# Saturday (5) and Sunday (6) count as weekend
weekdays                  = [stamp.weekday() for stamp in forecast_interval_stamps]
prediction_df['weekend']  = [int(weekday_number >= 5) for weekday_number in weekdays]

In [2]:
# List every input column with its position and dtype.
# Iterating over the dtypes Series avoids `prediction_df.dtypes[i]`, which relies on
# integer keys being treated as positions on a label-indexed Series (deprecated in pandas).
for position, (column_name, column_dtype) in enumerate(prediction_df.dtypes.items()):
    print(position, '--->', column_name, '--->', column_dtype)

0 ---> hour_of_day ---> int64
1 ---> off_peak ---> int64
2 ---> on_peak ---> int64
3 ---> day ---> int64
4 ---> sin.day ---> float64
5 ---> cos.day ---> float64
6 ---> sin.hour ---> float64
7 ---> cos.hour ---> float64
8 ---> weekend ---> int64
9 ---> monday ---> object
10 ---> tuesday ---> object
11 ---> wednesday ---> object
12 ---> thursday ---> object
13 ---> friday ---> object
14 ---> saturday ---> object
15 ---> sunday ---> object
16 ---> month_0 ---> object
17 ---> month_1 ---> object
18 ---> month_2 ---> object
19 ---> month_3 ---> object
20 ---> month_4 ---> object
21 ---> month_5 ---> object
22 ---> month_6 ---> object
23 ---> month_7 ---> object
24 ---> month_8 ---> object
25 ---> month_9 ---> object
26 ---> month_10 ---> object
27 ---> month_11 ---> object
28 ---> hour_0 ---> object
29 ---> hour_1 ---> object
30 ---> hour_2 ---> object
31 ---> hour_3 ---> object
32 ---> hour_4 ---> object
33 ---> hour_5 ---> object
34 ---> hour_6 ---> object
35 ---> hour_7 ---> object
36 --

In [3]:
# One-hot encode the weekday of the forecast date.
# Columns are addressed by name rather than by position, and the weekday is taken
# from datetime.weekday() (Monday == 0) instead of the locale-dependent strftime("%A").
# Assigning whole columns also gives them an integer dtype instead of leaving the
# original object dtype in place.
weekday_columns = ['monday', 'tuesday', 'wednesday', 'thursday', 'friday', 'saturday', 'sunday']
name_of_day = weekday_columns[forecast_start.weekday()]
for column_name in weekday_columns:
    prediction_df[column_name] = int(column_name == name_of_day)
prediction_df[weekday_columns]

Unnamed: 0,monday,tuesday,wednesday,thursday,friday,saturday,sunday
0,0,0,0,0,0,1,0
1,0,0,0,0,0,1,0
2,0,0,0,0,0,1,0
3,0,0,0,0,0,1,0
4,0,0,0,0,0,1,0
5,0,0,0,0,0,1,0
6,0,0,0,0,0,1,0
7,0,0,0,0,0,1,0
8,0,0,0,0,0,1,0
9,0,0,0,0,0,1,0


In [4]:
# One-hot encode the month of the forecast date; month_0 is January, month_11 is December.
# Columns are addressed by name rather than by position, and whole-column assignment
# gives them an integer dtype instead of leaving the original object dtype in place.
number_of_month = forecast_start.strftime("%m")
month_columns   = ['month_{}'.format(month_index) for month_index in range(12)]
active_month    = 'month_{}'.format(int(number_of_month) - 1)
for column_name in month_columns:
    prediction_df[column_name] = int(column_name == active_month)
prediction_df[month_columns]

Unnamed: 0,month_0,month_1,month_2,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11
0,0,1,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,0,0,0,0,0,0
4,0,1,0,0,0,0,0,0,0,0,0,0
5,0,1,0,0,0,0,0,0,0,0,0,0
6,0,1,0,0,0,0,0,0,0,0,0,0
7,0,1,0,0,0,0,0,0,0,0,0,0
8,0,1,0,0,0,0,0,0,0,0,0,0
9,0,1,0,0,0,0,0,0,0,0,0,0


In [5]:
# One-hot encode the hour of day: hour_k is 1 on the row whose hour_of_day equals k.
# Deriving each flag from 'hour_of_day' (instead of writing an identity matrix into
# fixed column positions) does not depend on row or column order, and whole-column
# assignment yields int64 columns directly.
hour_columns = ['hour_{}'.format(hour_index) for hour_index in range(24)]
for hour_index, column_name in enumerate(hour_columns):
    prediction_df[column_name] = (prediction_df['hour_of_day'] == hour_index).astype('int64')
prediction_df[hour_columns]

Unnamed: 0,hour_0,hour_1,hour_2,hour_3,hour_4,hour_5,hour_6,hour_7,hour_8,hour_9,...,hour_14,hour_15,hour_16,hour_17,hour_18,hour_19,hour_20,hour_21,hour_22,hour_23
0,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,0,0,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
8,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
9,0,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


In [6]:
# Year of the run, repeated on every row. The column is addressed by name rather than
# by its position (52) so the cell keeps working if the column layout changes, and the
# whole-column assignment gives it an integer dtype.
prediction_df['year'] = year_of_run
prediction_df['year']

0     2021
1     2021
2     2021
3     2021
4     2021
5     2021
6     2021
7     2021
8     2021
9     2021
10    2021
11    2021
12    2021
13    2021
14    2021
15    2021
16    2021
17    2021
18    2021
19    2021
20    2021
21    2021
22    2021
23    2021
Name: year, dtype: int64

In [7]:
# Sunlight-availability flag: 1 when the forecast hour lies between the sunrise and
# sunset hour (both inclusive), otherwise 0.

# Leap-year check using the full Gregorian rule (years divisible by 400 are leap years)
is_leapyear = calendar.isleap(int(year_of_run))

# Sunrise/sunset hour tables for Edmonton: sheet '2015' is used for common years,
# sheet '2016' for leap years. The workbook is closed as soon as both sheets are read.
with pd.ExcelFile('edmonton_sunrise_sunset.xls') as xls:
    dfess_2015 = pd.read_excel(xls, sheet_name='2015')[['Sunrise_hr', 'Sunset_hr']]
    dfess_2016 = pd.read_excel(xls, sheet_name='2016')[['Sunrise_hr', 'Sunset_hr']]
sun_table = dfess_2016 if is_leapyear else dfess_2015

# NOTE(review): 'day' is used here as a 1-based row number into the sunrise/sunset
# table. Elsewhere in this notebook 'day' is filled with the day of the month, so the
# lookup presumably always lands in the first 31 rows - confirm whether the table is
# indexed by day of year and which convention the model was trained with.
table_rows  = prediction_df['day'].astype('int64').to_numpy() - 1
sunrise     = sun_table['Sunrise_hr'].to_numpy()[table_rows]
sunset      = sun_table['Sunset_hr'].to_numpy()[table_rows]
hour_of_day = prediction_df['hour_of_day'].to_numpy()

prediction_df['sunlight_avaialbility'] = (
    (hour_of_day >= sunrise) & (hour_of_day <= sunset)
).astype('int64')

prediction_df['sunlight_avaialbility']

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     1
9     1
10    1
11    1
12    1
13    1
14    1
15    1
16    1
17    0
18    0
19    0
20    0
21    0
22    0
23    0
Name: sunlight_avaialbility, dtype: int64

In [8]:
# Lagged AIL features: the 24 hourly values of stream 3 for each of the previous 7 days.

AIL_STREAM_ID = 3


def fetch_hourly_ail(days_back):
    """Return the 24 hourly values of the AIL stream for a past calendar day.

    Args:
        days_back: Number of days before ``now_time`` of the day to download.

    Returns:
        A float64 numpy array of length 24; values that cannot be parsed as
        numbers become NaN.

    Raises:
        RuntimeError: if the download failed or returned no text.
        ValueError: if the response does not contain the expected 24 hourly rows.
    """
    target_day = now_time + pd.Timedelta(days=-days_back)
    fromDate   = '{}/{}/{}'.format(target_day.month, target_day.day, target_day.year)
    toDate     = fromDate
    stream_data = nrgStreamApi.GetStreamDataByStreamId([AIL_STREAM_ID], fromDate, toDate, 'csv', '')
    if not stream_data:
        # GetStreamDataByStreamId returns None on errors and '' when nothing was downloaded
        raise RuntimeError('No data returned for stream {} on {}'.format(AIL_STREAM_ID, fromDate))

    # Reading with ';' as separator keeps each text line in a single column.
    raw_lines = pd.read_csv(StringIO(stream_data), sep=";")
    if raw_lines.shape[1] != 1:
        raise ValueError('Unexpected layout of the stream export for {}'.format(fromDate))

    # Rows 14..37 of the parsed text are taken as the 24 "Datetime,AIL" records
    # (presumably the lines before them are export metadata - TODO confirm).
    hourly_lines = raw_lines.iloc[14:38, 0]
    fields       = hourly_lines.str.split(",", n = 2, expand = True)
    ail_values   = pd.to_numeric(fields[1], errors='coerce').to_numpy(dtype='float64')
    if len(ail_values) != 24:
        raise ValueError('Expected 24 hourly values for {}, got {}'.format(fromDate, len(ail_values)))
    return ail_values


# Feature column -> how many days back the values are taken from
lag_feature_days = {
    'AIL_24h_lagged':     1,
    'AIL_2day_lagged':    2,
    'AIL_3day_lagged':    3,
    'AIL_4day_lagged':    4,
    'AIL_5day_lagged':    5,
    'AIL_6day_lagged':    6,
    'AIL_oneweek_lagged': 7,
}

for column_name, days_back in lag_feature_days.items():
    prediction_df[column_name] = fetch_hourly_ail(days_back)

2021-02-13 14:08:30.947192 Outputing stream /api/StreamData/3?fromDate=2/12/2021&toDate=2/12/2021 res code 200
2021-02-13 14:08:33.207038 Outputing stream /api/StreamData/3?fromDate=2/11/2021&toDate=2/11/2021 res code 200
2021-02-13 14:08:34.796034 Outputing stream /api/StreamData/3?fromDate=2/10/2021&toDate=2/10/2021 res code 200
2021-02-13 14:08:37.828642 Outputing stream /api/StreamData/3?fromDate=2/9/2021&toDate=2/9/2021 res code 200
2021-02-13 14:08:39.770479 Outputing stream /api/StreamData/3?fromDate=2/8/2021&toDate=2/8/2021 res code 200
2021-02-13 14:08:41.674855 Outputing stream /api/StreamData/3?fromDate=2/7/2021&toDate=2/7/2021 res code 200
2021-02-13 14:08:44.267666 Outputing stream /api/StreamData/3?fromDate=2/6/2021&toDate=2/6/2021 res code 200


In [9]:
# Display the assembled model-input frame for a visual check before forecasting.
# The commented-out line below would save the same frame to a CSV file named after the run date.
prediction_df
#prediction_df.to_csv('input_data_{}_{}_{}_v6model_generate_date_{}.csv'.format(day_of_run, month_of_run, year_of_run, now_time))

Unnamed: 0,hour_of_day,off_peak,on_peak,day,sin.day,cos.day,sin.hour,cos.hour,weekend,monday,...,year,sunlight_avaialbility,AIL_previous_hour,AIL_24h_lagged,AIL_2day_lagged,AIL_3day_lagged,AIL_4day_lagged,AIL_5day_lagged,AIL_6day_lagged,AIL_oneweek_lagged
0,0,1,0,13,0.846397,0.532553,0.0,1.0,1,0,...,2021,0,,10668.0,10799.0,10809.0,10837.0,10626.0,10607.0,10451.0
1,1,1,0,13,0.846397,0.532553,0.258819,0.9659258,1,0,...,2021,0,,10569.0,10725.0,10729.0,10725.0,10571.0,10485.0,10337.0
2,2,1,0,13,0.846397,0.532553,0.5,0.8660254,1,0,...,2021,0,,10478.0,10662.0,10684.0,10644.0,10526.0,10416.0,10241.0
3,3,1,0,13,0.846397,0.532553,0.7071068,0.7071068,1,0,...,2021,0,,10419.0,10644.0,10689.0,10605.0,10532.0,10376.0,10248.0
4,4,1,0,13,0.846397,0.532553,0.8660254,0.5,1,0,...,2021,0,,10467.0,10659.0,10722.0,10621.0,10580.0,10380.0,10255.0
5,5,1,0,13,0.846397,0.532553,0.9659258,0.258819,1,0,...,2021,0,,10594.0,10687.0,10848.0,10735.0,10481.0,10450.0,10339.0
6,6,1,0,13,0.846397,0.532553,1.0,6.123234000000001e-17,1,0,...,2021,0,,10810.0,10999.0,10991.0,10978.0,10728.0,10576.0,10443.0
7,7,0,1,13,0.846397,0.532553,0.9659258,-0.258819,1,0,...,2021,0,,11209.0,11300.0,11315.0,11374.0,11105.0,10804.0,10646.0
8,8,0,1,13,0.846397,0.532553,0.8660254,-0.5,1,0,...,2021,1,,11343.0,11376.0,11420.0,11411.0,11220.0,10935.0,10834.0
9,9,0,1,13,0.846397,0.532553,0.7071068,-0.7071068,1,0,...,2021,1,,11251.0,11388.0,11462.0,11465.0,11316.0,11079.0,11039.0


In [10]:
# Generate the forecast recursively: the prediction for hour h becomes the
# 'AIL_previous_hour' input of hour h+1.

# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load model files that come from a trusted source.
#loaded_model = joblib.load('model_generated_on_13_2_2021_v6_non_holiday.joblib.dat')
loaded_model = joblib.load('model_generated_on_13_2_2021_v6_non_holiday_huber_loss.joblib.dat')

# This dataframe will contain the forecasts (timestamp and values). The value
# column is created as float so that storing predictions needs no dtype upcast.
forecast = pd.DataFrame({
    'datetime_of_forecast': forecast_interval_stamps.values,
    'values': np.zeros(len(forecast_interval_stamps), dtype='float64'),
})

previous_hour_col = prediction_df.columns.get_loc('AIL_previous_hour')
lagged_24h_col    = prediction_df.columns.get_loc('AIL_24h_lagged')
values_col        = forecast.columns.get_loc('values')
n_hours           = len(forecast)

# Reset the recursive input as a float column (keeps the cell re-runnable), then seed
# hour 0 with the last hour of the previous day.
prediction_df['AIL_previous_hour'] = np.nan
prediction_df.iloc[0, previous_hour_col] = prediction_df.iloc[n_hours - 1, lagged_24h_col]

for i in range(n_hours):
    # The model receives one row as a (1, n_features) float array, in column order
    input_data  = prediction_df.iloc[i].to_numpy(dtype='float64').reshape((1, -1))
    predictions = loaded_model.predict(input_data)
    predicted   = float(predictions[0])
    forecast.iloc[i, values_col] = predicted
    if i + 1 < n_hours:
        # Feed the forecast forward as the previous-hour value of the next row
        prediction_df.iloc[i + 1, previous_hour_col] = predicted

forecast
#forecast.to_csv('forecast_{}_{}_{}_generate_date_{}.csv'.format(day_of_run, month_of_run, year_of_run, now_time ))

Unnamed: 0,datetime_of_forecast,values
0,2021-02-13 00:00:00,10285.78418
1,2021-02-13 01:00:00,10130.692383
2,2021-02-13 02:00:00,10019.054688
3,2021-02-13 03:00:00,9940.112305
4,2021-02-13 04:00:00,9965.183594
5,2021-02-13 05:00:00,10027.503906
6,2021-02-13 06:00:00,10157.871094
7,2021-02-13 07:00:00,10467.133789
8,2021-02-13 08:00:00,10706.202148
9,2021-02-13 09:00:00,10838.195312
