# Forecasting example using harmonised data

## Load credentials and sensitive data

In [None]:
import json

# Load the credentials and connection settings from the local JSON file.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the default locale encoding.
with open('../credentials/credentials.json', 'r', encoding='utf-8') as openfile:
    credentials = json.load(openfile)

# Configure the connection to the database and set the collections

In [None]:
import pymongo
import sys
from pymongo import MongoClient
import pandas as pd

In [None]:
# Open the MongoDB connection with the stored credentials, then select the
# database that holds the meter collections.
mongo_client = MongoClient(
    credentials['pymongo_url'],
    credentials['pymongo_port'],
    username=credentials['pymongo_username'],
    password=credentials['pymongo_password'],
)
db = mongo_client.get_database(credentials['pymongo_database_name'])

In [None]:
# Resolve the current, power and voltage meter collections from the
# expressions stored in the credentials file.
# NOTE(review): eval() executes whatever code the credentials file contains.
# Presumably each value is an expression over `db` (e.g. a collection
# attribute) - confirm, and prefer a plain collection-name lookup if so.
current_collection = eval(credentials['meter_current_name'])
power_collection = eval(credentials['meter_power_name'])
voltage_collection = eval(credentials['meter_voltage_name'])

# Query to select dates to calculate the circuit results

In [None]:
import datetime
import ipywidgets as widgets
from ipywidgets import Layout, Box, Dropdown, Label


# Available dates and meter ids in the power collection. The dates are
# "%Y-%m-%d" strings and are sorted chronologically.
power_collection_dates=sorted(power_collection.distinct("date"), key=lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"))
power_collection_meters=sorted(power_collection.distinct("id"))

# A Select widget rejects a `value` that is not one of its `options`, so the
# preferred defaults are only used when the collection actually contains them.
# Otherwise fall back to the first / last available date (or to no selection
# when the collection has no dates at all).
default_start_date = '2021-10-25'
if default_start_date not in power_collection_dates:
    default_start_date = power_collection_dates[0] if power_collection_dates else None

default_end_date = '2021-10-31'
if default_end_date not in power_collection_dates:
    default_end_date = power_collection_dates[-1] if power_collection_dates else None

start_date_selection = widgets.Select(
    options=power_collection_dates,
    value=default_start_date,
    description='start date:',
    disabled=False,
)

display(start_date_selection)

end_date_selection = widgets.Select(
    options=power_collection_dates,
    value=default_end_date,
    description='end date:',
    disabled=False,
)

display(end_date_selection)

## Get data for the selected timeframe

In [None]:
# 5-minute timestamps between the two dates picked in the widgets.
user_requested_timestamps = list(
    pd.date_range(start_date_selection.value, end_date_selection.value, freq='5Min')
)

# Fetch the power documents whose date lies inside the selected range
# (both bounds included).
user_query = {
    "date": {
        "$gte": start_date_selection.value,
        "$lte": end_date_selection.value,
    }
}
user_query_power_df = pd.DataFrame(power_collection.find(user_query))

# Get the full measurement history, searched later for past instants that resemble the predictions

In [None]:
# Load every document of the power collection (no date filter).
power_df = pd.DataFrame(power_collection.find())

## Process the retrieved data

In [None]:
def process_retrieved_data(user_query_power_df):
    """
    Reshape the raw power documents into one row per (date, time, meter).

    Parameters
    ----------
    user_query_power_df
        The pandas.DataFrame built from the power collection. It must contain the
        columns 'date', 'id' and 'power_type', plus one column per time of day whose
        label follows the "%H:%M:%S" format. Any other column is ignored.

    Returns
    -------
    pandas.DataFrame
        A frame with the columns 'datetime', 'date', 'time', 'id' and one column per
        distinct value of 'power_type'. Rows with missing values are dropped.
    """
    time_format = "%H:%M:%S"

    # Collect the time-of-day columns using their labels exactly as they appear
    # in the frame. Re-formatting the parsed time could yield a label that does
    # not exist (e.g. a non zero-padded hour) and make the melt below fail.
    time_columns = []
    for col in user_query_power_df.columns:
        try:
            datetime.datetime.strptime(col, time_format)
        except (TypeError, ValueError):
            # Not a time-of-day column (e.g. 'date', 'id' or a non-string label).
            continue
        time_columns.append(col)

    # Wide -> long: one row per (date, id, power_type, time).
    melted_df = pd.melt(
        user_query_power_df,
        id_vars=['date', 'id', 'power_type'],
        value_vars=time_columns,
        var_name='time',
        value_name='power',
    )

    # Long -> one column per power type, indexed by (date, time, id).
    pivoted_df = pd.pivot_table(
        melted_df,
        values='power',
        columns='power_type',
        index=['date', 'time', 'id'],
    ).reset_index()
    pivoted_df.columns.name = None

    # Full timestamp built from the separate date and time strings.
    pivoted_df.insert(
        loc=0,
        column='datetime',
        value=pd.to_datetime(pivoted_df['date'] + ' ' + pivoted_df['time'], format="%Y-%m-%d %H:%M:%S"),
    )
    return pivoted_df.dropna()

In [None]:
# Reshape the measurements of the selected timeframe and display them.
processed_power_df=process_retrieved_data(user_query_power_df)
processed_power_df

In [None]:
# Reshape the complete measurement history and display it.
processed_power_df_all=process_retrieved_data(power_df)
processed_power_df_all

# Retrieve predictions

## Filter data corresponding to the measurement devices

In [None]:
# Rows of the selected timeframe that belong to meter W4.
procesed_power_df_meas_w4=processed_power_df.query('id=="W4"')
procesed_power_df_meas_w4

In [None]:
# Rows of the complete history that belong to meter W4.
procesed_power_df_meas_w4_all=processed_power_df_all.query('id=="W4"')
procesed_power_df_meas_w4_all

In [None]:
# Rows of the selected timeframe that belong to meter W6.
procesed_power_df_meas_w6=processed_power_df.query('id=="W6"')
procesed_power_df_meas_w6

In [None]:
# Rows of the complete history that belong to meter W6.
procesed_power_df_meas_w6_all=processed_power_df_all.query('id=="W6"')
procesed_power_df_meas_w6_all

## Prepare data for prediction requests

In [None]:
def get_power_series(processed_power_df_meas_wx, power_type):
    """
    Build the 15-minute mean series of one power type for a single meter.

    Parameters
    ----------
    processed_power_df_meas_wx
        The pandas.DataFrame with the measurements of one meter. It must contain a
        'datetime' column and a column named after power_type.
    power_type
        The name of the power column to extract (e.g. 'positive_active').

    Returns
    -------
    pandas.DataFrame
        A single-column frame indexed by 'datetime', resampled to 15-minute bins
        using the mean of the values that fall in each bin.
    """
    power_series = processed_power_df_meas_wx[['datetime', power_type]].set_index('datetime')
    # '15min' replaces the '15T' alias, which recent pandas releases deprecate.
    return power_series.resample('15min').mean()

In [None]:
# 15-minute mean active power of W4 over the selected timeframe.
powerSeries15min_w4_a=get_power_series(procesed_power_df_meas_w4, 'positive_active')
powerSeries15min_w4_a

In [None]:
# 15-minute mean reactive power of W4 over the selected timeframe.
powerSeries15min_w4_r=get_power_series(procesed_power_df_meas_w4, 'positive_reactive')
powerSeries15min_w4_r

In [None]:
# 15-minute mean active power of W6 over the selected timeframe.
powerSeries15min_w6_a=get_power_series(procesed_power_df_meas_w6, 'positive_active')
powerSeries15min_w6_a

In [None]:
# 15-minute mean reactive power of W6 over the selected timeframe.
powerSeries15min_w6_r=get_power_series(procesed_power_df_meas_w6, 'positive_reactive')
powerSeries15min_w6_r

## Get predictions

In [None]:
import requests
from io import StringIO
from requests_toolbelt.multipart.encoder import MultipartEncoder

# minimum length required for the input time series
lookback_window = 672

def predict(series, timesteps_ahead, device, power_type, prediction_server_url):
    """
    Request a forecast for one meter and power type from the prediction service.

    Parameters
    ----------
    series
        The pandas.DataFrame to be used for prediction. It must follow the format shown above.
        Also, it must have at least lookback_window timesteps, as the models need to see that
        far back into the past in order to make a prediction. In this case, it must be at least 7 days
        long (672 timesteps, as we use a resolution of 15 minutes)
    timesteps_ahead
        The amount of timesteps ahead to be predicted. Prediction starts from the next timestep after the
        last one of series. So, as we will perform day ahead forecasting, we need timesteps_ahead=96
    device
        The meter whose model is queried: 'W4' or 'W6'.
    power_type
        The power type that will be predicted: 'active' or 'reactive'. Together with device it
        selects the port of one of the 4 trained models:
            - 3040 for lgbm w4 positive active
            - 3041 for lgbm w4 positive reactive
            - 3045 for lgbm w6 positive active
            - 3042 for lgbm w6 positive reactive
    prediction_server_url
        The host of the server that hosts the prediction service (without scheme or port;
        the request goes to http://<host>:<port>/predict).

    Returns
    -------
    pandas.DataFrame or None
        The result of the prediction, or None (after printing a message) when series is
        shorter than lookback_window.

    Raises
    ------
    KeyError
        If there is no model for the given device and power_type.
    requests.HTTPError
        If the prediction service answers with an error status.
    """
    model_ports = {
        ('W4', 'active'): 3040,
        ('W4', 'reactive'): 3041,
        ('W6', 'active'): 3045,
        ('W6', 'reactive'): 3042,
    }
    port = model_ports[device, power_type]

    # The models cannot predict without a full lookback window of history.
    if len(series) < lookback_window:
        print("Please input a larger historical time series")
        return None

    payload = MultipartEncoder(
        fields={
            "n": str(timesteps_ahead),  # 96 timesteps for 15min resolution = day ahead forecast
            "series": series.to_json(),
        }
    )

    response = requests.post(
        f"http://{prediction_server_url}:{port}/predict",
        data=payload,
        headers={"Content-Type": payload.content_type},
    )
    # Fail with the HTTP error itself rather than with a confusing JSON parsing
    # error when the service rejects the request.
    response.raise_for_status()

    return pd.read_json(StringIO(response.text))

In [None]:
# Host of the prediction service, taken from the credentials file.
prediction_server_url=credentials['prediction_server_url']

In [None]:
# Forecast of the W4 active power, 96 timesteps ahead.
df_w4_a = predict(powerSeries15min_w4_a, 96, 'W4','active',prediction_server_url)
df_w4_a

In [None]:
# Forecast of the W4 reactive power, 96 timesteps ahead.
df_w4_r = predict(powerSeries15min_w4_r, 96, 'W4','reactive', prediction_server_url)

In [None]:
# Forecast of the W6 active power, 96 timesteps ahead.
df_w6_a = predict(powerSeries15min_w6_a, 96, 'W6','active', prediction_server_url)
df_w6_a

In [None]:
# Forecast of the W6 reactive power, 96 timesteps ahead.
df_w6_r = predict(powerSeries15min_w6_r, 96, 'W6','reactive', prediction_server_url)
df_w6_r

In [None]:
def create_predicted_df(df_wx_a, df_wx_r,wx_name):
    """
    Combine the active and reactive frames of one meter into a single frame.

    Parameters
    ----------
    df_wx_a
        The pandas.DataFrame with the active power, indexed by timestamp.
    df_wx_r
        The pandas.DataFrame with the reactive power, joined on the index of df_wx_a.
    wx_name
        The meter identifier written to the 'id' column.

    Returns
    -------
    pandas.DataFrame
        The joined frame with the extra columns 'id', 'date' and 'time', and with
        its index named 'datetime'.
    """
    merged = df_wx_a.join(df_wx_r)
    stamps = merged.index
    stamps.name = 'datetime'
    # Extra columns are added in the same order as before: id, date, time.
    for column, values in (('id', wx_name), ('date', stamps.date), ('time', stamps.time)):
        merged[column] = values
    return merged

In [None]:
# Combined active/reactive forecast of W4.
w4_df_pred=create_predicted_df(df_w4_a, df_w4_r, 'W4')
w4_df_pred

In [None]:
# Combined active/reactive forecast of W6.
w6_df_pred=create_predicted_df(df_w6_a, df_w6_r, 'W6')
w6_df_pred

In [None]:
def get_power_candidates(df_wx, procesed_power_df_meas_wx_all, datetime_idx):
    """
    Select the measurements whose active power is close to a predicted value.

    Parameters
    ----------
    df_wx
        The pandas.DataFrame with the predictions; the 'positive_active' value at
        datetime_idx is the reference.
    procesed_power_df_meas_wx_all
        The pandas.DataFrame with the measurements to search, with a
        'positive_active' column.
    datetime_idx
        The index label of the prediction used as reference.

    Returns
    -------
    pandas.DataFrame
        The rows of procesed_power_df_meas_wx_all whose 'positive_active' lies
        between 90 % and 110 % of the reference (bounds included).
    """
    reference = df_wx['positive_active'].loc[datetime_idx]
    # Sorting keeps the bounds in the right order for negative references too.
    lower, upper = sorted([reference*.9, reference*1.1])
    in_band = procesed_power_df_meas_wx_all['positive_active'].between(lower, upper)
    return procesed_power_df_meas_wx_all[in_band]

In [None]:
# Rows of the complete history that belong to meter W3.
procesed_power_df_meas_w3=processed_power_df_all.query('id=="W3"')

In [None]:
# Rows of the complete history that belong to meter W5.
procesed_power_df_meas_w5=processed_power_df_all.query('id=="W5"')

In [None]:
def filter_from_meas(procesed_power_df_meas_wx,filter2):
    """
    Average the measured powers of a meter over a set of timestamps.

    Parameters
    ----------
    procesed_power_df_meas_wx
        The pandas.DataFrame with the measurements of one meter, with the columns
        'datetime', 'positive_active' and 'positive_reactive'.
    filter2
        The timestamps to keep; rows whose 'datetime' is not among them are ignored.

    Returns
    -------
    tuple
        The mean 'positive_active' and the mean 'positive_reactive' of the kept rows.
    """
    matches = procesed_power_df_meas_wx['datetime'].isin(filter2)
    selected = procesed_power_df_meas_wx.loc[matches]
    return selected['positive_active'].mean(), selected['positive_reactive'].mean()

In [None]:
# Estimate W3 and W5 for every forecasted timestep as the mean of their
# historical measurements at the instants when W4 and W6 were both within
# +/-10 % of their predicted active power.
lst_w3_a=[]
lst_w3_r=[]
lst_w5_a=[]
lst_w5_r=[]
for datetime_idx in w4_df_pred.index:
    # Historical instants at which W4 was close to its predicted active power.
    powerCandidates1= get_power_candidates(w4_df_pred,procesed_power_df_meas_w4_all, datetime_idx)
    filter1=powerCandidates1['datetime']

    # Restrict the W6 history to those instants and keep the ones at which W6
    # was also close to its own prediction. The restricted frame used to be
    # computed but never used, so the W4 match had no effect on the estimates.
    filtered_power_w6_all=procesed_power_df_meas_w6_all.loc[procesed_power_df_meas_w6_all['datetime'].isin(filter1)]
    powerCandidates2=get_power_candidates(df_w6_a,filtered_power_w6_all,datetime_idx)
    filter2=powerCandidates2['datetime']

    # The mean is NaN when no historical instant matches both meters.
    est_w3_a, est_w3_r = filter_from_meas(procesed_power_df_meas_w3,filter2)
    lst_w3_a.append(est_w3_a)
    lst_w3_r.append(est_w3_r)

    est_w5_a, est_w5_r = filter_from_meas(procesed_power_df_meas_w5,filter2)
    lst_w5_a.append(est_w5_a)
    lst_w5_r.append(est_w5_r)

In [None]:
# Estimated W3 active power, aligned with the W4 forecast index.
df_w3_a=pd.DataFrame(lst_w3_a, index=w4_df_pred.index, columns=['positive_active'])
df_w3_a

In [None]:
# Estimated W3 reactive power, aligned with the W4 forecast index.
df_w3_r=pd.DataFrame(lst_w3_r, index=w4_df_pred.index, columns=['positive_reactive'])
df_w3_r

In [None]:
# Combined active/reactive estimate of W3.
w3_df_pred=create_predicted_df(df_w3_a, df_w3_r, 'W3')
w3_df_pred

In [None]:
# Estimated W5 active power, aligned with the W4 forecast index.
df_w5_a=pd.DataFrame(lst_w5_a, index=w4_df_pred.index, columns=['positive_active'])
df_w5_a

In [None]:
# Estimated W5 reactive power, aligned with the W4 forecast index.
df_w5_r=pd.DataFrame(lst_w5_r, index=w4_df_pred.index, columns=['positive_reactive'])
df_w5_r

In [None]:
# Combined active/reactive estimate of W5.
w5_df_pred=create_predicted_df(df_w5_a, df_w5_r, 'W5')
w5_df_pred

In [None]:
# Stack the frames of the four meters and order the rows by timestamp
# ('datetime' is the name of the index set by create_predicted_df).
df_pred=pd.concat([w3_df_pred,w4_df_pred,w5_df_pred,w6_df_pred])
df_pred=df_pred.sort_values(by='datetime')
df_pred

In [None]:
# Persist the combined frame as a pickle file named 'forecasted_values'
# (relative path, no extension).
df_pred.to_pickle('forecasted_values')