# Minimal database interaction example using PyMongo

## Load credentials and sensitive data

In [1]:
import json

# Load the connection settings from a JSON file kept outside the notebook so
# that no secret is written in a cell. The encoding is given explicitly:
# without it open() uses the platform default, which is not UTF-8 everywhere
# and can mis-decode non-ASCII characters in the file.
with open('../credentials/credentials.json', 'r', encoding='utf-8') as openfile:
    # Parsed JSON content; later cells index it by key.
    credentials = json.load(openfile)

# Configure the connection to the database and set the collections

In [2]:
import pymongo
import sys
from pymongo import MongoClient
import pandas as pd

In [3]:
# Connect with the host, port and login read from the credentials file, then
# select the database whose name is stored under 'pymongo_database_name'.
db = MongoClient(
    host=credentials['pymongo_url'],
    port=credentials['pymongo_port'],
    username=credentials['pymongo_username'],
    password=credentials['pymongo_password'],
).get_database(credentials['pymongo_database_name'])

In [4]:
# Build the three meter collection handles (current, power, voltage) by
# evaluating expressions stored in the credentials file.
# NOTE(review): eval() executes whatever text those entries contain, so anyone
# able to edit the credentials file can run arbitrary code in this kernel.
# If the entries are plain collection names, a lookup on `db` would be safer
# -- TODO confirm the stored format before replacing eval().
current_collection = eval(credentials['meter_current_name'])
power_collection = eval(credentials['meter_power_name'])
voltage_collection = eval(credentials['meter_voltage_name'])

# Query to select dates to calculate the circuit results

In [5]:
import datetime
import ipywidgets as widgets
from ipywidgets import Layout, Box, Dropdown, Label


# Dates and meter ids available in the power collection. Dates are sorted
# chronologically; strptime also rejects any value that is not YYYY-MM-DD.
power_collection_dates = sorted(
    power_collection.distinct("date"),
    key=lambda x: datetime.datetime.strptime(x, "%Y-%m-%d"),
)
power_collection_meters = sorted(power_collection.distinct("id"))


def _make_date_selector(description, preferred_date, fallback_index):
    """Build a Select widget listing every available date.

    Parameters
    ----------
    description
        Label shown next to the widget.
    preferred_date
        Date string to pre-select when it is one of the available dates.
    fallback_index
        Index into ``power_collection_dates`` used for the initial selection
        when ``preferred_date`` is not available (0 = earliest, -1 = latest).

    Returns
    -------
    ipywidgets.Select
        The configured widget; its ``value`` is None when no dates exist.
    """
    # Select raises if the initial value is not one of its options, so the
    # preferred date is only used when the collection actually contains it.
    if preferred_date in power_collection_dates:
        initial_value = preferred_date
    elif power_collection_dates:
        initial_value = power_collection_dates[fallback_index]
    else:
        initial_value = None

    return widgets.Select(
        options=power_collection_dates,
        value=initial_value,
        description=description,
        disabled=False,
    )


start_date_selection = _make_date_selector('start date:', '2021-10-25', 0)

display(start_date_selection)

end_date_selection = _make_date_selector('end date:', '2021-10-31', -1)

display(end_date_selection)

Select(description='start date:', index=81, options=('2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08', '…

Select(description='end date:', index=87, options=('2021-08-05', '2021-08-06', '2021-08-07', '2021-08-08', '20…

## Get data for the selected timeframe

In [6]:
# Five-minute timestamps running from the selected start date to the selected
# end date.
user_requested_timestamps = pd.date_range(
    start=start_date_selection.value,
    end=end_date_selection.value,
    freq='5Min',
).to_list()

# Range filter on the "date" field, inclusive at both ends.
user_query = {
    "date": {
        "$gte": start_date_selection.value,
        "$lte": end_date_selection.value,
    }
}

# One DataFrame row per matching document.
user_query_power_df = pd.DataFrame(power_collection.find(user_query))
user_query_power_df

Unnamed: 0,_id,Unnamed: 2,date,00:00:00,00:05:00,00:10:00,00:15:00,00:20:00,00:25:00,00:30:00,...,23:20:00,23:25:00,23:30:00,23:35:00,23:40:00,23:45:00,23:50:00,23:55:00,id,power_type
0,6322fda1ce520b1fd2dc78ac,0,2021-10-26,41.547665,42.438880,41.258075,41.252773,41.965179,41.858038,41.559484,...,42.019979,41.830807,45.402264,51.917066,50.089197,48.985303,47.610372,46.150158,W4,apparent
1,6322fda1ce520b1fd2dc78ad,1,2021-10-26,40.171788,41.015590,39.796011,39.888964,40.603351,40.345284,39.932533,...,40.598639,40.481615,44.125054,51.308705,49.083353,47.865591,46.344943,44.947521,W4,positive_active
2,6322fda1ce520b1fd2dc78ae,2,2021-10-26,-8.754117,-8.927412,-9.062231,-8.632040,-8.606026,-9.382531,-9.661526,...,-9.395557,-8.977946,-8.778704,-5.516881,-7.955475,-8.551503,-9.244671,-8.846024,W4,positive_reactive
3,6322fed5ce520b1fd2dc78cd,0,2021-10-30,46.336499,50.084744,46.573778,44.946850,51.556749,48.142561,45.643011,...,40.045276,40.076982,39.747999,47.355186,52.160294,51.305784,48.664094,48.030849,W4,apparent
4,6322fed5ce520b1fd2dc78ce,1,2021-10-30,45.144727,49.224550,45.498884,43.630560,50.780907,47.163726,44.425855,...,38.728230,38.606091,38.165545,46.657992,51.640495,50.755338,47.909220,46.974602,W4,positive_active
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,63247a39ce520b1fd2dc9dc2,1,2021-10-29,0.042585,0.042765,0.042767,0.042893,0.042899,0.042827,0.042815,...,0.042538,0.042682,0.042537,0.042611,0.042831,0.042819,0.042929,0.042778,W6,positive_active
101,63247a39ce520b1fd2dc9dc3,2,2021-10-29,-0.047634,-0.047956,-0.048407,-0.048451,-0.048660,-0.048437,-0.048253,...,-0.048380,-0.048378,-0.048373,-0.048438,-0.048461,-0.048438,-0.048619,-0.048407,W6,positive_reactive
102,63247b2ace520b1fd2dc9ddc,0,2021-10-26,0.584763,0.588841,0.586150,0.585529,0.587172,0.583962,0.588329,...,0.587919,0.588853,0.586259,0.589545,0.589138,0.588579,0.589652,0.588322,W6,apparent
103,63247b2ace520b1fd2dc9ddd,1,2021-10-26,0.042859,0.043286,0.042874,0.042770,0.042870,0.042678,0.042771,...,0.042906,0.042659,0.042689,0.042721,0.042815,0.042461,0.042668,0.042545,W6,positive_active


## Process the retrieved data

In [7]:
# Reshape the query result from one row per (date, meter, power type) with one
# column per time of day into one row per (date, time, meter) with one column
# per power type.
time_format = "%H:%M:%S"
db_columns = user_query_power_df.columns.tolist()


def _as_time_of_day(label):
    """Parse `label` with `time_format`; return a datetime.time, or None if it does not match."""
    try:
        return datetime.datetime.strptime(label, time_format).time()
    except ValueError:
        return None


# Separate the time-of-day column labels from every other column label.
parsed_labels = [(label, _as_time_of_day(label)) for label in db_columns]
db_times = [parsed for _, parsed in parsed_labels if parsed is not None]
db_colums_withoutTime = [label for label, parsed in parsed_labels if parsed is None]

# Column labels of the time-of-day columns, rebuilt from the parsed times.
header = [moment.strftime(time_format) for moment in db_times]

# Wide -> long: one row per (date, id, power_type, time) holding a single value.
melted_df = user_query_power_df.melt(
    id_vars=['date', 'id', 'power_type'],
    value_vars=header,
    var_name='time',
    value_name='power',
)

# Long -> one column per power type, keyed by date, time and meter id.
pivoted_df = (
    melted_df
    .pivot_table(values='power', index=['date', 'time', 'id'], columns='power_type')
    .reset_index()
)
pivoted_df.columns.name = None

# Combine the date and time strings into a real timestamp as the first column.
timestamp_text = pivoted_df['date'] + ' ' + pivoted_df['time']
pivoted_df.insert(
    loc=0,
    column='datetime',
    value=pd.to_datetime(timestamp_text, format="%Y-%m-%d %H:%M:%S"),
)

# Discard rows where any power type is missing.
processed_power_df = pivoted_df.dropna()
processed_power_df

Unnamed: 0,datetime,date,time,id,apparent,positive_active,positive_reactive
0,2021-10-25 00:00:00,2021-10-25,00:00:00,W2,1.542820,0.863157,-1.130695
1,2021-10-25 00:00:00,2021-10-25,00:00:00,W3,4.167453,0.885610,4.037296
2,2021-10-25 00:00:00,2021-10-25,00:00:00,W4,39.954579,38.189706,-10.104962
3,2021-10-25 00:00:00,2021-10-25,00:00:00,W5,0.934930,0.020153,-0.918053
4,2021-10-25 00:00:00,2021-10-25,00:00:00,W6,0.584307,0.042963,-0.048518
...,...,...,...,...,...,...,...
10075,2021-10-31 23:55:00,2021-10-31,23:55:00,W2,1.534457,0.860742,-1.132833
10076,2021-10-31 23:55:00,2021-10-31,23:55:00,W3,4.136900,0.910842,4.000568
10077,2021-10-31 23:55:00,2021-10-31,23:55:00,W4,47.332395,46.345882,-8.114340
10078,2021-10-31 23:55:00,2021-10-31,23:55:00,W5,0.932285,0.019906,-0.917448


## Filter data corresponding to one of the measurement devices

In [8]:
# Keep only the rows whose meter id is W4.
procesed_power_df_meas = processed_power_df[processed_power_df['id'] == "W4"]
procesed_power_df_meas

Unnamed: 0,datetime,date,time,id,apparent,positive_active,positive_reactive
2,2021-10-25 00:00:00,2021-10-25,00:00:00,W4,39.954579,38.189706,-10.104962
7,2021-10-25 00:05:00,2021-10-25,00:05:00,W4,38.884090,37.048807,-10.205871
12,2021-10-25 00:10:00,2021-10-25,00:10:00,W4,39.680958,37.703280,-10.745584
17,2021-10-25 00:15:00,2021-10-25,00:15:00,W4,39.332112,37.507837,-10.037663
22,2021-10-25 00:20:00,2021-10-25,00:20:00,W4,38.920400,37.015674,-10.195114
...,...,...,...,...,...,...,...
10057,2021-10-31 23:35:00,2021-10-31,23:35:00,W4,44.645709,43.447723,-9.009852
10062,2021-10-31 23:40:00,2021-10-31,23:40:00,W4,42.539431,41.336750,-8.583682
10067,2021-10-31 23:45:00,2021-10-31,23:45:00,W4,48.896885,48.074172,-7.126228
10072,2021-10-31 23:50:00,2021-10-31,23:50:00,W4,48.313760,47.271138,-8.438270


In [9]:
# Active power of the selected meter, indexed by timestamp.
powerSeries = procesed_power_df_meas[['datetime', 'positive_active']].set_index('datetime')

# Average the readings into 15-minute bins. The alias is spelled '15min'
# because the single-letter 'T' minute alias is deprecated in recent pandas
# releases, while 'min' is accepted by old and new versions alike.
powerSeries15min = powerSeries.resample('15min').mean()
powerSeries15min

Unnamed: 0_level_0,positive_active
datetime,Unnamed: 1_level_1
2021-10-25 00:00:00,37.647264
2021-10-25 00:15:00,37.335272
2021-10-25 00:30:00,36.781321
2021-10-25 00:45:00,36.311347
2021-10-25 01:00:00,36.821237
...,...
2021-10-31 22:45:00,46.027469
2021-10-31 23:00:00,44.334656
2021-10-31 23:15:00,44.344184
2021-10-31 23:30:00,43.690716


In [10]:
import requests
from requests_toolbelt.multipart.encoder import MultipartEncoder

# Minimum number of rows required in the input time series:
# 7 days at 15-minute resolution (7 * 24 * 4 = 672).
lookback_window = 672

def predict(series, timesteps_ahead, device, power_type, timeout=60):
    """
    Request a forecast from the model service that matches a meter and power type.

    Parameters
    ----------
    series
        The pandas.DataFrame to be used for prediction. It must follow the format shown above.
        Also, it must have at least ``lookback_window`` timesteps, as the models need to see that
        far back into the past in order to make a prediction. In this case, it must be at least
        7 days long (672 timesteps, as we use a resolution of 15 minutes).
    timesteps_ahead
        The number of timesteps ahead to be predicted. Prediction starts from the next timestep
        after the last one of series. So, for day-ahead forecasting, use timesteps_ahead=96.
    device
        Meter id, 'W4' or 'W6'.
    power_type
        'active' or 'reactive'. Together with ``device`` it selects the model port:
            - 3040 for lgbm w4 positive active
            - 3041 for lgbm w4 positive reactive
            - 3045 for lgbm w6 positive active
            - 3042 for lgbm w6 positive reactive
    timeout
        Seconds to wait for the service before giving up; passed to ``requests.post``.

    Returns
    -------
    pandas.DataFrame or None
        The result of the prediction, or None when ``series`` is shorter than
        ``lookback_window`` (a message is printed in that case).

    Raises
    ------
    KeyError
        If no model exists for the given ``device`` / ``power_type`` pair.
    requests.HTTPError
        If the service answers with an error status.
    """
    # Local import so the function does not rely on another cell importing io.
    import io

    port_by_model = {
        ('W4', 'active'): 3040,
        ('W4', 'reactive'): 3041,
        ('W6', 'active'): 3045,
        ('W6', 'reactive'): 3042,
    }
    port = port_by_model[device, power_type]

    print("Input dataframe:", series)

    print ("Port : ", port)

    # The models cannot predict from less history than the lookback window.
    if len(series) < lookback_window:
        print("Please input a larger historical time series")
        return None

    payload = MultipartEncoder(
        fields={
            "n": str(timesteps_ahead),  # 96 timesteps for 15min resolution = day ahead forecast
            "series": series.to_json(),
        }
    )

    response = requests.post(
        f"http://131.154.97.48:{port}/predict",
        data=payload,
        headers={"Content-Type": payload.content_type},
        timeout=timeout,
    )
    # Fail with the HTTP status instead of a confusing JSON parse error.
    response.raise_for_status()

    # response.text is a plain string: it has no close() method, and read_json
    # expects a file-like object rather than a literal JSON string.
    return pd.read_json(io.StringIO(response.text))

In [None]:
# Day-ahead forecast (96 steps of 15 minutes) of the W4 active power.
df = predict(powerSeries15min, timesteps_ahead=96, device='W4', power_type='active')
df

Input dataframe:                      positive_active
datetime                            
2021-10-25 00:00:00        37.647264
2021-10-25 00:15:00        37.335272
2021-10-25 00:30:00        36.781321
2021-10-25 00:45:00        36.311347
2021-10-25 01:00:00        36.821237
...                              ...
2021-10-31 22:45:00        46.027469
2021-10-31 23:00:00        44.334656
2021-10-31 23:15:00        44.344184
2021-10-31 23:30:00        43.690716
2021-10-31 23:45:00        47.230397

[672 rows x 1 columns]
Port :  3040


In [None]:
df