In [1]:
import numpy as np
import pandas as pd
import random
import time
import math
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import init
from dateutil import parser
from pathlib import Path
import json 
import shutil
import logging
# Logger named after the id of the current process.
logger = logging.getLogger(str(os.getpid()))

# Project-local helpers for reading and preprocessing the building data.
from wattile.data_reading import read_dataset_from_file
from wattile.buildings_processing import correct_predictor_columns, correct_timestamps, resample_or_rolling_stats, timelag_predictors, timelag_predictors_target, roll_predictors_target
from wattile.time_processing import add_processed_time_columns
# Two directory levels above the current working directory.
# NOTE(review): presumably the kernel is started from a folder two levels below
# the repository root - confirm, since every path below is derived from this.
PROJECT_DIRECTORY = Path().resolve().parent.parent

# reading configs

In [2]:
"""
For this example, we will be using the default configs.
Check out the docs for an explanation of each config.
"""
##################################################################################
# choose the configs file to use as an input
##################################################################################
# main configs file
with open(PROJECT_DIRECTORY / "wattile" / "configs" / "configs.json", "r") as f:
    configs = json.load(f)
##################################################################################
# code testing configs file
# with open(PROJECT_DIRECTORY / "tests" / "fixtures" / "test_configs.json", "r") as f:
#     configs = json.load(f)
##################################################################################

# Start every run from an empty experiment directory: anything left over from a
# previous run is deleted. `parents=True` lets the cell succeed even when the
# enclosing "notebooks" folder does not exist yet.
exp_dir = PROJECT_DIRECTORY / "notebooks" / "exp_dir"
if exp_dir.exists():
    shutil.rmtree(exp_dir)
exp_dir.mkdir(parents=True)

# `exp_dir` is already built on top of PROJECT_DIRECTORY, so it is used as-is.
configs["data_input"]["exp_dir"] = str(exp_dir)
# configs["data_input"]["data_dir"] = str(PROJECT_DIRECTORY / "data" / "Synthetic Site")
# NOTE(review): this points at a sibling checkout ("intelligentcampus-feature-eng")
# next to the project directory - confirm it exists before running.
configs["data_input"]["data_dir"] = str(PROJECT_DIRECTORY / ".." / "intelligentcampus-feature-eng" / "data" / "Cafe")
configs["data_input"]["data_config"] = "Cafe Config.json"
# Predictors to use; commented-out entries are deliberately excluded.
# NOTE(review): "Â°" in the commented names looks like a mis-decoded "°" - check
# against the column names in the data files before re-enabling those entries.
configs["data_input"]["predictor_columns"] = [
    "SRRL BMS Atmospheric Electric Field",
    "SRRL BMS Barometric Pressure",
    "SRRL BMS Dew Point Temperature",
    "SRRL BMS Diffuse Horizontal Irradiance",
    "SRRL BMS Direct Normal Irradiance",
    "SRRL BMS Dry Bulb Temperature",
    "SRRL BMS DWIR",
#     "SRRL BMS Global 40Â° South Irradiance",
#     "SRRL BMS Global 90Â° South Irradiance",
#     "SRRL BMS Global Horizontal Irradiance",
#     "SRRL BMS Global Illuminance",
    "SRRL BMS Global Normal Irradiance",
#     "SRRL BMS Global UV Index",
    "SRRL BMS Opaque Cloud Cover",
#     "SRRL BMS Peak Wind Speed at 19'",
#     "SRRL BMS Peak Wind Speed at 6'",
#     "SRRL BMS Rainfall",
    "SRRL BMS Relative Humidity",
#     "SRRL BMS Snow Depth",
    "SRRL BMS Total Cloud Cover",
    "SRRL BMS UWIR",
    "SRRL BMS UWSW",
    "SRRL BMS Wet Bulb Temperature",
#     "SRRL BMS Wind Direction at 19'",
#     "SRRL BMS Wind Direction at 6'",
#     "SRRL BMS Wind Speed at 19'",
#     "SRRL BMS Wind Speed at 6'",
]
configs["data_input"]["target_var"] = "Cafe Whole Building Real Power Total"

# Show the resulting configuration.
configs

{'data_input': {'data_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\..\\intelligentcampus-feature-eng\\data\\Cafe',
  'data_config': 'Cafe Config.json',
  'start_time': '2018-01-01T00:00:00-07:00',
  'end_time': '2022-01-01T00:00:00-07:00',
  'predictor_columns': ['SRRL BMS Atmospheric Electric Field',
   'SRRL BMS Barometric Pressure',
   'SRRL BMS Dew Point Temperature',
   'SRRL BMS Diffuse Horizontal Irradiance',
   'SRRL BMS Direct Normal Irradiance',
   'SRRL BMS Dry Bulb Temperature',
   'SRRL BMS DWIR',
   'SRRL BMS Global Normal Irradiance',
   'SRRL BMS Opaque Cloud Cover',
   'SRRL BMS Relative Humidity',
   'SRRL BMS Total Cloud Cover',
   'SRRL BMS UWIR',
   'SRRL BMS UWSW',
   'SRRL BMS Wet Bulb Temperature'],
  'target_var': 'Cafe Whole Building Real Power Total',
  'exp_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\notebooks\\exp_dir'},
 'data_output': {'exp_dir': 'exp_dir',
  'plot_comparison': True,
  'pl

# reading data

In [None]:
# Expose the target column as a one-element list under "target_feat_name",
# then load the dataset described by the configs and display it.
configs.update(target_feat_name=[configs["data_input"]["target_var"]])
data = read_dataset_from_file(configs)
data

# plot setting

In [3]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.colors import n_colors
from plotly.validators.scatter.marker import SymbolValidator
import plotly.io as pio

# Folder with the vips binaries, appended to the executable search path before
# pyvips is imported (presumably so the import can locate them - confirm).
# Set the VIPS_BIN_DIR environment variable to override the default location.
VIPS_BIN_DIR = os.environ.get(
    "VIPS_BIN_DIR",
    r"C:/Users/JKIM4/Downloads/vips-dev-w64-all-8.11.0/vips-dev-8.11/bin",
)
_search_path = os.environ.get("PATH", "")
# Only extend PATH when the folder exists and is not already listed, so the cell
# can be re-run safely. Reading "PATH" with a default avoids the KeyError that a
# lowercase 'path' lookup raises on platforms with case-sensitive variable names.
if os.path.isdir(VIPS_BIN_DIR) and VIPS_BIN_DIR not in _search_path.split(os.pathsep):
    os.environ["PATH"] = _search_path + os.pathsep + VIPS_BIN_DIR
import pyvips
import copy
import random

# Fixed seed for reproducibility.
random.seed(1)
# Folder (relative to the working directory) the figures are written to.
path = "../../tests/fixtures"
# Name of the plotly colourscale the trace colours are sampled from.
colorscale = "Earth"

# data processing 1

In [None]:
# Add time-based features
# NOTE(review): `data` is rebound to the function's own result, so re-running this
# cell passes already-processed data back in - confirm that
# add_processed_time_columns tolerates that, or restart the kernel before re-running.
data = add_processed_time_columns(data, configs)

# Display the frame returned by add_processed_time_columns.
data

### feat_time parameters

In [None]:
# One dash style per trace; the same number of colours is sampled at evenly
# spaced positions of the colourscale.
list_line = ["solid", "dot", "dash", "longdash", "dashdot", "longdashdot"]

n_colors = len(list_line)
list_colors = px.colors.sample_colorscale(colorscale, [n/(n_colors -1) for n in range(n_colors)])
# Shuffle the colour list itself (shuffling a copy would leave `list_colors`
# untouched). A dedicated RNG seeded with 1 gives the same order on every run of
# this cell, independent of how often other cells have drawn random numbers.
random.Random(1).shuffle(list_colors)

# All columns whose name mentions one of the time features.
df_feat_time = data.loc[:, data.columns.str.contains("MOY|DOW|HOD")]

# Time window (start, end) plotted for each feature family.
dict_daterange = {
    "HOD":["2019-12-02 00:00:00+00:00","2019-12-03 00:00:00+00:00"],
    "DOW":["2019-12-02 00:00:00+00:00","2019-12-10 00:00:00+00:00"],
    "MOY":["2019-01-01 00:00:00+00:00","2020-01-01 00:00:00+00:00"]
}

# Columns drawn for each feature family (at most one per entry of `list_line`).
dict_feat_include = {
    "HOD":[
        "sin_HOD",
        "cos_HOD",
        "HOD_binary_reg_10",
        "HOD_binary_reg_20",
        "HOD_binary_fuzzy_13",
        "HOD_binary_fuzzy_23"
    ],
    "DOW":[
        "DOW_binary_reg_0",
        "DOW_binary_reg_2",
        "DOW_binary_reg_4",
        "DOW_binary_fuzzy_1",
        "DOW_binary_fuzzy_3",
        "DOW_binary_fuzzy_5",
    ],
    "MOY":[
        "sin_MOY",
        "cos_MOY",
    ],
}

# One figure per feature family: draw it, export it as SVG and PNG, and show it.
for feat, (range_start, range_end) in dict_daterange.items():
    fig = go.Figure()

    # Rows inside the family's time window, restricted to the selected columns.
    df_temp = data.loc[range_start:range_end, dict_feat_include[feat]]

    # The day-of-week features are averaged into 60-minute bins before plotting.
    if feat == "DOW":
        df_temp = df_temp.resample("60min").mean()

    for i_label, col in enumerate(df_temp.columns):
        fig.add_trace(go.Scatter(
            mode="lines",
            x=df_temp.index.values,
            y=df_temp[col].values,
            name=col,
            line=dict(
                color=list_colors[i_label],
                dash=list_line[i_label],
            ),
        ))

    fig.update_layout(
        width=800,
        height=250,
        margin=dict(
            l=0,
            r=0,
            t=30,
            b=0,
        ),
        plot_bgcolor="rgb(245,245,245)",
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1,
            xanchor="center",
            x=0.5,
        )
    )

    filename = "example_feat_time_{}".format(feat)
    svg_file = path + "/" + filename + ".svg"
    pio.write_image(fig, svg_file)
    # Convert the SVG to a PNG thumbnail with pyvips (second argument: 3000).
    # https://stackoverflow.com/questions/51450134/how-to-convert-svg-to-png-or-jpeg-in-python
    image = pyvips.Image.thumbnail(svg_file, 3000)
    image.write_to_file(path + "/" + filename + ".png")

    fig.show()

### resample parameters

In [60]:
# Switch the target to the column present in the test fixture.
configs["data_input"].update(target_var="Synthetic Site Electricity Main Total Power")

# Resampling settings: bin interval "3min", bins closed and labelled "right".
configs["data_processing"]["resample"].update(
    bin_interval="3min",
    bin_closed="right",
    bin_label="right",
)

# Turn the feat_stats option off for this example.
configs["data_processing"]["feat_stats"].update(active=False)

In [61]:
# Read the fixture with its first column as the index and leave out "var2";
# the index is then parsed into timestamps.
df_test = pd.read_csv(
    "../../tests/fixtures/rolling_stats_input_w_target.csv", index_col=0
).drop(columns="var2", errors="ignore")
df_test.index = pd.to_datetime(df_test.index)
df_test

Unnamed: 0_level_0,var1,Synthetic Site Electricity Main Total Power
ts,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-07-12 07:00:00+06:00,,1.0
2022-07-12 07:01:53+06:00,1.5,1.2
2022-07-12 07:03:17+06:00,2.2,1.3
2022-07-12 07:04:02+06:00,0.9,
2022-07-12 07:04:59+06:00,3.6,2.5
2022-07-12 07:05:00+06:00,,2.3
2022-07-12 07:06:22+06:00,3.3,4.3
2022-07-12 07:09:46+06:00,2.3,3.4
2022-07-12 07:10:00+06:00,,
2022-07-12 07:11:02+06:00,1.3,


In [62]:
# Run the fixture through resample_or_rolling_stats, then tag every resulting
# column name with the resample settings that were used.
df_resample = resample_or_rolling_stats(df_test, configs)
resample_cfg = configs["data_processing"]["resample"]
suffix = "_resampled | {} interval | {} closed | {} label".format(
    resample_cfg["bin_interval"], resample_cfg["bin_closed"], resample_cfg["bin_label"]
)
df_resample = df_resample.add_suffix(suffix)
df_resample

Unnamed: 0_level_0,var1_resampled | 3min interval | right closed | right label,Synthetic Site Electricity Main Total Power_resampled | 3min interval | right closed | right label
ts,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-07-12 07:00:00+06:00,,1.0
2022-07-12 07:03:00+06:00,1.5,1.2
2022-07-12 07:06:00+06:00,3.6,2.3
2022-07-12 07:09:00+06:00,3.3,4.3
2022-07-12 07:12:00+06:00,1.3,3.4
2022-07-12 07:15:00+06:00,4.1,1.1


In [63]:
# Keep only the "var1" series for plotting: the column named exactly "var1" in
# the input frame, and every processed column whose name contains "var1".
df_test = df_test.loc[:, df_test.columns.isin(["var1"])]
df_resample = df_resample.loc[:, df_resample.columns.str.contains("var1", regex=False)]

In [64]:
# One marker symbol per trace; the same number of colours is sampled at evenly
# spaced positions of the colourscale.
list_symbol = ["circle-open-dot", "square-open-dot"]

n_colors = len(list_symbol)
list_colors = px.colors.sample_colorscale(colorscale, [n/(n_colors -1) for n in range(n_colors)])
# Shuffle the colour list itself (shuffling a copy would leave `list_colors`
# untouched). A dedicated RNG seeded with 1 gives the same order on every run of
# this cell, independent of how often other cells have drawn random numbers.
random.Random(1).shuffle(list_colors)

fig = go.Figure()

# Input points are drawn with larger markers (15) than the processed points (10).
# `i_label` keeps counting across both frames so each trace gets its own symbol
# and colour.
i_label = 0
for df_plot, marker_size in ((df_test, 15), (df_resample, 10)):
    for col in df_plot.columns:

        fig.add_trace(go.Scatter(
            mode="markers",
            x=df_plot.index.values,
            y=df_plot[col].values,
            name=col,
            marker=dict(
                symbol=list_symbol[i_label],
                size=marker_size,
                line_width=2,
            ),
            line=dict(
                color=list_colors[i_label],
            ),
        ))

        i_label += 1

fig.update_layout(
    width=800,
    height=250,
    margin=dict(
        l=0,
        r=0,
        t=30,
        b=0,
    ),
    plot_bgcolor="rgb(245,245,245)",
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1,
        xanchor="center",
        x=0.5,
    )
)

# One grid line per minute (dtick is given in milliseconds).
# NOTE(review): the fixture timestamps read 07:00-07:11 at +06:00 while this
# window reads 00:59-01:16; presumably the axis works on the UTC values that
# `index.values` yields - confirm.
fig.update_xaxes(
    range=["2022-07-12 00:59:00+06:00","2022-07-12 01:16:00+06:00"],
    dtick=1000*60,
    showgrid=True,
    gridwidth=2, 
)

filename = "example_resample_{}-closed_{}-label".format(
    configs["data_processing"]["resample"]["bin_closed"],
    configs["data_processing"]["resample"]["bin_label"]
)
svg_file = path + "/" + filename + ".svg"
pio.write_image(fig, svg_file)
# Convert the SVG to a PNG thumbnail with pyvips (second argument: 3000).
# https://stackoverflow.com/questions/51450134/how-to-convert-svg-to-png-or-jpeg-in-python
image = pyvips.Image.thumbnail(svg_file, 3000)
image.write_to_file(path + "/" + filename + ".png")

fig.show()

### feat_stats

In [65]:
# Use the target column present in the test fixture.
configs["data_input"].update(target_var="Synthetic Site Electricity Main Total Power")

# Resampling settings: bin interval "3min", bins closed and labelled "right".
configs["data_processing"]["resample"].update(
    bin_interval="3min",
    bin_closed="right",
    bin_label="right",
)

# Turn the feat_stats option on, with a window width of "3min".
configs["data_processing"]["feat_stats"].update(active=True, window_width="3min")

In [66]:
# Read the fixture with its first column as the index and leave out "var2";
# the index is then parsed into timestamps.
df_test = pd.read_csv(
    "../../tests/fixtures/rolling_stats_input_w_target.csv", index_col=0
).drop(columns="var2", errors="ignore")
df_test.index = pd.to_datetime(df_test.index)
df_test

Unnamed: 0_level_0,var1,Synthetic Site Electricity Main Total Power
ts,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-07-12 07:00:00+06:00,,1.0
2022-07-12 07:01:53+06:00,1.5,1.2
2022-07-12 07:03:17+06:00,2.2,1.3
2022-07-12 07:04:02+06:00,0.9,
2022-07-12 07:04:59+06:00,3.6,2.5
2022-07-12 07:05:00+06:00,,2.3
2022-07-12 07:06:22+06:00,3.3,4.3
2022-07-12 07:09:46+06:00,2.3,3.4
2022-07-12 07:10:00+06:00,,
2022-07-12 07:11:02+06:00,1.3,


In [67]:
# Run the fixture through resample_or_rolling_stats, then tag every resulting
# column name with the configured window width.
df_resample = resample_or_rolling_stats(df_test, configs)
window_width = configs["data_processing"]["feat_stats"]["window_width"]
df_resample = df_resample.add_suffix(" | {} window".format(window_width))
df_resample

Unnamed: 0_level_0,var1_min | 3min window,var1_max | 3min window,var1_mean | 3min window,Synthetic Site Electricity Main Total Power | 3min window
ts,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-07-12 07:00:00+06:00,,,,1.0
2022-07-12 07:03:00+06:00,1.5,1.5,1.5,1.2
2022-07-12 07:06:00+06:00,0.9,3.6,2.233333,2.3
2022-07-12 07:09:00+06:00,3.3,3.3,3.3,4.3
2022-07-12 07:12:00+06:00,1.3,2.3,1.8,3.4
2022-07-12 07:15:00+06:00,4.1,4.3,4.2,1.1


In [68]:
# Keep only the "var1" series for plotting: the column named exactly "var1" in
# the input frame, and every processed column whose name contains "var1".
df_test = df_test.loc[:, df_test.columns.isin(["var1"])]
df_resample = df_resample.loc[:, df_resample.columns.str.contains("var1", regex=False)]

In [69]:
# One marker symbol per trace; the same number of colours is sampled at evenly
# spaced positions of the colourscale.
list_symbol = ["circle-open-dot", "square-open-dot", "diamond-open-dot", "x-open-dot"]

n_colors = len(list_symbol)
list_colors = px.colors.sample_colorscale(colorscale, [n/(n_colors -1) for n in range(n_colors)])
# Shuffle the colour list itself (shuffling a copy would leave `list_colors`
# untouched). A dedicated RNG seeded with 1 gives the same order on every run of
# this cell, independent of how often other cells have drawn random numbers.
random.Random(1).shuffle(list_colors)

fig = go.Figure()

# Input points are drawn with larger markers (15) than the processed points (10).
# `i_label` keeps counting across both frames so each trace gets its own symbol
# and colour.
i_label = 0
for df_plot, marker_size in ((df_test, 15), (df_resample, 10)):
    for col in df_plot.columns:

        fig.add_trace(go.Scatter(
            mode="markers",
            x=df_plot.index.values,
            y=df_plot[col].values,
            name=col,
            marker=dict(
                symbol=list_symbol[i_label],
                size=marker_size,
                line_width=2,
            ),
            line=dict(
                color=list_colors[i_label],
            ),
        ))

        i_label += 1

fig.update_layout(
    width=800,
    height=250,
    margin=dict(
        l=0,
        r=0,
        t=30,
        b=0,
    ),
    plot_bgcolor="rgb(245,245,245)",
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=1,
        xanchor="center",
        x=0.5,
    )
)

# One grid line per minute (dtick is given in milliseconds).
# NOTE(review): the fixture timestamps read 07:00-07:11 at +06:00 while this
# window reads 00:59-01:16; presumably the axis works on the UTC values that
# `index.values` yields - confirm.
fig.update_xaxes(
    range=["2022-07-12 00:59:00+06:00","2022-07-12 01:16:00+06:00"],
    dtick=1000*60,
    showgrid=True,
    gridwidth=2, 
)

filename = "example_feat_stats"
svg_file = path + "/" + filename + ".svg"
pio.write_image(fig, svg_file)
# Convert the SVG to a PNG thumbnail with pyvips (second argument: 3000).
# https://stackoverflow.com/questions/51450134/how-to-convert-svg-to-png-or-jpeg-in-python
image = pyvips.Image.thumbnail(svg_file, 3000)
image.write_to_file(path + "/" + filename + ".png")

fig.show()