In [17]:
# --- standard library ---
import json
import logging
import math
import os
import random
import shutil
import time
from pathlib import Path

# --- third-party ---
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from dateutil import parser
from torch.nn import init

# --- project-local (wattile) ---
from wattile.buildings_processing import (
    correct_predictor_columns,
    correct_timestamps,
    resample_or_rolling_stats,
    roll_predictors_target,
    timelag_predictors,
    timelag_predictors_target,
)
from wattile.data_reading import read_dataset_from_file
from wattile.time_processing import add_processed_time_columns

# Logger named after the id of the process running this kernel.
logger = logging.getLogger(str(os.getpid()))

# Two directory levels above the (resolved) current working directory.
PROJECT_DIRECTORY = Path().resolve().parent.parent

# reading configs

In [18]:
"""
For this example, we will be using the default configs.
Check out the docs for an explanation of each config.
"""
##################################################################################
# choose the configs file to use as an input
##################################################################################
# main configs file
configs_path = PROJECT_DIRECTORY / "wattile" / "configs" / "configs.json"
##################################################################################
# code testing configs file (uncomment to use the test fixture instead)
# configs_path = PROJECT_DIRECTORY / "tests" / "fixtures" / "test_configs.json"
##################################################################################

# Read with an explicit encoding so the result does not depend on the
# platform's default text encoding.
with open(configs_path, "r", encoding="utf-8") as f:
    configs = json.load(f)

# Start every run from an empty experiment directory: wipe any previous one,
# then recreate it (parents=True so a missing "notebooks" folder is not fatal).
exp_dir = PROJECT_DIRECTORY / "notebooks" / "exp_dir"
if exp_dir.exists():
    shutil.rmtree(exp_dir)
exp_dir.mkdir(parents=True)

# exp_dir is already built on top of PROJECT_DIRECTORY, so it is used as-is.
configs["data_input"]["exp_dir"] = str(exp_dir)
configs["data_input"]["data_dir"] = str(PROJECT_DIRECTORY / "data" / "Synthetic Site")

configs

{'data_input': {'data_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\data\\Synthetic Site',
  'data_config': 'Synthetic Site Config.json',
  'start_time': '2018-01-01T00:00:00-07:00',
  'end_time': '2022-01-01T00:00:00-07:00',
  'predictor_columns': ['Synthetic Weather Station Dew Point Temperature',
   'Synthetic Weather Station Diffuse Horizontal Irradiance',
   'Synthetic Weather Station Direct Normal Irradiance',
   'Synthetic Weather Station Dry Bulb Temperature',
   'Synthetic Weather Station Global Horizontal Irradiance',
   'Synthetic Weather Station Relative Humidity',
   'Synthetic Weather Station Wind Speed'],
  'target_var': 'Synthetic Site Electricity Main Total Power',
  'exp_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\notebooks\\exp_dir'},
 'data_output': {'exp_dir': 'exp_dir',
  'plot_comparison': True,
  'plot_comparison_portion_start': 0.0,
  'plot_comparison_portion_end': 1.0},
 'data_processing': {'fea

# reading data

In [19]:
# Register the target column (as a one-element list) under "target_feat_name",
# then load the dataset described by the configs.
target_column = configs["data_input"]["target_var"]
configs["target_feat_name"] = [target_column]

data = read_dataset_from_file(configs)
data

Unnamed: 0_level_0,Synthetic Weather Station Dew Point Temperature,Synthetic Weather Station Diffuse Horizontal Irradiance,Synthetic Weather Station Direct Normal Irradiance,Synthetic Weather Station Dry Bulb Temperature,Synthetic Weather Station Global Horizontal Irradiance,Synthetic Weather Station Relative Humidity,Synthetic Weather Station Wind Speed,Synthetic Site Electricity Main Total Power
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2021-12-01 07:00:00+00:00,15.413733,-1.525850,-0.005199,56.408001,-1.810825,17.930000,10.457981,104.800003
2021-12-01 07:01:00+00:00,15.183906,-1.496226,0.207964,56.174000,-1.841866,17.879999,11.265539,96.650002
2021-12-01 07:02:00+00:00,14.891221,-1.441191,0.457520,55.886002,-1.873579,17.809999,12.777752,96.199997
2021-12-01 07:03:00+00:00,14.836164,-1.371754,0.473117,55.796001,-1.860084,17.820000,12.694983,95.599998
2021-12-01 07:04:00+00:00,14.782966,-1.317349,0.545903,55.723999,-1.843271,17.820000,11.632407,100.650002
...,...,...,...,...,...,...,...,...
2021-12-08 06:55:00+00:00,11.210565,-1.487212,-0.254757,42.285198,-1.814939,24.680000,0.000000,109.664803
2021-12-08 06:56:00+00:00,11.190062,-1.445182,-0.233960,41.997200,-1.852901,24.930000,0.000000,107.002800
2021-12-08 06:57:00+00:00,11.223961,-1.396302,-0.145575,41.669601,-1.841248,25.290001,3.545647,106.480400
2021-12-08 06:58:00+00:00,11.337669,-1.335073,0.171570,41.180000,-1.866031,25.920000,1.386941,110.419998


# data processing

In [20]:
# Processing pipeline: each stage takes the previous stage's result, and the
# final result is bound back to `data` for the cells that follow.

# 1) check that the expected columns are present and put them in order
data_ordered = correct_predictor_columns(configs, data)

# 2) sort, and trim to the configured time period
data_trimmed = correct_timestamps(configs, data_ordered)

# 3) add time-based features
data_with_time = add_processed_time_columns(data_trimmed, configs)

# 4) add statistics features
data = resample_or_rolling_stats(data_with_time, configs)

data

Unnamed: 0_level_0,Synthetic Weather Station Dew Point Temperature_min,Synthetic Weather Station Diffuse Horizontal Irradiance_min,Synthetic Weather Station Direct Normal Irradiance_min,Synthetic Weather Station Dry Bulb Temperature_min,Synthetic Weather Station Global Horizontal Irradiance_min,Synthetic Weather Station Relative Humidity_min,Synthetic Weather Station Wind Speed_min,sin_HOD_min,cos_HOD_min,HOD_binary_reg_0_min,...,DOW_binary_fuzzy_0_mean,DOW_binary_fuzzy_1_mean,DOW_binary_fuzzy_2_mean,DOW_binary_fuzzy_3_mean,DOW_binary_fuzzy_4_mean,DOW_binary_fuzzy_5_mean,DOW_binary_fuzzy_6_mean,sin_MOY_mean,cos_MOY_mean,Synthetic Site Electricity Main Total Power
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-12-01 07:00:00+00:00,15.413733,-1.525850,-0.005199,56.408001,-1.810825,17.930000,10.457981,0.965926,-2.588190e-01,0.0,...,0.0,0.0,0.708333,0.291667,0.0,0.0,0.0,-0.493776,0.869589,104.800003
2021-12-01 07:15:00+00:00,14.782966,-1.496226,-0.540707,55.112000,-1.873579,17.809999,9.395406,0.946930,-3.214395e-01,0.0,...,0.0,0.0,0.708333,0.291667,0.0,0.0,0.0,-0.493776,0.869589,103.650002
2021-12-01 07:30:00+00:00,15.474850,-1.379412,-1.741706,54.608002,-1.918852,18.690001,8.612455,0.923880,-3.826834e-01,0.0,...,0.0,0.0,0.708333,0.291667,0.0,0.0,0.0,-0.493776,0.869589,98.050003
2021-12-01 07:45:00+00:00,16.614972,-1.642929,-2.147240,53.743999,-2.029242,19.320000,4.503084,0.896873,-4.422887e-01,0.0,...,0.0,0.0,0.708333,0.291667,0.0,0.0,0.0,-0.493776,0.869589,98.349998
2021-12-01 08:00:00+00:00,16.413343,-1.480409,-0.426325,52.807999,-1.958610,21.090000,3.746977,0.866025,-5.000000e-01,0.0,...,0.0,0.0,0.705556,0.294444,0.0,0.0,0.0,-0.493776,0.869589,102.949997
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-12-08 06:00:00+00:00,11.961447,-1.673728,-1.211398,41.388802,-1.909901,25.160000,0.000000,0.998135,-1.608123e-16,0.0,...,0.0,0.0,0.788889,0.211111,0.0,0.0,0.0,-0.385663,0.922640,104.668396
2021-12-08 06:15:00+00:00,11.008480,-1.489343,-0.504314,41.646198,-1.936419,24.240000,0.000000,0.997859,-6.540313e-02,0.0,...,0.0,0.0,0.750000,0.250000,0.0,0.0,0.0,-0.385663,0.922640,102.709602
2021-12-08 06:30:00+00:00,10.851442,-1.571301,-0.655083,42.024200,-1.834018,23.760000,0.000000,0.991445,-1.305262e-01,0.0,...,0.0,0.0,0.750000,0.250000,0.0,0.0,0.0,-0.385663,0.922640,105.972198
2021-12-08 06:45:00+00:00,10.827111,-1.430384,-0.826657,40.960400,-1.865315,24.330000,0.000000,0.980785,-1.950903e-01,0.0,...,0.0,0.0,0.750000,0.250000,0.0,0.0,0.0,-0.385663,0.922640,112.989998


In [21]:
# configs["learning_algorithm"]["arch_version"] = "charlie"

In [22]:
# if configs["learning_algorithm"]["arch_version"] == "alfa":
#     data = timelag_predictors(data, configs)
# elif configs["learning_algorithm"]["arch_version"] == "bravo":
#     data = timelag_predictors_target(data, configs)
# elif configs["learning_algorithm"]["arch_version"] == "charlie":
#     data = roll_predictors_target(data, configs)
    
# data.to_csv("./output.csv")

# data

In [23]:
# data_temp = data.loc[:, 
#          (data.columns.str.contains("Synthetic Weather Station Dew Point Temperature_mean")) |
#          data.columns.str.contains("Synthetic Site Electricity Main Total Power")
#         ]

# data_temp.to_csv("./output.csv")

# data trimming testing

In [24]:
# Pin the scenario under test: "charlie" architecture with a 30-minute
# futurecast window.
configs["learning_algorithm"].update({"arch_version": "charlie"})
configs["data_processing"]["input_output_window"].update(
    {"window_width_futurecast": "30min"}
)

In [25]:
# Time for which a prediction is requested (ISO string carrying an explicit
# +00:00 offset).
cast_time_text = "2021-12-07 13:30:00+00:00"
timestamp_cast = pd.to_datetime(cast_time_text)
timestamp_cast

Timestamp('2021-12-07 13:30:00+0000', tz='UTC')

### ----------------------------------------------------------------
### get_input_window_for_output_time
### ----------------------------------------------------------------

In [26]:
# Derive the span of input data needed to produce a prediction for
# `timestamp_cast`. The resulting start/end times are printed and stored in
# configs["data_input"] under "prediction_window_start_time" and
# "prediction_window_end_time".
config_data_processing = configs["data_processing"]
config_feat_timelag = config_data_processing["feat_timelag"]
config_input_output_window = config_data_processing["input_output_window"]
arch_version = configs["learning_algorithm"]["arch_version"]

# The window is measured from an anchor time. `timestamp_cast` itself is left
# untouched so that re-running this cell yields the same window every time
# (reassigning it here would move it further back on each run for alfa/bravo).
window_anchor_time = timestamp_cast

if arch_version == "alfa":

    # start: `lag_count` lag intervals before the anchor
    window_start_offset = (
        pd.Timedelta(config_feat_timelag["lag_interval"])
        * config_feat_timelag["lag_count"]
    )

    # end: the futurecast width; the anchor is moved back by the same amount
    window_end_offset = pd.Timedelta("0min") + pd.Timedelta(
        config_input_output_window["window_width_futurecast"]
    )

    window_anchor_time = timestamp_cast - window_end_offset

elif arch_version == "bravo":

    # start: `lag_count` lag intervals before the anchor
    window_start_offset = (
        pd.Timedelta(config_feat_timelag["lag_interval"])
        * config_feat_timelag["lag_count"]
    )

    # end: target width minus one resample step, plus the futurecast width;
    # the anchor is moved back by the same amount
    window_end_offset = (
        pd.Timedelta(config_input_output_window["window_width_target"])
        - pd.Timedelta(config_data_processing["resample_interval"])
        + pd.Timedelta(config_input_output_window["window_width_futurecast"])
    )

    window_anchor_time = timestamp_cast - window_end_offset

elif arch_version == "charlie":

    # start: the full source window width before the anchor
    window_start_offset = pd.Timedelta(
        config_input_output_window["window_width_source"]
    )

    # end: target width minus one resample step; the anchor is not moved.
    # NOTE(review): unlike alfa/bravo, this branch does not use
    # "window_width_futurecast" -- confirm this is intended.
    window_end_offset = pd.Timedelta(
        config_input_output_window["window_width_target"]
    ) - pd.Timedelta(config_data_processing["resample_interval"])

else:
    # Fail with a clear message instead of a NameError on the offsets below.
    raise ValueError(
        "Unsupported arch_version {!r}; expected 'alfa', 'bravo' or 'charlie'".format(
            arch_version
        )
    )

print("window_start_offset = {}".format(window_start_offset))
print("window_end_offset = {}".format(window_end_offset))

prediction_window_start_time = window_anchor_time - window_start_offset
prediction_window_end_time = window_anchor_time + window_end_offset

print("window_start_time = {}".format(prediction_window_start_time))
print("window_end_time = {}".format(prediction_window_end_time))

configs["data_input"]["prediction_window_start_time"] = prediction_window_start_time
configs["data_input"]["prediction_window_end_time"] = prediction_window_end_time

window_start_offset = 0 days 03:00:00
window_end_offset = 0 days 00:30:00
window_start_time = 2021-12-07 10:30:00+00:00
window_end_time = 2021-12-07 14:00:00+00:00


In [27]:
# Cut the processed data down to the prediction window (label-based slice on
# the index; both endpoints are included).
# An explicit copy is taken because the helper applied to `data_test` next
# assigns a column on the frame it receives, which triggers pandas'
# SettingWithCopyWarning when it is handed a slice of `data`.
data_test = data.loc[prediction_window_start_time:prediction_window_end_time, :].copy()
data_test

Unnamed: 0_level_0,Synthetic Weather Station Dew Point Temperature_min,Synthetic Weather Station Diffuse Horizontal Irradiance_min,Synthetic Weather Station Direct Normal Irradiance_min,Synthetic Weather Station Dry Bulb Temperature_min,Synthetic Weather Station Global Horizontal Irradiance_min,Synthetic Weather Station Relative Humidity_min,Synthetic Weather Station Wind Speed_min,sin_HOD_min,cos_HOD_min,HOD_binary_reg_0_min,...,DOW_binary_fuzzy_0_mean,DOW_binary_fuzzy_1_mean,DOW_binary_fuzzy_2_mean,DOW_binary_fuzzy_3_mean,DOW_binary_fuzzy_4_mean,DOW_binary_fuzzy_5_mean,DOW_binary_fuzzy_6_mean,sin_MOY_mean,cos_MOY_mean,Synthetic Site Electricity Main Total Power
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-12-07 10:30:00+00:00,17.988364,-1.32799,-0.218362,43.624401,-1.98669,32.470001,2.601633,0.3826834,-0.92388,0.0,...,0.0,0.583333,0.416667,0.0,0.0,0.0,0.0,-0.401488,0.915864,103.348602
2021-12-07 10:45:00+00:00,16.985844,-1.43911,-0.441922,43.752201,-1.886374,29.67,0.53688,0.3214395,-0.94693,0.0,...,0.0,0.583333,0.416667,0.0,0.0,0.0,0.0,-0.401488,0.915864,109.137398
2021-12-07 11:00:00+00:00,16.570692,-1.379267,-1.653312,45.2318,-1.588402,27.24,10.372975,0.258819,-0.965926,0.0,...,0.0,0.580556,0.419444,0.0,0.0,0.0,0.0,-0.401488,0.915864,100.612
2021-12-07 11:15:00+00:00,16.536292,-1.158921,-0.733073,46.056198,-1.595179,27.34,7.970436,0.1950903,-0.980785,0.0,...,0.0,0.541667,0.458333,0.0,0.0,0.0,0.0,-0.401488,0.915864,102.493805
2021-12-07 11:30:00+00:00,16.613036,-1.275901,-0.072787,45.2696,-1.797867,28.25,4.614934,0.1305262,-0.991445,0.0,...,0.0,0.541667,0.458333,0.0,0.0,0.0,0.0,-0.401488,0.915864,105.330399
2021-12-07 11:45:00+00:00,16.804636,-1.28689,0.109181,41.5868,-1.955182,29.129999,4.559009,0.06540313,-0.997859,0.0,...,0.0,0.541667,0.458333,0.0,0.0,0.0,0.0,-0.401488,0.915864,110.798401
2021-12-07 12:00:00+00:00,17.245329,-1.308267,-0.51991,41.547199,-1.749122,32.18,6.207679,-3.216245e-16,-1.0,0.0,...,0.0,0.538889,0.461111,0.0,0.0,0.0,0.0,-0.401488,0.915864,110.442802
2021-12-07 12:15:00+00:00,17.764984,-1.28367,-0.077986,42.835999,-1.707108,32.970001,5.621584,-0.06540313,-0.99999,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,-0.401488,0.915864,105.058601
2021-12-07 12:30:00+00:00,18.143951,-1.215349,-0.421127,42.196999,-1.599398,33.91,9.059855,-0.1305262,-0.997564,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,-0.401488,0.915864,103.803001
2021-12-07 12:45:00+00:00,17.765068,-1.225133,-0.764266,41.9972,-1.510649,33.27,9.42225,-0.1950903,-0.990866,0.0,...,0.0,0.5,0.5,0.0,0.0,0.0,0.0,-0.401488,0.915864,109.332199


In [28]:
# Map each architecture version to the transform it needs and the label that
# is printed once the transform has run. An unrecognised version leaves
# `data_test` unchanged and prints nothing.
arch_transforms = {
    "alfa": (timelag_predictors, "ALFA"),
    "bravo": (timelag_predictors_target, "BRAVO"),
    "charlie": (roll_predictors_target, "CHARLIE"),
}

selected_arch = configs["learning_algorithm"]["arch_version"]
if selected_arch in arch_transforms:
    transform, arch_label = arch_transforms[selected_arch]
    data_test = transform(data_test, configs)
    print(arch_label)

data_test

CHARLIE


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[target_var] = target.shift(freq="-" + window_width_futurecast)


{'predictor': array([[[ 17.98836407,  -1.32799006,  -0.2183623 , ...,  -0.40148799,
            0.91586429, 100.61199951],
         [ 16.98584378,  -1.43911004,  -0.4419215 , ...,  -0.40148799,
            0.91586429, 102.49380493],
         [ 16.57069247,  -1.37926698,  -1.65331197, ...,  -0.40148799,
            0.91586429, 105.33039856],
         ...,
         [ 17.76506791,  -1.22513294,  -0.76426619, ...,  -0.40148799,
            0.91586429, 104.84539795],
         [ 17.84473407,  -1.31224597,  -0.76426941, ...,  -0.40148799,
            0.91586429, 112.98020172],
         [ 17.77192634,  -1.37123501,  -1.15419996, ...,  -0.40148799,
            0.91586429, 108.45140076]]]),
 'target': array([[[113.13939667],
         [         nan],
         [         nan]]]),
 'timestamp': DatetimeIndex(['2021-12-07 10:30:00+00:00'], dtype='datetime64[ns, UTC]', name='Timestamp', freq='15T')}

### ----------------------------------------------------------------
### get_prediction_vector_for_time
### ----------------------------------------------------------------

In [31]:
# Inputs: the resample step and the width of the target window.
resample_interval = configs["data_processing"]["resample_interval"]
window_start_delta = "0min"  # TODO: tie with window_width_futurecast in configs
window_width_target = configs["data_processing"]["input_output_window"][
    "window_width_target"
]

# Number of whole resample steps that fit in the target window.
resample_step = pd.Timedelta(resample_interval)
count_horizon = pd.Timedelta(window_width_target) // resample_step

# The k-th horizon (k = 1..count_horizon) lies k resample steps after the
# window start delta.
horizon_vector = [
    pd.Timedelta(window_start_delta) + resample_step * k
    for k in range(1, count_horizon + 1)
]

horizon_vector

[Timedelta('0 days 00:15:00'),
 Timedelta('0 days 00:30:00'),
 Timedelta('0 days 00:45:00')]