In [1]:
import numpy as np
import pandas as pd
import random
import time
import math
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import init
from dateutil import parser
from pathlib import Path
import json 
import shutil
import logging
# Logger whose name is the current process ID (as a string).
logger = logging.getLogger(str(os.getpid()))

from wattile.data_reading import read_dataset_from_file
from wattile.data_processing import correct_predictor_columns, correct_timestamps, resample_or_rolling_stats
from wattile.time_processing import add_processed_time_columns
# Absolute path two directory levels above the current working directory.
# NOTE(review): this presumably points at the repository root when the notebook
# is run from its own folder — confirm against the printed value.
PROJECT_DIRECTORY = Path().resolve().parent.parent

PROJECT_DIRECTORY = C:\Users\JKIM4\Anaconda3\envs\wattile\Lib\site-packages\wattile


In [2]:
import copy

# reading configs

In [3]:
"""
For this example, we will be using the default configs.
Check out the docs for an explanation of each config.
"""
##################################################################################
# choose the configs file to use as an input
##################################################################################
# main configs file
with open(PROJECT_DIRECTORY / "wattile" / "configs" / "configs.json", "r") as f:
    configs = json.load(f)
##################################################################################
# code testing configs file
# with open(PROJECT_DIRECTORY / "tests" / "fixtures" / "test_configs.json", "r") as f:
#     configs = json.load(f)
##################################################################################

# Start from an empty experiment directory on every run so that results from a
# previous execution cannot leak into this one.
exp_dir = PROJECT_DIRECTORY / "notebooks" / "exp_dir"
if exp_dir.exists():
    shutil.rmtree(exp_dir)
# parents=True: also create "notebooks" if it is missing instead of failing.
exp_dir.mkdir(parents=True)

# exp_dir is already absolute (it is built from PROJECT_DIRECTORY), so it is
# used directly rather than being joined onto PROJECT_DIRECTORY a second time.
configs["data_input"]["exp_dir"] = str(exp_dir)
configs["data_input"]["data_dir"] = str(PROJECT_DIRECTORY / "data" / "Synthetic Site")

configs

{'data_input': {'data_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\data\\Synthetic Site',
  'data_config': 'Synthetic Site Config.json',
  'start_time': '2018-01-01T00:00:00-07:00',
  'end_time': '2022-01-01T00:00:00-07:00',
  'predictor_columns': ['Synthetic Weather Station Dew Point Temperature',
   'Synthetic Weather Station Diffuse Horizontal Irradiance',
   'Synthetic Weather Station Direct Normal Irradiance',
   'Synthetic Weather Station Dry Bulb Temperature',
   'Synthetic Weather Station Global Horizontal Irradiance',
   'Synthetic Weather Station Relative Humidity',
   'Synthetic Weather Station Wind Speed'],
  'target_var': 'Synthetic Site Electricity Main Total Power',
  'exp_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\notebooks\\exp_dir'},
 'data_output': {'exp_dir': 'exp_dir',
  'plot_comparison': True,
  'plot_comparison_portion_start': 0.0,
  'plot_comparison_portion_end': 1.0},
 'data_processing': {'fea

# reading data

In [4]:
def get_dummy_data(start, end, iterval):
    """Create a small, fully deterministic frame whose values encode their timestamp.

    ``var_1`` packs each index entry into one integer as
    ``month * 1000000 + day * 10000 + hour * 100 + minute`` and
    ``target_var`` is the negated ``var_1``.

    :param start: first timestamp of the index (passed to ``pd.date_range``)
    :param end: last timestamp of the index (passed to ``pd.date_range``)
    :param iterval: frequency of the index (passed to ``pd.date_range`` as ``freq``)
    :return: frame indexed by the generated range with columns ``var_1`` and ``target_var``
    :rtype: pd.DataFrame
    """
    stamps = pd.date_range(start, end, freq=iterval)
    frame = pd.DataFrame(index=stamps)

    # Encode each timestamp as the integer MMDDhhmm.
    encoded = (
        stamps.month * 1000000
        + stamps.day * 10000
        + stamps.hour * 100
        + stamps.minute
    )
    frame["var_1"] = encoded
    frame["target_var"] = -1 * frame["var_1"]

    return frame

In [5]:
####################################################################
# read "our" synthetic data
####################################################################
configs["target_feat_name"] = [configs["data_input"]["target_var"]]
# Keep the configs returned by the reader under the name `configs` so that the
# following cells work with them (a misspelled name would silently drop them).
data, configs = read_dataset_from_file(configs)
####################################################################
# create much simpler dummy data
####################################################################
# Only the first and last timestamp of the synthetic data are reused; the
# values themselves are replaced by the timestamp-encoded dummy values.
target_var = configs["data_input"]["target_var"]
data = get_dummy_data(data.index[0], data.index[-1], "1min")
data = data.rename(columns={"target_var":target_var})

data

Unnamed: 0,var_1,Synthetic Site Electricity Main Total Power
2021-12-01 07:00:00+00:00,12010700,-12010700
2021-12-01 07:01:00+00:00,12010701,-12010701
2021-12-01 07:02:00+00:00,12010702,-12010702
2021-12-01 07:03:00+00:00,12010703,-12010703
2021-12-01 07:04:00+00:00,12010704,-12010704
...,...,...
2021-12-08 06:55:00+00:00,12080655,-12080655
2021-12-08 06:56:00+00:00,12080656,-12080656
2021-12-08 06:57:00+00:00,12080657,-12080657
2021-12-08 06:58:00+00:00,12080658,-12080658


# test setting

In [6]:
# Model architecture exercised by this test.
configs["learning_algorithm"].update({"arch_version": "bravo"})

# Resampling bin width.
configs["data_processing"]["resample"].update({"bin_interval": "15min"})

# Time-lag features: interval between lags and number of lags.
configs["data_processing"]["feat_timelag"].update(
    {"lag_interval": "15min", "lag_count": 4}
)

# Source / target / futurecast window widths.
configs["data_processing"]["input_output_window"].update(
    {
        "window_width_source": "180min",
        "window_width_target": "60min",
        "window_width_futurecast": "30min",
    }
)

# Use case under test.
configs["learning_algorithm"].update({"use_case": "prediction"})

# data processing

In [7]:
# NOTE(review): every processing step below is disabled, so `data` is shown
# exactly as produced by the reading cell. Presumably the steps are skipped
# because the dummy frame does not carry the configured predictor columns —
# confirm before re-enabling them.

# assert we have the correct columns and order them
# data = correct_predictor_columns(configs, data)

# sort and trim data to the specified time period
# data = correct_timestamps(configs, data)

# add time-based features
# data = add_processed_time_columns(data, configs)

# add statistics features
# data = resample_or_rolling_stats(data, configs)

data

Unnamed: 0,var_1,Synthetic Site Electricity Main Total Power
2021-12-01 07:00:00+00:00,12010700,-12010700
2021-12-01 07:01:00+00:00,12010701,-12010701
2021-12-01 07:02:00+00:00,12010702,-12010702
2021-12-01 07:03:00+00:00,12010703,-12010703
2021-12-01 07:04:00+00:00,12010704,-12010704
...,...,...
2021-12-08 06:55:00+00:00,12080655,-12080655
2021-12-08 06:56:00+00:00,12080656,-12080656
2021-12-08 06:57:00+00:00,12080657,-12080657
2021-12-08 06:58:00+00:00,12080658,-12080658


# test

### set timestamp for casting

- this is the timestamp that serves as the basis for calculating the relative window sizes
- it represents the current time ("now") from which the forecast is made

In [8]:
# Reference "now" for this test, given as a string with an explicit +00:00 offset.
timestamp_cast = pd.to_datetime("2021-12-06 13:30:00+00:00")
timestamp_cast

Timestamp('2021-12-06 13:30:00+0000', tz='UTC')

- below is the ground-truth target measurement we want to predict from now (= `timestamp_cast`)

In [9]:
# Timestamp the forecast refers to: the casting time plus the futurecast width.
futurecast_width = pd.Timedelta(
    configs["data_processing"]["input_output_window"]["window_width_futurecast"]
)
timestamp_predict = timestamp_cast + futurecast_width

# Target-column entries whose index equals the predicted timestamp.
groundtruth_rows = data.loc[
    data.index == timestamp_predict, configs["data_input"]["target_var"]
]
groundtruth_target = groundtruth_rows.iloc[0]
groundtruth_rows

2021-12-06 14:00:00+00:00   -12061400
Freq: T, Name: Synthetic Site Electricity Main Total Power, dtype: int64

- so the value below is what we want to predict from `timestamp_cast` in a deployment scenario

In [10]:
# Show the scalar ground-truth value selected in the previous cell.
groundtruth_target

-12061400

### create output vector

In [11]:
def get_prediction_vector_for_time_alfa():
    """Build the forecast horizon vector for the "alfa" architecture.

    Takes no arguments; ``window_width_futurecast`` is read from the
    notebook-level ``configs`` dictionary under
    ``configs["data_processing"]["input_output_window"]``.

    :return: a one-element list holding the futurecast width as a time offset
        (a timedelta, not an absolute timestamp)
    :rtype: List[pd.Timedelta]
    """
    processing_cfg = configs["data_processing"]
    futurecast_width = processing_cfg["input_output_window"]["window_width_futurecast"]

    # alfa has a single horizon: the futurecast offset itself.
    return [pd.Timedelta(futurecast_width)]


def get_prediction_vector_for_time_bravo():
    """Build the forecast horizon vector for the "bravo" architecture.

    Takes no arguments; all settings are read from the notebook-level
    ``configs`` dictionary under ``configs["data_processing"]``:
    ``resample.bin_interval``, ``input_output_window.window_width_futurecast``
    and ``input_output_window.window_width_target``.

    The horizons start at the futurecast width and advance in steps of the
    resample interval up to (and including, when it falls on a step) the
    futurecast width plus the target window width.

    :return: the horizons as time offsets (timedeltas, not absolute timestamps)
    :rtype: List[pd.Timedelta]
    """
    # set up variables
    config_data_processing = configs["data_processing"]
    resample_interval = config_data_processing["resample"]["bin_interval"]
    window_config = config_data_processing["input_output_window"]
    first_horizon = pd.Timedelta(window_config["window_width_futurecast"])
    last_horizon = first_horizon + pd.Timedelta(window_config["window_width_target"])

    # A regular range from the first to the last horizon; timedelta_range
    # stops at the last step that does not exceed `end`.
    return pd.timedelta_range(
        start=first_horizon,
        end=last_horizon,
        freq=resample_interval,
    ).tolist()


def get_prediction_vector_for_time_charlie():
    """Build the forecast horizon vector for the "charlie" architecture.

    Takes no arguments; all settings are read from the notebook-level
    ``configs`` dictionary under ``configs["data_processing"]``:
    ``resample.bin_interval``, ``input_output_window.window_width_futurecast``
    and ``input_output_window.window_width_target``.

    :return: horizons as time offsets (timedeltas, not absolute timestamps),
        starting at the futurecast width and spaced one resample interval apart
    :rtype: List[pd.Timedelta]
    """
    processing_cfg = configs["data_processing"]
    step = pd.Timedelta(processing_cfg["resample"]["bin_interval"])
    window_cfg = processing_cfg["input_output_window"]
    first_horizon = pd.Timedelta(window_cfg["window_width_futurecast"])
    target_width = pd.Timedelta(window_cfg["window_width_target"])

    # Number of whole resample steps inside the target window, plus the
    # starting horizon itself.
    n_horizons = target_width // step + 1

    # k-th horizon = first horizon shifted by k resample steps.
    return [first_horizon + k * step for k in range(n_horizons)]

In [12]:
# Map each supported architecture name to the function that builds its
# forecast horizon vector.
prediction_vector_builders = {
    "alfa": get_prediction_vector_for_time_alfa,
    "bravo": get_prediction_vector_for_time_bravo,
    "charlie": get_prediction_vector_for_time_charlie,
}

arch_version = configs["learning_algorithm"]["arch_version"]

# Fail loudly on an unknown architecture: otherwise `output_vector` would be
# undefined on a fresh kernel, or silently keep a stale value from an earlier run.
if arch_version not in prediction_vector_builders:
    raise ValueError(
        f"Unsupported arch_version {arch_version!r}; "
        f"expected one of {sorted(prediction_vector_builders)}"
    )

output_vector = prediction_vector_builders[arch_version]()

output_vector

[Timedelta('0 days 00:30:00'),
 Timedelta('0 days 00:45:00'),
 Timedelta('0 days 01:00:00'),
 Timedelta('0 days 01:15:00'),
 Timedelta('0 days 01:30:00')]