In [1]:
import numpy as np
import pandas as pd
import random
import time
import math
import os
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import init
from dateutil import parser
from pathlib import Path
import json 
import shutil
import logging
logger = logging.getLogger(str(os.getpid()))

from wattile.data_reading import read_dataset_from_file, _concat_data_from_files
from wattile.entry_point import init_logging
PROJECT_DIRECTORY = Path().resolve().parent.parent

PROJECT_DIRECTORY = C:\Users\JKIM4\Anaconda3\envs\wattile\Lib\site-packages\wattile


In [2]:
import copy
import pathlib

# reading configs

In [3]:
"""
For this example, we will be using the default configs.
Check out the docs for an explaination of each config.
"""
##################################################################################
# choose the configs file to use as an input
##################################################################################
# main configs file
with open(PROJECT_DIRECTORY / "wattile" / "configs" / "configs.json", "r") as f:
    configs = json.load(f)
##################################################################################
# code testing configs file
# with open(PROJECT_DIRECTORY / "tests" / "fixtures" / "test_configs.json", "r") as f:
#     configs = json.load(f)
##################################################################################

exp_dir = PROJECT_DIRECTORY / "notebooks" / "exp_dir"
if exp_dir.exists():
    shutil.rmtree(exp_dir)
exp_dir.mkdir()

configs["data_input"]["exp_dir"] = str(PROJECT_DIRECTORY / exp_dir)
configs["data_input"]["data_dir"] = str(PROJECT_DIRECTORY / "data" / "Synthetic Site")

configs

{'data_input': {'data_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\data\\Synthetic Site',
  'data_config': 'Synthetic Site Config.json',
  'start_time': '2018-01-01T00:00:00-07:00',
  'end_time': '2022-01-01T00:00:00-07:00',
  'predictor_columns': ['Synthetic Weather Station Dew Point Temperature',
   'Synthetic Weather Station Diffuse Horizontal Irradiance',
   'Synthetic Weather Station Direct Normal Irradiance',
   'Synthetic Weather Station Dry Bulb Temperature',
   'Synthetic Weather Station Global Horizontal Irradiance',
   'Synthetic Weather Station Relative Humidity',
   'Synthetic Weather Station Wind Speed'],
  'target_var': 'Synthetic Site Electricity Main Total Power',
  'exp_dir': 'C:\\Users\\JKIM4\\Documents\\GitHub\\intelligentcampus-pred-analytics\\notebooks\\exp_dir'},
 'data_output': {'exp_dir': 'exp_dir',
  'plot_comparison': True,
  'plot_comparison_portion_start': 0.0,
  'plot_comparison_portion_end': 1.0},
 'data_processing': {'fea

In [4]:
def _get_dataset_config(configs):
    """Get dataset config as dataframe

    :param configs: configs
    :type configs: dict
    :return: dataset config
    :rtype: Tuple[pd.DataFrame, List[Dict]]
    """
    dataset_dir = Path(configs["data_input"]["data_dir"])
    configs_file_inputdata = dataset_dir / configs["data_input"]["data_config"]

    logger.info(
        "Pre-process: reading input data summary json file from {}".format(
            configs_file_inputdata
        )
    )

    with open(configs_file_inputdata, "r") as read_file:
        configs_input = json.load(read_file)
        df_inputdata = pd.DataFrame(configs_input["files"])

    # converting date time column into pandas datetime (raw format based on ISO 8601)
    df_inputdata["start"] = pd.to_datetime(
        df_inputdata.start, format="t:%Y-%m-%dT%H:%M:%S%z", exact=False, utc=True
    )
    df_inputdata["end"] = pd.to_datetime(
        df_inputdata.end, format="t:%Y-%m-%dT%H:%M:%S%z", exact=False, utc=True
    )

    df_inputdata["path"] = str(dataset_dir) + "/" + df_inputdata["filename"]

    return df_inputdata, configs_input

### read data config

In [5]:
df_inputdata, configs_input = _get_dataset_config(configs)
configs_input

{'dates': {'start': 't:2021-12-01T00:00:00-07:00 Denver',
  'end': 't:2021-12-08T00:00:00-07:00 Denver'},
 'predictors': [{'site': 'Synthetic Site',
   'column': 'Synthetic Weather Station Dew Point Temperature',
   'id': 'r:278f8943-6a199bd7 Synthetic Weather Station Dew Point Temperature',
   'description': 'Synthetic Weather Station Dew Point Temperature',
   'unit': 'Â°F',
   'pv': False},
  {'site': 'Synthetic Site',
   'column': 'Synthetic Weather Station Diffuse Horizontal Irradiance',
   'id': 'r:278f79c0-03da5abc Synthetic Weather Station Diffuse Horizontal Irradiance',
   'description': 'Synthetic Weather Station Diffuse Horizontal Irradiance',
   'unit': 'W/mÂ²_irr',
   'pv': False},
  {'site': 'Synthetic Site',
   'column': 'Synthetic Weather Station Direct Normal Irradiance',
   'id': 'r:278f79c0-4fe536b4 Synthetic Weather Station Direct Normal Irradiance',
   'description': 'Synthetic Weather Station Direct Normal Irradiance',
   'unit': 'W/mÂ²_irr',
   'pv': False},
  {'

### initialize exp_dir

In [6]:
init_logging(local_results_dir=pathlib.Path(configs["data_output"]["exp_dir"]))

Logging to: exp_dir\output.out, PID: 18352


### read data based on predictor_columns

In [7]:
# read in predictor data
predictor_data_info = df_inputdata[df_inputdata.contentType == "predictors"]
data_full_p = _concat_data_from_files(
    predictor_data_info.path,
    needed_columns=configs["data_input"]["predictor_columns"],
)

data_full_p

Unnamed: 0_level_0,Synthetic Weather Station Dew Point Temperature,Synthetic Weather Station Diffuse Horizontal Irradiance,Synthetic Weather Station Direct Normal Irradiance,Synthetic Weather Station Dry Bulb Temperature,Synthetic Weather Station Global Horizontal Irradiance,Synthetic Weather Station Relative Humidity,Synthetic Weather Station Wind Speed
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-12-01 07:00:00+00:00,15.413733,-1.525850,-0.005199,56.408001,-1.810825,17.930000,10.457981
2021-12-01 07:01:00+00:00,15.183906,-1.496226,0.207964,56.174000,-1.841866,17.879999,11.265539
2021-12-01 07:02:00+00:00,14.891221,-1.441191,0.457520,55.886002,-1.873579,17.809999,12.777752
2021-12-01 07:03:00+00:00,14.836164,-1.371754,0.473117,55.796001,-1.860084,17.820000,12.694983
2021-12-01 07:04:00+00:00,14.782966,-1.317349,0.545903,55.723999,-1.843271,17.820000,11.632407
...,...,...,...,...,...,...,...
2021-12-08 06:55:00+00:00,11.210565,-1.487212,-0.254757,42.285198,-1.814939,24.680000,0.000000
2021-12-08 06:56:00+00:00,11.190062,-1.445182,-0.233960,41.997200,-1.852901,24.930000,0.000000
2021-12-08 06:57:00+00:00,11.223961,-1.396302,-0.145575,41.669601,-1.841248,25.290001,3.545647
2021-12-08 06:58:00+00:00,11.337669,-1.335073,0.171570,41.180000,-1.866031,25.920000,1.386941


### save predictor columns that are read to json

In [8]:
# predictor_path = Path(configs["data_output"]["exp_dir"]) / "predictors_config.json"
# final_predictors_data = {}
# final_predictors_data["predictors"] = []
# with open(predictor_path, "w") as fp:
#     final_predictors_data["predictors"] = [
#         p
#         for p in predictors_data
#         if p["column"] in configs["data_input"]["predictor_columns"]
#     ]
#     json.dump(final_predictors_data, fp)

In [9]:
# save final input data based on data config format
predictor_path = (
    Path(configs["data_output"]["exp_dir"]) / "predictors_target_config.json"
)
final_predictors_data = {}
final_predictors_data["predictors"] = []
with open(predictor_path, "w") as fp:
    final_predictors_data["predictors"] = [
        p
        for p in configs_input["predictors"]
        if p["column"] in list(data_full_p.columns)
    ]
    final_predictors_data["target"] = configs_input["targets"][0]
    json.dump(final_predictors_data, fp)

### check output

In [10]:
final_predictors_data

{'predictors': [{'site': 'Synthetic Site',
   'column': 'Synthetic Weather Station Dew Point Temperature',
   'id': 'r:278f8943-6a199bd7 Synthetic Weather Station Dew Point Temperature',
   'description': 'Synthetic Weather Station Dew Point Temperature',
   'unit': 'Â°F',
   'pv': False},
  {'site': 'Synthetic Site',
   'column': 'Synthetic Weather Station Diffuse Horizontal Irradiance',
   'id': 'r:278f79c0-03da5abc Synthetic Weather Station Diffuse Horizontal Irradiance',
   'description': 'Synthetic Weather Station Diffuse Horizontal Irradiance',
   'unit': 'W/mÂ²_irr',
   'pv': False},
  {'site': 'Synthetic Site',
   'column': 'Synthetic Weather Station Direct Normal Irradiance',
   'id': 'r:278f79c0-4fe536b4 Synthetic Weather Station Direct Normal Irradiance',
   'description': 'Synthetic Weather Station Direct Normal Irradiance',
   'unit': 'W/mÂ²_irr',
   'pv': False},
  {'site': 'Synthetic Site',
   'column': 'Synthetic Weather Station Dry Bulb Temperature',
   'id': 'r:278f79

In [11]:
final_predictors_data.keys()

dict_keys(['predictors', 'target'])

In [12]:
final_predictors_data['target']

{'site': 'Synthetic Site',
 'column': 'Synthetic Site Electricity Main Total Power',
 'id': 'r:294fd256-a17bb5c7 Synthetic Site Electricity Main Total Power',
 'description': 'Synthetic Site Electricity Main Total Power',
 'unit': 'kW',
 'pv': False}

In [13]:
final_predictors_data['predictors']

[{'site': 'Synthetic Site',
  'column': 'Synthetic Weather Station Dew Point Temperature',
  'id': 'r:278f8943-6a199bd7 Synthetic Weather Station Dew Point Temperature',
  'description': 'Synthetic Weather Station Dew Point Temperature',
  'unit': 'Â°F',
  'pv': False},
 {'site': 'Synthetic Site',
  'column': 'Synthetic Weather Station Diffuse Horizontal Irradiance',
  'id': 'r:278f79c0-03da5abc Synthetic Weather Station Diffuse Horizontal Irradiance',
  'description': 'Synthetic Weather Station Diffuse Horizontal Irradiance',
  'unit': 'W/mÂ²_irr',
  'pv': False},
 {'site': 'Synthetic Site',
  'column': 'Synthetic Weather Station Direct Normal Irradiance',
  'id': 'r:278f79c0-4fe536b4 Synthetic Weather Station Direct Normal Irradiance',
  'description': 'Synthetic Weather Station Direct Normal Irradiance',
  'unit': 'W/mÂ²_irr',
  'pv': False},
 {'site': 'Synthetic Site',
  'column': 'Synthetic Weather Station Dry Bulb Temperature',
  'id': 'r:278f79c0-8a8f26b0 Synthetic Weather Stati

In [14]:
final_predictors_data['predictors'][0]

{'site': 'Synthetic Site',
 'column': 'Synthetic Weather Station Dew Point Temperature',
 'id': 'r:278f8943-6a199bd7 Synthetic Weather Station Dew Point Temperature',
 'description': 'Synthetic Weather Station Dew Point Temperature',
 'unit': 'Â°F',
 'pv': False}

In [15]:
len(final_predictors_data['predictors'])

7

### test

In [16]:
saved_predictors = pd.DataFrame(final_predictors_data["predictors"])
saved_predictors

Unnamed: 0,site,column,id,description,unit,pv
0,Synthetic Site,Synthetic Weather Station Dew Point Temperature,r:278f8943-6a199bd7 Synthetic Weather Station ...,Synthetic Weather Station Dew Point Temperature,Â°F,False
1,Synthetic Site,Synthetic Weather Station Diffuse Horizontal I...,r:278f79c0-03da5abc Synthetic Weather Station ...,Synthetic Weather Station Diffuse Horizontal I...,W/mÂ²_irr,False
2,Synthetic Site,Synthetic Weather Station Direct Normal Irradi...,r:278f79c0-4fe536b4 Synthetic Weather Station ...,Synthetic Weather Station Direct Normal Irradi...,W/mÂ²_irr,False
3,Synthetic Site,Synthetic Weather Station Dry Bulb Temperature,r:278f79c0-8a8f26b0 Synthetic Weather Station ...,Synthetic Weather Station Dry Bulb Temperature,Â°F,False
4,Synthetic Site,Synthetic Weather Station Global Horizontal Ir...,r:278f79c0-8d722bdb Synthetic Weather Station ...,Synthetic Weather Station Global Horizontal Ir...,W/mÂ²_irr,False
5,Synthetic Site,Synthetic Weather Station Relative Humidity,r:278f79c0-e7db4305 Synthetic Weather Station ...,Synthetic Weather Station Relative Humidity,%RH,False
6,Synthetic Site,Synthetic Weather Station Wind Speed,r:278f79c0-c1f48e5c Synthetic Weather Station ...,Synthetic Weather Station Wind Speed,mph,False


In [17]:
saved_predictors.shape

(7, 6)